In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler

In [17]:
# Load the raw dataset (demographics, symptom flags and the `Infected` label,
# per the preview below). The path is relative to the notebook's working directory.
df = pd.read_csv("Madedata1.csv")

In [18]:
# Dimensions of the raw table as (rows, columns).
df.shape

(2499, 12)

In [19]:
# Preview the first five rows to check column names and value formats.
df.head()

Unnamed: 0,Country,Age,Gender,fever,Bodypain,Runny_nose,Difficulty_in_breathing,Nasal_congestion,Sore_throat,Severity,Contact_with_covid_patient,Infected
0,China,10,Male,102,1,0,0,0,1,Mild,No,0
1,Italy,20,Male,103,1,1,0,0,0,Moderate,Not known,1
2,Iran,55,Transgender,99,0,0,0,1,1,Severe,No,0
3,Republic of Korean,37,Female,100,0,1,1,0,0,Mild,Yes,1
4,France,45,Male,101,1,1,1,1,0,Moderate,Yes,1


In [20]:
# Country is discarded and never used as a model feature.
# Reassigning (instead of inplace=True) together with errors='ignore' keeps
# this cell idempotent: re-running it no longer raises KeyError once the
# column is already gone.
df = df.drop(columns=['Country'], errors='ignore')

In [21]:
# Re-check the dimensions after removing the Country column.
df.shape

(2499, 11)

In [22]:
# Preview again to confirm which columns remain.
df.head()

Unnamed: 0,Age,Gender,fever,Bodypain,Runny_nose,Difficulty_in_breathing,Nasal_congestion,Sore_throat,Severity,Contact_with_covid_patient,Infected
0,10,Male,102,1,0,0,0,1,Mild,No,0
1,20,Male,103,1,1,0,0,0,Moderate,Not known,1
2,55,Transgender,99,0,0,0,1,1,Severe,No,0
3,37,Female,100,0,1,1,0,0,Mild,Yes,1
4,45,Male,101,1,1,1,1,0,Moderate,Yes,1


In [23]:
# Distinct gender labels present in the data.
df['Gender'].unique()

array(['Male', 'Transgender', 'Female'], dtype=object)

In [24]:
# Distinct severity labels present in the data.
df['Severity'].unique()

array(['Mild', 'Moderate', 'Severe'], dtype=object)

In [25]:
# Distinct contact labels; the output shows both 'Yes' and 'yes', i.e. the
# same answer recorded with inconsistent casing.
df['Contact_with_covid_patient'].unique()

array(['No', 'Not known', 'Yes', 'yes'], dtype=object)

In [26]:
# Lower-case every label so that 'Yes' and 'yes' collapse into a single
# category before encoding.
df['Contact_with_covid_patient'] = df['Contact_with_covid_patient'].str.lower()

In [27]:
# Pull the three categorical columns out as NumPy arrays so they can be
# encoded separately from the numeric features.
gender, severity, contact = (
    df[column].values
    for column in ('Gender', 'Severity', 'Contact_with_covid_patient')
)

In [28]:
# Remove the categorical columns from the frame; their values were already
# extracted into separate arrays and are encoded on their own.
# Reassigning (instead of inplace=True) together with errors='ignore' keeps
# this cell idempotent: re-running it does not raise KeyError once the
# columns are already gone.
df = df.drop(
    columns=['Gender', 'Severity', 'Contact_with_covid_patient'],
    errors='ignore',
)

In [30]:
# Map each gender string to an integer code. The fitted encoder is kept in
# `gender_label` so the learned classes remain available afterwards.
gender_label = LabelEncoder().fit(gender)
gender = gender_label.transform(gender)

In [31]:
# Inspect the integer codes produced by the label encoder.
gender

array([1, 1, 2, ..., 0, 0, 0])

In [32]:
# Encoder that expands the integer gender codes into one indicator column per class.
gender_onehot = OneHotEncoder()

In [34]:
# Preview only (result is not assigned): the encoder needs a 2-D column, hence
# the reshape, and it returns a sparse matrix (see the repr below).
gender_onehot.fit_transform(gender.reshape(-1,1))

<2499x3 sparse matrix of type '<class 'numpy.float64'>'
	with 2499 stored elements in Compressed Sparse Row format>

In [36]:
# Preview only (result is not assigned): same encoding, converted to a dense
# array so the indicator columns are visible. Note this refits the encoder.
gender_onehot.fit_transform(gender.reshape(-1,1)).toarray()

array([[0., 1., 0.],
       [0., 1., 0.],
       [0., 0., 1.],
       ...,
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.]])

In [37]:
# One-hot encode the integer gender codes. The codes are reshaped into a
# single 2-D column for the encoder, and the sparse result is densified so it
# can be stacked with the other feature arrays later.
gender = (
    gender_onehot
    .fit_transform(np.reshape(gender, (-1, 1)))
    .toarray()
)

In [38]:
# --- Severity: string label -> integer code -> one-hot columns ---
severity_label = LabelEncoder()
severity = severity_label.fit_transform(severity)

severity_onehot = OneHotEncoder()
severity = severity_onehot.fit_transform(severity.reshape(-1, 1)).toarray()

# --- Contact with a COVID patient: same two-step encoding ---
# Fit the dedicated `contact_label` encoder. Previously `severity_label` was
# re-fitted here, which overwrote its severity classes and left
# `contact_label` unfitted, so neither encoder could be used afterwards to
# decode (or transform new values of) its own column.
contact_label = LabelEncoder()
contact = contact_label.fit_transform(contact)

contact_onehot = OneHotEncoder()
contact = contact_onehot.fit_transform(contact.reshape(-1, 1)).toarray()

In [39]:
# Inspect the one-hot encoded severity matrix.
severity

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.],
       ...,
       [1., 0., 0.],
       [1., 0., 0.],
       [0., 0., 1.]])

In [40]:
# Inspect the one-hot encoded contact matrix.
contact

array([[1., 0., 0.],
       [0., 1., 0.],
       [1., 0., 0.],
       ...,
       [0., 0., 1.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [41]:
# Every column except the last is a feature; the last column is the target.
X, y = df.iloc[:, :-1].values, df.iloc[:, -1].values

In [42]:
# First feature row, before the encoded categorical columns are appended.
X[0]

array([ 10, 102,   1,   0,   0,   0,   1], dtype=int64)

In [43]:
# Target value for the first row.
y[0]

0

In [44]:
# Append the one-hot gender, severity and contact columns to the right of the
# numeric features, producing a single feature matrix.
X = np.column_stack((X, gender, severity, contact))

In [45]:
# First row after appending the one-hot columns (7 numeric + 3 + 3 + 3 = 16 values).
X[0]

array([ 10., 102.,   1.,   0.,   0.,   0.,   1.,   0.,   1.,   0.,   1.,
         0.,   0.,   1.,   0.,   0.])

In [46]:
# Standardise every feature column (zero mean, unit variance).
#
# The scaler is fitted on the training rows only. Fitting it on the full
# matrix would let test-set statistics leak into the features the model is
# later evaluated on. train_test_split chooses rows purely from the sample
# count, test_size and random_state, so using the same settings as the split
# cell further down (test_size=0.25, random_state=10) reproduces exactly its
# training rows. Keep the two in sync if either value is changed.
# Downstream metrics may shift slightly relative to a scaler fitted on all rows.
from sklearn.model_selection import train_test_split

train_rows = train_test_split(np.arange(len(X)), test_size=0.25, random_state=10)[0]
st = StandardScaler().fit(X[train_rows])
X = st.transform(X)

In [47]:
from sklearn.model_selection import train_test_split

In [48]:
# Dimensions of the final feature matrix as (samples, features).
X.shape

(2499, 16)

In [54]:
# Without random_state the rows are shuffled differently on every execution,
# so the split (and every metric computed from it) changes from run to run:
# x_train, x_test, y_train, y_test = train_test_split(X,y,test_size=0.25)

# Passing random_state does NOT disable shuffling; it seeds the shuffle so the
# same train/test partition is reproduced on every run. 25% of rows are held out.
x_train, x_test, y_train, y_test = train_test_split(X,y,test_size=0.25,random_state=10)

In [55]:
# Training features: (rows, features).
x_train.shape

(1874, 16)

In [56]:
# Held-out test features: (rows, features).
x_test.shape

(625, 16)

In [57]:
# Training labels; the length should match the row count of x_train.
y_train.shape

(1874,)

In [58]:
# Test labels; the length should match the row count of x_test.
y_test.shape

(625,)

In [59]:
from sklearn.linear_model import LogisticRegression

In [60]:
# Logistic-regression classifier constructed with no arguments, i.e. with
# scikit-learn's default hyper-parameters.
logistic = LogisticRegression()

In [61]:
# Fit the classifier on the training partition.
logistic.fit(x_train, y_train)

LogisticRegression()

In [62]:
# Predict class labels for the held-out test rows.
y_pred = logistic.predict(x_test)

In [63]:
from sklearn.metrics import accuracy_score

In [64]:
# Fraction of test rows whose predicted label equals the true label.
accuracy_score(y_test, y_pred)

0.9456

In [65]:
# confusion matrix
from sklearn.metrics import confusion_matrix

In [66]:
# Rows are true classes and columns are predicted classes (scikit-learn's
# convention), so the diagonal holds the correctly classified counts.
confusion_matrix(y_test, y_pred)

array([[290,  23],
       [ 11, 301]], dtype=int64)

In [67]:
# Cross-check accuracy from the confusion matrix: correct predictions lie on
# the diagonal, so accuracy = trace / total. Deriving the counts from the
# matrix (instead of typing them in by hand) keeps this check valid whenever
# the data, the split or the model changes.
cm = confusion_matrix(y_test, y_pred)
cm.trace() / cm.sum()

0.9456

In [68]:
from sklearn.metrics import precision_score, recall_score, accuracy_score, f1_score

In [69]:
# Precision for the positive class: of the rows predicted as infected, the
# share that truly are (TP / (TP + FP)).
precision_score(y_test, y_pred)

0.9290123456790124

In [70]:
# Recall for the positive class: of the truly infected rows, the share the
# model found (TP / (TP + FN)).
recall_score(y_test, y_pred)

0.9647435897435898

In [71]:
# F1 score: harmonic mean of precision and recall for the positive class.
f1_score(y_test, y_pred)

0.9465408805031447

In [72]:
from sklearn.metrics import classification_report

In [74]:
# Per-class precision, recall, F1 and support, plus overall averages, in one
# text table (print is needed because the report is a formatted string).
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.96      0.93      0.94       313
           1       0.93      0.96      0.95       312

    accuracy                           0.95       625
   macro avg       0.95      0.95      0.95       625
weighted avg       0.95      0.95      0.95       625

