# **Import Libraries**

In [41]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

# **Read Data**

In [42]:
data = pd.read_csv("covid_dataset.csv")

# **EDA**

In [43]:
data.shape

(532, 16)

In [44]:
data.columns

Index(['Q1', 'Q2', 'Q3', 'Q4', 'Q5', 'Q6', 'Q7', 'Q8', 'Q9', 'Q10', 'Q11',
       'Q12', 'Q13', 'Q14', 'Q15', 'Result'],
      dtype='object')

In [45]:
data.head()

Unnamed: 0,Q1,Q2,Q3,Q4,Q5,Q6,Q7,Q8,Q9,Q10,Q11,Q12,Q13,Q14,Q15,Result
0,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,Infected
1,1,0,1,0,0,1,0,0,1,1,0,0,0,0,0,Infected
2,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,Infected
3,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,Infected
4,1,0,1,1,1,1,0,0,1,1,0,0,0,0,0,Infected


In [46]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 532 entries, 0 to 531
Data columns (total 16 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Q1      532 non-null    int64 
 1   Q2      532 non-null    int64 
 2   Q3      532 non-null    int64 
 3   Q4      532 non-null    int64 
 4   Q5      532 non-null    int64 
 5   Q6      532 non-null    int64 
 6   Q7      532 non-null    int64 
 7   Q8      532 non-null    int64 
 8   Q9      532 non-null    int64 
 9   Q10     532 non-null    int64 
 10  Q11     532 non-null    int64 
 11  Q12     532 non-null    int64 
 12  Q13     532 non-null    int64 
 13  Q14     532 non-null    int64 
 14  Q15     532 non-null    int64 
 15  Result  532 non-null    object
dtypes: int64(15), object(1)
memory usage: 66.6+ KB


In [47]:
data.describe()

Unnamed: 0,Q1,Q2,Q3,Q4,Q5,Q6,Q7,Q8,Q9,Q10,Q11,Q12,Q13,Q14,Q15
count,532.0,532.0,532.0,532.0,532.0,532.0,532.0,532.0,532.0,532.0,532.0,532.0,532.0,532.0,532.0
mean,0.266917,0.212406,0.304511,0.233083,0.317669,0.236842,0.159774,0.195489,0.287594,0.255639,0.174812,0.12594,0.191729,0.163534,0.163534
std,0.442765,0.409396,0.460633,0.423192,0.466008,0.425545,0.366742,0.39695,0.453067,0.43663,0.380164,0.332094,0.394032,0.3702,0.3702
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


#**Preprocessing**

In [48]:
X = data[['Q1', 'Q2', 'Q3', 'Q4', 'Q5', 'Q6', 'Q7', 'Q8', 'Q9', 'Q10', 'Q11',
       'Q12', 'Q13', 'Q14', 'Q15']].values
y = data.iloc[:, -1].values

In [49]:
from sklearn.preprocessing import StandardScaler
std_scaler = StandardScaler()
scaled_X = std_scaler.fit_transform(X)

#**Model Building**

In [50]:
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

In [51]:

X_train, X_test, y_train, y_test = train_test_split(scaled_X, y, test_size=0.2, random_state = 2, stratify = y)
#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state = 2, stratify = y)

In [52]:
model = SVC(kernel='linear', gamma='auto', C=0.3, probability=True)
model.fit(X_train, y_train)

SVC(C=0.3, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear',
    max_iter=-1, probability=True, random_state=None, shrinking=True, tol=0.001,
    verbose=False)

#**Prediction and Evaluation**

In [53]:
prediction = model.predict(X_test)

In [54]:
print(classification_report(y_test,prediction))

              precision    recall  f1-score   support

    Infected       0.90      0.90      0.90        30
        Mild       0.93      0.96      0.95        27
    Moderate       0.89      0.80      0.84        20
        Safe       0.94      0.97      0.95        30

    accuracy                           0.92       107
   macro avg       0.91      0.91      0.91       107
weighted avg       0.92      0.92      0.91       107



In [55]:
model.score(X_test, y_test)

0.9158878504672897

In [56]:
confusion_matrix(y_test,prediction)

array([[27,  1,  2,  0],
       [ 0, 26,  0,  1],
       [ 3,  0, 16,  1],
       [ 0,  1,  0, 29]])

In [58]:

X_predict_scaled = std_scaler.transform([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])
prediction = model.predict(X_predict_scaled)
prediction

array(['Safe'], dtype=object)

In [59]:
X_predict_scaled = std_scaler.transform([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])
prediction = model.predict(X_predict_scaled)
prediction

array(['Infected'], dtype=object)

#**Saving Model**

In [61]:
from sklearn.externals import joblib

# Save to file in the current working directory
joblib.dump(model, "covid_model.pkl")
joblib.dump(std_scaler, "covid_scaler.pkl")

['covid_scaler.pkl']

In [62]:
model2 = joblib.load("covid_model.pkl")
scaler2 = joblib.load("covid_scaler.pkl")

In [72]:
scaled_data = scaler2.transform([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0]])
prediction = model2.predict(scaled_data)
prediction

array(['Safe'], dtype=object)

In [71]:
scaled_data = scaler2.transform([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0]])
prediction = model2.predict(scaled_data)
prediction

array(['Infected'], dtype=object)