# Importing Libraries

In [None]:
import pandas as pd
import numpy as np
import missingno as msno
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler , Normalizer
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from scipy.stats import norm
from scipy import stats
from sklearn import metrics
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline

# Loading Data

In [None]:
# Load the cleaned symptom-checker dataset from the relative input directory.
df = pd.read_csv("../input/covid19-symptoms-checker/Cleaned-Data.csv")

# Show every column when a frame is displayed. Use the documented top-level
# pd.set_option rather than reaching it through the pd.pandas attribute.
pd.set_option('display.max_columns', None)

# EDA

## `Getting to know data`

In [None]:
# Emit the caption first, then the frame itself as rich output.
display("Peeking into Data")
display(df)

## `Size of data`

In [None]:
# Unpack the (rows, columns) pair once and report both numbers.
n_rows, n_cols = df.shape
display("Shape of dataset")
print("Rows:", n_rows, "\nColumns:", n_cols)

## `NULL Values`

In [None]:
# Count the null entries in each column and show them under a caption.
null_counts = df.isnull().sum()
display("NULL Values", null_counts)

In [None]:
# Summary statistics produced by DataFrame.describe(), shown under a caption.
summary_stats = df.describe()
display("Description", summary_stats)

In [None]:
# Print pandas' concise summary of the frame.
df.info()

## `Checking distribution of data`

In [None]:
# Distribution of all feature values pooled together; Country is the only
# column excluded. sns.distplot is deprecated, so draw the histogram with
# sns.histplot and a KDE overlay. The values are flattened explicitly so a
# single 1-D distribution is plotted.
feature_values = df.drop('Country', axis=1).to_numpy().ravel()
sns.histplot(feature_values, stat="density", kde=True)

In [None]:
# List the distinct values of every column together with how many there are.
for column_name in df.columns:
    unique_values = df[column_name].unique()
    print("\nColumn Name:", column_name, "-->", unique_values, "-->Unique Count", len(unique_values))

In [None]:
# Labels of every column whose name contains 'Severity_'.
severity_columns = df.filter(like='Severity_', axis='columns').columns

In [None]:
# Replace the 1/0 indicators in each severity column with a readable label:
# 1 becomes the severity name, 0 becomes 'No'.
# The result is assigned back to the column instead of calling
# df[col].replace(..., inplace=True): the in-place call acts on a temporary
# Series and does not update df when pandas Copy-on-Write is active.
severity_labels = {
    'Severity_None': 'None',
    'Severity_Mild': 'Mild',
    'Severity_Moderate': 'Moderate',
    'Severity_Severe': 'Severe',
}
for column, label in severity_labels.items():
    df[column] = df[column].replace({1: label, 0: 'No'})

In [None]:
# Store each row's severity-column values as one Python list in 'Condition'.
df['Condition'] = df[severity_columns].to_numpy().tolist()

In [None]:
def removing(list1):
    """Collapse a row of severity labels into a single condition string.

    Every "No" entry is dropped, duplicates are removed, and the remaining
    labels are concatenated in the order they first appear, so the result is
    deterministic even when more than one label survives.

    Parameters
    ----------
    list1 : iterable of str, or str
        The labels for one row. A bare string is treated as an
        already-collapsed label and returned unchanged, instead of being
        split into characters.

    Returns
    -------
    str
        The concatenated labels, or '' when only "No" entries were given.
    """
    if isinstance(list1, str):
        return list1
    # dict.fromkeys removes duplicates while keeping first-seen order; a set
    # would leave the join order unspecified.
    kept = dict.fromkeys(label for label in list1 if label != "No")
    return ''.join(kept)

In [None]:
# Turn each row's label list into a single condition string via removing().
df['Condition'] = df['Condition'].map(removing)

## `Grouping by severity`

In [None]:
# Column labels grouped by the substring they contain: age, gender, contact.
age_columns, gender_columns, contact_columns = (
    df.filter(like=name_part).columns
    for name_part in ('Age_', 'Gender_', 'Contact_')
)

In [None]:
# Totals of the age, gender and contact columns per Severity_None value.
by_none = df.groupby(['Severity_None'])
No_risk_age = by_none[age_columns].sum()
No_risk_gender = by_none[gender_columns].sum()
No_risk_contact = by_none[contact_columns].sum()

In [None]:
# Totals of the age, gender and contact columns per Severity_Mild value.
by_mild = df.groupby(['Severity_Mild'])
Low_risk_age = by_mild[age_columns].sum()
Low_risk_gender = by_mild[gender_columns].sum()
Low_risk_contact = by_mild[contact_columns].sum()

In [None]:
# Totals of the age, gender and contact columns per Severity_Moderate value.
by_moderate = df.groupby(['Severity_Moderate'])
Moderate_risk_age = by_moderate[age_columns].sum()
Moderate_risk_gender = by_moderate[gender_columns].sum()
Moderate_risk_contact = by_moderate[contact_columns].sum()

In [None]:
# Totals of the age, gender and contact columns per Severity_Severe value.
by_severe = df.groupby(['Severity_Severe'])
Severe_risk_age = by_severe[age_columns].sum()
Severe_risk_gender = by_severe[gender_columns].sum()
Severe_risk_contact = by_severe[contact_columns].sum()

In [None]:
# Bar chart of how many rows fall into each Condition label. The column is
# passed by keyword because newer seaborn releases treat the first
# positional argument as `data`, not as the x variable.
sns.countplot(x='Condition', data=df)

# Preprocessing

In [None]:
# Remove the Country column from df in place.
df.drop(columns="Country", inplace=True)

In [None]:
# Remove the severity columns from df in place.
df.drop(columns=severity_columns, inplace=True)

In [None]:
# Row-wise total over column positions 0-4 and 6-9; position 5 is left out.
# NOTE(review): this relies on the CSV's column order - confirm which
# column sits at position 5.
leading_total = df.iloc[:, :5].sum(axis=1)
trailing_total = df.iloc[:, 6:10].sum(axis=1)
df['Symptoms_Score'] = leading_total + trailing_total

In [None]:
# Show the (rows, columns) shape of the frame after the steps above.
df.shape

In [None]:
from sklearn import preprocessing

# Encode the Condition strings as integer class ids: fit the encoder on the
# column, then transform the same column.
le = preprocessing.LabelEncoder()
le.fit(df['Condition'])
df['Condition'] = le.transform(df['Condition'])

In [None]:
# Display the preprocessed frame as this cell's output.
df

# Feature Engineering

In [None]:
from pylab import rcParams

# Enlarge the figure so the annotated cells stay readable.
rcParams['figure.figsize'] = 15, 18

# Select the k columns with the largest correlation to Condition.
k = 22
corrmat = df.corr()
top_rows = corrmat.nlargest(k, 'Condition')
cols = top_rows['Condition'].index

# Correlation coefficients among the selected columns (one variable per row).
cm = np.corrcoef(df[cols].values.T)

sns.set(font_scale=1.25)
tick_labels = cols.values
hm = sns.heatmap(
    cm,
    cbar=True,
    annot=True,
    square=True,
    fmt='.2f',
    annot_kws={'size': 10},
    yticklabels=tick_labels,
    xticklabels=tick_labels,
)
plt.show()

# Model

In [None]:
!pip3 install pgmpy

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pgmpy.models import BayesianModel
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination
import networkx as nn

In [None]:
# Load the CSV that the Bayesian networks below are fitted on.
data = pd.read_csv('../input/covid19symptoms/Cleaned-Data-updated.csv')

In [None]:
# Network structure:
#   Country -> {age, gender, contact} -> every symptom -> Severity_level
# The edge list is built in the same order as a hand-written list would be,
# so node and parent ordering in the graph is unchanged.
model_parents = ['gender', 'age', 'contact_with_covid19_patient']
model_symptoms = [
    'Fever', 'Tiredness', 'Dry-Cough', 'Difficulty-in-Breathing', 'Sore-Throat',
    'None_Sympton', 'Pains', 'Nasal-Congestion', 'Runny-Nose', 'Diarrhea',
]
model_country_children = ['age', 'gender', 'contact_with_covid19_patient']
model_severity_parents = [
    'Fever', 'Tiredness', 'Runny-Nose', 'Nasal-Congestion', 'Diarrhea',
    'Difficulty-in-Breathing', 'Sore-Throat', 'None_Sympton', 'Pains', 'Dry-Cough',
]

model_edges = [(parent, symptom) for parent in model_parents for symptom in model_symptoms]
model_edges += [('Country', child) for child in model_country_children]
model_edges += [(symptom, 'Severity_level') for symptom in model_severity_parents]

model = BayesianModel(model_edges)

In [None]:
# Fit the network's parameters on `data` with the maximum-likelihood estimator.
model.fit(data, estimator=MaximumLikelihoodEstimator)

# Draw the graph on a large canvas using a spiral node layout.
fig, ax = plt.subplots(figsize=(25, 22))
position = nn.spiral_layout(model)
draw_options = dict(with_labels=True, node_color='red', node_size=25000, font_size=20)
nn.draw(model, pos=position, ax=ax, **draw_options)
plt.show()

In [None]:
# Print whatever model.check_model() returns for the fitted network.
print(model.check_model())

In [None]:
# Print the CPDs attached to the fitted network.
print(model.get_cpds())

In [None]:
# Print the CPD of the 'age' node only.
print(model.get_cpds('age'))

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# Build a variable-elimination inference object over the first network.
inference = VariableElimination(model)

In [None]:
# MAP estimate of Severity_level when these six symptoms are all present.
evidence_query1 = {
    'Fever': 1,
    'Dry-Cough': 1,
    'Sore-Throat': 1,
    'Pains': 1,
    'Difficulty-in-Breathing': 1,
    'Nasal-Congestion': 1,
}
query1 = inference.map_query(variables=['Severity_level'], evidence=evidence_query1)
print(query1)

We can say that the major symptoms signifying that a person will have COVID-19 are **difficulty in breathing, fever, pains, dry cough, sore throat, and nasal congestion**; if a person has all of these symptoms, it is almost certain that they have COVID-19.

In [None]:
# MAP estimate of Severity_level with Difficulty-in-Breathing set to 0 and
# the other four listed symptoms set to 1.
evidence_query2 = {
    'Fever': 1,
    'Dry-Cough': 1,
    'Sore-Throat': 1,
    'Pains': 1,
    'Difficulty-in-Breathing': 0,
}
query2 = inference.map_query(variables=['Severity_level'], evidence=evidence_query2)
print(query2)

From this we can say that **Difficulty in Breathing** is the most influential parameter.

In [None]:
# MAP estimate of Severity_level with Dry-Cough set to 0 and Tiredness added
# to the evidence.
evidence_query3 = {
    'Fever': 1,
    'Dry-Cough': 0,
    'Sore-Throat': 1,
    'Pains': 1,
    'Difficulty-in-Breathing': 1,
    'Tiredness': 1,
}
query3 = inference.map_query(variables=['Severity_level'], evidence=evidence_query3)
print(query3)

From this we can say that **tiredness** is not a very important feature.

# Now we try a **different approach**.

In [None]:
# Reduced structure: gender and age point only at three symptoms, contact
# points at every symptom, Country points at age and contact, and every
# symptom points at Severity_level. The edge order matches a hand-written
# list, so node and parent ordering in the graph is unchanged.
model1_demographic_symptoms = ['Difficulty-in-Breathing', 'Nasal-Congestion', 'Diarrhea']
model1_contact_symptoms = [
    'Fever', 'Tiredness', 'Dry-Cough', 'Difficulty-in-Breathing', 'Sore-Throat',
    'None_Sympton', 'Pains', 'Nasal-Congestion', 'Runny-Nose', 'Diarrhea',
]
model1_severity_parents = [
    'Fever', 'Tiredness', 'Runny-Nose', 'Nasal-Congestion', 'Diarrhea',
    'Difficulty-in-Breathing', 'Sore-Throat', 'None_Sympton', 'Pains', 'Dry-Cough',
]

model1_edges = [
    (parent, symptom)
    for parent in ('gender', 'age')
    for symptom in model1_demographic_symptoms
]
model1_edges += [('contact_with_covid19_patient', symptom) for symptom in model1_contact_symptoms]
model1_edges += [('Country', 'age'), ('Country', 'contact_with_covid19_patient')]
model1_edges += [(symptom, 'Severity_level') for symptom in model1_severity_parents]

model1 = BayesianModel(model1_edges)

In [None]:
# Fit the reduced network's parameters on `data` with the maximum-likelihood
# estimator.
model1.fit(data, estimator=MaximumLikelihoodEstimator)

# Draw model1 with a spiral layout computed from model1 itself, so the plot
# does not depend on the first network, `model`.
fig, ax = plt.subplots(figsize=(25, 22))
position = nn.spiral_layout(model1)
nn.draw(model1, pos=position, ax=ax, with_labels=True, node_color='red', node_size=25000, font_size=20)
plt.show()

In [None]:
# Print whatever model1.check_model() returns for the fitted network.
print(model1.check_model())

In [None]:
# Print the CPDs attached to the reduced network.
print(model1.get_cpds())

In [None]:
# Print the CPD of the 'age' node of the reduced network.
print(model1.get_cpds('age'))

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# Build a variable-elimination inference object over the reduced network.
inference1 = VariableElimination(model1)

In [None]:
# MAP estimate of Severity_level when every symptom is present and
# None_Sympton is 0.
query1_1 = inference1.map_query(
    variables=['Severity_level'],
    evidence={'Fever':1,'Tiredness':1,'Dry-Cough':1,'Sore-Throat':1,'Nasal-Congestion':1,'Pains':1,'Difficulty-in-Breathing':1,'Diarrhea':1,'Runny-Nose':1,'None_Sympton':0},
)
# Print the computed result rather than a hard-coded string, so the output
# always reflects the fitted model.
print(query1_1)

If a person has all of the symptoms, it is somewhat unclear whether they have COVID-19.

In [None]:
# MAP estimate of Severity_level when these six symptoms are all present.
query1_2 = inference1.map_query(
    variables=['Severity_level'],
    evidence={'Fever':1,'Dry-Cough':1,'Sore-Throat':1,'Pains':1,'Difficulty-in-Breathing':1,'Diarrhea':1},
)
# Print the variable assigned above; `query_1_2` is not defined anywhere and
# would raise NameError.
print(query1_2)

We can say that the major symptoms signifying that a person will have COVID-19 are **difficulty in breathing, fever, pains, dry cough, sore throat, and diarrhea**; if a person has all of these symptoms, it is almost certain that they have COVID-19.

In [None]:
# MAP estimate of Severity_level for the same six symptoms plus
# Nasal-Congestion.
query1_3 = inference1.map_query(
    variables=['Severity_level'],
    evidence={'Fever':1,'Dry-Cough':1,'Sore-Throat':1,'Pains':1,'Difficulty-in-Breathing':1,'Diarrhea':1,'Nasal-Congestion':1},
)
# Print the computed result rather than a hard-coded string, so the output
# always reflects the fitted model.
print(query1_3)

From this result we can say that nasal congestion is not a major symptom of COVID-19.

# 1.  Finally, we can say that **Fever, Dry-Cough, Sore-Throat, Pains, Difficulty-in-Breathing, and Diarrhea** are the major symptoms which signify COVID-19.