In [89]:
# Mount Google Drive inside the Colab runtime so the files under
# /content/drive become readable/writable by the cells below.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [90]:
!pip install pgmpy



In [91]:
import pandas as pd
from pgmpy.models import BayesianModel
from pgmpy.estimators import ParameterEstimator
from pgmpy.estimators import MaximumLikelihoodEstimator
import pickle
from pgmpy.estimators import BayesianEstimator

In [92]:
# Read the disease/symptom table and replace every missing cell with the
# literal string 'Empty' so that absent symptoms become an ordinary state.
# The path is relative, so it resolves against the current working directory
# (assumes the kernel runs from the directory containing `drive/` — TODO confirm).
# NOTE(review): the CPD printed further down shows state labels with a leading
# space (e.g. "( acidity)"); the raw values may need whitespace stripping — verify.
data = pd.read_csv(
    'drive/MyDrive/Probabilistic_Graphical_Models/test.csv'
).fillna('Empty')

# Column labels, later used to build the network edges.
data_columns = data.columns

# Preview of the first rows.
data_head = data.head()
print(data_head)

            Disease   Symptom_1              Symptom_2              Symptom_3
0  Fungal infection     itching              skin_rash   nodal_skin_eruptions
1  Fungal infection   skin_rash   nodal_skin_eruptions    dischromic _patches
2  Fungal infection     itching   nodal_skin_eruptions    dischromic _patches
3  Fungal infection     itching              skin_rash    dischromic _patches
4  Fungal infection     itching              skin_rash   nodal_skin_eruptions


In [93]:
# Sorted distinct values of the disease column and of each symptom column.
# (A Symptom_4 column was handled here in an earlier revision and is
# currently disabled.)
(
    data_disease_unique,
    data_symptom1_unique,
    data_symptom2_unique,
    data_symptom3_unique,
) = (
    sorted(data[column_name].unique())
    for column_name in ('Disease', 'Symptom_1', 'Symptom_2', 'Symptom_3')
)

In [94]:
# Number of distinct states per column, printed in the order
# Disease, Symptom_1, Symptom_2, Symptom_3.
for unique_states in (
    data_disease_unique,
    data_symptom1_unique,
    data_symptom2_unique,
    data_symptom3_unique,
):
    print(len(unique_states))

41
34
48
54


In [95]:
def CreateLinks(columns):
    """Build the edge list that points every other column at the first one.

    Parameters
    ----------
    columns : sequence
        Ordered column labels; element 0 is the common target of all edges.

    Returns
    -------
    list of tuple
        One ``(columns[i], columns[0])`` pair for each ``i >= 1``, in the
        original column order. Empty when fewer than two columns are given.
    """
    return [(columns[idx], columns[0]) for idx in range(1, len(columns))]

In [96]:
# Build one edge per non-first column, each paired with the first column
# (see CreateLinks), and construct the network from those edges.
# NOTE(review): presumably pgmpy reads each pair as (parent, child), which makes
# every symptom a parent of Disease; the Disease CPD then has one column per
# combination of symptom values — confirm this edge direction is intended.
links = CreateLinks(data_columns)
model = BayesianModel(links)

In [97]:
# Base estimator used here only to obtain raw state counts from the data.
pe = ParameterEstimator(model, data)

# State counts for 'Symptom_1', written to CSV (nothing is printed here).
pe_symptom1 = pe.state_counts('Symptom_1')
pe_symptom1.to_csv('drive/MyDrive/Probabilistic_Graphical_Models/pe_symptom1.csv')

# State counts for 'Disease', written to CSV (nothing is printed here).
# Presumably these are broken down by the symptom columns linked to Disease — verify.
pe_disease = pe.state_counts('Disease')
pe_disease.to_csv('drive/MyDrive/Probabilistic_Graphical_Models/pe_disease.csv')

In [99]:
def ConvertCPDToDataFrame(cpd):
  """Convert a pgmpy tabular CPD into a labelled pandas DataFrame.

  Row and column labels are taken from the CPD itself (``cpd.variable``,
  ``cpd.variables`` and ``cpd.state_names``) instead of from notebook-level
  lists, so the result stays correctly labelled for any number of parent
  variables and does not depend on which cells were run beforehand.

  Parameters
  ----------
  cpd : pgmpy TabularCPD (or any object exposing the same attributes)
      Must provide ``get_values()`` returning a 2-D array shaped
      (states of the variable, combinations of parent states),
      ``variable`` (the child variable name), ``variables`` (child first,
      followed by its parents) and ``state_names`` (name -> list of states).

  Returns
  -------
  pandas.DataFrame
      One row per state of ``cpd.variable``. With parents, the columns form
      a MultiIndex over the cartesian product of the parents' states, named
      after the parents. Without parents there is a single column named
      after the variable.
  """
  values = cpd.get_values()
  row_labels = cpd.state_names[cpd.variable]

  # `variables` lists the child first; everything after it is a parent.
  parents = list(cpd.variables[1:])
  if not parents:
    # A marginal distribution has exactly one column of probabilities.
    return pd.DataFrame(data=values, columns=[cpd.variable], index=row_labels)

  # from_product makes the last parent vary fastest, which matches the
  # row-major flattening of the parent axes done by get_values().
  header = pd.MultiIndex.from_product(
      [cpd.state_names[parent] for parent in parents],
      names=parents)
  return pd.DataFrame(data=values, columns=header, index=row_labels)

In [100]:
# Maximum-likelihood estimator over the same model and data.
mle = MaximumLikelihoodEstimator(model, data)

# Estimate and print the CPD of 'Symptom_1'; the printed table below lists
# one probability per Symptom_1 state.
mle_symptom1 = mle.estimate_cpd('Symptom_1')
print(mle_symptom1)

+-----------------------------------------+------------+
| Symptom_1( acidity)                     | 0.0243902  |
+-----------------------------------------+------------+
| Symptom_1( back_pain)                   | 0.0219512  |
+-----------------------------------------+------------+
| Symptom_1( bladder_discomfort)          | 0.00243902 |
+-----------------------------------------+------------+
| Symptom_1( breathlessness)              | 0.00243902 |
+-----------------------------------------+------------+
| Symptom_1( burning_micturition)         | 0.0219512  |
+-----------------------------------------+------------+
| Symptom_1( chest_pain)                  | 0.00243902 |
+-----------------------------------------+------------+
| Symptom_1( chills)                      | 0.0963415  |
+-----------------------------------------+------------+
| Symptom_1( constipation)                | 0.0231707  |
+-----------------------------------------+------------+
| Symptom_1( continuous_sneezin

In [101]:
# Estimate the maximum-likelihood CPD of 'Disease' (not printed here; it is
# converted to a DataFrame and saved in the following cells).
mle_disease = mle.estimate_cpd('Disease')

In [102]:
# Turn the maximum-likelihood Disease CPD into a labelled DataFrame.
df_mle_disease = ConvertCPDToDataFrame(mle_disease)

In [103]:
# Persist the maximum-likelihood Disease table to Drive as CSV.
df_mle_disease.to_csv('drive/MyDrive/Probabilistic_Graphical_Models/mle_disease.csv')

In [104]:
# Bayesian parameter estimation of the 'Disease' CPD with a BDeu prior and an
# equivalent sample size of 10.
# BayesianEstimator is already imported in the notebook's import cell, so the
# duplicate mid-notebook import has been dropped.
est = BayesianEstimator(model, data)
est_disease = est.estimate_cpd('Disease', prior_type='BDeu', equivalent_sample_size=10)

In [105]:
# Turn the Bayesian-estimated Disease CPD into a labelled DataFrame.
df_est_disease = ConvertCPDToDataFrame(est_disease)

In [106]:
# Persist the Bayesian-estimated Disease table to Drive as CSV.
df_est_disease.to_csv('drive/MyDrive/Probabilistic_Graphical_Models/est_disease.csv')