# Chickenpox Prediction Model

In [1]:
import pandas as pd
import numpy as np

Loading data

In [2]:
# Location of the chickenpox CSV, relative to the notebook's working directory
dataset_path = 'chickenpox_dataset.csv'

# Parse the CSV into a DataFrame
df = pd.read_csv(filepath_or_buffer=dataset_path)

# Preview the first ten records
df.head(n=10)

Unnamed: 0,Age,Gender,Vaccination_Status,Fever,Rash,Itching,Fatigue,Chickenpox
0,1,1,2,0,1,0,0,1
1,20,1,2,1,1,0,1,1
2,24,0,0,1,1,0,0,1
3,37,1,1,1,1,1,1,1
4,8,1,2,1,0,1,0,1
5,80,1,1,0,1,0,0,0
6,32,1,0,0,0,0,0,0
7,62,1,0,0,0,0,1,0
8,79,0,1,0,0,1,0,0
9,86,1,1,1,0,0,0,0


In [None]:
# Key:
# Gender (binary: 0 for Male, 1 for Female)
# Vaccination Status (categorical: 0 for Fully Vaccinated, 1 for Partially Vaccinated, 2 for Not Vaccinated)
# Symptoms:
    # Fever (binary: 0 for No, 1 for Yes)
    # Rash (binary: 0 for No, 1 for Yes)
    # Itching (binary: 0 for No, 1 for Yes)
    # Fatigue (binary: 0 for No, 1 for Yes)

In [4]:
# Encoding categorical variables (Gender, Vaccination Status and Symptoms)
# from sklearn.preprocessing import LabelEncoder
# No need for encoding variables since the dataset is already in coded form

Model Implementation, Training and Evaluation

In [5]:
# Split training and testing sets
from sklearn.model_selection import train_test_split

# Define X (features) and y (target).
# 'Unnamed: 0' appears to be the row index that was saved into the CSV (it
# counts 0, 1, 2, ... in the preview above). It is an identifier, not a
# patient attribute, so it must not be fed to the models as a predictor.
# errors='ignore' keeps this cell working if the CSV is ever loaded with
# index_col=0 and the column no longer exists; a missing 'Chickenpox' column
# still fails loudly on the next line.
X = df.drop(columns=['Chickenpox', 'Unnamed: 0'], errors='ignore')
y = df['Chickenpox']

# Splits (80% Training, 20% Testing); the fixed random_state makes the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
# Classifier imports. Nothing is instantiated in this cell: every estimator is
# created inside the `models` list of the evaluation cell.

# KNN
from sklearn.neighbors import KNeighborsClassifier

# Naive Bayes
from sklearn.naive_bayes import GaussianNB

# Logistic Regression
from sklearn.linear_model import LogisticRegression

# Random Forest
from sklearn.ensemble import RandomForestClassifier

# Extreme Gradient Boost
from xgboost import XGBClassifier

# Decision Tree
from sklearn.tree import DecisionTreeClassifier

# Support Vector
from sklearn.svm import SVC

In [7]:
from sklearn.metrics import accuracy_score, classification_report

# Seed handed to every estimator that accepts one and has a randomised
# training procedure, so that Restart Kernel -> Run All reproduces the same
# metrics. Same value as the seed used for the train/test split.
RANDOM_STATE = 42

# Candidate classifiers as (display name, unfitted estimator) pairs
models = [
    ('K-Nearest Neighbors', KNeighborsClassifier()),
    ('Naive Bayes', GaussianNB()),
    ('Logistic Regression', LogisticRegression()),
    ('Random Forest', RandomForestClassifier(random_state=RANDOM_STATE)),
    ('XGBoost', XGBClassifier(random_state=RANDOM_STATE)),
    ('Decision Tree', DecisionTreeClassifier(random_state=RANDOM_STATE)),
    ('Support Vector Machine', SVC())
]

# Fit each candidate on the training split and score it on the held-out split
for model_name, model in models:
    model.fit(X_train, y_train)  # train the model on training data

    # Predict on testing data
    y_pred = model.predict(X_test)

    # Evaluate model: overall accuracy plus per-class precision/recall/F1
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred)

    # Print the evaluation results for this model
    print(f"Model: {model_name}")
    print(f"Prediction: {y_pred}")
    print(f"Accuracy: {accuracy}")
    print(report)
    print('\n' + '-'*50 + '\n')  # Separating each model's results
    



Model: K-Nearest Neighbors
Prediction: [0 0 0 ... 0 1 0]
Accuracy: 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     12510
           1       1.00      1.00      1.00      7490

    accuracy                           1.00     20000
   macro avg       1.00      1.00      1.00     20000
weighted avg       1.00      1.00      1.00     20000


--------------------------------------------------

Model: Naive Bayes
Prediction: [0 0 0 ... 0 1 0]
Accuracy: 0.9371
              precision    recall  f1-score   support

           0       0.93      0.97      0.95     12510
           1       0.94      0.89      0.91      7490

    accuracy                           0.94     20000
   macro avg       0.94      0.93      0.93     20000
weighted avg       0.94      0.94      0.94     20000


--------------------------------------------------

Model: Logistic Regression
Prediction: [0 0 1 ... 0 1 1]
Accuracy: 0.8707
              precision    re