In [1]:
import pandas as pd
from sklearn.base import clone
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_score, accuracy_score, \
                            recall_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the preprocessed dataset; every column other than the two vaccine
# label columns is used as a feature.
df = pd.read_csv('preprocessed_data.csv')

# Split data
X = df.drop(columns=['h1n1_vaccine', 'seasonal_vaccine'])
# Predicting on two values --> Two models
y_h1n1 = df['h1n1_vaccine']
y_seasonal = df['seasonal_vaccine']

# Training and testing sets (20% testing).
# One call splits the features and BOTH targets with the same shuffled
# indices, so X_train / X_test are guaranteed to line up with each label
# set. Splitting per target in separate calls would overwrite X_train /
# X_test and stay aligned only as long as the seeds happened to match.
(X_train, X_test,
 y_train_h1n1, y_test_h1n1,
 y_train_seasonal, y_test_seasonal) = train_test_split(
    X, y_h1n1, y_seasonal, test_size=0.2, random_state=123)

# Scale data: fit the scaler on the training split only, then apply the
# same transformation to the test split so no test statistics leak into
# training. Both results are NumPy arrays from here on.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [2]:
# Train and fit model (H1N1 target).
# A random forest is stochastic (bootstrap samples, random feature subsets),
# so pin random_state: without it every Restart & Run All yields a different
# model and different metrics. 123 matches the seed of the train/test split.
rf = RandomForestClassifier(random_state=123)
rf.fit(X_train, y_train_h1n1)

In [3]:
# Prediction for H1N1 using the model fitted in the training cell.
y_pred_h1n1 = rf.predict(X_test)

# Seasonal vaccination prediction uses its own estimator instead of
# refitting `rf` in place. Refitting `rf` would leave it trained on the
# seasonal target, so re-running this cell would silently produce "H1N1"
# predictions from the seasonal model. clone() gives a fresh, unfitted
# estimator with the same hyperparameters as `rf`, keeping both models
# configured identically.
rf_seasonal = clone(rf)
rf_seasonal.fit(X_train, y_train_seasonal)
y_pred_seasonal = rf_seasonal.predict(X_test)

In [4]:
# Metrics for H1N1 Prediction
# Score the held-out H1N1 labels against the predictions; the scorers are
# applied in the order accuracy, precision, recall, F1.
accuracy, precision, recall, f1 = (
    scorer(y_test_h1n1, y_pred_h1n1)
    for scorer in (accuracy_score, precision_score, recall_score, f1_score)
)

# Report: heading first, then one "<label> <value>" line per metric.
print("Prediction for H1N1 Vaccination Status\n")
for label, score in (
    ("Accuracy:", accuracy),
    ("Precision:", precision),
    ("Recall:", recall),
    ("F1 Score:", f1),
):
    print(label, score)

Prediction for H1N1 Vaccination Status

Accuracy: 0.8275926619243729
Precision: 0.6831210191082803
Recall: 0.37271937445699393
F1 Score: 0.4822934232715008


In [6]:
# Metrics for Seasonal Prediction
# Score the held-out seasonal labels against the predictions; the scorers
# are applied in the order accuracy, precision, recall, F1.
accuracy, precision, recall, f1 = (
    scorer(y_test_seasonal, y_pred_seasonal)
    for scorer in (accuracy_score, precision_score, recall_score, f1_score)
)

# Report: heading first, then one "<label> <value>" line per metric.
print("Prediction for Seasonal Vaccination Status\n")
for label, score in (
    ("Accuracy:", accuracy),
    ("Precision:", precision),
    ("Recall:", recall),
    ("F1 Score:", f1),
):
    print(label, score)

Prediction for Seasonal Vaccination Status

Accuracy: 0.770685136652939
Precision: 0.768
Recall: 0.7301841473178543
F1 Score: 0.7486148163349066
