In [33]:
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.model_selection import GridSearchCV

In [15]:
# Read the three pre-split datasets from CSV files in the working directory.
train = pd.read_csv('train_df.csv')
valid = pd.read_csv('valid_df.csv')
test = pd.read_csv('test_df.csv')

# For each split: x_* holds every column except the label, y_* holds the
# 'is_corona_positive' label column. The source frames are left unmodified.
x_train, y_train = train.drop(columns=['is_corona_positive']), train['is_corona_positive']
x_valid, y_valid = valid.drop(columns=['is_corona_positive']), valid['is_corona_positive']
x_test, y_test = test.drop(columns=['is_corona_positive']), test['is_corona_positive']

In [14]:
# Display the test-set labels (pandas truncates the repr to the first and last rows).
y_test

0        0
1        0
2        0
3        1
4        0
        ..
27466    0
27467    0
27468    0
27469    0
27470    0
Name: is_corona_positive, Length: 27471, dtype: int64

In [37]:
# Baseline XGBoost classifier with hand-picked hyperparameters, fitted on the
# training split.
model = xgb.XGBClassifier(
    learning_rate=0.1,
    max_depth=3,
    n_estimators=100,
)
model.fit(x_train, y_train)

# Predict the test split once, then score both metrics against the same
# predictions.
y_pred = model.predict(x_test)
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print("Accuracy:", accuracy)
print("F1 Score: ", f1)

Accuracy: 0.967966218921772
F1 Score:  0.6881644223954643


In [24]:
# Candidate hyperparameters: 3 x 3 x 3 = 27 combinations, each evaluated with
# 5-fold cross-validation on the training split.
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [3, 5, 7],
    'learning_rate': [0.1, 0.01, 0.001]
}

model_tune = xgb.XGBClassifier()

# Select on F1 instead of the default scorer (the estimator's accuracy-based
# `score`). This notebook reports F1 alongside accuracy, and the target appears
# imbalanced (baseline accuracy ~0.97 vs F1 ~0.69), so maximising accuracy can
# pick a configuration that is worse on the positive class.
grid_search = GridSearchCV(
    estimator=model_tune,
    param_grid=param_grid,
    scoring='f1',
    cv=5,
)
grid_search.fit(x_train, y_train)

# Retrieve the best hyperparameters
best_params = grid_search.best_params_

In [25]:
# Show the hyperparameter combination selected by the grid search.
best_params

{'learning_rate': 0.1, 'max_depth': 7, 'n_estimators': 300}

In [38]:
# Build the tuned classifier directly from the grid-search result rather than
# re-typing its values, so this cell cannot drift out of sync with
# `best_params` when the search is re-run or its grid changes.
model_tuned = xgb.XGBClassifier(**best_params)

model_tuned.fit(x_train, y_train)

# Evaluate on the same test split as the baseline model for a like-for-like
# comparison.
y_tuned_pred = model_tuned.predict(x_test)

tuned_accuracy = accuracy_score(y_test, y_tuned_pred)
print("Tuned Accuracy:", tuned_accuracy)

tuned_f1 = f1_score(y_test, y_tuned_pred)
print("Tuned F1 Score: ",tuned_f1)

Tuned Accuracy: 0.9701867423828765
Tuned F1 Score:  0.684393063583815


In [27]:
# `tuned_accuracy` is the score of `model_tuned`; label it as such so it is not
# mistaken for the baseline model's accuracy, which is printed as "Accuracy:".
print("Tuned Accuracy:", tuned_accuracy)

Accuracy: 0.967966218921772
