In [1]:
# 1. Import required libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from xgboost import XGBClassifier

# 2. Load and preprocess dataset
data = pd.read_csv("../Datasets/dengue.csv")

# Check and encode 'Sex' column
print(data['Sex'].unique())
data['Sex'] = data['Sex'].map({'Male': 0, 'Female': 1, 'Child': 2})
print(data['Sex'].isna().sum())
print(data[['Sex']].head())

# Check and fill missing values
print('Missing values:', data.isnull().sum())
data = data.fillna(data.mean(numeric_only=True))

# 3. Split features and target
X = data.drop('target', axis=1)
y = data['target'].astype(int)
print(y)

# 4. Train-test split
print(y.unique())
print(y.dtype)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# 5. Initialize and train XGBoost model
model = XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
model.fit(X_train, y_train)

# 6. Make predictions
y_pred = model.predict(X_test)

# 7. Evaluate model
print('\n✅ Accuracy:', accuracy_score(y_test, y_pred))
print('\n✅ Classification Report:\n', classification_report(y_test, y_pred))
print('\n✅ Confusion Matrix:\n', confusion_matrix(y_test, y_pred))


['Male' 'Female' 'Child']
0
   Sex
0    0
1    0
2    1
3    1
4    1
Missing values: Age                    0
Sex                    0
Haemoglobin            0
WBC Count             24
Differential Count     0
RBC PANEL              0
Platelet Count        17
PDW                   19
target                14
dtype: int64
0       1
1       1
2       1
3       1
4       1
       ..
998     0
999     0
1000    0
1001    0
1002    0
Name: target, Length: 1003, dtype: int64
[1 0]
int64

✅ Accuracy: 0.9833887043189369

✅ Classification Report:
               precision    recall  f1-score   support

           0       0.99      0.96      0.97        97
           1       0.98      1.00      0.99       204

    accuracy                           0.98       301
   macro avg       0.99      0.98      0.98       301
weighted avg       0.98      0.98      0.98       301


✅ Confusion Matrix:
 [[ 93   4]
 [  1 203]]


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
