In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, roc_auc_score
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import VotingClassifier
import xgboost as xgb
from catboost import CatBoostClassifier
import lightgbm as lgb
from sklearn.preprocessing import StandardScaler

# Split data into features (X) and target (y)
X = pd.read_csv('clean2_train.csv')
y = pd.read_csv('target.csv')
y = y.target
test = pd.read_csv('clean2_test.csv')


# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale data using StandardScaler
# scaler = StandardScaler()
# X_train_scaled = scaler.fit_transform(X_train)
# X_test_scaled = scaler.transform(X_test)
# Define hyperparameter tuning space for each model
xgb_params = {'max_depth': [3, 5], 'learning_rate': [0.1, 0.5], 'scale_pos_weight': [5, 10]}
catboost_params = {'depth': [6, 8], 'learning_rate': [0.1, 0.5], 'scale_pos_weight': [5, 10]}
lgb_params = {'max_depth': [3, 5], 'learning_rate': [0.1, 0.5], 'is_unbalance': [True]}

# Define models
xgb_model = xgb.XGBClassifier(random_state=42)
catboost_model = CatBoostClassifier(random_state=42)
lgb_model = lgb.LGBMClassifier(random_state=42)

# Perform grid search for hyperparameter tuning
xgb_grid = GridSearchCV(xgb_model, xgb_params, cv=4, scoring='f1')
catboost_grid = GridSearchCV(catboost_model, catboost_params, cv=4, scoring='f1')
lgb_grid = GridSearchCV(lgb_model, lgb_params, cv=4, scoring='f1')

xgb_grid.fit(X_train, y_train)
catboost_grid.fit(X_train, y_train)
lgb_grid.fit(X_train, y_train)

# Get best-performing models
best_xgb = xgb_grid.best_estimator_
best_catboost = catboost_grid.best_estimator_
best_lgb = lgb_grid.best_estimator_
# Make predictions using best-performing models
y_pred_xgb = best_xgb.predict(X_test)
y_pred_catboost = best_catboost.predict(X_test)
y_pred_lgb = best_lgb.predict(X_test)

# Define voting classifier
voting_clf = VotingClassifier(estimators=[
    ('xgb', best_xgb), ('catboost', best_catboost), ('lgb', best_lgb)
], voting='unanimous')

# Fit voting classifier
voting_clf.fit(X_train, y_train)

# Make predictions using voting classifier
y_pred_voting = voting_clf.predict(X_test)

# Evaluate models
print("XGBoost:")
print(f"AUC-ROC: {roc_auc_score(y_test, y_pred_xgb)}")
print(f"F1 score: {f1_score(y_test, y_pred_xgb)}")
print(f"Accuracy: {accuracy_score(y_test, y_pred_xgb)}")
print(f"Precision: {precision_score(y_test, y_pred_xgb)}")

print("\nCatBoost:")
print(f"AUC-ROC: {roc_auc_score(y_test, y_pred_catboost)}")
print(f"F1 score: {f1_score(y_test, y_pred_catboost)}")
print(f"Accuracy: {accuracy_score(y_test, y_pred_catboost)}")
print(f"Precision: {precision_score(y_test, y_pred_catboost)}")

print("\nLightGBM:")
print(f"AUC-ROC: {roc_auc_score(y_test, y_pred_lgb)}")
print(f"F1 score: {f1_score(y_test, y_pred_lgb)}")
print(f"Accuracy: {accuracy_score(y_test, y_pred_lgb)}")
print(f"Precision: {precision_score(y_test, y_pred_lgb)}")

print("\nVoting Classifier:")
print(f"AUC-ROC: {roc_auc_score(y_test, y_pred_voting)}")
print(f"F1 score: {f1_score(y_test, y_pred_voting)}")
print(f"Accuracy: {accuracy_score(y_test, y_pred_voting)}")
print(f"Precision: {precision_score(y_test, y_pred_voting)}")

#predict on test data and save
submit = voting_clf.predict(test.drop(['patient_id'], axis=1))

# Create submission DataFrame and save to CSV
submit_df = pd.DataFrame({'id': test['patient_id'], 'target': submit})
submit_df.to_csv('submitmulticlassifers.csv', index=False)

0:	learn: 0.5631235	total: 28.6ms	remaining: 28.5s
1:	learn: 0.4679746	total: 33.3ms	remaining: 16.6s
2:	learn: 0.3960179	total: 38.6ms	remaining: 12.8s
3:	learn: 0.3075440	total: 44ms	remaining: 10.9s
4:	learn: 0.2416403	total: 49.4ms	remaining: 9.83s
5:	learn: 0.2171421	total: 54.8ms	remaining: 9.08s
6:	learn: 0.2034418	total: 60.7ms	remaining: 8.62s
7:	learn: 0.1921203	total: 67.1ms	remaining: 8.32s
8:	learn: 0.1839575	total: 72.3ms	remaining: 7.96s
9:	learn: 0.1664959	total: 78.6ms	remaining: 7.78s
10:	learn: 0.1544841	total: 91.2ms	remaining: 8.2s
11:	learn: 0.1521502	total: 95.9ms	remaining: 7.9s
12:	learn: 0.1430489	total: 103ms	remaining: 7.83s
13:	learn: 0.1417675	total: 108ms	remaining: 7.61s
14:	learn: 0.1385286	total: 113ms	remaining: 7.43s
15:	learn: 0.1378094	total: 119ms	remaining: 7.34s
16:	learn: 0.1333051	total: 126ms	remaining: 7.28s
17:	learn: 0.1296151	total: 133ms	remaining: 7.27s
18:	learn: 0.1277049	total: 138ms	remaining: 7.14s
19:	learn: 0.1261077	total: 143ms