In [1]:
import lightgbm as lgb
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import GridSearchCV
import pandas as pd

ModuleNotFoundError: No module named 'lightgbm'

In [None]:
# Read the pre-split train / validation / test partitions from CSV files
# in the current working directory.
train_df = pd.read_csv("train_df.csv")
valid_df = pd.read_csv("valid_df.csv")
test_df = pd.read_csv("test_df.csv")

# Split the train and validation partitions into the label column
# ('is_corona_positive') and the remaining feature columns.
# The test partition is loaded above but not split in this cell.
y_train = train_df['is_corona_positive']
X_train = train_df.drop(columns='is_corona_positive')

y_valid = valid_df['is_corona_positive']
X_valid = valid_df.drop(columns='is_corona_positive')

In [None]:
# Define the LightGBM model.
# subsample_freq=1 turns row bagging on (re-sampled every iteration). With
# LightGBM's default subsample_freq=0 the `subsample` values searched below
# are ignored, so every `subsample` setting would train the same model and
# the grid would do 3x redundant work.
model = lgb.LGBMClassifier(random_state=42, subsample_freq=1)

# Define the hyperparameters to tune (3^5 = 243 combinations).
param_grid = {
    'learning_rate': [0.01, 0.1, 0.5],
    'num_leaves': [20, 30, 40],
    'max_depth': [5, 10, 15],
    'subsample': [0.8, 0.9, 1.0],
    'colsample_bytree': [0.8, 0.9, 1.0]
}

# Perform grid search cross-validation, scored by ROC AUC over 5 folds.
# refit=True (spelled out, it is also the default) retrains the winning
# configuration on the full training set once the search is finished.
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring='roc_auc',
    cv=5,
    verbose=1,
    n_jobs=-1,
    refit=True
)

# Fit the grid search to the training data
grid_search.fit(X_train, y_train)

# Get the best hyperparameters and the mean cross-validated AUC they achieved
best_params = grid_search.best_params_
best_score = grid_search.best_score_

# Reuse the estimator GridSearchCV already refit on all of X_train with the
# best hyperparameters. Training a second model here would duplicate that
# work, and rebuilding it from best_params alone would silently drop the
# subsample_freq setting that the search relied on.
best_model = grid_search.best_estimator_

# Evaluate the best model on the held-out validation set using the
# predicted probability of the positive class (second predict_proba column).
y_pred_valid = best_model.predict_proba(X_valid)[:, 1]
valid_auc_score = roc_auc_score(y_valid, y_pred_valid)

print("Best Hyperparameters:", best_params)
print("Best AUC score:", best_score)
print("Validation AUC score:", valid_auc_score)