# In [3]:
# For CM1
# Import required libraries
import nbimporter
import global_functions as gf  # Import global functions from global_functions.ipynb
import lightgbm as lgb
from sklearn.metrics import roc_auc_score, accuracy_score, classification_report
import matplotlib.pyplot as plt
import seaborn as sns

# Location of the CM1 dataset (Windows path, written as a raw string so the
# backslashes are not treated as escape sequences).
# NOTE: despite the plural name this is a single `str`, not a list of paths;
# iterating over it directly yields individual characters.
dataset_paths = r"C:\Local Disk (A)\Github Files\Projects\Mass-Sceptra-Classification\Datasets\CM1"


# Main execution block
def process_dataset(base_path):
    """Train and evaluate a LightGBM binary classifier on one dataset.

    The dataset is loaded through ``gf.prepare_dataset``. A model is trained
    with early stopping, evaluated on the test split (accuracy, ROC AUC,
    classification report, gain-based feature importance, confusion matrix)
    and finally saved as ``<dataset name>_lightgbm_model.txt``.

    Args:
        base_path: Path of the dataset to process. Its last path component
            is used as the dataset name in the saved model's filename.

    Returns:
        None. Returns early, without training, when ``gf.prepare_dataset``
        yields no training data.
    """
    # Local import keeps this block self-contained; PureWindowsPath splits on
    # both "\\" and "/" and ignores a trailing separator, on any host OS.
    from pathlib import PureWindowsPath

    print(f"\nProcessing dataset at {base_path}...")

    # Prepare dataset using global function; the returned scaler is unused here.
    X_train, X_test, y_train, y_test, _scaler = gf.prepare_dataset(base_path)
    if X_train is None:
        print(f"No valid data found at {base_path}. Skipping...")
        return

    # Initialize LightGBM datasets; the test set reuses the training set's
    # feature binning via `reference`.
    train_data = lgb.Dataset(X_train, label=y_train)
    test_data = lgb.Dataset(X_test, label=y_test, reference=train_data)

    # Define LightGBM parameters
    params = {
        'objective': 'binary',
        'metric': 'auc',
        'boosting_type': 'gbdt',
        'learning_rate': 0.01,
        'num_leaves': 31,
        'feature_fraction': 0.8,
        'bagging_fraction': 0.8,
        'bagging_freq': 5,
        'max_depth': -1,
        'verbosity': -1,
        'seed': 42,
    }

    # Train LightGBM model.
    # Early stopping and periodic logging are passed as callbacks: the
    # `early_stopping_rounds` / `verbose_eval` keyword arguments were removed
    # from `lgb.train` in LightGBM 4.0.
    # NOTE(review): the test split is also the early-stopping validation set,
    # so the test metrics reported below are likely optimistic - consider a
    # separate validation split.
    print("\nTraining LightGBM model...")
    model = lgb.train(
        params,
        train_data,
        num_boost_round=1000,
        valid_sets=[train_data, test_data],
        callbacks=[
            lgb.early_stopping(stopping_rounds=50),
            lgb.log_evaluation(period=100),
        ],
    )

    # Make predictions: score the test set once, then threshold at 0.5.
    print("\nEvaluating model...")
    y_prob = model.predict(X_test)
    y_pred = (y_prob > 0.5).astype(int)

    # Evaluation metrics
    print("\nTest Set Performance:")
    print(f"Accuracy: {accuracy_score(y_test, y_pred):.4f}")
    print(f"ROC AUC: {roc_auc_score(y_test, y_prob):.4f}")
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred))

    # Plot feature importance
    print("\nPlotting feature importance...")
    lgb.plot_importance(model, max_num_features=10, importance_type='gain')
    plt.title('Feature Importance')
    plt.show()

    # Plot confusion matrix using a function from global_functions
    print("\nConfusion Matrix:")
    gf.plot_confusion_matrix(y_test, y_pred)

    # Save the model for the current dataset, named after the last path
    # component (e.g. "CM1" -> "CM1_lightgbm_model.txt").
    dataset_name = PureWindowsPath(base_path).name
    model_filename = f"{dataset_name}_lightgbm_model.txt"
    model.save_model(model_filename)
    print(f"Model saved as {model_filename}.")

# Run processing for each dataset.
# `dataset_paths` may be a single path string or a collection of paths. A bare
# string is wrapped in a list first, because iterating a `str` yields one
# character at a time and would call process_dataset() once per character.
for dataset_path in ([dataset_paths] if isinstance(dataset_paths, str) else dataset_paths):
    process_dataset(dataset_path)


# Output: ModuleNotFoundError: No module named 'global_functions'