In [1]:
import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.metrics import classification_report
import joblib

# Load the CSV file
data = pd.read_csv('./tabular_dataset/train.csv')

# Separate features (X) and target (y)
X = data.iloc[:, :-1].values  # Assume last column is the target
y = data.iloc[:, -1].values
print(y)
# Encode the target column if it's categorical
y = LabelEncoder().fit_transform(y)

# Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Define classifiers to test
classifiers = {
    'Logistic Regression': LogisticRegression(),
    'Support Vector Classifier': SVC(),
    'Random Forest': RandomForestClassifier(),
    'Gradient Boosting': GradientBoostingClassifier(),
    'XGBoost': XGBClassifier(),
    'LightGBM': LGBMClassifier()
}

# Create a directory to save model checkpoints
os.makedirs('checkpoints', exist_ok=True)

# Train and evaluate each classifier
for name, clf in classifiers.items():
    print(f"Training {name}...")
    clf.fit(X_train, y_train)
    
    # Save the trained model
    checkpoint_path = os.path.join('checkpoints', f"{name.replace(' ', '_')}.joblib")
    joblib.dump(clf, checkpoint_path)
    print(f"Model saved to {checkpoint_path}")

    # Evaluate the model
    y_pred = clf.predict(X_test)
    print(f"Classification Report for {name}:")
    print(classification_report(y_test, y_pred))

# Example of loading a saved model
# loaded_model = joblib.load('checkpoints/Logistic_Regression.joblib')
# y_pred_loaded = loaded_model.predict(X_test)
# print("Classification Report for Loaded Logistic Regression:")
# print(classification_report(y_test, y_pred_loaded))


[1 0 0 0 0 1 0 0 0 0 1 0 1 1 1 1 0 1 1 1 1 0 1 1 1 1 1 1 0 0 1 0 0 0 0 0 0
 0 0 1 1 0 0 1 1 0 1 0 0 0 0 0 0 1 1 1 0 0 0 1 1 0 0 0 1 0 0 1 1 0 0 0 0 0
 1 0 1 0 0 0 0 1 1 1 1 1 0 1 1 0 1 0 0 1 0 0 0 0 1 0 1 0 0 0 0 1 0 0 1 0 0
 1 1 1 1 0 0 0 0 1 0 0 0 1 0 0 0 0 1 1 0 0 1 1 1 1 0 1 0 1 0 1 0 0 1 0 1 0
 1 0 1 1 0 1 0 1 1 0 1 0 0 0 0 1 1 1 1 1 0 1 1 1 1 1 0 0 1 1 1 0 0 0 1 1 0
 0 1 1 1 1 1 1 0 1 0 0 0 1 1 1 0 1 0 0 0]
Training Logistic Regression...
Model saved to checkpoints/Logistic_Regression.joblib
Classification Report for Logistic Regression:
              precision    recall  f1-score   support

           0       0.78      0.75      0.77        24
           1       0.67      0.71      0.69        17

    accuracy                           0.73        41
   macro avg       0.72      0.73      0.73        41
weighted avg       0.73      0.73      0.73        41

Training Support Vector Classifier...
Model saved to checkpoints/Support_Vector_Classifier.joblib
Classification Report for

In [2]:
!pip install xgboost
!pip install lightgbm

