In [None]:
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Separate features (X) and target (y).
X = hourly_dataframe_2.drop('target', axis=1)
y = hourly_dataframe_2['target']

# Split FIRST (70% train, 30% test, stratified on the target) so that no
# later fitting step (scaling, oversampling) ever sees the held-out rows.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Fit the scaler on the training features only and reuse the SAME fitted
# scaler for the test features. Fitting a second scaler on X_test would
# standardise the two sets with different means/variances, so the models
# would be evaluated on features that live in a different space.
ss_train = StandardScaler()
X_train = ss_train.fit_transform(X_train)
X_test = ss_train.transform(X_test)
ss_test = ss_train  # alias kept so code that refers to `ss_test` still works

# Oversample the minority class with SMOTE on the TRAINING portion only.
# Resampling before the split lets synthetic points be interpolated from
# rows that end up in the test set (leakage). SMOTE is run on the scaled
# features because it interpolates between nearest neighbours.
# random_state makes the synthetic samples reproducible.
smt = SMOTE(random_state=42)
X_train_sm, y_train_sm = smt.fit_resample(X_train, y_train)

# NOTE: X_train / y_train stay un-resampled; use X_train_sm / y_train_sm
# wherever a model should be trained on the balanced data.

# Class balance after oversampling (last expression so the notebook shows it).
y_train_sm.value_counts()


In [None]:
# Train a set of classifiers on (X_train, y_train), evaluate each one on
# (X_test, y_test) and collect timing + classification metrics in `results_df`.

# Seed shared by every estimator that has a stochastic component, so the
# comparison table is reproducible from run to run.
MODEL_SEED = 42

# Define models
models = {
    # The pipeline standardises its inputs itself before the linear model.
    "Logistic Regression": make_pipeline(StandardScaler(), LogisticRegression(max_iter=500)),
    "Naive Bayes": GaussianNB(),
    "Decision Tree": DecisionTreeClassifier(random_state=MODEL_SEED),
    "K-Nearest Neighbors": KNeighborsClassifier(),
    # probability=True is required for predict_proba; its internal
    # cross-validated calibration is random, hence the seed.
    "Support Vector Machine": SVC(probability=True, random_state=MODEL_SEED),
    "Random Forest": RandomForestClassifier(random_state=MODEL_SEED),
    "Gradient Boosting": GradientBoostingClassifier(random_state=MODEL_SEED),
    "Neural Network": MLPClassifier(max_iter=1000, random_state=MODEL_SEED),
}

# Store evaluation results (one dict per successfully evaluated model)
results = []

# Train and evaluate each model
for name, model in models.items():
    print(f"\nProcessing model: {name}")

    # --- Training -----------------------------------------------------
    # perf_counter() is monotonic and high-resolution, so the measured
    # interval cannot be distorted by system clock adjustments.
    train_start_time = time.perf_counter()
    try:
        model.fit(X_train, y_train)
    except Exception as e:
        # Best effort: report the failure and move on to the next model.
        print(f"Error training model {name}: {e}")
        continue
    train_time = time.perf_counter() - train_start_time

    # --- Prediction ---------------------------------------------------
    # The timed span covers both the hard predictions and the probabilities.
    predict_start_time = time.perf_counter()
    try:
        y_pred = model.predict(X_test)
        # Second column = probability of the class listed second in
        # model.classes_ (assumes a binary target whose positive class
        # sorts last, e.g. 0/1 -- TODO confirm against the data).
        y_proba = model.predict_proba(X_test)[:, 1]
    except Exception as e:
        print(f"Error predicting with model {name}: {e}")
        continue
    predict_time = time.perf_counter() - predict_start_time

    # --- Metrics --------------------------------------------------------
    # zero_division=0 keeps the same 0.0 score sklearn would report when a
    # model predicts no positives, but without an UndefinedMetricWarning.
    results.append({
        "Model": name,
        "Training Time (s)": train_time,
        "Prediction Time (s)": predict_time,
        "Accuracy": accuracy_score(y_test, y_pred),
        "Precision": precision_score(y_test, y_pred, zero_division=0),
        "Recall": recall_score(y_test, y_pred, zero_division=0),
        "F1 Score": f1_score(y_test, y_pred, zero_division=0),
        "AUC-ROC": roc_auc_score(y_test, y_proba),
    })

# Convert results to DataFrame for viewing
results_df = pd.DataFrame(results)

# Show the table once, via the notebook's rich display (last expression),
# instead of printing it and then displaying it again.
print("\nModel evaluation results:")
results_df