In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the feature table from the working directory. The training cell below
# treats the last column as the class label and every other column as an input.
df = pd.read_csv(filepath_or_buffer="feature.csv")

In [None]:
# Positional split of `df`: every column except the last is a feature, the
# last column is the target.
X, y = df.iloc[:, :-1].values, df.iloc[:, -1].values

# Map the raw target values to integer codes; `label_encoder.classes_` keeps
# the original values for labelling plots later on.
label_encoder = LabelEncoder().fit(y)
y_encoded = label_encoder.transform(y)

# Hold out 20% of the rows for evaluation, with a fixed seed so the split is
# repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42
)

# Standardize using statistics learned from the training rows only, then
# apply the same transform to both splits. From here on X_train / X_test
# refer to the *scaled* arrays (the raw split is overwritten).
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# Baseline classifiers, all trained on the standardized training split.
# `fit` returns the estimator itself, so construction and training are chained.

# 1. SVC (linear kernel)
model_svc = SVC(kernel='linear', random_state=0).fit(X_train, y_train)

# 2. Random Forest
model_rf = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# 3. KNN (7 neighbours)
knn = KNeighborsClassifier(n_neighbors=7).fit(X_train, y_train)

# 4. Logistic Regression (library defaults)
model_lr = LogisticRegression().fit(X_train, y_train)

# Predictions on the held-out split, in the same order as the models above.
y_pred_svc, y_pred_rf, y_pred_knn, y_pred_lr = (
    fitted.predict(X_test) for fitted in (model_svc, model_rf, knn, model_lr)
)

# 5. XGBoost with GridSearchCV
# Exhaustive search over 3 x 3 x 3 x 3 = 81 hyper-parameter combinations, each
# scored by 5-fold cross-validated accuracy on the training split only.
params = {
    'n_estimators': [100, 200, 300],
    'learning_rate': [0.01, 0.02, 0.03],
    'max_depth': [3, 5, 7],
    'subsample': [0.7, 0.8, 0.9]
}

# `use_label_encoder` is intentionally not passed: it is deprecated in recent
# XGBoost releases, where it is ignored and only produces a warning on every
# fit. The targets in `y_train` are already integer codes from `label_encoder`.
xgb = XGBClassifier()
grid_search = GridSearchCV(xgb, params, cv=5, scoring='accuracy', n_jobs=-1, verbose=1)

# NOTE: X_train / X_test are the *standardized* arrays at this point (the raw
# split was overwritten by the scaler step earlier in this cell), so XGBoost
# is trained on scaled features like the other models. Per-feature rescaling
# should not materially change tree splits, so this is left as-is.
grid_search.fit(X_train, y_train)

# With GridSearchCV's default refit behaviour, `best_estimator_` is the best
# configuration retrained on the whole training split.
best_xgb = grid_search.best_estimator_
y_pred_xgb = best_xgb.predict(X_test)

In [None]:
# Create and display confusion matrices
# Display name -> predictions on the held-out test split. This mapping is also
# iterated by the classification-report cell that follows.
models = {
    'SVC': y_pred_svc,
    'Random Forest': y_pred_rf,
    'KNN': y_pred_knn,
    'Logistic Regression': y_pred_lr,
    'XGBoost': y_pred_xgb
}

# LabelEncoder assigns codes 0..n_classes-1. Passing the full code list keeps
# every matrix n_classes x n_classes, so the tick labels below always line up
# with the rows/columns even when a class is missing from both the test split
# and a model's predictions (otherwise the matrix would be smaller than the
# tick-label list).
class_indices = list(range(len(label_encoder.classes_)))

for model_name, y_pred in models.items():
    cm = confusion_matrix(y_test, y_pred, labels=class_indices)

    # Explicit figure/axes so titles and labels are applied to the heatmap
    # axes regardless of what pyplot considers the "current" axes.
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=label_encoder.classes_,
                yticklabels=label_encoder.classes_,
                ax=ax)
    ax.set_title(f'Confusion Matrix - {model_name}')
    ax.set_xlabel('Predicted Label')
    ax.set_ylabel('True Label')
    plt.show()
    plt.close(fig)  # release the figure once shown; one is created per model

In [None]:
# Render each model's classification report (precision / recall / F1 per class
# plus the aggregate rows) as an annotated heatmap.

# LabelEncoder codes run 0..n_classes-1. Supplying them explicitly keeps the
# report aligned with `target_names` even if some class does not occur in the
# test split; without this, scikit-learn raises a ValueError about the number
# of classes not matching `target_names`.
report_labels = list(range(len(label_encoder.classes_)))

for model_name, y_pred in models.items():
    report = classification_report(
        y_test,
        y_pred,
        labels=report_labels,
        target_names=label_encoder.classes_,
        output_dict=True,
    )
    # After the transpose: one row per class / aggregate, one column per metric.
    df_report = pd.DataFrame(report).transpose()

    # Drop only the 'support' column, and do it by name. The previous
    # positional slice (`iloc[:-1, :-1]`) also removed the last row, i.e. the
    # 'weighted avg' summary, which was not the stated intent.
    df_report_heatmap = df_report.drop(columns="support")

    fig, ax = plt.subplots(figsize=(12, 8))
    sns.heatmap(df_report_heatmap, annot=True, cmap='rocket', fmt=".2f", ax=ax)
    # Keep both sets of tick labels horizontal.
    plt.setp(ax.get_yticklabels(), rotation=0)
    plt.setp(ax.get_xticklabels(), rotation=0)
    ax.set_xlabel("")
    ax.set_ylabel("")
    ax.set_title(f'Classification Report - {model_name}')
    plt.show()
    plt.close(fig)  # release the figure once shown; one is created per model