In [7]:
import pandas as pd
import numpy as np
import math
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report, mean_squared_error
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from imblearn.over_sampling import SMOTE

# Build the simulated survey data set.
# The global NumPy seed is fixed so every run draws the same rows; the draws
# below happen in the same order as the columns appear in the frame.
np.random.seed(42)
n_rows = 495
answers = ['Yes', 'No', 'Maybe']

# Four rating columns, each an integer drawn from 2..5 (upper bound exclusive).
simulated = {
    rating: np.random.randint(2, 6, n_rows)
    for rating in ('Workshop', 'Content', 'Skill', 'Speaker')
}
# Two categorical answers and a three-valued status code.
simulated['Recommend'] = np.random.choice(answers, n_rows)
simulated['ReEnroll'] = np.random.choice(answers, n_rows)
simulated['Status'] = np.random.choice([0, 1, 2], n_rows)

data = pd.DataFrame(simulated)

# Encode the categorical answers (Recommend, ReEnroll) as integers.
# Each column gets its own LabelEncoder, stored in `encoders`, so the
# integer -> label mapping of every column stays available for
# inverse_transform. Re-fitting one shared encoder would overwrite the
# mapping of the first column. `le` is still left bound to the encoder
# fitted last (ReEnroll), as it was before.
encoders = {}
for column in ('Recommend', 'ReEnroll'):
    le = LabelEncoder()
    data[column] = le.fit_transform(data[column])
    encoders[column] = le

# Separate the features from the targets.
X = data[['Workshop', 'Content', 'Skill', 'Speaker']]
y_knn = data['Recommend']  # multi-class target for KNN
y_svm = (data['Status'] != 0).astype(int)  # binary target for the SVM: 0 = churn, 1 = retain

# Train-test split.
# Both targets are split in ONE call so their rows are guaranteed to line up
# with X_train / X_test, instead of relying on two separate calls happening to
# use the same random_state. The resulting partition is the same 80/20 split.
X_train, X_test, y_train_knn, y_test_knn, y_train_svm, y_test_svm = train_test_split(
    X, y_knn, y_svm, test_size=0.2, random_state=42
)

# Standardise the features: the scaler is fitted on the training rows only and
# then applied unchanged to the test rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# KNN: 5 nearest neighbours under the Manhattan distance, trained on the
# standardised features to predict the encoded Recommend class.
knn = KNeighborsClassifier(metric='manhattan', n_neighbors=5)
knn.fit(X_train_scaled, y_train_knn)
y_pred_knn = knn.predict(X_test_scaled)

knn_report = classification_report(y_test_knn, y_pred_knn)
print("KNN Classification Report:\n", knn_report)

# NOTE(review): the target holds LabelEncoder class codes, i.e. nominal
# categories rather than quantities, so this RMSE measures distance between
# arbitrary integer codes. Kept for continuity with earlier runs; prefer the
# classification report above when judging the model.
knn_mse = mean_squared_error(y_test_knn, y_pred_knn)
print("KNN RMSE:", math.sqrt(knn_mse))

# SMOTE for the SVM: oversample the binary target on the training rows only,
# so the test rows are evaluated exactly as they were split.
sm = SMOTE(random_state=42)
balanced = sm.fit_resample(X_train_scaled, y_train_svm)
X_svm_bal, y_svm_bal = balanced

# SVM with an RBF kernel, fitted on the balanced training set and evaluated on
# the untouched, standardised test set.
svm = SVC(kernel='rbf')
svm.fit(X_svm_bal, y_svm_bal)
y_pred_svm = svm.predict(X_test_scaled)

svm_report = classification_report(y_test_svm, y_pred_svm)
print("SVM Classification Report:\n", svm_report)

# Random Forest on the same multi-class target as KNN (y_train_knn /
# y_test_knn), using the standardised features and a fixed seed so the
# forest is reproducible.
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train_scaled, y_train_knn)
y_pred_rf = rf.predict(X_test_scaled)

rf_report = classification_report(y_test_knn, y_pred_rf)
print("Random Forest Classification Report:\n", rf_report)


KNN Classification Report:
               precision    recall  f1-score   support

           0       0.37      0.62      0.47        32
           1       0.26      0.25      0.25        28
           2       0.61      0.28      0.39        39

    accuracy                           0.38        99
   macro avg       0.41      0.39      0.37        99
weighted avg       0.43      0.38      0.37        99

KNN RMSE: 1.1055415967851334


SVM Classification Report:
               precision    recall  f1-score   support

           0       0.39      0.41      0.40        37
           1       0.64      0.63      0.63        62

    accuracy                           0.55        99
   macro avg       0.52      0.52      0.52        99
weighted avg       0.55      0.55      0.55        99



Random Forest Classification Report:
               precision    recall  f1-score   support

           0       0.37      0.41      0.39        32
           1       0.29      0.43      0.34        28
           2       0.55      0.31      0.39        39

    accuracy                           0.37        99
   macro avg       0.40      0.38      0.37        99
weighted avg       0.42      0.37      0.38        99

