In [None]:
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from imblearn.over_sampling import SMOTE
import matplotlib.pyplot as plt
import seaborn as sns

# Load data
# Keep the path in one place so the error message always names the file that
# was actually requested (the message previously named a different file).
DATA_PATH = 'food_nutrient_temperament.csv'
try:
    df = pd.read_csv(DATA_PATH)
except FileNotFoundError:
    print(f"Error: File '{DATA_PATH}' not found. Please check the file path.")
    exit()

# Column validation: stop early when a column the script relies on is absent.
# NOTE(review): 'food_ description' contains a space after the underscore —
# confirm that this matches the CSV header exactly.
required_columns = ['food_ description', 'Temperament']
missing_cols = [col for col in required_columns if col not in df.columns]
if missing_cols:
    print(f"Error: Missing columns {missing_cols} in DataFrame.")
    exit()

# Preprocessing
# Features are every column from the third up to (but excluding) the last one;
# missing feature values are replaced with 0.
# NOTE(review): this positional slice presumably relies on the first two
# columns being identifiers and the last column being the label — confirm
# against the CSV layout.
X = df.iloc[:, 2:-1].fillna(0)  # features
y = df['Temperament']
feature_names = X.columns

# Missing value handling in Temperament: warn, then mark as -1 (Unknown)
if y.isnull().any():
    print("Warning: Missing values found in 'Temperament'. Filling with -1 (Unknown).")
    y = y.fillna(-1)
    df['Temperament'] = y

# Make Temperament numerical.
# Anything that is not a whole number (unparseable text, NaN, inf, or a
# fractional value such as 1.5) becomes -1 instead of being truncated by the
# integer cast, so a malformed label can never turn into a valid class code.
temperament_numeric = pd.to_numeric(df['Temperament'], errors='coerce').astype(float)
is_whole_number = temperament_numeric.mod(1).eq(0)
df['Temperament'] = temperament_numeric.where(is_whole_number, -1).astype(int)

# Unique Temperament values and class distribution
unique_temperaments = df['Temperament'].unique()
print("\nUnique Temperament Values:", unique_temperaments)
class_counts = df['Temperament'].value_counts()
print("Class Distribution:\n", class_counts)

# Keep only the rows whose label is one of the recognised class codes
valid_temperaments = [0, 1, 2]  # 0: Cold, 1: Hot, 2: Moderate
valid_mask = df['Temperament'].isin(valid_temperaments)
X = X.loc[valid_mask]
y = df.loc[valid_mask, 'Temperament']

# Report how many samples survived the filter, or stop when none did
if valid_mask.any():
    print("\nNumber of valid samples:", len(y))
else:
    print("Error: No valid Temperament values found (expected: 0, 1, 2). All values are:", unique_temperaments)
    exit()

# Train-test splitting
# Split BEFORE scaling: fitting the scaler on the full dataset would let the
# test rows influence the mean/variance used to transform the training rows
# (data leakage) and make the test score optimistic.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Normalization: statistics are learned from the training fold only and then
# applied unchanged to the held-out fold.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept so any later code that reads X_scaled still works; it now uses the
# training-fold statistics.
X_scaled = scaler.transform(X)

# SMOTE: oversample the training fold; if the sampler raises ValueError,
# continue with the original (un-resampled) training data.
try:
    smote = SMOTE(k_neighbors=3, random_state=42)
    resampled = smote.fit_resample(X_train, y_train)
    X_train_res, y_train_res = resampled
    resampled_counts = pd.Series(y_train_res).value_counts()
    print("\nClass Distribution after SMOTE:\n", resampled_counts)
except ValueError as err:
    print("SMOTE failed:", err)
    print("Falling back to class weights.")
    X_train_res, y_train_res = X_train, y_train

# GridSearch: 3-fold cross-validated search over C, kernel and gamma for a
# class-weighted SVC, scored by accuracy.
# NOTE(review): cross-validation runs on the already-resampled training data —
# confirm that this is intended.
svm = SVC(class_weight='balanced', random_state=42)
param_grid = dict(
    C=[0.1, 1, 10],
    kernel=['rbf', 'linear'],
    gamma=['scale', 'auto'],
)
grid_search = GridSearchCV(
    estimator=svm,
    param_grid=param_grid,
    scoring='accuracy',
    cv=3,
    n_jobs=-1,
)
grid_search.fit(X_train_res, y_train_res)

# Best model: the estimator with the best cross-validated score
best_svm = grid_search.best_estimator_
print("\nBest Parameters from GridSearchCV:", grid_search.best_params_)
print("Best Cross-Validation Accuracy:", grid_search.best_score_)

# Evaluation on the held-out test fold
y_pred = best_svm.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("\nTest Accuracy:", accuracy)

# Pass the label codes explicitly so the report and the confusion matrix
# always have one row/column per class, in this order, even when a class is
# absent from both y_test and y_pred. Without `labels`, a missing class makes
# classification_report raise (target_names length mismatch) and makes the
# confusion matrix smaller than 3x3, which breaks the DataFrame below.
class_labels = [0, 1, 2]
class_names = ['Cold', 'Hot', 'Moderate']
print(
    "Classification Report:\n",
    classification_report(y_test, y_pred, labels=class_labels, target_names=class_names),
)

# Confusion matrix (rows: true class, columns: predicted class)
cm = confusion_matrix(y_test, y_pred, labels=class_labels)
cm_df = pd.DataFrame(cm, index=class_names, columns=class_names)
plt.figure(figsize=(8, 6))
sns.heatmap(cm_df, annot=True, fmt='d', cmap='Blues')
plt.title('Confusion Matrix (SVM)')
plt.xlabel('Predicted')
plt.ylabel('True')
plt.show()