In [7]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load the dataset
file_path = 'Unique_Extended_Dataset-Mental-Disorders-5261.csv'  # Update the path if necessary
data = pd.read_csv(file_path)  # read from file_path so editing the path above actually takes effect

# Preview the dataset
print("Dataset preview:")
print(data.head())

# Identify target and features
target_column = 'Expert Diagnose'
if target_column not in data.columns:
    raise KeyError(f"The specified target column '{target_column}' is not found in the dataset.")

# Drop unnecessary columns (e.g., patient identifiers); columns that are absent are skipped
unnecessary_columns = ['Patient Number']
data = data.drop(columns=[col for col in unnecessary_columns if col in data.columns])

# Separate features and target
X = data.drop(columns=[target_column])
y = data[target_column]

# Handle missing values
# NOTE: the imputation statistics below are computed over all rows (train and test alike).
X = X.fillna(X.select_dtypes(include=['number']).mean(numeric_only=True))  # Fill numeric columns with mean
X = X.fillna(X.mode().iloc[0])  # Fill remaining (categorical) gaps with each column's first mode

# Encode categorical variables (one-hot, dropping the first level of each column)
X = pd.get_dummies(X, drop_first=True)

# Encode target variable as integer class labels
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# Split the dataset BEFORE scaling, so the scaler never sees the held-out rows.
# The split depends only on the row count and random_state, not on the feature values.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features: fit on the training rows only, then apply the
# same transformation to the test rows (avoids test-set leakage).
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Keep X as a scaled ndarray (as it was before), now using the train-fitted scaler.
X = scaler.transform(X)

# Candidate classifiers, keyed by the display name used when reporting results
models = dict([
    ('Random Forest', RandomForestClassifier(random_state=42)),
    ('Gradient Boosting', GradientBoostingClassifier(random_state=42)),
])

# Hyperparameter search space for each candidate, looked up by the same display name
param_grids = {}
param_grids['Random Forest'] = dict(
    n_estimators=[100, 200],
    max_depth=[None, 10, 20],
    min_samples_split=[2, 5],
)
param_grids['Gradient Boosting'] = dict(
    n_estimators=[100, 200],
    learning_rate=[0.01, 0.1, 0.2],
    max_depth=[3, 5, 7],
)

# Tuned estimator per model name, plus its held-out accuracy (computed once
# in the loop and reused for the final selection instead of re-predicting).
best_models = {}
test_accuracies = {}

for model_name, model in models.items():
    print(f"\nTraining {model_name}...")
    # 5-fold cross-validated grid search over this model's grid, using all CPU cores
    grid_search = GridSearchCV(model, param_grids[model_name], cv=5, scoring='accuracy', n_jobs=-1)
    grid_search.fit(X_train, y_train)
    best_model = grid_search.best_estimator_
    best_models[model_name] = best_model

    # Make predictions on the held-out split
    y_pred = best_model.predict(X_test)

    # Evaluate
    accuracy = accuracy_score(y_test, y_pred)
    test_accuracies[model_name] = accuracy
    print(f"\n{model_name} Accuracy: {accuracy * 100:.2f}%")
    print("Classification Report:")
    print(classification_report(y_test, y_pred))
    print("Confusion Matrix:")
    print(confusion_matrix(y_test, y_pred))

# Display the best model: highest held-out accuracy; ties go to the model trained first.
# NOTE(review): the winner is chosen on the same test split that is reported above,
# so the reported accuracy of the selected model is optimistically biased.
best_model_name = max(test_accuracies, key=test_accuracies.get)
print(f"\nBest Model: {best_model_name}")
best_model = best_models[best_model_name]

# Save the final model
# NOTE: only the estimator is persisted; the fitted scaler and label encoder are not included.
import joblib
joblib.dump(best_model, f"best_model_{best_model_name}.pkl")
print(f"The best model '{best_model_name}' has been saved.")


Dataset preview:
  Patient Number    Sadness    Euphoric  Exhausted Sleep dissorder Mood Swing  \
0     Patiant-01    Usually      Seldom  Sometimes       Sometimes        YES   
1     Patiant-02    Usually      Seldom    Usually       Sometimes         NO   
2     Patiant-03  Sometimes  Most-Often  Sometimes       Sometimes        YES   
3     Patiant-04    Usually      Seldom    Usually      Most-Often        YES   
4     Patiant-05    Usually     Usually  Sometimes       Sometimes         NO   

  Suicidal thoughts Anorxia Authority Respect Try-Explanation  \
0              YES       NO                NO             YES   
1               YES      NO                NO              NO   
2                NO      NO                NO             YES   
3               YES     YES                NO             YES   
4                NO      NO                NO              NO   

  Aggressive Response Ignore & Move-On Nervous Break-down Admit Mistakes  \
0                  NO        