In [8]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
import os

# Cap the OpenMP and MKL thread pools at one thread each as a workaround
# for potential threading issues.
# NOTE(review): these variables are assigned after numpy/sklearn have been
# imported above -- confirm the limits still take effect in that order.
os.environ.update({'OMP_NUM_THREADS': '1', 'MKL_NUM_THREADS': '1'})

def _load_dataset(file_path):
    """Read the CSV at ``file_path`` into a DataFrame.

    Returns None, after printing the reason, when the file is missing,
    unreadable, empty, or cannot be parsed as CSV.
    """
    try:
        return pd.read_csv(file_path)
    except FileNotFoundError:
        print("Dataset not found. Please provide the correct file path.")
    except (OSError, ValueError) as exc:
        # OSError covers directory/permission problems; ValueError covers
        # pandas' EmptyDataError and ParserError as well as decoding errors.
        print(f"Could not read the dataset: {exc}")
    return None


def _prepare_features(data, target_column, drop_columns):
    """Split ``data`` into an encoded feature frame and an encoded target.

    Returns ``(X, y, class_names)`` where ``X`` is the imputed, one-hot
    encoded feature DataFrame, ``y`` the integer-encoded target array and
    ``class_names`` the original target labels (as strings) in code order.
    """
    removable = [col for col in drop_columns
                 if col in data.columns and col != target_column]
    data = data.drop(columns=removable)

    # Rows without a label cannot be used for supervised training.
    data = data.dropna(subset=[target_column])

    X = data.drop(columns=[target_column])
    y = data[target_column]

    # Columns with no values at all cannot be imputed; drop them.
    X = X.dropna(axis=1, how='all')

    # Numeric gaps get the column mean, remaining gaps the column mode.
    X = X.fillna(X.select_dtypes(include=['number']).mean())
    modes = X.mode()
    if not modes.empty:
        X = X.fillna(modes.iloc[0])

    if X.shape[1]:
        X = pd.get_dummies(X, drop_first=True)

    label_encoder = LabelEncoder()
    y = label_encoder.fit_transform(y)
    class_names = [str(name) for name in label_encoder.classes_]
    return X, y, class_names


def _evaluate_models(models, X_train, X_test, y_train, y_test, class_names):
    """Fit every model, print its metrics and return ``{name: accuracy}``.

    A model that fails during fitting or prediction is reported and
    skipped so that the remaining models are still evaluated.
    """
    labels = list(range(len(class_names)))
    results = {}
    for model_name, model in models.items():
        print(f"\nTraining {model_name}...")
        try:
            model.fit(X_train, y_train)
            y_pred = model.predict(X_test)

            accuracy = accuracy_score(y_test, y_pred)
            results[model_name] = accuracy
            print(f"{model_name} Accuracy: {accuracy * 100:.2f}%")
            print("Classification Report:")
            # Passing labels/target_names shows the original diagnosis
            # names instead of the encoded integers; zero_division=0
            # silences undefined-metric warnings for absent classes.
            print(classification_report(
                y_test, y_pred,
                labels=labels,
                target_names=class_names,
                zero_division=0,
            ))
        except Exception as e:
            print(f"Error during training or prediction for {model_name}: {e}")
    return results


def main(file_path=None, target_column='Expert Diagnose',
         drop_columns=('Patient Number',)):
    """Train several classifiers on a CSV dataset and report the best one.

    Args:
        file_path: Path of the CSV file. When None, the user is prompted.
        target_column: Name of the column holding the class label.
        drop_columns: Columns removed before training (e.g. identifiers);
            names not present in the dataset are ignored.

    Returns:
        None. All results are printed.

    Raises:
        KeyError: If ``target_column`` is not a column of the dataset.
    """
    if file_path is None:
        file_path = input("Please enter the dataset file path: ").strip()

    data = _load_dataset(file_path)
    if data is None:
        print("Dataset loading skipped. No processing or predictions made.")
        return

    # Preview the dataset
    print("Dataset preview:")
    print(data.head())

    if target_column not in data.columns:
        raise KeyError(f"The specified target column '{target_column}' is not found in the dataset.")

    X, y, class_names = _prepare_features(data, target_column, drop_columns)
    if len(X) < 2 or X.shape[1] == 0:
        print("Not enough usable data to train and evaluate a model.")
        return

    # Split the dataset
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Fit the scaler on the training split only, so that test-set
    # statistics do not leak into the features the models are trained on.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Define the models
    models = {
        'Decision Tree': DecisionTreeClassifier(random_state=42),
        'Random Forest': RandomForestClassifier(random_state=42),
        'Naive Bayes': GaussianNB(),
        'SVM': SVC(kernel='linear', random_state=42),
        'KNN': KNeighborsClassifier(n_neighbors=5, algorithm='auto')
    }

    results = _evaluate_models(
        models, X_train, X_test, y_train, y_test, class_names
    )

    # Find the best model
    if results:
        best_model_name = max(results, key=results.get)
        print(f"\nBest Model: {best_model_name} with Accuracy: {results[best_model_name] * 100:.2f}%")
    else:
        print("No model could be evaluated successfully.")

# Run the pipeline only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()


Please enter the dataset file path:  Unique_Extended_Dataset-Mental-Disorders-5261.csv


Dataset preview:
  Patient Number    Sadness    Euphoric  Exhausted Sleep dissorder Mood Swing  \
0     Patiant-01    Usually      Seldom  Sometimes       Sometimes        YES   
1     Patiant-02    Usually      Seldom    Usually       Sometimes         NO   
2     Patiant-03  Sometimes  Most-Often  Sometimes       Sometimes        YES   
3     Patiant-04    Usually      Seldom    Usually      Most-Often        YES   
4     Patiant-05    Usually     Usually  Sometimes       Sometimes         NO   

  Suicidal thoughts Anorxia Authority Respect Try-Explanation  \
0              YES       NO                NO             YES   
1               YES      NO                NO              NO   
2                NO      NO                NO             YES   
3               YES     YES                NO             YES   
4                NO      NO                NO              NO   

  Aggressive Response Ignore & Move-On Nervous Break-down Admit Mistakes  \
0                  NO        