<a href="https://colab.research.google.com/github/ramya2110f/Ramz/blob/master/accuracynew.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import shap
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import classification_report, roc_auc_score
import xgboost as xgb
import scipy.special
from sklearn.preprocessing import label_binarize

# Load dataset
df = pd.read_csv('/content/Dataset-Mental-Disorders.csv')
label_encoder = LabelEncoder()
df["Expert Diagnose"] = label_encoder.fit_transform(df["Expert Diagnose"])  # Encode labels

# Define features and target
X = df.drop(columns=["Expert Diagnose"])
y = df["Expert Diagnose"]
# Integer-encode every object-typed feature column; other columns pass through unchanged.
X = X.apply(lambda col: LabelEncoder().fit_transform(col) if col.dtype == 'object' else col)

# Split data BEFORE scaling so the held-out rows never influence the scaler's
# mean/variance (fitting the scaler on the full dataset leaks test statistics
# into the training features). The split itself is unchanged: same seed,
# same test fraction, same stratification.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Standardize features: fit on the training rows only, then apply to both parts
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
# Kept so any later code that reads `X_scaled` still finds it; it now uses the
# train-fitted scaler rather than statistics from the full dataset.
X_scaled = scaler.transform(X)

# Binarize labels for AUC calculation (one indicator column per class)
y_test_binarized = label_binarize(y_test, classes=np.unique(y))

# Train and evaluate different models.
# The stochastic estimators share the split's seed so repeated runs print the
# same reports.
models = {
    "Logistic Regression": LogisticRegression(),
    "SVM": SVC(probability=True, random_state=42),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "Random Forest": RandomForestClassifier(random_state=42),
    "Gradient Boosting": GradientBoostingClassifier(random_state=42),
}

for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    # Class probabilities are needed for AUC; skip AUC for estimators without them.
    y_prob = model.predict_proba(X_test) if hasattr(model, "predict_proba") else None

    print(f"\n{name} Classification Report:")
    print(classification_report(y_test, y_pred))

    if y_prob is not None:
        # Macro-averaged one-vs-rest AUC over the per-class indicator columns.
        auc_score = roc_auc_score(y_test_binarized, y_prob, average='macro', multi_class='ovr')
        print(f"{name} AUC Score: {auc_score:.4f}")

# Improved Custom Weighted Focal Loss
def custom_focal_loss(preds, dtrain, gamma=2.0, alpha=0.5,
                      lambda_entropy=0.01, beta_confidence=0.01):
    """Custom multi-class objective: focal-weighted softmax plus two regularizers.

    Intended to be passed as ``obj=`` to ``xgb.train``, which calls it with
    the two positional arguments only; the remaining parameters keep the
    previously hard-coded values as defaults.

    Args:
        preds: Raw (pre-softmax) scores, either shaped
            ``(n_samples, n_classes)`` or flattened row-major to
            ``n_samples * n_classes``.
        dtrain: Object exposing ``get_label()`` that returns one class index
            per sample (an ``xgb.DMatrix`` in practice).
        gamma: Exponent applied to ``(1 - p_true)`` in the focal weight.
        alpha: Constant multiplier of the focal weight.
        lambda_entropy: Weight of the entropy-based terms.
        beta_confidence: Weight of the squared-probability terms.

    Returns:
        Tuple ``(grad, hess)`` of 1-D arrays, each of length
        ``n_samples * n_classes`` (row-major flattening).
    """
    labels = np.asarray(dtrain.get_label()).astype(int)
    n_rows = labels.shape[0]
    row_idx = np.arange(n_rows)

    # Accept both the 2-D layout and a flat 1-D layout; the number of classes
    # is inferred from the label count. For 2-D input this is a no-op.
    logits = np.asarray(preds).reshape(n_rows, -1)
    probs = scipy.special.softmax(logits, axis=1)

    # Focal weight: down-weights rows whose true class already has high probability.
    p_t = probs[row_idx, labels]
    focal_weight = alpha * (1 - p_t) ** gamma

    # Softmax cross-entropy gradient (p - onehot) and diagonal Hessian p * (1 - p),
    # each scaled per row by the focal weight.
    grad = probs.copy()
    grad[row_idx, labels] -= 1
    grad *= focal_weight[:, np.newaxis]

    hess = focal_weight[:, np.newaxis] * probs * (1 - probs)

    # Entropy term: the row's entropy value is added to every class gradient
    # of that row, and 1 / p is added to the Hessian.
    # NOTE(review): these are heuristic additions, not the exact derivatives
    # of an entropy penalty - confirm this is the intended formulation.
    entropy = -np.sum(probs * np.log(probs + 1e-9), axis=1)
    grad += lambda_entropy * entropy[:, np.newaxis]
    hess += lambda_entropy * (1 / (probs + 1e-9))

    # Confidence term: p**2 added to the gradient and 2 * p to the Hessian.
    confidence_penalty = probs ** 2
    grad += beta_confidence * confidence_penalty
    hess += beta_confidence * 2 * probs

    # Flattened output kept for compatibility with existing callers.
    # NOTE(review): newer XGBoost releases are understood to prefer
    # (n_samples, n_classes) arrays here - confirm against the installed version.
    return grad.flatten(), hess.flatten()

# Shared XGBoost configuration and data containers, built once and reused by
# both training runs instead of being constructed twice.
xgb_params = {
    'objective': 'multi:softprob',
    'num_class': len(np.unique(y)),
    'eval_metric': 'mlogloss'
}
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test)

# Train XGBoost model with default loss
xgb_model_default = xgb.train(
    dict(xgb_params),  # copy so each run gets its own params object
    dtrain,
    num_boost_round=100
)

# Train XGBoost model with improved custom loss
xgb_model_custom = xgb.train(
    dict(xgb_params),
    dtrain,
    num_boost_round=100,
    obj=custom_focal_loss
)

# Make predictions
y_pred_default = xgb_model_default.predict(dtest)
y_pred_labels_default = np.argmax(y_pred_default, axis=1)
auc_score_default = roc_auc_score(y_test_binarized, y_pred_default, average='macro', multi_class='ovr')

# For the custom-objective model, take the raw margins and apply softmax
# explicitly (the same link the objective function uses internally), so the
# probabilities do not depend on XGBoost's built-in output transform.
y_margin_custom = xgb_model_custom.predict(dtest, output_margin=True)
y_pred_custom = scipy.special.softmax(y_margin_custom, axis=1)
y_pred_labels_custom = np.argmax(y_pred_custom, axis=1)
auc_score_custom = roc_auc_score(y_test_binarized, y_pred_custom, average='macro', multi_class='ovr')

# Evaluate performance
print("\nXGBoost Default Classification Report:")
print(classification_report(y_test, y_pred_labels_default))
print(f"XGBoost Default AUC Score: {auc_score_default:.4f}")

print("\nXGBoost Custom Loss Classification Report:")
print(classification_report(y_test, y_pred_labels_custom))
print(f"XGBoost Custom Loss AUC Score: {auc_score_custom:.4f}")



Logistic Regression Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.67      0.80         6
           1       0.75      1.00      0.86         6
           2       0.75      1.00      0.86         6
           3       1.00      0.67      0.80         6

    accuracy                           0.83        24
   macro avg       0.88      0.83      0.83        24
weighted avg       0.88      0.83      0.83        24

Logistic Regression AUC Score: 0.9491

SVM Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.67      0.80         6
           1       0.67      1.00      0.80         6
           2       0.75      1.00      0.86         6
           3       1.00      0.50      0.67         6

    accuracy                           0.79        24
   macro avg       0.85      0.79      0.78        24
weighted avg       0.85      0.79      0.78        24

SVM AUC Score: 0.939

