In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Train a random-forest classifier on the structured mental-health dataset,
# report hold-out accuracy, and persist the fitted model and scaler.

# Name of the label column in the CSV.
TARGET_COLUMN = "mental_health_condition"

# 1️⃣ Load Dataset (update the path if the CSV lives elsewhere)
df = pd.read_csv("./mental_health_data_structured.csv")

# 2️⃣ Drop rows without a label
# A missing target cannot be imputed meaningfully: filling it with a column
# mean would invent a class label that does not exist.
df = df.dropna(subset=[TARGET_COLUMN])

# 3️⃣ Split Dataset into Features & Labels
# The target is separated BEFORE encoding so that a string/categorical label
# is not one-hot encoded (which would rename the column and break the lookup).
y = df[TARGET_COLUMN]
X = df.drop(columns=[TARGET_COLUMN])

# 4️⃣ Encode Categorical Feature Columns
# With the default dummy_na=False, a missing category becomes an all-zero row.
X = pd.get_dummies(X, drop_first=True)

# 5️⃣ Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# 6️⃣ Handle Missing Feature Values
# Means are taken from the training split only, so no statistic of the test
# rows leaks into training. numeric_only=True keeps the reduction from raising
# on any non-numeric column that survived encoding.
train_means = X_train.mean(numeric_only=True)
X_train = X_train.fillna(train_means)
X_test = X_test.fillna(train_means)

# 7️⃣ Scale Features (fit on the training split, reuse on the test split)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 8️⃣ Train the Model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train_scaled, y_train)

# 9️⃣ Evaluate Model
# NOTE(review): a recorded run of this cell printed an accuracy of 1.00. A
# perfect hold-out score frequently means some feature encodes the label -
# confirm against the dataset before trusting the number.
y_pred = model.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")

# 🔟 Save Model & Scaler
# Inference code must apply the same column layout and scaler as used here.
joblib.dump(model, "mental_random_forest.pkl")
joblib.dump(scaler, "scaler.pkl")

print("✅ Model and Scaler saved successfully!")


Model Accuracy: 1.00
✅ Model and Scaler saved successfully!
