In [None]:
import warnings
from sklearn.exceptions import ConvergenceWarning

# Suppress scikit-learn ConvergenceWarning messages for the rest of the session
# (the MLP models below are fitted with a capped max_iter).
warnings.filterwarnings("ignore", category=ConvergenceWarning)
# Suppress any warning whose message starts with "Pandas requires version"
# (`message` is a regular expression matched against the start of the text).
# NOTE(review): presumably placed before the remaining imports so that
# import-time warnings are silenced as well -- confirm.
warnings.filterwarnings("ignore", message="Pandas requires version")

from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import GridSearchCV,RandomizedSearchCV, train_test_split
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import pandas as pd
import numpy as np

# Continuous / integer distributions for RandomizedSearchCV parameter sampling
from scipy.stats import randint, uniform

In [None]:
# Load the raw user table.
df = pd.read_csv('data/csv/user_data.csv')

# Columns excluded from the model inputs.
removed_cols = ['id', 'hobbies', 'average_positive_probability_before',
                'average_positive_probability_after', 'diff', 'score_change',
                'count_id_after', 'count_id_before', 'urban_rural', 'race_ethnicity']
df = df.drop(columns=removed_cols)

# Define categorical columns
categorical_columns = ['gender', 'socio_economic_status', 'parental_involvement']

# One-hot encode BEFORE the numeric coercion below: coercing first would turn
# string-valued categories into NaN and silently discard these features.
# dtype=float keeps the design matrix homogeneous (bool dummies would make
# X.values an object array, which breaks downstream numeric routines).
df = pd.get_dummies(df, columns=categorical_columns, drop_first=True, dtype=float)

# Convert every remaining column to numeric; unparseable entries become NaN.
df = df.apply(pd.to_numeric, errors='coerce')

# Rows without a usable target cannot be used for supervised training or
# evaluation, so drop them instead of imputing the label.
df = df.dropna(subset=['mental_health_score_after'])

# Define features (X) and target (y)
X = df.drop(columns=['mental_health_score_after'])
y = df['mental_health_score_after']

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Impute missing feature values with means estimated on the training split
# only, so no information from the test rows leaks into preprocessing.
train_means = X_train.mean()
X_train = X_train.fillna(train_means)
X_test = X_test.fillna(train_means)
X = X.fillna(train_means)

# Show the resulting split sizes as the cell output.
X_train.shape, X_test.shape

In [None]:
# Baseline model: standardise the features, then fit an MLP that uses the
# library defaults apart from a fixed seed and a 500-iteration cap.
baseline_mlp = MLPRegressor(random_state=42, max_iter=500)
nn_model = make_pipeline(StandardScaler(), baseline_mlp)
nn_model.fit(X_train, y_train)

# Score the baseline on the held-out split
y_pred_nn = nn_model.predict(X_test)
mse_nn = mean_squared_error(y_test, y_pred_nn)
print(f'Neural Network MSE: {mse_nn:.2f}')

In [None]:
# Sanity check: how many missing values remain in the training split?
# isna() works for every column dtype (np.isnan raises TypeError on object
# columns), and totals keep each printed line to a single number.
nan_per_feature = X_train.isna().sum()
print(f'NaNs in X_train: {nan_per_feature.sum()}')
print(f'NaNs in y_train: {y_train.isna().sum()}')

# Per-column breakdown of any offending features (empty when the data is clean).
nan_per_feature[nan_per_feature > 0]

In [None]:
# Inspect the training target: as the last expression of the cell, the Series
# is rendered as the cell output.
y_train

In [None]:
# Search space for RandomizedSearchCV. Keys use the `<step>__<parameter>` form
# so that they are routed to the 'mlpregressor' step of the pipeline below.
# (An earlier, wider definition of this dict was overwritten before use and
# has been removed.)
param_dist = {
    'mlpregressor__hidden_layer_sizes': [(50,), (100,), (50, 50), (100, 50)],
    'mlpregressor__activation': ['relu', 'tanh'],
    # Both solvers are searched; the learning_rate schedule below only has an
    # effect when the sampled solver is 'sgd'.
    'mlpregressor__solver': ['adam', 'sgd'],
    'mlpregressor__alpha': [0.001, 0.01, 0.1],
    'mlpregressor__learning_rate': ['constant', 'adaptive'],
    # scipy's uniform(loc, scale) samples from [loc, loc + scale],
    # i.e. [0.001, 0.011] here.
    'mlpregressor__learning_rate_init': uniform(0.001, 0.01),
    'mlpregressor__max_iter': [200, 300],
    'mlpregressor__early_stopping': [True, False]
}

# Scale inside the pipeline so the scaler is re-fitted on each CV training fold.
mlp_model = make_pipeline(StandardScaler(), MLPRegressor(random_state=42))

# Initialize RandomizedSearchCV
random_search = RandomizedSearchCV(estimator=mlp_model,
                                   param_distributions=param_dist,
                                   n_iter=300,  # Number of parameter settings that are sampled
                                   scoring='neg_mean_squared_error',  # Higher (closer to 0) is better
                                   cv=5,  # Number of cross-validation folds
                                   verbose=2,  # Higher verbosity to see more details
                                   n_jobs=-1,  # Use all available cores
                                   random_state=42)

# Fit the RandomizedSearchCV to the training data
random_search.fit(X_train, y_train)

# Get the best parameters and the best score
best_params = random_search.best_params_
best_score = random_search.best_score_

print(f'Best parameters found by RandomizedSearchCV: {best_params}')
print(f'Best score (negative MSE): {best_score:.2f}')

# Pipeline refitted on the full training split with the best parameters
best_mlp = random_search.best_estimator_

# Predict using the best estimator
y_pred = best_mlp.predict(X_test)

# Evaluate tuned model performance
mse = mean_squared_error(y_test, y_pred)
print(f'Tuned MLPRegressor MSE: {mse:.2f}')

In [None]:
# Score the tuned pipeline on the held-out test split
y_pred = best_mlp.predict(X_test)

# Error metrics (MAE, MSE) and goodness of fit (R^2)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

# Report in the order MSE, R2, MAE
print(f'Mean Squared Error (MSE): {mse:.2f}')
print(f'R-squared (R2): {r2:.2f}')
print(f"Mean Absolute Error (MAE): {mae}")

In [None]:
import matplotlib.pyplot as plt

# Number of best-scoring candidates to display; plotting every sampled
# combination produces an unreadable axis.
TOP_N = 20


def _format_params(param_set):
    """Render one sampled parameter dict as a compact 'name=value, ...' label."""
    parts = []
    for key, value in param_set.items():
        short_key = key.split('__')[-1]  # drop the pipeline step prefix
        shown = f'{value:.4g}' if isinstance(value, float) else value
        parts.append(f'{short_key}={shown}')
    return ', '.join(parts)


# Tabulate the cross-validation results of the randomized search
# (`results` was previously referenced without ever being defined).
results = pd.DataFrame(random_search.cv_results_)
top_results = results.sort_values('mean_test_score', ascending=False).head(TOP_N)

# Extract relevant columns from results DataFrame
params = top_results['params']
mean_scores = top_results['mean_test_score']
param_labels = [_format_params(p) for p in params]
bar_positions = list(range(len(param_labels)))

# Plotting mean scores (negative MSE, so values closer to zero are better)
plt.figure(figsize=(12, 6))
plt.barh(bar_positions, mean_scores, align='center')
plt.yticks(bar_positions, param_labels, fontsize=7)
plt.gca().invert_yaxis()  # best candidate at the top
plt.xlabel('Mean Test Score')
plt.title('Mean Test Score of Hyperparameter Combinations')
plt.tight_layout()
plt.show()

In [None]:
# Residuals of the tuned model on the test split (actual minus predicted)
y_pred = best_mlp.predict(X_test)
residuals = y_test - y_pred

# Residuals against predictions, with a red reference line at zero
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(y_pred, residuals, alpha=0.5)
ax.axhline(y=0, color='r', linestyle='-', linewidth=1)
ax.set_xlabel('Predicted Values')
ax.set_ylabel('Residuals')
ax.set_title('Residual Plot')
plt.show()


In [None]:
# Predictions against ground truth; the dashed red diagonal marks y = x,
# spanning the observed range of the test target.
actual_range = [y_test.min(), y_test.max()]

fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(y_test, y_pred, alpha=0.5)
ax.plot(actual_range, actual_range, 'r--', lw=2)
ax.set_xlabel('Actual Values')
ax.set_ylabel('Predicted Values')
ax.set_title('Scatter Plot of Predicted vs Actual Values')
plt.show()

In [None]:
import seaborn as sns

# Histogram of the test residuals with a kernel density estimate overlaid
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(residuals, kde=True, ax=ax)
ax.set_xlabel('Residuals')
ax.set_ylabel('Frequency')
ax.set_title('Distribution of Residuals')
plt.show()

In [None]:
# Pairwise correlation of the training features, annotated with coefficients
plt.figure(figsize=(10, 8))
sns.heatmap(X_train.corr(), annot=True, cmap='coolwarm', center=0)
plt.title('Correlation Matrix of Features')
plt.show()

# Variance inflation factor (VIF) table
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.tools.tools import add_constant

# variance_inflation_factor regresses one column on all the others and does
# not add an intercept itself; without a constant column the auxiliary
# regressions are uncentered and the VIFs are inflated. The float cast makes
# the matrix a plain numeric array even when dummy columns are stored as bool.
vif_design = add_constant(X_train.astype(float).values, prepend=True, has_constant='add')

# Calculate VIF for each feature (column 0 is the added constant, so skip it)
vif = pd.DataFrame()
vif["Feature"] = X_train.columns
vif["VIF"] = [variance_inflation_factor(vif_design, i) for i in range(1, vif_design.shape[1])]
print(vif)


In [None]:
# Weight matrix connecting the inputs to the first hidden layer of the tuned MLP
fitted_mlp = best_mlp.named_steps['mlpregressor']
first_layer_weights = fitted_mlp.coefs_[0]

# Labels for the bars: one per training feature column
feature_names = X_train.columns

# Mean absolute weight per row of the matrix (one row per labelled feature)
mean_abs_weight = np.abs(first_layer_weights).mean(axis=1)
bar_positions = list(range(len(feature_names)))

fig, ax = plt.subplots(figsize=(12, 6))
ax.barh(bar_positions, mean_abs_weight, align='center')
ax.set_yticks(bar_positions)
ax.set_yticklabels(feature_names)
ax.set_xlabel('Average Magnitude of Weights')
ax.set_title('Feature Importance (Average Magnitude of Weights) in the First Layer')
plt.show()


In [None]:
from sklearn.inspection import permutation_importance

# Permutation importance of each feature on the test split:
# 10 shuffles per feature, fixed seed, all cores.
perm_importance = permutation_importance(
    best_mlp,
    X_test,
    y_test,
    n_repeats=10,
    random_state=42,
    n_jobs=-1,
)

# Ascending order of mean importance, applied to both values and labels
sorted_idx = perm_importance.importances_mean.argsort()
ranked_importance = perm_importance.importances_mean[sorted_idx]
ranked_names = np.array(feature_names)[sorted_idx]
bar_positions = list(range(len(feature_names)))

# Horizontal bar chart of the ranked importances
fig, ax = plt.subplots(figsize=(12, 6))
ax.barh(bar_positions, ranked_importance, align='center')
ax.set_yticks(bar_positions)
ax.set_yticklabels(ranked_names)
ax.set_xlabel('Permutation Importance')
ax.set_title('Feature Importance (Permutation Importance)')
plt.show()
