# Suicide Risk Prediction Model

In [92]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Import necessary libraries

In [93]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from imblearn.over_sampling import SMOTE

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

import joblib

## Pre-processing

Loading the data

In [94]:
df = pd.read_csv("foreveralone.csv")

Categorical to numerical value mapping

In [95]:
# Bodyweight mapping
df['bodyweight'] = df['bodyweight'].map({'Normal weight': 0, 'Under weight': 1, 'Over weight': 2, 'Obese': 3})
# Income mapping
df['income'] = df['income'].map({'$30,000 to $39,999': 0, '$1 to $10,000': 1, '$0': 2, '$50,000 to $74,999': 3})
# Gender mapping
df['gender'] = df['gender'].map({'Male': 0, 'Female': 1, 'Transgender male': 2, 'Transgender female': 3})

In [96]:
# Yes/ No mapping
for col in ['attempt_suicide', 'depressed', 'social_fear']:
  df[col] = df[col].map({"Yes": 1, "No": 0})

In [97]:
# Drop missing values
df.dropna(inplace=True)
print(df.head())

                  time  gender sexuallity  age  income  \
5   5/17/2016 20:09:09       0   Straight   24     3.0   
11  5/17/2016 20:18:37       0   Straight   25     1.0   
12  5/17/2016 20:26:57       0   Straight   24     1.0   
14  5/17/2016 20:30:26       0   Straight   22     0.0   
16  5/17/2016 20:32:26       1   Straight   29     1.0   

                      race  bodyweight virgin prostitution_legal  \
5       White non-Hispanic         0.0    Yes                 No   
11                   Asian         0.0    Yes                 No   
12      White non-Hispanic         0.0     No                 No   
14  Hispanic (of any race)         0.0    Yes                 No   
16         white and asian         0.0    Yes                 No   

          pay_for_sex  friends  social_fear  depressed  \
5   Yes but I haven't      2.0            1          1   
11                 No      0.0            1          1   
12  Yes but I haven't     20.0            0          0   
14  Yes bu

## Train-Test split

Spliting table into Input and output

In [98]:
# Selected features for prediction
features = ['age', 'friends', 'depressed', 'social_fear', 'gender', 'income', 'bodyweight']

# Features
X = df[features]
# Outputs
y = df['attempt_suicide']

Handling Class Imbalance

In [99]:
from imblearn.over_sampling import SMOTE

smote = SMOTE(random_state=42)
X, y = smote.fit_resample(X, y)

Train-test split

In [100]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## Model Training

XGBoost

In [101]:
# Train XGBoost
xgb_model = XGBClassifier(n_estimators=200, learning_rate=0.05, max_depth=5, random_state=42)
xgb_model.fit(X_train, y_train)

# Predictions
y_pred_xgb = xgb_model.predict(X_test)

# Evaluate
accuracy_xgb = accuracy_score(y_test, y_pred_xgb)
print(f"XGBoost Accuracy: {accuracy_xgb:.2f}")


XGBoost Accuracy: 0.92


Logistic Regression

In [102]:
# Train Logistic Regression
lr_model = LogisticRegression()
lr_model.fit(X_train, y_train)

# Prediction
y_pred = lr_model.predict(X_test)

# Evaluate
lr_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", lr_accuracy)

Accuracy: 0.625


Random Forest

In [104]:
# Train Random Forest Classifier
rf_model = RandomForestClassifier()
rf_model.fit(X_train, y_train)

# Prediction
y_pred = rf_model.predict(X_test)

# Evaluate
rf_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", rf_accuracy)

Accuracy: 0.8333333333333334


## Saving the model

In [106]:
# Save the model
joblib.dump(xgb_model, "suicide_predictor.pkl")
loaded_model = joblib.load("suicide_predictor.pkl")

Testing the model

In [107]:
new_data = np.array([[22, 1, 1, 1, 0, 1, 2]])  # Example input (age, friends, etc.)
prediction = loaded_model.predict(new_data)

if prediction[0] == 1:
    print("High risk of suicide attempt. Immediate attention needed.")
else:
    print("Low risk of suicide attempt.")


High risk of suicide attempt. Immediate attention needed.
