## **Model to be Used at the Backend of UI**

In [82]:
import pandas as pd
import numpy as np

In [83]:
# Load the training table from 'bootstrapped_data.csv' and preview its first five rows.
data_bs = pd.read_csv(filepath_or_buffer='bootstrapped_data.csv')
data_bs.head(n=5)

Unnamed: 0,Age,Feeling_sad_or_Tearful,Irritable_towards_baby_&_partner,Trouble_sleeping_at_night,Problems_concentrating_or_making_decision,Overeating_or_loss_of_appetite,Feeling_anxious,Feeling_of_guilt,Problems_of_bonding_with_baby,Suicide_attempt
0,45-50,No,Sometimes,Yes,Often,No,Yes,No,No,No
1,45-50,Yes,No,Two or more days a week,No,No,Yes,Maybe,Sometimes,No
2,25-30,Sometimes,Yes,Two or more days a week,Yes,No,No,Maybe,No,Yes
3,40-45,No,Sometimes,Yes,No,No,No,Maybe,Sometimes,Yes
4,30-35,Yes,No,Two or more days a week,No,No,Yes,Maybe,Sometimes,No


In [84]:
# Work on a copy so the frame loaded from 'bootstrapped_data.csv' stays untouched.
lbl_enc = data_bs.copy()

# Per-column count of missing values in the loaded frame. This has to be the
# last expression of the cell: a bare expression placed before another
# statement is evaluated and then silently discarded, so nothing is displayed.
data_bs.isna().sum()

In [85]:
# Missing values per column in the working copy, before any rows are dropped.
lbl_enc.isna().sum()

Age                                            0
Feeling_sad_or_Tearful                         0
Irritable_towards_baby_&_partner             463
Trouble_sleeping_at_night                      0
Problems_concentrating_or_making_decision    943
Overeating_or_loss_of_appetite                 0
Feeling_anxious                                0
Feeling_of_guilt                             695
Problems_of_bonding_with_baby                  0
Suicide_attempt                                0
dtype: int64

In [86]:
# Drop every row that has a missing value in any column (dropna defaults).
# Done in place on the working copy; `data_bs` keeps all of its rows.
lbl_enc.dropna(inplace=True)

In [87]:
# Re-check after dropna: every column should now report zero missing values.
lbl_enc.isna().sum()

Age                                          0
Feeling_sad_or_Tearful                       0
Irritable_towards_baby_&_partner             0
Trouble_sleeping_at_night                    0
Problems_concentrating_or_making_decision    0
Overeating_or_loss_of_appetite               0
Feeling_anxious                              0
Feeling_of_guilt                             0
Problems_of_bonding_with_baby                0
Suicide_attempt                              0
dtype: int64

In [88]:
# Label -> integer code tables, one per column. The `mapping_N` names are kept
# because they are notebook-level globals that other cells may read.
mapping_1 = {'Yes': 2, 'No': 0, 'Not interested to say': 1}
mapping_2 = {'Yes': 2, 'No': 0, 'Sometimes': 1}
mapping_3 = {'Yes': 2, 'No': 0, 'Maybe': 1}
mapping_4 = {'Yes': 1, 'No': 0}
mapping_5 = {'Yes': 2, 'No': 1, 'Not at all': 0}
mapping_6 = {'Yes': 1, 'No': 0, 'Often': 2}
mapping_7 = {'Two or more days a week': 2, 'No': 0, 'Yes': 1}
mapping_8 = {'Yes': 2, 'No': 0, 'Sometimes': 1}
mapping_9 = {'Yes': 2, 'No': 0, 'Sometimes': 1}
mapping_10 = {'25-30': 0, '30-35': 1, '35-40': 2, '40-45': 3, '45-50': 4}

# Which table encodes which column. The target ('Suicide_attempt') is encoded
# here together with the nine feature columns.
column_mappings = {
    'Suicide_attempt': mapping_1,
    'Problems_of_bonding_with_baby': mapping_2,
    'Feeling_of_guilt': mapping_3,
    'Feeling_anxious': mapping_4,
    'Overeating_or_loss_of_appetite': mapping_5,
    'Problems_concentrating_or_making_decision': mapping_6,
    'Trouble_sleeping_at_night': mapping_7,
    'Irritable_towards_baby_&_partner': mapping_8,
    'Feeling_sad_or_Tearful': mapping_9,
    'Age': mapping_10,
}

for column, mapping in column_mappings.items():
    observed = lbl_enc[column].dropna()

    # Running this cell a second time would push the integer codes through
    # `.map` again and turn the whole column into NaN. Skip columns that
    # already hold codes so the cell is safe to re-run.
    if observed.isin(list(mapping.values())).all():
        continue

    # `.map` silently converts labels missing from the table to NaN; fail
    # loudly instead so typos or new answer options are noticed before training.
    unknown = sorted(set(observed.unique()) - set(mapping), key=str)
    if unknown:
        raise ValueError(f"Unmapped value(s) in column {column!r}: {unknown}")

    lbl_enc[column] = lbl_enc[column].map(mapping)

# Preview the encoded DataFrame
lbl_enc.head()

Unnamed: 0,Age,Feeling_sad_or_Tearful,Irritable_towards_baby_&_partner,Trouble_sleeping_at_night,Problems_concentrating_or_making_decision,Overeating_or_loss_of_appetite,Feeling_anxious,Feeling_of_guilt,Problems_of_bonding_with_baby,Suicide_attempt
0,4,0,1,1,2,1,1,0,0,0
1,4,2,0,2,0,1,1,1,1,0
2,0,1,2,2,1,1,0,1,0,2
3,3,0,1,1,0,1,0,1,1,2
4,1,2,0,2,0,1,1,1,1,0


In [89]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# The target is the encoded 'Suicide_attempt' column; the features are every
# other column of the encoded frame.
y = lbl_enc['Suicide_attempt']
X = lbl_enc.drop(columns=['Suicide_attempt'])

# Hold out 20% of the rows for evaluation, with a fixed seed for a repeatable split.
# NOTE(review): the input file is named 'bootstrapped_data.csv' - if rows were
# resampled with replacement before this split, identical rows can end up in
# both partitions and inflate the accuracy printed below. TODO confirm how the
# file was produced.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a random forest with default hyper-parameters and a fixed seed
# (`fit` returns the estimator itself).
rf_classifier = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Score the held-out rows.
y_pred = rf_classifier.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.9960184409052808


In [90]:
# Load the new record(s) to score and replace every space in a column name
# with an underscore, the naming style used by the training columns.
X_new_test = pd.read_csv("New_data.csv").rename(
    columns=lambda name: name.replace(' ', '_')
)

# Preview the frame with its renamed columns.
X_new_test.head()

Unnamed: 0,Age,Feeling_sad_or_Tearful,Irritable_towards_baby_&_partner,Trouble_sleeping_at_night,Problems_concentrating_or_making_decision,Overeating_or_loss_of_appetite,Feeling_anxious,Feeling_of_guilt,Problems_of_bonding_with_baby
0,25-30,Sometimes,Yes,Two or more days a week,Often,Yes,No,Yes,Sometimes


In [91]:
# List the column names of the new data after the space -> underscore rename.
X_new_test.columns

Index(['Age', 'Feeling_sad_or_Tearful', 'Irritable_towards_baby_&_partner',
       'Trouble_sleeping_at_night',
       'Problems_concentrating_or_making_decision',
       'Overeating_or_loss_of_appetite', 'Feeling_anxious', 'Feeling_of_guilt',
       'Problems_of_bonding_with_baby'],
      dtype='object')

In [92]:
# Label -> integer code tables for the feature columns. These must stay
# identical to the tables used to encode the training frame. The new data has
# no 'Suicide_attempt' column, so no table for the target is needed here.
mapping_2 = {'Yes': 2, 'No': 0, 'Sometimes': 1}
mapping_3 = {'Yes': 2, 'No': 0, 'Maybe': 1}
mapping_4 = {'Yes': 1, 'No': 0}
mapping_5 = {'Yes': 2, 'No': 1, 'Not at all': 0}
mapping_6 = {'Yes': 1, 'No': 0, 'Often': 2}
mapping_7 = {'Two or more days a week': 2, 'No': 0, 'Yes': 1}
mapping_8 = {'Yes': 2, 'No': 0, 'Sometimes': 1}
mapping_9 = {'Yes': 2, 'No': 0, 'Sometimes': 1}
mapping_10 = {'25-30': 0, '30-35': 1, '35-40': 2, '40-45': 3, '45-50': 4}

# Which table encodes which feature column.
feature_mappings = {
    'Problems_of_bonding_with_baby': mapping_2,
    'Feeling_of_guilt': mapping_3,
    'Feeling_anxious': mapping_4,
    'Overeating_or_loss_of_appetite': mapping_5,
    'Problems_concentrating_or_making_decision': mapping_6,
    'Trouble_sleeping_at_night': mapping_7,
    'Irritable_towards_baby_&_partner': mapping_8,
    'Feeling_sad_or_Tearful': mapping_9,
    'Age': mapping_10,
}

for column, mapping in feature_mappings.items():
    observed = X_new_test[column].dropna()

    # Running this cell a second time would push the integer codes through
    # `.map` again and turn the whole column into NaN. Skip columns that
    # already hold codes so the cell is safe to re-run.
    if observed.isin(list(mapping.values())).all():
        continue

    # `.map` silently converts labels missing from the table to NaN; raise
    # instead so an unexpected answer is reported rather than fed to the model.
    unknown = sorted(set(observed.unique()) - set(mapping), key=str)
    if unknown:
        raise ValueError(f"Unmapped value(s) in column {column!r}: {unknown}")

    X_new_test[column] = X_new_test[column].map(mapping)

# Preview the encoded DataFrame
X_new_test.head()

Unnamed: 0,Age,Feeling_sad_or_Tearful,Irritable_towards_baby_&_partner,Trouble_sleeping_at_night,Problems_concentrating_or_making_decision,Overeating_or_loss_of_appetite,Feeling_anxious,Feeling_of_guilt,Problems_of_bonding_with_baby
0,0,1,2,2,2,2,0,2,1


In [93]:
# Select the feature columns by name, in the order of the training frame `X`,
# so the prediction does not depend on the column order (or on extra columns)
# of the new-data CSV. A missing feature column raises a KeyError here.
rf_classifier.predict(X_new_test[X.columns])

array([1], dtype=int64)

In [94]:
import joblib

# Persist the fitted classifier to 'random_forest_model.pkl'.
# Only the estimator is written: the label -> code tables defined in the
# encoding cells are not part of the file, so whatever loads the model has to
# apply the same encoding to its inputs before calling predict.
joblib.dump(value=rf_classifier, filename='random_forest_model.pkl')

['random_forest_model.pkl']

In [95]:
# Column names of the encoded new-data frame.
X_new_test.columns

Index(['Age', 'Feeling_sad_or_Tearful', 'Irritable_towards_baby_&_partner',
       'Trouble_sleeping_at_night',
       'Problems_concentrating_or_making_decision',
       'Overeating_or_loss_of_appetite', 'Feeling_anxious', 'Feeling_of_guilt',
       'Problems_of_bonding_with_baby'],
      dtype='object')