In [1]:
import json
import pickle
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

In [4]:
# Load the survey dataset; the path is resolved relative to the notebook's
# working directory.
csv_path = 'mental_disorder_prediction.csv'
df = pd.read_csv(csv_path)
# Preview the first ten rows as the cell output.
df.head(n=10)

Unnamed: 0,feeling.nervous,panic,breathing.rapidly,sweating,trouble.in.concentration,having.trouble.in.sleeping,having.trouble.with.work,hopelessness,anger,over.react,...,weight.gain,material.possessions,introvert,popping.up.stressful.memory,having.nightmares,avoids.people.or.activities,feeling.negative,trouble.concentrating,blamming.yourself,Disorder
0,yes,yes,yes,yes,yes,yes,no,no,no,no,...,no,no,no,no,no,no,no,no,no,Anxiety
1,no,no,no,no,no,no,yes,yes,yes,yes,...,no,no,no,no,no,no,no,no,no,Depression
2,no,no,no,no,no,no,no,no,no,no,...,yes,yes,yes,no,no,no,no,no,no,Loneliness
3,no,no,no,no,no,no,no,no,no,no,...,no,no,no,yes,yes,yes,yes,yes,yes,Stress
4,no,no,no,no,no,no,no,no,no,no,...,no,no,no,no,no,no,no,no,no,Normal
5,yes,yes,yes,yes,yes,yes,no,no,no,no,...,no,no,no,no,no,no,no,no,no,Anxiety
6,no,no,no,no,no,no,yes,yes,yes,yes,...,no,no,no,no,no,no,no,no,no,Depression
7,no,no,no,no,no,no,no,no,no,no,...,yes,yes,yes,no,no,no,no,no,no,Loneliness
8,no,no,no,no,no,no,no,no,no,no,...,no,no,no,yes,yes,yes,yes,yes,yes,Stress
9,no,no,no,no,no,no,no,no,no,no,...,no,no,no,no,no,no,no,no,no,Normal


In [5]:
# Summarise the frame: row count, column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40960 entries, 0 to 40959
Data columns (total 25 columns):
 #   Column                       Non-Null Count  Dtype 
---  ------                       --------------  ----- 
 0   feeling.nervous              40960 non-null  object
 1   panic                        40960 non-null  object
 2   breathing.rapidly            40960 non-null  object
 3   sweating                     40960 non-null  object
 4   trouble.in.concentration     40960 non-null  object
 5   having.trouble.in.sleeping   40960 non-null  object
 6   having.trouble.with.work     40960 non-null  object
 7   hopelessness                 40960 non-null  object
 8   anger                        40960 non-null  object
 9   over.react                   40960 non-null  object
 10  change.in.eating             40960 non-null  object
 11  suicidal.thought             40960 non-null  object
 12  feeling.tired                40960 non-null  object
 13  close.friend                 40

In [6]:
# Normalise the headers in one chained pass: lower-case first, then replace
# literal dots with underscores (regex=False stops '.' matching any character).
df.columns = df.columns.str.lower().str.replace('.', '_', regex=False)
# Show the renamed header index as the cell output.
df.columns

Index(['feeling_nervous', 'panic', 'breathing_rapidly', 'sweating',
       'trouble_in_concentration', 'having_trouble_in_sleeping',
       'having_trouble_with_work', 'hopelessness', 'anger', 'over_react',
       'change_in_eating', 'suicidal_thought', 'feeling_tired', 'close_friend',
       'social_media_addiction', 'weight_gain', 'material_possessions',
       'introvert', 'popping_up_stressful_memory', 'having_nightmares',
       'avoids_people_or_activities', 'feeling_negative',
       'trouble_concentrating', 'blamming_yourself', 'disorder'],
      dtype='object')

In [7]:
# Re-display the first ten rows to confirm the renamed columns.
df.head(10)

Unnamed: 0,feeling_nervous,panic,breathing_rapidly,sweating,trouble_in_concentration,having_trouble_in_sleeping,having_trouble_with_work,hopelessness,anger,over_react,...,weight_gain,material_possessions,introvert,popping_up_stressful_memory,having_nightmares,avoids_people_or_activities,feeling_negative,trouble_concentrating,blamming_yourself,disorder
0,yes,yes,yes,yes,yes,yes,no,no,no,no,...,no,no,no,no,no,no,no,no,no,Anxiety
1,no,no,no,no,no,no,yes,yes,yes,yes,...,no,no,no,no,no,no,no,no,no,Depression
2,no,no,no,no,no,no,no,no,no,no,...,yes,yes,yes,no,no,no,no,no,no,Loneliness
3,no,no,no,no,no,no,no,no,no,no,...,no,no,no,yes,yes,yes,yes,yes,yes,Stress
4,no,no,no,no,no,no,no,no,no,no,...,no,no,no,no,no,no,no,no,no,Normal
5,yes,yes,yes,yes,yes,yes,no,no,no,no,...,no,no,no,no,no,no,no,no,no,Anxiety
6,no,no,no,no,no,no,yes,yes,yes,yes,...,no,no,no,no,no,no,no,no,no,Depression
7,no,no,no,no,no,no,no,no,no,no,...,yes,yes,yes,no,no,no,no,no,no,Loneliness
8,no,no,no,no,no,no,no,no,no,no,...,no,no,no,yes,yes,yes,yes,yes,yes,Stress
9,no,no,no,no,no,no,no,no,no,no,...,no,no,no,no,no,no,no,no,no,Normal


In [8]:
# Encode every feature column (all but the trailing target column) as integers.
# The encoder is fitted ONCE on the full answer vocabulary so that every column
# shares the same mapping ('no' -> 0, 'yes' -> 1). Re-fitting per column would
# derive the mapping from whatever values that column happens to contain, so a
# column holding only 'yes' would be encoded as 0 and silently flip its meaning.
le_features = LabelEncoder()
le_features.fit(['no', 'yes'])
for column in df.columns[:-1]:
    # transform() raises ValueError for any value outside the fitted vocabulary,
    # so unexpected answers (or re-running on an already encoded frame) fail
    # loudly instead of being re-mapped.
    df[column] = le_features.transform(df[column])
print(le_features.classes_)

['no' 'yes']


In [9]:
# Fit a dedicated encoder on the target column, then overwrite the column with
# the resulting integer class ids. The fitted classes are printed for reference.
le_target = LabelEncoder().fit(df['disorder'])
df['disorder'] = le_target.transform(df['disorder'])
print(le_target.classes_)

['Anxiety' 'Depression' 'Loneliness' 'Normal' 'Stress']


In [10]:
# Build the {class id: label} lookup from the fitted target encoder.
label_dict = dict(enumerate(le_target.classes_))
print(label_dict)
# Persist the lookup next to the notebook. JSON object keys are always strings,
# so the integer ids come back as '0', '1', ... when the file is read.
json_object = json.dumps(label_dict)
with open("labels.json", "w") as labels_file:
    labels_file.write(json_object)

{0: 'Anxiety', 1: 'Depression', 2: 'Loneliness', 3: 'Normal', 4: 'Stress'}


In [11]:
# Shuffle all rows and renumber the index from zero. The fixed seed keeps the
# row order (and the preview below) reproducible on Restart & Run All; it
# matches the seed used for the train/test split.
df = df.sample(frac=1.0, random_state=47).reset_index(drop=True)
df.head(20)

Unnamed: 0,feeling_nervous,panic,breathing_rapidly,sweating,trouble_in_concentration,having_trouble_in_sleeping,having_trouble_with_work,hopelessness,anger,over_react,...,weight_gain,material_possessions,introvert,popping_up_stressful_memory,having_nightmares,avoids_people_or_activities,feeling_negative,trouble_concentrating,blamming_yourself,disorder
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,1,1,1,1,1,4
1,1,1,1,1,1,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,1,1,1,0,0,0,0,0,0,2
3,0,0,0,0,0,0,1,1,1,1,...,0,0,0,0,0,0,0,0,0,1
4,0,0,0,0,0,0,0,0,0,0,...,1,1,1,0,0,0,0,0,0,2
5,1,1,1,1,1,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,3
7,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,3
8,0,0,0,0,0,0,0,0,0,0,...,1,1,1,0,0,0,0,0,0,2
9,0,0,0,0,0,0,1,1,1,1,...,0,0,0,0,0,0,0,0,0,1


In [12]:
# Separate the feature matrix from the target vector as NumPy arrays.
target_column = 'disorder'
x_data = df.drop(columns=[target_column]).values
y_data = df[target_column].values

In [13]:
# Hold out 20% of the rows for evaluation; the seed pins the partition.
x_train, x_test, y_train, y_test = train_test_split(
    x_data,
    y_data,
    test_size=0.2,
    random_state=47,
)

In [14]:
# Row counts of the training and test partitions.
x_train.shape[0], x_test.shape[0]

(32768, 8192)

In [15]:
# Train a random forest with otherwise default hyper-parameters. The fixed seed
# makes the fitted trees, and therefore the pickled model, reproducible across
# runs; without it every run builds a different forest.
rfc = RandomForestClassifier(random_state=47)
rfc.fit(x_train, y_train)

In [16]:
# Number of feature columns in the test matrix.
x_test.shape[1]

24

In [17]:
# Predict the encoded class id for every held-out row.
rfc.predict(x_test)

array([0, 1, 3, ..., 3, 0, 3])

In [18]:
# Mean accuracy of the fitted forest on the held-out split.
# NOTE(review): a perfect score can mean identical rows appear in both the
# training and test partitions — check for duplicates before trusting it.
rfc.score(x_test, y_test)

1.0

In [19]:
# Serialise the fitted model to disk. The context manager guarantees the file
# is flushed and closed even if pickling fails, instead of leaving the handle
# to be cleaned up by the garbage collector.
filename = 'trained_model.pickle'
with open(filename, 'wb') as model_file:
    pickle.dump(rfc, model_file)

In [20]:
# Reload the model to verify the save/load round trip; the context manager
# closes the file handle deterministically.
# SECURITY: unpickling executes arbitrary code from the file, so only load
# pickles produced by a trusted source such as this notebook.
with open(filename, 'rb') as model_file:
    trained_model = pickle.load(model_file)