In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.optimizers import Adam
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

In [2]:
dataset = pd.read_csv('dataset_mental3.csv')

In [3]:
dataset

Unnamed: 0.1,Unnamed: 0,Age,Gender,family_history,treatment,work_interfere,care_options,wellness_program,seek_help,mental_health_consequence,phys_health_consequence,Cluster
0,0,37,1,0,1,3,2,0,1,0,0,3
1,1,44,0,0,0,2,0,2,2,2,0,5
2,2,32,0,0,0,2,0,0,0,0,0,6
3,3,31,0,1,1,3,1,0,0,1,1,2
4,4,31,0,0,0,0,0,2,2,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...
1208,1208,26,0,0,1,4,0,0,0,0,0,0
1209,1209,32,0,1,1,3,1,0,0,0,0,2
1210,1210,34,0,1,1,1,1,0,0,1,1,2
1211,1211,46,1,0,0,4,1,0,0,1,0,4


In [4]:
dataset = dataset.drop('Unnamed: 0', axis = True)

In [5]:
dataset

Unnamed: 0,Age,Gender,family_history,treatment,work_interfere,care_options,wellness_program,seek_help,mental_health_consequence,phys_health_consequence,Cluster
0,37,1,0,1,3,2,0,1,0,0,3
1,44,0,0,0,2,0,2,2,2,0,5
2,32,0,0,0,2,0,0,0,0,0,6
3,31,0,1,1,3,1,0,0,1,1,2
4,31,0,0,0,0,0,2,2,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...
1208,26,0,0,1,4,0,0,0,0,0,0
1209,32,0,1,1,3,1,0,0,0,0,2
1210,34,0,1,1,1,1,0,0,1,1,2
1211,46,1,0,0,4,1,0,0,1,0,4


In [6]:
train_dataset = dataset.sample(frac=0.8, random_state=0)
test_dataset = dataset.drop(train_dataset.index)

In [7]:
print(train_dataset)

      Age  Gender  family_history  treatment  work_interfere  care_options  \
899    43       0               0          1               1             1   
830    31       0               1          0               4             2   
55     27       0               1          1               2             1   
523    23       0               0          0               2             0   
495    25       0               0          0               0             2   
...   ...     ...             ...        ...             ...           ...   
1038   43       0               1          1               1             2   
322    30       1               0          0               0             1   
765    25       0               1          1               1             1   
885    30       0               0          1               1             0   
556    32       0               0          1               1             0   

      wellness_program  seek_help  mental_health_consequence  \

In [8]:
print(test_dataset)

      Age  Gender  family_history  treatment  work_interfere  care_options  \
11     29       0               0          0               0             2   
23     41       0               0          1               0             0   
24     33       0               1          1               2             2   
25     35       0               1          1               1             1   
28     34       0               0          1               1             2   
...   ...     ...             ...        ...             ...           ...   
1194   60       0               0          1               3             0   
1197   30       0               0          0               1             0   
1202   36       0               0          1               3             0   
1208   26       0               0          1               4             0   
1209   32       0               1          1               3             1   

      wellness_program  seek_help  mental_health_consequence  \

In [9]:
train_features = train_dataset.copy()
test_features = test_dataset.copy()

train_labels = train_features.pop('Cluster')
test_labels = test_features.pop('Cluster')

In [10]:
normalizer = tf.keras.layers.Normalization(axis=-1)
normalizer.adapt(np.asarray(train_features))
print(normalizer.mean.numpy())
first = np.array(train_features[:1])

with np.printoptions(precision=2, suppress=True):
  print('First example:', first)
  print()
  print('Normalized:', normalizer(first).numpy())

[[32.169067    0.22783503  0.4020618   0.5041237   1.8329895   0.85670096
   0.49484524  0.7927834   0.9907217   0.46494842]]
First example: [[43  0  0  1  1  1  0  1  1  2]]

Normalized: [[ 1.44 -0.5  -0.82  0.99 -0.59  0.18 -0.66  0.24  0.01  1.88]]


In [11]:
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(train_features.shape[1],)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax')
])


In [12]:
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [13]:
model.fit(train_features, train_labels, epochs=50, batch_size=32, validation_split=0.2)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.src.callbacks.History at 0x7cc6d1519c60>

In [14]:
loss, accuracy = model.evaluate(train_features, train_labels)
print(f"Test Accuracy: {accuracy}")

Test Accuracy: 0.9278350472450256


In [18]:
prediksi = model.predict([[49,  0,  1,  0,  4,  2,  0,  1,  1,  2],[19,  1,  1,  0,  3,  2,  0,  0,  1,  2]])
predicted_classes = np.argmax(prediksi, axis=1)
print(predicted_classes)
print(prediksi)


[4 9]
[[1.14492627e-06 8.96145082e-07 4.36532032e-03 1.29220381e-04
  3.80228430e-01 2.50285491e-03 4.18694224e-03 1.44456606e-03
  3.08612108e-01 2.98528552e-01]
 [1.33271601e-06 2.38429311e-05 1.29673805e-03 1.23221345e-01
  1.83659536e-03 4.55908186e-04 3.13178425e-05 1.98907888e-04
  1.31345280e-02 8.59799385e-01]]
