<a href="https://colab.research.google.com/github/priyariyyer/AIML_Projects/blob/main/AnxietyLevelClassifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Problem Statement and Objective

In [1]:
#Healthcare worker anxiety, depression and burnout are major factors in the health workforce crisis.
#One such sample from a population is available for analysing the statistics.
#Objective of the project is "To create a classifier that classifies healthcare workers into anxiety level buckets."

## Data Gathering

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
df = pd.read_excel('/content/Anxiety Level in Healthcare Workers Dataset.xlsx')
df

Unnamed: 0,Timestamp,Name,Gender,Martial Status,Age,Education,Role in healthcare workers,Working Hours,Location,Income,ANXIETY SCORE
0,2021-11-13 13:44:48,GUNDIKAYALA JYOSTHNA,1,1,1,7,4,0,2,2,3
1,2021-11-13 14:28:11,M SANDHYA RANI,1,1,1,7,4,0,2,1,5
2,2021-11-13 15:40:48,G. SAHITYA,1,1,1,7,4,0,2,1,20
3,2021-11-14 12:15:26,S.nagendra babu,0,1,1,3,2,0,2,1,11
4,2021-11-16 22:15:26,Manaswitha Bheemavarapu,1,1,1,6,6,2,1,1,13
...,...,...,...,...,...,...,...,...,...,...,...
216,2022-01-03 20:08:00,Sravani,1,0,2,1,0,1,0,0,2
217,2022-01-03 20:09:00,Asma,1,0,1,2,5,1,0,0,4
218,2022-01-03 20:11:00,Krishna reddy,0,0,2,6,6,2,0,3,3
219,2022-01-03 20:13:00,sahastra,1,0,1,1,0,1,0,0,2


In [4]:
df.shape

(221, 11)

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 221 entries, 0 to 220
Data columns (total 11 columns):
 #   Column                      Non-Null Count  Dtype         
---  ------                      --------------  -----         
 0   Timestamp                   221 non-null    datetime64[ns]
 1   Name                        221 non-null    object        
 2   Gender                      221 non-null    int64         
 3   Martial Status              221 non-null    int64         
 4   Age                         221 non-null    int64         
 5   Education                   221 non-null    int64         
 6   Role in healthcare workers  221 non-null    int64         
 7   Working Hours               221 non-null    int64         
 8   Location                    221 non-null    int64         
 9   Income                      221 non-null    int64         
 10  ANXIETY SCORE               221 non-null    int64         
dtypes: datetime64[ns](1), int64(9), object(1)
memory usage: 19

In [6]:
df.describe().transpose()

Unnamed: 0,count,mean,min,25%,50%,75%,max,std
Timestamp,221.0,2021-12-04 09:54:32.036199168,2021-11-13 13:44:48,2021-11-20 19:21:28,2021-11-23 13:23:56,2021-12-18 17:14:48,2022-01-03 20:24:00,
Gender,221.0,0.597285,0.0,0.0,1.0,1.0,1.0,0.491558
Martial Status,221.0,0.434389,0.0,0.0,0.0,1.0,1.0,0.496802
Age,221.0,1.20362,0.0,1.0,1.0,2.0,3.0,0.919083
Education,221.0,3.656109,0.0,2.0,3.0,6.0,8.0,2.213455
Role in healthcare workers,221.0,3.773756,0.0,2.0,4.0,6.0,6.0,2.056793
Working Hours,221.0,1.425339,0.0,1.0,1.0,2.0,4.0,1.01806
Location,221.0,0.846154,0.0,0.0,1.0,1.0,2.0,0.753082
Income,221.0,1.067873,0.0,0.0,1.0,2.0,3.0,1.187017
ANXIETY SCORE,221.0,7.751131,0.0,4.0,7.0,11.0,21.0,4.836092


# Data Preparation

## Intuitive Data Modifications

In [7]:
# Drop unnecessary features
df.drop(['Timestamp','Name'], axis=1, inplace=True)

In [8]:
df.shape

(221, 9)

In [9]:
# Convert Anxiety Score to Anxiety Level using below mapping
#0–4 Minimal anxiety
#5–9 Mild anxiety
#10–14 Moderate anxiety
#15–21 Severe anxiety

def anxiety_level(score):
    if score in range(0,5):
        return 'Minimal anxiety'
    elif score in range(5,10):
        return 'Mild anxiety'
    elif score in range(10,15):
        return 'Moderate anxiety'
    else:
        return 'Severe anxiety'

In [10]:
df['ANXIETY_LEVEL'] = df['ANXIETY SCORE'].apply(anxiety_level)
df

Unnamed: 0,Gender,Martial Status,Age,Education,Role in healthcare workers,Working Hours,Location,Income,ANXIETY SCORE,ANXIETY_LEVEL
0,1,1,1,7,4,0,2,2,3,Minimal anxiety
1,1,1,1,7,4,0,2,1,5,Mild anxiety
2,1,1,1,7,4,0,2,1,20,Severe anxiety
3,0,1,1,3,2,0,2,1,11,Moderate anxiety
4,1,1,1,6,6,2,1,1,13,Moderate anxiety
...,...,...,...,...,...,...,...,...,...,...
216,1,0,2,1,0,1,0,0,2,Minimal anxiety
217,1,0,1,2,5,1,0,0,4,Minimal anxiety
218,0,0,2,6,6,2,0,3,3,Minimal anxiety
219,1,0,1,1,0,1,0,0,2,Minimal anxiety


In [11]:
df.drop(['ANXIETY SCORE'], axis=1, inplace=True)

In [12]:
df.shape

(221, 9)

## Data Quality Check

In [13]:
for col in df.select_dtypes(include=['int64','float64']):
  print(col, ":", df[col].unique())

Gender : [1 0]
Martial Status : [1 0]
Age : [1 0 2 3]
Education : [7 3 6 5 1 2 4 0 8]
Role in healthcare workers : [4 2 6 3 5 1 0]
Working Hours : [0 2 3 1 4]
Location : [2 1 0]
Income : [2 1 0 3]


In [14]:
df['ANXIETY_LEVEL'].unique()

array(['Minimal anxiety', 'Mild anxiety', 'Severe anxiety',
       'Moderate anxiety'], dtype=object)

In [15]:
#Since all features are encoded, and there are no junk values in the features, data quality is fine.

## Missing Data Handling

In [16]:
df.isnull().sum()

Unnamed: 0,0
Gender,0
Martial Status,0
Age,0
Education,0
Role in healthcare workers,0
Working Hours,0
Location,0
Income,0
ANXIETY_LEVEL,0


In [17]:
#there are no missing data in the dataset

## Feature Encoding

In [18]:
#Encode Anxiety Level

encoding = {'Minimal anxiety':0, 'Mild anxiety': 1, 'Moderate anxiety': 2, 'Severe anxiety': 3}
df['ANXIETY_LEVEL'] = df['ANXIETY_LEVEL'].map(encoding)
df

Unnamed: 0,Gender,Martial Status,Age,Education,Role in healthcare workers,Working Hours,Location,Income,ANXIETY_LEVEL
0,1,1,1,7,4,0,2,2,0
1,1,1,1,7,4,0,2,1,1
2,1,1,1,7,4,0,2,1,3
3,0,1,1,3,2,0,2,1,2
4,1,1,1,6,6,2,1,1,2
...,...,...,...,...,...,...,...,...,...
216,1,0,2,1,0,1,0,0,0
217,1,0,1,2,5,1,0,0,0
218,0,0,2,6,6,2,0,3,0
219,1,0,1,1,0,1,0,0,0


## Training and Testing Data Split

In [19]:
# pip check

In [20]:
# !pip install tensorflow==2.12.0 scikit-learn==1.2.2

In [21]:
# import tensorflow as tf
# import sklearn
# print(tf.__version__, sklearn.__version__)

In [22]:
# !pip install numpy==1.26.0 --upgrade --force-reinstall

In [29]:
# pip uninstall keras tensorflow scikit-learn

Found existing installation: keras 2.12.0
Uninstalling keras-2.12.0:
  Would remove:
    /usr/local/lib/python3.11/dist-packages/keras-2.12.0.dist-info/*
    /usr/local/lib/python3.11/dist-packages/keras/*
Proceed (Y/n)? y
  Successfully uninstalled keras-2.12.0
Found existing installation: tensorflow 2.12.0
Uninstalling tensorflow-2.12.0:
  Would remove:
    /usr/local/bin/estimator_ckpt_converter
    /usr/local/bin/import_pb_to_tensorboard
    /usr/local/bin/saved_model_cli
    /usr/local/bin/tensorboard
    /usr/local/bin/tf_upgrade_v2
    /usr/local/bin/tflite_convert
    /usr/local/bin/toco
    /usr/local/bin/toco_from_protos
    /usr/local/lib/python3.11/dist-packages/tensorflow-2.12.0.dist-info/*
    /usr/local/lib/python3.11/dist-packages/tensorflow/*
Proceed (Y/n)? y
  Successfully uninstalled tensorflow-2.12.0
Found existing installation: scikit-learn 1.2.2
Uninstalling scikit-learn-1.2.2:
  Would remove:
    /usr/local/lib/python3.11/dist-packages/scikit_learn-1.2.2.dist-inf

In [35]:
# !pip install scikit_learn==1.4.2 --upgrade --force-reinstall

Collecting scikit_learn
  Downloading scikit_learn-1.7.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (11 kB)
Downloading scikit_learn-1.7.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (9.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.7/9.7 MB[0m [31m84.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: scikit_learn
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
scikeras 0.13.0 requires keras>=3.2.0, which is not installed.
sklearn-compat 0.1.3 requires scikit-learn<1.7,>=1.2, but you have scikit-learn 1.7.1 which is incompatible.[0m[31m
[0mSuccessfully installed scikit_learn-1.7.1


In [23]:
x = df.drop(['ANXIETY_LEVEL'], axis=1)
y = df['ANXIETY_LEVEL']

In [24]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.30,random_state=42)
x_train.shape, x_test.shape, y_train.shape, y_test.shape

((154, 8), (67, 8), (154,), (67,))

# Model Creation

## Classification using Logistic Regression - One v/s Rest

In [25]:
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier

model_ovr = OneVsRestClassifier(LogisticRegression())
model_ovr = model_ovr.fit(x_train, y_train)

In [26]:
y_pred = model_ovr.predict(x_test)

In [27]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

print('Using Logistic Regression - One v/s Rest')
print('Accuracy Score: ', accuracy_score(y_test, y_pred))
print('Classification Report:\n', classification_report(y_test, y_pred))

Using Logistic Regression - One v/s Rest
Accuracy Score:  0.40298507462686567
Classification Report:
               precision    recall  f1-score   support

           0       0.36      0.26      0.30        19
           1       0.49      0.70      0.58        27
           2       0.21      0.21      0.21        14
           3       0.00      0.00      0.00         7

    accuracy                           0.40        67
   macro avg       0.26      0.30      0.27        67
weighted avg       0.34      0.40      0.36        67



  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


## Classification using SVC Model

In [28]:
from sklearn.svm import SVC

model_svc = SVC()
model_svc = model_svc.fit(x_train, y_train)

In [29]:
y_pred = model_svc.predict(x_test)

In [30]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

print('Using SVC Model')
print('Accuracy Score: ', accuracy_score(y_test, y_pred))
print('Classification Report: \n', classification_report(y_test, y_pred))
print('Confusion Matrix: \n', confusion_matrix(y_test, y_pred))

Using SVC Model
Accuracy Score:  0.417910447761194
Classification Report: 
               precision    recall  f1-score   support

           0       0.40      0.21      0.28        19
           1       0.47      0.74      0.57        27
           2       0.29      0.29      0.29        14
           3       0.00      0.00      0.00         7

    accuracy                           0.42        67
   macro avg       0.29      0.31      0.28        67
weighted avg       0.36      0.42      0.37        67

Confusion Matrix: 
 [[ 4 12  3  0]
 [ 3 20  4  0]
 [ 2  8  4  0]
 [ 1  3  3  0]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


## Classification using Naive Bayes Classifier



In [31]:
from sklearn.naive_bayes import GaussianNB

model_nb = GaussianNB()
model_nb = model_nb.fit(x_train, y_train)

In [32]:
y_pred = model_nb.predict(x_test)

In [33]:
print('Using Gaussian Naive Bayes Algorithm')
print('Accuracy Score: ', accuracy_score(y_test, y_pred))
print('Classification Report: \n', classification_report(y_test, y_pred))

Using Gaussian Naive Bayes Algorithm
Accuracy Score:  0.47761194029850745
Classification Report: 
               precision    recall  f1-score   support

           0       0.47      0.37      0.41        19
           1       0.52      0.63      0.57        27
           2       0.45      0.36      0.40        14
           3       0.38      0.43      0.40         7

    accuracy                           0.48        67
   macro avg       0.45      0.45      0.44        67
weighted avg       0.47      0.48      0.47        67



## Classification using Random Forest Classifier

In [34]:
from sklearn.ensemble import RandomForestClassifier

model_rf = RandomForestClassifier()
model_rf = model_rf.fit(x_train, y_train)

In [35]:
y_pred = model_rf.predict(x_test)

In [36]:
print('Using Random Forest Classifier')
print('Accuracy Score: ', accuracy_score(y_test, y_pred))
print('Classification Report: \n', classification_report(y_test, y_pred))

Using Random Forest Classifier
Accuracy Score:  0.4626865671641791
Classification Report: 
               precision    recall  f1-score   support

           0       0.47      0.37      0.41        19
           1       0.54      0.52      0.53        27
           2       0.36      0.57      0.44        14
           3       0.50      0.29      0.36         7

    accuracy                           0.46        67
   macro avg       0.47      0.44      0.44        67
weighted avg       0.48      0.46      0.46        67



## Classification using Decision Tree Classifier Model

In [37]:
from sklearn.tree import DecisionTreeClassifier

model_dt = DecisionTreeClassifier()
model_dt = model_dt.fit(x_train, y_train)

In [38]:
y_pred = model_dt.predict(x_test)

In [39]:
print('Using Decision Tree Classifier')
print('Accuracy Score: ', accuracy_score(y_test, y_pred))
print('Classification Report: \n', classification_report(y_test, y_pred))

Using Decision Tree Classifier
Accuracy Score:  0.43283582089552236
Classification Report: 
               precision    recall  f1-score   support

           0       0.29      0.26      0.28        19
           1       0.58      0.56      0.57        27
           2       0.35      0.50      0.41        14
           3       0.50      0.29      0.36         7

    accuracy                           0.43        67
   macro avg       0.43      0.40      0.40        67
weighted avg       0.44      0.43      0.43        67



## Classification using K Nearest Neighbour Algorithm

In [40]:
from sklearn.neighbors import KNeighborsClassifier

model_knn = KNeighborsClassifier()
model_knn = model_knn.fit(x_train, y_train)

In [41]:
y_pred = model_knn.predict(x_test)

In [42]:
print('Using K Nearest Neighbour Algorithm')
print('Accuracy Score: ', accuracy_score(y_test, y_pred))
print('Classification Report: \n', classification_report(y_test, y_pred))

Using K Nearest Neighbour Algorithm
Accuracy Score:  0.4626865671641791
Classification Report: 
               precision    recall  f1-score   support

           0       0.39      0.58      0.47        19
           1       0.52      0.44      0.48        27
           2       0.46      0.43      0.44        14
           3       0.67      0.29      0.40         7

    accuracy                           0.46        67
   macro avg       0.51      0.43      0.45        67
weighted avg       0.49      0.46      0.46        67



## Classification using XGBoost Model

In [43]:
from xgboost import XGBClassifier

model_xgb = XGBClassifier()
model_xgb = model_xgb.fit(x_train, y_train)

In [44]:
y_pred = model_xgb.predict(x_test)

In [45]:
print('Using XGBoost Model')
print('Accuracy Score: ', accuracy_score(y_test, y_pred))
print('Classification Report: \n', classification_report(y_test, y_pred))

Using XGBoost Model
Accuracy Score:  0.47761194029850745
Classification Report: 
               precision    recall  f1-score   support

           0       0.42      0.26      0.32        19
           1       0.57      0.63      0.60        27
           2       0.40      0.57      0.47        14
           3       0.40      0.29      0.33         7

    accuracy                           0.48        67
   macro avg       0.45      0.44      0.43        67
weighted avg       0.47      0.48      0.47        67



## Classification using Artificial Neural Network

In [46]:
x_train

Unnamed: 0,Gender,Martial Status,Age,Education,Role in healthcare workers,Working Hours,Location,Income
65,1,0,1,4,5,2,1,0
112,0,1,1,2,3,2,1,2
186,1,0,3,1,0,2,0,0
155,1,1,0,6,6,0,2,2
31,0,1,1,2,2,3,0,1
...,...,...,...,...,...,...,...,...
106,1,0,1,2,5,2,1,0
14,1,1,0,6,6,1,2,3
92,1,0,2,0,0,2,1,0
179,0,0,3,2,3,2,0,0


In [47]:
y_train

Unnamed: 0,ANXIETY_LEVEL
65,2
112,0
186,0
155,0
31,1
...,...
106,1
14,3
92,2
179,3


In [92]:
# !pip uninstall -y keras tensorflow
# !pip install tensorflow==2.15.0

Found existing installation: keras 3.10.0
Uninstalling keras-3.10.0:
  Successfully uninstalled keras-3.10.0
Found existing installation: tensorflow 2.15.0
Uninstalling tensorflow-2.15.0:
  Successfully uninstalled tensorflow-2.15.0
Collecting tensorflow==2.15.0
  Using cached tensorflow-2.15.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.4 kB)
Collecting ml-dtypes~=0.2.0 (from tensorflow==2.15.0)
  Using cached ml_dtypes-0.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (20 kB)
Collecting numpy<2.0.0,>=1.23.5 (from tensorflow==2.15.0)
  Downloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
Collecting keras<2.16,>=2.15.0 (from tensorflow==2.15.0)
  Using cached keras-2.15.0-py3-none-any.whl.metadata (2.4 kB)
Using cached tensorflow-2.15.0-cp311-cp311-manylinux_2_17_x86_64.

In [54]:
# !pip install scikeras==0.13.0 --force-reinstall

Collecting scikeras==0.13.0
  Using cached scikeras-0.13.0-py3-none-any.whl.metadata (3.1 kB)
Collecting keras>=3.2.0 (from scikeras==0.13.0)
  Downloading keras-3.10.0-py3-none-any.whl.metadata (6.0 kB)
Collecting scikit-learn>=1.4.2 (from scikeras==0.13.0)
  Using cached scikit_learn-1.7.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (11 kB)
Collecting absl-py (from keras>=3.2.0->scikeras==0.13.0)
  Downloading absl_py-2.3.1-py3-none-any.whl.metadata (3.3 kB)
Collecting numpy (from keras>=3.2.0->scikeras==0.13.0)
  Using cached numpy-2.3.1-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (62 kB)
Collecting rich (from keras>=3.2.0->scikeras==0.13.0)
  Downloading rich-14.0.0-py3-none-any.whl.metadata (18 kB)
Collecting namex (from keras>=3.2.0->scikeras==0.13.0)
  Downloading namex-0.1.0-py3-none-any.whl.metadata (322 bytes)
Collecting h5py (from keras>=3.2.0->scikeras==0.13.0)
  Downloading h5py-3.14.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.me

In [55]:
# pip check

ipython 7.34.0 requires jedi, which is not installed.
tensorflow 2.15.0 has requirement keras<2.16,>=2.15.0, but you have keras 3.10.0.
tensorflow 2.15.0 has requirement ml-dtypes~=0.2.0, but you have ml-dtypes 0.5.1.
tensorflow 2.15.0 has requirement numpy<2.0.0,>=1.23.5, but you have numpy 2.3.1.
tf-keras 2.18.0 has requirement tensorflow<2.19,>=2.18, but you have tensorflow 2.15.0.
opencv-python-headless 4.12.0.88 has requirement numpy<2.3.0,>=2; python_version >= "3.9", but you have numpy 2.3.1.
cupy-cuda12x 13.3.0 has requirement numpy<2.3,>=1.22, but you have numpy 2.3.1.
torch 2.6.0+cu124 has requirement nvidia-cublas-cu12==12.4.5.8; platform_system == "Linux" and platform_machine == "x86_64", but you have nvidia-cublas-cu12 12.5.3.2.
torch 2.6.0+cu124 has requirement nvidia-cuda-cupti-cu12==12.4.127; platform_system == "Linux" and platform_machine == "x86_64", but you have nvidia-cuda-cupti-cu12 12.5.82.
torch 2.6.0+cu124 has requirement nvidia-cuda-nvrtc-cu12==12.4.127; platfo

In [93]:
import tensorflow as tf
import keras

In [94]:
from keras.models import Sequential
from keras.layers import Dense
from keras.losses import CategoricalCrossentropy
from keras.optimizers import Adam
# from scikeras.wrappers import KerasClassifier


In [95]:
x_train.shape

(154, 8)

In [115]:
def create_ANN_model(input_dim):
  model = Sequential()
  model.add(Dense(15,activation='relu', input_dim = input_dim)) # Input Layer set as 6 neurons
  model.add(Dense(15,activation='relu')) # 3 Dense layers added each with 15 neurons
  model.add(Dense(4,activation='softmax')) # Output layer with activation function used for multiclass classification containing 4 outcomes
  adam = Adam(learning_rate=0.001)
  model.compile(optimizer=adam,loss='categorical_crossentropy',metrics=['accuracy']) # Loss function set for multiclass classification
  return model

In [116]:
from keras.utils import to_categorical

# One-hot encode the target variables in training and testing datasets. This is required for multiclass classification using Softmax activation function.
y_train_encoded = to_categorical(y_train, num_classes=4)
y_test_encoded = to_categorical(y_test, num_classes=4)

model = create_ANN_model(x_train.shape[1])
# model = KerasClassifier(model=create_ANN_model, input_dim=x_train.shape[1], loss=CategoricalCrossentropy(), optimizer=Adam(learning_rate=0.001), metrics=['accuracy'], verbose=1)
# model = KerasClassifier(model=create_ANN_model, input_dim=x_train.shape[1], verbose=1)
model.fit(x_train, y_train_encoded, epochs=100, batch_size=20, verbose=1)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.src.callbacks.History at 0x7bc9a275c1d0>

In [117]:
y_pred = model.predict(x_test)



In [118]:
y_pred[0]

array([0.46087745, 0.2917703 , 0.16157271, 0.08577961], dtype=float32)

In [119]:
y_pred_ = np.argmax(y_pred, axis=1)

In [120]:
y_pred_[0]

0

In [121]:
print('Using Artificial Neural Network')
print('Accuracy Score: ', accuracy_score(y_test, y_pred_))
print('Classification Report: \n', classification_report(y_test, y_pred_))

Using Artificial Neural Network
Accuracy Score:  0.40298507462686567
Classification Report: 
               precision    recall  f1-score   support

           0       0.40      0.32      0.35        19
           1       0.49      0.67      0.56        27
           2       0.20      0.21      0.21        14
           3       0.00      0.00      0.00         7

    accuracy                           0.40        67
   macro avg       0.27      0.30      0.28        67
weighted avg       0.35      0.40      0.37        67



  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


In [122]:
y_pred_decoded = pd.Series(y_pred_).map(dict(map(reversed, encoding.items())))
y_pred_decoded

Unnamed: 0,0
0,Minimal anxiety
1,Mild anxiety
2,Minimal anxiety
3,Mild anxiety
4,Minimal anxiety
...,...
62,Minimal anxiety
63,Mild anxiety
64,Moderate anxiety
65,Moderate anxiety


In [123]:
y_test_decoded = pd.Series(y_test).map(dict(map(reversed, encoding.items())))
y_test_decoded

Unnamed: 0,ANXIETY_LEVEL
132,Severe anxiety
148,Mild anxiety
93,Moderate anxiety
180,Mild anxiety
15,Severe anxiety
...,...
139,Mild anxiety
56,Mild anxiety
156,Severe anxiety
176,Moderate anxiety


In [124]:
for i, j in zip(y_test_decoded, y_pred_decoded):
  print("Actual: ", i, " Predicted: ", j, "Match:", (i == j))

Actual:  Severe anxiety  Predicted:  Minimal anxiety Match: False
Actual:  Mild anxiety  Predicted:  Mild anxiety Match: True
Actual:  Moderate anxiety  Predicted:  Minimal anxiety Match: False
Actual:  Mild anxiety  Predicted:  Mild anxiety Match: True
Actual:  Severe anxiety  Predicted:  Minimal anxiety Match: False
Actual:  Minimal anxiety  Predicted:  Mild anxiety Match: False
Actual:  Minimal anxiety  Predicted:  Moderate anxiety Match: False
Actual:  Moderate anxiety  Predicted:  Mild anxiety Match: False
Actual:  Mild anxiety  Predicted:  Mild anxiety Match: True
Actual:  Mild anxiety  Predicted:  Minimal anxiety Match: False
Actual:  Moderate anxiety  Predicted:  Mild anxiety Match: False
Actual:  Minimal anxiety  Predicted:  Mild anxiety Match: False
Actual:  Minimal anxiety  Predicted:  Minimal anxiety Match: True
Actual:  Minimal anxiety  Predicted:  Moderate anxiety Match: False
Actual:  Moderate anxiety  Predicted:  Mild anxiety Match: False
Actual:  Minimal anxiety  Predi

# Model Parameter Tuning

## Hyperparameterization of RandomForestClassifier Model

In [56]:
# Among the ML models, Random Forest gave the best result of 0.49 accuracy score. Hence, optimizing paramters of RandomForestClassifier model.
from sklearn.model_selection import GridSearchCV

params = {'max_depth': list(range(5,15)), 'n_estimators' :[150,200,300]}

gridcv = GridSearchCV(estimator=RandomForestClassifier(), param_grid=params, cv=10, scoring='accuracy')
gridcv.fit(x_train, y_train)

In [57]:
print('Best Paramters for Random Forest Classifier Model:',gridcv.best_params_)

Best Paramters for Random Forest Classifier Model: {'max_depth': 5, 'n_estimators': 200}


In [58]:
model_rf_tuned = RandomForestClassifier(max_depth=5, n_estimators=150)
model_rf_tuned = model_rf_tuned.fit(x_train, y_train)

In [59]:
y_pred = model_rf_tuned.predict(x_test)

In [60]:
print('Using Hyperparameterized Random Forest Classifier')
print('Accuracy Score: ', accuracy_score(y_test, y_pred))
print('Classification Report:\n', classification_report(y_test, y_pred))

Using Hyperparameterized Random Forest Classifier
Accuracy Score:  0.4925373134328358
Classification Report:
               precision    recall  f1-score   support

           0       0.46      0.32      0.38        19
           1       0.51      0.70      0.59        27
           2       0.43      0.43      0.43        14
           3       0.67      0.29      0.40         7

    accuracy                           0.49        67
   macro avg       0.52      0.43      0.45        67
weighted avg       0.50      0.49      0.48        67



## Hyperparameterization of Naive Bayes Classifier Model

In [61]:
params = {'var_smoothing': np.logspace(-9, -6, 50, 100) }
gridcv_nb = GridSearchCV(estimator=GaussianNB(), param_grid=params, cv=10, scoring='accuracy')
gridcv_nb.fit(x_train, y_train)

In [62]:
print('Best parameters for Naive Bayes Classifier:', gridcv_nb.best_params_)

Best parameters for Naive Bayes Classifier: {'var_smoothing': np.float64(1e-09)}


In [63]:
model_nb_tuned = GaussianNB(var_smoothing=np.float64(1e-09))
model_nb_tuned = model_nb_tuned.fit(x_train, y_train)
y_pred = gridcv_nb.predict(x_test)

In [64]:
print('Using Hyperparameterized Naive Bayes Classifier:')
print('Accuracy Score: ', accuracy_score(y_test, y_pred))
print('Classification Report:\n', classification_report(y_test, y_pred))

Using Hyperparameterized Naive Bayes Classifier:
Accuracy Score:  0.47761194029850745
Classification Report:
               precision    recall  f1-score   support

           0       0.47      0.37      0.41        19
           1       0.52      0.63      0.57        27
           2       0.45      0.36      0.40        14
           3       0.38      0.43      0.40         7

    accuracy                           0.48        67
   macro avg       0.45      0.45      0.44        67
weighted avg       0.47      0.48      0.47        67



## Hyperparameterization of XGBoost Model

In [65]:
params = {'learning_rate': [0.009,0.02,0.09,0.2], 'n_estimators': [50, 100, 200, 300]}
gridcv_xgb = GridSearchCV(estimator=XGBClassifier(), param_grid=params, cv=10, scoring='accuracy')
gridcv_xgb.fit(x_train, y_train)

In [66]:
print('Best XGBoost Parameters: ', gridcv_xgb.best_params_)

Best XGBoost Parameters:  {'learning_rate': 0.009, 'n_estimators': 100}


In [67]:
model_xgb_tuned = XGBClassifier(learning_rate=0.009, n_estimators=100)
model_xgb_tuned = model_xgb_tuned.fit(x_train, y_train)
y_pred = model_xgb_tuned.predict(x_test)

In [68]:
print('Using Hyperparameterized XGBoost Model:')
print('Accuracy Score: ', accuracy_score(y_test, y_pred))
print('Classification Report:\n', classification_report(y_test, y_pred))

Using Hyperparameterized XGBoost Model:
Accuracy Score:  0.4626865671641791
Classification Report:
               precision    recall  f1-score   support

           0       0.46      0.32      0.38        19
           1       0.50      0.59      0.54        27
           2       0.41      0.50      0.45        14
           3       0.40      0.29      0.33         7

    accuracy                           0.46        67
   macro avg       0.44      0.42      0.43        67
weighted avg       0.46      0.46      0.45        67



## Hyperparameterization of ANN Model

In [78]:
# !pip install scikit_learn==1.2.0 --upgrade --force-reinstall

Collecting scikit_learn==1.2.0
  Downloading scikit_learn-1.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Collecting numpy>=1.17.3 (from scikit_learn==1.2.0)
  Downloading numpy-2.3.1-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (62 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.1/62.1 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting scipy>=1.3.2 (from scikit_learn==1.2.0)
  Downloading scipy-1.16.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.9/61.9 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting joblib>=1.1.1 (from scikit_learn==1.2.0)
  Downloading joblib-1.5.1-py3-none-any.whl.metadata (5.6 kB)
Collecting threadpoolctl>=2.0.0 (from scikit_learn==1.2.0)
  Downloading threadpoolctl-3.6.0-py3-none-any.whl.metadata (13 kB)
Downloading scikit_learn-1.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x

In [69]:
# !pip install scikeras



In [70]:
!pip install scipy



In [73]:
# !pip install keras==2.3.1

Collecting keras==2.3.1
  Downloading Keras-2.3.1-py2.py3-none-any.whl.metadata (2.2 kB)
Collecting keras-applications>=1.0.6 (from keras==2.3.1)
  Downloading Keras_Applications-1.0.8-py3-none-any.whl.metadata (1.7 kB)
Collecting keras-preprocessing>=1.0.5 (from keras==2.3.1)
  Downloading Keras_Preprocessing-1.1.2-py2.py3-none-any.whl.metadata (1.9 kB)
Downloading Keras-2.3.1-py2.py3-none-any.whl (377 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m377.8/377.8 kB[0m [31m12.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading Keras_Applications-1.0.8-py3-none-any.whl (50 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.7/50.7 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading Keras_Preprocessing-1.1.2-py2.py3-none-any.whl (42 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.6/42.6 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: keras-preprocessing, keras-applications, keras
  Attem

In [112]:
# !pip install keras-tuner

Collecting keras-tuner
  Downloading keras_tuner-1.4.7-py3-none-any.whl.metadata (5.4 kB)
Collecting kt-legacy (from keras-tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl.metadata (221 bytes)
Downloading keras_tuner-1.4.7-py3-none-any.whl (129 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.4.7 kt-legacy-1.0.5


In [125]:
# from scikeras.wrappers import KerasClassifier
# from sklearn.model_selection import GridSearchCV

# # Wrap the Keras model with KerasClassifier
# model = KerasClassifier(model=create_ANN_model, input_dim=x_train.shape[1], optimizer=Adam(learning_rate=0.001), metrics=['accuracy'], verbose=0)
# params = {'batch_size':[10,30,50], 'epochs': [10,30,50,80,100]}
# gridcv_ann = GridSearchCV(estimator=model, param_grid=params, cv=10, scoring='accuracy', verbose=1)
# gridcv_ann.fit(x_train, y_train_encoded)

Fitting 10 folds for each of 15 candidates, totalling 150 fits


ValueError: 
All the 150 fits failed.
It is very likely that your model is misconfigured.
You can try to debug the error by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
150 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/sklearn/model_selection/_validation.py", line 859, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.11/dist-packages/scikeras/wrappers.py", line 1501, in fit
    super().fit(X=X, y=y, sample_weight=sample_weight, **kwargs)
  File "/usr/local/lib/python3.11/dist-packages/scikeras/wrappers.py", line 770, in fit
    self._fit(
  File "/usr/local/lib/python3.11/dist-packages/scikeras/wrappers.py", line 928, in _fit
    self._ensure_compiled_model()
  File "/usr/local/lib/python3.11/dist-packages/scikeras/wrappers.py", line 439, in _ensure_compiled_model
    if not self.model_.compiled:
           ^^^^^^^^^^^^^^^^^^^^
AttributeError: 'Sequential' object has no attribute 'compiled'


In [126]:
# import keras_tuner as kt

# model = create_ANN_model(x_train.shape[1])
# tuner = kt.Hyperband(model, objective='val_accuracy', max_epochs=10, factor=3)

TypeError: Inputs to a layer should be tensors. Got '<keras_tuner.src.engine.hyperparameters.hyperparameters.HyperParameters object at 0x7bc9a2230fd0>' (of type <class 'keras_tuner.src.engine.hyperparameters.hyperparameters.HyperParameters'>) as input for layer 'sequential_455'.

In [127]:
## Using Keras Tuner to avoid compatibility issues with Scikit and Tensorflow
import keras_tuner as kt

#build ANN model using Keras Tuner
def build_model(hp):
    model = Sequential()
    model.add(Dense(units=hp.Int('units_input', min_value=8, max_value=32, step=4), activation='relu', input_shape=(x_train.shape[1],)))
    model.add(Dense(units=hp.Int('units_hidden', min_value=8, max_value=32, step=4), activation='relu'))
    model.add(Dense(4, activation='softmax'))

    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    model.compile(optimizer=Adam(learning_rate=hp_learning_rate),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

# Instantiate the tuner
tuner = kt.Hyperband(build_model,
                     objective='val_accuracy',
                     max_epochs=10,
                     factor=3,
                     directory='my_dir',
                     project_name='intro_to_kt')

# Print a summary of the search space
tuner.search_space_summary()

Search space summary
Default search space size: 3
units_input (Int)
{'default': None, 'conditions': [], 'min_value': 8, 'max_value': 32, 'step': 4, 'sampling': 'linear'}
units_hidden (Int)
{'default': None, 'conditions': [], 'min_value': 8, 'max_value': 32, 'step': 4, 'sampling': 'linear'}
learning_rate (Choice)
{'default': 0.01, 'conditions': [], 'values': [0.01, 0.001, 0.0001], 'ordered': True}


In [128]:
# Perform the hyperparameter search
tuner.search(x_train, y_train_encoded, epochs=50, validation_split=0.2)

Trial 30 Complete [00h 00m 02s]
val_accuracy: 0.4193548262119293

Best val_accuracy So Far: 0.5806451439857483
Total elapsed time: 00h 00m 58s


In [129]:
# Get the best hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

print(f"""
The optimal number of units in the input layer is {best_hps.get('units_input')}
The optimal number of units in the hidden layer is {best_hps.get('units_hidden')}
The optimal learning rate for the optimizer is {best_hps.get('learning_rate')}.
""")


The optimal number of units in the input layer is 12
The optimal number of units in the hidden layer is 24
The optimal learning rate for the optimizer is 0.01.



In [130]:
# Build the best model
model_ann_tuned = tuner.hypermodel.build(best_hps)

# Evaluate the best model on the test data
loss, accuracy = model_ann_tuned.evaluate(x_test, y_test_encoded)
print(f'Accuracy of the tuned ANN model on the test data: {accuracy}')

Accuracy of the tuned ANN model on the test data: 0.31343284249305725


In [None]:
###### Unable to tune the ANN model to improve accuracy !!!