### Sklearn Synthetic Data For Model Baseline


In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import linalg
from sklearn.datasets import make_multilabel_classification as mlc

In [2]:
import tensorflow as tf
import keras
from keras import layers
from keras.optimizers import SGD
from keras.metrics import BinaryAccuracy

In [3]:
df = pd.read_csv("synthetic_data_final.csv",index_col="index")

In [4]:
df = df.assign(OTC1PostPainCat=pd.cut(df["OTC1PostPain"],bins=[0,2,5,20],labels=["Low","Medium","High"]))
df = df.assign(OTC2PostPainCat=pd.cut(df["OTC2PostPain"],bins=[0,2,5,20],labels=["Low","Medium","High"]))
df = df.assign(OTC3PostPainCat=pd.cut(df["OTC3PostPain"],bins=[0,2,5,20],labels=["Low","Medium","High"]))
df = df.assign(Exe1PostPainCat=pd.cut(df["Exe1PostPain"],bins=[0,2,5,20],labels=["Low","Medium","High"]))
df = df.assign(Exe2PostPainCat=pd.cut(df["Exe2PostPain"],bins=[0,2,5,20],labels=["Low","Medium","High"]))
df = df.assign(Exe3PostPainCat=pd.cut(df["Exe3PostPain"],bins=[0,2,5,20],labels=["Low","Medium","High"]))

In [5]:
df.drop(columns=["Unnamed: 0","bmi","education-num","hours-per-week","OTC1NSAID","OTC1Acetaminophen","OTC1Anasthetic","OTC1Supplements","OTC1Device",
                "OTC2NSAID","OTC2Acetaminophen","OTC2Anasthetic","OTC2Supplements","OTC2Device",
                "OTC3NSAID","OTC3Acetaminophen","OTC3Anasthetic","OTC3Supplements","OTC3Device",
                "Exe1NSAID","Exe1Acetaminophen","Exe1Anasthetic","Exe1Supplements","Exe1Device",
                "Exe2NSAID","Exe2Acetaminophen","Exe2Anasthetic","Exe2Supplements","Exe2Device",
                "Exe3NSAID","Exe3Acetaminophen","Exe3Anasthetic","Exe3Supplements","Exe3Device",
                "OTCJobTitle","OTC1PostPain","OTC2PostPain","OTC3PostPain","Exe1PostPain","Exe2PostPain","Exe3PostPain"],inplace=True)
df.columns

Index(['OTCAge', 'OTCGender', 'OTCRegion', 'OTCEmployment', 'OTCEducation',
       'OTCRace', 'OTCIncome', 'OTCSpanish', 'OTCStand', 'OTCJobCategory',
       ...
       'Exe3UseSameTime', 'Exe3WhyStop_NoPain', 'Exe3WhyStop_ReducePain',
       'Exe3WhyStop_Prescription', 'OTC1PostPainCat', 'OTC2PostPainCat',
       'OTC3PostPainCat', 'Exe1PostPainCat', 'Exe2PostPainCat',
       'Exe3PostPainCat'],
      dtype='object', length=230)

In [6]:
df1 = pd.get_dummies(df)
df1.shape                   

(1845, 448)

In [7]:
cols = list(df1.columns)
#cols[235:431]

In [8]:
otc_exe_cols=cols[235:431]
postpain_cols = cols[431:]

In [9]:
from  sklearn.model_selection import train_test_split

X = df1[cols[:235]]
y = df1[cols[235:]]
X_train,X_tst,y_train,y_tst = train_test_split(X,y,test_size=0.3,random_state=42)

In [10]:
y_otc_train = y_train[otc_exe_cols]
y_pain_train = y_train[postpain_cols]

y_otc_test = y_tst[otc_exe_cols]
y_pain_test = y_tst[postpain_cols]

In [11]:
y_pain_test.shape

(554, 17)

MultiClass Classification

In [27]:
model = keras.Sequential()
model.add(layers.Dense(235,activation="relu",input_shape=(X_train.shape[1],)))
model.add(layers.Dense(200,activation="relu"))
model.add(layers.Dense(200,activation="relu"))
model.add(layers.Dense(200,activation="relu"))   
model.add(layers.Dense(y_otc_train.shape[1],activation="sigmoid"))
model.summary()

In [29]:
model.compile(loss='binary_crossentropy', optimizer=SGD(), metrics=[BinaryAccuracy])
model.fit(X_train, y_otc_train,epochs=100,batch_size=100, validation_split=0.2)

Epoch 1/100
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step - binary_accuracy: 0.5645 - loss: 0.7519 - val_binary_accuracy: 0.6019 - val_loss: 0.6762
Epoch 2/100
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - binary_accuracy: 0.6125 - loss: 0.6604 - val_binary_accuracy: 0.6420 - val_loss: 0.6247
Epoch 3/100
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - binary_accuracy: 0.6497 - loss: 0.6152 - val_binary_accuracy: 0.6789 - val_loss: 0.5871
Epoch 4/100
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - binary_accuracy: 0.6863 - loss: 0.5786 - val_binary_accuracy: 0.7123 - val_loss: 0.5533
Epoch 5/100
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - binary_accuracy: 0.7200 - loss: 0.5450 - val_binary_accuracy: 0.7442 - val_loss: 0.5198
Epoch 6/100
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - binary_accuracy: 0.7539 - loss: 0.511

<keras.src.callbacks.history.History at 0x20f63e77350>

In [31]:
loss, accuracy = model.evaluate(X_tst,y_otc_test)
accuracy

[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - binary_accuracy: 0.9763 - loss: 0.1263 


0.9764791131019592

## 2 Node Neural Network

In [12]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense

# Input layer
input_layer = Input(shape=(X_train.shape[1],))

# Shared layers
x = Dense(200, activation='relu')(input_layer)
x = Dense(200, activation='relu')(x)
x = Dense(200, activation='relu')(x)
x = Dense(200, activation='relu')(x)
x = Dense(200, activation='relu')(x)

# Branch 1
branch1 = Dense(198, activation='relu')(x)
branch1 = Dense(198, activation='relu')(x)
branch1 = Dense(198, activation='relu')(x)
branch1 = Dense(198, activation='relu')(branch1)
output1 = Dense(y_otc_train.shape[1], activation='softmax')(branch1)

# Branch 2
branch2 = Dense(100, activation='relu')(x)
branch1 = Dense(50, activation='relu')(x)
branch1 = Dense(50, activation='relu')(x)
branch2 = Dense(20, activation='relu')(branch2)
output2 = Dense(y_pain_train.shape[1], activation='softmax')(branch2)

# Combine the branches
model_1 = Model(inputs=input_layer, outputs=[output1, output2])

# Compile the model
model_1.compile(loss='binary_crossentropy', optimizer=SGD(), metrics=[BinaryAccuracy,BinaryAccuracy])
model_1.summary()

In [13]:
model_1.fit(X_train,[y_otc_train,y_pain_train],epochs=100,batch_size=100, validation_split=0.2)


Epoch 1/100
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - dense_14_binary_accuracy: 0.7536 - dense_9_binary_accuracy: 0.9689 - loss: 1.4472 - val_dense_14_binary_accuracy: 0.7343 - val_dense_9_binary_accuracy: 0.9691 - val_loss: 1.3883
Epoch 2/100
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - dense_14_binary_accuracy: 0.7514 - dense_9_binary_accuracy: 0.9690 - loss: 1.3823 - val_dense_14_binary_accuracy: 0.7343 - val_dense_9_binary_accuracy: 0.9691 - val_loss: 1.3632
Epoch 3/100
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - dense_14_binary_accuracy: 0.7543 - dense_9_binary_accuracy: 0.9689 - loss: 1.3644 - val_dense_14_binary_accuracy: 0.7343 - val_dense_9_binary_accuracy: 0.9691 - val_loss: 1.3484
Epoch 4/100
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - dense_14_binary_accuracy: 0.7480 - dense_9_binary_accuracy: 0.9689 - loss: 1.3539 - val_dense_14_binary_accuracy: 0.7

<keras.src.callbacks.history.History at 0x21cbf9cf210>

In [17]:
accuracy = model_1.evaluate(X_tst,[y_otc_test,y_pain_test])
accuracy

[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - dense_14_binary_accuracy: 0.7414 - dense_9_binary_accuracy: 0.9690 - loss: 0.5835 


[0.5863441228866577, 0.7421958446502686, 0.9690470099449158]