In [41]:
pip install tensorflow


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.3.1[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [42]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
import tensorflow as tf
from tensorflow.keras import layers, models

train = pd.read_csv('train_df.csv')
test = pd.read_csv('test_df.csv')

# cleaning and sets
validation_subjects = {"co2a0000377", "co2a0000364", "co2c0000342", "co2c0000345"}
val = test[test['subject'].isin(validation_subjects)].drop(columns=['sample', 'match', 'err', 'object'])
test = test[~test['subject'].isin(validation_subjects)]

train = train.drop(columns=['sample', 'match', 'err', 'object'])
test = test.drop(columns=['sample', 'match', 'err', 'object'])

X_train = train.drop(columns=['alcoholic', 'subject'])
y_train = train[['alcoholic']]

X_test = test.drop(columns=['alcoholic', 'subject'])
y_test = test[['alcoholic']]

X_val = val.drop(columns=['alcoholic', 'subject'])
y_val = val[['alcoholic']]

# normalize and reshape
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_val = X_val.reshape((X_val.shape[0], X_val.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

# model layers and compile
model = models.Sequential()

model.add(layers.Conv1D(64, 3, activation='relu', input_shape=(X_train.shape[1], 1)))
model.add(layers.MaxPooling1D(2))

model.add(layers.Conv1D(128, 3, activation='relu'))
model.add(layers.MaxPooling1D(2))

model.add(layers.Flatten())
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', 
              loss='binary_crossentropy', 
              metrics=['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall()])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [43]:
# OLD CODE
# train
# history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_val, y_val))

In [44]:
# train
history = model.fit(X_train, y_train, epochs=10, validation_data=(X_val, y_val))

# eval
y_pred = (model.predict(X_test) > 0.5)
macro_accuracy = accuracy_score(y_test, y_pred)

Epoch 1/10
[1m4800/4800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 4ms/step - accuracy: 0.8055 - loss: 0.3988 - precision_3: 0.8072 - recall_3: 0.7983 - val_accuracy: 0.8909 - val_loss: 0.2533 - val_precision_3: 0.8926 - val_recall_3: 0.8887
Epoch 2/10
[1m4800/4800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 4ms/step - accuracy: 0.9292 - loss: 0.1731 - precision_3: 0.9272 - recall_3: 0.9315 - val_accuracy: 0.9001 - val_loss: 0.2367 - val_precision_3: 0.9255 - val_recall_3: 0.8702
Epoch 3/10
[1m4800/4800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 4ms/step - accuracy: 0.9552 - loss: 0.1127 - precision_3: 0.9534 - recall_3: 0.9574 - val_accuracy: 0.9152 - val_loss: 0.2165 - val_precision_3: 0.9077 - val_recall_3: 0.9244
Epoch 4/10
[1m4800/4800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 4ms/step - accuracy: 0.9671 - loss: 0.0869 - precision_3: 0.9654 - recall_3: 0.9688 - val_accuracy: 0.9043 - val_loss: 0.2533 - val_precision_3: 0.9176 - v

In [45]:
# test classification report
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

       False       0.87      0.87      0.87     61440
        True       0.87      0.87      0.87     61440

    accuracy                           0.87    122880
   macro avg       0.87      0.87      0.87    122880
weighted avg       0.87      0.87      0.87    122880



In [53]:
# our test metrics
test_metrics = model.evaluate(X_test, y_test)
test_loss = test_metrics[0]
test_acc = test_metrics[1]

print(f'Test Loss: {test_loss}')
print(f'Test Accuracy: {test_acc}')

[1m3840/3840[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 2ms/step - accuracy: 0.8727 - loss: 0.5967 - precision_3: 0.8125 - recall_3: 0.8101
Test Loss: 0.6875490546226501
Test Accuracy: 0.8704020380973816


In [57]:
# our validation metrics
y_val_pred = (model.predict(X_val) > 0.5)
val_metrics = model.evaluate(X_val, y_val)
val_loss = val_metrics[0]
val_acc = val_metrics[1]

print(f'Val Loss: {val_loss}')
print(f'Val Accuracy: {val_acc}')

[1m960/960[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m960/960[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.8941 - loss: 0.3543 - precision_3: 0.9713 - recall_3: 0.8988
Val Loss: 0.32811078429222107
Val Accuracy: 0.9100911617279053


In [59]:
# validation classification report
print(classification_report(y_val, y_val_pred))

              precision    recall  f1-score   support

       False       0.92      0.90      0.91     15360
        True       0.91      0.92      0.91     15360

    accuracy                           0.91     30720
   macro avg       0.91      0.91      0.91     30720
weighted avg       0.91      0.91      0.91     30720

