In [1]:
import sys
sys.path.append('../')

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import json

from docs.create_cvae import Cvae

from keras.utils import to_categorical
from keras.callbacks import EarlyStopping

from sklearn.model_selection import train_test_split

2023-12-22 11:10:57.660437: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


- Split the dataset into train and test sets and save them.

In [8]:
# Load the Escherichia coli spectra from a tab-separated text file into a pandas DataFrame

path_to_data = '../CVAE/real_data/Escherichia_coli_CVAE.txt'

df = pd.read_csv(
    path_to_data,
    sep='\t',
    header=0,  # the first row of the file holds the column names
)
df.head()

Unnamed: 0.1,Unnamed: 0,2000,2003,2006,2009,2012,2015,2018,2021,2024,...,19973,19976,19979,19982,19985,19988,19991,19994,19997,Ampicillin
0,dad6d494-9432-407e-a03e-fdac7a2739a1,0.00044,0.000482,0.000417,0.000386,0.000244,0.000159,1.7e-05,0.000139,0.000563,...,5.2e-05,5.6e-05,2.5e-05,1.4e-05,3.801878e-08,1.4e-05,2e-05,2.9e-05,2.7e-05,R
1,617f14d0-86b1-4c28-8995-b02006a85e81,0.000681,0.000405,0.000159,0.000266,0.000248,0.000213,0.000151,6.8e-05,0.000751,...,1.3e-05,7e-06,3e-06,5e-06,1.374761e-05,2.5e-05,5e-05,3.7e-05,3.5e-05,R
2,d5bb4389-5053-4107-9c05-bfed9e9159c9,0.00027,0.000194,0.000124,0.000156,0.00025,0.000275,8.8e-05,4.7e-05,0.000201,...,1.9e-05,2e-05,2.4e-05,2.2e-05,3.64776e-05,2.8e-05,2.3e-05,2.8e-05,7e-06,S
3,fd880c7e-5f0c-4870-a124-19be1d474d1e,0.001685,0.001845,0.001435,0.001704,0.001408,0.000919,0.000734,0.000556,0.000646,...,1.4e-05,2e-05,1.5e-05,1.5e-05,1.592923e-05,2.4e-05,7e-06,9e-06,1.1e-05,R
4,0527af15-2d0f-4e8b-b49f-ef0232db70a0,0.00028,0.000225,0.000297,0.000479,0.000393,0.000397,0.000629,0.000334,7.6e-05,...,2.3e-05,2e-05,1.9e-05,1e-05,9.404278e-06,7e-06,1.6e-05,2.3e-05,2.4e-05,S


In [9]:
# Remove the 'Unnamed: 0' column to select only the intensity values and susceptibility.
# errors='ignore' keeps this cell safe to re-run: a plain drop raises KeyError on the
# second execution because the column is already gone.
df = df.drop(columns=['Unnamed: 0'], errors='ignore')
col = df.columns.values.tolist() # get the list of all columns that will be needed later

# Encode susceptibility as integers in a single pass: 'R' -> 0, 'S' -> 1.
# Values other than 'R' and 'S' are left unchanged by replace().
df['Ampicillin'] = df['Ampicillin'].replace({'R': 0, 'S': 1})
df.head()

Unnamed: 0,2000,2003,2006,2009,2012,2015,2018,2021,2024,2027,...,19973,19976,19979,19982,19985,19988,19991,19994,19997,Ampicillin
0,0.00044,0.000482,0.000417,0.000386,0.000244,0.000159,1.7e-05,0.000139,0.000563,0.000477,...,5.2e-05,5.6e-05,2.5e-05,1.4e-05,3.801878e-08,1.4e-05,2e-05,2.9e-05,2.7e-05,0
1,0.000681,0.000405,0.000159,0.000266,0.000248,0.000213,0.000151,6.8e-05,0.000751,0.001529,...,1.3e-05,7e-06,3e-06,5e-06,1.374761e-05,2.5e-05,5e-05,3.7e-05,3.5e-05,0
2,0.00027,0.000194,0.000124,0.000156,0.00025,0.000275,8.8e-05,4.7e-05,0.000201,0.000245,...,1.9e-05,2e-05,2.4e-05,2.2e-05,3.64776e-05,2.8e-05,2.3e-05,2.8e-05,7e-06,1
3,0.001685,0.001845,0.001435,0.001704,0.001408,0.000919,0.000734,0.000556,0.000646,0.000707,...,1.4e-05,2e-05,1.5e-05,1.5e-05,1.592923e-05,2.4e-05,7e-06,9e-06,1.1e-05,0
4,0.00028,0.000225,0.000297,0.000479,0.000393,0.000397,0.000629,0.000334,7.6e-05,9.2e-05,...,2.3e-05,2e-05,1.9e-05,1e-05,9.404278e-06,7e-06,1.6e-05,2.3e-05,2.4e-05,1


In [10]:
# Feature frame: every column except the label, i.e. only the intensity values
df_X = df.drop(columns=['Ampicillin'])
# Label frame: a one-column DataFrame holding only the susceptibility information
df_labels = pd.DataFrame(df['Ampicillin'])

In [14]:
# Stratified train/test split: the label frame is passed as `stratify`,
# and random_state is fixed so the split is reproducible.
train_x, test_x, train_labels, test_labels = train_test_split(
    df_X,
    df_labels,
    stratify=df_labels,
    random_state=42,
)

# Write each partition to its own tab-separated file (the row index is written too)
for frame, destination in (
    (train_x, '../CVAE/real_data/train/Escherichia_coli_CVAE_train_x.csv'),
    (train_labels, '../CVAE/real_data/train/Escherichia_coli_CVAE_train_labels.csv'),
    (test_x, '../CVAE/real_data/test/Escherichia_coli_CVAE_test_x.csv'),
    (test_labels, '../CVAE/real_data/test/Escherichia_coli_CVAE_test_labels.csv'),
):
    frame.to_csv(destination, sep ='\t')

### Training step

In [15]:
# Read the saved training split (features and labels) back from disk
path_to_train_x = '../CVAE/real_data/train/Escherichia_coli_CVAE_train_x.csv'
path_to_train_labels = '../CVAE/real_data/train/Escherichia_coli_CVAE_train_labels.csv'

# Both files are tab-separated with the column names in the first row
train_x, train_labels = (
    pd.read_csv(path, sep='\t', header=0)
    for path in (path_to_train_x, path_to_train_labels)
)

In [18]:
# Drop the 'Unnamed: 0' column from both frames (presumably the row index that
# to_csv wrote next to the data when the split was saved).
# errors='ignore' makes the cell safe to re-run: without it, a second execution
# raises KeyError because the column has already been removed.
train_x = train_x.drop(columns=['Unnamed: 0'], errors='ignore')
train_labels = train_labels.drop(columns=['Unnamed: 0'], errors='ignore')

In [21]:
# One-hot encode the labels over the two classes.
# NOTE: the result is assigned back to `train_labels`, so running this cell a
# second time applies the encoding again to the already encoded array.
train_labels = to_categorical(
    train_labels,
    num_classes=2,
)

In [22]:
# create CVAE
# Cvae comes from docs.create_cvae; the meaning of the argument 64 is not visible
# here (presumably a layer / latent size — TODO confirm against the class definition).
cvae_64 = Cvae(64)

In [23]:
# Early-stopping callback: training halts once the monitored quantity has not
# improved for ten consecutive epochs. The monitored quantity is 'loss',
# i.e. the loss on the training data.

callback = EarlyStopping(
    monitor='loss',
    patience=10,
)

In [24]:
# fit (the early-stopping callback may end training before all 120 epochs have run)
history_64 = cvae_64.fit(train_x, train_labels, batch_size=6, epochs=120, callbacks=[callback])
print("Number of epochs run", len(history_64.history['loss']))

# save weights
cvae_64.save_weights('../CVAE/weights/cvae_64_weights.h5')

# save history on file
# Cast every logged value to a built-in float first: json cannot serialize numpy
# scalar types, and a failure here would otherwise discard the training history.
# The conversion happens before the file is opened, so a failed conversion does
# not leave a truncated file behind.
history_64_serializable = {
    metric: [float(value) for value in values]
    for metric, values in history_64.history.items()
}
with open('../CVAE/history/history_cvae_64.json', 'w') as f:
    json.dump(history_64_serializable, f)

Epoch 1/120
Epoch 2/120
Epoch 3/120
Epoch 4/120
Epoch 5/120
Epoch 6/120
Epoch 7/120
Epoch 8/120
Epoch 9/120
Epoch 10/120
Epoch 11/120
Epoch 12/120
Epoch 13/120
Epoch 14/120
Epoch 15/120
Epoch 16/120
Epoch 17/120
Epoch 18/120
Epoch 19/120
Epoch 20/120
Epoch 21/120
Epoch 22/120
Epoch 23/120
Epoch 24/120
Epoch 25/120
Epoch 26/120
Epoch 27/120
Epoch 28/120
Epoch 29/120
Epoch 30/120
Epoch 31/120
Epoch 32/120
Epoch 33/120
Epoch 34/120
Epoch 35/120
Epoch 36/120
Epoch 37/120
Epoch 38/120
Epoch 39/120
Epoch 40/120
Epoch 41/120
Epoch 42/120
Epoch 43/120
Epoch 44/120
Epoch 45/120
Epoch 46/120
Epoch 47/120
Epoch 48/120
Epoch 49/120
Epoch 50/120
Epoch 51/120
Epoch 52/120
Epoch 53/120
Epoch 54/120
Epoch 55/120
Epoch 56/120
Epoch 57/120
Epoch 58/120
Epoch 59/120
Epoch 60/120
Epoch 61/120
Epoch 62/120
Epoch 63/120
Epoch 64/120
Epoch 65/120
Epoch 66/120
Epoch 67/120
Epoch 68/120
Epoch 69/120
Epoch 70/120
Epoch 71/120
Epoch 72/120
Epoch 73/120
Epoch 74/120
Epoch 75/120
Epoch 76/120
Epoch 77/120
Epoch 78

NameError: name 'json' is not defined