In [None]:
import dask.dataframe as dd
import matplotlib.animation as animation
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pyts.image import MarkovTransitionField
from tqdm.auto import tqdm

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Metadata table (it carries the `target` column used below), read eagerly with pandas.
df_metadata = pd.read_csv('dataset/metadata_train.csv')
# Signal table, opened through dask; individual columns are materialised
# later with .compute().
ddf = dd.read_parquet("dataset/train.parquet", engine="pyarrow")

In [None]:
np.random.seed(42)

# Row positions of each class in the metadata table.
target_values = df_metadata['target'].to_numpy()
index_1 = np.flatnonzero(target_values == 1)
index_0 = np.flatnonzero(target_values == 0)

# Both classes are cut at the same offset: half the number of positive rows.
# The training part therefore takes that many rows from each class, and
# everything after the cut goes to the test part.
len_index_1 = len(index_1)
half_positives = len_index_1 // 2

index_train_1 = index_1[:half_positives]
index_test_1 = index_1[half_positives:]

index_train_0 = index_0[:half_positives]
index_test_0 = index_0[half_positives:]

# Mix the two classes; the shuffles are in-place and seeded above.
index_train = np.concatenate((index_train_0, index_train_1))
np.random.shuffle(index_train)

index_test = np.concatenate((index_test_0, index_test_1))
np.random.shuffle(index_test)

print(index_train)

In [None]:

# Build one "video" per training signal: the signal is cut into consecutive
# chunks and every chunk is turned into a 30x30 Markov Transition Field frame.
n_itemsxchunk = 1000                      # chunk stride, in samples
transformer = MarkovTransitionField(30)   # loop-invariant, so created once

# Start from an empty list so the cell works on a fresh kernel and does not
# append duplicates when it is re-run.
videos = []

for col in tqdm(index_train):
    # index_train holds row positions of the metadata table; they are used
    # here as column names of the signal table.
    signal = ddf[[str(col)]].compute()
    n_chunks = len(signal) // n_itemsxchunk

    images = []
    for i in range(n_chunks):
        # NOTE(review): the end bound is exclusive, so the "-1" leaves
        # n_itemsxchunk - 1 rows per chunk and skips the last row of each
        # stride. Kept as-is because the held-out cell slices the same way;
        # confirm whether this is intended.
        signal_chunk = signal[i*n_itemsxchunk:(i+1)*n_itemsxchunk-1].T
        img = transformer.fit_transform(signal_chunk)[0]
        images.append(img)

    videos.append(images)

In [None]:
%matplotlib notebook
n_signal = 0  # which training signal to animate

# Keep a reference to this signal's frame list. A later cell rebinds `videos`
# to a reshaped array, and the animation callback must keep reading the
# original per-frame images rather than whatever `videos` points to by then.
anim_frames = videos[n_signal]

fig = plt.figure()  # make figure
im = plt.imshow(anim_frames[0], cmap='gray')


def updatefig(j):
    """Show frame ``j`` of the selected signal and write ``j`` in the figure title.

    Returns the list of artists that were modified, which FuncAnimation
    needs when ``blit=True``.
    """
    # set the data in the AxesImage object
    im.set_array(anim_frames[j])
    fig.suptitle(str(j))
    return [im]


# Kick off the animation. The frame count comes from the data being shown,
# not from a loop variable left over from another cell.
ani = animation.FuncAnimation(fig, updatefig, frames=len(anim_frames),
                              interval=10, blit=True)
plt.show()

In [None]:
# One-column frame of targets, selected in the same (shuffled) order as index_train.
labels = df_metadata.loc[index_train, ["target"]]
print(labels)

In [None]:
# Stack the per-signal frame lists into one array and give it the
# (n_signals, 30, 30, 800) shape that the network's input layer declares.
# NOTE(review): the stacked array is presumably (n_signals, 800, 30, 30);
# reshape only reinterprets the flat buffer and does not move the frame axis
# to the end the way a transpose would. Confirm this layout is intended. The
# held-out cell reshapes the same way, so any change must be made in both
# places together.
videos = np.array(videos).reshape((len(index_train), 30, 30, 800))

In [None]:
# Display the array shape; after the reshape above it is (len(index_train), 30, 30, 800).
videos.shape

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the training signals for validation, keeping the class
# proportions of `labels` in both parts; the split is seeded.
X_train, X_val, y_train, y_val = train_test_split(
    videos,
    labels,
    test_size=0.3,
    stratify=labels,
    random_state=42,
)

In [None]:
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
import matplotlib.pyplot as plt


In [None]:
# Model definition
model = models.Sequential([
    # Layers in charge of extracting information from the image
    layers.Conv2D(256, (3, 3), activation='relu', input_shape=(30, 30, 800)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    # Layers that classify based on the information extracted by the
    # previous layers; the single sigmoid unit outputs a value in (0, 1).
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])
model.summary()

In [None]:
# Binary cross-entropy matches the single sigmoid output of the model.
model.compile(optimizer='adam', loss='bce', metrics=['accuracy'])

# Train for 100 epochs, scoring the validation split after each one;
# the per-epoch metrics are kept in history_cnn.history.
history_cnn = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=100,
)

In [None]:
# Training vs. validation accuracy per epoch (each curve is labelled with its history key).
for curve in ('accuracy', 'val_accuracy'):
    plt.plot(history_cnn.history[curve], label=curve)
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.ylim([0.5, 1])  # values below 0.5 fall outside the visible range
plt.legend(loc='lower right')
plt.show()



In [None]:
# Training vs. validation loss per epoch (each curve is labelled with its history key).
for curve in ('loss', 'val_loss'):
    plt.plot(history_cnn.history[curve], label=curve)
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(loc='lower right')
plt.show()

In [None]:
# Loss and accuracy on the validation split. The variable names say "test",
# but the data passed here is X_val / y_val.
test_loss, test_acc = model.evaluate(x=X_val, y=y_val, verbose=2)

In [None]:
# Threshold the sigmoid outputs at 0.5 and flatten to a 1-D vector of 0/1.
val_scores = model.predict(X_val)
predictions = (val_scores > 0.5).astype(int).ravel()

# True labels of the validation split: first (only) column of y_val as a flat array.
real_values = y_val.iloc[:, 0].to_numpy()

print(predictions, real_values, sep="\n")

In [None]:
# Test
# n_signals is defined here because this cell runs before the cell that
# builds videos_test, which is where it was first assigned.
n_signals = 100

# Count of target == 1 among the held-out rows. `.loc` slices by label and
# includes both ends, so this covers rows 5*n_signals .. 6*n_signals-1.
df_metadata[["target"]].loc[5*n_signals:6*n_signals-1].sum()

In [None]:
# Held-out set: the n_signals signal columns at positions 5*n_signals .. 6*n_signals-1.
# NOTE(review): these columns are picked by position and are not checked
# against index_train; positive rows in this range that belong to the first
# half of all positives were also used for training. Confirm whether
# index_test was meant to be used instead.
n_signals = 100
n_itemsxchunk = 1000
transformer = MarkovTransitionField(30)

videos_test = []
for col in tqdm(ddf.columns[5*n_signals:6*n_signals]):
    signal = ddf[[col]].compute()
    n_chunks = int(len(signal)/n_itemsxchunk)

    # One 30x30 frame per chunk.
    # NOTE(review): the slice end is exclusive, so each chunk holds
    # n_itemsxchunk - 1 rows; this matches how the training videos are built.
    images = [
        transformer.fit_transform(signal[start:start + n_itemsxchunk - 1].T)[0]
        for start in range(0, n_chunks * n_itemsxchunk, n_itemsxchunk)
    ]
    videos_test.append(images)

# Same reshape as for the training videos.
# NOTE(review): reshape reinterprets the flat buffer rather than moving the
# frame axis last; confirm the intended layout.
videos_test = np.array(videos_test).reshape((n_signals, 30, 30, 800))

# `.loc` label slicing includes both ends, so this selects exactly n_signals rows.
labels_test = df_metadata.loc[5*n_signals:6*n_signals-1, ["target"]]

In [None]:
# Print the shapes of the held-out inputs and labels, one per line.
for held_out in (videos_test, labels_test):
    print(held_out.shape)

In [None]:
# Aliases following the X_/y_ naming used for the train/validation splits.
X_test, y_test = videos_test, labels_test

In [None]:
# Loss and accuracy on the held-out signals (overwrites the earlier validation values).
test_loss, test_acc = model.evaluate(x=X_test, y=y_test, verbose=2)

In [None]:
# Threshold the sigmoid outputs at 0.5 and flatten to a 1-D vector of 0/1.
test_scores = model.predict(X_test)
predictions = (test_scores > 0.5).astype(int).ravel()

# True labels of the held-out signals: first (only) column of y_test as a flat array.
real_values = y_test.iloc[:, 0].to_numpy()

print(predictions, real_values, sep="\n")

In [None]:
# Recompute the per-class row positions and their train/test cuts (no
# shuffling here) and show the first 20 training rows of each class.
target_column = df_metadata['target'].to_numpy()

# Index 1
index_1 = np.flatnonzero(target_column == 1)
len_index_1 = len(index_1)
cut = len_index_1 // 2
index_train_1 = index_1[:cut]
index_test_1 = index_1[cut:]

# Index 0 (cut at the same offset as the positives)
index_0 = np.flatnonzero(target_column == 0)
index_train_0 = index_0[:cut]
index_test_0 = index_0[cut:]

print(index_train_1[:20], index_train_0[:20])

In [None]:
# First 20 targets, for comparison with the row positions printed in the previous cell.
df_metadata.loc[:, ['target']].head(20)