In [None]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
from scipy.signal import spectrogram
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tqdm import tqdm


In [None]:
# Read the label table from disk and preview its first five rows.
labels_df = pd.read_csv(filepath_or_buffer="training_labels.csv")
print(labels_df.head(n=5))


In [None]:
def generate_spectrogram(signal, fs=2048, nperseg=256, noverlap=128):
    """Return the log-compressed spectrogram of a 1-D signal.

    Parameters
    ----------
    signal : array_like
        Samples passed to ``scipy.signal.spectrogram``.
    fs : float, optional
        Sampling frequency forwarded to ``spectrogram`` (default 2048).
    nperseg : int, optional
        Segment length forwarded to ``spectrogram`` (default 256).
    noverlap : int, optional
        Overlap between consecutive segments (default 128).

    Returns
    -------
    numpy.ndarray
        ``log1p`` of the spectrogram values, laid out as (freq, time).
    """
    _, _, Sxx = spectrogram(signal, fs=fs, nperseg=nperseg, noverlap=noverlap)
    return np.log1p(Sxx)


def waveform_to_rgb(waveform):
    """Convert the first three channels of ``waveform`` into one image.

    Each of ``waveform[0]``, ``waveform[1]`` and ``waveform[2]`` is turned
    into a log-spectrogram; the three are stacked on a trailing axis and the
    stack is standardized with its own global mean and standard deviation.

    Parameters
    ----------
    waveform : array_like
        Indexable container whose first three entries are 1-D signals.

    Returns
    -------
    numpy.ndarray
        Standardized array laid out as (freq, time, 3).
    """
    # generate_spectrogram already applies log1p, so the result is used
    # directly; taking log1p a second time would compress the data twice.
    channels = [generate_spectrogram(waveform[i]) for i in range(3)]  # 3 channels
    image = np.stack(channels, axis=-1)
    # The small epsilon guards against division by zero for a constant image.
    return (image - np.mean(image)) / (np.std(image) + 1e-8)


In [None]:
import glob

def build_file_map(base_path="train"):
    """Index the ``.npy`` files located three directory levels below ``base_path``.

    Returns a dict that maps each file's base name (extension removed) to the
    path reported by ``glob``. When two files share a base name, the one that
    ``glob`` yields later replaces the earlier entry.
    """
    pattern = os.path.join(base_path, "*", "*", "*", "*.npy")
    mapping = {}
    for path in glob.glob(pattern):
        stem, _ext = os.path.splitext(os.path.basename(path))
        mapping[stem] = path
    return mapping

# Index every waveform file under the training directory once, up front.
file_map = build_file_map(base_path="train")

def load_data_with_map(file_map, labels_df, max_samples=50000):
    """Load the waveforms listed in ``labels_df`` and convert them to images.

    Only the first ``max_samples`` rows of ``labels_df`` (by position) are
    considered. Rows whose ``id`` has no entry in ``file_map`` are reported
    on stdout and skipped, so fewer than ``max_samples`` samples may be
    returned.

    Parameters
    ----------
    file_map : dict
        Maps a sample id to the path of its ``.npy`` file.
    labels_df : pandas.DataFrame
        Must provide the columns ``"id"`` and ``"target"``.
    max_samples : int or None, optional
        Upper bound on the number of rows to visit (default 50000).
        ``None`` visits every row.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X, y)``: the images produced by ``waveform_to_rgb`` and the
        matching labels, in row order.
    """
    # Slice by position rather than comparing the index label yielded by
    # iterrows() against max_samples: labels only equal row counts for a
    # default RangeIndex, so a filtered or shuffled frame would stop early
    # (or never stop), and the progress-bar total would be wrong.
    if max_samples is None:
        subset = labels_df
    else:
        subset = labels_df.iloc[:max(max_samples, 0)]

    X, y = [], []
    # Iterating the two columns directly avoids building a Series per row.
    rows = zip(subset["id"], subset["target"])
    for file_id, label in tqdm(rows, total=len(subset)):
        if file_id not in file_map:
            print(f"Missing file: {file_id}")
            continue
        waveform = np.load(file_map[file_id])
        X.append(waveform_to_rgb(waveform))
        y.append(label)
    return np.array(X), np.array(y)


In [None]:
# Build the image array X and label vector y, visiting at most 50,000 label rows.
X, y = load_data_with_map(
    file_map=file_map,
    labels_df=labels_df,
    max_samples=50_000,
)

In [None]:
# Rescale the whole array by its single largest value.
X = np.divide(X, X.max())

In [None]:
# Class balance: the distinct labels in y and how many samples carry each.
class_values, class_counts = np.unique(y, return_counts=True)
print((class_values, class_counts))

In [None]:
# Hold out 20% of the samples for validation; the fixed seed makes the split repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
def build_cnn(input_shape):
    """Build and compile a three-stage convolutional binary classifier.

    The network is three Conv2D(32, 3x3, ELU) + MaxPool2D stages followed by
    Flatten, Dense(64, ReLU) and a single sigmoid output unit.

    Parameters
    ----------
    input_shape : tuple
        Shape of one input sample, without the batch dimension.

    Returns
    -------
    tf.keras.Sequential
        Model compiled with Adam (learning rate 1e-4), binary cross-entropy
        loss, and accuracy / AUC / precision / recall metrics.
    """
    model = tf.keras.Sequential([
        # Declare the input with an explicit Input object; passing
        # ``input_shape=`` to the first layer triggers a warning in Keras 3.
        tf.keras.Input(shape=input_shape),
        tf.keras.layers.Conv2D(filters=32, kernel_size=(3, 3), activation="elu"),
        tf.keras.layers.MaxPool2D(),
        tf.keras.layers.Conv2D(filters=32, kernel_size=(3, 3), activation="elu"),
        tf.keras.layers.MaxPool2D(),
        tf.keras.layers.Conv2D(filters=32, kernel_size=(3, 3), activation="elu"),
        tf.keras.layers.MaxPool2D(),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(64, activation="relu"),
        tf.keras.layers.Dense(1, activation="sigmoid"),
    ])

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
        loss='binary_crossentropy',
        metrics=[
            tf.keras.metrics.BinaryAccuracy(name='accuracy'),
            tf.keras.metrics.AUC(name='auc'),
            tf.keras.metrics.Precision(name='precision'),
            tf.keras.metrics.Recall(name='recall'),
        ],
    )
    return model

# Instantiate the network for the per-sample shape of the training data and print its layer table.
model = build_cnn(input_shape=X_train.shape[1:])
model.summary()

In [None]:
# NOTE(review): this definition rebinds the name `build_cnn`, replacing the
# earlier one in this notebook. In the cells visible here, `model` is created
# before this cell and is not rebuilt afterwards, so on a top-to-bottom run the
# training cell still fits the earlier architecture. Confirm which model is
# intended and rebuild `model` if it is this one.
def build_cnn(input_shape):
    """Build and compile a batch-normalized convolutional binary classifier.

    The network is three Conv2D(32, 3x3, 'same') -> BatchNormalization -> ELU
    -> MaxPooling2D stages followed by Flatten, Dense(64, ReLU) and a single
    sigmoid output unit.

    Parameters
    ----------
    input_shape : tuple
        Shape of one input sample, without the batch dimension.

    Returns
    -------
    tf.keras.Sequential
        Model compiled with Adam (learning rate 1e-3), binary cross-entropy
        loss, and accuracy / AUC / precision / recall metrics.
    """
    model = tf.keras.Sequential([
        # Declare the input with an explicit Input object; passing
        # ``input_shape=`` to the first layer triggers a warning in Keras 3.
        tf.keras.Input(shape=input_shape),

        tf.keras.layers.Conv2D(32, (3, 3), padding='same'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Activation("elu"),
        tf.keras.layers.MaxPooling2D(),

        tf.keras.layers.Conv2D(32, (3, 3), padding='same'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Activation("elu"),
        tf.keras.layers.MaxPooling2D(),

        tf.keras.layers.Conv2D(32, (3, 3), padding='same'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Activation("elu"),
        tf.keras.layers.MaxPooling2D(),

        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(64, activation="relu"),
        tf.keras.layers.Dense(1, activation="sigmoid")
    ])

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
        loss='binary_crossentropy',
        metrics=[
            tf.keras.metrics.BinaryAccuracy(name='accuracy'),
            tf.keras.metrics.AUC(name='auc'),
            tf.keras.metrics.Precision(name='precision'),
            tf.keras.metrics.Recall(name='recall')
        ]
    )

    return model


In [None]:
# Train for 40 epochs in mini-batches of 16, passing the held-out split as validation data.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=16,
    epochs=40,
    validation_data=(X_val, y_val),
)


In [None]:
# Confusion matrix on the validation split, with predictions thresholded at 0.5.
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

y_pred = (model.predict(X_val) > 0.5).astype("int32")
cm = confusion_matrix(y_true=y_val, y_pred=y_pred)
disp = ConfusionMatrixDisplay(cm, display_labels=["No", "Yes"])
disp.plot(cmap="Blues")
plt.title("Confusion Matrix")
plt.show()