In [None]:
import pandas as pd
from tcn import TCN
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.utils import to_categorical
import os

## Testing by discarding Nan values

In [2]:
def difference(healthy, patho):
    # Ensure both dataframes have the same columns
    assert all(healthy.columns == patho.columns), "DataFrames must have the same columns"

    
    # Calculate the difference
    diff = healthy - patho
    diff = diff.dropna(axis=1)

    return abs(diff)

In [3]:
# healthy_data = pd.read_csv('C:\DumbStuff\epf study\Meta-Elasto\data\Resultados\Elastome_0001_Healthy_angle_1.csv')
# patho_data = pd.read_csv('C:\DumbStuff\epf study\Meta-Elasto\data\Resultados\Elastome_0001_Patho_angle_1.csv')

# diff_data = difference(healthy_data, patho_data)
# diff_data.to_csv('diff.csv', index=False)

In [4]:
dir = 'C:\\DumbStuff\\epf study\\Meta-Elasto\\data\\Resultados'

for i in range(1, 29):
    try:
        healthy=pd.read_csv(f'{dir}\\Elastome_{i:04}_Healthy_angle_1.csv')
        patho=pd.read_csv(f'{dir}\\Elastome_{i:04}_Patho_angle_1.csv')
        diff = difference(healthy, patho)
        diff.to_csv(f'csvs\\diff_{i:04}.csv', index=False)
    except FileNotFoundError:
        continue

In [5]:
labels_csv = pd.read_excel('C:\\DumbStuff\\epf study\\Meta-Elasto\\data\\real0list.xlsx')

y=labels_csv['Score (type)'].values
print(y.shape)

(19,)


In [6]:
all_patient_dataset = []
dir2 = 'C:\\DumbStuff\\epf study\\Meta-Elasto\\separate\\csvs'
all_patient_dataset = []
for filename in os.listdir(dir2):
    filepath = os.path.join(dir2, filename)
    data = pd.read_csv(filepath)
    all_patient_dataset.append(data.values)

X = np.array(all_patient_dataset)
print(X.shape)

(19, 2000, 7)


In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print("\n--- After splitting ---")
print(f"X_train shape: {X_train.shape}")
print(f"y_train shape: {y_train.shape}")
print(f"X_test shape: {X_test.shape}")
print(f"y_test shape: {y_test.shape}")


--- After splitting ---
X_train shape: (15, 2000, 7)
y_train shape: (15,)
X_test shape: (4, 2000, 7)
y_test shape: (4,)


In [11]:
num_classes = len(np.unique(y))

# One-hot encode the integer labels
y_train_categorical = to_categorical(y_train, num_classes=num_classes)
y_test_categorical = to_categorical(y_test, num_classes=num_classes)

# BASELINE MODEL
model_baseline = Sequential([
    Input(shape=(X_train.shape[1], X_train.shape[2])),
    TCN(nb_filters=32, kernel_size=3, dilations=[1, 2, 4, 8], dropout_rate=0.2),
    Dense(num_classes, activation='softmax')
])
model_baseline.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

model_baseline.summary()

# Train the model with the one-hot encoded labels
model_baseline.fit(
    X_train, 
    y_train_categorical, 
    validation_data=(X_test, y_test_categorical), 
    epochs=120, 
    batch_size=32
)

Epoch 1/120
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - accuracy: 0.2000 - loss: 3.0259 - val_accuracy: 0.0000e+00 - val_loss: 1.8503
Epoch 2/120
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 102ms/step - accuracy: 0.1333 - loss: 3.0781 - val_accuracy: 0.0000e+00 - val_loss: 1.8291
Epoch 3/120
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 98ms/step - accuracy: 0.1333 - loss: 2.1885 - val_accuracy: 0.0000e+00 - val_loss: 1.8204
Epoch 4/120
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 96ms/step - accuracy: 0.2000 - loss: 1.9301 - val_accuracy: 0.0000e+00 - val_loss: 1.8148
Epoch 5/120
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 91ms/step - accuracy: 0.0667 - loss: 2.3846 - val_accuracy: 0.2500 - val_loss: 1.8048
Epoch 6/120
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 92ms/step - accuracy: 0.2000 - loss: 1.9205 - val_accuracy: 0.5000 - val_loss: 1.7955
Epoch 7/120
[1m1/1[0m 

<keras.src.callbacks.history.History at 0x256ed1aa350>

## Testing by filling in Nan Values

In [4]:
def fill_missing_values(df):
    """
    Fills NaN values in a DataFrame using linear interpolation.
    
    Args:
        df (pd.DataFrame): The input DataFrame that may contain NaN values.
        
    Returns:
        pd.DataFrame: A new DataFrame with all NaN values filled.
    """
    # interpolate() fills missing values.
    # - method='linear' treats values as equally spaced, which is good for time series.
    # - limit_direction='both' fills NaNs at the beginning and end of each column.
    df_filled = df.interpolate(method='linear', limit_direction='both')
    
    return df_filled

In [5]:
def difference(healthy, patho):

    if healthy.isnull().values.any():
        fill_missing_values(healthy)
    if patho.isnull().values.any():
        fill_missing_values(patho)

    # Ensure both dataframes have the same columns
    assert all(healthy.columns == patho.columns), "DataFrames must have the same columns"
    
    # Calculate the difference
    diff = healthy - patho

    return abs(diff)

In [7]:
dir = 'C:\\DumbStuff\\epf study\\Meta-Elasto\\data\\Resultados'

for i in range(1, 29):
    try:
        healthy=pd.read_csv(f'{dir}\\Elastome_{i:04}_Healthy_angle_1.csv')
        patho=pd.read_csv(f'{dir}\\Elastome_{i:04}_Patho_angle_1.csv')
        diff = difference(healthy, patho)
        diff.to_csv(f'csvs-nan\\diff_{i:04}.csv', index=False)
    except FileNotFoundError:
        continue

In [8]:
labels_csv = pd.read_excel('C:\\DumbStuff\\epf study\\Meta-Elasto\\data\\real0list.xlsx')

y=labels_csv['Score (type)'].values
print(y.shape)

(19,)


In [9]:
all_patient_dataset = []
dir2 = 'C:\\DumbStuff\\epf study\\Meta-Elasto\\separate\\csvs-nan'
all_patient_dataset = []
for filename in os.listdir(dir2):
    filepath = os.path.join(dir2, filename)
    data = pd.read_csv(filepath)
    all_patient_dataset.append(data.values)

X = np.array(all_patient_dataset)
print(X.shape)

(19, 2000, 18)


In [10]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print("\n--- After splitting ---")
print(f"X_train shape: {X_train.shape}")
print(f"y_train shape: {y_train.shape}")
print(f"X_test shape: {X_test.shape}")
print(f"y_test shape: {y_test.shape}")


--- After splitting ---
X_train shape: (15, 2000, 18)
y_train shape: (15,)
X_test shape: (4, 2000, 18)
y_test shape: (4,)


In [11]:
num_classes = len(np.unique(y))

# One-hot encode the integer labels
y_train_categorical = to_categorical(y_train, num_classes=num_classes)
y_test_categorical = to_categorical(y_test, num_classes=num_classes)

# BASELINE MODEL
model_baseline = Sequential([
    Input(shape=(X_train.shape[1], X_train.shape[2])),
    TCN(nb_filters=32, kernel_size=3, dilations=[1, 2, 4, 8], dropout_rate=0.2),
    Dense(num_classes, activation='softmax')
])
model_baseline.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

model_baseline.summary()

# Train the model with the one-hot encoded labels
model_baseline.fit(
    X_train, 
    y_train_categorical, 
    validation_data=(X_test, y_test_categorical), 
    epochs=120, 
    batch_size=32
)




Epoch 1/120
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - accuracy: 0.1333 - loss: nan - val_accuracy: 0.2500 - val_loss: nan
Epoch 2/120
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 96ms/step - accuracy: 0.1333 - loss: nan - val_accuracy: 0.2500 - val_loss: nan
Epoch 3/120
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 93ms/step - accuracy: 0.1333 - loss: nan - val_accuracy: 0.2500 - val_loss: nan
Epoch 4/120
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 92ms/step - accuracy: 0.1333 - loss: nan - val_accuracy: 0.2500 - val_loss: nan
Epoch 5/120
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 92ms/step - accuracy: 0.1333 - loss: nan - val_accuracy: 0.2500 - val_loss: nan
Epoch 6/120
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 93ms/step - accuracy: 0.1333 - loss: nan - val_accuracy: 0.2500 - val_loss: nan
Epoch 7/120
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 92ms

<keras.src.callbacks.history.History at 0x239e21796a0>