# Headers

In [1]:
import pandas as pd
import time
import numpy as np

# Metrics
from sklearn.metrics import accuracy_score, f1_score

In [2]:
from data_treatment import train_df, test_df, \
                            treated_train_df, treated_test_df, \
                            new_features_train_df, new_features_test_df, \
                            svd_train_df, svd_test_df, \
                            targets_for_test_df

from data_treatment import train_val_split

# Data Processing


In [3]:
import h5py
def create_sliding_windows_batch(df, window_size, batch_size):
    for start in range(0, len(df) - window_size + 1, batch_size):
        end = min(start + batch_size, len(df) - window_size + 1)
        batch_windows = [
            df.iloc[i:i + window_size].values for i in range(start, end)
        ]
        yield np.array(batch_windows)


window_size = 60
batch_size = 1024

# split the train_df into train and val
X_train, y_train, X_val, y_val = train_val_split(train_df)
X_train['target'] = y_train
train_slice_df = X_train.copy()

X_val['target'] = y_val
val_slice_df = X_val.copy()

train_file = 'data/train_images.h5'
with h5py.File(train_file, 'w') as h5f:
    batch_index = 0
    for batch in create_sliding_windows_batch(train_slice_df, window_size, batch_size=batch_size):
        train_images = batch.reshape(-1, window_size, train_slice_df.shape[1])
        h5f.create_dataset(f'batch_{batch_index}', data=train_images)
        batch_index += 1

val_file = 'data/validation_images.h5'
with h5py.File(val_file, 'w') as h5f:
    batch_index = 0
    for batch in create_sliding_windows_batch(val_slice_df, window_size, batch_size=batch_size):
        val_images = batch.reshape(-1, window_size, val_slice_df.shape[1])
        h5f.create_dataset(f'batch_{batch_index}', data=val_images)
        batch_index += 1

test_file = 'data/test_images.h5'
with h5py.File(test_file, 'w') as h5f:
    batch_index = 0
    for batch in create_sliding_windows_batch(test_df, window_size, batch_size=batch_size):
        test_images = batch.reshape(-1, window_size, test_df.shape[1])
        h5f.create_dataset(f'batch_{batch_index}', data=test_images)
        batch_index += 1




# Basic CNN

In [4]:
import torch

import torch.nn as nn
import torch.nn.functional as F

class BasicCNN(nn.Module):
    def __init__(self, input_channels, num_classes):
        super(BasicCNN, self).__init__()
        self.conv1 = nn.Conv1d(in_channels=input_channels, out_channels=32, kernel_size=3, padding=1)
        self.pool = nn.MaxPool1d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv1d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(64 * (window_size // 2 // 2), 128)  # Adjust based on pooling layers
        self.fc2 = nn.Linear(128, num_classes)
    
    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 64 * (window_size // 2 // 2))  # Flatten
        x = F.relu(self.fc1(x))
        x = torch.sigmoid(self.fc2(x))
        return x


def load_data_for_prediction_batch(h5_file, start_index, batch_size, window_size):
    with h5py.File(h5_file, 'r') as h5f:
        inputs = h5f['inputs'][start_index:start_index + batch_size]  # Load a batch of windows
    return inputs  # Shape will be (batch_size, window_size, num_features)

The history saving thread hit an unexpected error (OperationalError('database is locked')).History will not be written to the database.



A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.0.2 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "C:\Users\PLour\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "C:\Users\PLour\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\traitlets\config\application.py", line 1075, in launch

In [5]:




start_time = time.time()

input_channels = len(train_df.columns)  # Number of features
num_classes = 1  # Binary classification

model = BasicCNN(input_channels=input_channels, num_classes=num_classes)
print(model)
with h5py.File('train_images.h5', 'r') as h5f:
    for batch_name in h5f:
        train_images = h5f[batch_name][:]
        # Train model with this batch


predictions = []
batch_size = 32  # Number of samples to predict at once
for i in range(0, 100, batch_size):  # Predicting for the first 100 samples
    input_batch = load_data_for_prediction_batch(val_file, i, batch_size, window_size)
    predictions_batch = model.predict(input_batch)
    predictions.extend(predictions_batch.flatten())
    


BasicCNN(
  (conv1): Conv1d(11, 32, kernel_size=(3,), stride=(1,), padding=(1,))
  (pool): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv1d(32, 64, kernel_size=(3,), stride=(1,), padding=(1,))
  (fc1): Linear(in_features=960, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=1, bias=True)
)


KeyError: "Unable to synchronously open object (object 'inputs' doesn't exist)"

In [None]:
end_time = time.time()
minutes = (end_time - start_time) // 60
seconds = (end_time - start_time) % 60
print(f'Time elapsed: {minutes:.0f}m {seconds:.1f}s')
print('--------------------------------------')
# Calculate accuracy
accuracy = accuracy_score(y_val, predictions)
print(f'Validation Accuracy: {accuracy:.5f}')

# Calculate F1 macro score
f1_macro = f1_score(y_val, predictions, average='macro')
print(f'Validation F1 Macro Score: {f1_macro:.5f}')

In [None]:

predictions = []
batch_size = 32  # Number of samples to predict at once
for i in range(0, 100, batch_size):  # Predicting for the first 100 samples
    input_batch = load_data_for_prediction_batch(test_file, i, batch_size, window_size)
    predictions_batch = model.predict(input_batch)
    predictions.extend(predictions_batch.flatten())
    


IndentationError: unexpected indent (589211355.py, line 2)