# Dataset

In [None]:
import glob
import pandas as pd
import numpy as np
from torch.utils.data import Dataset
from pathlib import Path
import pywt

# Number of per-sample features when no band powers are added: NormalizedTime,
# SinNormalizedTime, AccV, AccML and AccAP.
BASE_FEATURE_COUNT = 5
# Number of samples in each fixed-length window handed to the model.
WINDOW_LENGTH = 10240
# Consecutive windows start WINDOW_LENGTH // STRIDE_DENOMINATOR samples apart.
STRIDE_DENOMINATOR = 8
# Distance in samples between the starts of consecutive windows.
STRIDE_LENGTH = WINDOW_LENGTH // STRIDE_DENOMINATOR

def GetBandPowers(signals, sample_rate_hz, freq_bands):
    """Compute per-sample wavelet power of every signal column, summed into bands.

    Args:
        signals: 2-D array indexed as (sample, channel). Each column is
            transformed independently.
        sample_rate_hz: Sampling rate of `signals` in Hz.
        freq_bands: Sequence of (low, high) pairs. A band collects the analysis
            frequencies f with low <= f < high. The upper edge of the last band
            is the top of the analysis grid.

    Returns:
        float32 array of shape (channel count, band count, sample count).
    """
    wavelet = 'mexh'

    # 100 log-spaced analysis frequencies from 0.5 up to the top of the last band.
    freqs = np.logspace(np.log10(0.5), np.log10(freq_bands[-1][1]), num=100)

    # NOTE(review): these values are handed to pywt.cwt as its `scales`
    # argument, yet they are derived as freqs / 2 rather than converted from
    # frequency to scale. The rows of the transform therefore may not sit at
    # `freqs`, which the band masks below assume - confirm with
    # pywt.scale2frequency. Left unchanged so the features stay identical for
    # models that were trained on them.
    scales = freqs / 2.

    # The band membership of each analysis row does not depend on the channel.
    band_masks = [
        np.logical_and(freqs >= band[0], freqs < band[1])
        for band in freq_bands
    ]

    sample_count, channel_count = signals.shape[0], signals.shape[1]
    band_power = np.zeros((channel_count, len(freq_bands), sample_count), dtype=np.float32)
    for channel in range(channel_count):
        coefficients, _ = pywt.cwt(signals[:, channel], scales, wavelet, sampling_period=1/sample_rate_hz)
        # Only one channel's power matrix is alive at a time. It is reduced to
        # float32 before summation so the result matches accumulating into a
        # float32 buffer.
        channel_power = ((np.abs(coefficients)**2) / sample_rate_hz).astype(np.float32)
        for band_index, mask in enumerate(band_masks):
            band_power[channel, band_index] = np.sum(channel_power[mask, :], axis=0)

    return band_power

class ParkinsonsDataset(Dataset):
    """Overlapping fixed-length windows cut from accelerometer CSV recordings.

    Every CSV must provide the columns Time, AccV, AccML and AccAP. Each row
    becomes a feature vector [NormalizedTime, SinNormalizedTime,
    <band powers, optional>, AccV, AccML, AccAP].

    Attributes (all filled by __init__):
        MaxLength: window length in samples.
        FeaturesSequences: one (<= MaxLength, feature count) array per window.
        TimeSequences: the Time values of each window.
        FileIds: file stem of the recording each window came from.
        FileIdsToMaxTimestamps: file stem -> largest Time value in that file.
        FrequencyBands: the `frequency_bands` argument.
    """

    def __init__(self, frequency_bands, max_sequence_length = 10240, stride_denominator = 1,
                 filepath_pattern = '/kaggle/input/tlvmc-parkinsons-freezing-gait-prediction/test/*/*.csv'):
        """Read every CSV matching `filepath_pattern` and slice it into windows.

        Args:
            frequency_bands: Sequence of (low, high) bands passed to
                GetBandPowers, or None to use only the time features and the
                raw signals.
            max_sequence_length: Window length in samples.
            stride_denominator: Consecutive windows start
                max_sequence_length // stride_denominator samples apart.
            filepath_pattern: Glob pattern selecting the CSV files to load.

        Raises:
            ValueError: If the resulting stride is smaller than one sample.
        """
        stride = max_sequence_length // stride_denominator
        if stride < 1:
            raise ValueError(
                f'max_sequence_length // stride_denominator must be at least 1, got {stride}'
            )

        self.MaxLength = max_sequence_length
        self.FeaturesSequences = []
        self.TimeSequences = []
        self.FileIds = []

        self.FileIdsToMaxTimestamps = {}

        self.FrequencyBands = frequency_bands

        # Original author's note: accelerations are in units of g (9.806 m/s^2).
        for filepath in glob.glob(filepath_pattern):
            df = pd.read_csv(filepath)
            if df.empty:
                # A recording without rows yields no windows and has no maximum timestamp.
                continue

            times = df['Time'].to_numpy()
            max_time = times.max()
            # Guard the 0/0 case (e.g. a single row at Time 0), which would
            # otherwise fill both time features with NaN.
            if max_time > 0:
                normalized_time = times / max_time
            else:
                normalized_time = np.zeros(len(times))
            time_features = np.stack(
                (normalized_time, np.sin(normalized_time * np.pi)), axis = 1
            )

            signals = df[['AccV', 'AccML', 'AccAP']].to_numpy()
            # Paths containing 'defog' are treated as 100 Hz, everything else as 128 Hz.
            sample_rate_hz = 100 if 'defog' in filepath else 128

            feature_blocks = [time_features]
            if self.FrequencyBands is not None:
                # Has shape (accelerometer axis count, band count, sequence length)
                band_powers = GetBandPowers(signals, sample_rate_hz, self.FrequencyBands)
                # Flatten (axis, band) into one feature dimension per sample.
                feature_blocks.append(band_powers.reshape(-1, band_powers.shape[-1]).T)
            feature_blocks.append(signals)
            all_features = np.concatenate(feature_blocks, axis = 1)

            file_id = Path(filepath).stem
            # The trailing windows are shorter than MaxLength; __getitem__ pads them.
            for start_index in range(0, len(all_features), stride):
                end_index = start_index + self.MaxLength
                self.FeaturesSequences.append(all_features[start_index:end_index])
                self.TimeSequences.append(times[start_index:end_index])
                self.FileIds.append(file_id)

            self.FileIdsToMaxTimestamps[file_id] = max_time

    def __len__(self):
        """Return the total number of windows across all files."""
        return len(self.FeaturesSequences)

    def __getitem__(self, index):
        """Return window `index` padded to MaxLength.

        Returns:
            (padded_features, padded_times, file_id). Features are zero-padded
            and times are padded with -1, so a negative time marks padding.
        """
        features = self.FeaturesSequences[index]
        # Take the width from the stored window so it always matches the data.
        sequence_length, feature_count = features.shape

        padded_features = np.zeros((self.MaxLength, feature_count))
        padded_features[:sequence_length, :] = features

        padded_times = np.full((self.MaxLength,), -1, dtype = int)
        padded_times[:sequence_length] = self.TimeSequences[index]

        return padded_features, padded_times, self.FileIds[index]
    
# Build the dataset used for inference from the raw features only.
# Band-power variant: ParkinsonsDataset([(0.5, 3), (3, 10)], WINDOW_LENGTH, STRIDE_DENOMINATOR)
dataset = ParkinsonsDataset(
    frequency_bands=None,
    max_sequence_length=WINDOW_LENGTH,
    stride_denominator=STRIDE_DENOMINATOR,
)

# Smoke test: window count, the first window with the shape of its features,
# and the last window.
print(len(dataset))
first_window = dataset[0]
print(first_window)
print(np.shape(first_window[0]))

print(dataset[-1])

# Model architecture

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import torch
import torch.nn as nn

class Conv1dBlockSE(nn.Module):
    """Channel-preserving conv block with squeeze-and-excitation gating and a skip connection.

    The input goes through Conv1d -> BatchNorm1d -> ReLU -> Dropout1d. The
    result is averaged over the length axis, mapped through two linear layers
    (bottleneck width channels // reduction) and a sigmoid to obtain one gate
    per channel, multiplied by those gates, and finally added to the input.
    """

    def __init__(self, channels, kernel_size=3, stride=1, padding=1, reduction=16, dropout=0):
        super().__init__()
        bottleneck_width = channels // reduction

        # Convolutional path.
        self.conv = nn.Conv1d(
            channels, channels, kernel_size,
            stride=stride, padding=padding, groups=1, bias=False,
        )
        self.bn = nn.BatchNorm1d(channels)
        self.relu = nn.ReLU(inplace=True)
        self.dropout = nn.Dropout1d(p=dropout)

        # Gate generator.
        self.pool = nn.AdaptiveAvgPool1d(1)
        self.fc1 = nn.Linear(channels, bottleneck_width)
        self.fc2 = nn.Linear(bottleneck_width, channels)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the gated convolution output plus `x`; expects x as (batch, channels, length)."""
        features = self.dropout(self.relu(self.bn(self.conv(x))))

        # One average per channel, then a gate in (0, 1) per channel.
        channel_means = self.pool(features).flatten(1)
        gates = self.sigmoid(self.fc2(self.relu(self.fc1(channel_means))))

        # Rescale every channel by its gate and add the skip connection.
        gated = features * gates[:, :, None]
        gated += x
        return gated

class NonResidualConvSE(nn.Module):
    """Conv block with squeeze-and-excitation gating and no skip connection.

    Unlike the residual variant it may change the channel count: the input goes
    through Conv1d -> BatchNorm1d -> ReLU -> Dropout, and each output channel is
    then scaled by a sigmoid gate computed from its average over the length axis.
    """

    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=1, reduction=16, dropout=0):
        super().__init__()
        bottleneck_width = out_channels // reduction

        conv_layers = [
            nn.Conv1d(in_channels, out_channels, kernel_size, stride, padding, bias=False),
            nn.BatchNorm1d(out_channels),
            nn.ReLU(),
            nn.Dropout(p=dropout),
        ]
        self.Conv = nn.Sequential(*conv_layers)

        self.Pooling = nn.AdaptiveAvgPool1d(1)

        gate_layers = [
            nn.Linear(out_channels, bottleneck_width),
            nn.ReLU(),
            nn.Linear(bottleneck_width, out_channels),
            nn.Sigmoid(),
        ]
        self.SqueezeExcitationWeightGenerator = nn.Sequential(*gate_layers)

    def forward(self, x):
        """Return the gated convolution output; expects x as (batch, in_channels, length)."""
        activations = self.Conv(x)
        # Collapse the length axis to one value per channel before computing the gates.
        channel_means = self.Pooling(activations).flatten(1)
        gates = self.SqueezeExcitationWeightGenerator(channel_means)
        return activations * gates[:, :, None]

class Conv1dBlockResidual(nn.Module):
    """Residual block: two Conv1d -> BatchNorm1d -> ReLU -> Dropout1d stages added to the input."""

    def __init__(self, channels, kernel_size=3, stride=1, padding=1, dropout=0):
        super().__init__()

        def conv_stage():
            # One Conv -> BN -> ReLU -> Dropout1d stage that keeps the channel count.
            return [
                nn.Conv1d(channels, channels, kernel_size, stride, padding, bias=False),
                nn.BatchNorm1d(channels),
                nn.ReLU(),
                nn.Dropout1d(p=dropout),
            ]

        self.Layers = nn.Sequential(*conv_stage(), *conv_stage())

    def forward(self, x):
        """Return x plus the output of the two stacked conv stages."""
        return x + self.Layers(x)

class Conv1dBlock(nn.Module):
    """Plain conv stage: bias-free Conv1d -> BatchNorm1d -> ReLU -> Dropout1d."""

    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=1, dropout=0):
        super().__init__()
        self.conv = nn.Conv1d(
            in_channels, out_channels, kernel_size,
            stride=stride, padding=padding, bias=False,
        )
        self.bn = nn.BatchNorm1d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.dropout = nn.Dropout1d(p=dropout)

    def forward(self, x):
        """Apply the four layers in order; expects x as (batch, in_channels, length)."""
        return self.dropout(self.relu(self.bn(self.conv(x))))

class Conv1dBlockPreprocessedSE(nn.Module):
    """A channel-changing Conv1dBlock followed by one or two Conv1dBlockSE blocks.

    Both squeeze-and-excitation blocks are always constructed (so both appear in
    the state dict); `use_second_se` only decides whether the second one is
    applied in forward().
    """

    def __init__(self, in_channels, out_channels, reduction=16, use_second_se=False, preprocessor_dropout=0, se_dropout=0):
        super().__init__()

        # Every convolution in this block shares the same length-preserving geometry.
        conv_geometry = dict(kernel_size=3, stride=1, padding=1)

        self.Preprocessor = Conv1dBlock(
            in_channels, out_channels, dropout=preprocessor_dropout, **conv_geometry
        )
        self.SqueezeAndExcitation1 = Conv1dBlockSE(
            out_channels, reduction=reduction, dropout=se_dropout, **conv_geometry
        )
        self.SqueezeAndExcitation2 = Conv1dBlockSE(
            out_channels, reduction=reduction, dropout=se_dropout, **conv_geometry
        )

        self.UseSecondSe = use_second_se

    def forward(self, x):
        """Run the preprocessor, the first SE block and, if enabled, the second SE block."""
        x = self.SqueezeAndExcitation1(self.Preprocessor(x))
        return self.SqueezeAndExcitation2(x) if self.UseSecondSe else x

class SequenceClassifier(nn.Module):
    """1-D U-Net style network that emits one score vector per input time step.

    Five encoder stages, each followed by a 2x max-pool, lead into a bottleneck;
    five decoder stages upsample with transposed convolutions and concatenate
    the matching encoder output along the channel axis. A final 1x1 convolution
    maps to `out_channels`. Channel widths double at each encoder stage,
    starting from `model_width_coef`.
    """

    def __init__(
            self,
            in_channels=3,
            out_channels=4,
            model_width_coef=32,
            reduction=16,
            use_second_se=False,
            preprocessor_dropout=0,
            se_dropout=0,
            initial_dropout=0,
            center_dropout=0):
        super().__init__()

        def se_stage(stage_in, stage_out):
            # All encoder/decoder stages share the same SE configuration.
            return Conv1dBlockPreprocessedSE(
                stage_in, stage_out, reduction, use_second_se, preprocessor_dropout, se_dropout
            )

        def downsample():
            return nn.MaxPool1d(kernel_size=2, stride=2)

        def upsample(stage_in, stage_out):
            return nn.ConvTranspose1d(stage_in, stage_out, kernel_size=2, stride=2)

        w1 = model_width_coef
        w2, w4, w8, w16, w32 = w1 * 2, w1 * 4, w1 * 8, w1 * 16, w1 * 32

        # Encoder: the first stage has two extra gated conv layers in front.
        self.encoder1 = nn.Sequential(
            NonResidualConvSE(in_channels, w1, reduction=reduction // 2, dropout=initial_dropout),
            NonResidualConvSE(w1, w1, reduction=reduction // 2, dropout=initial_dropout),
            se_stage(w1, w1),
        )
        self.pool1 = downsample()
        self.encoder2 = se_stage(w1, w2)
        self.pool2 = downsample()
        self.encoder3 = se_stage(w2, w4)
        self.pool3 = downsample()
        self.encoder4 = se_stage(w4, w8)
        self.pool4 = downsample()
        self.encoder5 = se_stage(w8, w16)
        self.pool5 = downsample()

        # Bottleneck at the lowest resolution.
        self.bottleneck = Conv1dBlock(w16, w32, dropout=center_dropout)

        # Decoder: each stage takes the upsampled features concatenated with the
        # encoder output of the same resolution, hence twice the output width.
        self.upconv5 = upsample(w32, w16)
        self.decoder5 = se_stage(w32, w16)
        self.upconv4 = upsample(w16, w8)
        self.decoder4 = se_stage(w16, w8)
        self.upconv3 = upsample(w8, w4)
        self.decoder3 = se_stage(w8, w4)
        self.upconv2 = upsample(w4, w2)
        self.decoder2 = se_stage(w4, w2)
        self.upconv1 = upsample(w2, w1)
        self.decoder1 = se_stage(w2, w1)

        # Per-time-step projection to the output classes.
        self.out_conv = nn.Conv1d(w1, out_channels, kernel_size=1)

    def forward(self, x):
        """Map (batch, length, in_channels) to (batch, length, out_channels)."""
        # Conv1d layers expect channels before length.
        x = x.permute(0, 2, 1)

        encoder_stages = (
            (self.encoder1, self.pool1),
            (self.encoder2, self.pool2),
            (self.encoder3, self.pool3),
            (self.encoder4, self.pool4),
            (self.encoder5, self.pool5),
        )
        skips = []
        for encoder, pool in encoder_stages:
            x = encoder(x)
            skips.append(x)
            x = pool(x)

        x = self.bottleneck(x)

        decoder_stages = (
            (self.upconv5, self.decoder5),
            (self.upconv4, self.decoder4),
            (self.upconv3, self.decoder3),
            (self.upconv2, self.decoder2),
            (self.upconv1, self.decoder1),
        )
        for upconv, decoder in decoder_stages:
            # Skip connections are consumed deepest-first.
            x = decoder(torch.cat((upconv(x), skips.pop()), dim=1))

        return self.out_conv(x).permute(0, 2, 1)

# Inference

In [None]:
import torch
from torch.utils.data import DataLoader
import sys
from math import ceil
import glob

BATCH_SIZE = 64
dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, num_workers=1)

# Checkpoints whose predictions are averaged into one ensemble. The trailing
# numbers are the original author's score notes.
model_paths =[
    #'/kaggle/input/parkinsonsfrequencydomainmodels/Limited/split0_346_459_48_479.pth'
    #'/kaggle/input/parkinsonsfrequencydomainmodels/Limited/split1_410_738_146_374.pth'
    #'/kaggle/input/parkinsonsfrequencydomainmodels/Limited/split10_385_570_133_417.pth'
    
    '/kaggle/input/parkinsonsfrequencydomainmodels/None/split10_504_543_164_386.pth', # Strong! 0.411
    
    #'/kaggle/input/cherrypickedparkinsons/CherryPicked/split10_494_544_150_406.pth'
    '/kaggle/input/cherrypickedparkinsons/CherryPicked/split10_551_573_175_376.pth' # Strong ish. 0.38
    #'/kaggle/input/cherrypickedparkinsons/CherryPicked/split10_547_564_198_358.pth'
    
]
#model_paths = glob.glob('/kaggle/input/parkinsonsfrequencydomainmodels/Limited/*.pth')

#model_paths = glob.glob('/kaggle/input/cherrypickedparkinsons/CherryPicked/*.pth')

models = []
for model_path in model_paths:
    model = SequenceClassifier(in_channels = BASE_FEATURE_COUNT, model_width_coef=32).cuda()
    # NOTE(review): torch.load unpickles the file, so only load checkpoints
    # from trusted sources.
    state_dict = torch.load(model_path, map_location=torch.device('cuda:0'))
    # Drop the '_orig_mod.' prefix from parameter names (presumably added by
    # torch.compile at training time - confirm) so they match this model.
    # Renaming in place keeps the loaded mapping object itself.
    for key in list(state_dict.keys()):
        state_dict[key.replace('_orig_mod.', '')] = state_dict.pop(key)

    model.load_state_dict(state_dict)
    model.eval()
    models.append(model)

# Scores buffered per file until all windows of that file have been seen:
# FileId : [
#     # index = timestamp
#     [
#         [StartHesitation score from window 1, from window 2, ...],
#         [Turn score from window 1, from window 2, ...],
#         [Walking score from window 1, from window 2, ...],
#     ],
#     ...
# ]
file_ids_to_timestamps_to_scores = {}
with open('submission.csv', 'w') as submission_file:
    submission_file.write('Id,StartHesitation,Turn,Walking\n')
    
    for padded_features, padded_times, padded_ids in dataloader:
        padded_features = padded_features.float().cuda()
        # Test-time augmentation: a second view with the second-to-last feature
        # (AccML) negated. Built once as a copy so that the input every model
        # sees does not depend on how many models ran before it.
        flipped_features = padded_features.clone()
        flipped_features[:,:,-2] *= -1

        # Has shape (2 * model count, series count, padded_series_length, class count)
        all_predictions = []
        with torch.no_grad():
            for model in models:
                for view in (padded_features, flipped_features):
                    predictions = model(view)
                    predictions = torch.nn.functional.softmax(predictions, dim=2).cpu().numpy()
                    all_predictions.append(predictions)

        averaged_predictions = np.mean(all_predictions, axis = 0)

        for series_index, file_id in enumerate(padded_ids):
            max_file_timestamp = dataset.FileIdsToMaxTimestamps[file_id]
            if file_id not in file_ids_to_timestamps_to_scores:
                file_ids_to_timestamps_to_scores[file_id] = [
                    [[], [] ,[]]
                    for _ in range(max_file_timestamp + 1)
                ]

            series_predictions = averaged_predictions[series_index]
            series_timestamps = padded_times[series_index].numpy()
            file_timestamps_to_scores = file_ids_to_timestamps_to_scores[file_id]

            # Number of windows that contain the file's last timestamp T.
            # Windows start every STRIDE_LENGTH samples up to T, so that is
            # T // STRIDE_LENGTH + 1 windows, capped at STRIDE_DENOMINATOR once
            # the file is longer than one window. (ceil(T / STRIDE_LENGTH) is
            # one too small whenever T is a multiple of STRIDE_LENGTH, which
            # flushed such files before their last window arrived.)
            # Assumes Time counts 0..T without gaps, as the timestamp-indexed
            # buffer above already requires.
            max_expected_endpoint_samples = min(
                max_file_timestamp // STRIDE_LENGTH + 1, STRIDE_DENOMINATOR
            )

            for timestep_index, timestep_predictions in enumerate(series_predictions):
                timestamp = int(series_timestamps[timestep_index])
                if timestamp < 0:
                    # Negative timestamps mark padding; nothing valid follows.
                    break

                # Class 0 is not written to the submission; classes 1..3 map to
                # the StartHesitation, Turn and Walking columns.
                timestamp_scores = file_timestamps_to_scores[timestamp]
                timestamp_scores[0].append(timestep_predictions[1])
                timestamp_scores[1].append(timestep_predictions[2])
                timestamp_scores[2].append(timestep_predictions[3])

                # Flush the file once its last timestamp has a score from every
                # window that covers it, i.e. the final window was processed.
                if (timestamp == max_file_timestamp) and (len(timestamp_scores[0]) == max_expected_endpoint_samples):
                    print('Dumping:', file_id, timestep_index, timestamp, max_file_timestamp)
                    for timestamp_to_store in range(max_file_timestamp + 1):
                        sample_id = f'{file_id}_{timestamp_to_store}'
                        scores = file_timestamps_to_scores[timestamp_to_store]
                        
                        # Every window counts fully except the last one to
                        # cover this timestamp, which gets half weight.
                        weights = np.ones(len(scores[0]))
                        #weights[0] = .5
                        weights[-1] = .5
                        
                        #weight_count = len(scores[0])
                        #weights = np.sin(np.linspace(0, np.pi, weight_count)) + 0.25
                        
                        avg_scores = np.average(scores, axis = 1, weights = weights)
                        submission_file.write(f'{sample_id},{avg_scores[0]},{avg_scores[1]},{avg_scores[2]}\n')
                    
                    # Release the buffered scores of the finished file.
                    file_ids_to_timestamps_to_scores.pop(file_id)


        # Debug output; sys.getsizeof is shallow and reports only the outer dict.
        print(sys.getsizeof(file_ids_to_timestamps_to_scores))


# Sanity check results

In [None]:
import pandas as pd

# Reload the submission file written by the inference cell and show its first rows.
predictions_df = pd.read_csv('submission.csv')
predictions_df.head()

In [None]:
# Show the last rows of the submission.
predictions_df.tail()

In [None]:
# Summary statistics of the submission's numeric columns.
predictions_df.describe()

In [None]:
# Display what is still buffered after inference. The inference loop pops a
# file's entry once its rows are written, so any file listed here was not
# flushed to submission.csv.
file_ids_to_timestamps_to_scores