### Initial exploration of data features
In this notebook we explore the initial data: which features are available, at what granularity the data is recorded, and whether there is any obvious noise we need to take into account.

In [1]:
# Root folder of the experiment to analyse. Keep the trailing slash: later cells
# build file paths by plain string concatenation (e.g. experiment + 'merged/...').
experiment = "data/experiments/experiment_2/"

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

In [None]:
## Load one experiment folder (e.g. data/experiments/experiment_2/): button_presses.csv holds the labels,
## the other csv files hold the features, and meta/time.csv holds the system time of the recording.
def load_data(path, within_range=True, temp_features=True):
    """Load all sensor csv files of one experiment into a single labelled 10 Hz table.

    Parameters
    ----------
    path : str
        Experiment folder. It must contain ``meta/time.csv`` (with ``event`` and
        ``system time`` columns), ``button_presses.csv`` (no header; timestamp in
        epoch seconds, label) and one or more csv files with a ``Time (s)`` column.
        A trailing path separator is optional.
    within_range : bool, default True
        Keep only the samples between the first and the last row of
        ``button_presses.csv`` (inclusive).
    temp_features : bool, default True
        Add the ``Time_Until_Next_Label`` and ``Time_Since_Previous_Label`` columns
        (seconds; 0.0 where there is no next / previous label).

    Returns
    -------
    pandas.DataFrame
        Mean-resampled (100 ms bins) sensor data indexed by absolute time, with a
        ``Label`` column holding the most recent label at or before each sample
        (NaN for samples that precede every label).

    Raises
    ------
    ValueError
        If no csv file with a ``Time (s)`` column is found in ``path``.
    """
    # Load the starting time
    time_df = pd.read_csv(os.path.join(path, 'meta', 'time.csv'))
    start_time = time_df.loc[time_df['event'] == 'START', 'system time'].iloc[0]
    recording_start = pd.Timestamp(start_time, unit='s')

    data_frames = []
    # sorted() makes the concatenation order independent of the file system
    for filename in sorted(os.listdir(path)):
        if not filename.endswith(".csv"):
            continue
        df = pd.read_csv(os.path.join(path, filename))
        if 'Time (s)' not in df.columns:
            print(f"'Time (s)' column not found in file: {filename}")
            print(f"Columns found: {df.columns}")
            continue
        # Absolute timestamps = recording start + relative sensor time. Adding a
        # timedelta keeps the sub-second part of the start time.
        # NOTE(review): to_datetime(..., unit='s', origin=...) appears to convert the
        # origin to whole seconds first - confirm against the installed pandas.
        df.index = recording_start + pd.to_timedelta(df['Time (s)'], unit='s')
        data_frames.append(df)

    if not data_frames:
        raise ValueError(f"No csv file with a 'Time (s)' column found in {path!r}")

    # Concatenate all sensors and resample to 10 Hz
    data_resampled = pd.concat(data_frames).resample('100ms').mean()

    # Load label dataset
    labels = pd.read_csv(os.path.join(path, 'button_presses.csv'), names=['Timestamp', 'Label'])
    labels['Timestamp'] = pd.to_datetime(labels['Timestamp'], unit='s')

    # Filter timestamps within label range
    if within_range:
        first_label_timestamp = labels['Timestamp'].iloc[0]
        last_label_timestamp = labels['Timestamp'].iloc[-1]
        in_label_range = (data_resampled.index >= first_label_timestamp) & (data_resampled.index <= last_label_timestamp)
        data_resampled = data_resampled[in_label_range]

    # Work on an explicit copy so the column assignments below never target a view
    data_resampled = data_resampled.copy()

    if len(data_resampled):
        # searchsorted needs the label times in ascending order
        ordered = labels.sort_values('Timestamp', kind='stable')
        label_times = ordered['Timestamp'].to_numpy(dtype='datetime64[ns]')
        label_names = ordered['Label'].to_numpy(dtype=object)
        sample_times = data_resampled.index.to_numpy(dtype='datetime64[ns]')
        one_second = np.timedelta64(1, 's')

        # Position of the most recent label at or before each sample (-1 = none yet)
        recent_pos = np.searchsorted(label_times, sample_times, side='right') - 1
        has_recent = recent_pos >= 0
        recent_label = np.full(len(sample_times), np.nan, dtype=object)
        recent_label[has_recent] = label_names[recent_pos[has_recent]]
        data_resampled['Label'] = recent_label

        # Add temporal label features
        if temp_features:
            # First label strictly after the sample
            next_pos = recent_pos + 1
            has_next = next_pos < len(label_times)
            time_until_next = np.zeros(len(sample_times))
            time_until_next[has_next] = (label_times[next_pos[has_next]] - sample_times[has_next]) / one_second

            # Last label strictly before the sample
            previous_pos = np.searchsorted(label_times, sample_times, side='left') - 1
            has_previous = previous_pos >= 0
            time_since_previous = np.zeros(len(sample_times))
            time_since_previous[has_previous] = (sample_times[has_previous] - label_times[previous_pos[has_previous]]) / one_second

            data_resampled['Time_Until_Next_Label'] = time_until_next
            data_resampled['Time_Since_Previous_Label'] = time_since_previous
    return data_resampled

# Build the labelled, resampled sensor table for the selected experiment.
data = load_data(experiment)
# display(data)

In [None]:
import xml.etree.ElementTree as ET
from datetime import datetime

# Define a conversion function
def convert_timestamp(timestamp):
    """Parse a ``...Z`` ISO-8601 timestamp string into a naive ``datetime``.

    Both ``2023-06-02T10:00:00.500Z`` (with fractional seconds) and
    ``2023-06-02T10:00:00Z`` (without) are accepted. The trailing ``Z`` is matched
    literally; the returned object carries no tzinfo.

    Raises
    ------
    ValueError
        If ``timestamp`` matches neither format.
    """
    for fmt in ('%Y-%m-%dT%H:%M:%S.%fZ', '%Y-%m-%dT%H:%M:%SZ'):
        try:
            return datetime.strptime(timestamp, fmt)
        except ValueError:
            continue
    raise ValueError(f"Unsupported timestamp format: {timestamp!r}")

# Load the xml file into a dataframe
def load_xml(path, convert_time=True, filename='activity_11340269258.tcx'):
    """Read time, altitude and heart rate of every trackpoint of a TCX file.

    Parameters
    ----------
    path : str
        Folder containing the TCX file (trailing separator optional).
    convert_time : bool, default True
        Convert the ``Time`` strings with ``convert_timestamp``; otherwise the
        index keeps the raw strings from the file.
    filename : str, default 'activity_11340269258.tcx'
        Name of the TCX file inside ``path``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``Time`` with float columns ``AltitudeMeters`` and ``HeartRate``.
        A reading that is absent from a trackpoint becomes NaN; trackpoints without
        a ``Time`` element are skipped because they cannot be placed on the index.
    """
    # Parse the XML file
    tree = ET.parse(os.path.join(path, filename))
    root = tree.getroot()

    # Only the TrainingCenterDatabase namespace is needed for the elements read below
    namespaces = {'tc': 'http://www.garmin.com/xmlschemas/TrainingCenterDatabase/v2'}

    def _as_float(text):
        # Missing element (None) or empty element ('') -> NaN instead of an exception
        if text is None or not text.strip():
            return float('nan')
        return float(text)

    # Extract data from XML and create a dictionary
    xml_data = {'Time': [], 'AltitudeMeters': [], 'HeartRate': []}

    for trackpoint in root.findall('.//tc:Trackpoint', namespaces):
        time = trackpoint.findtext('tc:Time', default=None, namespaces=namespaces)
        if time is None:
            continue
        altitude = trackpoint.findtext('tc:AltitudeMeters', default=None, namespaces=namespaces)
        heart_rate = trackpoint.findtext('tc:HeartRateBpm/tc:Value', default=None, namespaces=namespaces)

        xml_data['Time'].append(time)
        xml_data['AltitudeMeters'].append(_as_float(altitude))
        xml_data['HeartRate'].append(_as_float(heart_rate))

    # Create a DataFrame from the extracted data
    df = pd.DataFrame(xml_data)

    df['AltitudeMeters'] = df['AltitudeMeters'].astype(float)
    df['HeartRate'] = df['HeartRate'].astype(float)

    # Apply the conversion function to the 'Time' column
    if convert_time:
        df['Time'] = df['Time'].apply(convert_timestamp)

    df = df.set_index('Time')

    return df

# Altitude / heart-rate readings from the TCX file stored in the experiment folder.
xml_data = load_xml(experiment)

In [None]:
# Merges the csv and xml data
def merge(data, xml_data):
    """Left-join ``xml_data`` onto ``data`` by index.

    Only the ``xml_data`` rows whose index lies between the first and the last
    index value of ``data`` (inclusive) take part in the join. Every row of
    ``data`` is kept; rows without an exact index match get NaN in the
    ``xml_data`` columns.
    """
    start, stop = data.index[0], data.index[-1]
    inside_window = (xml_data.index >= start) & (xml_data.index <= stop)
    return data.merge(
        xml_data.loc[inside_window],
        how='left',
        left_index=True,
        right_index=True,
    )
# Sensor table with the TCX altitude / heart-rate columns joined on.
df = merge(data, xml_data)


## Load dataframe from file

In [None]:
# NOTE: this rebinds `df`, replacing the frame produced by merge() above with a
# previously saved file; the cells below operate on the file contents.
df = pd.read_csv(experiment + 'merged/added_features.csv')

## Change labels to numbers

In [None]:
# Replace each label by its integer category code. The column is overwritten in
# place, so the original label values are no longer available in `df` afterwards.
df['Label'] = df['Label'].astype('category').cat.codes

## Remove columns that are not useful

In [None]:
def remove_columns(df, columns):
    """Return a new frame equal to ``df`` without the listed ``columns``."""
    return df.drop(labels=columns, axis=1)

# Possibly also drop later: 'Latitude (°)', 'Longitude (°)', 'X (hPa)'
columns = ['Velocity (m/s)', 'Direction (°)', 'Distance (cm)', 'Horizontal Accuracy (m)', 'Time_Until_Next_Label', 'Time_Since_Previous_Label', 'Time (s)']
# Use the helper defined above instead of repeating the drop call
df_filt = remove_columns(df, columns)
# After dropping 'Time (s)', give the remaining 'Time (s).1' column that name
df_filt = df_filt.rename(columns={'Time (s).1': 'Time (s)'})

In [None]:
# Show the filtered frame to check which columns remain.
df_filt

## Interpolation

In [None]:
from ImputationMissingValues import ImputationMissingValues

# Interpolates specified columns of the dataframe (all columns with NaNs as default)
def interpolate(df, columns=None):
    """Run ``ImputationMissingValues.impute_interpolate`` over the given columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to impute.
    columns : list, optional
        Columns to process; when omitted, every column of ``df`` that contains at
        least one NaN.

    Returns
    -------
    The value returned by the last ``impute_interpolate`` call, or ``df`` itself
    when there is no column to process.
    """
    if columns is None:
        has_missing = df.isna().any()
        columns = df.columns[has_missing].tolist()

    imputer = ImputationMissingValues()
    result = df
    for name in columns:
        # Each call receives the output of the previous one
        result = imputer.impute_interpolate(result, name)

    return result

# Impute every column of the filtered frame that still contains NaNs.
interpolated_df = interpolate(df_filt)
# interpolated_df

In [None]:
# Show the imputed frame.
interpolated_df

## Adding features
Mean, std, min, max, difference

In [None]:
def create_features(df, seconds, columns=None, sampling_rate=10):
    """Aggregate ``df`` over sliding windows into per-window statistics.

    A window of ``seconds * sampling_rate`` consecutive rows is moved over ``df``
    one row at a time. For every window and every selected column the mean, std,
    min, max and last-minus-first difference are computed; a ``'Time (s)'`` column
    instead yields ``'Starttime (s)'`` and ``'Endtime (s)'``. The window label is
    the most frequent ``'Label'`` value (the smallest one on ties, as returned by
    ``Series.mode``), or NaN when the window contains no label at all.

    Parameters
    ----------
    df : pandas.DataFrame
        Input rows; must contain a ``'Label'`` column.
    seconds : int or float
        Window length in seconds.
    columns : sequence, optional
        Columns to aggregate. Defaults to every column whose name does not start
        with ``'Label'``.
    sampling_rate : int or float, default 10
        Rows per second in ``df``.

    Returns
    -------
    pandas.DataFrame
        One row per window with a fresh 0..n-1 index; empty when ``df`` has fewer
        rows than one window.

    Raises
    ------
    ValueError
        If the window would contain less than one row.
    """
    # Define the window size (rounded so that e.g. 0.5 s * 10 Hz gives 5 rows)
    window_size = int(round(seconds * sampling_rate))
    if window_size < 1:
        raise ValueError(f"Window of {seconds} s at {sampling_rate} Hz contains no rows")

    # `is None` also works when an Index / array is passed for `columns`
    if columns is None:
        columns = [col for col in df.columns if not str(col).startswith('Label')]

    # Collect one dict per window and build the frame once at the end; growing a
    # DataFrame cell by cell is far slower and fragments it.
    records = []
    for start in range(len(df) - window_size + 1):
        # Select a rolling window subset
        subset = df.iloc[start:start + window_size]
        record = {}

        for col in columns:
            values = subset[col]
            if col == "Time (s)":
                record['Starttime (s)'] = values.iloc[0]
                record['Endtime (s)'] = values.iloc[-1]
            else:
                record[f'{col} mean'] = values.mean()
                record[f'{col} std'] = values.std()
                record[f'{col} min'] = values.min()
                record[f'{col} max'] = values.max()
                record[f'{col} diff'] = values.iloc[-1] - values.iloc[0]

        # Get the most frequent label within the window
        label_modes = subset['Label'].mode()
        record['Label'] = label_modes.iloc[0] if len(label_modes) else np.nan
        records.append(record)

    return pd.DataFrame(records)


In [3]:
# Load the previously saved windowed feature table.
# NOTE(review): the table printed further down starts with an 'Unnamed: 0' header,
# which suggests the saved index is read back as an ordinary column and would then
# be included in the PCA input - confirm, and consider index_col=0.
df_full = pd.read_csv(experiment + 'merged/added_features_stats.csv')

## Principal Component Analysis

In [4]:
from DataTransformation import PrincipalComponentAnalysis

# PCA function where number of PC's and columns used can be specified (all columns but the label as default).
# Adds new PCA columns to the dataframe
def _pca_input_columns(df, columns=None):
    """Return ``columns`` unchanged, or the default PCA input columns of ``df``.

    The default is every column except the window start/end times and the label.
    """
    if columns is not None:
        return columns
    columns_to_exclude = ['Starttime (s)', 'Endtime (s)', 'Label']
    return [col for col in df.columns if col not in columns_to_exclude]


def pca(df, num_comp, columns=None):
    """Apply ``PrincipalComponentAnalysis.apply_pca`` to ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Feature table.
    num_comp : int
        Number of principal components, passed through to ``apply_pca``.
    columns : list, optional
        Input columns; see ``_pca_input_columns`` for the default.

    Returns
    -------
    Whatever ``apply_pca`` returns.
    """
    transformer = PrincipalComponentAnalysis()
    pca_df = transformer.apply_pca(df, _pca_input_columns(df, columns), num_comp)

    return pca_df


def determine_pc_explained_variance(df, columns = None):
    """Return ``PrincipalComponentAnalysis.determine_pc_explained_variance`` for ``df``.

    ``columns`` selects the input columns; see ``_pca_input_columns`` for the default.
    """
    transformer = PrincipalComponentAnalysis()
    ratio = transformer.determine_pc_explained_variance(df, _pca_input_columns(df, columns))

    return ratio

# Explained-variance values for the default input columns of the full feature table.
ratio = determine_pc_explained_variance(df_full, columns = None)

In [5]:
# Running total of the explained-variance values.
cumulative_variance = np.cumsum(ratio)
# Position of the first entry at which the running total reaches 0.95, turned into a
# component count (+1). NOTE: if no entry reached 0.95, argmax would return 0 here.
n_components = np.argmax(cumulative_variance >= 0.95) + 1

print("Number of components to keep:", n_components)

Number of components to keep: 27


In [126]:
# Run the PCA with the number of components selected above.
pca_df = pca(df_full, n_components)
pca_df

Unnamed: 0,Starttime (s),Endtime (s),X (m/s^2) mean,X (m/s^2) std,X (m/s^2) min,X (m/s^2) max,X (m/s^2) diff,Y (m/s^2) mean,Y (m/s^2) std,Y (m/s^2) min,Y (m/s^2) max,Y (m/s^2) diff,Z (m/s^2) mean,Z (m/s^2) std,Z (m/s^2) min,Z (m/s^2) max,Z (m/s^2) diff,X (hPa) mean,X (hPa) std,X (hPa) min,X (hPa) max,X (hPa) diff,X (rad/s) mean,X (rad/s) std,X (rad/s) min,X (rad/s) max,X (rad/s) diff,Y (rad/s) mean,Y (rad/s) std,Y (rad/s) min,Y (rad/s) max,Y (rad/s) diff,Z (rad/s) mean,Z (rad/s) std,Z (rad/s) min,Z (rad/s) max,Z (rad/s) diff,Latitude (°) mean,Latitude (°) std,Latitude (°) min,Latitude (°) max,Latitude (°) diff,Longitude (°) mean,Longitude (°) std,Longitude (°) min,Longitude (°) max,Longitude (°) diff,Height (m) mean,Height (m) std,Height (m) min,Height (m) max,Height (m) diff,Vertical Accuracy (°) mean,Vertical Accuracy (°) std,Vertical Accuracy (°) min,Vertical Accuracy (°) max,Vertical Accuracy (°) diff,X (µT) mean,X (µT) std,X (µT) min,X (µT) max,X (µT) diff,Y (µT) mean,Y (µT) std,Y (µT) min,Y (µT) max,Y (µT) diff,Z (µT) mean,Z (µT) std,Z (µT) min,Z (µT) max,Z (µT) diff,AltitudeMeters mean,AltitudeMeters std,AltitudeMeters min,AltitudeMeters max,AltitudeMeters diff,HeartRate mean,HeartRate std,HeartRate min,HeartRate max,HeartRate diff,Roll (°) mean,Roll (°) std,Roll (°) min,Roll (°) max,Roll (°) diff,Pitch (°) mean,Pitch (°) std,Pitch (°) min,Pitch (°) max,Pitch (°) diff,Yaw (°) mean,Yaw (°) std,Yaw (°) min,Yaw (°) max,Yaw (°) diff,Label,pca_1,pca_2,pca_3,pca_4,pca_5,pca_6,pca_7,pca_8,pca_9,pca_10,pca_11,pca_12,pca_13,pca_14,pca_15,pca_16,pca_17,pca_18,pca_19,pca_20,pca_21,pca_22,pca_23,pca_24,pca_25,pca_26,pca_27
0,4.049522,8.947556,4.668066,0.736433,2.254366,6.540396,0.564754,0.247722,0.687264,-0.974138,2.154488,-0.039186,0.943490,1.293481,-1.257709,4.701795,-2.077609,1015.844628,0.001750,1015.843048,1015.849823,0.004105,-0.018197,0.288434,-0.999210,0.578402,0.112204,-0.003596,0.484012,-1.052395,1.348128,-0.220877,-0.020165,0.166541,-0.384679,0.233272,0.041534,52.355728,0.000001,52.355727,52.355731,0.000004,4.956966,2.195755e-07,4.956965,4.956966,-7.472308e-07,-2.208062,0.006206,-2.225801,-2.204683,-0.021119,43.221538,0.406872,43.000000,44.384615,1.384615,-38.165336,4.596663,-42.750142,-25.614095,-3.855437,-12.642756,1.889757,-15.835959,-8.784474,3.040558,-13.104054,7.943040,-32.317590,-5.507129,2.099371,-33.656726,0.047532,-33.741817,-33.599998,-0.141819,82.031818,0.334935,81.0,82.354545,1.354545,-74.049133,11.886516,-88.847026,-46.183610,-25.978217,9.767464,61.738170,-174.320612,131.182650,0.558601,-8.402301,4.001984,-14.794028,0.188840,-5.700601,0.0,0.265961,0.314463,0.338350,0.367011,0.307676,-0.168426,-0.813453,0.333065,0.318347,0.091337,0.199322,0.056509,0.179423,-0.110070,0.088244,-0.030301,-0.221104,-0.056230,0.003013,0.013705,-0.294196,-0.258365,0.103761,-0.246610,-0.006830,0.030016,0.054190
1,4.149481,9.047517,4.681272,0.732677,2.254366,6.540396,-1.740343,0.248589,0.687040,-0.974138,2.154488,-1.025743,0.899289,1.276461,-1.257709,4.701795,-0.831744,1015.844722,0.001926,1015.843048,1015.850410,0.004692,-0.017758,0.288501,-0.999210,0.578402,0.179526,-0.009020,0.482772,-1.052395,1.348128,-0.195523,-0.019171,0.166738,-0.384679,0.233272,0.004649,52.355728,0.000001,52.355727,52.355731,0.000004,4.956966,2.388087e-07,4.956965,4.956966,-7.970462e-07,-2.208512,0.006749,-2.227209,-2.204683,-0.022527,43.251077,0.442511,43.000000,44.476923,1.476923,-38.243963,4.641571,-42.755756,-25.614095,-4.027117,-12.579805,1.860284,-15.835959,-8.784474,2.930623,-13.060916,7.979281,-32.317590,-5.449609,2.750525,-33.659635,0.048432,-33.745454,-33.599998,-0.145455,82.059091,0.303220,81.1,82.363636,1.263636,-74.598732,11.799771,-88.847026,-46.183610,-9.930988,10.096522,61.735405,-174.320612,131.182650,-27.060404,-8.512004,3.838812,-14.794028,0.188840,-5.673973,0.0,0.265705,0.312726,0.339895,0.366083,0.304637,-0.171658,-0.811701,0.332987,0.319077,0.093688,0.199153,0.062712,0.181236,-0.106040,0.073863,-0.039326,-0.214258,-0.067937,-0.031952,0.042633,-0.283655,-0.257074,0.124553,-0.258471,-0.019146,0.022936,0.062959
2,4.249441,9.147477,4.648119,0.682631,2.254366,6.380989,-0.740532,0.221434,0.678612,-0.974138,2.154488,-1.880231,0.883980,1.278189,-1.257709,4.701795,0.776599,1015.844827,0.002117,1015.843048,1015.850998,0.005280,-0.013797,0.288102,-0.999210,0.578402,0.524711,-0.013562,0.482198,-1.052395,1.348128,-0.363922,-0.019579,0.166633,-0.384679,0.233272,0.227943,52.355728,0.000001,52.355727,52.355731,0.000004,4.956966,2.582431e-07,4.956965,4.956966,-8.468615e-07,-2.208991,0.007299,-2.228617,-2.204683,-0.023934,43.282462,0.478523,43.000000,44.569231,1.569231,-38.326055,4.686398,-42.833213,-25.614095,-4.417441,-12.519029,1.834788,-15.835959,-8.784474,3.038658,-12.998975,8.029645,-32.317590,-5.103088,3.995813,-33.662617,0.049268,-33.749090,-33.599998,-0.149091,82.084545,0.272976,81.2,82.372727,1.172727,-74.770446,11.887197,-88.847026,-46.183610,-11.653161,8.547880,61.820527,-174.320612,131.182650,-136.692489,-8.624044,3.657137,-14.794028,-1.044704,-4.368480,0.0,0.264132,0.308177,0.340218,0.364742,0.302563,-0.173086,-0.811621,0.328094,0.320917,0.096231,0.199501,0.072583,0.180596,-0.083777,0.037209,-0.073449,-0.215343,-0.108883,-0.169898,0.116946,-0.278532,-0.270261,0.213136,-0.284745,-0.022761,0.026415,0.034134
3,4.349400,9.247437,4.633952,0.669198,2.254366,6.380989,-0.008888,0.186991,0.647944,-0.974138,2.154488,-2.232538,0.898023,1.267506,-1.257709,4.701795,1.550825,1015.844945,0.002321,1015.843048,1015.851586,0.005868,-0.004095,0.280495,-0.999210,0.578402,1.014398,-0.019801,0.480089,-1.052395,1.348128,-1.117946,-0.014904,0.164290,-0.384679,0.233272,0.156502,52.355728,0.000001,52.355727,52.355731,0.000004,4.956966,2.778328e-07,4.956965,4.956966,-8.966769e-07,-2.209498,0.007852,-2.230025,-2.204683,-0.025342,43.315692,0.514823,43.000000,44.661538,1.661538,-38.413866,4.729053,-42.833213,-25.614095,-4.968143,-12.453740,1.808836,-15.835959,-8.784474,3.085238,-12.916555,8.091407,-32.317590,-4.977875,6.970129,-33.665671,0.050035,-33.752726,-33.599998,-0.152728,82.108182,0.244503,81.3,82.381818,1.081818,-75.035143,11.999102,-88.847026,-46.183610,-23.338073,6.170253,60.344113,-174.320612,131.182650,-135.185236,-8.709309,3.524100,-14.794028,-1.120424,-3.472157,0.0,0.259635,0.302205,0.337905,0.361261,0.302331,-0.172823,-0.810884,0.325297,0.322381,0.094533,0.200141,0.070156,0.174766,-0.086296,0.048738,-0.071612,-0.235073,-0.111718,-0.178526,0.109578,-0.290271,-0.276589,0.216358,-0.280393,-0.004841,0.033079,0.009655
4,4.449360,9.347398,4.631971,0.668469,2.254366,6.380989,2.570389,0.143421,0.582925,-0.974138,1.864050,0.215713,0.928949,1.232208,-1.026714,4.701795,0.137804,1015.845074,0.002534,1015.843048,1015.852173,0.006455,0.015485,0.241002,-0.640914,0.578402,0.620672,-0.041519,0.451915,-1.052395,1.348128,-1.329278,-0.011797,0.163379,-0.384679,0.233272,0.125606,52.355728,0.000001,52.355727,52.355732,0.000005,4.956966,2.975337e-07,4.956965,4.956966,-9.464923e-07,-2.210033,0.008409,-2.231433,-2.204683,-0.026750,43.350769,0.551329,43.000000,44.753846,1.753846,-38.516345,4.771650,-42.962132,-25.614095,-7.132392,-12.391973,1.784915,-15.835959,-8.784474,2.089801,-12.777244,8.168024,-32.317590,-4.977875,12.700836,-33.668799,0.050728,-33.756363,-33.599998,-0.156364,82.130000,0.218168,81.4,82.390909,0.990909,-75.503839,11.982281,-88.847026,-46.183610,-3.725546,3.669606,58.066833,-174.320612,131.182650,53.073342,-8.776777,3.420592,-14.794028,-1.120424,-2.752774,0.0,0.239715,0.280799,0.328401,0.343704,0.293882,-0.184633,-0.810022,0.325711,0.333623,0.093203,0.202003,0.053011,0.161045,-0.115390,0.151621,-0.013508,-0.298568,-0.041479,0.055670,-0.033483,-0.334265,-0.287782,0.066335,-0.230688,0.043775,0.028518,-0.014947
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48377,4841.753910,4846.651563,4.970976,1.303375,3.098765,9.909526,0.608597,-0.757303,0.654912,-2.672577,0.405067,-1.543342,0.059917,1.558833,-2.440838,2.704155,3.659130,1015.841838,0.129779,1015.605041,1016.020279,0.415238,-0.188656,0.340172,-0.932673,0.649635,-0.326556,0.156099,0.317973,-0.390601,0.937829,0.350236,-0.052399,0.246939,-0.594436,0.537931,-0.413987,52.355879,0.000042,52.355825,52.355929,-0.000105,4.956983,2.949490e-05,4.956949,4.957044,-5.468000e-07,1.877952,0.917412,0.038762,3.069094,3.030333,62.626175,3.917587,58.027951,67.729068,-7.470849,-38.646577,4.502427,-46.935912,-32.813931,-10.647105,14.242068,4.308050,-1.149957,19.311390,-16.145764,13.501222,7.443132,-4.199231,22.970099,-19.122906,-33.734600,1.098794,-35.680000,-32.149999,-3.530001,140.759000,1.333275,138.4,142.650000,-4.250000,-71.349692,7.791226,-89.492514,-56.380398,1.987764,-75.392990,89.137311,-179.145100,173.308466,-197.218528,-77.390094,4.408660,-92.933568,-70.921256,-14.881493,2.0,1.082064,-0.301157,0.363294,0.545152,0.161292,-0.395636,0.847484,-0.544457,0.481817,0.599918,0.348252,0.330383,0.154060,0.024305,0.262438,-0.296730,-0.197630,0.214137,-0.306347,0.335641,0.311902,0.119301,0.132947,0.230577,0.115991,0.106700,0.208785
48378,4841.853621,4846.751516,4.974591,1.300112,3.098765,9.909526,-2.173400,-0.793244,0.638936,-2.672577,0.243531,0.075824,0.137374,1.564651,-2.440838,2.704155,0.296574,1015.850143,0.127584,1015.614700,1016.020279,0.405579,-0.192839,0.341756,-0.932673,0.649635,-0.082153,0.163390,0.314693,-0.390601,0.937829,0.231592,-0.059520,0.251261,-0.594436,0.537931,-0.157444,52.355877,0.000042,52.355825,52.355929,-0.000104,4.956983,2.950752e-05,4.956949,4.957044,-1.894500e-06,1.938559,0.893207,0.140890,3.069094,2.928204,62.476758,3.865524,58.027951,67.729068,-7.489734,-38.857959,4.634546,-46.935912,-32.813931,-10.901440,13.883491,4.943339,-2.933050,19.311390,-16.993391,13.095602,7.902474,-5.357324,22.970099,-21.597101,-33.806800,1.111083,-35.760000,-32.199999,-3.560001,140.672000,1.349193,138.3,142.600000,-4.300000,-71.192916,8.005318,-89.492514,-56.380398,11.984257,-79.386774,82.275238,-179.145100,173.308466,5.456723,-77.731491,5.071991,-95.121919,-70.921256,-15.786264,2.0,1.070355,-0.293305,0.361494,0.555330,0.193442,-0.394976,0.846222,-0.533497,0.465888,0.591060,0.353575,0.309216,0.107354,0.000862,0.355008,-0.238202,-0.232185,0.289295,-0.047055,0.207511,0.314341,0.125092,-0.026056,0.260823,0.113788,0.096203,0.265281
48379,4841.953815,4846.851468,4.929413,1.299994,3.098765,9.909526,-2.780385,-0.777616,0.631614,-2.672577,0.243531,1.061613,0.142745,1.571110,-2.440838,2.704155,-0.486249,1015.858254,0.125180,1015.625441,1016.020279,0.394838,-0.194135,0.342447,-0.932673,0.649635,-0.419714,0.160721,0.316916,-0.390601,0.937829,-0.529272,-0.062045,0.253578,-0.594436,0.537931,-0.312990,52.355875,0.000042,52.355825,52.355929,-0.000104,4.956983,2.955029e-05,4.956949,4.957044,-3.242200e-06,1.997123,0.868592,0.243018,3.069094,2.826076,62.326963,3.806233,58.027951,67.729068,-7.508619,-39.071224,4.741348,-46.935912,-32.813931,-10.332093,13.512487,5.584345,-4.489874,19.311390,-17.807249,12.651942,8.333306,-5.943237,22.970099,-21.556628,-33.879601,1.122841,-35.840000,-32.249999,-3.590001,140.584000,1.364283,138.2,142.550000,-4.350000,-71.012795,8.160845,-89.492514,-56.380398,4.818894,-78.977736,82.533716,-179.145100,173.308466,15.904368,-78.087412,5.764051,-97.131688,-70.921256,-17.578175,2.0,1.069600,-0.277228,0.368631,0.570353,0.213590,-0.399549,0.844830,-0.523144,0.456219,0.587735,0.351095,0.301843,0.079513,0.015770,0.375323,-0.224226,-0.232029,0.304792,-0.021949,0.204983,0.318248,0.111092,-0.032370,0.253598,0.104745,0.087775,0.252935
48380,4842.053767,4846.951420,4.864558,1.295510,3.098765,9.909526,-1.490871,-0.739652,0.629137,-2.672577,0.243531,0.300034,0.150657,1.584717,-2.440838,3.003889,3.018255,1015.866151,0.122622,1015.636181,1016.020279,0.384098,-0.203521,0.343024,-0.932673,0.649635,-0.605047,0.145455,0.325066,-0.396503,0.937829,-0.710789,-0.070262,0.259372,-0.594436,0.537931,-0.681187,52.355873,0.000042,52.355825,52.355929,-0.000104,4.956983,2.962172e-05,4.956949,4.957044,-4.589900e-06,2.053645,0.843712,0.345147,3.069094,2.723948,62.176791,3.739324,58.027951,67.729068,-7.527503,-39.273995,4.834755,-46.935912,-32.813931,-9.964983,13.125358,6.231573,-6.039076,19.311390,-20.128633,12.244473,8.676570,-5.943237,22.970099,-18.927966,-33.953001,1.134038,-35.920000,-32.299999,-3.620001,140.495000,1.378488,138.1,142.500000,-4.400000,-70.685160,8.733191,-89.492514,-48.197134,40.026300,-78.243916,83.104469,-179.145100,173.308466,98.336404,-78.490379,6.522968,-99.701881,-70.921256,-21.479222,2.0,1.075748,-0.254640,0.376874,0.591297,0.239667,-0.394326,0.843738,-0.519043,0.433892,0.581259,0.355137,0.272767,0.069533,0.008330,0.436448,-0.190362,-0.232612,0.346613,0.075780,0.162590,0.312216,0.102124,-0.087044,0.252855,0.090561,0.066207,0.248722


## Support Vector Machine

In [120]:
from LearningAlgorithms import ClassificationAlgorithms

# The pca_* columns are the last n_components columns of pca_df (see the table above).
# `num_comp` only exists as a parameter inside pca(); at notebook level the count is
# held in `n_components`.
X = pca_df.iloc[:, -n_components:]
Y = pca_df.loc[:, pca_df.columns.str.startswith('Label')]
# First 80 % of the rows for training, the remaining 20 % for testing (row order kept)
split = int(0.8 * len(pca_df))

clf = ClassificationAlgorithms()
pred_training_y, pred_test_y, frame_prob_training_y, frame_prob_test_y = clf.support_vector_machine_with_kernel(X[:split], Y[:split], X[split:])

KeyboardInterrupt: 

## Plotting

In [None]:
import seaborn as sns
from scipy import stats
import matplotlib.pyplot as plt
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
from statsmodels.tsa.seasonal import seasonal_decompose

def plot_data(data, output_path, decomposition_period=1):
    """Create the exploratory plots for ``data`` and save them as PDF files.

    Files written (each name is appended to ``output_path``): ``line_plot.pdf``,
    ``distribution_plot.pdf``, ``boxplot.pdf``, ``correlation_matrix.pdf``,
    ``pairwise_relationships.pdf``, ``histogram.pdf`` and one
    ``<column>_decomposition.pdf`` per column.

    Parameters
    ----------
    data : pandas.DataFrame
        Columns to plot.
    output_path : str
        Prefix for the output files, normally a folder ending with a path
        separator. Its parent folder is created when missing.
    decomposition_period : int, default 1
        ``period`` passed to ``seasonal_decompose``.

    Notes
    -----
    The per-column decomposition figures are closed after saving, so a frame with
    many columns does not keep dozens of figures open. A column whose
    decomposition fails is reported with the reason and skipped.
    """
    # Make sure the target folder exists before the first savefig
    target_dir = os.path.dirname(output_path)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    # Set Seaborn style and context
    sns.set_style("whitegrid")
    sns.set_context("paper")

    # Line plot
    fig, ax = plt.subplots()
    sns.lineplot(data=data, palette="tab10", linewidth=1.5, ax=ax)
    ax.set_title('Time Series Line Plot')
    ax.set_xlabel('Time')
    ax.set_ylabel('Values')
    fig.savefig(output_path + 'line_plot.pdf')

    # Distribution of the data
    fig, ax = plt.subplots()
    sns.kdeplot(data=data, ax=ax, fill=True)
    ax.set_title('Data Distribution Plot')
    ax.set_xlabel('Values')
    fig.savefig(output_path + 'distribution_plot.pdf')

    # Boxplots of the data
    fig, ax = plt.subplots()
    sns.boxplot(data=data, palette="tab10", ax=ax)
    ax.set_title('Data Boxplot')
    fig.savefig(output_path + 'boxplot.pdf')

    # Correlation matrix of the data
    fig, ax = plt.subplots()
    sns.heatmap(data.corr(), annot=True, cmap='coolwarm', ax=ax)
    ax.set_title('Data Correlation Matrix')
    fig.savefig(output_path + 'correlation_matrix.pdf')

    # Pairwise relationships: pairplot builds its own figure, so title and save
    # must go through the returned grid (a separately created figure stays empty).
    grid = sns.pairplot(data)
    grid.fig.suptitle('Pairwise Relationships', y=1.02)
    grid.savefig(output_path + 'pairwise_relationships.pdf')

    # Histogram: DataFrame.hist lays out one subplot per column itself; passing a
    # single Axes for several columns makes pandas clear that figure.
    hist_axes = data.hist(bins=30)
    hist_fig = np.ravel(hist_axes)[0].figure
    hist_fig.suptitle('Data Histogram')
    hist_fig.savefig(output_path + 'histogram.pdf')

    # Time series decomposition
    for col in data.columns:
        fig = None
        try:
            result = seasonal_decompose(data[col], model='additive', period=decomposition_period)
            fig, (ax1, ax2, ax3, ax4) = plt.subplots(4, 1, figsize=(10, 8))
            result.observed.plot(ax=ax1)
            ax1.set_ylabel('Observed')
            result.trend.plot(ax=ax2)
            ax2.set_ylabel('Trend')
            result.seasonal.plot(ax=ax3)
            ax3.set_ylabel('Seasonal')
            result.resid.plot(ax=ax4)
            ax4.set_ylabel('Residual')
            # Column names such as 'X (m/s^2)' contain path separators
            safe_name = str(col).replace('/', '_').replace(os.sep, '_')
            fig.savefig(output_path + f'{safe_name}_decomposition.pdf')
        except Exception as exc:
            # Best effort per column, but say why and let KeyboardInterrupt through
            print(f"Cannot decompose {col}: {exc}")
        finally:
            if fig is not None:
                plt.close(fig)

# Figures are written below the experiment folder; the trailing slash matters
# because plot_data appends the file names to this string.
output_path = experiment + "figures/"
plot_data(df, output_path)