In [13]:
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from kan import *
from kan.KAN import KAN
import sympy as sp
import warnings
from tqdm import tqdm
import matplotlib.pyplot as plt
import os
from kan import *
from kan.KAN import KAN
# Compute device identifier for the torch models ('cuda' selects the GPU).
device = 'cuda'

In [14]:
from dataloader import preprocess_data, construct_datasets

# When True, results are reported on the raw 'c_mean' scale instead of the normalized one.
unnorm = True

# Feature columns passed to every model.
# NOTE(review): the order presumably has to match the order used at training time - confirm.
columns_input = [
    'finestep_norm',
    'step_diff',
    'step_60rsum_exp',
    'step_300rsum_exp',
    'c_mean_lag10',
    'c_mean_10rmean',
]

# Name of the target column that matches the selected scale.
if unnorm:
    columns_output = 'c_mean'
else:
    columns_output = 'c_mean_norm'

In [15]:
# Hysteresis evaluation sets: one preprocessed frame per recording.
# Forward slashes are used (as in the other cells) so the relative paths resolve
# on POSIX systems as well as on Windows.
hysteresis_test_data = [
    preprocess_data(pd.read_csv(csv_path))
    for csv_path in (
        '../dat/walks/random_walk_180min.csv',
        '../dat/hysteresis/hysteresis_display.csv',
    )
]
#hysteresis_test_df = preprocess_data(pd.read_csv(r'..\dat\walks\random_walk_180min.csv'))
#hysteresis_test_data = (hysteresis_test_df[columns_input], hysteresis_test_df[columns_output])
def unnorm_data(data, df):
    """Invert a min-max scaling using the value range found in ``df``.

    The first of the columns ``'c_mean'`` and ``'counter'`` that exists in ``df``
    supplies the minimum and maximum; the result is ``data * (max - min) + min``.
    If neither column is present, ``data`` is returned unchanged.

    Args:
        data: Scalar or array-like of scaled values.
        df: DataFrame providing the reference column.

    Returns:
        ``data`` mapped back onto the reference column's value range.
    """
    for reference in ('c_mean', 'counter'):
        if reference in df.columns:
            lowest = df[reference].min()
            highest = df[reference].max()
            return data * (highest - lowest) + lowest
    return data

In [16]:
# Collect the preprocessed creep recordings whose file name starts with
# 'random_walk_30min_pause30at30min'.
creep_test_data = []
for root, dirs, files in os.walk('../dat/creep'):
    # os.walk yields entries in arbitrary, filesystem-dependent order. Sorting makes
    # the position of each recording in the list (later used as the index in output
    # file names) reproducible between runs and machines.
    dirs.sort()
    for file in sorted(files):
        if file.startswith('random_walk_30min_pause30at30min') and file.endswith('.csv'):
            data = preprocess_data(pd.read_csv(os.path.join(root, file)))
            creep_test_data.append(data)

In [17]:
import pickle
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error
# Restore the pickled linear-regression baseline. The context manager closes the
# file handle, which the bare open() call left open.
# NOTE(review): pickle.load can execute arbitrary code - only load trusted artifacts.
with open('../exp/final/linreg_model.sav', 'rb') as model_file:
    linreg_model = pickle.load(model_file)

In [18]:
# Define the neural network architecture
class SimpleNN(torch.nn.Module):
    """Feed-forward regressor with one hidden layer of 10 ReLU units and a scalar output.

    The input width is ``len(columns_input)``. The attribute names ``fc1`` and
    ``fc3`` determine the parameter names in the state dict, so they are kept as-is.
    """

    def __init__(self):
        super().__init__()
        n_features = len(columns_input)
        self.fc1 = torch.nn.Linear(n_features, 10)
        self.fc3 = torch.nn.Linear(10, 1)

    def forward(self, x):
        """Return ``fc3(relu(fc1(x)))``."""
        hidden = torch.relu(self.fc1(x))
        return self.fc3(hidden)
# Instantiate the MLP on the configured device (instead of a hard-coded 'cuda')
# and restore its trained weights.
nn_model = SimpleNN().to(device)
# map_location makes loading independent of the device the checkpoint was saved from.
nn_model.load_state_dict(torch.load(r'./final/simple_nn.pth', map_location=device))
# Switch to evaluation mode before running inference.
nn_model.eval()
nn_model

SimpleNN(
  (fc1): Linear(in_features=6, out_features=10, bias=True)
  (fc3): Linear(in_features=10, out_features=1, bias=True)
)

In [19]:
class LSTMModel(torch.nn.Module):
    """LSTM followed by a linear head that emits one value per output step.

    Args:
        num_layers: Number of stacked LSTM layers.
        hidden_size: Width of the LSTM hidden state (and of the linear head's input).
    """

    def __init__(self, num_layers, hidden_size):
        super().__init__()
        self.lstm = torch.nn.LSTM(
            input_size=len(columns_input),
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.linear = torch.nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Run the LSTM over ``x`` and project every output step to a single value."""
        sequence_out, _state = self.lstm(x)
        return self.linear(sequence_out)
# Instantiate the LSTM (1 layer, hidden size 29) on the configured device
# (instead of a hard-coded 'cuda') and restore its trained weights.
lstm_model = LSTMModel(1, 29).to(device)
# map_location makes loading independent of the device the checkpoint was saved from.
lstm_model.load_state_dict(torch.load(r'./final/lstm.pth', map_location=device))
# Switch to evaluation mode; as the last expression this also displays the model.
lstm_model.eval()

LSTMModel(
  (lstm): LSTM(6, 29, batch_first=True)
  (linear): Linear(in_features=29, out_features=1, bias=True)
)

In [20]:
# Build a KAN with one hidden node and a single output, then restore its checkpoint.
# NOTE(review): grid/k/seed presumably have to match the values used when
# 'model.ckpt' was written - confirm.
kan_model = KAN(
    width=[len(columns_input), 1, 1],
    grid=100,
    k=3,
    seed=0,
    device='cuda',
)
kan_model.load_ckpt('model.ckpt')
kan_model

KAN(
  (biases): ModuleList(
    (0-1): 2 x Linear(in_features=1, out_features=1, bias=False)
  )
  (act_fun): ModuleList(
    (0-1): 2 x KANLayer(
      (base_fun): SiLU()
    )
  )
  (base_fun): SiLU()
  (symbolic_fun): ModuleList(
    (0-1): 2 x Symbolic_KANLayer()
  )
)

In [21]:
# Collects the evaluation results per experiment type; the cells below add the
# 'hysteresis' and 'creep' entries.
metrics = {}

In [22]:
# Evaluate every model on each hysteresis recording, store the predictions next to
# the data, write them to CSV and collect MAE / MSE per recording.
metrics['hysteresis'] = []
for i, hysteresis_test_df in enumerate(hysteresis_test_data):
    # Rows with a missing feature cannot be fed to the models.
    input_data_nona = hysteresis_test_df[columns_input].dropna()
    features = input_data_nona.values

    # Build the tensor once and share it between the torch models; no_grad avoids
    # recording an autograd graph that is never used.
    with torch.no_grad():
        features_tensor = torch.tensor(features, dtype=torch.float32).to(device)
        predictions = {
            'linreg': linreg_model.predict(features),
            'nn': nn_model(features_tensor).detach().cpu().numpy(),
            'lstm': lstm_model(features_tensor).detach().cpu().numpy(),
            'kan': kan_model(features_tensor).detach().cpu().numpy(),
        }

    for model_name, prediction in predictions.items():
        # Flatten (N, 1) model outputs so each value aligns with exactly one row.
        prediction = np.asarray(prediction).ravel()
        if unnorm:
            prediction = unnorm_data(prediction, hysteresis_test_df)
        # The frame is extended in place so the CSV below contains the predictions.
        hysteresis_test_df.loc[input_data_nona.index, model_name] = prediction

    # Score against the target column that matches the prediction scale
    # ('c_mean' when unnorm is True, 'c_mean_norm' otherwise).
    eval_columns = columns_input + [columns_output] + list(predictions)
    hysteresis_test_df_nona = hysteresis_test_df[eval_columns].dropna()

    hysteresis_test_df.to_csv(f'../dat/predictions/hysteresis_{i}.csv', index=False)

    target = hysteresis_test_df_nona[columns_output]
    mae_hysteresis = {
        model_name: mean_absolute_error(target, hysteresis_test_df_nona[model_name])
        for model_name in predictions
    }
    mse_hysteresis = {
        model_name: mean_squared_error(target, hysteresis_test_df_nona[model_name])
        for model_name in predictions
    }
    metrics['hysteresis'].append({'mae': mae_hysteresis, 'mse': mse_hysteresis})



In [23]:
# Row position that splits each creep recording: rows before it are predicted in one
# batch from the recorded features, rows from it onward are predicted one at a time
# with the lag features replaced by the model's own earlier predictions.
cutoff = 18000


metrics['creep'] = []

for i, creep_test_df in enumerate(creep_test_data):

    # These empty lists are reassigned a few lines below with the batch predictions.
    linreg_predictions = []
    nn_predictions = []
    lstm_predictions = []
    kan_predictions = []
    # UserWarnings raised inside this block are suppressed.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=UserWarning)

        # Part 1: batch prediction on the rows before the cutoff (positional split).
        first_half = creep_test_df.iloc[:cutoff][columns_input]
        first_half_nonan = first_half.dropna()

        linreg_predictions = list(linreg_model.predict(first_half_nonan))
        nn_predictions = list(nn_model(torch.tensor(first_half_nonan.values, dtype=torch.float32).to('cuda')).detach().cpu().numpy())
        lstm_predictions = list(lstm_model(torch.tensor(first_half_nonan.values, dtype=torch.float32).to('cuda')).detach().cpu().numpy())
        kan_predictions = list(kan_model(torch.tensor(first_half_nonan.values, dtype=torch.float32).to('cuda')).detach().cpu().numpy())

        # Part 2: row-by-row prediction on the remaining rows.
        # NOTE(review): dropna() here considers ALL columns, whereas all_index further
        # down only considers columns_input; if another column holds NaN after the
        # cutoff, the prediction count and all_index will differ - confirm.
        second_half = creep_test_df.iloc[cutoff:]
        second_half_nona = second_half.dropna()

        for index, row in tqdm(second_half_nona.iterrows(), total=len(second_half_nona)):
            # Each model gets its own feature Series so it can be modified independently.
            linreg_input = row[columns_input]
            nn_input = row[columns_input]
            lstm_input = row[columns_input]
            kan_input = row[columns_input]
            # NOTE(review): 'output' is not used anywhere in the loop.
            output = row[columns_output]

            # Replace the lag feature with the model's own prediction from 10 steps back,
            # and adjust the rolling mean: scale it to a sum of 10, subtract this row's
            # 'c_mean_norm' and add that same prediction.
            # NOTE(review): confirm this is the intended rolling-window update.
            linreg_input['c_mean_lag10'] = linreg_predictions[-10]
            linreg_input['c_mean_10rmean'] = (linreg_input['c_mean_10rmean'] * 10 - row['c_mean_norm'] + linreg_predictions[-10]) / 10
            nn_input['c_mean_lag10'] = nn_predictions[-10]
            nn_input['c_mean_10rmean'] = (nn_input['c_mean_10rmean'] * 10 - row['c_mean_norm'] + nn_predictions[-10]) / 10
            lstm_input['c_mean_lag10'] = lstm_predictions[-10]
            lstm_input['c_mean_10rmean'] = (lstm_input['c_mean_10rmean'] * 10 - row['c_mean_norm'] + lstm_predictions[-10]) / 10
            kan_input['c_mean_lag10'] = kan_predictions[-10]
            kan_input['c_mean_10rmean'] = (kan_input['c_mean_10rmean'] * 10 - row['c_mean_norm'] + kan_predictions[-10]) / 10

            # The MLP receives a 1-D feature vector; the LSTM and KAN inputs are
            # reshaped to a single row of features.
            nn_input = torch.tensor(nn_input.values, dtype=torch.float32).to('cuda')
            lstm_input = torch.tensor(lstm_input.values, dtype=torch.float32).view(1, -1).to('cuda')
            kan_input = torch.tensor(kan_input.values, dtype=torch.float32).to('cuda').view(1, -1)

            # One-step predictions; the [0] drops the leading single-row dimension.
            linreg_output = linreg_model.predict([linreg_input])[0]
            nn_output = nn_model(nn_input).cpu().detach().numpy()
            lstm_output = lstm_model(lstm_input).cpu().detach().numpy()[0]
            kan_output = kan_model(kan_input).cpu().detach().numpy()[0]

            # Appending makes this prediction available as a lag feature 10 rows later.
            linreg_predictions.append(linreg_output)
            nn_predictions.append(nn_output)
            lstm_predictions.append(lstm_output)
            kan_predictions.append(kan_output)

        linreg_predictions = np.array(linreg_predictions)
        nn_predictions = np.array(nn_predictions)
        lstm_predictions = np.array(lstm_predictions)
        kan_predictions = np.array(kan_predictions)

        #results = pd.DataFrame({'linreg': linreg_predictions, 'nn': nn_predictions, 'lstm': lstm_predictions, 'kan': kan_predictions, 'gt': creep_test_df['c_mean'].values})
        #
        # Map the predictions back to the raw 'c_mean' range of this recording.
        if unnorm:
            linreg_predictions = unnorm_data(linreg_predictions, creep_test_df)
            nn_predictions = unnorm_data(nn_predictions, creep_test_df)
            lstm_predictions = unnorm_data(lstm_predictions, creep_test_df)
            kan_predictions = unnorm_data(kan_predictions, creep_test_df)

        # Write the predictions into the rows that have complete input features.
        all_index = creep_test_df[columns_input].dropna().index
        creep_test_df.loc[all_index, 'linreg'] = linreg_predictions
        creep_test_df.loc[all_index, 'nn'] = nn_predictions
        creep_test_df.loc[all_index, 'lstm'] = lstm_predictions
        creep_test_df.loc[all_index, 'kan'] = kan_predictions
        creep_test_df.to_csv(r'../dat/predictions/creep_{}.csv'.format(i), index=False)

        # Metrics over the rows from the cutoff onward.
        # NOTE(review): .loc[cutoff:] slices by index label while the split above used
        # positional .iloc; these agree only for a default 0..N-1 index - confirm.
        # NOTE(review): the target is always 'c_mean', independent of 'unnorm' /
        # columns_output, and rows are not NaN-filtered before scoring - confirm.
        mae_creep = {'linreg': mean_absolute_error(creep_test_df.loc[cutoff:, 'c_mean'], creep_test_df.loc[cutoff:, 'linreg']),
                    'nn': mean_absolute_error(creep_test_df.loc[cutoff:, 'c_mean'], creep_test_df.loc[cutoff:, 'nn']),
                    'lstm': mean_absolute_error(creep_test_df.loc[cutoff:, 'c_mean'], creep_test_df.loc[cutoff:, 'lstm']),
                    'kan': mean_absolute_error(creep_test_df.loc[cutoff:, 'c_mean'], creep_test_df.loc[cutoff:, 'kan'])}
        mse_creep = {'linreg': mean_squared_error(creep_test_df.loc[cutoff:, 'c_mean'], creep_test_df.loc[cutoff:, 'linreg']),
                    'nn': mean_squared_error(creep_test_df.loc[cutoff:, 'c_mean'], creep_test_df.loc[cutoff:, 'nn']),
                    'lstm': mean_squared_error(creep_test_df.loc[cutoff:, 'c_mean'], creep_test_df.loc[cutoff:, 'lstm']),
                    'kan': mean_squared_error(creep_test_df.loc[cutoff:, 'c_mean'], creep_test_df.loc[cutoff:, 'kan'])}

        metrics['creep'].append({'mae': mae_creep, 'mse': mse_creep})


  1%|          | 107/17874 [00:00<01:44, 170.36it/s]


KeyboardInterrupt: 

#### Convert metric values to plain floats

In [None]:
"""{'hysteresis': {'mae': {'linreg': 0.4474972192075208,
   'nn': 0.6523380894823934,
   'lstm': 0.47646518893781803,
   'kan': 0.4456869171699789},
  'mse': {'linreg': 0.3218915264254114,
   'nn': 0.691926567500339,
   'lstm': 0.36061896650604497,
   'kan': 0.3194004416851071}},
 'creep': [{'mae': {'linreg': array([0.4445156]),
    'nn': array([1.05049314]),
    'lstm': array([0.44028779]),
    'kan': array([0.4429773])},
   'mse': {'linreg': array([0.30971118]),
    'nn': array([1.36921059]),
    'lstm': array([0.30319218]),
    'kan': array([0.30720424])}},
  {'mae': {'linreg': array([0.59640771]),"""

# crawl through the dict and convert numpy arrays to floats
def convert_to_floats(d):
    """Recursively replace the leaves of a nested metrics container with Python floats.

    The container is modified in place and also returned. Dicts (or other objects
    exposing ``keys()``) and lists may be nested arbitrarily, so both a single
    ``{'mae': {...}, 'mse': {...}}`` dict and a list of such dicts are accepted.

    Args:
        d: Dict or list whose leaves are numbers, NumPy scalars or one-element
            NumPy arrays.

    Returns:
        The same object ``d`` with every leaf converted to ``float``.

    Raises:
        ValueError: If a leaf is an array holding more than one element.
    """
    # Snapshot the keys/positions first; values are then replaced slot by slot.
    slots = range(len(d)) if isinstance(d, list) else list(d.keys())
    for slot in slots:
        value = d[slot]
        if isinstance(value, (dict, list)):
            convert_to_floats(value)
        else:
            # .item() extracts the single element of 0-d and one-element arrays;
            # calling float() directly on a 1-d array is deprecated in NumPy.
            d[slot] = float(np.asarray(value).item())
    return d

# metrics['hysteresis'] and metrics['creep'] are both lists with one
# {'mae': ..., 'mse': ...} dict per recording, so each entry is converted on its own.
metrics2 = [convert_to_floats(v) for v in metrics['hysteresis']]
metrics3 = [convert_to_floats(v) for v in metrics['creep']]

In [None]:
# Store the float-converted results back under their original keys.
metrics['hysteresis'], metrics['creep'] = metrics2, metrics3

In [None]:
import json
# Persist the collected metrics as JSON next to the prediction files.
metrics_path = r'../dat/predictions/metrics.json'
with open(metrics_path, 'w') as metrics_file:
    json.dump(metrics, metrics_file)

### Un-normalize the predictions

In [None]:
# For every creep prediction file, add a '<column>_unnorm' copy of each column that is
# mapped back to the 'c_mean' range of the raw recording with the same index.
# NOTE(review): the prediction CSVs written by the creep evaluation cell already hold
# un-normalized predictions when 'unnorm' is True and contain all frame columns;
# confirm that re-scaling every column here is still intended.
for i, _ in enumerate(creep_test_data):
    pred = pd.read_csv(r'../dat/predictions/creep_{}.csv'.format(i))
    gt = pd.read_csv(r'../dat/creep/random_walk_30min_pause30at30min_{}.csv'.format(i))
    ma, mi = gt['c_mean'].max(), gt['c_mean'].min()
    # Drop '_unnorm' columns left by an earlier run so the cell can be re-run.
    # .copy() yields an independent frame, so the column assignments below do not
    # raise SettingWithCopyWarning.
    pred = pred.loc[:, ~pred.columns.str.contains('_unnorm')].copy()
    # Iterate over a snapshot of the column names because columns are added in the loop.
    for col in list(pred.columns):
        # Normalization was (x - min) / (max - min); invert it with x * (max - min) + min.
        pred[col + '_unnorm'] = pred[col] * (ma - mi) + mi
    pred.to_csv(r'../dat/predictions/creep_{}.csv'.format(i), index=False)
pred.head()

Unnamed: 0,linreg,nn,lstm,kan,gt,linreg_unnorm,nn_unnorm,lstm_unnorm,kan_unnorm,gt_unnorm
0,0.405466,0.418377,0.407117,0.405538,0.40708,-37.383732,-35.07785,-37.088993,-37.370893,-37.095575
1,0.405725,0.417762,0.406015,0.40574,0.402655,-37.337533,-35.187742,-37.285703,-37.334869,-37.885841
2,0.406412,0.417851,0.406155,0.406399,0.408555,-37.214842,-35.171763,-37.260676,-37.217174,-36.832153
3,0.406729,0.417159,0.406055,0.406588,0.402655,-37.158175,-35.295473,-37.278555,-37.183364,-37.885841
4,0.406193,0.419966,0.406139,0.406248,0.40413,-37.2539,-34.794001,-37.263561,-37.244107,-37.622419


In [None]:
gt.loc[600:610]

Unnamed: 0,finestep,time,c_0,c_1,c_2,c_3,c_4,temp,c_mean
600,25921,58.973234,-89,-89,-87,-86,-89,21.1,-88.0
601,25897,59.073553,-88,-90,-89,-92,-88,21.1,-89.4
602,25875,59.188804,-90,-92,-90,-90,-90,21.1,-90.4
603,25857,59.284238,-92,-90,-90,-88,-89,21.1,-89.8
604,25842,59.396058,-91,-89,-87,-89,-86,21.1,-88.4
605,25832,59.484566,-90,-87,-88,-89,-90,21.1,-88.8
606,25826,59.587755,-88,-89,-89,-90,-90,21.1,-89.2
607,25815,59.684129,-89,-91,-91,-90,-89,21.1,-90.0
608,25798,59.788589,-87,-89,-89,-90,-92,21.1,-89.4
609,25777,59.888521,-89,-89,-90,-89,-91,21.1,-89.6
