<a href="https://colab.research.google.com/github/jlab-sensing/MFC_Modeling/blob/main/SNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#  SNN Models
###  In order to run the code in this notebook, you must download the `ucscMFCDataset` directory and `stanfordMFCDataset.zip`, which expands into the directory `rocket4`, from [Hugging Face](https://huggingface.co/datasets/adunlop621/Soil_MFC/tree/main), and store them in the same directory as this notebook. You can also find several pretrained models in the at this link, with the naming conventions described in the [README](https://github.com/jlab-sensing/MFC_Modeling#:~:text=Repository%20files%20navigation-,README,-MFC_Modeling)

In [2]:
%pip install --upgrade hepml
%pip install arrow
%pip install keras_lr_finder
%pip install pandas
%pip install snntorch --quiet

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [3]:
# reload modules before executing user code
#%load_ext autoreload
# reload all modules every time before executing Python code
#%autoreload 2
# render plots in notebook

# Misc imports
%matplotlib inline
import datetime
import pandas as pd
import numpy as np
import glob
import matplotlib.pyplot as plt
import seaborn as sns
from hepml.core import plot_regression_tree
sns.set(color_codes=True)
sns.set_palette(sns.color_palette("muted"))
import random
import statistics

# sklearn imports
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_percentage_error as MAPE
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.model_selection import TimeSeriesSplit

# torch imports
import torch
import torch.nn as nn

# snnTorch imports
import snntorch as snn
from snntorch import functional as SF
import snntorch.spikeplot as splt

# # keras imports
# from keras.models import Sequential
# from keras.layers import Dense
# from keras.layers import LSTM
# from keras import backend as K



##  Load and Format Dataset 1

### Remember to download `stanfordMFCDataset.zip`, which expands into the directory `rocket4`, from [Hugging Face](https://huggingface.co/datasets/adunlop621/Soil_MFC/tree/main), and store it in the same directory as this notebook before executing the following code.

In [4]:
#Load teros data
import glob
teros_files = glob.glob("rocket4/TEROSoutput*.csv")
X = pd.DataFrame()
for f in teros_files:
  try:
    csv = pd.read_csv(f, index_col=False).dropna()
    X = pd.concat([X, csv])
  except:
    continue

In [5]:
#Load power data
power_files = glob.glob("rocket4/soil*.csv")
y = pd.DataFrame()
for f in sorted(power_files, key=lambda x: int(x.split('.')[0].split('_')[-1])):
#in power_files:
  try:
    csv = pd.read_csv(f, on_bad_lines='skip', skiprows=10).dropna(how='all')
    csv = csv.rename({'Unnamed: 0': 'timestamp'}, axis='columns')
    y = pd.concat([y,csv])
  except:
    continue
y["timestamp"] = y["timestamp"].round(decimals = 1)

In [6]:
#Convert current to amps, voltage to volts
y["I1L [10pA]"] = np.abs(y["I1L [10pA]"] * 1E-11)
y["V1 [10nV]"] = np.abs(y["V1 [10nV]"] * 1E-8)
y["I1H [nA]"] = np.abs(y["I1H [nA]"] * 1E-9)

In [7]:
#Sort data by timestamp, convert to datetime
X = X.sort_values(['timestamp'])
y = y.sort_values(['timestamp'])
X['timestamp'] = pd.to_datetime(X['timestamp'], unit='s')
y['timestamp'] = pd.to_datetime(y['timestamp'], unit='s')

#Merge data by timestamp
uncut_df = pd.merge_asof(left=X,right=y,direction='nearest',tolerance=pd.Timedelta('1 sec'), on = 'timestamp').dropna(how='all')

#Isolate data from cell0
df = uncut_df.loc[uncut_df['sensorID'] == 0]

#Localize timestamp
df.timestamp = df.timestamp.dt.tz_localize('UTC').dt.tz_convert('US/Pacific')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.timestamp = df.timestamp.dt.tz_localize('UTC').dt.tz_convert('US/Pacific')


In [8]:
#Use only data from after deployment date
#df = df.loc[(df['timestamp'] > '2021-09-24') & (df['timestamp'] < '2021-10-15')] #Future of Clean Computing Graph
#df = df.loc[(df['timestamp'] > '2021-06-24') & (df['timestamp'] < '2021-07-02')]
#df = df.loc[(df['timestamp'] > '2021-06-18')] #Two weeks after deployment
df = df.loc[(df['timestamp'] > '2021-06-04')] #Deployment date
#df = df.loc[(df['timestamp'] > '2021-06-25') & (df['timestamp'] < '2021-06-26')] #Small training set

#Power drop
#df = df.loc[(df['timestamp'] > '2021-11-01') & (df['timestamp'] < '2021-11-22')]

#Drop data outages
df = df.drop(df[(df.timestamp > '2021-11-11') & (df.timestamp < '2021-11-22 01:00:00')].index)
df = df.drop(df[(df.timestamp > '2022-01-27')].index)
#df = df.set_index('timestamp')
df = df[:-1]

In [9]:
df = df.set_index('timestamp')

In [10]:
#Get time since deployement
df['tsd'] = (df.index - df.index[0]).days
df['hour'] = (df.index).hour

In [11]:
#Calculate power
df["power"] = np.abs(np.multiply(df.iloc[:, 7], df.iloc[:, 8]))
#df["power"] = np.abs(np.multiply(df["I1L [10pA]"], df["V1 [10nV]"]))

#Convert to nW
df['power'] = df['power']*1E9

In [12]:
#Convert to 10 nanoamps, 10 microvolts
df["I1L [10pA]"] = np.abs(df["I1L [10pA]"] * 1E8)
df["V1 [10nV]"] = np.abs(df["V1 [10nV]"] * 1E5)
df["I1H [nA]"] = np.abs(df["I1H [nA]"] * 1E8)

In [13]:
df = df.reset_index()

In [14]:
#Add power time series
df['power - 1h'] = df['power'].shift(1).dropna()
df['power - 2h'] = df['power'].shift(2).dropna()
df['power - 3h'] = df['power'].shift(3).dropna()
#df['power - 2h'] = df['power'].shift(2).dropna()
#df['previous_power - 3'] = df['power'].shift(3).dropna()
#df['previous_power - 4'] = df['power'].shift(4).dropna()

#Add teros time series
df['EC - 1h'] = df['EC'].shift(1).dropna()
df['EC - 2h'] = df['EC'].shift(2).dropna()
df['EC - 3h'] = df['EC'].shift(3).dropna()

df['temp - 1h'] = df['temp'].shift(1).dropna()
df['temp - 2h'] = df['temp'].shift(2).dropna()
df['temp - 3h'] = df['temp'].shift(3).dropna()

df['raw_VWC - 1h'] = df['raw_VWC'].shift(1).dropna()
df['raw_VWC - 2h'] = df['raw_VWC'].shift(2).dropna()
df['raw_VWC - 3h'] = df['raw_VWC'].shift(3).dropna()

#Add voltage and current time series
df['V1 - 1h'] = df['V1 [10nV]'].shift(1).dropna()
df['V1 - 2h'] = df['V1 [10nV]'].shift(2).dropna()
df['V1 - 3h'] = df['V1 [10nV]'].shift(3).dropna()

df['I1L - 1h'] = df['I1L [10pA]'].shift(1).dropna()
df['I1L - 2h'] = df['I1L [10pA]'].shift(2).dropna()
df['I1L - 3h'] = df['I1L [10pA]'].shift(3).dropna()

df['I1H - 1h'] = df['I1H [nA]'].shift(1).dropna()
df['I1H - 2h'] = df['I1H [nA]'].shift(2).dropna()
df['I1H - 3h'] = df['I1H [nA]'].shift(3).dropna()
df = df.dropna()

In [15]:
#df = df.rename(columns={'power': 'power [μW]'})
df = df.rename(columns={'I1L [10pA]': 'Current (uA)', 'V1 [10nV]' : 'Voltage (mV)', 'power' : 'Power (uW)'})
df = df.set_index('timestamp')

In [16]:
#New runtime calculation
import math
from dateutil import parser
from matplotlib import pyplot as plt
from datetime import datetime, timedelta

def internal_R_v3(R=2000): #return internal resistance of v3 cells in ohms
    #https://www.jstage.jst.go.jp/article/jwet/20/1/20_21-087/_pdf
    v0_oc = 48.5e-3 #48.5 mV
    v0_cc = 4.8e-3
    v0_r = R*((v0_oc/v0_cc)-1)

    v1_oc = 43.8e-3
    v1_cc = 20.9e-3
    v1_r = R*((v1_oc/v1_cc)-1)

    v2_oc = 45.2e-3
    v2_cc = 23.5e-3
    v2_r = R*((v2_oc/v2_cc)-1)

    return (v0_r+v1_r+v2_r)/3

def internal_R_v0(R=2000): #return internal resistance of v0 cells in ohms
    v3_oc = 41.7e-3 #41.7mV
    v3_cc = 5.1e-3
    v3_r = R*((v3_oc/v3_cc)-1)

    v4_oc = 48.7e-3
    v4_cc = 16.8e-3
    v4_r = R*((v4_oc/v4_cc)-1)

    v5_oc = 39.1e-3
    v5_cc = 16.9e-3
    v5_r = R*((v5_oc/v5_cc)-1)

    return (v3_r+v4_r+v5_r)/3

def SMFC_current(v, R):
    return v/R

#MODEL
def cap_leakage(E_cap_tn, timestep):
    #Spec for KEMET T491
    return 0.01e-6 * E_cap_tn * timestep

def Matrix_Power(V, R):
    #efficiency interpolated from https://www.analog.com/media/en/technical-documentation/data-sheets/ADP5091-5092.pdf
    #given I_in = 100 uA and SYS = 3V
    #V is the voltage (V) of the SMFC we captured
    #R is the resistance (ohms) of the load we used to get that voltage trace
    #Eta = -292.25665*V**4 + 784.30311*V**3 - 770.71691*V**2 + 342.00502*V + 15.83307
    #Eta = Eta/100
    Eta = 0.60
    Pmax = (V**2)/R
    Pout = Eta*Pmax
    #assert((Eta > 0) & (Eta < 1))
    #assert(Pout < 12000e-6)
    return Pout

def update_capEnergy(e0, V_applied, R, C, dt):
    # e0: initial energy stored
    # V_applied: voltage from SMFC
    # R: internal resistance of SMFC
    # C: capacitance of capacitor
    # dt: time step since last data point
    e_cap = e0 + Matrix_Power(V_applied, R)*dt - cap_leakage(e0, dt)
    v_cap = math.sqrt(2*e_cap/C)
    if e_cap < 0: #Not charging if leakage is greater than energy
        e_cap = 0

    return e_cap, v_cap #output final e and v

def Advanced_energy():
    #Now representing "Advanced"
    #startup time of 2500 ms
    t = 2500e-3
    e = 2.4 * 128e-3 * t
    e_startup = 2.4 * 128e-3 * 5e-3
    return e+e_startup

def Minimal_energy():
    #Now representing "Minimal"
    t = 0.888e-3 #tentative time
    e = 0.9 * 4.8e-3 * t #this uses average current
    e_startup = 0#assume negligible, no known startup time given
    return  e + e_startup

def Analog_energy():
    #Now representing Analog
    t = 1e-3 #estimated operating time
    e = 0.11 * 2.15e-6 * t
    e_startup = 0 #analog device, no startup needed :)
    return e + e_startup

#STEP 3:
# For each day:
#   on_Minimal, on_Advanced, on_Analog = 0
#   For each time step (like every 60 s given our logging freq):
#       - Update the energy in our capacitor (put fcn in models.py) given (1) input voltage, (2) time step, (3) capacitance (prob 10 uF), this will be an integral
#       - Check if energy is enough to turn on (1) 1 uJ load, (2) 10 uJ load, and (3) 20 uJ load (will tweak later to reflect real energy cost of each system)
#       - If so, add to on_Minimal, on_Advanced, and on_Analog and reset capacitor energy to 0 J (might tweak this value)
#   Append on_Minimal, on_Advanced, on_Analog to on_Minimal_list, on_Advanced_list, on_Analog_list. This will be a list of how many sensor readings we are able to take with each of these systems every day given the energy we got
#STEP 4: Visualize the daily # of readings with 3 bar graphs, y axis is # of readings and x axis is days.
#   - Given 3 lists of integer values, plot them on bar graphs

def group_util(test_date1, test_date2, N):
    diff = (test_date2 - test_date1) / N
    return [test_date1 + diff * idx for idx in range(N)] + [test_date2]

def oracle_simulate(v_list, C_h):
    #Calculate maximum energy
    total_E = 0
    for i in range(len(v_list) - 1):
        t = (v_list.index[i+1] - v_list.index[i]).total_seconds()
        if t > 180:
          print("Discontinuity")
          print(v_list.index[i+1], v_list.index[i])
          print(v_list['Voltage (mV)'][i+1], v_list['Voltage (mV)'][i])
          #total_E, ignore = update_capEnergy(total_E, V_applied=(v_list['V1 [mV]'][i+1] + v_list['V1 [mV]'][i])/2, R=internal_R_v0(), C=C_h[0], dt = t)
        else:
          total_E, ignore = update_capEnergy(total_E, V_applied=max(v_list['Voltage (mV)'][i], v_list['Voltage (mV)'][i+1]), R=internal_R_v0(), C=C_h[0], dt = t)
    print("Oracle activations:", math.floor(total_E/Minimal_energy()))
    return(math.floor(total_E/Minimal_energy()))

def naive_simulate(t_list, v_list, v_list_naive, v_list_fine, C_h):
    # t_list: list of decimal time stamps in unit of days (e.g. 71.85893518518519 day), same length as v_list
    # v_list: list of voltage values from SFMC
    # C_h: capacitance of the capacitor being filled up by harvester

    #assume capacitor is completely discharged at start
    e_minimal_stored = 0
    e_minimal_stored_theo = 0

    #Initialize evaluation metrics
    false_act = 0
    max_act = 0
    pred_act = 0
    succ_act = 0

    total_E = 0
    total_E_naive = 0

    #Calculate maximum energy
    #for i in range(len(v_list_fine) - 1):
    #    t = (v_list_fine.index[i+1] - v_list_fine.index[i]).total_seconds()
    #    total_E, ignore = update_capEnergy(total_E, V_applied=v_list_fine['V1 [10nV]'][i], R=internal_R_v0(), C=C_h[0], dt = t)
    #print(total_E/Minimal_energy())
    v = v_list_naive.mean()
    #for each voltage data point
    for jj in range(len(v_list) - 1): #last data point was at 71.85893518518519 day
        t = (v_list.index[jj+1] - v_list.index[jj]).total_seconds()
        if t <= time_frame_seconds:
          #Total predicted vs. actual energy stored
          #Predict energy stored during scheduled sub-interval
          total_E, ignore = update_capEnergy(total_E, V_applied=v_list[jj], R=internal_R_v0(), C=C_h[0], dt = t)
          total_E_naive, ignore = update_capEnergy(total_E_naive, V_applied=v, R=internal_R_v0(), C=C_h[0], dt = t)

          E_Minimal_pred, v_minimal_pred = update_capEnergy(e_minimal_stored, V_applied=v, R=internal_R_v0(), C=C_h[0], dt = t) #set dt as length of prediction interval, in seconds
          pred_act += math.floor(E_Minimal_pred/Minimal_energy()) #Update number of activations predicted
          itn = 0
          if math.floor(E_Minimal_pred/Minimal_energy()) > 0:
              minimal_intervals = [date for date in group_util(v_list.index[jj], v_list.index[jj] + timedelta(seconds=t), math.floor(E_Minimal_pred/Minimal_energy()))]
              #Calculate desired interval
              int_len = time_frame_seconds /  math.floor(E_Minimal_pred/Minimal_energy())
              for i in range(len(minimal_intervals) - 1):
                  #Determine actual energy stored during scheduled sub-interval
                  start = v_list_fine.index.searchsorted(minimal_intervals[i])
                  end =  v_list_fine.index.searchsorted(minimal_intervals[i+1])

                  E_Minimal, ignore = update_capEnergy(e_minimal_stored, V_applied=v_list_fine.iloc[start:end]['Voltage (mV)'].mean(), R=internal_R_v0(), C=C_h[0], dt = int_len)
                  if not math.isnan(v_list_fine.iloc[start:end]['Voltage (mV)'].mean()):
                    if E_Minimal < Minimal_energy():
                        false_act += 1
                        e_minimal_stored = max(0, E_Minimal - Minimal_energy())
                        itn += 1

                    elif E_Minimal >= Minimal_energy():
                        succ_act += 1
                        e_minimal_stored = max(0, E_Minimal - Minimal_energy())
                        itn+= 1

                    else:
                      print('Error')
                      print(e_minimal_stored, v)

                  #Unit test
                  #else:
                  #  print("?")
                  #  print(v_list_fine.index[start])
                  #  print(v_list_fine.index[end])
                  #  print(minimal_intervals[i], minimal_intervals[i+1])

              #Unit test
              #if itn != math.floor(E_Minimal_pred/Minimal_energy()):
              #    print("itn not matching")
              #    print(itn, math.floor(E_Minimal_pred/Minimal_energy()))
              #    continue

          else:
              e_minimal_stored, ignore = update_capEnergy(e_minimal_stored, V_applied=v_list[jj], R=internal_R_v0(), C=C_h[0], dt = t)
              #Added this
              #start = v_list_fine.index.searchsorted(v_list.index[jj])
              #end =  v_list_fine.index.searchsorted(v_list.index[jj+1])
              #for h in range(start, end):
              #    v = v_list_fine.iloc[h]['V1 [mV]']
              #    interval_length = ((v_list_fine.index[h+1]) - (v_list_fine.index[h])).total_seconds()
              #    E_Minimal, ignore = update_capEnergy(e_minimal_stored, V_applied=v, R=internal_R_v0(), C=C_h[0], dt = interval_length)
              #    e_minimal_stored = E_Minimal


        else:
          print("It's over 9000!", v_list.index[jj], v_list.index[jj+1])

    print("Naive total_E activations:", total_E/Minimal_energy())
    print("Naive total_E_pred activations:", total_E_naive/Minimal_energy())
    return pred_act, false_act, succ_act, total_E_naive

def getMax(c_list, input_list):
    max_value = max(input_list)
    i = [index for index, item in enumerate(input_list) if item == max_value][0]
    return i, max_value, c_list[i]


#SMFC
import csv
from collections import defaultdict
from scipy.signal import butter, lfilter
import matplotlib.pyplot as plt
from datetime import datetime

def butter_lowpass(cutoff, fs, order=5):
        return butter(order, cutoff, fs=fs, btype='low', analog=False)

def butter_lowpass_filter(data, cutoff, fs, order=5):
    b, a = butter_lowpass(cutoff, fs, order=order)
    y = lfilter(b, a, data)
    return y

def getMFC_data(y_test, test_pred):
    unix_time = y_test.index
    d0 = unix_time[0]
    days = []
    for d in unix_time:
        day = d
        day_from_start = day-d0
        decimal_day = day_from_start.total_seconds()/(24 * 3600)
        days.append(decimal_day)

    return days

def simulate(t_list, v_list, v_list_pred, v_list_fine, C_h):
    # t_list: list of decimal time stamps in unit of days (e.g. 71.85893518518519 day), same length as v_list
    # v_list: list of voltage values from SFMC
    # C_h: capacitance of the capacitor being filled up by harvester

    #assume capacitor is completely discharged at start
    e_minimal_stored = 0
    e_minimal_stored_theo = 0

    #Initialize evaluation metrics
    false_act = 0
    max_act = 0
    pred_act = 0
    succ_act = 0

    total_E = 0
    total_E_pred = 0

    #Calculate maximum energy
    #for i in range(len(v_list_fine) - 1):
    #    t = (v_list_fine.index[i+1] - v_list_fine.index[i]).total_seconds()
    #    total_E, ignore = update_capEnergy(total_E, V_applied=v_list_fine['V1 [10nV]'][i], R=internal_R_v0(), C=C_h[0], dt = t)
    #print(total_E/Minimal_energy())
    #for each voltage data point
    for jj in range(len(v_list) - 1): #last data point was at 71.85893518518519 day
        t = (v_list.index[jj+1] - v_list.index[jj]).total_seconds()
        total_E, ignore = update_capEnergy(total_E, V_applied=v_list[jj], R=internal_R_v0(), C=C_h[0], dt = t)
        total_E_pred, ignore = update_capEnergy(total_E_pred, V_applied=v_list_pred[jj], R=internal_R_v0(), C=C_h[0], dt = t)
        if t <= time_frame_seconds:
          #Total predicted vs. actual energy stored
          #Predict energy stored during scheduled sub-interval
          E_Minimal_pred, v_minimal_pred = update_capEnergy(e_minimal_stored, V_applied=v_list_pred[jj], R=internal_R_v0(), C=C_h[0], dt = t) #set dt as length of prediction interval, in seconds
          pred_act += math.floor(E_Minimal_pred/Minimal_energy()) #Update number of activations predicted
          itn = 0
          if math.floor(E_Minimal_pred/Minimal_energy()) > 0:
              minimal_intervals = [date for date in group_util(v_list_pred.index[jj], v_list_pred.index[jj] + timedelta(seconds=t), math.floor(E_Minimal_pred/Minimal_energy()))]
              #Calculate desired interval
              int_len = time_frame_seconds /  math.floor(E_Minimal_pred/Minimal_energy())
              for i in range(len(minimal_intervals) - 1):
                  #Determine actual energy stored during scheduled sub-interval
                  start = v_list_fine.index.searchsorted(minimal_intervals[i])
                  end =  v_list_fine.index.searchsorted(minimal_intervals[i+1])
                  v = v_list_fine.iloc[start:end]['Voltage (mV)'].mean()

                  #interval_length = ((v_list_fine.index[end]) - (v_list_fine.index[start])).total_seconds()
                  #if interval_length > int_len:
                  #  print('interval_length > int_len')
                  #  print('interval_length, int_len:', interval_length, int_len)
                  #  print(v_list_fine.index[start], v_list_fine.index[end])
                  #else:
                  #  print('interval_length <= int_len')
                  #  print('interval_length, int_len:', interval_length, int_len)
                  #  print(v_list_fine.index[start], v_list_fine.index[end])

                  E_Minimal, ignore = update_capEnergy(e_minimal_stored, V_applied=v, R=internal_R_v0(), C=C_h[0], dt = int_len)
                  if not math.isnan(v_list_fine.iloc[start:end]['Voltage (mV)'].mean()):
                    if E_Minimal < Minimal_energy():
                        false_act += 1
                        e_minimal_stored = max(0, E_Minimal - Minimal_energy())
                        itn += 1

                    elif E_Minimal >= Minimal_energy():
                        succ_act += 1
                        e_minimal_stored = max(0, E_Minimal - Minimal_energy())
                        itn+= 1

                    else:
                      print('Error')
                      print(e_minimal_stored, v)

                  #Unit test
                  #else:
                  #  print("?")
                  #  print(v_list_fine.index[start])
                  #  print(v_list_fine.index[end])
                  #  print(minimal_intervals[i], minimal_intervals[i+1])

              #Unit test
              #if itn != math.floor(E_Minimal_pred/Minimal_energy()):
              #    print("itn not matching")
              #    print(itn, math.floor(E_Minimal_pred/Minimal_energy()))
              #    continue

          else:
              e_minimal_stored, ignore = update_capEnergy(e_minimal_stored, V_applied=v_list[jj], R=internal_R_v0(), C=C_h[0], dt = t)
              #Added this
              #start = v_list_fine.index.searchsorted(v_list.index[jj])
              #end =  v_list_fine.index.searchsorted(v_list.index[jj+1])
              #for h in range(start, end):
              #    v = v_list_fine.iloc[h]['V1 [mV]']
              #    interval_length = ((v_list_fine.index[h+1]) - (v_list_fine.index[h])).total_seconds()
              #    E_Minimal, ignore = update_capEnergy(e_minimal_stored, V_applied=v, R=internal_R_v0(), C=C_h[0], dt = interval_length)
              #    e_minimal_stored = E_Minimal


        else:
          print("It's over 9000!", v_list.index[jj], v_list.index[jj+1])

    print("Runtime total_E activations:", total_E/Minimal_energy())
    print("Runtime total_E_pred activations:", total_E_pred/Minimal_energy())
    return pred_act, false_act, succ_act, total_E, total_E_pred

## Specify Device so we can use GPU

In [17]:
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

## Network Architecture

In [18]:
beta = 0.9

# old design network
# model = Sequential()
# model.add(LSTM(200, input_shape=(X_train.shape[1], X_train.shape[2]), activation='relu'))
# model.add(Dense(100, activation='relu'))
# model.add(Dense(3))
# model.compile(loss=quantile_loss, metrics=['mape'], optimizer='adam')

# Define Network
class Net(nn.Module):
    def __init__(self, num_inputs, num_steps):
        super().__init__()

        self.num_inputs = num_inputs
        self.num_steps = num_steps

        num_hidden1 = 200

        # layer 1
        self.slstm1 = snn.SLSTM(num_inputs, num_hidden1, threshold = 0.25)

        # layer 2
        self.fc1 = torch.nn.Linear(in_features=num_hidden1, out_features=100)
        self.lif1 = snn.Leaky(beta=beta, threshold = 0.5)

        # randomly initialize decay rate for output neuron
        beta_out = random.uniform(0.5, 1)

        # layer 2
        self.fc2 = torch.nn.Linear(in_features=100, out_features=3)
        self.lif2 = snn.Leaky(beta=beta_out, learn_beta=True, reset_mechanism="none")


    def forward(self, x):
        # Initialize hidden states and outputs at t=0
        syn1, mem1 = self.slstm1.reset_mem()
        mem2 = self.lif1.reset_mem()
        mem3 = self.lif2.reset_mem()

        # Record the final layer
        spk1_rec = []
        spk2_rec = []
        spk3_rec = []
        mem_rec = []

        for step in range(self.num_steps):
            spk1, syn1, mem1 = self.slstm1(x.flatten(1), syn1, mem1)
            spk2, mem2 = self.lif1(self.fc1(spk1), mem2)
            spk3, mem3 = self.lif2(self.fc2(spk2), mem3)

            # Append the Spike and Membrane History
            spk1_rec.append(spk1)
            spk2_rec.append(spk2)
            spk3_rec.append(spk3)
            mem_rec.append(mem3)

        return torch.stack(spk1_rec), torch.stack(spk2_rec), torch.stack(spk3_rec), torch.stack(mem_rec)

In [19]:
from sklearn.model_selection import TimeSeriesSplit
# from keras.models import Sequential
# from keras.layers import Dense
# from keras.layers import LSTM
# from keras import backend as K
from torch.utils.data import DataLoader, TensorDataset

In [20]:
# Combine features into X and targets into y
X = pd.concat([
    df["power - 1h"], df["power - 2h"], df["power - 3h"],
    df["V1 - 1h"], df["V1 - 2h"], df["V1 - 3h"],
    df["I1L - 1h"], df["I1L - 2h"], df["I1L - 3h"],
    df["EC - 1h"], df["EC - 2h"], df["EC - 3h"],
    df["raw_VWC - 1h"], df["raw_VWC - 2h"], df["raw_VWC - 3h"],
    df["temp - 1h"], df["temp - 2h"], df["temp - 3h"],
    df["tsd"], df["hour"]
], axis=1)


y = pd.concat([
    df['Power (uW)'], df['Voltage (mV)'], df['Current (uA)']
], axis=1)

# Split into training and testing sets (70% training, 30% testing)
X_train, X_test = train_test_split(X, test_size=0.3, shuffle=False)
y_train, y_test = train_test_split(y, test_size=0.3, shuffle=False)

# Split the training set into teacher and student subsets (50/50)
X_train_teacher, X_train_student = train_test_split(X_train, test_size=0.5, shuffle=False)
y_train_teacher, y_train_student = train_test_split(y_train, test_size=0.5, shuffle=False)
print(X_train_student)

# Split the testing set into validation and final test sets (50/50)
X_valid, X_test = train_test_split(X_test, test_size=0.5, shuffle=False)
y_valid, y_test = train_test_split(y_test, test_size=0.5, shuffle=False)

# Print the shapes for verification
print(f"X_train_teacher shape: {X_train_teacher.shape}")
print(f"X_train_student shape: {X_train_student.shape}")
print(f"X_valid shape: {X_valid.shape}")
print(f"X_test shape: {X_test.shape}")


                            power - 1h   power - 2h   power - 3h   V1 - 1h   
timestamp                                                                    
2021-09-27 12:54:04-07:00    29.157199   255.766324   722.260645  3265.925  \
2021-09-27 12:54:18-07:00   420.039723    29.157199   255.766324  3215.913   
2021-09-27 12:54:32-07:00   270.391732   420.039723    29.157199  3242.947   
2021-09-27 12:54:45-07:00  1282.425236   270.391732   420.039723  3370.497   
2021-09-27 12:54:59-07:00   259.344092  1282.425236   270.391732  3233.608   
...                                ...          ...          ...       ...   
2021-12-12 01:48:42-08:00  2107.290708   700.025260   197.476456  4348.382   
2021-12-12 01:48:55-08:00   882.158515  2107.290708   700.025260  4250.078   
2021-12-12 01:49:09-08:00  2330.585839   882.158515  2107.290708  3994.854   
2021-12-12 01:49:22-08:00   221.110620  2330.585839   882.158515  4202.154   
2021-12-12 01:49:36-08:00  1232.285215   221.110620  2330.585839

## 2. Train and Load Models

###  2.1 Train new SNN model

In [196]:
power_mape = []
voltage_mape = []
current_mape = []

E_actual_list = []
E_pred_list = []

max_act_list = []
pred_act_list = []
succ_act_list = []

pred_act_naive_list = []
false_act_naive_list = []
succ_act_naive_list = []

#Set parameters
batchsize_list = [300, 150, 50, 20, 8]
time_frame_list = ['3min', '5min', '15min', '30min', '60min']
time_frame_seconds_list = [180, 300, 900, 1800, 3600]
n = 0

for j in range(len(batchsize_list)):
    n += 1
    if n == 2: #Select which timescales to train for
        # #Normalize Data
        X_normalized = ((X - X.min()) / (X.max() - X.min()))
        # Split into training and testing sets (70% training, 30% testing)
        X_train, X_test = train_test_split(X_normalized, test_size=0.3, shuffle=False)
        y_train, y_test = train_test_split(y, test_size=0.3, shuffle=False)

        # Split the training set into teacher and student subsets (50/50)
        X_train_teacher, X_train_student = train_test_split(X_train, test_size=0.5, shuffle=False)
        y_train_teacher, y_train_student = train_test_split(y_train, test_size=0.5, shuffle=False)

        # Split the testing set into validation and final test sets (50/50)
        X_valid, X_test = train_test_split(X_test, test_size=0.5, shuffle=False)
        y_valid, y_test = train_test_split(y_test, test_size=0.5, shuffle=False)
        # #Split train and test sets
        # X_train, X_test = train_test_split(X_normalized, test_size=0.3, shuffle=False)
        # y_train, y_test = train_test_split(y, test_size=0.3, shuffle=False)

        # X_valid, X_test = train_test_split(X_test, test_size=0.5, shuffle=False)
        # y_valid, y_test = train_test_split(y_test, test_size=0.5, shuffle=False)

        batchsize = batchsize_list[j]
        time_frame = time_frame_list[j]
        time_frame_seconds = time_frame_seconds_list[j]

        print(time_frame)

        #Resample data
        X_train_teacher = X_train_teacher.resample(time_frame).mean().dropna()
        X_valid = X_valid.resample(time_frame).mean().dropna()
        X_test = X_test.resample(time_frame).mean().dropna()

        y_train_teacher = y_train_teacher.resample(time_frame).mean().dropna()
        y_valid = y_valid.resample(time_frame).mean().dropna()
        y_test = y_test.resample(time_frame).mean().dropna()

        #Reshape data
        X_train_teacher = X_train_teacher.values.reshape((X_train_teacher.shape[0], 1, X_train_teacher.shape[1]))
        X_valid = X_valid.values.reshape((X_valid.shape[0], 1, X_valid.shape[1]))
        X_test = X_test.values.reshape((X_test.shape[0], 1, X_test.shape[1]))

        # convert to tensor
        X_train_teacher = torch.tensor(X_train_teacher)
        y_train_teacher = torch.tensor(y_train_teacher.values)
        X_test = torch.tensor(X_test)
        y_test = torch.tensor(y_test.values)

        # make datasets
        train_teacher_dataset = TensorDataset(X_train_teacher, y_train_teacher)
        test_dataset = TensorDataset(X_test, y_test)

        # Create DataLoaders
        train_loader = DataLoader(train_teacher_dataset, batch_size=batchsize, shuffle=False)
        test_loader = DataLoader(test_dataset, batch_size=batchsize, shuffle=False)

        # Define the number of time steps for the spiking
        num_steps = 50
        num_inputs = X_train_teacher.shape[2]

        # create new inctance of the SNN Class
        model = Net(num_inputs, num_steps).to(device)

        # define optimizer
        optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)

        # define loss function
        def quantile_loss(y_true, y_pred, quantile=0.5):
            error = y_true - y_pred
            loss = torch.mean(torch.max(quantile * error, (quantile - 1) * error))
            return loss
        loss_fn = quantile_loss
        #loss_fn = torch.nn.MSELoss()
        #loss_fn = torch.nn.L1Loss()

        # initialize histories
        loss_hist = []
        avg_loss_hist = []
        acc_hist = []
        mape_hist = []
        num_epochs = 10

        # put model into train mode
        print(y_train_teacher)
        model.train()

        # Train Loop
        for epoch in range(num_epochs):
            for i, (data, targets) in enumerate(iter(train_loader)):
                # move to device
                data = data.to(device)
                targets = targets.to(device)

                # change to floats
                data = data.float()
                targets = targets.float()

                # run forward pass
                _, _, _, mem = model(data)
                # calculate loss
                print("output: ", mem[-1])
                print("targets: ", targets)
                print("data: ", data)
                loss_val = loss_fn(mem[-1], targets)



                # calculate and store MAPE Loss
                mem_numpy = mem.cpu().detach().numpy()
                #mem_numpy = mem.detach().numpy()
                targets_numpy = targets.cpu().detach().numpy()
                #targets_numpy = targets.detach().numpy()
                mape_hist.append(MAPE(mem_numpy[-1], targets_numpy))
                power_mape.append(MAPE(mem_numpy[-1][:,0], targets_numpy[:,0]))
                voltage_mape.append(MAPE(mem_numpy[-1][:,1], targets_numpy[:,1]))
                current_mape.append(MAPE(mem_numpy[-1][:,2], targets_numpy[:,2]))

                # Gradient calculation + weight update
                optimizer.zero_grad()
                loss_val.backward()
                optimizer.step()

                # Store loss history for future plotting
                loss_hist.append(loss_val.item())

                if i%10 == 0:
                    print(f"Epoch {epoch}, Iteration {i} Train Loss: {loss_val.item():.2f}")
                if len(loss_hist) > 100:
                    avg_loss_hist.append(sum(loss_hist[-100:])/len(loss_hist[-100:]))
                else:
                    avg_loss_hist.append(0)

            if len(loss_hist) > 100:
                print(f'New Epoch! Avg loss for the last 100 iterations: {avg_loss_hist[-1]}')

5min
tensor([[28544.3102, 30979.2252,  9214.1085],
        [28838.2100, 30978.0755,  9309.3040],
        [29093.4932, 30980.6113,  9391.0057],
        ...,
        [  612.2184,  3290.0938,  1851.7723],
        [  666.4920,  3256.4468,  2043.8347],
        [  759.2590,  3232.3066,  2368.1128]], dtype=torch.float64)
output:  tensor([[-0.6275,  2.8781,  2.0510],
        [-0.6275,  2.8781,  2.0510],
        [-0.6275,  2.8781,  2.0510],
        [-0.6275,  2.8781,  2.0510],
        [-0.6275,  2.8781,  2.0510],
        [-0.6275,  2.8781,  2.0510],
        [-0.6275,  2.8781,  2.0510],
        [-0.6275,  2.8781,  2.0510],
        [-0.6275,  2.8781,  2.0510],
        [-0.6275,  2.8781,  2.0510],
        [-0.6275,  2.8781,  2.0510],
        [-0.6275,  2.8781,  2.0510],
        [-0.6275,  2.8781,  2.0510],
        [-0.6275,  2.8781,  2.0510],
        [-0.6275,  2.8781,  2.0510],
        [-0.6275,  2.8781,  2.0510],
        [-0.6275,  2.8781,  2.0510],
        [-0.6275,  2.8781,  2.0510],
        [

KeyboardInterrupt: 

In [21]:
#Save model
checkpoint = {'state_dict': model.state_dict(),'optimizer' :optimizer.state_dict()}
torch.save(checkpoint, 'snn_5min_quant50.pth')
%mv snn_5min_quant50.pth '/content/trained_models_folder'

NameError: name 'model' is not defined

###  2.2 TSRV on Type 1 Models

In [None]:
print(df.columns)

Index(['sensorID', 'raw_VWC', 'temp', 'EC', 'I1L_valid', 'I2L_valid',
       'I1H [nA]', 'Current (uA)', 'Voltage (mV)', 'V2 [10nV]', 'I2H [nA]',
       'I2L [10pA]', 'tsd', 'hour', 'Power (uW)', 'power - 1h', 'power - 2h',
       'power - 3h', 'EC - 1h', 'EC - 2h', 'EC - 3h', 'temp - 1h', 'temp - 2h',
       'temp - 3h', 'raw_VWC - 1h', 'raw_VWC - 2h', 'raw_VWC - 3h', 'V1 - 1h',
       'V1 - 2h', 'V1 - 3h', 'I1L - 1h', 'I1L - 2h', 'I1L - 3h', 'I1H - 1h',
       'I1H - 2h', 'I1H - 3h'],
      dtype='object')


In [None]:
power_mape = []
voltage_mape = []
current_mape = []

E_actual_list = []
E_pred_list = []

max_act_list = []
pred_act_list = []
succ_act_list = []

pred_act_naive_list = []
false_act_naive_list = []
succ_act_naive_list = []

#Set parameters
batchsize = 8
time_frame = '60min'
time_frame_seconds = 3600
n = 0
splits = TimeSeriesSplit(n_splits=4)
for train_index, test_index in splits.split(X):
    n += 1
    if n >= 1:
        #Split train and test sets
        X_train = X.iloc[train_index]
        X_test = X.iloc[test_index]
        y_train = y.iloc[train_index]
        y_test = y.iloc[test_index]

        X_valid, X_test = train_test_split(X_test, test_size=0.5, shuffle=False)
        y_valid, y_test = train_test_split(y_test, test_size=0.5, shuffle=False)

        #Set dataset bounds
        train_bound_lower = y_train.index[0]
        train_bound_upper = y_train.index[-1]
        valid_bound_lower = y_valid.index[0]
        valid_bound_upper = y_valid.index[-1]
        test_bound_lower = y_test.index[0]
        test_bound_upper = y_test.index[-1]

        #Resample data
        X_train = X_train.resample(time_frame).mean().dropna()
        X_valid = X_valid.resample(time_frame).mean().dropna()
        X_test = X_test.resample(time_frame).mean().dropna()

        y_train = y_train.resample(time_frame).mean().dropna()
        y_valid = y_valid.resample(time_frame).mean().dropna()
        y_test = y_test.resample(time_frame).mean().dropna()

        #Reshape data
        X_train = X_train.values.reshape((X_train.shape[0], 1, X_train.shape[1]))
        X_valid = X_valid.values.reshape((X_valid.shape[0], 1, X_valid.shape[1]))
        X_test = X_test.values.reshape((X_test.shape[0], 1, X_test.shape[1]))

        # convert to tensor
        X_train = torch.tensor(X_train)
        y_train_index_labels = y_train.index
        y_train_column_labels = y_train.columns
        y_train = torch.tensor(y_train.values)

        X_test = torch.tensor(X_test)
        y_test_index_labels = y_test.index
        y_test_column_labels = y_test.columns
        y_test = torch.tensor(y_test.values)

        # make datasets
        train_dataset = TensorDataset(X_train, y_train)
        test_dataset = TensorDataset(X_test, y_test)

        # Create DataLoaders
        train_loader = DataLoader(train_dataset, batch_size=batchsize, shuffle=False)
        test_loader = DataLoader(test_dataset, batch_size=batchsize, shuffle=False)

        # Define the number of time steps for the spiking
        num_steps = 50
        num_inputs = X_train.shape[2]

        # create new inctance of the SNN Class
        model = Net(num_inputs, num_steps).to(device)

        # define optimizer
        optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)

        # define loss function
        def quantile_loss(y_true, y_pred, quantile=0.05):
            error = y_true - y_pred
            loss = torch.mean(torch.max(quantile * error, (quantile - 1) * error))
            return loss
        loss_fn = quantile_loss

        # initialize histories
        loss_hist = []
        avg_loss_hist = []
        acc_hist = []
        num_epochs = 25

        # put model into train mode
        model.train()

        # Train Loop
        for epoch in range(num_epochs):
            for i, (data, targets) in enumerate(iter(train_loader)):
                # move to device
                data = data.to(device)
                targets = targets.to(device)

                # change to floats
                data = data.float()
                targets = targets.float()

                _, _, _, mem = model(data)

                loss_val = loss_fn(mem[-1], targets)

                # Gradient calculation + weight update
                optimizer.zero_grad()
                loss_val.backward()
                optimizer.step()

                # Store loss history for future plotting
                loss_hist.append(loss_val.item())

                # Update loss plot
                # update_loss_plot(loss_hist)

                if i%10 == 0:
                    print(f"Epoch {epoch}, Iteration {i} Train Loss: {loss_val.item():.2f}")
                if len(loss_hist) > 100:
                    avg_loss_hist.append(sum(loss_hist[-100:])/len(loss_hist[-100:]))
                else:
                    avg_loss_hist.append(0)

            if len(loss_hist) > 100:
                print(f'New Epoch! Avg loss for the last 100 iterations: {avg_loss_hist[-1]}')

        # predictions
        model.eval()

        train_pred = []
        test_pred = []
        with torch.no_grad():
            for data, target in train_dataset:
                data = data.to(device)
                data = data.float()
                _, _, _, train_mem = model(data)
                train_pred.append(train_mem[-1])

            for data, target in test_dataset:
                data = data.to(device)
                data = data.float()
                _, _, _, test_mem = model(data)
                test_pred.append(test_mem[-1])

        # Convert to numpy
        train_pred = torch.cat(train_pred, dim=0)
        train_pred = train_pred.numpy()
        test_pred = torch.cat(test_pred, dim=0)
        test_pred = test_pred.numpy()

        # convert tensors back to dataframe
        y_test = pd.DataFrame(y_test, index=y_test_index_labels, columns=y_test_column_labels)
        y_train = pd.DataFrame(y_test, index=y_train_index_labels, columns=y_test_column_labels)

        #Prepare data for runtime simulation
        y_test['power pred'] = test_pred[:, 0]
        y_test['V1 [mV] pred'] = test_pred[:, 1]
        y_test['I1L [μA] pred'] = test_pred[:, 2]

        days  = getMFC_data(y_test, test_pred)

        v_test = df.loc[(((df.index >= test_bound_lower)) & (df.index <= test_bound_upper))]['Voltage (mV)']
        v_test = v_test.drop(v_test[(v_test.index > '2021-11-11') & (v_test.index < '2021-11-22 01:00:00')].index)
        v_test = pd.DataFrame(v_test)/1E5
        v_avg_true = v_test['Voltage (mV)'].resample(time_frame).mean().dropna()
        v_avg_pred = y_test['V1 [mV] pred']/1E5
        C0 = [0.007000000000000006, 0.007000000000000006, 0.007000000000000006]

        #Remove first and last entries of averaged data to prevent overestimation of available energy
        v_avg_true = v_avg_true[1:][:-1]
        v_avg_pred = v_avg_pred[1:][:-1]

        #Run oracle model
        max_act = oracle_simulate(v_test, C0)

        #Call simulate function
        pred_act, false_act, succ_act, total_E, total_E_pred = simulate(days, v_avg_true, v_avg_pred, v_test, C0)

        #Run naive model
        v_valid = df.loc[(((df.index >= valid_bound_lower)) & (df.index < valid_bound_upper))]['Voltage (mV)']/1E5
        pred_act_naive, false_act_naive, succ_act_naive, total_E = naive_simulate(days, v_avg_true, v_valid, v_test, C0)
        print("Dataset, train set, and test set size:", len(y_train) + len(y_valid) + len(y_test), len(y_train), len(y_test))
        print('Timeframe:', time_frame)

        print('Minimal Application')
        print("Naive vs. DL succesful activations:", succ_act/succ_act_naive)
        #print('Predicted vs. Actual percent difference: %.3f%%' % ((total_E * 100 / total_E_pred) - 100))
        print('Maximum possible activations:', max_act)
        print('Predicted activations:', pred_act)
        print('Successful activations: %d, %.3f%%' % (succ_act, succ_act * 100/pred_act))
        print('Failed activations: %d, %.3f%%' % (false_act, false_act * 100/pred_act))
        print('Missed activations: %d, %.3f%%' % (max_act - succ_act, (max_act - succ_act) * 100/max_act))

        #Naive model
        print('Naive predicted activations (usual actual energy average):', pred_act_naive)
        print('Naive successful activations (usual actual energy average): %d, %.3f%%' % (succ_act_naive, succ_act_naive * 100/pred_act_naive))
        print('Naive failed activations (usual actual energy average): %d, %.3f%%' % (false_act_naive, false_act_naive * 100/pred_act_naive))
        print('Naive missed activations (usual actual energy average): %d, %.3f%%' % (max_act - succ_act_naive, (max_act - succ_act_naive) * 100/max_act))


        print('Voltage overestimation rate: %.3f%%' % ((y_test['Voltage (mV)'].values <= y_test['Voltage (mV)']).mean() * 100))
        print("Test MAPE power: %3f" %  MAPE(y_test['Power (uW)'].values.ravel(), y_test['power pred']))
        print("Test MAPE voltage: %3f" % MAPE(y_test['Voltage (mV)'], y_test['V1 [mV] pred']))
        print("Test MAPE current: %3f" % MAPE(y_test['Current (uA)'], y_test['I1L [μA] pred']))

Epoch 0, Iteration 0 Train Loss: 22023.83
Epoch 0, Iteration 10 Train Loss: 2211.05
Epoch 0, Iteration 20 Train Loss: 1743.63
Epoch 0, Iteration 30 Train Loss: 1186.39
Epoch 0, Iteration 40 Train Loss: 1397.68
Epoch 0, Iteration 50 Train Loss: 1696.61
Epoch 0, Iteration 60 Train Loss: 1203.95
Epoch 0, Iteration 70 Train Loss: 2920.93
Epoch 0, Iteration 80 Train Loss: 1765.85
Epoch 0, Iteration 90 Train Loss: 2137.57
Epoch 1, Iteration 0 Train Loss: 21849.47
Epoch 1, Iteration 10 Train Loss: 1908.69
Epoch 1, Iteration 20 Train Loss: 1313.47
Epoch 1, Iteration 30 Train Loss: 790.27
Epoch 1, Iteration 40 Train Loss: 948.51
Epoch 1, Iteration 50 Train Loss: 1170.54
Epoch 1, Iteration 60 Train Loss: 748.62
Epoch 1, Iteration 70 Train Loss: 2351.21
Epoch 1, Iteration 80 Train Loss: 1193.84
Epoch 1, Iteration 90 Train Loss: 1607.48
New Epoch! Avg loss for the last 100 iterations: 2057.9776861572263
Epoch 2, Iteration 0 Train Loss: 21298.33
Epoch 2, Iteration 10 Train Loss: 1501.35
Epoch 2, It

In [22]:
# Print out predictions for power, voltage, and current
print("5-Minute-Ahead Predictions:")
print("Power Predictions (uW):")
print(y_test[:, 0])

print("\nVoltage Predictions (mV):")
print(y_test[:, 1])

print("\nCurrent Predictions (μA):")
print(y_test[:, 2])


5-Minute-Ahead Predictions:
Power Predictions (uW):


InvalidIndexError: (slice(None, None, None), 0)

##  3. Graph Selected Data

###  3.1 Load and graph pretrained SNN models

###  In order to use pretrained models it is neccesary to download the ```trained_models``` directory from [Hugging Face](https://huggingface.co/datasets/adunlop621/Soil_MFC/tree/main) and store it in the same directory as this notebook

In [None]:
from keras.models import load_model

time_frame = '60min'
batchsize = 8

X = pd.concat([df["power - 1h"], df["power - 2h"], df["power - 3h"], df["V1 - 1h"], df["V1 - 2h"], df["V1 - 3h"], df["I1L - 1h"], df["I1L - 2h"], df["I1L - 3h"],df["EC - 1h"], df["EC - 2h"], df["EC - 3h"], df["raw_VWC - 1h"], df["raw_VWC - 2h"], df["raw_VWC - 3h"], df["temp - 1h"], df["temp - 2h"], df["temp - 3h"], df["tsd"], df["hour"]], axis = 1)
#X = pd.concat([df["power - 1h"], df["power - 2h"], df["power - 3h"], df["V1 - 1h"], df["V1 - 2h"], df["V1 - 3h"], df["I1L - 1h"], df["I1L - 2h"], df["I1L - 3h"], df["tsd"], df["hour"]], axis = 1)
y = pd.concat([df["Power (uW)"], df['Voltage (mV)'], df['Current (uA)']], axis = 1)

#Normalize Data
X_normalized = ((X - X.min()) / (X.max() - X.min()))

#Split train and test sets
# X_train, X_test = train_test_split(X_normalized, test_size=0.3, shuffle=False)
# y_train, y_test = train_test_split(y, test_size=0.3, shuffle=False)

# X_valid, X_test = train_test_split(X_test, test_size=0.5, shuffle=False)
# y_valid, y_test = train_test_split(y_test, test_size=0.5, shuffle=False)
# Split into training and testing sets (70% training, 30% testing)
X_train, X_test = train_test_split(X_normalized, test_size=0.3, shuffle=False)
y_train, y_test = train_test_split(y, test_size=0.3, shuffle=False)

# Split the training set into teacher and student subsets (50/50)
X_train_teacher, X_train_student = train_test_split(X_train, test_size=0.5, shuffle=False)
y_train_teacher, y_train_student = train_test_split(y_train, test_size=0.5, shuffle=False)

# Split the testing set into validation and final test sets (50/50)
X_valid, X_test = train_test_split(X_test, test_size=0.5, shuffle=False)
y_valid, y_test = train_test_split(y_test, test_size=0.5, shuffle=False)

#Resample data
X_train_student = X_train_student.resample(time_frame).mean().dropna()
y_train_student = y_train_student.resample(time_frame).mean().dropna()
X_valid = X_valid.resample(time_frame).mean().dropna()
y_valid = y_valid.resample(time_frame).mean().dropna()

X_test = X_test.resample(time_frame).mean().dropna()
y_test = y_test.resample(time_frame).mean().dropna()

#Define mv1
mv1 = y_train_student

#Reshape data
X_train_teacher = X_train_teacher.values.reshape((X_train_teacher.shape[0], 1, X_train_teacher.shape[1]))
X_train_student = X_train_student.values.reshape((X_train_student.shape[0], 1, X_train_student.shape[1]))
X_valid = X_valid.values.reshape((X_valid.shape[0], 1, X_valid.shape[1]))
X_test = X_test.values.reshape((X_test.shape[0], 1, X_test.shape[1]))

# convert to tensor
X_train_teacher = torch.tensor(X_train_teacher)
y_train_teacher = torch.tensor(y_train_teacher.values)
X_train_student = torch.tensor(X_train_student)
y_train_student = torch.tensor(y_train_student.values)
X_valid = torch.tensor(X_valid)
y_valid = torch.tensor(y_valid.values)
X_test = torch.tensor(X_test)
y_test = torch.tensor(y_test.values)

# make datasets
train_teacher_dataset = TensorDataset(X_train_teacher, y_train_teacher)
train_student_dataset = TensorDataset(X_train_student, y_train_student)
valid_dataset = TensorDataset(X_valid, y_valid)
test_dataset = TensorDataset(X_test, y_test)

# Create DataLoaders
train_teacher_loader = DataLoader(train_teacher_dataset, batch_size=batchsize, shuffle=False)
train_student_loader = DataLoader(train_student_dataset, batch_size=batchsize, shuffle=False)
valid_loader = DataLoader(valid_dataset, batch_size=batchsize, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batchsize, shuffle=False)

num_steps = 50
num_inputs = X_train_student.shape[2]

# create new inctance of the SNN Class
model = Net(num_inputs, num_steps).to(device)

model.load_state_dict(torch.load("trained_models/snn_5min_quant50.pth", map_location=torch.device('cpu')), strict=False)
model.eval()
actuals = []
predictions = []

with torch.no_grad():
    for data, targets in train_student_loader:

        # prepare data
        data = data.to(device)
        targets = targets.to(device)

        data = data.float()
        targets = targets.float()

        _, _, _, output = model(data)

        output = output.cpu()
        output = output.squeeze(1).detach()

        prediction = output[-1]

        actuals.append(targets)
        predictions.append(prediction)

# Convert lists to tensors
actuals = torch.cat(actuals, dim=0)
predictions = torch.cat(predictions, dim=0)

mv1["power_pred_med"] = predictions[:, 0]
mv1["voltage_pred_med"] = predictions[:, 1]
mv1["current_pred_med"] = predictions[:, 2]
#mv1 = mv1.loc[(mv1.index > '2022-01-04') & (mv1.index < '2022-01-06')]
mv1 = mv1.loc[(mv1.index > '2021-12-12') & (mv1.index < '2021-12-14')]
mv2 = mv1


  model.load_state_dict(torch.load("/content/trained_models_folder/snn_5min_quant50.pth", map_location=torch.device('cpu')), strict=False)


In [23]:
print(mv1)

NameError: name 'mv1' is not defined

###  3.2 SNN Peformance Metrics

In [24]:
from keras.models import load_model

# Set parameters
batchsize_list = [300, 150, 50, 20, 8]
time_frame_list = ['3min', '5min', '15min', '30min', '60min']
#time_frame_list = ['3min', '5min']
time_frame_seconds_list = [180, 300, 900, 1800, 3600]
#time_frame_seconds_list = [180, 300]
n = 0

snn_power_mape_list = []
snn_volt_mape_list = []
snn_curr_mape_list = []

# Dictionary to store mv variables
mv_dict = {}

for j in range(len(batchsize_list)):
    batchsize = batchsize_list[j]
    time_frame = time_frame_list[1]
    time_frame_seconds = time_frame_seconds_list[1]
    #time_frame = '5min'
    #time_frame_seconds = 300

    n += 1

    X = pd.concat([df["power - 1h"], df["power - 2h"], df["power - 3h"],
                   df["V1 - 1h"], df["V1 - 2h"], df["V1 - 3h"],
                   df["I1L - 1h"], df["I1L - 2h"], df["I1L - 3h"],
                   df["EC - 1h"], df["EC - 2h"], df["EC - 3h"],
                   df["raw_VWC - 1h"], df["raw_VWC - 2h"], df["raw_VWC - 3h"],
                   df["temp - 1h"], df["temp - 2h"], df["temp - 3h"],
                   df["tsd"], df["hour"]], axis=1)
    y = pd.concat([df["Power (uW)"], df['Voltage (mV)'], df['Current (uA)']], axis=1)

    # Normalize Data
    X_normalized = ((X - X.min()) / (X.max() - X.min()))

    # Split train and test sets
    X_train, X_test = train_test_split(X_normalized, test_size=0.3, shuffle=False)
    y_train, y_test = train_test_split(y, test_size=0.3, shuffle=False)


    # Split the training set into teacher and student subsets (50/50)
    X_train_teacher, X_train_student = train_test_split(X_train, test_size=0.5, shuffle=False)
    y_train_teacher, y_train_student = train_test_split(y_train, test_size=0.5, shuffle=False)

    # Split the testing set into validation and final test sets (50/50)
    X_valid, X_test = train_test_split(X_test, test_size=0.5, shuffle=False)
    y_valid, y_test = train_test_split(y_test, test_size=0.5, shuffle=False)

    #Resample data
    # X_train_student = X_train_student.resample(time_frame).mean().dropna()
    # y_train_student = y_train_student.resample(time_frame).mean().dropna()
    X_valid = X_valid.resample(time_frame).mean().dropna()
    y_valid = y_valid.resample(time_frame).mean().dropna()

    X_test = X_test.resample(time_frame).mean().dropna()
    y_test = y_test.resample(time_frame).mean().dropna()

    # Calculate actual energy generated in test set
    E_actual = 0
    for i in range(len(y_train_student) - 1):
        t = (y_train_student.index[i+1] - y_train_student.index[i]).total_seconds()
        if t < 180:
            E_actual += y_train_student['Power (uW)'][i] * t


    # Define mv variable for the current time frame
    mv_dict[time_frame] = y_train_student

    #Reshape data
    X_train_teacher = X_train_teacher.values.reshape((X_train_teacher.shape[0], 1, X_train_teacher.shape[1]))
    X_train_student = X_train_student.values.reshape((X_train_student.shape[0], 1, X_train_student.shape[1]))
    X_valid = X_valid.values.reshape((X_valid.shape[0], 1, X_valid.shape[1]))
    X_test = X_test.values.reshape((X_test.shape[0], 1, X_test.shape[1]))

    # convert to tensor
    X_train_teacher = torch.tensor(X_train_teacher)
    y_train_teacher = torch.tensor(y_train_teacher.values)
    X_train_student = torch.tensor(X_train_student)
    y_train_student = torch.tensor(y_train_student.values)
    X_valid = torch.tensor(X_valid)
    y_valid = torch.tensor(y_valid.values)
    X_test = torch.tensor(X_test)
    y_test = torch.tensor(y_test.values)

    # make datasets
    train_teacher_dataset = TensorDataset(X_train_teacher, y_train_teacher)
    train_student_dataset = TensorDataset(X_train_student, y_train_student)
    valid_dataset = TensorDataset(X_valid, y_valid)
    test_dataset = TensorDataset(X_test, y_test)

    # Create DataLoaders
    train_teacher_loader = DataLoader(train_teacher_dataset, batch_size=batchsize, shuffle=False)
    train_student_loader = DataLoader(train_student_dataset, batch_size=batchsize, shuffle=False)
    valid_loader = DataLoader(valid_dataset, batch_size=batchsize, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batchsize, shuffle=False)

    num_steps = 50
    num_inputs = X_train_student.shape[2]

    # Create new instance of the SNN Class
    model = Net(num_inputs, num_steps).to(device)

    file = 'trained_models/snn_' + time_frame + '_quant50.pth'
    print(file)

    checkpoint = torch.load(file, map_location=torch.device('cpu'), weights_only=True)
    model.load_state_dict(checkpoint['state_dict'])

    model.eval()
    actuals = []
    predictions = []
    with torch.no_grad():
        for data, targets in train_student_loader:
            #print(i)
            # Prepare data
            data = data.to(device).float()
            targets = targets.to(device).float()

            _, _, _, output = model(data)

            output = output.cpu().squeeze(1).detach()
            actuals.append(targets)
            predictions.append(output[-1])

    # Convert lists to tensors
    actuals = torch.cat(actuals, dim=0)
    predictions = torch.cat(predictions, dim=0)

    mv = mv_dict[time_frame]
    mv["power_pred_med_" + time_frame] = predictions[:, 0].numpy()
    mv["voltage_pred_med_" + time_frame] = predictions[:, 1].numpy()
    mv["current_pred_med_" + time_frame] = predictions[:, 2].numpy()

    print(f'Voltage overestimation rate for {time_frame}: %.3f%%' % (
        (mv['Voltage (mV)'].values <= mv["voltage_pred_med_" + time_frame]).mean() * 100))
    print(f"Test MAPE power ({time_frame}): %3f" % MAPE(mv['Power (uW)'].values.ravel(), mv["power_pred_med_" + time_frame]))
    print(f"Test MAPE voltage ({time_frame}): %3f" % MAPE(mv['Voltage (mV)'], mv["voltage_pred_med_" + time_frame]))
    print(f"Test MAPE current ({time_frame}): %3f" % MAPE(mv['Current (uA)'], mv["current_pred_med_" + time_frame]))

    E_pred = 0
    for i in range(len(mv) - 1):
        t = (mv.index[i+1] - mv.index[i]).total_seconds()
        if t <= time_frame_seconds + 50:
            E_pred += mv["power_pred_med_" + time_frame][i] * t

    print(f'Predicted vs. Actual Total Energy Percent Difference ({time_frame}): %.3f%%' % (
        (E_pred - E_actual) * 100 / E_actual))

    V_actual = mv['Voltage (mV)'].mean()
    V_pred = mv["voltage_pred_med_" + time_frame].mean()
    print(f'Predicted vs. Actual Total Voltage Percent Difference ({time_frame}): %.3f%%' % (
        (V_pred - V_actual) * 100 / V_actual))


trained_models/snn_5min_quant50.pth


KeyboardInterrupt: 

In [190]:
from keras.models import load_model

# Set parameters
batchsize_list = [300, 150, 50, 20, 8]
time_frame = '5min'
time_frame_seconds = 300

# Initialize results storage
snn_power_mape_list = []
snn_volt_mape_list = []
snn_curr_mape_list = []

# Dictionary to store mv variables
mv_dict = {}

# Process only the first batch size
batchsize = batchsize_list[1]  # Pick the first batch size

X = pd.concat([df["power - 1h"], df["power - 2h"], df["power - 3h"],
               df["V1 - 1h"], df["V1 - 2h"], df["V1 - 3h"],
               df["I1L - 1h"], df["I1L - 2h"], df["I1L - 3h"],
               df["EC - 1h"], df["EC - 2h"], df["EC - 3h"],
               df["raw_VWC - 1h"], df["raw_VWC - 2h"], df["raw_VWC - 3h"],
               df["temp - 1h"], df["temp - 2h"], df["temp - 3h"],
               df["tsd"], df["hour"]], axis=1)
y = pd.concat([df["Power (uW)"], df['Voltage (mV)'], df['Current (uA)']], axis=1)

# Normalize Data
X_normalized = ((X - X.min()) / (X.max() - X.min()))

# Split train and test sets
X_train, X_test = train_test_split(X_normalized, test_size=0.3, shuffle=False)
y_train, y_test = train_test_split(y, test_size=0.3, shuffle=False)

# Split the testing set into validation and final test sets (50/50)
X_valid, X_test = train_test_split(X_test, test_size=0.5, shuffle=False)
y_valid, y_test = train_test_split(y_test, test_size=0.5, shuffle=False)

# Resample data
X_train = X_train.resample(time_frame).mean().dropna()
y_train = y_train.resample(time_frame).mean().dropna()
X_valid = X_valid.resample(time_frame).mean().dropna()
y_valid = y_valid.resample(time_frame).mean().dropna()

X_test = X_test.resample(time_frame).mean().dropna()
y_test = y_test.resample(time_frame).mean().dropna()

# Define mv variable for the current time frame
mv_dict[time_frame] = y_train

# Reshape data
X_train = X_train.values.reshape((X_train.shape[0], 1, X_train.shape[1]))
X_valid = X_valid.values.reshape((X_valid.shape[0], 1, X_valid.shape[1]))
X_test = X_test.values.reshape((X_test.shape[0], 1, X_test.shape[1]))

# Convert to tensor
X_train = torch.tensor(X_train)
y_train = torch.tensor(y_train.values)
X_valid = torch.tensor(X_valid)
y_valid = torch.tensor(y_valid.values)
X_test = torch.tensor(X_test)
y_test = torch.tensor(y_test.values)

# Make datasets
train_dataset = TensorDataset(X_train, y_train)
valid_dataset = TensorDataset(X_valid, y_valid)
test_dataset = TensorDataset(X_test, y_test)

# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=batchsize, shuffle=False)
valid_loader = DataLoader(valid_dataset, batch_size=batchsize, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batchsize, shuffle=False)

num_steps = 50
num_inputs = X_train.shape[2]

# Create new instance of the SNN Class
model = Net(num_inputs, num_steps).to(device)

file = 'trained_models/snn_' + time_frame + '_quant50.pth'
print(file)

checkpoint = torch.load(file, map_location=torch.device('cpu'), weights_only=True)
model.load_state_dict(checkpoint['state_dict'])

model.eval()
actuals = []
predictions = []
with torch.no_grad():
    for data, targets in train_loader:
        # Prepare data
        data = data.to(device).float()
        targets = targets.to(device).float()

        _, _, _, output = model(data)

        output = output.cpu().squeeze(1).detach()
        actuals.append(targets)
        predictions.append(output[-1])

# Convert lists to tensors
actuals = torch.cat(actuals, dim=0)
predictions = torch.cat(predictions, dim=0)
print("predictions: ", predictions)
print("actuals: ", actuals)

mv = mv_dict[time_frame]
mv["power_pred_med_" + time_frame] = predictions[:, 0].numpy()
mv["voltage_pred_med_" + time_frame] = predictions[:, 1].numpy()
mv["current_pred_med_" + time_frame] = predictions[:, 2].numpy()

print(f'Voltage overestimation rate for {time_frame}: %.3f%%' % (
    (mv['Voltage (mV)'].values <= mv["voltage_pred_med_" + time_frame]).mean() * 100))
print(f"Test MAPE power ({time_frame}): %3f" % MAPE(mv['Power (uW)'].values.ravel(), mv["power_pred_med_" + time_frame]))
print(f"Test MAPE voltage ({time_frame}): %3f" % MAPE(mv['Voltage (mV)'], mv["voltage_pred_med_" + time_frame]))
print(f"Test MAPE current ({time_frame}): %3f" % MAPE(mv['Current (uA)'], mv["current_pred_med_" + time_frame]))


trained_models/snn_5min_quant50.pth
predictions:  tensor([[ 716.3546, 3280.2092, 1918.9449],
        [ 716.3546, 3280.2092, 1918.9449],
        [ 716.3546, 3280.2092, 1918.9449],
        ...,
        [ 518.3690, 2080.7988, 2589.0771],
        [ 518.3690, 2080.7988, 2589.0771],
        [ 523.3364, 2113.8623, 2593.1101]])
actuals:  tensor([[28544.3105, 30979.2246,  9214.1084],
        [28838.2109, 30978.0762,  9309.3037],
        [29093.4941, 30980.6113,  9391.0059],
        ...,
        [ 1025.7499,  3787.1465,  2725.7690],
        [  966.0275,  3962.2629,  2456.9177],
        [  872.7040,  4119.1685,  2137.6677]])
Voltage overestimation rate for 5min: 61.404%
Test MAPE power (5min): 49.838094
Test MAPE voltage (5min): 27.417626
Test MAPE current (5min): 30.729853


In [191]:
print(mv_dict['5min'].keys())

Index(['Power (uW)', 'Voltage (mV)', 'Current (uA)', 'power_pred_med_5min',
       'voltage_pred_med_5min', 'current_pred_med_5min'],
      dtype='object')


# Student Model Architecture

## Train Student model

In [216]:
print(y)

                             Power (uW)  Voltage (mV)  Current (uA)
timestamp                                                          
2021-06-04 00:00:50-07:00  29283.382409     30977.020      9453.260
2021-06-04 00:01:04-07:00  31207.828873     30968.787     10077.188
2021-06-04 00:01:17-07:00  28851.587848     30978.863      9313.314
2021-06-04 00:01:31-07:00  28715.818842     30978.003      9269.745
2021-06-04 00:01:45-07:00  27285.687612     30983.655      8806.478
...                                 ...           ...           ...
2022-01-26 23:58:52-08:00   9185.733148      2438.569     37668.539
2022-01-26 23:59:06-08:00     41.767202       855.004       488.503
2022-01-26 23:59:19-08:00     19.878576       855.250       232.430
2022-01-26 23:59:33-08:00    105.926588       820.843      1290.461
2022-01-26 23:59:47-08:00    284.662956       713.814      3987.915

[969652 rows x 3 columns]


In [220]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
import numpy as np

# Load teacher predictions
timeframe = '5min'
teacher_5min_preds = mv_dict[timeframe]
teacher_5min_preds_df = pd.DataFrame(
    teacher_5min_preds,
    columns=[
        "power_pred_med_" + timeframe,
        "voltage_pred_med_" + timeframe,
        "current_pred_med_" + timeframe
    ]
)

# Normalize the data
X_normalized = (X - X.min()) / (X.max() - X.min())

# Split train and test sets
X_train, X_test = train_test_split(X_normalized, test_size=0.3, shuffle=False)
y_train, y_test = train_test_split(y, test_size=0.3, shuffle=False)

# Split the testing set into validation and final test sets (50/50)
X_valid, X_test = train_test_split(X_test, test_size=0.5, shuffle=False)
y_valid, y_test = train_test_split(y_test, test_size=0.5, shuffle=False)

# Resample data to 15-minute intervals
X_train = X_train.resample('15min').mean().dropna()
y_train = y_train.resample('15min').mean().dropna()
print(X_train.shape)
print(y_train.shape)
X_valid = X_valid.resample('15min').mean().dropna()
y_valid = y_valid.resample('15min').mean().dropna()

# Align predictions to data
# Filter 5-minute predictions to keep those 5 minutes before 15-minute intervals
valid_timestamps = X_train.index
teacher_5min_preds_df = teacher_5min_preds_df.loc[teacher_5min_preds_df.index.isin(valid_timestamps - pd.Timedelta(minutes=5))]

# Reassign the prediction timestamps to align with the current data timestamp
teacher_5min_preds_df.index = teacher_5min_preds_df.index + pd.Timedelta(minutes=5)

nan_entries = teacher_5min_preds_df.index[pd.isnull(teacher_5min_preds_df.index)]
# Print all NaN entries
print("NaN Entries in teacher_5min_preds_df.index:")
print(nan_entries)

# Concatenate predictions with training data
X_with_teacher = pd.concat([X_train, teacher_5min_preds_df], axis=1)

# Drop rows with missing values caused by timestamp mismatches
# X_with_teacher = X_with_teacher.dropna()
# Verify the updated shape
print("Updated X Shape: Rows " + str(X_with_teacher.shape[0]) + " Columns " + str(X_with_teacher.shape[1]))
print(X_with_teacher.shape)
print(y_train.shape)


def quantile_loss(y_true, y_pred, quantile=0.5):
    error = y_true - y_pred
    loss = torch.mean(torch.max(quantile * error, (quantile - 1) * error))
    print("QUANTILE LOSS: ", loss)
    return loss

def distillation_loss(y_teacher, y_pred):
  loss = torch.mean((y_teacher - y_pred) ** 2)
  print("DISTILLATION LOSS: ", loss)

  return loss

def combined_loss(y_true, y_pred, y_teacher, quantile=0.5, alpha=0.5):
  return ((alpha) * quantile_loss(y_true, y_pred, quantile)) + ((1-alpha) * distillation_loss(y_teacher, y_pred))

# Define the StudentModel (as provided earlier)
class StudentModel(nn.Module):
    def __init__(self, num_inputs, num_steps, output_size):
        super(StudentModel, self).__init__()
        self.num_inputs = num_inputs
        self.num_steps = num_steps
        self.output_size = output_size

        self.lstm = nn.LSTM(input_size=num_inputs, hidden_size=200, num_layers=1, batch_first=True)
        self.fc1 = nn.Linear(200, 100)
        self.fc2 = nn.Linear(100, output_size)
        self.relu = nn.ReLU()

    def forward(self, x):
        lstm_out, _ = self.lstm(x)
        lstm_out = lstm_out[:, -1, :]
        fc1_out = self.relu(self.fc1(lstm_out))
        output = self.fc2(fc1_out)
        return output

# Set parameters
batch_size = 32  # Adjust as needed
num_epochs = 25
learning_rate = 1e-3
beta = 0.5  # For quantile loss

X_teacher_normalized = (X_with_teacher - X_with_teacher.min()) / (X_with_teacher.max() - X_with_teacher.min())

X_train_final, X_test_final = train_test_split(X_teacher_normalized, test_size=0.3, shuffle=False)
y_train_final, y_test_final = train_test_split(y_train, test_size=0.3, shuffle=False)

print("Training labels shape (X_with_teacher):", X_teacher_normalized.shape)
print("Training set shape (X_train_final):", X_train_final.shape)
print("Training labels shape (y_train_final):", y_train_final.shape)

# Split the test set further into validation and test sets
X_valid_final, X_test_final = train_test_split(X_test_final, test_size=0.5, shuffle=False)
y_valid_final, y_test_final = train_test_split(y_test_final, test_size=0.5, shuffle=False)

# Reshape data for LSTM input
X_train_final = X_train_final.values.reshape((X_train_final.shape[0], 1, X_train_final.shape[1]))
X_valid_final = X_valid_final.values.reshape((X_valid_final.shape[0], 1, X_valid_final.shape[1]))
X_test_final = X_test_final.values.reshape((X_test_final.shape[0], 1, X_test_final.shape[1]))

# X_train_teacher = X_train_teacher.values.reshape((X_train_teacher.shape[0], 1, X_train_teacher.shape[1]))
#         X_valid = X_valid.values.reshape((X_valid.shape[0], 1, X_valid.shape[1]))
#         X_test = X_test.values.reshape((X_test.shape[0], 1, X_test.shape[1]))

#         # convert to tensor
#         X_train_teacher = torch.tensor(X_train_teacher)
#         y_train_teacher = torch.tensor(y_train_teacher.values)
#         X_test = torch.tensor(X_test)
#         y_test = torch.tensor(y_test.values)
# Convert to tensor


X_train_final = torch.tensor(X_train_final).float()
y_train_final = torch.tensor(y_train_final.values).float()
X_valid_final = torch.tensor(X_valid_final).float()
y_valid_final = torch.tensor(y_valid_final.values).float()
X_test_final = torch.tensor(X_test_final).float()
y_test_final = torch.tensor(y_test_final.values).float()

# Clean X_train_final tensor
nan_mask = torch.isnan(X_train_final)  # Identify NaN values
non_nan_values = X_train_final[~nan_mask]  # Filter out NaN values for mean calculation
mean_value = torch.mean(non_nan_values)  # Compute mean of valid values

# Replace NaNs with the computed mean
tensor_cleaned = X_train_final.clone()
tensor_cleaned[nan_mask] = mean_value

# Reassign cleaned tensor back if desired
X_train_final = tensor_cleaned

# Create datasets
train_dataset_final = TensorDataset(X_train_final, y_train_final)
valid_dataset_final = TensorDataset(X_valid_final, y_valid_final)
test_dataset_final = TensorDataset(X_test_final, y_test_final)

# Create DataLoaders
train_loader = DataLoader(train_dataset_final, batch_size=batch_size, shuffle=False)
valid_loader = DataLoader(valid_dataset_final, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset_final, batch_size=batch_size, shuffle=False)



# Define model, optimizer, and loss function
num_steps = 1  # Since you're using LSTM for time series data with one step
num_inputs = X_train_final.shape[2]
output_size = y_train_final.shape[1]
model = Net(num_inputs, num_steps).to(device)

optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

# Training loop
model.train()
for epoch in range(num_epochs):
    running_loss = 0.0
    for i, (data, targets) in enumerate(train_loader):
        data, targets = data.to(device).float(), targets.to(device).float()

        optimizer.zero_grad()
        outputs = model(data)  # Ensure your model's forward method aligns
        y_teacher = data[:, :, [20, 21, 22]].squeeze(-1)
        y_pred = outputs[-1]
        y_true = targets
        # y_pred = y_pred.permute(1, 0, 2)

        # print("teacher preds: ", y_teacher)
        # print("student preds: ", y_pred)
        # print("outputs: ", outputs)
        # print("targets: ", targets)

        # Compute loss
        loss = combined_loss(y_true, y_pred, y_teacher, quantile=0.5, alpha=1.0)

        loss.backward()
        optimizer.step()
        running_loss += loss.item()

        if i % 10 == 0:
            print(f"Epoch {epoch+1}/{num_epochs}, Batch {i}, Loss: {loss.item():.4f}")

    print(f"Epoch {epoch+1}, Avg Loss: {running_loss / len(train_loader):.4f}")

(10285, 20)
(10285, 3)
NaN Entries in teacher_5min_preds_df.index:
DatetimeIndex([], dtype='datetime64[ns, US/Pacific]', name='timestamp', freq=None)
Updated X Shape: Rows 10285 Columns 23
(10285, 23)
(10285, 3)
Training labels shape (X_with_teacher): (10285, 23)
Training set shape (X_train_final): (7199, 23)
Training labels shape (y_train_final): (7199, 3)
QUANTILE LOSS:  tensor(11591.4219, grad_fn=<MeanBackward0>)
DISTILLATION LOSS:  tensor(0.7238, grad_fn=<MeanBackward0>)
Epoch 1/25, Batch 0, Loss: 11591.4219
QUANTILE LOSS:  tensor(10171.4805, grad_fn=<MeanBackward0>)
DISTILLATION LOSS:  tensor(0.7148, grad_fn=<MeanBackward0>)
QUANTILE LOSS:  tensor(8215.1436, grad_fn=<MeanBackward0>)
DISTILLATION LOSS:  tensor(0.7411, grad_fn=<MeanBackward0>)
QUANTILE LOSS:  tensor(6147.6899, grad_fn=<MeanBackward0>)
DISTILLATION LOSS:  tensor(0.7397, grad_fn=<MeanBackward0>)
QUANTILE LOSS:  tensor(6030.6597, grad_fn=<MeanBackward0>)
DISTILLATION LOSS:  tensor(0.7282, grad_fn=<MeanBackward0>)
QUANT

In [207]:
import numpy as np
import torch
# Define model, optimizer, and loss function
num_steps = 1  # Since you're using LSTM for time series data with one step
num_inputs = X_train_final.shape[2]
output_size = y_train_final.shape[1]
model = Net(num_inputs, num_steps).to(device)

optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

# Assuming the model has already been trained
# Now let's test the model on the test set

model.eval()  # Set model to evaluation mode
actuals = []
predictions = []

with torch.no_grad():
    for data, targets in test_loader:
        data, targets = data.to(device), targets.to(device)

        # Get model predictions
        outputs = model(data)
        y_pred = outputs[-1]  # Last time-step prediction
        y_pred = y_pred.permute(1, 0, 2)  # Reorganize to match ground truth shape

        # Collect actual and predicted values
        actuals.append(targets.cpu().numpy())  # Convert to numpy for easier comparison
        predictions.append(y_pred.cpu().numpy())  # Convert to numpy for easier comparison

# Concatenate all batches
actuals = np.concatenate(actuals, axis=0)
predictions = np.concatenate(predictions, axis=0)
predictions = predictions.squeeze(1)
print("Actuals: ", actuals)
print("predictions: ", predictions)

# Print the shapes of the actuals and predictions
print(f"Actual values shape: {actuals.shape}")
print(f"Predictions shape: {predictions.shape}")

# Compute MAPE for each output (e.g., power, voltage, current)
mape_power = MAPE(actuals[:, 0], predictions[:, 0])
mape_voltage = MAPE(actuals[:, 1], predictions[:, 1])
mape_current = MAPE(actuals[:, 2], predictions[:, 2])

# Print results
print(f"Test MAPE power: {mape_power:.3f}%")
print(f"Test MAPE voltage: {mape_voltage:.3f}%")
print(f"Test MAPE current: {mape_current:.3f}%")

# Optionally, you can store the results in a dictionary or log them for further analysis

Actuals:  [[ 241.8258  1189.6141  2090.2522 ]
 [ 313.35068 1186.8251  2711.1724 ]
 [ 302.72406 1164.8369  2670.9026 ]
 ...
 [ 732.9445  3099.3567  2410.5564 ]
 [ 922.6488  3775.9644  2457.9927 ]
 [ 872.704   4119.1685  2137.6677 ]]
predictions:  [[-0.01992282 -0.03442565  0.08372541]
 [-0.01992282 -0.03442565  0.08372541]
 [-0.01992282 -0.03442565  0.08372541]
 ...
 [-0.01992282 -0.03442565  0.08372541]
 [-0.01992282 -0.03442565  0.08372541]
 [-0.01992282 -0.03442565  0.08372541]]
Actual values shape: (1543, 3)
Predictions shape: (1543, 3)
Test MAPE power: 100.007%
Test MAPE voltage: 100.003%
Test MAPE current: 99.997%


In [115]:
# Validation loop
model.eval()
valid_loss = 0.0
valid_mape = 0.0
valid_mse = 0.0

with torch.no_grad():
    for data, targets in valid_loader:
        data = data.to(device)
        targets = targets.to(device)

        outputs = model(data)
        loss = quantile_loss(targets, outputs, quantile=0.5)
        valid_loss += loss.item()

        # Calculate MAPE
        mape = mean_absolute_percentage_error(targets.cpu().numpy(), outputs.cpu().numpy())
        valid_mape += mape

        # Calculate MSE
        mse = mean_squared_error(targets.cpu().numpy(), outputs.cpu().numpy())
        valid_mse += mse

print(f"Validation Loss: {valid_loss / len(valid_loader):.4f}")
print(f"Validation MAPE: {valid_mape / len(valid_loader):.4f}")
print(f"Validation MSE: {valid_mse / len(valid_loader):.4f}")

# Test loop
test_loss = 0.0
test_mape = 0.0
test_mse = 0.0

with torch.no_grad():
    for data, targets in test_loader:
        data = data.to(device)
        targets = targets.to(device)

        outputs = model(data)
        loss = quantile_loss(targets, outputs, quantile=0.5)
        test_loss += loss.item()

        # Calculate MAPE
        mape = mean_absolute_percentage_error(targets.cpu().numpy(), outputs.cpu().numpy())
        test_mape += mape

        # Calculate MSE
        mse = mean_squared_error(targets.cpu().numpy(), outputs.cpu().numpy())
        test_mse += mse

print(f"Test Loss: {test_loss / len(test_loader):.4f}")
print(f"Test MAPE: {test_mape / len(test_loader):.4f}")
print(f"Test MSE: {test_mse / len(test_loader):.4f}")

RuntimeError: input has inconsistent input_size: got 23 expected 20

In [None]:

batchsize_list = [300, 150, 50, 20, 8]
time_frame_list = ['3min', '5min', '15min', '30min', '60min']
time_frame_seconds_list = [180, 300, 900, 1800, 3600]
n = 0

snn_power_mape_list = []
snn_volt_mape_list = []
snn_curr_mape_list = []

# Dictionary to store mv variables
mv_dict = {}

for j in range(len(batchsize_list)):
    batchsize = batchsize_list[j]
    time_frame = time_frame_list[j]
    time_frame_seconds = time_frame_seconds_list[j]

    X = pd.concat([df["power - 1h"], df["power - 2h"], df["power - 3h"],
                   df["V1 - 1h"], df["V1 - 2h"], df["V1 - 3h"],
                   df["I1L - 1h"], df["I1L - 2h"], df["I1L - 3h"],
                   df["EC - 1h"], df["EC - 2h"], df["EC - 3h"],
                   df["raw_VWC - 1h"], df["raw_VWC - 2h"], df["raw_VWC - 3h"],
                   df["temp - 1h"], df["temp - 2h"], df["temp - 3h"],
                   df["tsd"], df["hour"]], axis=1)
    y = pd.concat([df["Power (uW)"], df['Voltage (mV)'], df['Current (uA)']], axis=1)

    # Normalize Data
    X_normalized = ((X - X.min()) / (X.max() - X.min()))

    # Split train and test sets
    X_train, X_test = train_test_split(X_normalized, test_size=0.3, shuffle=False)
    y_train, y_test = train_test_split(y, test_size=0.3, shuffle=False)

    X_valid, X_test = train_test_split(X_test, test_size=0.5, shuffle=False)
    y_valid, y_test = train_test_split(y_test, test_size=0.5, shuffle=False)

    # Calculate actual energy generated in test set
    E_actual = 0
    for i in range(len(y_test) - 1):
        t = (y_test.index[i+1] - y_test.index[i]).total_seconds()
        if t < 180:
            E_actual += y_test['Power (uW)'][i] * t

    # Resample data
    X_valid = X_valid.resample(time_frame).mean().dropna()
    y_valid = y_valid.resample(time_frame).mean().dropna()

    X_test = X_test.resample(time_frame).mean().dropna()
    y_test = y_test.resample(time_frame).mean().dropna()

    # Define mv variable for the current time frame
    mv_dict[time_frame] = y_test

    # Reshape data
    X_train = X_train.values.reshape((X_train.shape[0], 1, X_train.shape[1]))
    X_valid = X_valid.values.reshape((X_valid.shape[0], 1, X_valid.shape[1]))
    X_test = X_test.values.reshape((X_test.shape[0], 1, X_test.shape[1]))

    # Convert to tensor
    X_train = torch.tensor(X_train)
    y_train = torch.tensor(y_train.values)
    X_valid = torch.tensor(X_valid)
    y_valid = torch.tensor(y_valid.values)
    X_test = torch.tensor(X_test)
    y_test = torch.tensor(y_test.values)

    # Make datasets
    train_dataset = TensorDataset(X_train, y_train)
    valid_dataset = TensorDataset(X_valid, y_valid)
    test_dataset = TensorDataset(X_test, y_test)

    # Create DataLoaders
    train_loader = DataLoader(train_dataset, batch_size=batchsize, shuffle=False)
    valid_loader = DataLoader(valid_dataset, batch_size=batchsize, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batchsize, shuffle=False)

    num_steps = 50
    num_inputs = X_train.shape[2]

    # Create new instance of the SNN Class
    model = Net(num_inputs, num_steps).to(device)

    file = 'trained_models/snn_' + time_frame + '_quant50.pth'
    print(file)

    checkpoint = torch.load(file, map_location=torch.device('cpu'), weights_only=True)
    model.load_state_dict(checkpoint['state_dict'])

    model.eval()
    actuals = []
    predictions = []

    with torch.no_grad():
        for data, targets in test_loader:
            # Prepare data
            data = data.to(device).float()
            targets = targets.to(device).float()

            _, _, _, output = model(data)

            output = output.cpu().squeeze(1).detach()
            actuals.append(targets)
            predictions.append(output[-1])

    # Convert lists to tensors
    actuals = torch.cat(actuals, dim=0)
    predictions = torch.cat(predictions, dim=0)

    mv = mv_dict[time_frame]
    mv["power_pred_med_" + time_frame] = predictions[:, 0].numpy()
    mv["voltage_pred_med_" + time_frame] = predictions[:, 1].numpy()
    mv["current_pred_med_" + time_frame] = predictions[:, 2].numpy()

    print(f'Voltage overestimation rate for {time_frame}: %.3f%%' % (
        (mv['Voltage (mV)'].values <= mv["voltage_pred_med_" + time_frame]).mean() * 100))
    print(f"Test MAPE power ({time_frame}): %3f" % MAPE(mv['Power (uW)'].values.ravel(), mv["power_pred_med_" + time_frame]))
    print(f"Test MAPE voltage ({time_frame}): %3f" % MAPE(mv['Voltage (mV)'], mv["voltage_pred_med_" + time_frame]))
    print(f"Test MAPE current ({time_frame}): %3f" % MAPE(mv['Current (uA)'], mv["current_pred_med_" + time_frame]))

    E_pred = 0
    for i in range(len(mv) - 1):
        t = (mv.index[i+1] - mv.index[i]).total_seconds()
        if t <= time_frame_seconds + 50:
            E_pred += mv["power_pred_med_" + time_frame][i] * t

    print(f'Predicted vs. Actual Total Energy Percent Difference ({time_frame}): %.3f%%' % (
        (E_pred - E_actual) * 100 / E_actual))

    V_actual = mv['Voltage (mV)'].mean()
    V_pred = mv["voltage_pred_med_" + time_frame].mean()
    print(f'Predicted vs. Actual Total Voltage Percent Difference ({time_frame}): %.3f%%' % (
        (V_pred - V_actual) * 100 / V_actual))
