## CSV Results Processor

Graph the results of climate prediction runs recorded in CSV files.



In [None]:
# Attach Google Drive to the Colab runtime at /content/drive; the paths
# configured further down in this notebook live under that mount point.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

In [None]:
# Debug flag (not referenced by the cells visible in this notebook section).
debug = False

# Project folder on the mounted Google Drive; every other path derives from it.
DRIVE_PATH = "/content/drive/MyDrive/data606"

# Location of this script in GDrive (the trailing slash is part of the value,
# so file names can be appended directly).
SCRIPT_PATH = f"{DRIVE_PATH}/src/"

# Root path of the prediction data on the cloud drive (trailing slash included).
DATA_ROOT = f"{DRIVE_PATH}/data/preds/"

# Journal file
JOURNAL_LOG = f"{SCRIPT_PATH}cv-results.csv"

# Colors for rendering
colors = 'rbygm'

In [None]:
# Visualization params

# Journal column holding the error metric that is aggregated and charted.
METRIC = 'MSE'

# Journal columns used as the grouping key when aggregating METRIC.
GROUP_COLS = [
    'TargetLabel',
    'InputWindow',
    'LabelWindow',
    'TestPct',
    'Columns',
]

# Positional constants 0..3.
# NOTE(review): if these are meant as positions in GROUP_COLS, TEST_PCT (2)
# and COLS (3) land on 'LabelWindow' and 'TestPct' rather than 'TestPct' and
# 'Columns' -- confirm before relying on them.
TGT_LABEL, WIND_SIZE, TEST_PCT, COLS = range(4)

In [None]:
import glob
import os
import pandas as pd
from pathlib import Path
import numpy as np

# Load the results journal (one row per recorded run) into a DataFrame.
df = pd.read_csv(filepath_or_buffer=JOURNAL_LOG)


In [None]:
# Show the journal as loaded from JOURNAL_LOG (notebook cell output).
df

In [None]:
# Collapse the journal to one METRIC value per unique combination of
# GROUP_COLS.  The mean is used as the per-group summary: the previous
# `.all()` is a boolean reduction (True when every value in the group is
# truthy), so it reported True/False instead of an error metric.
df_net = df.groupby(GROUP_COLS)[METRIC].mean()

In [None]:
# Show the per-group aggregate built from the journal (notebook cell output).
df_net

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.ticker as plticker

In [None]:
# Keep only journal rows that carry a real serial, i.e. a 'Serial' value
# greater than 10; everything else is dropped from df.
df = df.loc[df['Serial'].gt(10)]

In [None]:
# Bar chart of METRIC for every serial left in the journal.
# The rcParams entry sets the default size for figures created from here on.
plt.rcParams["figure.figsize"] = [18, 6]

sns.barplot(data=df, x='Serial', y=METRIC)

# Serial numbers are long, so turn the tick labels on their side.
plt.xticks(rotation=90)
plt.xlabel('Serial')
plt.ylabel(METRIC)
plt.title(f'{METRIC} for all Serials')

In [None]:
# Draw one "test vs. predicted" line chart for every journal row that has a
# real serial and whose rounded MSE does not exceed MAX_MSE.  The series come
# from the per-serial file DATA_ROOT/model-preds-<serial>.csv.

# Spacing handed to the x-axis MultipleLocator (one major tick per 6 steps).
TICK_SPACING = 6

# Runs whose MSE (rounded to 4 places) is above this value are not plotted.
MAX_MSE = .02

# Columns of the prediction file that are drawn, in plotting order.
PLOT_COLS = ['y_test', 'preds']

# iterrows() hands each row over directly, so the loop does not depend on
# df.loc[label] returning a single row (it returns a frame when an index
# label is repeated).  `i` is the row's position and appears in the title.
for i, (_, cur_row) in enumerate(df.iterrows()):
    serial = cur_row['Serial']
    # Same "real serial" cutoff as the journal filter; repeated here so the
    # cell also works on an unfiltered df.
    if serial <= 10:
        continue

    mse = round(float(cur_row['MSE']), 4)
    mae = round(float(cur_row['MAE']), 4)
    model = cur_row['Model']
    epochs = cur_row['NumEpochs']

    if mse > MAX_MSE:
        continue

    # Load the data before creating the figure so that a missing or
    # unreadable file does not leave an empty figure behind.
    df_stats = pd.read_csv(DATA_ROOT + f'model-preds-{serial}.csv')
    # Save numeric index off, could be handy
    index = df_stats.index.values
    # ...but drop the stored 'index' column and key the frame by date instead
    df_stats.drop(columns=['index'], inplace=True)
    df_stats.set_index('pred_dates', drop=True, inplace=True)

    fig, ax = plt.subplots(1, 1, figsize=(11, 5), layout="constrained")

    # Plot the stats
    sns.lineplot(data=df_stats[PLOT_COLS], ax=ax)

    # Order matters: first register a tick and label for every row, then
    # swap in the MultipleLocator that places ticks TICK_SPACING apart.
    ax.set_xticks(df_stats.index, labels=df_stats.index, rotation=90)
    ax.xaxis.set_major_locator(plticker.MultipleLocator(TICK_SPACING))
    ax.set_xlabel('Time steps')
    ax.set_ylabel('Temp in degrees C')

    # Only the first four GROUP_COLS entries go into the title
    # ('Columns', the fifth, is left out as in the original layout).
    title_str = ''.join(f'{col}: {cur_row[col]}\n' for col in GROUP_COLS[:4])
    ax.set_title(f'({i}) Pred vs. Actual for Serial {serial}\n{title_str}')
    ax.annotate(f'Model: {model} MSE: {mse}   MAE: {mae} Epochs: {epochs}',
                xy=(1, 1),  # top-right corner of the axes (axes fraction)
                xytext=(50, 10),  # offset from xy, in pixels
                xycoords='axes fraction',
                textcoords='offset pixels',
                horizontalalignment='right'
                )

    # Single legend call, labelled with exactly the columns that were drawn.
    # Previously a first legend was immediately overwritten by a second one
    # built from *all* df_stats columns, which mislabels the lines whenever
    # the file holds extra columns or a different column order.
    ax.legend(PLOT_COLS)
