# Results

> This handles results of a GPFA/SimpleGP Imputation with plotting and metrics

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
#| hide
#| default_exp results

In [None]:
#| export
from meteo_imp.gpfa.learner import *
from meteo_imp.data_preparation import *
from meteo_imp.gpfa.imputation import *
from meteo_imp.utils import *

import torch

import pandas as pd
import numpy as np
import sklearn
from sklearn.metrics import mean_squared_error, r2_score

from fastcore.foundation import patch, patch_to
from fastcore.meta import delegates
from fastcore.basics import store_attr, listify
from fastcore.test import test_close
from itertools import zip_longest
from fastcore.dispatch import typedispatch

import matplotlib.pyplot as plt
import altair as alt
from altair import datum

from typing import Collection


In [None]:
#| export
class ImputationResult:
    "Imputed data bundled with the complete data and model info, used to compute metrics and draw plots"
    def __init__(self,
                 data_imputed, #imputed data in tidy format
                 data_complete, # complete data in tidy format
                 model_info, # learner for parameters display
                 units = None, # units for plots
                 metrics_all_data = True # Compute metrics only for gap or for all data?
                ):
        # store_attr() saves every constructor argument as an attribute of the same name
        store_attr()

    def __repr__(self):
        "Short summary: the number of distinct `time` values in the imputed data"
        return f"""Imputation Result:
        N obs: {self.data_imputed.time.unique().shape[0]}"""

    def __str__(self):
        "Same text as `__repr__`"
        return self.__repr__()

In [None]:
np.array([5, 3])

array([5, 3])

Constructor methods: build an `ImputationResult` from a `GPFAImputation` or a `GPFAImputationExplorer`

In [None]:
#| export
@patch
def to_result(self: GPFAImputation, data_complete, units=None):
    "Impute `self.data` and wrap it in an `ImputationResult` whose metrics cover only the gaps"
    var_names = self.data.columns
    imputed = self.impute(add_time=True)
    model_info = self.learner.model.get_info(var_names)
    # metrics_all_data=False: metrics are restricted to rows flagged as missing
    return ImputationResult(imputed, data_complete, model_info, units, metrics_all_data=False)

In [None]:
#| export
@patch
def to_result(self: GPFAImputationExplorer, data_complete, units=None):
    "Wrap `self.predict()` in an `ImputationResult` (metrics use all data, the class default)"
    var_names = self.data.columns
    predicted = self.predict()
    model_info = self.learner.model.get_info(var_names)
    return ImputationResult(predicted, data_complete, model_info, units)

In [None]:
# Reset the seed before generating data — presumably makes the data and the random gaps reproducible; see `reset_seed`
reset_seed()
# Small synthetic dataset (2 variables x 10 observations, see the table below) with values removed at random
fd = MeteoDataTest.generate_gpfa(2, 10, Lambda=[1,2.]).add_random_missing()

# The two classes that `to_result` is patched onto, both built from the data with gaps
imp = GPFAImputation(fd.data)
imp_exp = GPFAImputationExplorer(fd.data)

In [None]:
fd.data

Unnamed: 0,x0,x1
0,0.352996,0.013275
1,,0.248802
2,-0.264305,-0.657965
3,0.248825,0.822856
4,-0.896081,
5,0.773089,
6,,
7,,
8,-0.841634,-1.945134
9,1.269603,2.074223


In [None]:
# Build one result per class; the dummy unit strings show up in the metric tables and axis labels below
res = imp.to_result(fd.data_compl_tidy, units = {'x0': 'x0 units', 'x1': 'x1 unitssss'})
res_exp = imp_exp.to_result(fd.data_compl_tidy, units = {'x0': 'x0 units', 'x1': 'x1 unitssss'})

### Metrics

In [None]:
#| export
@patch
def compute_metric(self: ImputationResult,
                   metric,  # function that takes as argument true and pred and returns the metric
                   metric_name = 'metric',
                   ):
    "Apply `metric` to each variable separately and return one row per variable"
    # pair every imputed row with the matching complete row
    merged = pd.merge(self.data_imputed, self.data_complete, on = ['time','variable'])

    rows = []
    for name in merged.variable.unique():
        selected = merged.variable == name
        if not self.metrics_all_data:
            # gap-only mode: keep just the rows flagged as missing
            selected = selected & (merged.is_missing == True)

        subset = merged[selected]
        # `value` is passed as the truth and `mean` as the prediction; no rows -> no score
        score = metric(subset['value'], subset['mean']) if len(subset) > 0 else None
        rows.append({'variable': name, metric_name: score})

    return pd.DataFrame(rows)

In [None]:
#| exports
@patch
def rmse(self: ImputationResult):
    "Root mean squared error per variable, with a `units` column when units are available"
    rmse = self.compute_metric(lambda x, y: np.sqrt(mean_squared_error(x,y)), "rmse")
    if self.units and 'variable' in rmse.columns:
        # Look each unit up by variable name instead of relying on the order (and length)
        # of the units mapping; variables without a unit get a missing value.
        rmse = rmse.assign(units=rmse['variable'].map(self.units))
    return rmse

In [None]:
res_exp.rmse()

Unnamed: 0,variable,rmse,units
0,x0,0.738387,x0 units
1,x1,1.337692,x1 unitssss


In [None]:
res.rmse()

Unnamed: 0,variable,rmse,units
0,x0,0.861892,x0 units
1,x1,1.485962,x1 unitssss


In [None]:
#| exports
@patch
def r2(self: ImputationResult):
    "r2 score (sklearn `r2_score`) per variable, in the table format of `compute_metric`"
    return self.compute_metric(metric=r2_score, metric_name="r2")

In [None]:
res.r2()

Unnamed: 0,variable,r2
0,x0,-0.279686
1,x1,-0.018969


In [None]:
#| export
@patch
def print_metrics(self: ImputationResult):
    """Compute r2 and RMSE twice: on all data and only on the gaps.

    Returns a dict of metric tables keyed by `'r2'`, `'RMSE'`, `'r2 - Only GAP'`
    and `'RMSE - Only GAP'`. Despite the name, nothing is printed here.
    """
    old = self.metrics_all_data
    try:
        self.metrics_all_data = True
        met = {
            'r2': self.r2(),
            'RMSE': self.rmse(),
        }

        self.metrics_all_data = False
        met['r2 - Only GAP'] = self.r2()
        met['RMSE - Only GAP'] = self.rmse()
    finally:
        # restore the caller's setting even when one of the metrics raises
        self.metrics_all_data = old
    return met

### Prediction plot

In [None]:
alt.Chart(pd.DataFrame({'a': [1,2,3]})).mark_tick().encode(x = 'a')

  for col_name, dtype in df.dtypes.iteritems():


In [None]:
#| export
def _plot_error_bar(data, variable, y_label, properties, sel):
    "Layer a +/- 2 std error band under the predicted mean line, both read from `data`"
    legend = alt.Legend(title=["Line: pred. mean", "area: +/- 2 std", "(variable)"])

    # band between mean - 2*std and mean + 2*std; `properties` is applied to this layer
    band = alt.Chart(data).mark_errorband()
    band = band.encode(
        x = "time",
        y = alt.Y("err_low:Q", title = y_label, scale=alt.Scale(zero=False)),
        y2 = "err_high:Q",
        color = alt.Color("variable", legend = legend),
        tooltip = alt.Tooltip(['std', 'mean'], format=".4"),
    )
    band = band.transform_calculate(
        err_low = "datum.mean - 2 * datum.std",
        err_high = "datum.mean + 2 * datum.std",
    )
    band = band.properties(**properties)

    # predicted mean; this layer carries the selection and the chart title
    line = alt.Chart(data).mark_line()
    line = line.encode(
        x = "time",
        y = alt.Y("mean:Q", title = y_label, scale=alt.Scale(zero=False)),
        color = "variable",
    )
    line = line.add_selection(sel).properties(title = variable)

    return band + line


In [None]:
#| exports
def _plot_variable(imp, complete, variable, y_label="", sel=None, properties = {}):
    """Plot the prediction for a single `variable`.

    `imp` and `complete` are frames with a `variable` column and are filtered to `variable`.
    With `complete=None` only the error band and mean line are returned. Otherwise the
    `complete` values are drawn as points on top, and a tick strip marking the rows where
    `is_missing` is true is concatenated underneath.
    """
    properties = properties or {}  # tolerate properties=None

    imp = imp[imp.variable == variable]
    # without a shared selection each plot gets its own zoom/pan binding
    sel = sel if sel is not None else alt.selection_interval(bind="scales")

    base_plot = _plot_error_bar(imp, variable, y_label, properties, sel)

    if complete is None:
        return base_plot

    complete = complete[complete.variable == variable]
    truth_plt = alt.Chart(complete).mark_point(
        color='black',
        strokeWidth = 1,
        fillOpacity = 1
    ).encode(
        x = alt.X("time", axis=alt.Axis(domain=False, labels = False, ticks=False, title=None)),
        y = alt.Y("value", title = y_label, scale=alt.Scale(zero=False)),
        fill= alt.Fill("is_missing", scale = alt.Scale(range=["#ffffff00", "black"]),
                       legend = alt.Legend(title =["Observed data","(is missing)"])),
        shape = "is_missing",
    )

    # Only the width is forwarded to the strip. Check for the key itself so that
    # properties without a width (e.g. only a height) do not raise a KeyError.
    p = {'width': properties['width']} if 'width' in properties else {}
    missing = alt.Chart(complete).mark_tick(
        color='black',
    ).encode(
        x = "time",
        color = alt.condition(datum.is_missing, alt.value('black'), alt.value('white'))
    ).add_selection(
        sel
    ).properties(**p)

    # negative spacing pulls the strip up against the main chart
    return alt.VConcatChart(vconcat=[(truth_plt + base_plot), missing], spacing=-10)
    

In [None]:
imp_d = imp.impute(tidy=True, add_time=True)

# With `complete=None` only the error band and mean line are drawn (no observed points, no missing-data strip)
_plot_variable(imp_d, None, "x1")

  for col_name, dtype in df.dtypes.iteritems():


In [None]:
#| exports
@patch
def plot_pred(
    self: ImputationResult,
    n_cols: int = 2,
    bind_interaction: bool =True, # Whether the sub-plots for each variable should be connected for zooming/panning
    properties:dict = {} # additional properties (eg. size) for altair plot
):
    "Plot the prediction for each variable"

    variables = pd.unique(self.data_imputed.variable)
    # One row of charts per `n_cols` variables. The tidy frame has several rows per
    # variable, so its length is not the number of plots.
    plot_list = [alt.hconcat() for _ in range(0, len(variables), n_cols)]
    # a single shared x-selection links zoom/pan across all sub-plots
    selection_scale = alt.selection_interval(bind="scales", encodings=['x']) if bind_interaction else None
    for idx, variable in enumerate(variables):
        # add the unit to the axis label only when one is known for this variable
        has_unit = self.units is not None and variable in self.units
        y_label = f"{variable} [{self.units[variable]}]" if has_unit else variable
        plot_list[idx // n_cols] |= _plot_variable(self.data_imputed,
                                                   self.data_complete,
                                                   variable,
                                                   y_label = y_label,
                                                   sel = selection_scale, properties=properties)

    plot = alt.vconcat(*plot_list)

    return plot

In [None]:
res.plot_pred()

In [None]:
res.plot_pred(bind_interaction=False)

The code runs correctly and, as expected, the error band is wider around the missing data points.

### Display results

Show the prediction plot, metrics and model parameters in one convenient view.

In [None]:
#| exports
@patch
def display_results(self: ImputationResult, plot_args={}):
    "Show the prediction plot, then the metric tables, then the model info"
    # default plot size; a `properties` entry in `plot_args` replaces it entirely
    default_args = {'properties': {'height': 200 , 'width': 350}}
    merged_args = {**default_args, **plot_args}

    # NOTE(review): `display` is not imported in this notebook. It is a builtin inside
    # IPython/Jupyter; confirm the exported module gets it from one of the star imports.
    display(self.plot_pred(**merged_args))
    display_as_row(self.print_metrics(), "Metrics")
    display_as_row(self.model_info, "Model Info")

In [None]:
res.model_info

{'Lambda':   variable        z0
 0       x0  0.796563
 1       x1  0.459402,
 'lengthscale':   latent  lengthscale
 0     z0     0.693147,
 'psi':   variable       psi
 0       x0  0.693147
 1       x1  0.693147,
 'likelihood':       noise
 0  0.693247}

In [None]:
res.display_results()

  for col_name, dtype in df.dtypes.iteritems():


variable,r2
x0,0.6066
x1,0.5064

variable,rmse,units
x0,0.4721,x0 units
x1,0.9398,x1 unitssss

variable,r2
x0,-0.2797
x1,-0.019

variable,rmse,units
x0,0.8619,x0 units
x1,1.486,x1 unitssss


variable,z0
x0,0.7966
x1,0.4594

latent,lengthscale
z0,0.6931

variable,psi
x0,0.6931
x1,0.6931

noise
0.6932


## Export 

In [None]:
#| hide
from nbdev import nbdev_export
nbdev_export()