# 10 Why Hat
> Analytical approach using Neural Network on tabulate data

The core engine for project $\large\hat{y}$.

The dataset is from [New York City Airbnb Open Data](https://www.kaggle.com/dgomonov/new-york-city-airbnb-open-data#AB_NYC_2019.csv) on kaggle

In [1]:
# default_exp whyhat

In [2]:
# export
import pandas as pd
import numpy as np
from pathlib import Path
import os
import json
from torchember.core import color
from torchember.helper import tracker
os.environ['KMP_DUPLICATE_LIB_OK']='True'

In [3]:
DATA = Path("../data")

In [4]:
CSV_PATH = DATA/"AB_NYC_2019.csv"

The AirBnB New York 2019 dataset

In [5]:
df = pd.read_csv(CSV_PATH)
df.sample(10)

Unnamed: 0,id,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365
17324,13676601,Here's a great offer on a spacious furnished r...,79598330,Nesha,Brooklyn,East Flatbush,40.66337,-73.92576,Private room,30,3,20,2018-03-31,0.54,1,27
20494,16323239,Spacious 2 BDR - Hell's Kitchen/Times Square,2559886,Sy,Manhattan,Hell's Kitchen,40.76284,-73.98849,Entire home/apt,130,4,2,2017-11-30,0.07,1,0
27546,21679639,Entire 1-Bedroom Greenpoint Apartment,4696622,James,Brooklyn,Greenpoint,40.72747,-73.95462,Entire home/apt,95,3,0,,,1,0
45469,34776151,Bedroom + den + bath w/ sep. entry in Bed Stuy!,73612539,Rebecca,Brooklyn,Bedford-Stuyvesant,40.68602,-73.94844,Private room,68,1,8,2019-07-02,6.32,2,36
33070,26085075,Charming studio with PRIVATE DECK by McCarren ...,27530449,Estefania,Brooklyn,Greenpoint,40.72173,-73.9482,Private room,106,4,21,2019-06-03,1.73,2,58
19553,15634892,"Adorable, NYC studio for the holiday!",15353668,Bria,Manhattan,Midtown,40.75228,-73.97186,Entire home/apt,144,28,0,,,1,90
39825,30954420,Artistic apartment in the Heart of Manhattan,231298987,Austin,Manhattan,Lower East Side,40.7189,-73.98599,Entire home/apt,200,4,1,2019-03-16,0.26,1,0
10897,8407092,Historic Ridgewood Brick Townhouse,9684993,Randy,Queens,Ridgewood,40.70928,-73.89795,Entire home/apt,139,5,3,2018-07-30,0.16,1,0
10715,8247721,Charming Crown Heights Brownstone,43494916,Tilly,Brooklyn,Crown Heights,40.67791,-73.95337,Entire home/apt,80,3,0,,,2,0
5187,3743048,*WARM*Beautiful*Room*ST. GEORGE steps to ferry!,19143974,Meghan,Staten Island,St. George,40.64408,-74.07834,Private room,58,3,93,2019-05-06,2.1,1,279


### Config how we learn the columns

This is a python/console interface, that will 
* guide the user through columns one by one, 
* let user decide how should we treat a column during the learning

In [6]:
# export
from hashlib import md5
from datetime import datetime
from torch import nn
import torch
import numpy as np
def md5hash(x):
    return md5(x.encode()).hexdigest()

## X input modules

In [7]:
# export 
class ModelInput(nn.Module):
    def __init__(self,rich_col):
        super().__init__()
        self.rich_col = rich_col
        rich_col.input_module =self

class InputEmb(ModelInput):
    def __init__(self,rich_col):
        super().__init__(rich_col)
        self.emb = nn.Embedding(len(rich_col.top_freq)+1,rich_col.hidden_size)
        
    def forward(self,x):
        return self.emb(x)
    
class InputOneHot(ModelInput):
    def __init__(self,rich_col):
        super().__init__(rich_col)
        self.eye = torch.eye(len(self.rich_col))
        
    def forward(self,x):
        return self.eye[x]
    
class InputConti(ModelInput):
    def __init__(self,rich_col):
        super().__init__(rich_col)
        rich_col.mean = rich_col.col.mean()
        rich_col.std = rich_col.col.std()
        self.bn=nn.BatchNorm1d(1)
        self.tanh = nn.Tanh()
        
    def forward(self,x):
        x = self.tanh(self.bn(x))
        return x.detach()
    

## Y target encode

In [8]:
# export
class YEncoder:
    """
    Encode the why into the required shape
    input of the __call__, numpy array
    """
    def __init__(self,rich_col):
        super().__init__()
        self.rich_col = rich_col
        assert rich_col.is_y,f"{rich_col.name} isn't a y column set"
        rich_col.y_encoder = self
        
    def __call__(self,x):
        raise NotImplementedError("Defind __call__ of YEncoder first")

class YOneHot(YEncoder):
    def __init__(self, rich_col):
        super().__init__(rich_col)
        self.eye = np.eye(len(rich_col)).astype(np.int)
    
    def __call__(self, x):
        return self.eye[x]
    
class YConti(YEncoder):
    def __init__(self, rich_col):
        super().__init__(rich_col)
        self.mean = rich_col.col.mean()
        self.std = rich_col.col.std()
        
    def __call__(self,x):
        return np.clip((x-self.mean)/self.std,-2,2)

## Enhanced columns

In [9]:
# export
class RichColumn(object):
    """
    A pandas series manager
    """
    def __init__(self,column, is_y = False,min_occur = 5, is_emb = True,hidden_size=20):
        self.col = column
        self.col.rc = self
        self.name = self.col.name
        self.min_occur = min_occur
        self.hidden_size = hidden_size
        self.is_emb =  is_emb
        self.is_y = is_y
        self.use = True
        self.is_conti = True
        self.defined = False
        
    def kill(self):
        """
        set column to kill mode, that it would not be involved in the learning
        """
        self.defined = True
        self.use = False
        
    def conti(self):
        """
        set column to contineous data
        """
        self.defined = True
        self.is_conti = True
        
    def disc(self):
        """
        set column to discrete data
        """
        self.defined = True
        self.is_conti = False
        
    def is_number(self):
        """
        Is this column's data type in any form of number
        """
        return self.col.dtype in (int,float,
                              np.float16,np.float32,np.float64,np.float64,
                              np.int0,np.int8,np.int16,np.int32,np.int64)
    
    def __bool__(self):
        """
        is this column going to join the learning
        """
        return self.use
    
    def __len__(self):
        """
        width of column when entering the model, or used as target
        """
        if self.is_conti:
            return 1
        else:
            if self.is_emb and (self.is_y==False):
                return self.hidden_size
            else:
                width = len(self.top_freq)+1
                width =1 if width==2 else width
                return width
    
    def __repr__(self,):
        return f"<Rich Column:{self.name}>"
    
    def top_freq_(self):
        freq = self.freq()
        self.top_freq = freq[freq[self.name]>=self.min_occur].reset_index()
        self.tokens = dict((v,k+1) for k,v in enumerate(self.top_freq["index"]))
        self.token_arr = np.array(["<mtk>",]+list(self.top_freq["index"]))
        return self.top_freq
    
    def freq(self):
        return pd.DataFrame(data=self.col.value_counts())
    
    @property
    def conf_dict(self):
        return dict((i,getattr(self,i)) for i in ["name","defined","is_conti","is_y","is_emb","use"])
    
    def set_conf(self,conf_dict):
        for k,v in conf_dict.items():
            setattr(self,k,v)
        return self
    
    def encode(self,x):
        if self.is_conti:
            return x if x else self.mean
        else:
            try:
                return self.tokens[x]
            except:
                return 0
        
    def decode(self,idx):
        return self.token_arr[idx]
    
    def build_learn(self):
        """
        prepare the column for learning
        """
        if self.is_y == False:
            if self.is_conti:
                self.mean = self.col.mean()
                InputConti(self)
            else:
                InputEmb(self)
        else:
            if self.is_conti:
                self.mean = self.col.mean()
                YConti(self)
            else:
                YOneHot(self)
        return self

In [10]:
# export 
class RichDF(object):
    """
    A pandas dataframe manager
    """
    def __init__(self,df,fname=None):
        self.df = df
        self.columns = dict()
        if fname==None:
            fname=f"why_hat_{self.ts_str}"
        self.t = tracker("torchember",fname)
        self.t.data = self.t.log_path
        
        for colname in self.df:
            self.columns.update({colname:RichColumn(df[colname])})
            
    @property
    def ts_str(self):
        return datetime.now().strftime("%m%d_%H%M%S")
        
    @property
    def col_conf(self):
        return dict((k,{"use":v.use,"is_cont":v.is_conti}) for k,v in self.columns.items())
    
    def __getitem__(self,col_name):
        return self.columns[col_name]
        
    def kill(self,colname):
        """
        Not using this column
        """
        self.df[colname].rc.kill()
        
    def conti(self,colname):
        self.df[colname].rc.conti()
        
    def disc(self,colname):
        self.df[colname].rc.disc()
        
    def save_col(self,rcol):
        self.t[md5hash(rcol.name)]=rcol.conf_dict
        
    def set_col(self,rcol):
        if rcol.defined:
            print(f"{rcol.name} defined, use:{rcol.use}, contineus?:{rcol.is_conti}")
        print(color.bold("="*30))
        print(color.cyan(rcol.name))
        print(color.red(f"number? {rcol.is_number()}"))
        print(rcol.top_freq_().head(5))
              
        print(color.red("Is this a [C]ontineous, [D]iscrete or a column we do[N]'t need? default N"))
        x = input().lower()
        if x=="c":
            rcol.conti()
            print(color.blue(f"{rcol.name} set to contineous data"))
            self.save_col(rcol)
        elif x =="d":
            rcol.disc()
            print(color.blue(f"{rcol.name} set to discrite data"))
            self.save_col(rcol)
        elif (x =="") or (x=="n"):
            rcol.kill()
            print(color.blue(f"{rcol.name} will not be involved in learning"))
            self.save_col(rcol)
        else:
            print(color.yellow(f"option [{x}] not found, try Again?"))
            
    def save(self,colname):
        col=self.df[colname]
        self.t[md5hash(colname)] = col.rc.conf_dict
        
    def read(self,colname):
        col=self.df[colname]
        col.rc.set_conf(self.t[md5hash(colname)])
        if col.rc.is_conti:
            col.rc.top_freq_()
            
    def shuffle_df(self):
        self.df = self.df\
        .sample(frac=1.)\
        .reset_index().drop("index",axis=1)
        
    def tour(self):
        """
        Go through column 1 by 1 to decide the processing for its data
        """
        for colname in self.df:
            col = self.df[colname]
            current = self.t[md5hash(colname)]
            if current != None:
                col.rc.set_conf(current)
                if col.rc.is_conti==False:
                    col.rc.top_freq_()
            if col.rc.defined==False:
                self.set_col(col.rc)
                
    def set_y(self, *colnames):
        """
        set columns to y
        all the columns that use==True and is_y==False will be treated as x
        """
        for colname in colnames:
            rc = self.columns[colname]
            rc.is_y = True
            rc.use = True
            rc.is_emb = False
            self.save(colname)
            
    def set_x(self, *colnames):
        """
        set columns to x
        of course,every columns' default status is x, 
        so you don't have to set this if you accidentally set x to y
        """
        for colname in colnames:
            rc = self.columns[colname]
            rc.use = True
            rc.is_y = False
            self.save(colname)
    
    @property
    def Xs(self):
        """
        Return the next x rich column
        """
        for col,rc in self.columns.items():
            if (rc.is_y) ==False and rc.use:
                yield rc
    
    @property
    def Ys(self):
        """
        Return the next y rich column
        """
        for col,rc in self.columns.items():
            if rc.is_y and rc.use:
                yield rc
                

In [11]:
rdf = RichDF(df,fname = "testing_case_nyc")

## Use tour() to set the configuration

In [12]:
rdf.tour()

Here's how I set the columns

In [13]:
rdf.set_y("price")

This is how I set the configuration:

In [14]:
for col in rdf.df:
    print(">"*5,col,"<"*5)
    print(rdf.t[md5hash(col)])

>>>>> id <<<<<
{'name': 'id', 'defined': True, 'is_conti': True, 'is_y': False, 'is_emb': True, 'use': False}
>>>>> name <<<<<
{'name': 'name', 'defined': True, 'is_conti': False, 'is_y': False, 'is_emb': True, 'use': True}
>>>>> host_id <<<<<
{'name': 'host_id', 'defined': True, 'is_conti': False, 'is_y': False, 'is_emb': True, 'use': True}
>>>>> host_name <<<<<
{'name': 'host_name', 'defined': True, 'is_conti': True, 'is_y': False, 'is_emb': True, 'use': False}
>>>>> neighbourhood_group <<<<<
{'name': 'neighbourhood_group', 'defined': True, 'is_conti': False, 'is_y': False, 'is_emb': True, 'use': True}
>>>>> neighbourhood <<<<<
{'name': 'neighbourhood', 'defined': True, 'is_conti': False, 'is_y': False, 'is_emb': True, 'use': True}
>>>>> latitude <<<<<
{'name': 'latitude', 'defined': True, 'is_conti': True, 'is_y': False, 'is_emb': True, 'use': True}
>>>>> longitude <<<<<
{'name': 'longitude', 'defined': True, 'is_conti': True, 'is_y': False, 'is_emb': True, 'use': True}
>>>>> room_t

In [15]:
list(rdf.Xs)

[<Rich Column:name>,
 <Rich Column:host_id>,
 <Rich Column:neighbourhood_group>,
 <Rich Column:neighbourhood>,
 <Rich Column:latitude>,
 <Rich Column:longitude>,
 <Rich Column:room_type>,
 <Rich Column:minimum_nights>,
 <Rich Column:number_of_reviews>,
 <Rich Column:reviews_per_month>,
 <Rich Column:calculated_host_listings_count>,
 <Rich Column:availability_365>]

In [16]:
rdf["room_type"].encode("Entire home/apt")

1

In [17]:
# export
class TabularModel(nn.Module):
    def __init__(self,rdf):
        super().__init__()
        self.rdf=rdf
        self.inputs = nn.ModuleDict(modules = dict((x.name,x.input_module) for x in rdf.Xs))
        
        self.build_dial_x()
        self.build_dial_y()
        
        self.input_width = len(self.dial)
        self.target_width = len(self.dial_y)
        
        self.hidden_size = max(self.input_width,self.target_width,20)
        self.dnn = nn.Sequential(*[
            nn.Linear(self.input_width,self.hidden_size),
            nn.BatchNorm1d(self.hidden_size),
            nn.ReLU(),
            nn.Linear(self.hidden_size,self.target_width),
            nn.BatchNorm1d(self.target_width),
        ])
        
    def forward(self,Xs):
        """
        Xs dictionary of inputs
        """
        ipts = list(self.inputs[xcol.name](Xs[xcol.name]) for xcol in self.rdf.Xs)
        concat = torch.cat(ipts,dim=1)
        return self.dnn(concat)
                                    
    def build_dial_x(self):
        all_width = 0
        self.dial = dict()
        for x in self.rdf.Xs:
            for i in range(len(x)):
                self.dial.update({all_width:dict({"colname":x.name,
                                                  "rich_col":x,
                                                  "sub_idx":i, 
                                                  "remark":f"input<{i}> of column {x.name}"})})
                all_width+=1                
        return all_width
                   
    def build_dial_y(self):
        all_width = 0
        self.dial_y = dict()
        for y in self.rdf.Ys:
            for i in range(len(y)):
                self.dial_y.update({all_width:dict({"colname":y.name,
                                                  "rich_col":y,
                                                  "sub_idx":i, 
                                                  "remark":f"target<{i}> of column {y.name}"})})
                all_width+=1                
        return all_width
                        
class TabularNN:
    def __init__(self, rich_df,batch_size=128):
        self.rich_df = rich_df
        self.l = len(rich_df.df)
        self.batch_size = batch_size
        self.x = list(x.build_learn() for x in self.rich_df.Xs)
        self.y = list(y.build_learn() for y in self.rich_df.Ys)
        self.assert_xy()
        self.assert_y_consistency()
        self.reset_i()
        self.epoch = 0
        self.rich_df.shuffle_df()
        self.model = TabularModel(self.rich_df)
        
    def reset_i(self):
        """reset iterator"""
        self.s=0
        self.e=1
        
    def __repr__(self):
        return f">>TabularNN"
        
    def assert_xy(self):
        assert len(self.x)>0, "You have you set some X"
        assert len(self.y)>0, "You have you set some Y"
        
    def assert_y_consistency(self):
        conti_list = list(rc.is_conti for rc in self.rich_df.Ys)
        assert float(sum(conti_list))/len(conti_list) in [1.,0.],"Y has to be all discrete columns, or contineous columns"
        # decide loss function based on Y
        if conti_list[0]:
            self.crit = nn.MSELoss()
        else:
            self.crit = nn.BCEWithLogitsLoss()
            
    def build_model_nn(self):
        self.nn = TabularModel(self)
        
    def batch_df(self):
        start = self.batch_size*self.s
        end = self.batch_size*self.e
        if start>self.l:
            self.epoch+=1
            self.reset_i()
            start = self.batch_size*self.s
            end = self.batch_size*self.e
        yield self.rich_df.df[start:end]
        
    def batch_array(self):
        df_b = next(self.batch_df())
        x_data = dict()
        y_data = dict()
        
        for x in self.x:
            if x.is_conti:
                df_b[x.name]= df_b[x.name].fillna(x.mean)
            arr = df_b[x.name].apply(x.encode).values
            x_tensor = torch.FloatTensor(arr)[:,None] if x.is_conti else torch.LongTensor(arr)
            x_data.update({x.name:x_tensor})
            
        for y in self.y:
            arr = df_b[y.name].apply(y.encode).values
            y_tensor = torch.FloatTensor(arr) if y.is_conti else torch.LongTensor(arr)
            y_data.update({y.name:y_tensor[:,None]})
        yield x_data,y_data
        
    def batch_y_pred(self):
        x_data,y_data = next(self.batch_array())
        yield self.model(x_data)
        

In [18]:
rdf["room_type"].col.rc

<Rich Column:room_type>

In [19]:
tnn = TabularNN(rdf)

In [20]:
coldf = tnn.x[1].top_freq["index"]

In [21]:
next(tnn.batch_df())

Unnamed: 0,id,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365
0,36444734,Sunny Upper West Side Apt. 3mins from Central ...,32987938,Alex,Manhattan,Upper West Side,40.77889,-73.97668,Entire home/apt,143,3,0,,,1,10
1,16987293,"New, Luxury and Sunny Apartment",57455831,Maggie,Brooklyn,Clinton Hill,40.69440,-73.96606,Entire home/apt,150,1,1,2017-05-06,0.04,2,0
2,21154544,Huge beautiful bedroom with double exposure,66260832,Dragana,Manhattan,Harlem,40.81520,-73.95175,Private room,50,15,0,,,1,0
3,2135489,Charming Studio in Brooklyn,8624212,Leon,Brooklyn,Carroll Gardens,40.68362,-73.99714,Entire home/apt,170,2,131,2019-06-16,2.02,1,26
4,11321187,Entire 1Br Apt on UES,59215698,Daniela,Manhattan,Upper East Side,40.76796,-73.95205,Entire home/apt,130,1,21,2019-07-07,0.56,2,53
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
123,15765350,Doorman 2 Bed GYM DECK 5212,16098958,Jeremy & Laura,Manhattan,Murray Hill,40.74437,-73.97295,Entire home/apt,190,30,4,2019-01-14,0.13,96,331
124,23307047,2 Story PRIVATE Duplex/Elevator Building in NoMad,172756149,C,Manhattan,Kips Bay,40.74121,-73.98139,Entire home/apt,240,1,66,2019-07-02,4.09,1,83
125,6169068,"Prime Park Slope Townhouse, 4 BR and Garden",9773128,Deborah,Brooklyn,Park Slope,40.66798,-73.97610,Entire home/apt,345,30,0,,,1,156
126,24122599,Cozy room in a Victorian house in Central Broo...,14905006,Myriam,Brooklyn,Kensington,40.63966,-73.97160,Private room,52,1,11,2019-05-27,0.81,1,0


In [22]:
x_data,y_data = next(tnn.batch_array())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [23]:
x_data

{'name': tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0]),
 'host_id': tensor([  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  24,
           0,   0,   0,   0,   0, 153,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   1,   0,   0, 185,   0,   0, 133,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   7,  92,   0,   0,  20,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 106,   0,   0,
           0,   0,   0,   0,   0,   0,   2,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,  75,   0,   0,   0, 237

In [24]:
tnn.model.dnn

Sequential(
  (0): Linear(in_features=107, out_features=107, bias=True)
  (1): BatchNorm1d(107, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU()
  (3): Linear(in_features=107, out_features=1, bias=True)
  (4): BatchNorm1d(1, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)

In [25]:
ipts = list(tnn.model.inputs[xcol.name](x_data[xcol.name]) for xcol in tnn.model.rdf.Xs)
concat = torch.cat(ipts,dim=1)

In [26]:
concat,concat.shape

(tensor([[-0.3061,  0.5583, -1.7048,  ..., -0.1683, -0.2213, -0.8510],
         [-0.3061,  0.5583, -1.7048,  ..., -0.6799, -0.1963, -0.3392],
         [-0.3061,  0.5583, -1.7048,  ..., -0.1683, -0.2213, -0.3392],
         ...,
         [-0.3061,  0.5583, -1.7048,  ..., -0.1683, -0.2213,  0.0281],
         [-0.3061,  0.5583, -1.7048,  ..., -0.4205, -0.2213, -0.3392],
         [-0.3061,  0.5583, -1.7048,  ..., -0.5656, -0.2213, -0.3392]],
        grad_fn=<CatBackward>), torch.Size([128, 107]))

In [28]:
next(tnn.batch_y_pred())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


tensor([[ 0.1854],
        [ 0.4965],
        [ 0.0657],
        [ 0.5390],
        [ 0.0346],
        [ 0.1109],
        [ 1.0257],
        [ 0.9426],
        [-1.4236],
        [-0.4027],
        [-1.3949],
        [ 1.0144],
        [ 1.6111],
        [ 0.4727],
        [-0.3963],
        [ 0.0993],
        [ 1.8734],
        [-0.5209],
        [ 1.1702],
        [-0.6602],
        [ 0.9155],
        [-0.1697],
        [-1.4661],
        [ 0.3416],
        [ 0.0491],
        [-1.4276],
        [-0.3420],
        [ 0.0853],
        [ 0.3223],
        [ 1.3579],
        [ 0.5943],
        [ 0.3173],
        [-0.9157],
        [-1.4335],
        [-1.6335],
        [-0.1290],
        [-0.4366],
        [ 2.6865],
        [-0.5450],
        [-0.8102],
        [ 0.2763],
        [-1.5096],
        [-1.4165],
        [-1.3428],
        [-0.5520],
        [ 1.3585],
        [ 0.8928],
        [ 0.5567],
        [ 0.2359],
        [-0.9065],
        [ 0.3392],
        [-0.8105],
        [-1.