### Other Models on the Synthetic ImagePoints Target

In [1]:
import os, sys
import copy as copyroot
import pandas as pd
from IPython.display import display
from matplotlib import pyplot as plt

from fastai2.basics import *
from fastai2.vision.all import *

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import torch
# Reproducibility: request deterministic cuDNN kernels and switch the cuDNN
# autotuner off, since benchmarking may select a different algorithm from one
# run to the next.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

%load_ext autoreload
%autoreload 2

from module.mnist_helpers import build_df, eda_fig_1
from module.mnist_helpers import img_pt_plot, train_history_dualplot
from module.mnist_helpers import seed_everything

### Setup

In [86]:
path = untar_data(URLs.MNIST_TINY)
df = build_df(path)

# Names of the two regression targets carried over from `df`.
y_names = ['point_topleft_x', 'point_topleft_y']

# Flatten every image into one row of pixel values. The rows are collected in
# a list and stacked once, instead of growing a DataFrame inside the loop.
pixel_rows = []
for fn in df['fn']:
    # Context manager closes the underlying file as soon as the pixels are read.
    with Image.open(str(path) + fn) as img:
        pixel_rows.append(np.array(img).flatten())

# np.stack requires all rows to have the same length, so a differently sized
# image fails loudly here.
df2 = pd.DataFrame(np.stack(pixel_rows))
print(f'X shape: {df2.shape}')

# Append the target columns (aligned on the row index) to the pixel columns.
df2_y = df[y_names]
df2 = pd.concat((df2, df2_y), axis=1)
print(f'df2 final shape: {df2.shape}')

df2.head(2)

X shape: (709, 784)
df2 final shape: (709, 786)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,776,777,778,779,780,781,782,783,point_topleft_x,point_topleft_y
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,11,5
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,9,4


In [87]:
# Columns whose label contains 'point' are the targets; everything else is a
# pixel feature.
target_cols = [col for col in df2.columns if 'point' in str(col)]

X = df2.drop(columns=target_cols)
Y = df2[target_cols]
y0, y1 = Y.iloc[:, 0], Y.iloc[:, 1]

# One call splits the features and both targets with the same row partition,
# so Xt / Xv are produced once rather than being overwritten by a second split.
Xt, Xv, yt0, yv0, yt1, yv1 = train_test_split(X, y0, y1, random_state=0)

Xt.shape, Xv.shape, yt0.shape, yt1.shape

((531, 784), (178, 784), (531,), (531,))

In [112]:
# Module-level results table; one row per recorded model run.
ledger = pd.DataFrame()

def append_ledger(r2_tv, name=None, params=None):
    """Append one model run to the global ``ledger`` DataFrame.

    Parameters
    ----------
    r2_tv : sequence
        Two-element sequence: ``r2_tv[0]`` is stored as ``r2_train`` and
        ``r2_tv[1]`` as ``r2_valid``.
    name : str, optional
        Label for the run; stored as ``'unknown'`` when omitted.
    params : dict, optional
        Scalar hyper-parameters; each key becomes an extra column of the row.

    Returns
    -------
    None
        The global ``ledger`` is rebound to the extended table.
    """
    global ledger

    record = pd.DataFrame({
        'name':     [name if name is not None else 'unknown'],
        'r2_train': [r2_tv[0]],
        'r2_valid': [r2_tv[1]],
    })

    if params is not None:
        record = pd.concat((record,
                            pd.DataFrame(params, index=[0])),
                           axis=1)

    if ledger.empty:
        # First row: take the record as-is instead of concatenating with an
        # empty frame.
        ledger = record
    else:
        # ignore_index gives every run its own row label (0, 1, 2, ...) instead
        # of repeating label 0 for each appended record.
        ledger = pd.concat((ledger, record), axis=0, ignore_index=True)

In [116]:
# TODO: make a multi-index for:
#   - target = x vs. target = y
#   - target = topleft vs. target = center

### Run Models

In [42]:
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso, Ridge
from sklearn.model_selection import train_test_split

In [43]:
# Ordinary least-squares baseline on the first target (yt0 / yv0).
# `fit` returns the estimator itself, so construction and fitting can be chained.
lr = LinearRegression().fit(Xt, yt0)

# (train score, validation score)
(lr.score(Xt, yt0), lr.score(Xv, yv0))

(0.9992599653917897, -6.916591828903264e+16)

In [49]:
# Lasso instance built with default settings.
# NOTE(review): `q` is not referenced by any other cell visible in this section
# of the notebook - confirm whether it is still needed.
q = Lasso()

In [50]:
# Sweep the Lasso penalty strength and print (train score, validation score)
# for the first target (yt0 / yv0).
for alpha in [0.3, 1.0, 10.0]:
    learn = Lasso(alpha=alpha)
    # Fit on `Xt`, the same training matrix that is scored on the next line.
    learn.fit(Xt, yt0)
    print(learn.score(Xt, yt0), learn.score(Xv, yv0))

0.9264171868807906 0.23198880450087667
0.8512759976961152 0.4455662968177406
0.6509567725618848 0.4544721565207197


  positive)


In [52]:
# Sweep the Ridge penalty strength and print (train score, validation score).
# Uses the first-target split (yt0 / yv0), the same one the other model cells
# in this section fit and score against.
for alpha in [1.0, 10.0, 1000.0]:
    learn = Ridge(alpha=alpha)
    learn.fit(Xt, yt0)
    print(learn.score(Xt, yt0), learn.score(Xv, yv0))

0.9799826616323699 -87.71460495592596
0.9735847079736011 -43.01749800026262
0.9137516269841953 -6.823906802319964


In [53]:
from sklearn.ensemble import RandomForestRegressor

In [56]:
# Hyper-parameter values to combine; consumed by `grid`.
d_params = {'n_estimators':[50, 100, 200],
            'max_depth':   [None, 2, 5],
           }


def grid(d):
    """Expand a dict of value lists into a list of parameter dicts.

    Parameters
    ----------
    d : dict
        Maps each parameter name to an iterable of candidate values.

    Returns
    -------
    list of dict
        One dict per combination. Keys follow the insertion order of ``d``;
        the first parameter varies fastest across the returned list.
        A parameter whose value iterable is empty is left out of the
        combinations, and an empty ``d`` yields ``[{}]``.
    """
    combos = [{}]
    for param_name, values in d.items():
        # Extend every combination built so far with each candidate value.
        expanded = [{**combo, param_name: value}
                    for value in values
                    for combo in combos]
        # Nothing to expand means `values` was empty: keep the previous
        # combinations untouched.
        if expanded:
            combos = expanded
    return combos

{'n_estimators': [50, 100, 200], 'max_depth': [None, 2, 5]}

In [80]:
# Fit one random forest per hyper-parameter combination and collect
# (train score, validation score) pairs in the order produced by `grid`.
r2_ledger = []
for params in grid(d_params):
    # Fixed seed so repeated runs give the same scores; a 'random_state' key
    # supplied through the grid still takes precedence.
    learn = RandomForestRegressor(**{'random_state': 0, **params})
    # Fit on `Xt`, the same training matrix that is scored on the next line.
    learn.fit(Xt, yt0)
    r2t, r2v = learn.score(Xt, yt0), learn.score(Xv, yv0)
    r2_ledger.append((r2t, r2v))
    print(r2t, r2v)

0.9558844509146343 0.6882495750818954
0.9567107080391499 0.6854839302514066
0.9574588460961856 0.696162081694773
0.5218802569954935 0.38758619922759074
0.5107858329139587 0.3960169684883761
0.5120406722143521 0.3981965116489584
0.8233293598973843 0.6014276831195686
0.826689382224604 0.6040956028979134
0.8297675208139573 0.6151080055084164


In [85]:
# Put each hyper-parameter combination next to the scores recorded for it.
rf_param_table = pd.DataFrame(grid(d_params))
rf_score_table = pd.DataFrame(r2_ledger, columns=['r2_train', 'r2_valid'])

pd.concat((rf_param_table, rf_score_table), axis=1).round(3)

Unnamed: 0,n_estimators,max_depth,r2_train,r2_valid
0,50,,0.956,0.688
1,100,,0.957,0.685
2,200,,0.957,0.696
3,50,2.0,0.522,0.388
4,100,2.0,0.511,0.396
5,200,2.0,0.512,0.398
6,50,5.0,0.823,0.601
7,100,5.0,0.827,0.604
8,200,5.0,0.83,0.615
