# Analysis of baseline models v1



In [4]:
import pickle
import joblib
from pandas import DataFrame, read_csv
import glob
from multigrids import TemporalMultiGrid, TemporalGrid
import os
from datetime import datetime, timedelta

In [5]:
# Root directory that both input paths below are joined onto.
data_dir = "/Users/rwspicer/Desktop/data/V1/"

# Locations of the two .yml inputs, expressed relative to data_dir.
feature_rel_path = "master-project/training/ACP/v2/baseline/multigrid/ACP-training-base.yml"
label_rel_path = "thermokarst/initiation-regions/ACP/v4/PDM-5var/without_predisp/multigrid/ACP-TKI-PDM5.yml"

feature_file = os.path.join(data_dir, feature_rel_path)
label_file = os.path.join(data_dir, label_rel_path)

def to_td(x):
    """Return the 'H:M:S' string ``x`` as a ``timedelta``.

    The first two colon-separated fields are parsed as integers (hours,
    minutes) and the third as a float (seconds).
    """
    fields = x.split(':')
    return timedelta(
        hours=int(fields[0]),
        minutes=int(fields[1]),
        seconds=float(fields[2]),
    )


def to_seconds(x):
    """Return the 'H:M:S' string ``x`` as a total number of seconds (float)."""
    fields = x.split(':')
    return int(fields[0]) * 60 * 60 + int(fields[1]) * 60 + float(fields[2])


def to_min(x):
    """Return the 'H:M:S' string ``x`` as a total number of minutes."""
    return to_seconds(x) / 60


def to_hour(x):
    """Return the 'H:M:S' string ``x`` as a total number of hours."""
    return to_min(x) / 60




In [6]:
# Open the feature and label datasets from the .yml files configured above.
# NOTE(review): TemporalMultiGrid / TemporalGrid come from the `multigrids`
# package; what they load at construction time is not visible here - confirm.
training_data = TemporalMultiGrid(feature_file)
om = TemporalGrid(label_file)

# Config file names of the models under consideration.
# NOTE(review): the name tokens (e, md, mf, mln, msl, mss, tdp) look like
# encoded hyperparameter settings - confirm against the training scripts.
list_of_models = [
    'rfm_e2_md2_mfAUTO_msl8_mss5_tdp25.yml',
    
    'rfm_e50_md60_mfAUTO_mln50000_msl8_mss5_tdp25.yml',
    'rfm_e50_md60_mfAUTO_mln50000_msl4_mss5_tdp25.yml',
    # NOTE(review): this entry is identical to the one directly above -
    # confirm whether a different configuration was intended here.
    'rfm_e50_md60_mfAUTO_mln50000_msl4_mss5_tdp25.yml',
    'rfm_e10_md60_mfAUTO_mln50000_msl8_mss5_tdp50.yml',
    'rfm_e10_md60_mfAUTO_mln50000_msl8_mss2_tdp50.yml',
    'rfm_e10_md100_mfAUTO_mln50000_msl4_mss10_tdp50.yml',
    'rfm_e100_md25_mfAUTO_mln50000_msl4_mss10_tdp25.yml',
    'rfm_e100_md25_mfAUTO_mln50000_msl2_mss5_tdp25.yml',
    'rfm_e100_md25_mfAUTO_mln50000_msl2_mss2_tdp25.yml',
    'rfm_e50_md100_mfAUTO_mln50000_msl8_mss5_tdp75.yml',
    'rfm_e50_md60_mfAUTO_mln50000_msl8_mss5_tdp75.yml',
    'rfm_e50_md60_mfAUTO_mln50000_msl2_mss10_tdp75.yml',
]

# Every serialized model found in the baseline-models directory.
# glob.glob() returns matches in arbitrary (filesystem-dependent) order, so
# anything that later indexes or slices results by position depends on it.
model_files = glob.glob('/Volumes/toshi-stati/tk-rf-models/baseline-models/*.joblib')

In [9]:
def show_feature_importances(model, feature_list, show=False):
    """Pair feature names with their importances, most important first.

    Parameters
    ----------
    model
        Object exposing a ``feature_importances_`` sequence of numbers.
    feature_list
        Feature names, given in the same order as
        ``model.feature_importances_``. Names and values are paired with
        ``zip``, so surplus entries on either side are ignored.
    show : bool, optional
        When true, print one ``'{:20} {}%'`` formatted line per feature.

    Returns
    -------
    dict
        ``{feature: importance * 100}`` with values rounded to 3 decimal
        places and keys inserted in descending order of importance.
    """
    percentages = [
        (feature, round(importance * 100, 3))
        for feature, importance in zip(feature_list, model.feature_importances_)
    ]
    # sorted() is stable, so features with equal importance keep input order.
    ranked = sorted(percentages, key=lambda pair: pair[1], reverse=True)

    if show:
        # Plain loop: printing is a side effect, so no list should be built.
        for pair in ranked:
            print('{:20} {}%'.format(*pair))

    return dict(ranked)


# Names applied to the model's importance values, in order.
# NOTE(review): assumed to match the feature order the models were trained
# with - confirm against the training configuration.
grid_names = ['fdd', 'tdd', 'tdd+1', 'ewp', 'fwp', 'sp', 'lsp', 'sp+1', 'lat', 'long','aspect','slope', 'elev' ]

# Parallel lists: order[i] is the config name for feature_importance_list[i].
feature_importance_list = []
order = []
for fp in model_files:
    # NOTE: joblib.load unpickles the file; only load model files you trust.
    model = joblib.load(fp)
    # Swap only the extension for '.yml'; str.replace('joblib', 'yml') would
    # also rewrite any 'joblib' occurring elsewhere in the file name.
    order.append(os.path.splitext(os.path.basename(fp))[0] + '.yml')
    feature_importance_list.append(show_feature_importances(model, grid_names))

In [16]:
# One row per loaded model (indexed by its config name), one column per
# feature; every row after the first is written to the results CSV.
fi_table = DataFrame(feature_importance_list, index=order)
fi_table[1:].to_csv('../results/baseline-models-fi-v1.csv')