# IAQF 2021 Code Collection

In [1]:
# Import libraries

import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sbn

import sklearn
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline
from sklearn.ensemble import GradientBoostingClassifier,GradientBoostingRegressor
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import AdaBoostRegressor
from sklearn.metrics import accuracy_score

from xgboost import XGBClassifier
from xgboost import XGBRegressor

import tensorflow as tf
from tensorflow import keras

In [2]:
OS = 'Mac'  # OS is either 'Mac' or 'Windows'
sep = '/' if OS == 'Mac' else '\\'
path = "..{}source_data{}{}.csv"

PROBLEM_PATH_DICT = {
    1: {
        'et': path.format(sep, sep, "crude_gas_et_df"),
        'p1': path.format(sep, sep, "crude_gas_p1_df"),
        'p2': path.format(sep, sep, "crude_gas_p2_df"),
        'p3': path.format(sep, sep, "crude_gas_p2_df")
    },  # crude oil price vs. gasoline price
    2: {
        'et': path.format(sep, sep, "cf_gas_et_df"),
        'p1': path.format(sep, sep, "cf_gas_p1_df"),
        'p2': path.format(sep, sep, "cf_gas_p2_df")
    },  # crude oil futures price vs. gasoline price
    3: {
        'et': path.format(sep, sep, "crude_co2_et_df"),
        'p1': path.format(sep, sep, "crude_co2_p1_df"),
        'p2': path.format(sep, sep, "crude_co2_p2_df")
    }  # crude oil price vs. co2
}

PROBLEM_VAR_DICT = {
    1: ('crude_oil', 'gasoline'),
    2: ('oil_futures', 'gasoline'),
    3: ('crude_oil', 'co2')
}

ADDITIONAL_VAR_DICT = {
    1: {
        'et': ['cpi','us_pop','public_roads','us_urban_pop','oil_production','auto_sales','us_real_gdp'],
        'p1': ['cpi','us_pop','public_roads','us_urban_pop','oil_production','auto_sales','us_real_gdp'],
        'p2': ['cpi','us_pop','public_roads','us_urban_pop','oil_production','djus_auto_index','auto_sales','us_real_gdp','usd_mex_exrate'],
        'p3': ['cpi','us_pop','public_roads','us_urban_pop','oil_production','auto_sales','us_real_gdp']
    },
    2: {
        'et': ['cpi','us_pop','public_roads','us_urban_pop','oil_production','auto_sales','us_real_gdp'],
        'p1': ['cpi','us_pop','public_roads','us_urban_pop','oil_production','auto_sales','us_real_gdp'],
        'p2': ['cpi','us_pop','public_roads','us_urban_pop','oil_production','auto_sales','us_real_gdp'],
    },
    3: {
        'et': ['cpi','coal_consumption','ngas_consumption','us_pop','oil_production','tree_cover_loss','us_real_gdp'],
        'p1': ['cpi','coal_consumption','ngas_consumption','us_pop','oil_production','tree_cover_loss','us_real_gdp'],
        'p2': ['cpi','coal_consumption','ngas_consumption','us_pop','oil_production','tree_cover_loss','us_real_gdp']
    }
}

PROBLEM_NO = 3
PERIOD_NO = 'et'

X_MAIN_COL, Y_COL = PROBLEM_VAR_DICT[PROBLEM_NO]
X_MAIN_CHG_COL = X_MAIN_COL + '_chg'
Y_CHG_COL = Y_COL + '_chg'
EXTRA_VARS = ADDITIONAL_VAR_DICT[PROBLEM_NO][PERIOD_NO]

TRAINING_RATIO_DEFAULT = 0.7
VALIDATION_RATIO_DEFAULT = 0.15
TEST_RATIO_DEFAULT = 0.15

REGRESSOR_ACCURACY_DICT = {}
CLASSIFIER_ACCURACY_DICT = {}

REGRESSOR_COEF_DICT = {}
CLASSIFIER_COEF_DICT = {}

df = pd.read_csv(PROBLEM_PATH_DICT[PROBLEM_NO][PERIOD_NO])
df

Unnamed: 0,week,crude_oil,cpi,coal_consumption,ngas_consumption,us_pop,oil_production,tree_cover_loss,us_real_gdp,co2
0,2001/1/7,27.80,78.645985,96641.56859,9.634759e+05,283960.0,5944,1932323.219,12999.5699,370.30
1,2001/1/14,28.81,68.158536,89172.96037,9.010023e+05,284011.5,5949,2174659.188,12293.3405,370.67
2,2001/1/21,30.63,76.149426,100533.68770,1.032450e+06,284063.0,5912,2335532.223,12708.8104,370.52
3,2001/1/28,31.35,76.116224,101753.62190,8.260147e+05,284114.5,5935,2065545.899,12287.2265,371.36
4,2001/2/4,29.59,66.693151,61448.50321,7.766296e+05,284166.0,5929,1989475.501,13353.2628,371.58
...,...,...,...,...,...,...,...,...,...,...
986,2019/12/1,58.07,112.084710,43325.23094,6.060118e+05,329314.0,12900,2173871.578,19477.3091,411.07
987,2019/12/8,57.64,109.471205,39751.66796,8.286665e+05,329335.8,12800,2458219.683,19750.1111,411.32
988,2019/12/15,59.25,108.183411,48847.79099,7.519214e+05,329357.6,12800,1946025.679,20106.3904,411.89
989,2019/12/22,60.75,108.707988,50008.88542,5.915937e+05,329379.4,12900,2230631.393,20396.2701,412.21


## Creating independent and dependent variables

In [3]:
# calculate independent variables
periods = (1,2,3,5,10)
for i in periods:
    df [X_MAIN_CHG_COL + f'_{i}'] = df[X_MAIN_COL].pct_change(periods=i)

# calculate dependent variable
df[Y_CHG_COL] = df[Y_COL].pct_change(periods=1)

# eliminate the empty rows
df = df[11:]
df

Unnamed: 0,week,crude_oil,cpi,coal_consumption,ngas_consumption,us_pop,oil_production,tree_cover_loss,us_real_gdp,co2,crude_oil_chg_1,crude_oil_chg_2,crude_oil_chg_3,crude_oil_chg_5,crude_oil_chg_10,co2_chg
11,2001/3/25,26.42,71.870113,74962.95365,624013.0521,284546.5,5868,1728742.189,11982.2289,372.40,-0.022206,-0.071353,-0.053386,-0.109538,-0.082957,-0.000376
12,2001/4/1,26.86,74.862456,90444.57488,614474.9072,284602.0,5883,2248741.369,13936.6979,372.45,0.016654,-0.005922,-0.055888,-0.062478,-0.123082,0.000134
13,2001/4/8,26.76,67.840486,71861.08162,436068.6622,284648.4,5867,1724128.479,12809.3245,373.61,-0.003723,0.012869,-0.009623,-0.041204,-0.146411,0.003115
14,2001/4/15,28.27,82.229017,76914.86811,449159.7901,284694.8,5855,1956950.159,13474.2006,373.71,0.056428,0.052494,0.070023,-0.006327,-0.044610,0.000268
15,2001/4/22,27.89,76.824931,73912.39968,-37896.8606,284741.2,5861,1366131.633,13236.2220,373.60,-0.013442,0.042227,0.038347,0.032198,-0.097995,-0.000294
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
986,2019/12/1,58.07,112.084710,43325.23094,606011.7971,329314.0,12900,2173871.578,19477.3091,411.07,0.020562,0.021460,0.024343,0.051993,-0.021237,0.000852
987,2019/12/8,57.64,109.471205,39751.66796,828666.4868,329335.8,12800,2458219.683,19750.1111,411.32,-0.007405,0.013005,0.013896,0.044771,0.013005,0.000608
988,2019/12/15,59.25,108.183411,48847.79099,751921.3538,329357.6,12800,1946025.679,20106.3904,411.89,0.027932,0.020320,0.041301,0.045158,0.115399,0.001386
989,2019/12/22,60.75,108.707988,50008.88542,591593.7371,329379.4,12900,2230631.393,20396.2701,412.21,0.025316,0.053956,0.046151,0.068602,0.140417,0.000777


## Seperating training, validation, and test datasets

In [4]:
# create list of independent variable's name
indep_vars = [X_MAIN_CHG_COL + f'_{i}' for i in periods] + EXTRA_VARS
num_var =len(indep_vars)

# extract the values to X
X = df[[X_MAIN_COL] + indep_vars].values
X

array([[ 2.64200000e+01, -2.22057735e-02, -7.13532513e-02, ...,
         5.86800000e+03,  1.72874219e+06,  1.19822289e+04],
       [ 2.68600000e+01,  1.66540500e-02, -5.92153960e-03, ...,
         5.88300000e+03,  2.24874137e+06,  1.39366979e+04],
       [ 2.67600000e+01, -3.72300819e-03,  1.28690386e-02, ...,
         5.86700000e+03,  1.72412848e+06,  1.28093245e+04],
       ...,
       [ 5.92500000e+01,  2.79319917e-02,  2.03203031e-02, ...,
         1.28000000e+04,  1.94602568e+06,  2.01063904e+04],
       [ 6.07500000e+01,  2.53164557e-02,  5.39555864e-02, ...,
         1.29000000e+04,  2.23063139e+06,  2.03962701e+04],
       [ 6.12900000e+01,  8.88888889e-03,  3.44303797e-02, ...,
         1.29000000e+04,  2.12435707e+06,  1.88128854e+04]])

In [5]:
# create y for regressor, y_c for classifier
y = df[Y_CHG_COL]
y_c = (df[Y_CHG_COL] > 0).values.astype('int')

# set training, validation, and test criterias
training_ratio = TRAINING_RATIO_DEFAULT
validation_ratio = VALIDATION_RATIO_DEFAULT
test_ratio = TEST_RATIO_DEFAULT

# check correctness of X and y
X.shape, y.shape, y_c.shape

((980, 13), (980,), (980,))

In [6]:
# create training, validation, and test sets for regressor
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_ratio, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=validation_ratio/(validation_ratio+training_ratio), random_state=42)

print("X_train shape:\t", X_train.shape)
print("X_test shape:\t", X_test.shape)
print("X_val shape:\t", X_val.shape)
print("y_train shape:\t", y_train.shape)
print("y_val shape:\t", y_val.shape)
print("y_test shape:\t", y_test.shape)

X_train shape:	 (686, 13)
X_test shape:	 (147, 13)
X_val shape:	 (147, 13)
y_train shape:	 (686,)
y_val shape:	 (147,)
y_test shape:	 (147,)


In [7]:
# create training, validation, and test sets for classifier
X_train_c, X_test_c, y_train_c, y_test_c = train_test_split(X, y_c, test_size=test_ratio, random_state=42)
X_train_c, X_val_c, y_train_c, y_val_c = train_test_split(X_train_c, y_train_c, test_size=validation_ratio/(validation_ratio+training_ratio), random_state=42)

print("X_train_c shape:", X_train_c.shape)
print("X_test_c shape:\t", X_test_c.shape)
print("X_val_c shape:\t", X_val_c.shape)
print("y_train_c shape:", y_train_c.shape)
print("y_val_c shape:\t", y_val_c.shape)
print("y_test_c shape:\t", y_test_c.shape)

X_train_c shape: (686, 13)
X_test_c shape:	 (147, 13)
X_val_c shape:	 (147, 13)
y_train_c shape: (686,)
y_val_c shape:	 (147,)
y_test_c shape:	 (147,)


## Principal Component Analysis (PCA)

In [8]:
# normalization for regressor set
for i in range(X_train.shape[1]):
    X_train_mean = X_train[:, i]. mean()
    X_train_std = X_train[:, i].std()
    X_train[:, i] = (X_train[:,i] - X_train_mean) / X_train_std
    X_test[:,i] = (X_test[:,i] - X_train_mean) / X_train_std
    X_val[:, i] = (X_val[:,i] - X_train_mean) / X_train_std

print("X_train shape:\t", X_train.shape)
print("X_val shape:\t", X_val.shape)

X_train shape:	 (686, 13)
X_val shape:	 (147, 13)


In [9]:
# normalization for classifier set
for i in range(X_train_c.shape[1]):
    X_train_c_mean = X_train_c[:, i]. mean()
    X_train_c_std = X_train_c[:, i].std()
    X_train_c[:, i] = (X_train_c[:,i] - X_train_c_mean) / X_train_c_std
    X_test_c[:,i] = (X_test_c[:,i] - X_train_c_mean) / X_train_c_std
    X_val_c[:, i] = (X_val_c[:,i] - X_train_c_mean) / X_train_c_std

print("X_train_c shape:", X_train_c.shape)
print("X_val_c shape:\t", X_val_c.shape)

X_train_c shape: (686, 13)
X_val_c shape:	 (147, 13)


In [10]:
# PCA for regressor
# fit on training set
scaler = StandardScaler()
scaler.fit(X_train)

# apply transform to training and test set
X_train_pca = scaler.transform(X_train)
X_test_pca = scaler.transform(X_test)
X_val_pca = scaler.transform(X_val)

# set PCA value
pca = PCA(0.95)

# fit PCA training set
pca.fit(X_train_pca)

# apply transform to training and test set
X_train_pca = pca.transform(X_train_pca)
X_test_pca = pca.transform(X_test_pca)
X_val_pca = pca.transform(X_val_pca)

print("X_train_pca shape:\t", X_train_pca.shape)
print("X_test_pca shape:\t", X_test_pca.shape)
print("X_val_pca shape:\t", X_val_pca.shape)
print("y_train shape:\t", y_train.shape)
print("y_val shape:\t", y_val.shape)
print("y_test shape:\t", y_test.shape)

X_train_pca shape:	 (686, 8)
X_test_pca shape:	 (147, 8)
X_val_pca shape:	 (147, 8)
y_train shape:	 (686,)
y_val shape:	 (147,)
y_test shape:	 (147,)


In [11]:
# PCA for classifier
# fit on training set_c
scaler = StandardScaler()
scaler.fit(X_train_c)

# apply transform to training and test set
X_train_c_pca = scaler.transform(X_train_c)
X_test_c_pca = scaler.transform(X_test_c)
X_val_c_pca = scaler.transform(X_val_c)

# set PCA value
pca = PCA(0.95)

# fit PCA training set
pca.fit(X_train_c_pca)

# apply transform to training and test set
X_train_c_pca = pca.transform(X_train_c_pca)
X_test_c_pca = pca.transform(X_test_c_pca)
X_val_c_pca = pca.transform(X_val_c_pca)

print("X_train_c_pca shape:\t", X_train_c_pca.shape)
print("X_test_c_pca shape:\t", X_test_c_pca.shape)
print("X_val_c_pca shape:\t", X_val_c_pca.shape)
print("y_train_c shape:", y_train_c.shape)
print("y_val_c shape:\t", y_val_c.shape)
print("y_test_c shape:\t", y_test_c.shape)

X_train_c_pca shape:	 (686, 8)
X_test_c_pca shape:	 (147, 8)
X_val_c_pca shape:	 (147, 8)
y_train_c shape: (686,)
y_val_c shape:	 (147,)
y_test_c shape:	 (147,)


In [12]:
num_var = X_train_c_pca.shape[1]
num_var

8

## Multiple linear regression

In [13]:
multiple_reg = LinearRegression().fit(X_train_pca, y_train)
score_multiple_reg = multiple_reg.score(X_test_pca, y_test)

print('score_multiple_reg:', "%.4f" % score_multiple_reg)

score_multiple_reg: 0.0251


In [14]:
REGRESSOR_COEF_DICT['multi_linear'] = multiple_reg.coef_
REGRESSOR_ACCURACY_DICT['multi_linear'] = score_multiple_reg

## Random forest classifier

In [15]:
# testing for different max depth
rfc = dict()
for i in range (1, 15):
    clf = RandomForestClassifier(max_depth=i, random_state=0).fit(X_train_c_pca, y_train_c)
    y_pred = clf.predict(X_val_c_pca)
    score_clf = accuracy_score(y_val_c, y_pred)
    rfc[i] = score_clf

max_depth = max(rfc, key=rfc.get)
print('max depth:', max_depth)

max depth: 3


In [16]:
# choose max_depth = 4
rf_clf = RandomForestClassifier(max_depth=int(max_depth), random_state=0).fit(X_train_c_pca, y_train_c)
score_rf_clf = rf_clf.score(X_test_c_pca, y_test_c)
print('score_rf_clf:', "%.4f" % score_rf_clf)

score_rf_clf: 0.5918


In [17]:
CLASSIFIER_COEF_DICT['random_forest'] = rf_clf.feature_importances_
CLASSIFIER_ACCURACY_DICT['random_forest'] = score_rf_clf

## Random forest regressor

In [18]:
rf_reg = RandomForestRegressor().fit(X_train_pca, y_train)
score_rf_reg = rf_reg.score(X_test_pca, y_test)
print('score_rf_reg:', "%.4f" % score_rf_reg)

score_rf_reg: -0.0902


In [19]:
REGRESSOR_COEF_DICT['random_forest'] = rf_reg.feature_importances_
REGRESSOR_ACCURACY_DICT['random_forest'] = score_rf_reg

## Logistic regression

In [20]:
logistic_reg = LogisticRegression(penalty = 'l1', solver='liblinear', random_state=0).fit(X_train_c_pca, y_train_c)
score_logistic = logistic_reg.score(X_test_c_pca, y_test_c)
print('score_logistic:', "%.4f" % score_logistic)

score_logistic: 0.6190


In [21]:
REGRESSOR_COEF_DICT['logistic'] = logistic_reg.coef_[0]
REGRESSOR_ACCURACY_DICT['logistic'] = score_logistic

## Gradient boosting classifier

In [22]:
gra_clf = GradientBoostingClassifier(n_estimators = 200, learning_rate=0.50, max_depth=5, random_state=0).fit(X_train_c_pca, y_train_c)
score_gb_clf = gra_clf.score(X_test_c_pca, y_test_c)
score_gb_clf

0.5374149659863946

In [23]:
CLASSIFIER_COEF_DICT['gradient_boosting'] = gra_clf.feature_importances_
CLASSIFIER_ACCURACY_DICT['gradient_boosting'] = score_gb_clf

## Gradient boosting regressor

In [24]:
gra_reg = GradientBoostingRegressor(random_state=0).fit(X_train_pca, y_train)
score_gb_reg = gra_reg.score(X_test_pca, y_test)
score_gb_reg

-0.18504586459848205

In [25]:
REGRESSOR_COEF_DICT['gradient_boosting'] = gra_reg.feature_importances_
REGRESSOR_ACCURACY_DICT['gradient_boosting'] = score_gb_reg

## XGBoost classifier

In [26]:
xgb_clf = XGBClassifier(use_label_encoder=False).fit(X_train_c_pca, y_train_c)
score_xgb_clf = xgb_clf.score(X_test_c_pca, y_test_c)
score_xgb_clf



0.5782312925170068

In [27]:
CLASSIFIER_COEF_DICT['xgboost'] = xgb_clf.feature_importances_
CLASSIFIER_ACCURACY_DICT['xgboost'] = score_xgb_clf

## XGBoost regressor

In [28]:
xgb_reg = XGBRegressor(random_state = 0).fit(X_train_pca, y_train)
score_xgb_reg = xgb_reg.score(X_test_pca, y_test)
score_xgb_reg

-0.1838772538687108

In [29]:
REGRESSOR_COEF_DICT['xgboost'] = xgb_reg.feature_importances_
REGRESSOR_ACCURACY_DICT['xgboost'] = score_xgb_reg

## AdaBoost classifier

In [30]:
adb_clf = AdaBoostClassifier().fit(X_train_c_pca, y_train_c)
score_adb_clf = adb_clf.score(X_test_c_pca, y_test_c)
score_adb_clf

0.5306122448979592

In [31]:
CLASSIFIER_COEF_DICT['adaboost'] = adb_clf.feature_importances_
CLASSIFIER_ACCURACY_DICT['adaboost'] = score_adb_clf

## AdaBoost regressor

In [32]:
adb_reg = AdaBoostRegressor().fit(X_train_pca, y_train)
score_adb_reg = adb_reg.score(X_test_pca, y_test)
score_adb_reg

-0.0477250756163885

In [33]:
REGRESSOR_COEF_DICT['adaboost'] = adb_reg.feature_importances_
REGRESSOR_ACCURACY_DICT['adaboost'] = score_adb_reg

## Support vector classification

In [34]:
# Try different kernels
kernels = ('linear', 'poly', 'rbf')

for k in kernels:
    svm_clf = SVC(kernel=k).fit(X_train_c_pca, y_train_c)
    y_pred = svm_clf.predict(X_test_c_pca)
    vars()[f'score_svc_{k}']= accuracy_score(y_test_c, y_pred)
    
    if k == 'linear':
        CLASSIFIER_COEF_DICT['svc_linear'] = svm_clf.coef_[0]
    
print(score_svc_linear)
print(score_svc_poly)
print(score_svc_rbf)

CLASSIFIER_ACCURACY_DICT['svc_linear'] = score_svc_linear
CLASSIFIER_ACCURACY_DICT['svc_poly'] = score_svc_poly
CLASSIFIER_ACCURACY_DICT['svc_rbf'] = score_svc_rbf

0.6054421768707483
0.5986394557823129
0.6258503401360545


## Gaussian Naive Bayes

In [35]:
gnb_clf = GaussianNB().fit(X_train_c_pca, y_train_c)
score_gnb = gnb_clf.score(X_test_c_pca, y_test_c)
print(score_gnb)

0.5714285714285714


In [36]:
CLASSIFIER_ACCURACY_DICT['GaussianNB'] = score_gnb

## Neuro networks

In [37]:
model = keras.models.Sequential()
model.add(keras.layers.Dense(10, input_dim=num_var, activation='relu'))
model.add(keras.layers.Dense(5, activation='relu'))
model.add(keras.layers.Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

history = model.fit(X_train_c_pca, y_train_c, epochs=50, validation_data=(X_val_c_pca, y_val_c))

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [38]:
loss, score_neuro = model.evaluate(X_test_c_pca, y_test_c)
print(score_neuro)

0.5918367505073547


In [39]:
CLASSIFIER_ACCURACY_DICT['neuro'] = score_neuro

## Summary

In [40]:
reg_df = pd.DataFrame.from_dict(REGRESSOR_ACCURACY_DICT , orient='index', columns=['Score'])
reg_df.index.name = 'Regression models'
#reg_df.sort_values(by=['Score'], ascending=False)
reg_df

Unnamed: 0_level_0,Score
Regression models,Unnamed: 1_level_1
multi_linear,0.02513
random_forest,-0.090244
logistic,0.619048
gradient_boosting,-0.185046
xgboost,-0.183877
adaboost,-0.047725


In [41]:
clf_df = pd.DataFrame.from_dict(CLASSIFIER_ACCURACY_DICT, orient='index',columns=['Score'])
clf_df.index.name = 'Classifier models'
#clf_df.sort_values(by=['Score'], ascending=False)
clf_df

Unnamed: 0_level_0,Score
Classifier models,Unnamed: 1_level_1
random_forest,0.591837
gradient_boosting,0.537415
xgboost,0.578231
adaboost,0.530612
svc_linear,0.605442
svc_poly,0.598639
svc_rbf,0.62585
GaussianNB,0.571429
neuro,0.591837


In [42]:
reg_coef_df = pd.DataFrame.from_dict(REGRESSOR_COEF_DICT, orient='index')
reg_coef_df

Unnamed: 0,0,1,2,3,4,5,6,7
multi_linear,3.1e-05,-1.1e-05,-0.000243,0.000185,0.000319,-6.2e-05,-0.000182,-0.000475
random_forest,0.103463,0.100293,0.162131,0.102855,0.165228,0.10344,0.116596,0.145994
logistic,0.036448,0.003778,-0.234013,0.178421,0.371848,0.0,-0.187821,-0.744518
gradient_boosting,0.07556,0.063126,0.1942,0.114864,0.202946,0.069473,0.141081,0.138751
xgboost,0.10647,0.092608,0.139091,0.115327,0.153332,0.104085,0.126785,0.162303
adaboost,0.135635,0.117496,0.1266,0.150392,0.136296,0.087861,0.137323,0.108398


In [43]:
clf_coef_df = pd.DataFrame.from_dict(CLASSIFIER_COEF_DICT, orient='index')
clf_coef_df

Unnamed: 0,0,1,2,3,4,5,6,7
random_forest,0.061343,0.077653,0.19885,0.085868,0.204979,0.047164,0.090875,0.233268
gradient_boosting,0.078651,0.105862,0.119766,0.094816,0.142959,0.117708,0.135955,0.204284
xgboost,0.109946,0.112039,0.129917,0.114247,0.159301,0.123407,0.112307,0.138835
adaboost,0.16,0.16,0.14,0.04,0.16,0.02,0.12,0.2
svc_linear,0.052312,-0.022876,-0.329477,0.296954,0.48541,-0.023588,-0.249605,-0.891143


In [44]:
# Export
reg_df.to_csv('reg_df.csv')
clf_df.to_csv('clf_df.csv')
reg_coef_df.to_csv('reg_coef_df.csv')
clf_coef_df.to_csv('clf_coef_df.csv')