# IAQF 2021 Code Collection

In [1]:
PROBLEM_NO = 3
PERIOD_NO = 'p2'


In [2]:
# Import libraries

import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sbn

import sklearn
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline
from sklearn.ensemble import GradientBoostingClassifier,GradientBoostingRegressor
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import AdaBoostRegressor
from sklearn.metrics import accuracy_score

from xgboost import XGBClassifier
from xgboost import XGBRegressor

import tensorflow as tf
from tensorflow import keras

from openpyxl import load_workbook

In [3]:
OS = 'Mac'  # OS is either 'Mac' or 'Windows'
sep = '/' if OS == 'Mac' else '\\'
path = "..{}source_data{}{}.csv"

PROBLEM_PATH_DICT = {
    1: {
        'basic': path.format(sep, sep, "crude_gas_et_df"),
        'et': path.format(sep, sep, "crude_gas_et_df"),
        'p1': path.format(sep, sep, "crude_gas_p1_df"),
        'p1.2': path.format(sep, sep, "crude_gas_p2_df"),
        'p2': path.format(sep, sep, "crude_gas_p2_df")
    },  # crude oil price vs. gasoline price
    2: {
        'basic': path.format(sep, sep, "cf_gas_et_df"),
        'et': path.format(sep, sep, "cf_gas_et_df"),
        'p1': path.format(sep, sep, "cf_gas_p1_df"),
        'p1.2': path.format(sep, sep, "cf_gas_p2_df"),
        'p2': path.format(sep, sep, "cf_gas_p2_df"),
    },  # crude oil futures price vs. gasoline price
    3: {
        'basic': path.format(sep, sep, "crude_co2_et_df"),
        'et': path.format(sep, sep, "crude_co2_et_df"),
        'p1': path.format(sep, sep, "crude_co2_p1_df"),
        'p1.2': path.format(sep, sep, "crude_co2_p2_df"),
        'p2': path.format(sep, sep, "crude_co2_p2_df")
    }  # crude oil price vs. co2
}

PROBLEM_VAR_DICT = {
    1: ('crude_oil', 'gasoline'),
    2: ('oil_futures', 'gasoline'),
    3: ('crude_oil', 'co2')
}

ADDITIONAL_VAR_DICT = {
    1: {
        'basic': [],
        'et': ['cpi','us_pop','public_roads','us_urban_pop','oil_production','auto_sales','us_real_gdp'],
        'p1': ['cpi','us_pop','public_roads','us_urban_pop','oil_production','auto_sales','us_real_gdp'],
        'p1.2': ['cpi','us_pop','public_roads','us_urban_pop','oil_production','auto_sales','us_real_gdp'],
        'p2': ['cpi','us_pop','public_roads','us_urban_pop','oil_production','djus_auto_index','auto_sales','us_real_gdp','usd_mex_exrate']
        
    },
    2: {
        'basic': [],
        'et': ['cpi','us_pop','public_roads','us_urban_pop','oil_production','auto_sales','us_real_gdp'],
        'p1': ['cpi','us_pop','public_roads','us_urban_pop','oil_production','auto_sales','us_real_gdp'],
        'p1.2': ['cpi','us_pop','public_roads','us_urban_pop','oil_production','auto_sales','us_real_gdp'],
        'p2': ['cpi','us_pop','public_roads','us_urban_pop','oil_production','djus_auto_index','auto_sales','us_real_gdp','usd_mex_exrate']

    },
    3: {
        'basic': [],
        'et': ['cpi','coal_consumption','ngas_consumption','us_pop','oil_production','tree_cover_loss','us_real_gdp'],
        'p1': ['cpi','coal_consumption','ngas_consumption','us_pop','oil_production','tree_cover_loss','us_real_gdp'],
        'p1.2': ['cpi','coal_consumption','ngas_consumption','us_pop','oil_production','tree_cover_loss','us_real_gdp'],
        'p2': ['cpi','coal_consumption','ngas_consumption','us_pop','oil_production','cattle_index','tree_cover_loss','us_real_gdp','usd_mex_exrate']
    }
}






sheetname = str(PROBLEM_NO) + PERIOD_NO

X_MAIN_COL, Y_COL = PROBLEM_VAR_DICT[PROBLEM_NO]
X_MAIN_CHG_COL = X_MAIN_COL + '_chg'
Y_CHG_COL = Y_COL + '_chg'
EXTRA_VARS = ADDITIONAL_VAR_DICT[PROBLEM_NO][PERIOD_NO]

TRAINING_RATIO_DEFAULT = 0.7
VALIDATION_RATIO_DEFAULT = 0.15
TEST_RATIO_DEFAULT = 0.15

REGRESSOR_ACCURACY_DICT = {}
CLASSIFIER_ACCURACY_DICT = {}

REGRESSOR_COEF_DICT = {}
CLASSIFIER_COEF_DICT = {}

df = pd.read_csv(PROBLEM_PATH_DICT[PROBLEM_NO][PERIOD_NO])
df

Unnamed: 0,week,crude_oil,cpi,coal_consumption,ngas_consumption,us_pop,oil_production,cattle_index,tree_cover_loss,us_real_gdp,usd_mex_exrate,co2
0,2009-01-04,42.40,93.262178,80527.66971,1.166511e+06,306035.0,4935,297.27,1406957.794,14476.2498,13.657700,386.40
1,2009-01-11,44.46,91.817169,81500.87953,9.357362e+05,306085.5,4917,288.70,1567991.001,16075.1555,13.899500,387.02
2,2009-01-18,36.73,91.122475,97916.98177,9.389947e+05,306136.0,5052,299.23,1336297.836,15137.1916,14.025100,387.14
3,2009-01-25,42.15,87.888142,84498.02661,9.046054e+05,306186.5,5045,292.83,1652557.380,15155.3440,14.255700,387.50
4,2009-02-01,42.70,82.715265,89510.77745,7.992948e+05,306237.0,5235,290.44,1470782.481,15538.1713,14.119200,387.03
...,...,...,...,...,...,...,...,...,...,...,...,...
569,2019-12-01,58.07,112.084710,43325.23094,6.060118e+05,329314.0,12900,430.72,2173871.578,19477.3091,19.286699,411.07
570,2019-12-08,57.64,109.471205,39751.66796,8.286665e+05,329335.8,12800,426.54,2458219.683,19750.1111,18.997499,411.32
571,2019-12-15,59.25,108.183411,48847.79099,7.519214e+05,329357.6,12800,435.32,1946025.679,20106.3904,18.938499,411.89
572,2019-12-22,60.75,108.707988,50008.88542,5.915937e+05,329379.4,12900,429.35,2230631.393,20396.2701,18.844700,412.21


## Creating independent and dependent variables

In [4]:
# calculate independent variables
periods = (1,2,3,5,10)
for i in periods:
    df [X_MAIN_CHG_COL + f'_{i}'] = df[X_MAIN_COL].pct_change(periods=i)

# calculate dependent variable
df[Y_CHG_COL] = df[Y_COL].pct_change(periods=1)

# eliminate the empty rows
df = df[11:]
df

Unnamed: 0,week,crude_oil,cpi,coal_consumption,ngas_consumption,us_pop,oil_production,cattle_index,tree_cover_loss,us_real_gdp,usd_mex_exrate,co2,crude_oil_chg_1,crude_oil_chg_2,crude_oil_chg_3,crude_oil_chg_5,crude_oil_chg_10,co2_chg
11,2009-03-22,49.49,93.593485,88029.33328,607862.7477,306562.2,5432,282.76,1933223.874,15121.9490,14.338800,389.29,0.083881,0.146132,0.204136,0.339740,0.113135,0.002808
12,2009-03-29,52.99,105.508484,81588.96621,786643.0421,306603.6,5480,278.41,1451331.110,15312.9473,13.541800,389.04,0.070721,0.160534,0.227189,0.426380,0.442690,-0.000642
13,2009-04-05,50.34,97.283677,72928.73097,389615.9568,306645.0,5469,287.29,1181283.104,15389.4132,13.130000,389.43,-0.050009,0.017175,0.102497,0.224818,0.194306,0.001002
14,2009-04-12,50.46,79.216384,72626.84388,553381.6950,306699.5,5482,288.74,1278670.711,14744.1587,13.097800,388.93,0.002384,-0.047745,0.019600,0.168597,0.181733,-0.001284
15,2009-04-19,49.86,95.223458,69962.81952,351902.9953,306754.0,5421,288.48,1752394.499,14624.3705,13.550500,389.56,-0.011891,-0.009535,-0.059068,0.091984,0.222658,0.001620
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
569,2019-12-01,58.07,112.084710,43325.23094,606011.7971,329314.0,12900,430.72,2173871.578,19477.3091,19.286699,411.07,0.020562,0.021460,0.024343,0.051993,-0.021237,0.000852
570,2019-12-08,57.64,109.471205,39751.66796,828666.4868,329335.8,12800,426.54,2458219.683,19750.1111,18.997499,411.32,-0.007405,0.013005,0.013896,0.044771,0.013005,0.000608
571,2019-12-15,59.25,108.183411,48847.79099,751921.3538,329357.6,12800,435.32,1946025.679,20106.3904,18.938499,411.89,0.027932,0.020320,0.041301,0.045158,0.115399,0.001386
572,2019-12-22,60.75,108.707988,50008.88542,591593.7371,329379.4,12900,429.35,2230631.393,20396.2701,18.844700,412.21,0.025316,0.053956,0.046151,0.068602,0.140417,0.000777


## Seperating training, validation, and test datasets

In [5]:
# create list of independent variable's name
indep_vars = [X_MAIN_CHG_COL + f'_{i}' for i in periods] + EXTRA_VARS
num_var =len(indep_vars)

# extract the values to X
X = df[[X_MAIN_COL] + indep_vars].values
X

array([[ 4.94900000e+01,  8.38808585e-02,  1.46132469e-01, ...,
         1.93322387e+06,  1.51219490e+04,  1.43388004e+01],
       [ 5.29900000e+01,  7.07213579e-02,  1.60534385e-01, ...,
         1.45133111e+06,  1.53129473e+04,  1.35417996e+01],
       [ 5.03400000e+01, -5.00094357e-02,  1.71751869e-02, ...,
         1.18128310e+06,  1.53894132e+04,  1.31300001e+01],
       ...,
       [ 5.92500000e+01,  2.79319917e-02,  2.03203031e-02, ...,
         1.94602568e+06,  2.01063904e+04,  1.89384994e+01],
       [ 6.07500000e+01,  2.53164557e-02,  5.39555864e-02, ...,
         2.23063139e+06,  2.03962701e+04,  1.88446999e+01],
       [ 6.12900000e+01,  8.88888889e-03,  3.44303797e-02, ...,
         2.12435707e+06,  1.88128854e+04,  1.89113007e+01]])

In [6]:
# create y for regressor, y_c for classifier
y = df[Y_CHG_COL]
y_c = (df[Y_CHG_COL] > 0).values.astype('int')

# set training, validation, and test criterias
training_ratio = TRAINING_RATIO_DEFAULT
validation_ratio = VALIDATION_RATIO_DEFAULT
test_ratio = TEST_RATIO_DEFAULT

# check correctness of X and y
X.shape, y.shape, y_c.shape

((563, 15), (563,), (563,))

In [7]:
# create training, validation, and test sets for regressor
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_ratio, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=validation_ratio/(validation_ratio+training_ratio), random_state=42)

print("X_train shape:\t", X_train.shape)
print("X_test shape:\t", X_test.shape)
print("X_val shape:\t", X_val.shape)
print("y_train shape:\t", y_train.shape)
print("y_val shape:\t", y_val.shape)
print("y_test shape:\t", y_test.shape)

X_train shape:	 (393, 15)
X_test shape:	 (85, 15)
X_val shape:	 (85, 15)
y_train shape:	 (393,)
y_val shape:	 (85,)
y_test shape:	 (85,)


In [8]:
# create training, validation, and test sets for classifier
X_train_c, X_test_c, y_train_c, y_test_c = train_test_split(X, y_c, test_size=test_ratio, random_state=42)
X_train_c, X_val_c, y_train_c, y_val_c = train_test_split(X_train_c, y_train_c, test_size=validation_ratio/(validation_ratio+training_ratio), random_state=42)

print("X_train_c shape:", X_train_c.shape)
print("X_test_c shape:\t", X_test_c.shape)
print("X_val_c shape:\t", X_val_c.shape)
print("y_train_c shape:", y_train_c.shape)
print("y_val_c shape:\t", y_val_c.shape)
print("y_test_c shape:\t", y_test_c.shape)

X_train_c shape: (393, 15)
X_test_c shape:	 (85, 15)
X_val_c shape:	 (85, 15)
y_train_c shape: (393,)
y_val_c shape:	 (85,)
y_test_c shape:	 (85,)


## Principal Component Analysis (PCA)

In [9]:
# normalization for regressor set
for i in range(X_train.shape[1]):
    X_train_mean = X_train[:, i]. mean()
    X_train_std = X_train[:, i].std()
    X_train[:, i] = (X_train[:,i] - X_train_mean) / X_train_std
    X_test[:,i] = (X_test[:,i] - X_train_mean) / X_train_std
    X_val[:, i] = (X_val[:,i] - X_train_mean) / X_train_std

print("X_train shape:\t", X_train.shape)
print("X_val shape:\t", X_val.shape)

X_train shape:	 (393, 15)
X_val shape:	 (85, 15)


In [10]:
# normalization for classifier set
for i in range(X_train_c.shape[1]):
    X_train_c_mean = X_train_c[:, i]. mean()
    X_train_c_std = X_train_c[:, i].std()
    X_train_c[:, i] = (X_train_c[:,i] - X_train_c_mean) / X_train_c_std
    X_test_c[:,i] = (X_test_c[:,i] - X_train_c_mean) / X_train_c_std
    X_val_c[:, i] = (X_val_c[:,i] - X_train_c_mean) / X_train_c_std

print("X_train_c shape:", X_train_c.shape)
print("X_val_c shape:\t", X_val_c.shape)

X_train_c shape: (393, 15)
X_val_c shape:	 (85, 15)


In [11]:
# PCA for regressor
# set PCA value
pca = PCA(0.95)

# fit PCA training set
pca.fit(X_train)

# apply transform to training and test set
X_train_pca = pca.transform(X_train)
X_test_pca = pca.transform(X_test)
X_val_pca = pca.transform(X_val)

print("X_train_pca shape:\t", X_train_pca.shape)
print("X_test_pca shape:\t", X_test_pca.shape)
print("X_val_pca shape:\t", X_val_pca.shape)
print("y_train shape:\t", y_train.shape)
print("y_val shape:\t", y_val.shape)
print("y_test shape:\t", y_test.shape)

X_train_pca shape:	 (393, 9)
X_test_pca shape:	 (85, 9)
X_val_pca shape:	 (85, 9)
y_train shape:	 (393,)
y_val shape:	 (85,)
y_test shape:	 (85,)


In [12]:
# PCA for classifier
# apply transform to training and test set
X_train_c_pca = pca.transform(X_train_c)
X_test_c_pca = pca.transform(X_test_c)
X_val_c_pca = pca.transform(X_val_c)

print("X_train_c_pca shape:\t", X_train_c_pca.shape)
print("X_test_c_pca shape:\t", X_test_c_pca.shape)
print("X_val_c_pca shape:\t", X_val_c_pca.shape)
print("y_train_c shape:", y_train_c.shape)
print("y_val_c shape:\t", y_val_c.shape)
print("y_test_c shape:\t", y_test_c.shape)

X_train_c_pca shape:	 (393, 9)
X_test_c_pca shape:	 (85, 9)
X_val_c_pca shape:	 (85, 9)
y_train_c shape: (393,)
y_val_c shape:	 (85,)
y_test_c shape:	 (85,)


In [13]:
num_var = X_train_c_pca.shape[1]
num_var

9

## Multiple linear regression

In [14]:
multiple_reg = LinearRegression().fit(X_train_pca, y_train)
score_multiple_reg = multiple_reg.score(X_test_pca, y_test)

print('score_multiple_reg:', "%.4f" % score_multiple_reg)

score_multiple_reg: 0.0627


In [15]:
REGRESSOR_COEF_DICT['multi_linear'] = multiple_reg.coef_
REGRESSOR_ACCURACY_DICT['multi_linear'] = score_multiple_reg

## Random forest classifier

In [16]:
# testing for different max depth
rfc = dict()
for i in range (1, 15):
    clf = RandomForestClassifier(max_depth=i, random_state=0).fit(X_train_c_pca, y_train_c)
    y_pred = clf.predict(X_val_c_pca)
    score_clf = accuracy_score(y_val_c, y_pred)
    rfc[i] = score_clf

max_depth = max(rfc, key=rfc.get)
print('max depth:', max_depth)

max depth: 1


In [17]:
# choose max_depth = 4
rf_clf = RandomForestClassifier(max_depth=int(max_depth), random_state=0).fit(X_train_c_pca, y_train_c)
score_rf_clf = rf_clf.score(X_test_c_pca, y_test_c)
print('score_rf_clf:', "%.4f" % score_rf_clf)

score_rf_clf: 0.7059


In [18]:
CLASSIFIER_COEF_DICT['random_forest'] = rf_clf.feature_importances_
CLASSIFIER_ACCURACY_DICT['random_forest'] = score_rf_clf

## Random forest regressor

In [19]:
rf_reg = RandomForestRegressor().fit(X_train_pca, y_train)
score_rf_reg = rf_reg.score(X_test_pca, y_test)
print('score_rf_reg:', "%.4f" % score_rf_reg)

score_rf_reg: 0.0434


In [20]:
REGRESSOR_COEF_DICT['random_forest'] = rf_reg.feature_importances_
REGRESSOR_ACCURACY_DICT['random_forest'] = score_rf_reg

## Logistic regression

In [21]:
logistic_reg = LogisticRegression(penalty = 'l1', solver='liblinear', random_state=0).fit(X_train_c_pca, y_train_c)
score_logistic = logistic_reg.score(X_test_c_pca, y_test_c)
print('score_logistic:', "%.4f" % score_logistic)

score_logistic: 0.6941


In [22]:
CLASSIFIER_COEF_DICT['logistic'] = logistic_reg.coef_[0]
CLASSIFIER_ACCURACY_DICT['logistic'] = score_logistic

## Gradient boosting classifier

In [23]:
gra_clf = GradientBoostingClassifier(n_estimators = 200, learning_rate=0.50, max_depth=5, random_state=0).fit(X_train_c_pca, y_train_c)
score_gb_clf = gra_clf.score(X_test_c_pca, y_test_c)
score_gb_clf

0.5529411764705883

In [24]:
CLASSIFIER_COEF_DICT['gradient_boosting'] = gra_clf.feature_importances_
CLASSIFIER_ACCURACY_DICT['gradient_boosting'] = score_gb_clf

## Gradient boosting regressor

In [25]:
gra_reg = GradientBoostingRegressor(random_state=0).fit(X_train_pca, y_train)
score_gb_reg = gra_reg.score(X_test_pca, y_test)
score_gb_reg

-0.1265636871768412

In [26]:
REGRESSOR_COEF_DICT['gradient_boosting'] = gra_reg.feature_importances_
REGRESSOR_ACCURACY_DICT['gradient_boosting'] = score_gb_reg

## XGBoost classifier

In [27]:
xgb_clf = XGBClassifier(use_label_encoder=False).fit(X_train_c_pca, y_train_c)
score_xgb_clf = xgb_clf.score(X_test_c_pca, y_test_c)
score_xgb_clf



0.49411764705882355

In [28]:
CLASSIFIER_COEF_DICT['xgboost'] = xgb_clf.feature_importances_
CLASSIFIER_ACCURACY_DICT['xgboost'] = score_xgb_clf

## XGBoost regressor

In [29]:
xgb_reg = XGBRegressor(random_state = 0).fit(X_train_pca, y_train)
score_xgb_reg = xgb_reg.score(X_test_pca, y_test)
score_xgb_reg

-0.22637460406135834

In [30]:
REGRESSOR_COEF_DICT['xgboost'] = xgb_reg.feature_importances_
REGRESSOR_ACCURACY_DICT['xgboost'] = score_xgb_reg

## AdaBoost classifier

In [31]:
adb_clf = AdaBoostClassifier().fit(X_train_c_pca, y_train_c)
score_adb_clf = adb_clf.score(X_test_c_pca, y_test_c)
score_adb_clf

0.6352941176470588

In [32]:
CLASSIFIER_COEF_DICT['adaboost'] = adb_clf.feature_importances_
CLASSIFIER_ACCURACY_DICT['adaboost'] = score_adb_clf

## AdaBoost regressor

In [33]:
adb_reg = AdaBoostRegressor().fit(X_train_pca, y_train)
score_adb_reg = adb_reg.score(X_test_pca, y_test)
score_adb_reg

0.01389118867280481

In [34]:
REGRESSOR_COEF_DICT['adaboost'] = adb_reg.feature_importances_
REGRESSOR_ACCURACY_DICT['adaboost'] = score_adb_reg

## Support vector classification

In [35]:
# Try different kernels
kernels = ('linear', 'poly', 'rbf')

for k in kernels:
    svm_clf = SVC(kernel=k).fit(X_train_c_pca, y_train_c)
    y_pred = svm_clf.predict(X_test_c_pca)
    vars()[f'score_svc_{k}']= accuracy_score(y_test_c, y_pred)
    
    if k == 'linear':
        CLASSIFIER_COEF_DICT['svc_linear'] = svm_clf.coef_[0]
    
print(score_svc_linear)
print(score_svc_poly)
print(score_svc_rbf)

CLASSIFIER_ACCURACY_DICT['svc_linear'] = score_svc_linear
CLASSIFIER_ACCURACY_DICT['svc_poly'] = score_svc_poly
CLASSIFIER_ACCURACY_DICT['svc_rbf'] = score_svc_rbf

0.7294117647058823
0.6705882352941176
0.6588235294117647


## Gaussian Naive Bayes

In [36]:
gnb_clf = GaussianNB().fit(X_train_c_pca, y_train_c)
score_gnb = gnb_clf.score(X_test_c_pca, y_test_c)
print(score_gnb)

0.7294117647058823


In [37]:
CLASSIFIER_ACCURACY_DICT['GaussianNB'] = score_gnb

## Neuro networks

In [38]:
model = keras.models.Sequential()
model.add(keras.layers.Dense(10, input_dim=num_var, activation='relu'))
model.add(keras.layers.Dense(5, activation='relu'))
model.add(keras.layers.Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

history = model.fit(X_train_c_pca, y_train_c, epochs=50, validation_data=(X_val_c_pca, y_val_c))

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [39]:
loss, score_neuro = model.evaluate(X_test_c_pca, y_test_c)
print(score_neuro)

0.6823529601097107


In [40]:
CLASSIFIER_ACCURACY_DICT['neuro'] = score_neuro

## Summary

In [41]:
reg_df = pd.DataFrame.from_dict(REGRESSOR_ACCURACY_DICT , orient='index', columns=['Score'])
reg_df.index.name = 'Regression models'
#reg_df.sort_values(by=['Score'], ascending=False)
reg_df

Unnamed: 0_level_0,Score
Regression models,Unnamed: 1_level_1
multi_linear,0.062738
random_forest,0.043392
gradient_boosting,-0.126564
xgboost,-0.226375
adaboost,0.013891


In [42]:
clf_df = pd.DataFrame.from_dict(CLASSIFIER_ACCURACY_DICT, orient='index',columns=['Score'])
clf_df.index.name = 'Classifier models'
#clf_df.sort_values(by=['Score'], ascending=False)
clf_df

Unnamed: 0_level_0,Score
Classifier models,Unnamed: 1_level_1
random_forest,0.705882
logistic,0.694118
gradient_boosting,0.552941
xgboost,0.494118
adaboost,0.635294
svc_linear,0.729412
svc_poly,0.670588
svc_rbf,0.658824
GaussianNB,0.729412
neuro,0.682353


In [43]:
reg_coef_df = pd.DataFrame.from_dict(REGRESSOR_COEF_DICT, orient='index')
reg_coef_df

Unnamed: 0,0,1,2,3,4,5,6,7,8
multi_linear,1.6e-05,2e-05,-4.5e-05,0.000293,-0.00036,-0.000165,3.2e-05,0.000453,0.000269
random_forest,0.088414,0.095668,0.084296,0.161635,0.144336,0.09287,0.095099,0.127326,0.110356
gradient_boosting,0.051986,0.102827,0.100148,0.139598,0.126079,0.088638,0.126752,0.160945,0.103027
xgboost,0.099284,0.124519,0.082115,0.115591,0.115074,0.109446,0.126549,0.118692,0.108731
adaboost,0.02791,0.120334,0.056101,0.176476,0.183748,0.13211,0.098821,0.124682,0.079818


In [44]:
clf_coef_df = pd.DataFrame.from_dict(CLASSIFIER_COEF_DICT, orient='index')
clf_coef_df

Unnamed: 0,0,1,2,3,4,5,6,7,8
random_forest,0.02,0.04,0.04,0.13,0.33,0.05,0.04,0.27,0.08
logistic,0.002371,-0.013184,-0.201637,0.208744,-0.423214,-0.178593,0.143575,0.645465,0.209227
gradient_boosting,0.092015,0.072457,0.095273,0.102639,0.095543,0.107766,0.089106,0.216731,0.12847
xgboost,0.089967,0.113825,0.115103,0.105873,0.109381,0.089211,0.11485,0.131004,0.130785
adaboost,0.12,0.14,0.1,0.08,0.08,0.12,0.14,0.1,0.12
svc_linear,0.001986,-0.008169,-0.254268,0.379269,-0.49041,-0.20122,0.168047,0.790169,0.437013


In [45]:
# Export
book = load_workbook('Results.xlsx')
writer = pd.ExcelWriter('Results.xlsx', engine='openpyxl')
writer.book = book

writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
reg_df.to_excel(writer, sheet_name='%s reg score' % sheetname)
clf_df.to_excel(writer, sheet_name='%s clf score' % sheetname)
reg_coef_df.to_excel(writer, sheet_name='%s reg coef' % sheetname)
clf_coef_df.to_excel(writer, sheet_name='%s clf coef' % sheetname)

writer.save()
#reg_df.to_csv('reg_df.csv')
#clf_df.to_csv('clf_df.csv')
#reg_coef_df.to_csv('reg_coef_df.csv')
#clf_coef_df.to_csv('clf_coef_df.csv')