In [10]:
import numpy as np
import pandas as pd
import random
import tensorflow as tf
import os
from pandastable import Table, TableModel
from tkinter import *
from explainerdashboard import ClassifierExplainer, ExplainerDashboard


## pycaret
from pycaret.regression import *

## FinBERT
from transformers import BertTokenizer, BertForSequenceClassification
from transformers import pipeline

## SDV
from sdv.metadata import SingleTableMetadata
from sdv.single_table import GaussianCopulaSynthesizer
from sdv.sampling import Condition
from sdv.evaluation.single_table import evaluate_quality
from sdv.evaluation.single_table import run_diagnostic
from sdv.evaluation.single_table import get_column_plot
from sdv.evaluation.single_table import get_column_pair_plot
from sdv.sequential import PARSynthesizer
from sdv.sampling import Condition
from sdmetrics.reports.single_table import QualityReport
from sdmetrics.reports.utils import get_column_plot
from sdmetrics.reports.utils import get_column_plot

## LSTM
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from sklearn.metrics import *
from sklearn.model_selection import train_test_split
from keras.callbacks import EarlyStopping

import socket

seed_num = 42
np.random.seed(seed_num)
random.seed(seed_num)
tf.random.set_seed(seed_num)
tf.keras.utils.set_random_seed(seed_num)
os.chdir('/Users/hiksang/Desktop/Proj/DF/EWS/data')  # 추후 DB로 변환

In [4]:
df = pd.read_csv('KVALL.WD.csv')


def make_pipeline():    
    finbert = BertForSequenceClassification.from_pretrained('yiyanghkust/finbert-tone',num_labels=3)
    tokenizer = BertTokenizer.from_pretrained('yiyanghkust/finbert-tone')
    nlp = pipeline("sentiment-analysis", model=finbert, tokenizer=tokenizer)
    return nlp

def classification_text(text):
    result = nlp()
    
feature_col = df.iloc[:,4:].columns
feature_list = feature_col.to_list()
feature_cols = feature_list[1:]
feature_target = [feature_list[0]]
feature_bank = feature_list + ['bank.code']
feature_all = feature_bank + ['date']
print(feature_target)
print(feature_list)
print(feature_cols)

['KV001']
['KV001', 'KV002', 'KV003', 'KV004', 'KV005', 'KV006', 'KV007', 'KV008', 'KV009', 'KV010', 'KV011', 'KV012', 'KV013', 'KV014', 'KV015', 'KV016', 'KV017', 'KV018']
['KV002', 'KV003', 'KV004', 'KV005', 'KV006', 'KV007', 'KV008', 'KV009', 'KV010', 'KV011', 'KV012', 'KV013', 'KV014', 'KV015', 'KV016', 'KV017', 'KV018']


# PreProcess

In [5]:
def prep_data(df, feature_col):
    df = df[feature_col]
    df = df.replace([np.inf, -np.inf], np.nan)  # "inf" 값을 NaN으로 대체
    df = df.dropna()
    return df


def fss_date(df, feature_col, bank_name): # 
    df['date'] = pd.to_datetime(df['year.code'].astype(str) + df['month.code'].astype(str), format='%Y%m') 

    df = df[feature_col]
    df = df.replace([np.inf, -np.inf], np.nan)  # "inf" 값을 NaN으로 대체
    df = df.dropna()
    print(df)
    df = df[df['bank.code'] == bank_name]
    df = df.drop(columns=['bank.code'])

    
    return df


def lstm_train_test(prep_df):
    feature_cols = ['KV002','KV003','KV004','KV005','KV006','KV007','KV008','KV009','KV010',
               'KV011', 'KV012', 'KV013','KV014','KV015','KV016','KV017','KV018']
    label_col = ['KV001']

    feature_df = pd.DataFrame(prep_df, columns=feature_cols)
    label_df = pd.DataFrame(prep_df, columns=label_col)

    feature_df = feature_df.to_numpy()
    label_df = label_df.to_numpy()
    
    x_train, x_test, y_train, y_test = train_test_split(feature_df, label_df, test_size=0.4, shuffle=False, random_state=42)
    x_val, x_test, y_val, y_test = train_test_split(x_test, y_test, test_size=0.5, shuffle=False, random_state=42)
    print(len(x_train), len(x_val), len(x_test))
    return x_train.reshape(-1,1,17), x_val.reshape(-1,1,17), x_test.reshape(-1,1,17), y_train.reshape(-1,1,1), y_val.reshape(-1,1,1), y_test.reshape(-1,1,1)


df_ml = prep_data(df, feature_bank)
df_lstm = fss_date(df, feature_all, 'BANK #1')
x_train, x_val, x_test, y_train, y_val, y_test = lstm_train_test(df_lstm)

    

    

print(len(df_lstm))


        KV001      KV002      KV003     KV004     KV005     KV006      KV007  \
15   7.490800  11.230069  12.892386  0.647280 -8.069694  4.787695  29.034507   
16   7.381399  11.509397  12.977475  0.495335 -1.847013  0.894125  29.547321   
17   7.331138  11.553702  13.024878  0.820232 -3.768500  2.526414  29.872508   
18   7.403061  11.589960  13.047304  1.212506 -5.585090  4.531546  31.057246   
19   6.973216   9.064114  10.816283  1.009559 -7.276591  3.756003  30.998928   
..        ...        ...        ...       ...       ...       ...        ...   
879  7.382142  10.968299  13.063094  1.878980 -3.793386  0.956893  34.626846   
880  7.059718  10.815314  12.733773  0.526592 -1.247132  4.581053  34.833039   
881  6.858891  10.665785  12.534142  1.030550 -2.661936  2.664026  34.366872   
882  6.840320  10.670107  12.508762  1.514801 -4.543245  1.043397  34.195223   
883  6.956038  10.731233  12.599827  1.585924 -7.251555 -1.389545  33.836448   

        KV008      KV009     KV010     

# AutoML

In [14]:

def make_localhost(port):    
    s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    s.connect(('8.8.8.8', 1))
    local_ip_address = s.getsockname()[0] 
    return f'http://{local_ip_address}:{port}'
    
def auto_ml(df, target, bank_name):
    df = df[df['bank.code'] == bank_name]
    df = df.drop(columns=['bank.code'])
    model = setup(df, target = target, session_id= 42)
    best = compare_models()
    
    linear_reg = create_model('lr', cross_validation=True)
    ridge_reg = create_model('ridge', cross_validation=True)
    xgb = create_model('xgboost', cross_validation = True)
    lgb = create_model('lightgbm', cross_validation = True)
    
    linear_port = 8000
#     XGBoost_port = 8010
#     lightGBM_port =8020
#     GBM_port =8030
#     ridge_port = 8040    
    
#     ## dashboard of linear
    dashboard(linear_reg, run_kwargs={'port':linear_port})#, 'host':'0.0.0.0'})
#     linear_address = make_localhost(linear_port)
    
#     ## dashboard of ridge
#     dashboard(ridge_reg, run_kwargs={'port':ridge_port})#, 'host':'0.0.0.0'})
#     ridge_address = make_localhost(linear_port)
    
#     ## dashboard of xgb
#     dashboard(xgb, run_kwargs={'port':XGBoost_port})#, 'host':'0.0.0.0'})
#     xgb_address = make_localhost(linear_port)
    
#     ## dashboard of lightgbm
#     dashboard(lgb, run_kwargs={'port':lightGBM_port})#, 'host':'0.0.0.0'})
#     lightgbbm_address = make_localhost(linear_port)
    
#     address_dict = {'lr' : linear_address, 'ridge': ridge_address, 'xgboost' : xgb_address, 'lightgbm' : lightgbbm_address}
    return linear_reg

def auto_ml_all(df, target):
    df = df.drop(columns=['bank.code'])
    
    model = setup(df, target = target, session_id= 42)
    best = compare_models()
    
    linear_reg = create_model('lr', cross_validation=True)
    ridge_reg = create_model('ridge', cross_validation=True)
    xgb = create_model('xgboost', cross_validation = True)
    lgb = create_model('lightgbm', cross_validation = True)
    
#     dashboard(linear_reg)
#     dashboard(xgb)
    
estm = auto_ml(df_ml, "KV001", 'BANK #1')

print(len(df_ml))
# auto_ml_all(df_ml, "KV001")

Unnamed: 0,Description,Value
0,Session id,42
1,Target,KV001
2,Target type,Regression
3,Original data shape,"(36, 18)"
4,Transformed data shape,"(36, 18)"
5,Transformed train set shape,"(25, 18)"
6,Transformed test set shape,"(11, 18)"
7,Numeric features,17
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
lr,Linear Regression,0.0958,0.0145,0.1076,-0.7659,0.0143,0.0147,0.018
lightgbm,Light Gradient Boosting Machine,0.3366,0.2277,0.3882,-0.9948,0.0518,0.0523,0.017
dummy,Dummy Regressor,0.3366,0.2277,0.3882,-0.9948,0.0518,0.0523,0.017
ridge,Ridge Regression,0.1397,0.0383,0.1647,-1.1329,0.0217,0.0213,0.016
catboost,CatBoost Regressor,0.2181,0.0904,0.2459,-1.2494,0.0333,0.0342,0.017
ada,AdaBoost Regressor,0.2058,0.0805,0.2378,-1.601,0.0325,0.0326,0.021
br,Bayesian Ridge,0.2294,0.1849,0.27,-2.0105,0.0333,0.0343,0.017
xgboost,Extreme Gradient Boosting,0.2115,0.0683,0.2425,-3.3339,0.032,0.0323,0.018
et,Extra Trees Regressor,0.1676,0.042,0.1856,-3.4827,0.0252,0.0262,0.03
lasso,Lasso Regression,0.3328,0.1866,0.3854,-5.0733,0.0503,0.051,0.017


Processing:   0%|          | 0/85 [00:00<?, ?it/s]

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0.102,0.0124,0.1114,0.9852,0.0138,0.0148
1,0.1158,0.0169,0.1299,0.8237,0.018,0.0181
2,0.0851,0.0086,0.0927,0.908,0.0119,0.0124
3,0.145,0.0345,0.1857,0.9323,0.0242,0.0226
4,0.0528,0.004,0.0633,0.9894,0.009,0.0084
5,0.0688,0.0049,0.0702,-1.9053,0.0093,0.0105
6,0.2146,0.0493,0.2219,-0.5562,0.0305,0.0335
7,0.0335,0.0014,0.0376,0.9862,0.0052,0.0052
8,0.0732,0.0063,0.0795,-11.1897,0.0103,0.011
9,0.0671,0.0071,0.084,0.3671,0.0109,0.0102


Processing:   0%|          | 0/4 [00:00<?, ?it/s]

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0.289,0.1313,0.3623,0.8431,0.0423,0.0413
1,0.2654,0.0969,0.3113,-0.0126,0.0442,0.0417
2,0.1582,0.0265,0.1629,0.7159,0.0209,0.023
3,0.201,0.0542,0.2328,0.8936,0.0331,0.0332
4,0.0743,0.0075,0.0867,0.9802,0.0117,0.0115
5,0.1208,0.0263,0.1623,-14.5201,0.021,0.0183
6,0.0204,0.0004,0.0205,0.9867,0.0028,0.0032
7,0.1173,0.0148,0.1218,0.8549,0.0167,0.0182
8,0.0295,0.001,0.0317,-0.9332,0.0041,0.0044
9,0.1207,0.0238,0.1544,-1.1372,0.0205,0.0183


Processing:   0%|          | 0/4 [00:00<?, ?it/s]

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0.3987,0.2143,0.4629,0.744,0.0627,0.0628
1,0.1377,0.0305,0.1746,0.6815,0.0229,0.0207
2,0.2366,0.0835,0.2889,0.1062,0.0365,0.0349
3,0.2106,0.0673,0.2594,0.8679,0.0353,0.0326
4,0.2563,0.0881,0.2968,0.7674,0.0375,0.0377
5,0.1225,0.0238,0.1544,-13.052,0.0208,0.0187
6,0.2892,0.0917,0.3029,-1.8982,0.0403,0.0453
7,0.203,0.0415,0.2037,0.5944,0.0269,0.0309
8,0.0911,0.0111,0.1052,-20.3686,0.0137,0.0137
9,0.1695,0.031,0.1762,-1.7813,0.0239,0.0261


Processing:   0%|          | 0/4 [00:00<?, ?it/s]

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0.8393,0.8368,0.9148,-0.0,0.1255,0.1358
1,0.2575,0.1003,0.3166,-0.0476,0.0426,0.0404
2,0.3985,0.2522,0.5022,-1.7006,0.0633,0.0555
3,0.5861,0.5178,0.7196,-0.017,0.0979,0.0944
4,0.5835,0.3817,0.6178,-0.0077,0.0806,0.0883
5,0.0412,0.0024,0.049,-0.4143,0.0065,0.0063
6,0.1779,0.0595,0.244,-0.8803,0.0328,0.0283
7,0.3198,0.1045,0.3232,-0.0215,0.0429,0.0492
8,0.057,0.0038,0.0614,-6.2632,0.008,0.0086
9,0.1056,0.0178,0.1334,-0.5954,0.0178,0.0164


Processing:   0%|          | 0/4 [00:00<?, ?it/s]

Generating self.shap_explainer = shap.LinearExplainer(modelX)...
Building ExplainerDashboard..
Detected notebook environment, consider setting mode='external', mode='inline' or mode='jupyterlab' to keep the notebook interactive while the dashboard is running...
For this type of model and model_output interactions don't work, so setting shap_interaction=False...
The explainer object has no decision_trees property. so setting decision_trees=False...
Generating layout...
Calculating shap values...
Calculating predictions...
Calculating residuals...
Calculating absolute residuals...
Calculating dependencies...
Calculating importances...
Reminder: you can store the explainer (including calculated dependencies) with explainer.dump('explainer.joblib') and reload with e.g. ClassifierExplainer.from_file('explainer.joblib')
Registering callbacks...
Starting ExplainerDashboard on http://172.16.21.139:8000


567


# dashboard to html

In [26]:
from explainerdashboard import ExplainerDashboard, RegressionExplainer


df = df_ml[df['bank.code'] == "BANK #1"]
df_ml = df.drop(columns=['bank.code'])

model = setup(df_ml, target =  "KV001") ## 추후 shuffle False
best = compare_models()
linear_reg = create_model('lr', cross_validation=True)
X_test_df = model.X_test_transformed.copy()
y_test = model.y_test_transformed
print(x_train)
X_test_df.columns = [col.replace(".", "__").replace("{", "__").replace("}", "__") for col in X_test_df.columns]
explainer = RegressionExplainer(linear_reg, X_test_df, y_test)
ExplainerDashboard(explainer, mode='dash').run()

# html_linear = ExplainerDashboard(explainer, mode='dash').save_html()




# from explainerdashboard import ExplainerDashboard, RegressionExplainer

#         # Replaceing chars which dash doesnt accept for column name `.` , `{`, `}`
# X_test_df = self.X_test_transformed.copy()
# X_test_df.columns = [
#             col.replace(".", "__").replace("{", "__").replace("}", "__")
#             for col in X_test_df.columns
#         ]
# explainer = RegressionExplainer(
#             estimator, X_test_df, self.y_test_transformed, **kwargs
#         )
# return ExplainerDashboard(
#     explainer, mode=display_format, **dashboard_kwargs
#         ).run(**run_kwargs)

Unnamed: 0,Description,Value
0,Session id,7062
1,Target,KV001
2,Target type,Regression
3,Original data shape,"(36, 18)"
4,Transformed data shape,"(36, 18)"
5,Transformed train set shape,"(25, 18)"
6,Transformed test set shape,"(11, 18)"
7,Numeric features,17
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
gbr,Gradient Boosting Regressor,0.16,0.0724,0.1788,-0.4587,0.0247,0.0264,0.051
dt,Decision Tree Regressor,0.2134,0.1102,0.2443,-8.6095,0.0335,0.0348,0.044
xgboost,Extreme Gradient Boosting,0.2059,0.0919,0.2334,-9.293,0.0317,0.0331,0.047
rf,Random Forest Regressor,0.2147,0.0944,0.2368,-10.722,0.0322,0.0346,0.057
et,Extra Trees Regressor,0.1805,0.0866,0.2025,-18.7644,0.0279,0.0297,0.054
ridge,Ridge Regression,0.1665,0.0462,0.1811,-19.3024,0.0246,0.0264,0.048
lr,Linear Regression,0.1381,0.0356,0.1579,-20.5231,0.0209,0.0214,0.048
catboost,CatBoost Regressor,0.1973,0.0951,0.2213,-27.8058,0.0301,0.0319,0.109
par,Passive Aggressive Regressor,0.2149,0.0848,0.2516,-34.3705,0.0347,0.0342,0.046
ada,AdaBoost Regressor,0.2273,0.1031,0.2523,-47.2347,0.0341,0.0362,0.047


Processing:   0%|          | 0/85 [00:00<?, ?it/s]

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0.1301,0.0356,0.1888,0.5715,0.0255,0.0205
1,0.0518,0.0027,0.0519,0.9907,0.0067,0.0077
2,0.1493,0.0248,0.1574,0.9483,0.023,0.0256
3,0.0856,0.0121,0.1099,0.9101,0.0141,0.0125
4,0.027,0.0009,0.03,0.9736,0.0041,0.0043
5,0.0115,0.0002,0.0131,0.2012,0.0017,0.0017
6,0.2434,0.0702,0.265,-196.2291,0.0339,0.0365
7,0.3032,0.1231,0.3509,-5.8439,0.0444,0.0457
8,0.2307,0.0569,0.2386,-2.4622,0.0324,0.0359
9,0.1482,0.0299,0.173,-5.2915,0.0238,0.0233


Processing:   0%|          | 0/4 [00:00<?, ?it/s]

         KV002      KV003     KV004     KV005      KV006      KV007     KV008  \
671  15.522093  15.820181  2.206114 -0.419899   0.172194  36.593109  4.545916   
23    8.613658  10.604945  1.351652 -5.244090   3.792024  33.337486 -1.483327   
860  10.878238  11.920766  0.492949 -0.806683  16.423546  20.478924  3.716913   
85   16.644384  16.644384  0.850648  4.768416  -0.321974  35.557476  1.828264   
17   11.553701  13.024878  0.820232 -3.768500   2.526414  29.872509 -0.054115   
..         ...        ...       ...       ...        ...        ...       ...   
46   13.767385  15.085279  1.844785 -1.519952   1.873275  34.837662  3.526832   
708  15.466281  15.825948  0.711049 -0.977346   1.252268  33.362732  0.644348   
125  17.954687  17.954687  1.486087 -3.248996  -0.757332  27.622940 -1.197789   
863  10.994670  12.034121  2.441579 -3.552291   9.403339  24.727489  0.324836   
247  12.861914  14.140787  2.683846 -4.521680   3.211112  23.172119  0.102454   

         KV009     KV010   

In [32]:
linear_dash = ExplainerDashboard(explainer, mode='dash')
linear_dash.save_html('ht.html')



Building ExplainerDashboard..
Detected notebook environment, consider setting mode='external', mode='inline' or mode='jupyterlab' to keep the notebook interactive while the dashboard is running...
For this type of model and model_output interactions don't work, so setting shap_interaction=False...
The explainer object has no decision_trees property. so setting decision_trees=False...
Generating layout...
Calculating dependencies...
Reminder: you can store the explainer (including calculated dependencies) with explainer.dump('explainer.joblib') and reload with e.g. ClassifierExplainer.from_file('explainer.joblib')
Registering callbacks...


In [107]:
x_train, x_val, x_test, y_train, y_val, y_test

(array([[[ 1.12300695e+01,  1.28923864e+01,  6.47280076e-01,
          -8.06969403e+00,  4.78769478e+00,  2.90345074e+01,
          -1.75244558e+00,  3.15475865e+01,  4.41757700e+00,
           9.65108389e+01,  3.10889742e+00,  3.32841217e+00,
          -2.68113209e+00, -2.52380692e+01,  2.45064197e+01,
           9.66388078e+01, -9.08404794e+00]],
 
        [[ 1.15093966e+01,  1.29774750e+01,  4.95335491e-01,
          -1.84701326e+00,  8.94125058e-01,  2.95473212e+01,
           2.67196459e+00,  3.26705002e+01,  4.25973062e+00,
           9.66828956e+01,  5.05022814e-01,  1.82471807e-01,
           3.12863684e-01,  4.50675406e+00,  2.15282009e+01,
           5.39155308e+00,  1.16457547e+00]],
 
        [[ 1.15537018e+01,  1.30248777e+01,  8.20232407e-01,
          -3.76849963e+00,  2.52641359e+00,  2.98725083e+01,
          -5.41154602e-02,  3.21987129e+01,  4.22361441e+00,
           9.50869614e+01,  1.07915286e+00,  7.13767725e-01,
           1.06464682e-01,  2.86605053e+01,  1.897

In [20]:
def fss_date(df, feature_col, bank_name): # 
    df['date'] = pd.to_datetime(df['year.code'].astype(str) + df['month.code'].astype(str), format='%Y%m') 

    df = df[feature_col]
    df = df.replace([np.inf, -np.inf], np.nan)  # "inf" 값을 NaN으로 대체
    df = df.dropna()
    print(df)
    df = df[df['bank.code'] == bank_name]
    df = df.drop(columns=['bank.code'])
    
    return df

def lstm_train_test(df, target, feature):
    
    '''
    Example of target & feature
    feature = ['KV002','KV003','KV004','KV005','KV006','KV007','KV008','KV009','KV010',
               'KV011', 'KV012', 'KV013','KV014','KV015','KV016','KV017','KV018']
    target = ['KV001']
    '''
    ## feature or label
    feature_df = pd.DataFrame(df, columns=feature)
    label_df = pd.DataFrame(df, columns=target)

    feature_df = feature_df.to_numpy()
    label_df = label_df.to_numpy()
    
    num_feature = len(feature)
    
    
    x_train, x_test, y_train, y_test = train_test_split(feature_df, label_df, test_size=0.4, shuffle=False, random_state=42)
    x_val, x_test, y_val, y_test = train_test_split(x_test, y_test, test_size=0.5, shuffle=False, random_state=42)

    print(f'# of train : {len(x_train)} \n# of val : {len(x_val)} \n# of test : {len(x_test)}')
    
    return x_train.reshape(-1,1,num_feature), x_val.reshape(-1,1,num_feature), x_test.reshape(-1,1,num_feature), y_train.reshape(-1,1,1), y_val.reshape(-1,1,1), y_test.reshape(-1,1,1)


def lstm_model(df,target,feature): 
    ## data prep
    x_train, x_val, x_test, y_train, y_val, y_test = lstm_train_test(df,target,feature)
    
    ## lstm model
    model = Sequential()
    model.add(LSTM(50, activation='relu', input_shape=(1,time_stemp)))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mse')
    model.fit(x_train, y_train,
              validation_data=(x_val, y_val),
              epochs=50,
              batch_size=1)
    
    ## hp tuning
    
    return None
    
    
    

df_lstm = fss_date(df, feature_all, 'BANK #1')
x_train, x_val, x_test, y_train, y_val, y_test = lstm_train_test(df=df_lstm, target=feature_target,feature=feature_cols)


        KV001      KV002      KV003     KV004     KV005     KV006      KV007  \
15   7.490800  11.230069  12.892386  0.647280 -8.069694  4.787695  29.034507   
16   7.381399  11.509397  12.977475  0.495335 -1.847013  0.894125  29.547321   
17   7.331138  11.553702  13.024878  0.820232 -3.768500  2.526414  29.872508   
18   7.403061  11.589960  13.047304  1.212506 -5.585090  4.531546  31.057246   
19   6.973216   9.064114  10.816283  1.009559 -7.276591  3.756003  30.998928   
..        ...        ...        ...       ...       ...       ...        ...   
879  7.382142  10.968299  13.063094  1.878980 -3.793386  0.956893  34.626846   
880  7.059718  10.815314  12.733773  0.526592 -1.247132  4.581053  34.833039   
881  6.858891  10.665785  12.534142  1.030550 -2.661936  2.664026  34.366872   
882  6.840320  10.670107  12.508762  1.514801 -4.543245  1.043397  34.195223   
883  6.956038  10.731233  12.599827  1.585924 -7.251555 -1.389545  33.836448   

        KV008      KV009     KV010     

In [31]:
import keras_tuner

(x, y), (x_test, y_test) = keras.datasets.mnist.load_data()

x_train = x[:-10000]
x_val = x[-10000:]
y_train = y[:-10000]
y_val = y[-10000:]

x_train = np.expand_dims(x_train, -1).astype("float32") / 255.0
x_val = np.expand_dims(x_val, -1).astype("float32") / 255.0
x_test = np.expand_dims(x_test, -1).astype("float32") / 255.0

num_classes = 10
y_train = keras.utils.to_categorical(y_train, num_classes)
y_val = keras.utils.to_categorical(y_val, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

In [36]:
from keras import layers

def build_model(hp):
    model = keras.Sequential()
    model.add(layers.Flatten())
    # Tune the number of layers.
    for i in range(hp.Int("num_layers", 1, 3)):
        model.add(
            layers.Dense(
                # Tune number of units separately.
                units=hp.Int(f"units_{i}", min_value=32, max_value=512, step=32),
                activation=hp.Choice("activation", ["relu", "tanh"]),
            )
        )
    if hp.Boolean("dropout"):
        model.add(layers.Dropout(rate=0.25))
    model.add(layers.Dense(10, activation="softmax"))
    learning_rate = hp.Float("lr", min_value=1e-4, max_value=1e-2, sampling="log")
    model.compile(
#         optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
        optimizer=keras.optimizers.legacy.Adam(learning_rate=learning_rate), # for mac
        loss="categorical_crossentropy",
        metrics=["accuracy"],
    )
    return model

tuner = keras_tuner.RandomSearch(
    hypermodel=build_model,
    objective="val_accuracy",
    max_trials=3,
    executions_per_trial=2,
    overwrite=True,
    directory="my_dir",
    project_name="helloworld",
)

tuner.search_space_summary()


Search space summary
Default search space size: 5
num_layers (Int)
{'default': None, 'conditions': [], 'min_value': 1, 'max_value': 3, 'step': 1, 'sampling': 'linear'}
units_0 (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': 'linear'}
activation (Choice)
{'default': 'relu', 'conditions': [], 'values': ['relu', 'tanh'], 'ordered': False}
dropout (Boolean)
{'default': False, 'conditions': []}
lr (Float)
{'default': 0.0001, 'conditions': [], 'min_value': 0.0001, 'max_value': 0.01, 'step': None, 'sampling': 'log'}


In [37]:
import socket
def make_localhost(port):    
    s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    s.connect(('8.8.8.8', 1))
    local_ip_address = s.getsockname()[0] 
    return f'http://{local_ip_address}:{port}'
linear_port = 8000
XGBoost_port = 8010
lightGBM_port =8020
GBM_port =8030

dashboard(linear_model, run_kwarg={'port':linear_port})
linear_address = f'http://{local_ip_address}:{linear_port}' 

dashboard(XGBoost, run_kwarg={'port':XGBoost_port})
XGBoost_address = f'http://{local_ip_address}:{XGBoost_port}'
 
dashboard(lightGBM, run_kwarg={'port':lightGBM_port})
lightGBM_address = f'http://{local_ip_address}:{lightGBM_port}' 

dashboard(GBM, run_kwarg={'port':GBM_port})
GBM_address = f'http://{local_ip_address}:{GBM+port}' 
