In [1]:
# Trains the model on the training data for various lambdas and layers

import argparse
import lzma
import os
import pickle
import sys
import time
import urllib.request

import numpy as np
import pandas as pd
import sklearn.compose
import sklearn.model_selection
import sklearn.preprocessing
from sklearn.neural_network import MLPClassifier

if os.name == 'nt':
    import winsound

from process_data import process_data
from process_data import reinit_properties


#       Data preprocessing
# Grid search over MLP regularisation strength (alpha) for every combination
# of the (n, m) settings and several cross-validation fold counts.
h = (3,)  # Hidden layer sizes passed to MLPClassifier (one hidden layer, 3 units)
ls = np.geomspace(0.3, 25.0, 50)  # Lambdas (MLP `alpha` values) to search
folds = [2, 3, 5]  # Cross-validation fold counts to try

ns = range(0, 3)
ms = range(17, 27)

max_iter = 2000  # Iteration cap for every MLP fit

# scores[n_ind, m_ind, f_ind, l_ind] holds the mean CV test score of that combination
scores = np.zeros((len(ns), len(ms), len(folds), len(ls)))

start0 = time.time()

# Load the data once; the CSV does not depend on n or m.
raw_data = pd.read_csv('data/training_data.csv')

for n_ind, n in enumerate(ns):
    for m_ind, m in enumerate(ms):
        print("n = {}, m = {}".format(n, m))
        # NOTE(review): presumably reconfigures process_data for this (n, m)
        # pair - confirm in process_data.py
        reinit_properties(n, m)

        # Fresh copy so every iteration starts from the untouched CSV contents,
        # whether or not process_data modifies its argument.
        data = raw_data.copy()

        # Data processing, see process_data.py
        start = time.time()
        datumy, data_for_transform, target, p, sz = process_data(data)
        print("\nDoba transformace dat: {:.2f} s".format(time.time()-start))

        # Split into training and testing indices by date.
        # NOTE(review): train_size / test_size are not used by the grid search
        # below, which cross-validates on all rows.
        datum_trenovani = np.datetime64('2005-10-13')
        trenovaci_indexy = np.where(datumy <= datum_trenovani)[0]
        # Guard against no row falling on or before the cut-off date.
        train_size = int(trenovaci_indexy[-1]) + 1 if trenovaci_indexy.size else 0
        test_size = data_for_transform.shape[0] - train_size

        #   Feature engineering
        # NOTE(review): the scaler is fitted on all rows before cross-validation,
        # so validation folds contribute to the scaling statistics.
        scaler = sklearn.preprocessing.RobustScaler()

        # Fit the transformer and transform the numeric columns
        scaled_data = scaler.fit_transform(data_for_transform[:, p.num_indexy])

        data_final = scaled_data

        #       Training
        print("________________________")
        for f_ind, f in enumerate(folds):
            start = time.time()
            print("\nnumber of folds:", f)

            parameters = {'alpha' : ls}

            # Create and train the model
            mlp = MLPClassifier(random_state=1, hidden_layer_sizes=h, max_iter=max_iter)

            clf = sklearn.model_selection.GridSearchCV(mlp, parameters, cv=f, n_jobs=-1)
            clf.fit(data_final, target)

            # Print accuracy
            print("best score: {:.3f}% ; best lambda:".format(100.0 * clf.best_score_), clf.best_params_)
            print("Doba:", "{:.2f} s".format(time.time()-start))
            print("________________________")

            #       Saving
            # One mean score per candidate alpha, stored along the last axis in
            # the order GridSearchCV reports them.
            scores[n_ind, m_ind, f_ind, :] = clf.cv_results_['mean_test_score']

print("Celkova doba:", "{:.2f} s".format(time.time()-start0))

# Saved positionally, so the file holds arr_0 ... arr_4 = ns, ms, folds, ls, scores.
np.savez("scores.npz", ns, ms, folds, ls, scores)

if os.name == 'nt':  # Beep to tell the user the script has finished (Windows only)
    frequency = 500  # Set Frequency in Hz
    duration = 1000  # Set Duration in ms
    winsound.Beep(frequency, duration)

n = 0, m = 17

Doba transformace dat: 0.37 s
________________________

number of folds: 2
best score: 60.092% ; best lambda: {'alpha': 2.8650408797320734}
Doba: 6.13 s
________________________

number of folds: 3
best score: 60.591% ; best lambda: {'alpha': 2.3918286164678046}
Doba: 7.98 s
________________________

number of folds: 5
best score: 60.758% ; best lambda: {'alpha': 2.6177636951209453}
Doba: 13.07 s
________________________
n = 0, m = 18

Doba transformace dat: 0.30 s
________________________

number of folds: 2
best score: 60.763% ; best lambda: {'alpha': 13.29037788906916}
Doba: 4.85 s
________________________

number of folds: 3
best score: 60.888% ; best lambda: {'alpha': 11.09523650556752}
Doba: 7.35 s
________________________

number of folds: 5
best score: 60.909% ; best lambda: {'alpha': 4.499173455472238}
Doba: 14.01 s
________________________
n = 0, m = 19

Doba transformace dat: 0.30 s
________________________

number of folds: 2
best score: 60.912% ; best lambda

In [2]:
import matplotlib.pyplot as plt
# Default size (width, height in inches) for every figure drawn in this notebook
plt.rcParams.update({'figure.figsize': [10, 5]})


In [2]:
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

import matplotlib.pyplot as plt

import numpy as np  # lets this cell run on a fresh kernel without the training cell

# The arrays were saved positionally with np.savez, hence the arr_0 ... arr_4 keys.
# The context manager closes the underlying .npz file once the arrays are read.
with np.load("scores.npz") as loaded_arrays:
    ns     = loaded_arrays['arr_0']
    ms     = loaded_arrays['arr_1']
    folds  = loaded_arrays['arr_2']
    ls     = loaded_arrays['arr_3']
    scores = loaded_arrays['arr_4']

# Axis k of `scores` is indexed by list_vars[k] and named axis_names[k].
axis_names = ["n","m","folds","lambda"]
list_vars = [ns, ms, folds, ls]

# Print best globally: the highest score and the parameter values that produced it
best_inds = np.unravel_index(np.argmax(scores, axis=None), scores.shape)
best_params = [values[ind] for values, ind in zip(list_vars, best_inds)]

print(scores.max(), best_params)

# Print best for each fold count: fix the folds axis and search over n, m and lambda
for fold_ind, fold in enumerate(folds):
    fold_scores = scores[:, :, fold_ind, :]
    fold_best_inds = np.unravel_index(np.argmax(fold_scores, axis=None), fold_scores.shape)

    fold_best_params = [values[ind] for values, ind in zip((ns, ms, ls), fold_best_inds)]
    print("f:", fold, fold_scores.max(), fold_best_params)

def plot_heatmap_slice(axis_slice1, axis_slice2, axis_top):
    """Interactively explore the 4-D ``scores`` array through 2-D and 1-D views.

    One axis of ``scores`` is fixed to a single index, a second axis is reduced
    with max and argmax, and the two remaining axes are drawn as heatmaps. Line
    plots show each heatmap cut at one index along ``axis_slice2``.

    Reads the module-level ``scores``, ``axis_names`` and ``list_vars``.

    Parameters
    ----------
    axis_slice1 : int
        Axis of ``scores`` that the first slider fixes to a single index.
    axis_slice2 : int
        Axis (0 or 1) of the 2-D heatmap that the second slider cuts through.
    axis_top : int
        Axis (0-2) of the 3-D array left after fixing ``axis_slice1``; max and
        argmax are taken over it.
    """
    # axis_top and axis_slice2 are numbered within the *reduced* arrays. Resolve
    # them to axes of `scores` by listing the axes that survive each reduction
    # (first drop axis_slice1, then drop the axis that max/argmax collapses).
    axes_3d = [axis for axis in range(scores.ndim) if axis != axis_slice1]
    axis_top_pom = axes_3d[axis_top]
    axes_2d = [axis for axis in axes_3d if axis != axis_top_pom]
    axis_slice2_pom = axes_2d[axis_slice2]
    # The one axis that is left over: it runs along the x direction of the line plots.
    last_axis = axes_2d[1 - axis_slice2]

    print("axis_slice1:", axis_names[axis_slice1])
    print("axis_slice2:", axis_names[axis_slice2_pom])
    print("axis_top:", axis_names[axis_top_pom])
    print("last axis:", axis_names[last_axis])

    def implt_finally(sliced_value1, sliced_value2):
        """Draw the heatmaps and line plots for the selected slider indices."""
        print("sliced_value1:", list_vars[axis_slice1][sliced_value1])
        print("sliced_value2:", list_vars[axis_slice2_pom][sliced_value2])

        sliced1 = scores.take(indices=sliced_value1, axis=axis_slice1)
        reductions = (
            ("Max score", np.max(sliced1, axis=axis_top)),
            ("Argmax score", np.argmax(sliced1, axis=axis_top)),
        )

        # One figure, 2x2 grid: a row per reduction, heatmap on the left and
        # its 1-D cut on the right.
        fig, axes = plt.subplots(2, 2)
        for (ax_heatmap, ax_line), (label, topped) in zip(axes, reductions):
            image = ax_heatmap.imshow(topped, cmap='Reds')
            ax_heatmap.set_title(label + " - heatmap")
            # imshow puts the first array axis on y and the second on x
            ax_heatmap.set_xlabel(axis_names[axes_2d[1]] + " index")
            ax_heatmap.set_ylabel(axis_names[axes_2d[0]] + " index")
            fig.colorbar(image, ax=ax_heatmap)

            sliced2 = topped.take(indices=sliced_value2, axis=axis_slice2)
            ax_line.plot(sliced2)
            ax_line.set_title(label + " - plot")
            ax_line.set_xlabel(axis_names[last_axis] + " index")
        fig.tight_layout()
        plt.show()

    interact(
        implt_finally,
        sliced_value1=widgets.IntSlider(min=0, max=scores.shape[axis_slice1] - 1, step=1),
        sliced_value2=widgets.IntSlider(min=0, max=scores.shape[axis_slice2_pom] - 1, step=1),
    )
    
    
    
# Top-level controls: choose which axis to fix, which to reduce with max/argmax,
# and which heatmap axis to cut. The trailing semicolon suppresses the cell's
# return value.
interact(
    plot_heatmap_slice,
    axis_slice1=widgets.IntSlider(min=0, max=3, value=2, step=1),
    axis_top=widgets.IntSlider(min=0, max=2, value=2, step=1),
    axis_slice2=widgets.IntSlider(min=0, max=1, step=1),
);

    

0.6114937378651134 [0, 24, 5, 5.898395437103501]
f: 2 0.610778868539682 [0, 26, 13.29037788906916]
f: 3 0.6104115526753556 [2, 22, 11.09523650556752]
f: 5 0.6114937378651134 [0, 24, 5.898395437103501]


interactive(children=(IntSlider(value=2, description='axis_slice1', max=3), IntSlider(value=0, description='ax…