In [1]:
%matplotlib widget
import pandas as pd
import numpy as np
import csv 
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.widgets import TextBox
import matplotlib
import math
import sklearn.model_selection
from sklearn.neighbors import KNeighborsRegressor
import metamodell
from IPython.display import display
import customstats
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import Ridge
from sklearn.svm import SVR
from sklearn.kernel_ridge import KernelRidge

# Read the CSV data

In [2]:
def load(filename="collected_results.csv"):
    """Read a comma-separated results table that starts with one header row.

    Parameters
    ----------
    filename : str or path-like, optional
        File to read. Defaults to ``"collected_results.csv"`` so that
        existing ``load()`` calls keep working unchanged.

    Returns
    -------
    fields : list of str
        The entries of the first line, split at commas and stripped of
        surrounding whitespace.
    data : numpy.ndarray
        Everything below the header as parsed by ``np.genfromtxt``.
    """
    # The header is read separately because genfromtxt is told to skip it.
    with open(filename, "r") as fil:
        fields = [i.strip() for i in fil.readline().split(",")]
    data = np.genfromtxt(filename, skip_header=1, delimiter=",")
    return fields, data
# Load the column names and the numeric table for the cells below.
fields,data=load()

# Select the outputs to train the metamodel for

In [3]:
# Names of the error columns that are candidates for prediction.
desired_outputs = {
    'error_schicht',
    'error_wez0',
    'error_delr0',
    'error_wez90',
    'error_delr90',
    'error_wez',
    'error_delr',
    'error_ges',
}
# 21 inputs minus one: the energy ratio is not used because it depends
# entirely on the relative expansion.
nr_inputs = 21 - 1
# Re-read the table so this cell starts from freshly loaded data.
fields, data = load()


# Generate test and training data

In [4]:
nr_points = data.shape[0]
ratio_test = 0.25  # fraction of the data points held back for testing
# Fixed seed: without it the split differs on every kernel restart, which
# makes results impossible to reproduce with "Restart & Run All".
split_seed = 0
idx_train, idx_test = sklearn.model_selection.train_test_split(
    range(nr_points),
    test_size=ratio_test,
    random_state=split_seed,
)


# Define the kinds of metamodels to train

In [5]:
# Iteration cap passed as max_iter to Lasso, ElasticNet and SVR.
nr_iter = 20000

# (label, estimator) pairs, in the order they are tried.
# Most quadratic ("quad_") variants are disabled for now because they take
# long; once the significant variables are known they should be no problem.
Regressors = [
    ("nearest_neighbour", KNeighborsRegressor()),
    (
        "quad_nearest_neighbour",
        Pipeline([('poly', PolynomialFeatures(2)), ('knn', KNeighborsRegressor())]),
    ),
    ("decision_tree", DecisionTreeRegressor()),
    (
        "quad_decision_tree",
        Pipeline([('poly', PolynomialFeatures(2)), ('tree', DecisionTreeRegressor())]),
    ),
    ("Lasso", Lasso(max_iter=nr_iter)),
    # ("quad_Lasso", Pipeline([('poly', PolynomialFeatures(2)), ('lasso', Lasso(max_iter=nr_iter))])),
    ("ElasticNet", ElasticNet(max_iter=nr_iter)),
    # ("quad_ElasticNet", Pipeline([('poly', PolynomialFeatures(2)), ('ElasticNet', ElasticNet(max_iter=nr_iter))])),
    ("SVR", SVR(max_iter=nr_iter)),
    # ("quad_SVR", Pipeline([('poly', PolynomialFeatures(2)), ('SVR', SVR(max_iter=nr_iter))])),
    # ("SVRlin", SVR(kernel="linear", max_iter=nr_iter)),  # does not work well
    # ("quad_SVRlin", Pipeline([('poly', PolynomialFeatures(2)), ('SVRlin', SVR(kernel="linear", max_iter=nr_iter))])),
    ("KernelRidge", KernelRidge()),
    # ("quad_KernelRidge", Pipeline([('poly', PolynomialFeatures(2)), ('KernelRidge', KernelRidge())])),
    ("Ridge", Ridge()),
    # ("quad_Ridge", Pipeline([('poly', PolynomialFeatures(2)), ('Ridge', Ridge())])),
]


# Evaluate the different metamodels

In [6]:
#%%debug -b metamodell.py:123
def evaluate_row(row_predicted, row_real, flag):
    """Return the squared relative deviation of the predicted total error.

    The predicted total error is assembled from ``row_predicted`` in one of
    three ways and compared with ``row_real['error_ges']`` through
    ``customstats.rel_deviation``; the result of that call is squared.

    Parameters
    ----------
    row_predicted : mapping
        Predicted values, indexed by column name. Only the keys required by
        the chosen ``flag`` are read.
    row_real : mapping
        Reference values; only ``'error_ges'`` is read.
    flag : int
        Selects how the predicted total error is built:

        * ``0`` -- ``sqrt_MSE`` of ``error_wez0``, ``error_wez90``,
          ``error_delr0``, ``error_delr90`` plus ``error_schicht``.
        * ``1`` -- ``sqrt_MSE`` of ``error_wez``, ``error_delr`` plus
          ``error_schicht``.
        * ``2`` -- ``error_ges`` taken directly from the prediction.

    Raises
    ------
    ValueError
        If ``flag`` is not 0, 1 or 2. Previously this case fell through and
        failed later with an unrelated unbound-variable error.
    """
    if flag == 0:
        # error when predicting the errors for wez,cut for each qs and schicht
        errors_qs_predicted = [
            row_predicted['error_wez0'],
            row_predicted['error_wez90'],
            row_predicted['error_delr0'],
            row_predicted['error_delr90'],
        ]
        error_schicht_predicted = row_predicted['error_schicht']
        error_ges_predicted = customstats.sqrt_MSE(errors_qs_predicted) + error_schicht_predicted
    elif flag == 1:
        # error when predicting the errors for wez,cut and schicht
        error_wez_predicted = row_predicted['error_wez']
        error_spalt_predicted = row_predicted['error_delr']
        error_schicht_predicted = row_predicted['error_schicht']
        error_ges_predicted = (
            customstats.sqrt_MSE([error_wez_predicted, error_spalt_predicted])
            + error_schicht_predicted
        )
    elif flag == 2:
        # error when predicting the resulting error directly
        error_ges_predicted = row_predicted['error_ges']
    else:
        raise ValueError("flag must be 0, 1 or 2, got %r" % (flag,))
    error_ges_real = row_real['error_ges']
    error = customstats.rel_deviation(error_ges_real, error_ges_predicted) ** 2
    return error
def create_evaluate_row_func(flag):
    """Build a callable that runs ``evaluate_row`` with ``flag`` already fixed.

    The returned function passes every positional and keyword argument it
    receives on to ``evaluate_row`` and adds ``flag=flag``.
    """
    def myfunc(*positional, **named):
        # flag comes from the enclosing call, so each returned function
        # keeps the value it was created with.
        return evaluate_row(*positional, flag=flag, **named)

    return myfunc
import warnings

# One scoring function per value of evaluate_row's flag (0, 1, 2).
evaluate_row_funcs = [create_evaluate_row_func(i) for i in range(3)]


def _column_indices(names):
    """Return the positions in ``fields`` whose name is contained in ``names``."""
    return [i for i, name in enumerate(fields) if name in names]


# Column indices each scoring function reads, in the same order as
# evaluate_row_funcs.
needed_outputs = [
    _column_indices({'error_schicht', 'error_wez0', 'error_delr0', 'error_wez90', 'error_delr90'}),
    _column_indices({'error_schicht', 'error_wez', 'error_delr'}),
    _column_indices({'error_ges'}),
]

evaluate_row_func_list = list(zip(evaluate_row_funcs, needed_outputs))
#TODO evaluate_row-> list

# Silence warnings only while the metamodels are tried out; the previous
# global filterwarnings('ignore') hid warnings for the rest of the session.
with warnings.catch_warnings():
    warnings.simplefilter('ignore')
    tryout_result = metamodell.tryout_metamodells(
        Regressors, data, nr_inputs, evaluate_row_func_list, fields, nr_tries=10
    )
# Keep the call's return value as the cell's last expression so it is still
# displayed, as it was when the call itself ended the cell.
tryout_result

In [7]:
# Leftover post-mortem debugger call, disabled so that "Run All" does not
# stop at an interactive prompt. Run it by hand after an exception if needed:
# %debug

In [8]:
# Debug aid: show the column indices collected for each evaluation variant.
print(needed_outputs)