In [None]:
import numpy as np
import pandas as pd
import sklearn
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import regularizers
from tensorflow.keras.models import load_model
from tensorflow.keras.callbacks import ModelCheckpoint
from sklearn import datasets
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.ticker import LinearLocator, FormatStrFormatter
from mpl_toolkits.mplot3d import Axes3D
from math import sqrt, log10
import os.path
from sklearn.metrics import confusion_matrix


In [None]:
# Load the phone data set from the comma-separated file next to the notebook.
phone_data = pd.read_csv("telefony.csv", delimiter=',')

In [None]:
# Quick sanity check: print the (rows, columns) shape of the loaded table.
print(phone_data.shape)

In [None]:
# Feature matrix: the first 20 columns of the table, selected by position.
# NOTE(review): presumably the label column sits after these 20 - confirm against the CSV.
X = phone_data.iloc[:, :20]

In [None]:
# Target labels: the "price_range" column.
y = phone_data.loc[:, "price_range"]

In [None]:
# One-hot encode the labels into a dense array (one column per distinct label value).
# The encoder expects a 2-D input, hence the reshape to a single column.
enc = OneHotEncoder(handle_unknown='ignore')
Y = enc.fit_transform(y.to_numpy().reshape(-1, 1)).toarray()

In [None]:
# First split: 70% of the samples for training, 30% held back in X_vt / Y_vt.
# random_state is fixed so the split is reproducible.
X_train, X_vt, Y_train, Y_vt = train_test_split(
    X.to_numpy(),
    Y,
    test_size=0.3,
    random_state=1,
)

In [None]:
# Second split: divide the held-back 30% in half, one half for testing
# and the other for validation.
X_test, X_val, Y_test, Y_val = train_test_split(
    X_vt,
    Y_vt,
    test_size=0.5,
    random_state=1,
)

In [None]:
# Architecture search grid, consumed as range(min, stop, skip) by the training loop.
# Neurons per hidden layer: 5, 10, ..., 200 (stop value is exclusive).
n_min, number_of_neurons, n_skip = 5, 201, 5
# Number of hidden layers: 5, 10, ..., 45 (stop value is exclusive).
l_min, number_of_layers, l_skip = 5, 46, 5

In [None]:
# Grid search over network width (n neurons per hidden layer) and depth
# (l hidden layers). Each configuration is trained for 100 epochs and the
# epoch with the best validation accuracy is checkpointed to
# "saved/n<n>l<l>.hdf5". Configurations whose checkpoint file already exists
# are skipped, so the cell can be re-run to resume an interrupted sweep.
for n in range(n_min, number_of_neurons, n_skip):
    for l in range(l_min, number_of_layers, l_skip):
        name = "saved/" + "n" + str(n) + "l" + str(l) + ".hdf5"
        file_exists = os.path.exists(name)
        if file_exists:
            continue

        # Build the network only when it is actually going to be trained.
        model = tf.keras.Sequential()
        for layer_idx in range(l):
            # Only the first layer needs to be told the input size; it is
            # taken from the training data instead of being hard-coded.
            first_layer_kwargs = {'input_shape': (X_train.shape[1],)} if layer_idx == 0 else {}
            model.add(tf.keras.layers.Dense(
                n,
                activation='relu',
                kernel_regularizer=regularizers.L2(0.01),
                **first_layer_kwargs,
            ))
        # One softmax output unit per one-hot label column.
        model.add(tf.keras.layers.Dense(Y_train.shape[1], activation='softmax'))

        model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

        # save_freq='epoch' replaces the deprecated `period=1` argument
        # (same behaviour: consider saving after every epoch).
        checkpoint = ModelCheckpoint(
            name,
            monitor='val_accuracy',
            verbose=1,
            save_best_only=True,
            mode='auto',
            save_freq='epoch',
        )
        history = model.fit(
            X_train,
            Y_train,
            epochs=100,
            validation_data=(X_val, Y_val),
            callbacks=[checkpoint],
        )

In [None]:
def selection_scores(test_acc, test_loss, train_acc, train_loss, val_acc, val_loss, n, l):
    """Return the two ranking scores ``(m, m2)`` for one evaluated model.

    ``m``  = test_acc**5 * train_acc * val_acc**2
             / (1 + test_loss**5 + train_loss + val_loss**2)
    ``m2`` = 2 * m / log10(100 + l * sqrt(n)), i.e. ``m`` scaled down as the
    number of layers ``l`` or neurons per layer ``n`` grows.
    """
    m = ((test_acc)**5*(train_acc)*(val_acc)**2)/(1+(test_loss)**5+(train_loss)+(val_loss)**2)
    m2 = 2*m/(log10(100+l*sqrt(n)))
    return m, m2


# Evaluate every checkpoint found under "saved/" on the test, train and
# validation splits, collect the metrics in `results`, and remember the
# configuration with the highest score `m` in best_n / best_l / best_m.
#
# NOTE(review): `m` includes the test accuracy and test loss, so the model
# picked here is selected using the test split; test metrics reported for it
# afterwards are therefore not an unbiased estimate.
best_n = 0
best_l = 0
best_acc = 0  # never updated below; kept so existing references keep working
best_m = 0
result_columns = ['n', 'l', 'test_acc', 'test_loss', 'train_acc', 'train_loss',
                  'val_acc', 'val_loss', 'm', 'm2']
result_rows = []

# The scan range is deliberately wider than the training grid so that
# checkpoints produced with other grid settings are picked up as well.
for n in range(1, 500, 1):
    for l in range(1, 120, 1):
        name = "saved/" + "n" + str(n) + "l" + str(l) + ".hdf5"
        file_exists = os.path.exists(name)
        if not file_exists:
            continue

        model2 = load_model(name)

        # evaluate() returns [loss, accuracy] for these models.
        eva_test = model2.evaluate(X_test, Y_test)
        test_loss, test_acc = eva_test[0], eva_test[1]

        eva_train = model2.evaluate(X_train, Y_train)
        train_loss, train_acc = eva_train[0], eva_train[1]

        eva_val = model2.evaluate(X_val, Y_val)
        val_loss, val_acc = eva_val[0], eva_val[1]

        m, m2 = selection_scores(test_acc, test_loss, train_acc, train_loss,
                                 val_acc, val_loss, n, l)

        result_rows.append([n, l, test_acc, test_loss, train_acc, train_loss,
                            val_acc, val_loss, m, m2])
        if m > best_m:
            best_m = m
            best_n = n
            best_l = l

# Build the table once instead of enlarging a DataFrame row by row.
results = pd.DataFrame(result_rows, columns=result_columns)
i = len(result_rows)  # number of evaluated models, as the old row counter held

In [None]:
# Path of the checkpoint belonging to the best-scoring configuration.
name = f"saved/n{best_n}l{best_l}.hdf5"

In [None]:
# Load the checkpoint stored at `name`
# (expected to be the best-model path built from best_n / best_l).
model3 = load_model(name)

In [None]:
# Loss and metrics of the selected model on the test split.
model3.evaluate(X_test, Y_test)

In [None]:
# Loss and metrics of the selected model on the training split, for comparison.
model3.evaluate(X_train, Y_train)

In [None]:
# Display the table of per-model metrics and scores.
results

In [None]:
# Display the number of layers of the best-scoring configuration.
best_l

In [None]:
# Discard models whose m2 score is below 0.55.
# NOTE: this rebinds `results`, so every later cell sees only the filtered table.
results = results.loc[~(results.m2 < 0.55)]

In [None]:
# 3-D scatter of the m2 score against network width and depth.
# Figure.gca(projection=...) is no longer accepted by current Matplotlib,
# so the 3-D axes are created with add_subplot instead.
threedee = plt.figure().add_subplot(111, projection='3d')
threedee.scatter(results['n'], results['l'], results['m2'])
threedee.set_xlabel('neurons')
threedee.set_ylabel('layers')
# The plotted quantity is m2, so the axis is labelled accordingly.
threedee.set_zlabel('m2 value')
plt.show()

In [None]:
# Display the current `results` table again.
results

In [None]:
# Save a copy of the selected model under the "najlepszymodel" directory.
# NOTE(review): the file name is hard-coded as n50l35; it matches model3 only
# if best_n == 50 and best_l == 35 - confirm before relying on the name.
model3.save('najlepszymodel/n50l35.hdf5')

In [None]:
# Load one specific saved configuration (file name n10l10, following the
# "n<neurons>l<layers>" naming used for the checkpoints).
model4 = load_model('saved/n10l10.hdf5')

In [None]:
# Confusion matrix of model4 on the test split.
# Predictions and labels are both taken from the test split (X_test / Y_test)
# rather than from the full data set, which is mostly training samples.
# argmax turns the softmax outputs / one-hot rows back into class indices.
Y_pred = np.argmax(model4.predict(X_test), axis=1)
Y_testch = np.argmax(Y_test, axis=1)
# Rows are the true classes, columns the predicted classes.
tf.math.confusion_matrix(Y_testch, Y_pred)

In [None]:
# Best network results where the number of layers is <= 3:
# keep rows that have neither l > 3 nor m < 0.09.
results2 = results.loc[~(results.l > 3) & ~(results.m < 0.09)]

results2

In [None]:
# Models from `results` whose m score is not below 0.7 (replaces the previous results2).
results2 = results.loc[~(results.m < 0.7)]
results2