In [10]:
import numpy as np
import pandas as pd
import random
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from scipy.optimize import minimize
import time

In [11]:
random.seed(1852026)

In [12]:
df = pd.read_excel("dataPoints.xlsx")

In [13]:
def get_data():
    data = df.to_numpy()
    data_train, data_rest = train_test_split(data, test_size=0.30)
    data_test, data_val = train_test_split(data_rest, test_size=0.50)
    data_train, data_validate, data_test = np.split(df.sample(frac=1), [int(.7*len(df)), int(.85*len(df))])
    data_train = data_train.to_numpy()
    data_validate = data_validate.to_numpy()
    data_test = data_test.to_numpy()

    X_tr = np.transpose(data_train[:,0:2])
    Y_tr = np.transpose(data_train[:,2:])
    X_val = np.transpose(data_validate[:,0:2])
    Y_val = np.transpose(data_validate[:,2:])
    X_test = np.transpose(data_test[:,0:2])
    Y_test = np.transpose(data_test[:,2:])
    return X_tr,Y_tr,X_val,Y_val,X_test,Y_test

X_train,Y_train,X_val,Y_val,X_test,Y_test=get_data()

X_train[0][0]

n = X_train.shape[0] # Size of the input vector X
n_y = Y_train.shape[0] # Size of the input vector Y
p = X_train.shape[1] # Size of the sample
N = 5 # size of the hidden layer, # neurons, this is just for the moment to have some example

In [14]:
def initialize_parameters(n, N, n_y):
    """
    Argument:
    n -- size of the input layer
    N -- size of the hidden layer
    p -- size of the sample
    n_y -- size of the output layer
    
    Returns:
    params -- python dictionary containing your parameters:
                    W1 -- weight matrix of shape (N, n)
                    b1 -- bias vector of shape (N, 1)
                    V -- weight matrix of shape (n_y, N)
    """
        
    W1 = np.random.randn(N,n)
    b1 = np.zeros(shape=(N, 1))
    V = np.random.randn(n_y,N)
    
    
    parameters = {"W1": W1,
                  "b1": b1,
                  "V": V}
    
    return parameters

In [15]:
def second_norm(omega):
    return np.linalg.norm(omega)**2

In [16]:
def second_norm_jac(omega):
    return np.linalg.norm(omega)

In [17]:
def hyp_tan(t,sigma):
    return (np.exp(2*sigma*t)-1)/(np.exp(2*sigma*t)+1)

In [18]:
def predicting(z0,omega,sigma):
    W = omega[:,1:3]
    V = omega[:,0:1]
    b = omega[:,3:4]
    Y_predicted=[predicting_1_ouput(z0[i],b,W,V,sigma) for i in range((z0.T).shape[1])]#(z0.T).shape[1]
    return np.array(Y_predicted)

In [19]:
def predicting_1_ouput(z0,b,W,V,sigma):
    #print("W")
    #print(W.shape)
    #print("X")
    #print(z0.shape)
    z1= np.dot(W,z0)-b.T
    #print("z1")
    #print(np.sum(V*hyp_tan(z1,sigma)))
    return np.sum(V*hyp_tan(z1,sigma))

params = initialize_parameters(n, N, n_y)
W1 = params['W1']
b1 = params['b1']
V = params['V']
omega0=np.append(np.append(V.T,W1, axis=1), b1, axis=1)

omega0

In [16]:
#X= X_train.T
#y_true = Y_train
#number_of_samples = X_train.shape[1]
def reg_training_error(input_list):
    omega =input_list# np.array(input_list[:-2])
    rho=0.00001
    sigma=2
    #rho   = input_list[1] 
    #sigma = input_list[2]
    omega=omega.reshape(N,int(len(omega)/N))
    Y_pred=predicting(X,omega,sigma)
    #print(np.sum((Y_pred-y_true)**2))
    #print(number_of_samples)
    #print(np.sum((Y_pred-y_true)**2)/(2*number_of_samples))
    #print(rho*(second_norm(omega)))
    E = (np.sum((Y_pred-y_true)**2)/(2*X.shape[0]) + rho*(second_norm(omega)))
    omega=omega.flatten()
    return E#,Y_pred

In [65]:
X, y_true, Xval,Yval, Xtest, Ytest = get_data()
X=X.T
N=11
rho=0.00001
sigma=1
n = X.shape[1] # Size of the input vector X
n_y = y_true.shape[0] # Size of the input vector Y
p = X.shape[0] # Size of the sample
params = initialize_parameters(n, N, n_y)
W1 = params['W1']
b1 = params['b1']
V = params['V']
omega0=np.append(np.append(V.T,W1, axis=1), b1, axis=1)
res = minimize(reg_training_error, omega0,method='L-BFGS-B')

In [66]:
res

      fun: 0.012961387705247542
 hess_inv: <44x44 LbfgsInvHessProduct with dtype=float64>
      jac: array([ 1.95059247e-05,  8.47444493e-03, -5.10052885e-03,  1.16605907e-02,
        7.58074159e-07,  9.50594412e-03, -2.37171220e-03,  3.61469830e-03,
       -3.15285992e-06,  5.66912940e-03,  1.75068640e-04,  3.64157818e-03,
        5.44200102e-06,  6.66961278e-05,  4.94932220e-05,  1.14512566e-05,
       -3.39764675e-05,  3.38687481e-03,  6.97480129e-03,  1.01758831e-02,
       -4.27088920e-06,  1.54174382e-03,  1.48631385e-03,  2.52216199e-03,
       -5.86995730e-06,  1.19615776e-04,  8.98222642e-04,  6.49260715e-03,
        6.77947282e-06,  5.95327641e-03,  2.93151788e-03,  1.07537004e-02,
       -2.92684280e-05,  2.41037450e-03,  1.65716781e-03, -1.36983480e-04,
       -3.31783212e-05, -1.98691549e-03,  2.45809692e-03,  3.71656143e-03,
       -2.71742698e-05,  2.33060134e-03, -4.31011754e-03,  1.04606549e-02])
  message: b'STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT'
     n

In [67]:
res['fun']

0.012961387705247542

In [68]:
res['x']

array([ 1.74213403e+00,  2.96697059e-01, -4.13672770e-01, -2.80997080e-01,
        8.04921724e-01,  8.46882448e-01, -4.12504128e-01, -1.69843412e+00,
        6.09336476e-01, -2.38553227e+00, -5.02632316e-02,  4.68215626e+00,
        1.03912495e+00, -1.99334363e-02,  2.48078851e-01, -1.92625255e+00,
       -9.31781102e-01,  1.22691646e-01, -6.37251834e-01,  4.30218384e-01,
        5.53421975e-01, -4.78088000e-02,  1.08165438e+00, -3.86882178e-01,
        4.73544520e-01, -6.62122165e-01, -2.36307557e-02, -3.14376501e-01,
        1.10602848e+00, -1.37465213e-02,  1.72731454e-01, -3.93315313e-01,
       -6.96339645e-01,  6.28385164e-02,  2.81294231e+00,  2.92456514e+00,
       -8.91847231e-01,  1.05213177e+00, -4.67622886e-04,  5.55110104e-01,
       -5.91692171e-01, -7.41849002e-01, -1.38551513e-01,  6.10161038e-01])

In [69]:
second_norm_jac(res['jac'].T)

0.03051845814321291

In [70]:
def reg_training_error_val_test(omega,X,y_true,number_of_samples):
    #omega =input_list# np.array(input_list[:-2])
    rho=0.00000001
    sigma=2
    #rho   = input_list[1] 
    #sigma = input_list[2]
    omega=omega.reshape(N,int(len(omega)/N))
    Y_pred=predicting(X,omega,sigma)
    #print(np.sum((Y_pred-y_true)**2))
    #print(number_of_samples)
    #print(np.sum((Y_pred-y_true)**2)/(2*number_of_samples))
    #print(rho*(second_norm(omega)))
    E = (np.sum((Y_pred-y_true)**2)/(2*number_of_samples) + rho*(second_norm(omega)))
    #omega=omega.flatten()
    return E#,Y_pred

In [71]:
reg_training_error_val_test(res['x'],Xval.T,Yval,Xval.shape[1])

0.014794505540612488

In [72]:
reg_training_error_val_test(res['x'],Xtest.T,Ytest,Xtest.shape[1])

0.04635946404406659

In [52]:
reg_training_error_val_test(omega0.flatten(),X,y_true,X.shape[0])

22.1583722229898

# start part 1.1

In [14]:
neurons_count=[3,6,7,9,11,20]
rho_values=[0.001,0.00001]
sigma_vals=[1,1.5]

In [15]:
all_poss_conf=[]
for i in neurons_count:
    for r in rho_values:
        for s in sigma_vals:
            all_poss_conf.append((i,r,s))

In [16]:
len(all_poss_conf)

24

In [12]:
def reg_training_error_grid(omega):
    omega=omega.reshape(N,int(len(omega)/N))
    Y_pred=predicting(X,omega,sigma)
    E = (np.sum((Y_pred-y_true)**2)/(2*X.shape[0]) + rho*(second_norm(omega)))
    return E

In [18]:
def minimizing(omega_init):
    #Neuron_num, rho, sigma = conf
    output = minimize(reg_training_error_grid, omega_init)
    return output['fun'], output['jac'], output['x']

In [19]:
def reg_error_val_test(omega,X,y_true,number_of_samples, rho, sigma):
    
    omega=omega.reshape(N,int(len(omega)/N))
    Y_pred=predicting(X,omega,sigma)
    #print(np.sum((Y_pred-y_true)**2))
    #print(number_of_samples)
    #print(np.sum((Y_pred-y_true)**2)/(2*number_of_samples))
    #print(rho*(second_norm(omega)))
    E = (np.sum((Y_pred-y_true)**2)/(number_of_samples)) #+ rho*(second_norm(omega)))
    return E#,Y_pred

In [20]:
# change X, ytrue, k times
start=time.time()
K=3
a=0
conf_res=[]
for c in all_poss_conf:
    print(c)
    funn,err_k,jac_norm,val_err,test_err=[],[],[],[],[]
    if a>100:
        break
    else:
        
        for k in range(K):

            a=a+1
            N, rho, sigma = c
            X, y_true, Xval,Yval, Xtest, Ytest = get_data()
            X=X.T
            n = X.shape[1] # Size of the input vector X
            n_y = y_true.shape[0] # Size of the input vector Y
            p = X.shape[0] # Size of the sample
            params = initialize_parameters(n, N, n_y)
            W1 = params['W1']
            b1 = params['b1']
            V = params['V']
            omega0=np.append(np.append(V.T,W1, axis=1), b1, axis=1)
            err, jac, omega_out = minimizing(omega0)
            funn.append(err)
            err_k.append(reg_error_val_test(omega_out,X,y_true,X.shape[0], rho, sigma))
            jac_norm.append(second_norm_jac(jac.T))
            val_err.append(reg_error_val_test(omega_out,Xval.T,Yval,Xval.shape[1], rho, sigma))
            X=np.concatenate((X,Xval.T))
            y_true=np.concatenate((y_true.T,Yval.T)).T
            err, jac, omega_out = minimizing(omega_out)
            test_err.append(reg_error_val_test(omega_out,Xtest.T,Ytest,Xtest.shape[1], rho, sigma))
        conf_res.append([np.mean(funn),np.mean(err_k),np.mean(jac_norm),np.mean(val_err),np.mean(test_err)])
print(time.time()-start)
configs=pd.DataFrame(all_poss_conf,columns=['neurons','rho','sigma'])
configs[['fun','err_tr','jac_norm','err_val','err_test']]=pd.DataFrame(conf_res)
configs.to_csv('res_confs_ALL.csv')

(3, 0.001, 1)
(3, 0.001, 1.5)
(3, 1e-05, 1)
(3, 1e-05, 1.5)
(6, 0.001, 1)
(6, 0.001, 1.5)
(6, 1e-05, 1)
(6, 1e-05, 1.5)
(7, 0.001, 1)
(7, 0.001, 1.5)
(7, 1e-05, 1)
(7, 1e-05, 1.5)
(9, 0.001, 1)
(9, 0.001, 1.5)
(9, 1e-05, 1)
(9, 1e-05, 1.5)
(11, 0.001, 1)
(11, 0.001, 1.5)
(11, 1e-05, 1)
(11, 1e-05, 1.5)
(20, 0.001, 1)
(20, 0.001, 1.5)
(20, 1e-05, 1)
(20, 1e-05, 1.5)


KeyboardInterrupt: 

In [23]:
configs

Unnamed: 0,neurons,rho,sigma,fun,err_tr,jac_norm,err_val,err_test
0,3,0.001,1.0,0.146204,0.267513,5e-06,0.380936,0.379573
1,3,0.001,1.5,0.142374,0.261603,4e-06,0.33205,0.39252
2,3,1e-05,1.0,0.401511,0.801057,6e-06,0.785278,0.813513
3,3,1e-05,1.5,0.146901,0.293585,6e-06,0.342516,0.324235
4,6,0.001,1.0,0.089539,0.150138,1.3e-05,0.135282,0.270154
5,6,0.001,1.5,0.076551,0.129571,1e-05,0.223333,0.227814
6,6,1e-05,1.0,0.072468,0.143758,2.6e-05,0.172449,0.144893
7,6,1e-05,1.5,0.063973,0.127338,4e-06,0.205831,0.2281
8,7,0.001,1.0,0.06536,0.082563,1.7e-05,0.110586,0.067852
9,7,0.001,1.5,0.045105,0.060382,1.2e-05,0.081816,0.094272


In [22]:
configs=pd.DataFrame(all_poss_conf,columns=['neurons','rho','sigma'])
configs[['fun','err_tr','jac_norm','err_val','err_test']]=pd.DataFrame(conf_res)
configs.to_csv('res_confs_ALL.csv')

In [19]:
configs

Unnamed: 0,neurons,rho,sigma,fun,err_tr,jac_norm,err_val,err_test
0,3,0.001,1,0.157998,0.289258,7e-06,0.268788,0.341503
1,3,0.001,2,0.271599,0.526929,1e-05,0.755026,0.280764
2,3,1e-05,1,0.275339,0.548956,9e-06,0.42768,0.650734
3,3,1e-05,2,0.53337,1.06088,8e-06,1.109851,0.945024
4,7,0.001,1,0.05806,0.068736,1.4e-05,0.05942,0.101887
5,7,0.001,2,0.049019,0.078278,9e-06,0.118789,0.15088
6,7,1e-05,1,0.037751,0.07373,1.4e-05,0.093436,0.112173
7,7,1e-05,2,0.04608,0.091252,1.2e-05,0.101186,0.096415
8,9,0.001,1,0.049923,0.058461,1.3e-05,0.07219,0.10707
9,9,0.001,2,,,,,


In [20]:
Xtest.shape[1]

45

In [21]:
Xval.shape

(2, 45)

In [25]:
np.concatenate((X,Xval.T)).shape

(255, 2)

In [44]:
configs=pd.DataFrame(all_poss_conf[:3],columns=['neurons','rho','sigma'])
configs[['err_tr','jac_norm','err_val','err_test']]=pd.DataFrame(conf_res)

In [46]:
configs.to_csv('res_confs_111.csv')

In [87]:
jac

array([-3.72529030e-08,  6.70552254e-08, -6.70552254e-08,  8.94069672e-08,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00, -2.98023224e-08, -6.70552254e-08,
       -6.70552254e-08, -6.70552254e-08, -7.45058060e-09,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00, -7.45058060e-09,  0.00000000e+00])

In [88]:
omega_out

array([ 6.65983144e-01,  3.64691540e-01, -1.72050014e+00, -4.31487776e-01,
        2.57613507e-07,  5.80369411e-08, -1.04872921e-08,  0.00000000e+00,
       -7.35619357e-07,  9.49874554e-07,  6.65986190e-01, -3.87785307e-01,
        1.61577219e+00, -4.81619542e-01, -1.80118128e-06,  0.00000000e+00,
       -4.73702356e-07,  1.50660648e-06, -1.74329507e-06,  0.00000000e+00])

In [89]:
all_poss_conf[0]

(2, 0.001, 1)

In [29]:
y_true.shape

(1, 210)

In [28]:
Yval.shape

(1, 45)

In [33]:
(np.concatenate((y_true.T,Yval.T)).T).shape

(1, 255)

# part 1.2

In [None]:
X, y_true, Xval,Yval, Xtest, Ytest = get_data()
X=X.T
N=11
rho=0.00001
sigma=1
n = X.shape[1] # Size of the input vector X
n_y = y_true.shape[0] # Size of the input vector Y
p = X.shape[0] # Size of the sample
params = initialize_parameters(n, N, n_y)
W1 = params['W1']
b1 = params['b1']
V = params['V']
omega0=np.append(np.append(V.T,W1, axis=1), b1, axis=1)

# Q2

import optunity

solvers = optunity.available_solvers()
print('Available solvers: ' + ', '.join(solvers))

In [20]:
def predicting_q2(z0,V,sigma):
    W = W1
    #V = omega[:,0:1]
    b = b1
    Y_predicted=[predicting_1_ouput(z0[i],b,W,V,sigma) for i in range((z0.T).shape[1])]#(z0.T).shape[1]
    return np.array(Y_predicted)

In [76]:
X, y_true, Xval,Yval, Xtest, Ytest = get_data()
X=X.T
N=11
rho=0.00001
sigma=1
n = X.shape[1] # Size of the input vector X
n_y = y_true.shape[0] # Size of the input vector Y
p = X.shape[0] # Size of the sample
params = initialize_parameters(n, N, n_y)
W1 = params['W1']*[0.001,0.0002]
b1 = params['b1']*0.0006
V = params['V']
omega=np.append(np.append(V.T,W1, axis=1), b1, axis=1)

In [77]:
def reg_training_error_q2(V):
    #omega=omega.reshape(N,int(len(omega)/N))
    Y_pred=predicting_q2(X,V,sigma)
    #omega=np.append(np.append(V.T,W1, axis=1), b1, axis=1)
    E = (np.sum((Y_pred-y_true)**2)/(2*X.shape[0]) + rho*(second_norm(V))+rho*(second_norm(W1))+rho*(second_norm(b1)))
    return E

In [78]:
#predicting_q2(X,V,sigma).shape

In [79]:
res = minimize(reg_training_error_q2, V,method='TNC')

In [80]:
res

     fun: 1.323807339836164
     jac: array([ 4.66293670e-06,  7.43849426e-06, -1.29452005e-05, -6.88338275e-06,
        9.70334924e-06,  1.33670852e-05,  1.81854531e-05, -1.08357767e-05,
       -2.34701147e-05,  8.77076189e-06, -2.22044605e-08])
 message: 'Converged (|f_n-f_(n-1)| ~= 0)'
    nfev: 57
     nit: 10
  status: 1
 success: True
       x: array([-0.06364367,  8.78980786,  3.00381507,  2.12877649,  2.0864033 ,
       -1.32807398,  1.98405722,  3.7384757 ,  1.26033096,  1.95728083,
       -5.68316245])

In [31]:
#ress=optunity.minimize(reg_training_error_q2, num_evals=100, x=[-5, 5], y=[-5, 5], solver_name=solvers[1])

In [84]:
y_true.shape

(1, 210)

In [92]:
from sklearn.neural_network import MLPClassifier