In [1]:
import numpy as np
import pandas as pd
import random
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

In [2]:
random.seed(1852026)

In [3]:
df = pd.read_excel("dataPoints.xlsx")

In [4]:
data = df.to_numpy()
data_train, data_rest = train_test_split(data, test_size=0.30)
data_test, data_val = train_test_split(data_rest, test_size=0.50)
data_train, data_validate, data_test = np.split(df.sample(frac=1), [int(.7*len(df)), int(.85*len(df))])
data_train = data_train.to_numpy()
data_validate = data_validate.to_numpy()
data_test = data_test.to_numpy()

X_train = np.transpose(data_train[:,0:2])
Y_train = np.transpose(data_train[:,2:])
X_validate = np.transpose(data_validate[:,0:2])
Y_validate = np.transpose(data_validate[:,2:])
X_test = np.transpose(data_test[:,0:2])
Y_test = np.transpose(data_test[:,2:])

In [5]:
X_train.shape

(2, 210)

In [6]:
Y_train.shape

(1, 210)

In [7]:
n = X_train.shape[0] # Size of the input vector X
n_y = Y_train.shape[0] # Size of the input vector Y
p = X_train.shape[1] # Size of the sample
N = 5 # size of the hidden layer, # neurons, this is just for the moment to have some example

In [8]:
def initialize_parameters(n, N, n_y):
    """
    Argument:
    n -- size of the input layer
    N -- size of the hidden layer
    p -- size of the sample
    n_y -- size of the output layer
    
    Returns:
    params -- python dictionary containing your parameters:
                    W1 -- weight matrix of shape (N, n)
                    b1 -- bias vector of shape (N, 1)
                    V -- weight matrix of shape (n_y, N)
    """
        
    W1 = np.random.randn(N,n) * 0.01
    b1 = np.zeros(shape=(n, 1))
    V = np.random.randn(n_y,N) * 0.01
    
    
    parameters = {"W1": W1,
                  "b1": b1,
                  "V": V}
    
    return parameters

In [16]:
rr=initialize_parameters(n, N, n_y)

In [17]:
rr

{'W1': array([[ 0.01556526, -0.01158364],
        [-0.02327415, -0.00112092],
        [ 0.00038901, -0.0068777 ],
        [-0.01340754,  0.00502958],
        [ 0.00088805, -0.01814066]]), 'b1': array([[0.],
        [0.]]), 'V': array([[ 0.01203228, -0.01256006, -0.00520418,  0.00056666,  0.01843201]])}

In [23]:
YY = np.array([0.2,0.3,0.5,0.6])
YYY=np.array([0.3,0.1,0.5,0.6])

In [26]:
np.sum((YY-YYY)**2)

0.04999999999999999

In [27]:
np.linalg.norm(np.array([[-4, -3, -2], [-1, 0, 1], [ 2, 3, 4]]),2)

7.3484692283495345

In [9]:
np.linalg.norm(np.array([[2, 2, 2], [2, 2, 2], [ 2,2, 2],[2,2,2]]),2)

6.928203230275509

In [11]:
(2**2 * 12)**0.5

6.928203230275509

In [12]:
def second_norm(omega):
    return np.linalg.norm(omega,2)**2

In [118]:
def reg_training_error(y_pred,y_true,number_of_samples, omega, rho):
    E = ((np.sum((y_pred-y_true)**2)/(2*number_of_samples)) + rho*(second_norm(omega)))
    return E

In [46]:
def hyp_tan(t,sigma):
    return (2*np.exp(2*sigma*t)-1)/(2*np.exp(2*sigma*t)+1)

In [47]:
def predicting(z0,b,w,v,sigma):
    z1= np.dot(w,z0)-b
    return np.sum(v*hyp_tan(z1,sigma))

In [119]:
W1 = np.random.randn(N,n)*0.3
b1 = np.ones(shape=(N, 1))
V = np.random.randn(n_y,N)*0.3

In [120]:
predicting(X_train.T[10],b1,W1,V,1)

1.9248944680090099

In [121]:
W1

array([[-0.49700446, -0.00730414],
       [ 0.49321305,  0.14228574],
       [ 0.08857209,  0.01549494],
       [-0.20201534,  0.02838033],
       [-0.23897971,  0.35168052]])

In [262]:
b1

array([[1.],
       [1.],
       [1.],
       [1.],
       [1.]])

In [264]:
V.T

array([[-0.03448435],
       [-0.02678508],
       [ 0.05422537],
       [ 0.06374755],
       [-0.02806628]])

array([[-0.26415911,  0.13266138, -0.26415911],
       [-0.19719182,  0.28584915, -0.19719182],
       [-0.23540867,  0.25615556, -0.23540867],
       [ 0.14374696,  0.650911  ,  0.14374696],
       [ 0.32618034, -0.31527202,  0.32618034]])

In [116]:
W1

array([[-0.26415911,  0.13266138],
       [-0.19719182,  0.28584915],
       [-0.23540867,  0.25615556],
       [ 0.14374696,  0.650911  ],
       [ 0.32618034, -0.31527202]])

In [57]:
Y_train.T[10]

array([2.25412485])

In [80]:
W1.shape

(5, 2)

In [81]:
V.shape

(1, 5)

In [82]:
b1.shape

(5, 1)

In [83]:
Y_train.shape

(1, 210)

In [122]:
omega1=np.append(np.append(W1, V.T, axis=1), b1, axis=1)

In [123]:
omega1

array([[-0.49700446, -0.00730414, -0.07596126,  1.        ],
       [ 0.49321305,  0.14228574, -0.21428795,  1.        ],
       [ 0.08857209,  0.01549494, -0.25487012,  1.        ],
       [-0.20201534,  0.02838033,  0.35439359,  1.        ],
       [-0.23897971,  0.35168052, -0.34475378,  1.        ]])

In [128]:
Y_predicted=[predicting(X_train.T[i],b1,W1,V,1) for i in range(X_train.shape[1])]

In [129]:
reg_training_error(Y_predicted,Y_train.T,X_train.shape[1], omega1, 0.0001)

165.59669528463465

In [8]:
def delta_E(omega,rho):
    return 2*np.linalg.norm(omega,2)*rho

In [302]:
W1 = np.random.randn(N,n)*0.1
b1 = np.ones(shape=(N, 1))
V = np.random.randn(n_y,N)*0.1
eta=3
rho=0.0001
sigma0=1
omega0=np.append(np.append(W1, V.T, axis=1), b1, axis=1)
Y_predicted=[predicting(X_train.T[i],b1,W1,V,sigma0) for i in range(X_train.shape[1])]
reg_training_error(Y_predicted,Y_train.T,X_train.shape[1], omega0, rho)
err=[]
c=0
for c in range(100):
    eta= abs(eta -(0.01*eta/(c+1)))
    omega=omega0 - eta*delta_E(omega0,rho)
    omega0=omega
    W_new = omega[:,0:2]
    V_new = omega[:,2:3]
    b_new = omega[:,3:4]
    Y_predicted=[predicting(X_train.T[i],b_new,W_new,V_new,sigma0) for i in range(X_train.shape[1])]
    err.append(reg_training_error(Y_predicted,Y_train.T,X_train.shape[1], omega, rho))
    c=c+1

In [303]:
err[99]

158.3368331690668

In [304]:
err[0]

379.52787879893754

In [274]:
eta

2.84811212042293

In [275]:
W_new 

array([[-0.14724273, -0.22591449],
       [-0.20364018, -0.01393064],
       [-0.06846543, -0.23696529],
       [-0.24080435,  0.00726667],
       [-0.11534141, -0.2409708 ]])

In [271]:
omega

array([[-0.14724273, -0.22591449, -0.15986417,  0.87742591],
       [-0.20364018, -0.01393064, -0.25938068,  0.87742591],
       [-0.06846543, -0.23696529,  0.02175381,  0.87742591],
       [-0.24080435,  0.00726667, -0.31740086,  0.87742591],
       [-0.11534141, -0.2409708 , -0.03952039,  0.87742591]])

In [78]:
len(oo)/N

4.0

In [9]:
def second_norm(omega):
    return np.linalg.norm(omega,2)**2

In [209]:
def reg_training_error(X,y_true,number_of_samples, omega, rho,sigma):
    omega=omega.reshape(N,int(len(omega)/N))
    Y_pred=predicting(X,omega,sigma)
    #print(np.sum((Y_pred-y_true)**2))
    #print(number_of_samples)
    #print(np.sum((Y_pred-y_true)**2)/(2*number_of_samples))
    #print(rho*(second_norm(omega)))
    E = (np.sum((Y_pred-y_true)**2)/(2*number_of_samples) + rho*(second_norm(omega)))
    omega=omega.flatten()
    return E#,Y_pred

In [169]:
def hyp_tan(t,sigma):
    return (2*np.exp(2*sigma*t)-1)/(2*np.exp(2*sigma*t)+1)

In [170]:
def predicting(z0,omega,sigma):
    W = omega[:,1:3]
    V = omega[:,0:1]
    b = omega[:,3:4]
    Y_predicted=[predicting_1_ouput(z0[i],b,W,V,sigma) for i in range((z0.T).shape[1])]#(z0.T).shape[1]
    return np.array(Y_predicted)

In [171]:
def predicting_1_ouput(z0,b,W,V,sigma):
    #print("W")
    #print(W)
    #print("X")
   # print(z0)
    z1= np.dot(W,z0)-b.T
    #print("z1")
    #print(np.sum(V*hyp_tan(z1,sigma)))
    return np.sum(V*hyp_tan(z1,sigma))

In [172]:
W1 = np.random.randn(N,n)
b1 = np.ones(shape=(N, 1))
V = np.random.randn(n_y,N)
omega0=np.append(np.append(V.T,W1, axis=1), b1, axis=1)

In [173]:
e,yp=reg_training_error(X_train.T,Y_train,X_train.shape[1], omega0.flatten(), rho,sigma0)

In [174]:
second_norm(omega0)

10.377958836768968

In [175]:
rho

1e-05

In [176]:
e

15.512946951339426

In [177]:
np.sum((yp-Y_train)**2)/(2*210)

15.512843171751058

In [251]:
W1 = np.random.randn(N,n)*0.1
b1 = np.ones(shape=(N, 1))
V = np.random.randn(n_y,N)*0.1
eta=3
rho=0.00001
sigma0=1
omega0=np.append(np.append(V.T,W1, axis=1), b1, axis=1)

reg_training_error(X_train.T,Y_train,X_train.shape[1], omega0.flatten(), rho,sigma0)
err=[]
c=0
for c in range(100):
    eta=abs(eta -(0.01*eta/(c+1)))
    delta=delta_E(omega0,rho)
    omega=omega0 - eta*delta
    if delta<0.0000000001:
        break
    omega0=omega
    W_new = omega[:,1:3]
    V_new = omega[:,0:1]
    b_new = omega[:,3:4]
    
    err.append(reg_training_error(X_train.T,Y_train,X_train.shape[1], omega.flatten(), rho,sigma0))
    #c=c+1

In [252]:
err[-1]

0.7627291193418683

In [253]:
len(err)

100

In [205]:
np.sum((err[-1][1]-Y_train)**2)/420

10.017539159291536

In [206]:
from scipy.optimize import minimize

In [210]:
res = minimize(reg_training_error(X_train.T,Y_train,X_train.shape[1], omega.flatten(), rho,sigma0),omega)

TypeError: 'numpy.float64' object is not callable

In [68]:
oo=omega0.flatten()

In [69]:
oo

array([-0.04135224,  0.01083977,  0.03262176,  0.9871645 , -0.05923252,
       -0.24072998, -0.18008022,  0.9871645 , -0.00775125, -0.030291  ,
       -0.09399489,  0.9871645 , -0.22692822,  0.0021534 , -0.04720819,
        0.9871645 ,  0.00783853, -0.06675953,  0.03291857,  0.9871645 ])

In [82]:
omega0

array([[-0.04135224,  0.01083977,  0.03262176,  0.9871645 ],
       [-0.05923252, -0.24072998, -0.18008022,  0.9871645 ],
       [-0.00775125, -0.030291  , -0.09399489,  0.9871645 ],
       [-0.22692822,  0.0021534 , -0.04720819,  0.9871645 ],
       [ 0.00783853, -0.06675953,  0.03291857,  0.9871645 ]])

In [70]:
oo.reshape(5,4)

array([[-0.04135224,  0.01083977,  0.03262176,  0.9871645 ],
       [-0.05923252, -0.24072998, -0.18008022,  0.9871645 ],
       [-0.00775125, -0.030291  , -0.09399489,  0.9871645 ],
       [-0.22692822,  0.0021534 , -0.04720819,  0.9871645 ],
       [ 0.00783853, -0.06675953,  0.03291857,  0.9871645 ]])