In [1]:
# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from random import shuffle
from matplotlib import cm
%matplotlib inline

# Seed NumPy's global random number generator so runs are reproducible
np.random.seed(10)

In [2]:
# Load the raw data set from 'tae.data'. header=None tells pandas the file has
# no header row, so the columns are numbered 0..N-1 until they are renamed in a
# later cell.
dfu = pd.read_csv('tae.data', header=None)

In [3]:
# Summarise the raw frame: number of rows, column dtypes and non-null counts.
dfu.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 151 entries, 0 to 150
Data columns (total 6 columns):
0    151 non-null int64
1    151 non-null int64
2    151 non-null int64
3    151 non-null int64
4    151 non-null int64
5    151 non-null int64
dtypes: int64(6)
memory usage: 7.1 KB


In [4]:
# Preview the first rows of the raw (not yet renamed or normalised) data.
dfu.head()

Unnamed: 0,0,1,2,3,4,5
0,1,23,3,1,19,3
1,2,15,3,1,17,3
2,1,23,3,2,49,3
3,1,5,2,2,33,3
4,2,7,11,2,55,3


In [5]:
# Give the columns descriptive names (the CSV was read without a header row).
columns = ['English speaker', 'Course', 'Course instructor', 'Summer or regular semester', 'Class size', 'Class attribute']
dfu.columns = columns

# Min-max normalise every column, then restore the original integer class
# label so that it can be one-hot encoded below instead of being scaled.
df = (dfu - dfu.min()) / (dfu.max() - dfu.min())
df["Class attribute"] = dfu["Class attribute"]

# Hold out 15 randomly chosen rows as the test set; everything else is used
# for training.
test = df.sample(15)
train = df.drop(test.index)

# Every class label that occurs in the full data set, in sorted order. Both
# one-hot matrices are aligned to this list so that column k always stands for
# the same class, even if the small test sample happens to miss a class
# (a bare get_dummies() on the sample would then drop/shift columns).
classes = sorted(dfu['Class attribute'].unique())

# Training inputs and one-hot encoded training targets.
x = train.drop('Class attribute', axis=1).values
labels_train = train['Class attribute']
y = pd.get_dummies(labels_train).reindex(columns=classes, fill_value=0).astype(float).values

# Test inputs and one-hot encoded test targets.
xt = test.drop('Class attribute', axis=1).values
labels_test = test['Class attribute']
yt = pd.get_dummies(labels_test).reindex(columns=classes, fill_value=0).astype(float).values

# Backward compatibility: the original cell left `labels` bound to the test
# labels, so keep that name pointing at the same data.
labels = labels_test

In [6]:
# Preview the first rows of the held-out test set (features are normalised,
# the class label is left as the original integer).
test.head()

Unnamed: 0,English speaker,Course,Course instructor,Summer or regular semester,Class size,Class attribute
85,1.0,0.25,0.4,0.0,0.269841,3
112,0.0,0.541667,0.56,1.0,0.460317,1
10,1.0,0.375,0.84,1.0,0.095238,3
133,1.0,0.958333,1.0,1.0,0.285714,2
53,1.0,0.208333,0.64,1.0,0.619048,2


In [7]:
def layer_sizes(X, Y):
    """Return ``(n_x, n_y)``: the number of columns of ``X`` and of ``Y``.

    These are used as the input-layer and output-layer widths of the network.
    """
    return X.shape[1], Y.shape[1]
        
def sigmoid(z):
    """Element-wise logistic function ``1 / (1 + exp(-z))``.

    The naive formula overflows in ``exp(-z)`` for large negative ``z``. This
    version only ever exponentiates ``-|z|`` and picks the algebraically
    equivalent form per element, so no overflow can occur.

    Args:
        z: scalar or array-like of real numbers.

    Returns:
        A NumPy scalar for scalar input, otherwise an array shaped like ``z``.
    """
    z = np.asarray(z)
    # exp(-|z|) is at most 1, so it can underflow to 0 but never overflow.
    e = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)).
    out = np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))
    # Indexing with () unwraps a 0-d result to a NumPy scalar and is a no-op
    # view for real arrays, matching what the naive formula returned.
    return out[()]

def sigmoid_prime(z):
    """Derivative of the logistic function at ``z``: ``s * (1 - s)`` with ``s = sigmoid(z)``."""
    activation = sigmoid(z)
    return activation * (1 - activation)

In [8]:
def initialize_parameters(n_x, n_h1, n_h2, n_y):
    """Create the initial weights and biases of the three-layer network.

    Each weight matrix has shape ``(fan_in, fan_out)`` and is filled with
    standard-normal samples scaled by 0.01; each bias vector is all zeros.
    Samples are drawn from NumPy's global random state in the order W1, W2, W3.

    Returns:
        dict with keys "W1", "b1", "W2", "b2", "W3", "b3".
    """
    layers = (
        ("W1", "b1", n_x, n_h1),
        ("W2", "b2", n_h1, n_h2),
        ("W3", "b3", n_h2, n_y),
    )

    parameters = {}
    for weight_key, bias_key, fan_in, fan_out in layers:
        parameters[weight_key] = 0.01 * np.random.randn(fan_in, fan_out)
        parameters[bias_key] = np.zeros(fan_out)

    return parameters

In [9]:
def forward_propagation(X, parameters):
    """Run the input through the three sigmoid layers.

    Args:
        X: input matrix; its column count must match the row count of W1.
        parameters: dict holding "W1", "b1", "W2", "b2", "W3", "b3".

    Returns:
        ``(A3, cache)`` where ``A3`` is the output-layer activation and
        ``cache`` maps "Z1", "A1", "Z2", "A2", "Z3", "A3" to the
        pre-activation / activation of each layer (needed for backprop).
    """
    # Read all parameters up front, layer by layer.
    layers = (
        ("Z1", "A1", parameters['W1'], parameters['b1']),
        ("Z2", "A2", parameters['W2'], parameters['b2']),
        ("Z3", "A3", parameters['W3'], parameters['b3']),
    )

    cache = {}
    signal = X
    for z_key, a_key, weights, bias in layers:
        pre_activation = np.dot(signal, weights) + bias
        signal = sigmoid(pre_activation)
        cache[z_key] = pre_activation
        cache[a_key] = signal

    # After the loop `signal` is the activation of the last layer (A3).
    return signal, cache

In [10]:
def compute_cost(A3, Y, parameters):
    """Binary cross-entropy between outputs ``A3`` and targets ``Y``, scaled by 100.

    The per-element log-likelihoods are summed over all outputs and divided by
    the number of examples (rows of ``Y``).

    Args:
        A3: network outputs, same shape as ``Y``.
        Y: target matrix with one row per example.
        parameters: unused; kept so existing callers keep working.

    Returns:
        The cost as a NumPy scalar (mean summed cross-entropy times 100).
    """
    m = Y.shape[0]  # number of examples

    # Keep activations strictly inside (0, 1): a saturated output of exactly
    # 0 or 1 would otherwise produce log(0) = -inf and 0 * -inf = nan.
    eps = 1e-12
    A3 = np.clip(A3, eps, 1.0 - eps)

    logprobs = np.multiply(np.log(A3), Y) + np.multiply((1 - Y), np.log(1 - A3))
    cost = np.squeeze(-np.sum(logprobs) / m)
    return cost * 100

In [11]:
def accuracy(output_vec, test_vec):
    """Fraction of rows whose arg-max column in ``output_vec`` matches ``test_vec``.

    Both arguments are 2-D score/one-hot matrices with one row per example.
    """
    expected_class = np.argmax(test_vec, axis=1)
    predicted_class = np.argmax(output_vec, axis=1)
    return np.mean(expected_class == predicted_class)

In [12]:
def adapt_learning_rate(learning_rate, xi_d, xi_i, er_r, cost, prev_cost):
    """Adjust the learning rate from the change in cost between two epochs.

    * cost grew beyond ``er_r * prev_cost``  -> multiply the rate by ``xi_d``
    * cost dropped below ``prev_cost``        -> multiply the rate by ``xi_i``
    * otherwise                               -> leave the rate unchanged

    Returns:
        ``(new_learning_rate, cost)``; the second item is the current cost, to
        be passed back in as ``prev_cost`` on the next call.
    """
    if cost > er_r * prev_cost:
        return learning_rate * xi_d, cost
    if cost < prev_cost:
        return learning_rate * xi_i, cost
    return learning_rate, cost

In [13]:
def backward_propagation(parameters, cache, X, Y):
    """Back-propagate the output error through the three layers.

    Args:
        parameters: dict holding the weight matrices ("W2" and "W3" are read).
        cache: dict produced by the forward pass ("A1", "A2", "A3", "Z1", "Z2").
        X: input matrix used in the forward pass.
        Y: target matrix, same shape as A3.

    Returns:
        dict with "dW1".."dW3" (weight gradients, summed over examples) and
        "db1".."db3" (the per-example layer errors; they are NOT summed here).
    """
    W2, W3 = parameters['W2'], parameters['W3']
    A1, A2, A3 = cache['A1'], cache['A2'], cache['A3']
    Z1, Z2 = cache['Z1'], cache['Z2']

    # Layer errors, from the output layer back towards the input.
    err_out = A3 - Y
    err_hidden2 = np.dot(err_out, W3.T) * sigmoid_prime(Z2)
    err_hidden1 = np.dot(err_hidden2, W2.T) * sigmoid_prime(Z1)

    # Weight gradient of a layer = (its input activations)^T . (its error).
    return {
        "dW1": np.dot(X.T, err_hidden1),
        "db1": err_hidden1,
        "dW2": np.dot(A1.T, err_hidden2),
        "db2": err_hidden2,
        "dW3": np.dot(A2.T, err_out),
        "db3": err_out,
    }

In [14]:
def update_parameters(parameters, grads, learning_rate):
    """Apply one gradient-descent step to every weight matrix and bias vector.

    Weights are decreased by ``learning_rate * dW``; biases by
    ``learning_rate * db.sum(axis=0)`` (``db`` holds one row per example).
    The arrays stored in ``parameters`` are modified in place, and a new dict
    referencing those same arrays is returned.
    """
    param_keys = ("W1", "b1", "W2", "b2", "W3", "b3")
    grad_keys = ("dW1", "db1", "dW2", "db2", "dW3", "db3")

    # Look everything up first so a missing key fails before anything is changed.
    tensors = [parameters[key] for key in param_keys]
    steps = [grads[key] for key in grad_keys]

    # Entries come in (weight, bias) pairs: even index = weight, odd = bias.
    for w_idx in range(0, len(tensors), 2):
        b_idx = w_idx + 1
        tensors[w_idx] -= learning_rate * steps[w_idx]
        tensors[b_idx] -= learning_rate * steps[b_idx].sum(axis=0)

    return dict(zip(param_keys, tensors))

In [15]:
def nn_model(X, Y, n_h1, n_h2, er_r, xi_i, xi_d, num_iterations, learning_rate):
    """Train the three-layer sigmoid network with an adaptive learning rate.

    Args:
        X: training inputs, one row per example.
        Y: one-hot training targets, one row per example.
        n_h1, n_h2: widths of the first and second hidden layer.
        er_r: cost may grow by at most this factor before the rate is reduced.
        xi_i: factor applied to the learning rate when the cost decreased.
        xi_d: factor applied to the learning rate when the cost grew too much.
        num_iterations: number of full-batch training epochs.
        learning_rate: initial learning rate.

    Returns:
        ``(parameters, A3)``: the trained parameter dict and the network output
        for ``X`` computed with those final parameters.
    """
    # Initial "previous cost" fed to the first adapt_learning_rate call.
    prev_cost = 1000.0
    n_x, n_y = layer_sizes(X, Y)

    parameters = initialize_parameters(n_x, n_h1, n_h2, n_y)

    for epoch in range(num_iterations):

        # Forward propagation.
        A3, cache = forward_propagation(X, parameters)

        # Cost function.
        cost = compute_cost(A3, Y, parameters)

        # Backpropagation.
        grads = backward_propagation(parameters, cache, X, Y)

        # Alternative schedules that were tried instead of the adaptive one:
        # CONSTANT LEARNING RATE:
        # learning_rate = 0.01

        # DECREMENTAL LEARNING RATE:
        # learning_rate *= 1/(1 + learning_rate/(epoch+1) * epoch)

        # ADAPTIVE LEARNING RATE:
        learning_rate, prev_cost = adapt_learning_rate(learning_rate, xi_d, xi_i, er_r, cost, prev_cost)

        # Gradient descent parameter update.
        parameters = update_parameters(parameters, grads, learning_rate)

    # Recompute the outputs with the final parameters. The A3 from inside the
    # loop predates the last update (and does not exist at all when
    # num_iterations is 0), so it would not match the returned parameters.
    A3, _ = forward_propagation(X, parameters)

    return parameters, A3

In [16]:
# Train on the training split and measure accuracy on the training outputs.
parameters, A3 = nn_model(x, y, n_h1 = 40, n_h2 = 20, er_r = 1.016, xi_i = 1.06, xi_d = 0.77, num_iterations = 4800, learning_rate = 0.01)
acc = accuracy(output_vec = A3, test_vec = y)

# Run the trained network on the held-out rows and measure test accuracy.
A3, cache = forward_propagation(xt, parameters)
acc_t = accuracy(output_vec = A3, test_vec = yt)

# Print "<train accuracy>, <test accuracy>". The format string needs one
# placeholder per value; with a single "{}" the test accuracy was dropped.
print("{}, {}".format(acc, acc_t))

 0.6911764705882353, 0.6666666666666666


In [17]:
# Disabled experiment: the code below is wrapped in a bare string literal, so
# it never executes (the cell merely echoes the string).
# It grid-searches the learning-rate growth factor xi_i (1.01..1.39) against
# the decay factor xi_d (0.61..0.99), stores acc * acc_t for each pair and
# draws the result as a 3-D surface.
# NOTE(review): acc_tab_xi is filled as [xi_i, xi_d], while np.meshgrid(X, Y)
# with its default indexing presumably lays rows along Y (xi_d) - the surface
# axes look swapped; confirm before re-enabling.
'''
# XI_I XI_D
n_h1 = 68
n_h2 = 43
er_r = 1.016
xi_i_number = list(range(101, 140, 1))
xi_d_number = list(range(61, 100, 1))
acc_tab_xi = np.zeros(shape=(39,39))
for xi_i in xi_i_number:
    for xi_d in xi_d_number:
        parameters, A3 = nn_model(x, y, n_h1, n_h2, er_r, xi_i/100, xi_d/100, num_iterations = 4800, learning_rate = 0.01)
        acc = accuracy(output_vec = A3, test_vec = y)
        A3, cache = forward_propagation(xt, parameters)
        acc_t = accuracy(output_vec = A3, test_vec = yt)
        print("{}, {}, {}, {}".format(xi_i, xi_d, acc, acc_t))
        acc_tab_xi[xi_i-101, xi_d-61] = acc*acc_t
        
fig = plt.figure(num=None, figsize=(6, 5), dpi=150, facecolor='w', edgecolor='k')
X = np.arange(1.01, 1.391, 0.01)
Y = np.arange(0.61, 0.991, 0.01)
X, Y = np.meshgrid(X, Y)
ax = fig.gca(projection='3d')
ax.set_xlabel('xi_i')
ax.set_ylabel('xi_d')
ax.set_zlabel('acc * acc_t')
ax.plot_surface(X, Y, acc_tab_xi, rstride=1, cstride=1, cmap='viridis', antialiased=True)
'''

'\n# XI_I XI_D\nn_h1 = 68\nn_h2 = 43\ner_r = 1.016\nxi_i_number = list(range(101, 140, 1))\nxi_d_number = list(range(61, 100, 1))\nacc_tab_xi = np.zeros(shape=(39,39))\nfor xi_i in xi_i_number:\n    for xi_d in xi_d_number:\n        parameters, A3 = nn_model(x, y, n_h1, n_h2, er_r, xi_i/100, xi_d/100, num_iterations = 4800, learning_rate = 0.01)\n        acc = accuracy(output_vec = A3, test_vec = y)\n        A3, cache = forward_propagation(xt, parameters)\n        acc_t = accuracy(output_vec = A3, test_vec = yt)\n        print("{}, {}, {}, {}".format(xi_i, xi_d, acc, acc_t))\n        acc_tab_xi[xi_i-101, xi_d-61] = acc*acc_t\n        \nfig = plt.figure(num=None, figsize=(6, 5), dpi=150, facecolor=\'w\', edgecolor=\'k\')\nX = np.arange(1.01, 1.391, 0.01)\nY = np.arange(0.61, 0.991, 0.01)\nX, Y = np.meshgrid(X, Y)\nax = fig.gca(projection=\'3d\')\nax.set_xlabel(\'xi_i\')\nax.set_ylabel(\'xi_d\')\nax.set_zlabel(\'acc * acc_t\')\nax.plot_surface(X, Y, acc_tab_xi, rstride=1, cstride=1, cmap

In [18]:
# Disabled experiment: the code below is wrapped in a bare string literal, so
# it never executes (the cell merely echoes the string).
# It plots acc * acc_t against the epoch for three learning-rate schedules
# (adaptive, constant, decremental), each run for 10000 iterations and sampled
# every 50 epochs.
# NOTE(review): nn_model_a, nn_model_c and nn_model_d are not defined in the
# visible part of this notebook - they would have to be restored before this
# cell can be re-enabled.
'''
# CONSTANT, DECREMENT, ADAPTIVE LR
n_h1 = 68
n_h2 = 43
er_r = 1.016
xi_i = 1.12
xi_d = 0.7
acc_tab_a = nn_model_a(x, y, n_h1, n_h2, er_r, xi_i, xi_d, num_iterations = 10000, learning_rate = 0.01)
acc_tab_c = nn_model_c(x, y, n_h1, n_h2, num_iterations = 10000, learning_rate = 0.01)
acc_tab_d = nn_model_d(x, y, n_h1, n_h2, num_iterations = 10000, learning_rate = 0.01)

epoch_tab = np.arange(0, 9951, 50)
fig = plt.gcf()
fig.set_size_inches(10,5)
plt.plot(epoch_tab, acc_tab_a)
plt.plot(epoch_tab, acc_tab_c)
plt.plot(epoch_tab, acc_tab_d)
plt.xlabel('epochs')
plt.ylabel('acc * acc_t')
plt.grid(True)
plt.legend(['Adaptive', 'Constant', 'Decrement'], loc='upper left')
plt.show()
'''

"\n# CONSTANT, DECREMENT, ADAPTIVE LR\nn_h1 = 68\nn_h2 = 43\ner_r = 1.016\nxi_i = 1.12\nxi_d = 0.7\nacc_tab_a = nn_model_a(x, y, n_h1, n_h2, er_r, xi_i, xi_d, num_iterations = 10000, learning_rate = 0.01)\nacc_tab_c = nn_model_c(x, y, n_h1, n_h2, num_iterations = 10000, learning_rate = 0.01)\nacc_tab_d = nn_model_d(x, y, n_h1, n_h2, num_iterations = 10000, learning_rate = 0.01)\n\nepoch_tab = np.arange(0, 9951, 50)\nfig = plt.gcf()\nfig.set_size_inches(10,5)\nplt.plot(epoch_tab, acc_tab_a)\nplt.plot(epoch_tab, acc_tab_c)\nplt.plot(epoch_tab, acc_tab_d)\nplt.xlabel('epochs')\nplt.ylabel('acc * acc_t')\nplt.grid(True)\nplt.legend(['Adaptive', 'Constant', 'Decrement'], loc='upper left')\nplt.show()\n"