# Neural Network Tests

This notebook contains several tests of different NN parameters

In [1]:
import numpy as np
import pandas as pd
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import hamming_loss

train_data = pd.read_csv('data/train_data.csv')
train_labels = pd.read_csv('data/train_labels.csv')


In [2]:
label_encoded, unique_labels = pd.factorize(train_labels['Font'])
labels = pd.DataFrame(label_encoded, columns=['label'])
df = pd.concat([train_data, labels], axis = 1)

In [3]:
from sklearn.model_selection import train_test_split
X = df.iloc[:, :-1]
Y = df.iloc[:, -1]
x_train_df, x_valid_df, y_train_df, y_valid_df = train_test_split(X, Y, test_size=0.3, random_state = 0)

In [4]:
test_data = pd.read_csv('data/test_data.csv')
x_test_df = test_data

In [5]:
x_train_pre_norm = np.array(x_train_df)
x_valid_pre_norm = np.array(x_valid_df)
y_train = np.array(y_train_df)
y_valid = np.array(y_valid_df)
x_test_pre_norm = np.array(x_test_df)

X_np = np.array(X)

In [6]:
mean = np.sum(X_np, axis = 0) / X_np.shape[0]
std = np.std(X_np, axis = 0)

In [7]:
def normalize(X, mean, std):
    """Normalizes a given array X by columns 
    with the mean and std"""
    X_out = np.zeros(X.shape)
    X_out = (X - mean)/std
    return X_out 

In [8]:
x_train = normalize(x_train_pre_norm, mean, std)
x_valid = normalize(x_valid_pre_norm, mean, std)
x_test = normalize(x_test_pre_norm, mean, std)

In [9]:
def predictions_as_csv(y_pred, file_name):
    path = ""
    status = 0
    if len(y_pred) == 29221:
        ids = np.arange(1,len(y_pred)+1,1)
        pred_label = unique_labels[y_pred]
        data = {'ID':ids, 'Font':pred_label} 
        submission = pd.DataFrame(data)
        submission.to_csv(path + file_name + ".csv", index = False)
        status = 1
    
    return status

**Test 1**

In [10]:
model_nNetwork = MLPClassifier(random_state=1, max_iter=600, alpha = 1, activation = 'relu')
model_nNetwork.fit(x_train, y_train)

MLPClassifier(alpha=1, max_iter=600, random_state=1)

In [11]:
y_pred_train = model_nNetwork.predict(x_train)
error = hamming_loss(y_train, y_pred_train)
print('The training error is: ' + str(error) + '.')

The training error is: 0.24142857142857144.


In [12]:
y_pred_valid = model_nNetwork.predict(x_valid)
error = hamming_loss(y_valid, y_pred_valid)
print('The validation error is: ' + str(error) + '.')

The validation error is: 0.3384615384615385.


In [13]:
predicted_score = 1- error
predicted_score

0.6615384615384615

In [14]:
y_pred_test = model_nNetwork.predict(x_test)

**Test 2**

In [15]:
model_nNetwork = MLPClassifier(random_state=1, max_iter=600, alpha = 0.9, activation = 'relu', learning_rate = 'adaptive')
model_nNetwork.fit(x_train, y_train)

MLPClassifier(alpha=0.9, learning_rate='adaptive', max_iter=600, random_state=1)

In [16]:
y_pred_train = model_nNetwork.predict(x_train)
error = hamming_loss(y_train, y_pred_train)
print('The training error is: ' + str(error) + '.')

The training error is: 0.23241758241758242.


In [17]:
y_pred_valid = model_nNetwork.predict(x_valid)
error = hamming_loss(y_valid, y_pred_valid)
print('The validation error is: ' + str(error) + '.')

The validation error is: 0.3352307692307692.


In [18]:
predicted_score = 1- error
predicted_score

0.6647692307692308

**Test 3**

In [19]:
model_nNetwork = MLPClassifier(random_state=1, max_iter=600, alpha = 0.9, activation = 'relu', learning_rate = 'adaptive', hidden_layer_sizes=(100,30) )
model_nNetwork.fit(x_train, y_train)

MLPClassifier(alpha=0.9, hidden_layer_sizes=(100, 30), learning_rate='adaptive',
              max_iter=600, random_state=1)

In [20]:
y_pred_train = model_nNetwork.predict(x_train)
error = hamming_loss(y_train, y_pred_train)
print('The training error is: ' + str(error) + '.')

The training error is: 0.18652747252747254.


In [21]:
y_pred_valid = model_nNetwork.predict(x_valid)
error = hamming_loss(y_valid, y_pred_valid)
print('The validation error is: ' + str(error) + '.')

The validation error is: 0.30056410256410254.


In [22]:
predicted_score = 1- error
predicted_score

0.6994358974358974

**Test 4**

In [23]:
model_nNetwork = MLPClassifier(random_state=1, max_iter=800, alpha = 0.8, activation = 'relu', learning_rate = 'adaptive', hidden_layer_sizes=(100,100) )
model_nNetwork.fit(x_train, y_train)

MLPClassifier(alpha=0.8, hidden_layer_sizes=(100, 100),
              learning_rate='adaptive', max_iter=800, random_state=1)

In [24]:
y_pred_train = model_nNetwork.predict(x_train)
error = hamming_loss(y_train, y_pred_train)
print('The training error is: ' + str(error) + '.')

The training error is: 0.17235164835164835.


In [25]:
y_pred_valid = model_nNetwork.predict(x_valid)
error = hamming_loss(y_valid, y_pred_valid)
print('The validation error is: ' + str(error) + '.')

The validation error is: 0.29097435897435897.


In [26]:
predicted_score = 1- error
predicted_score

0.709025641025641

**Test 5**

In [27]:
model_nNetwork = MLPClassifier(random_state=1, max_iter=800, alpha = 1, activation = 'relu', learning_rate = 'adaptive', hidden_layer_sizes=(100,100) )
model_nNetwork.fit(x_train, y_train)

MLPClassifier(alpha=1, hidden_layer_sizes=(100, 100), learning_rate='adaptive',
              max_iter=800, random_state=1)

In [28]:
y_pred_train = model_nNetwork.predict(x_train)
error = hamming_loss(y_train, y_pred_train)
print('The training error is: ' + str(error) + '.')

The training error is: 0.19441758241758242.


In [29]:
y_pred_valid = model_nNetwork.predict(x_valid)
error = hamming_loss(y_valid, y_pred_valid)
print('The validation error is: ' + str(error) + '.')

The validation error is: 0.30194871794871797.


In [30]:
predicted_score = 1- error
predicted_score

0.6980512820512821

**Test 6**

In [31]:
model_nNetwork = MLPClassifier(random_state=1, max_iter=800, alpha = 1, activation = 'relu', learning_rate = 'adaptive', hidden_layer_sizes=(100,100,20) )
model_nNetwork.fit(x_train, y_train)

MLPClassifier(alpha=1, hidden_layer_sizes=(100, 100, 20),
              learning_rate='adaptive', max_iter=800, random_state=1)

In [32]:
y_pred_train = model_nNetwork.predict(x_train)
error = hamming_loss(y_train, y_pred_train)
print('The training error is: ' + str(error) + '.')

The training error is: 0.1666813186813187.


In [33]:
y_pred_valid = model_nNetwork.predict(x_valid)
error = hamming_loss(y_valid, y_pred_valid)
print('The validation error is: ' + str(error) + '.')

The validation error is: 0.2866666666666667.


In [34]:
predicted_score = 1- error
predicted_score

0.7133333333333334

**Test 7**

In [35]:
model_nNetwork = MLPClassifier(random_state=1, max_iter=800, alpha = 1, activation = 'relu', learning_rate = 'adaptive', hidden_layer_sizes=(100,100,100) )
model_nNetwork.fit(x_train, y_train)

MLPClassifier(alpha=1, hidden_layer_sizes=(100, 100, 100),
              learning_rate='adaptive', max_iter=800, random_state=1)

In [36]:
y_pred_train = model_nNetwork.predict(x_train)
error = hamming_loss(y_train, y_pred_train)
print('The training error is: ' + str(error) + '.')

The training error is: 0.1620879120879121.


In [37]:
y_pred_valid = model_nNetwork.predict(x_valid)
error = hamming_loss(y_valid, y_pred_valid)
print('The validation error is: ' + str(error) + '.')

The validation error is: 0.2802051282051282.


In [38]:
predicted_score = 1- error
predicted_score

0.7197948717948718

In [39]:
y_pred_test = model_nNetwork.predict(x_test)

**Test 8**

In [40]:
model_nNetwork = MLPClassifier(random_state=1, max_iter=800, alpha = 1.1, activation = 'relu', learning_rate = 'adaptive', hidden_layer_sizes=(100,100,100,50) )
model_nNetwork.fit(x_train, y_train)

MLPClassifier(alpha=1.1, hidden_layer_sizes=(100, 100, 100, 50),
              learning_rate='adaptive', max_iter=800, random_state=1)

In [41]:
y_pred_train = model_nNetwork.predict(x_train)
error = hamming_loss(y_train, y_pred_train)
print('The training error is: ' + str(error) + '.')

The training error is: 0.18410989010989012.


In [42]:
y_pred_valid = model_nNetwork.predict(x_valid)
error = hamming_loss(y_valid, y_pred_valid)
print('The validation error is: ' + str(error) + '.')

The validation error is: 0.2990769230769231.


In [43]:
predicted_score = 1- error
predicted_score

0.700923076923077

**Test 9**

In [44]:
model_nNetwork = MLPClassifier(random_state=1, max_iter=800, alpha = 1.1, activation = 'logistic', learning_rate = 'adaptive', hidden_layer_sizes=(100,100,100,50) )
model_nNetwork.fit(x_train, y_train)

MLPClassifier(activation='logistic', alpha=1.1,
              hidden_layer_sizes=(100, 100, 100, 50), learning_rate='adaptive',
              max_iter=800, random_state=1)

In [45]:
y_pred_train = model_nNetwork.predict(x_train)
error = hamming_loss(y_train, y_pred_train)
print('The training error is: ' + str(error) + '.')

The training error is: 0.72.


In [46]:
y_pred_valid = model_nNetwork.predict(x_valid)
error = hamming_loss(y_valid, y_pred_valid)
print('The validation error is: ' + str(error) + '.')

The validation error is: 0.7243589743589743.


In [47]:
predicted_score = 1- error
predicted_score

0.27564102564102566

**Test 10**

In [48]:
model_nNetwork = MLPClassifier(random_state=1, max_iter=1000, alpha = 1.1, activation = 'relu', learning_rate = 'adaptive', hidden_layer_sizes=(200,100,100,50) )
model_nNetwork.fit(x_train, y_train)

MLPClassifier(alpha=1.1, hidden_layer_sizes=(200, 100, 100, 50),
              learning_rate='adaptive', max_iter=1000, random_state=1)

In [49]:
y_pred_train = model_nNetwork.predict(x_train)
error = hamming_loss(y_train, y_pred_train)
print('The training error is: ' + str(error) + '.')

The training error is: 0.14441758241758243.


In [50]:
y_pred_valid = model_nNetwork.predict(x_valid)
error = hamming_loss(y_valid, y_pred_valid)
print('The validation error is: ' + str(error) + '.')

The validation error is: 0.26569230769230767.


In [51]:
predicted_score = 1- error
predicted_score

0.7343076923076923

**Test 11**

In [52]:
model_nNetwork = MLPClassifier(random_state=1, max_iter=1000, alpha = 1.1, activation = 'logistic', learning_rate = 'adaptive', hidden_layer_sizes=(200,100,100,50) )
model_nNetwork.fit(x_train, y_train)

MLPClassifier(activation='logistic', alpha=1.1,
              hidden_layer_sizes=(200, 100, 100, 50), learning_rate='adaptive',
              max_iter=1000, random_state=1)

In [53]:
y_pred_train = model_nNetwork.predict(x_train)
error = hamming_loss(y_train, y_pred_train)
print('The training error is: ' + str(error) + '.')

The training error is: 0.72.


In [54]:
y_pred_valid = model_nNetwork.predict(x_valid)
error = hamming_loss(y_valid, y_pred_valid)
print('The validation error is: ' + str(error) + '.')

The validation error is: 0.7243589743589743.


In [55]:
predicted_score = 1- error
predicted_score

0.27564102564102566

**Test 12**

In [56]:
model_nNetwork = MLPClassifier(random_state=1, max_iter=1000, alpha = 1.1, activation = 'tanh', learning_rate = 'adaptive', hidden_layer_sizes=(200,100,100,50) )
model_nNetwork.fit(x_train, y_train)

MLPClassifier(activation='tanh', alpha=1.1,
              hidden_layer_sizes=(200, 100, 100, 50), learning_rate='adaptive',
              max_iter=1000, random_state=1)

In [57]:
y_pred_train = model_nNetwork.predict(x_train)
error = hamming_loss(y_train, y_pred_train)
print('The training error is: ' + str(error) + '.')

The training error is: 0.18687912087912087.


In [58]:
y_pred_valid = model_nNetwork.predict(x_valid)
error = hamming_loss(y_valid, y_pred_valid)
print('The validation error is: ' + str(error) + '.')

The validation error is: 0.316.


In [59]:
predicted_score = 1- error
predicted_score

0.6839999999999999

**Test 13**

In [60]:
model_nNetwork = MLPClassifier(random_state=1, max_iter=1000, alpha = 1.1)
model_nNetwork.fit(x_train, y_train)

MLPClassifier(alpha=1.1, max_iter=1000, random_state=1)

In [61]:
y_pred_train = model_nNetwork.predict(x_train)
error = hamming_loss(y_train, y_pred_train)
print('The training error is: ' + str(error) + '.')

The training error is: 0.24863736263736264.


In [62]:
y_pred_valid = model_nNetwork.predict(x_valid)
error = hamming_loss(y_valid, y_pred_valid)
print('The validation error is: ' + str(error) + '.')

The validation error is: 0.3422051282051282.


In [63]:
predicted_score = 1- error
predicted_score

0.6577948717948718

**Test 14**

In [64]:
model_nNetwork = MLPClassifier(random_state=1, max_iter=1000, alpha = 1.2, activation = 'relu', learning_rate = 'adaptive', hidden_layer_sizes=(200,100,100,50) )
model_nNetwork.fit(x_train, y_train)

MLPClassifier(alpha=1.2, hidden_layer_sizes=(200, 100, 100, 50),
              learning_rate='adaptive', max_iter=1000, random_state=1)

In [65]:
y_pred_train = model_nNetwork.predict(x_train)
error = hamming_loss(y_train, y_pred_train)
print('The training error is: ' + str(error) + '.')

The training error is: 0.14516483516483517.


In [66]:
y_pred_valid = model_nNetwork.predict(x_valid)
error = hamming_loss(y_valid, y_pred_valid)
print('The validation error is: ' + str(error) + '.')

The validation error is: 0.2641025641025641.


In [67]:
predicted_score = 1- error
predicted_score

0.735897435897436

**Test 15**

In [68]:
model_nNetwork = MLPClassifier(random_state=1, max_iter=1500, alpha = 1.3, activation = 'relu', learning_rate = 'adaptive', hidden_layer_sizes=(200,200,100,100, 50) )
model_nNetwork.fit(x_train, y_train)

MLPClassifier(alpha=1.3, hidden_layer_sizes=(200, 200, 100, 100, 50),
              learning_rate='adaptive', max_iter=1500, random_state=1)

In [69]:
y_pred_train = model_nNetwork.predict(x_train)
error = hamming_loss(y_train, y_pred_train)
print('The training error is: ' + str(error) + '.')

The training error is: 0.1412967032967033.


In [70]:
y_pred_valid = model_nNetwork.predict(x_valid)
error = hamming_loss(y_valid, y_pred_valid)
print('The validation error is: ' + str(error) + '.')

The validation error is: 0.2617948717948718.


In [71]:
predicted_score = 1- error
predicted_score

0.7382051282051282

In [72]:
y_pred_test = model_nNetwork.predict(x_test)
predictions_as_csv(y_pred_test, "nNetwork_prediction5")

1