In [130]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle

In [131]:
class NeuralNetwork(object):
    """Fully connected network with one sigmoid hidden layer and a linear output.

    Attributes
    ----------
    weights_input_to_hidden : array of shape (input_nodes, hidden_nodes)
    weights_hidden_to_output : array of shape (hidden_nodes, output_nodes)
    lr : learning rate applied to the batch-averaged weight step
    activation_function : callable applied element-wise to the hidden layer
    """

    def __init__(self, input_nodes, hidden_nodes, output_nodes, learning_rate):
        """Store the layer sizes and draw random initial weights.

        Weights are sampled from a normal distribution with mean 0 and a
        standard deviation of ``n ** -0.5``, where ``n`` is the number of
        nodes feeding the layer.
        """
        # Set number of nodes in input, hidden and output layers.
        self.input_nodes = input_nodes
        self.hidden_nodes = hidden_nodes
        self.output_nodes = output_nodes

        # Initialize weights
        self.weights_input_to_hidden = np.random.normal(0.0, self.input_nodes**-0.5,
                                       (self.input_nodes, self.hidden_nodes))

        self.weights_hidden_to_output = np.random.normal(0.0, self.hidden_nodes**-0.5,
                                       (self.hidden_nodes, self.output_nodes))
        self.lr = learning_rate

        # Logistic sigmoid used by the hidden layer.
        self.activation_function = lambda x : 1/(1 + np.exp(-x))

    def train(self, features, targets):
        ''' Train the network on batch of features and targets.

            Runs one forward and backward pass per record, sums the weight
            steps, and applies a single update scaled by the learning rate
            and divided by the number of records.

            Arguments
            ---------

            features: 2D array, each row is one data record, each column is a feature
            targets: 1D array of target values

            An empty batch leaves the weights untouched.
        '''
        n_records = features.shape[0]
        if n_records == 0:
            # Dividing the accumulated steps by zero records would turn every
            # weight into NaN, so there is nothing useful to do here.
            return

        delta_weights_i_h = np.zeros(self.weights_input_to_hidden.shape)
        delta_weights_h_o = np.zeros(self.weights_hidden_to_output.shape)
        for X, y in zip(features, targets):
            ### Forward pass ###
            hidden_inputs = np.dot(X, self.weights_input_to_hidden)  # signals into hidden layer
            hidden_outputs = self.activation_function(hidden_inputs)  # signals from hidden layer

            final_inputs = np.dot(hidden_outputs, self.weights_hidden_to_output)  # signals into final output layer
            final_outputs = final_inputs  # output activation is the identity

            ### Backward pass ###
            # Output layer error is the difference between desired target and actual output.
            error = y - final_outputs

            # The identity output activation has derivative 1.
            output_error_term = error * 1

            # Propagate the output error back through the hidden-to-output
            # weights, then scale by the sigmoid derivative h * (1 - h).
            hidden_error = np.dot(self.weights_hidden_to_output, output_error_term)
            hidden_error_term = hidden_error * hidden_outputs * (1 - hidden_outputs)

            # Weight step (input to hidden): column vector of inputs broadcast
            # against the per-hidden-node error terms.
            delta_weights_i_h += hidden_error_term * X[:, None]

            # Weight step (hidden to output): column vector of hidden
            # activations broadcast against the output error terms.
            delta_weights_h_o += output_error_term * hidden_outputs[:, None]

        # Gradient descent step using the batch-averaged weight steps.
        self.weights_hidden_to_output += self.lr * delta_weights_h_o/n_records
        self.weights_input_to_hidden += self.lr * delta_weights_i_h/n_records

    def run(self, features):
        """Run a forward pass and return the output-layer values.

        ``features`` is dotted with the input-to-hidden weights, so it may be
        a single record or a 2D batch with one record per row.
        """
        hidden_inputs = np.dot(features, self.weights_input_to_hidden)  # signals into hidden layer
        hidden_outputs = self.activation_function(hidden_inputs)  # signals from hidden layer

        final_inputs = np.dot(hidden_outputs, self.weights_hidden_to_output)  # signals into final output layer
        final_outputs = final_inputs  # output activation is the identity

        return final_outputs


In [132]:
# Hyperparameters and layer sizes used to construct the network.
learning_rate = 0.0004
hidden_nodes = 1800
output_nodes = 1
N_i = 6  # number of input nodes

network = NeuralNetwork(
    input_nodes=N_i,
    hidden_nodes=hidden_nodes,
    output_nodes=output_nodes,
    learning_rate=learning_rate,
)

In [133]:
import json

# SECURITY: pickle.load can execute arbitrary code embedded in the file.
# Only load weight files that you created yourself or otherwise trust.
#
# No empty-list placeholders are bound beforehand: if a file is missing or
# unreadable the cell fails and leaves weights_in / weights_out undefined,
# instead of leaving [] behind for a later cell to install as weights.
with open('weight_in1_grades', 'rb') as f:
    weights_in = pickle.load(f)

with open('weight_out1_grades', 'rb') as f:
    weights_out = pickle.load(f)

with open('variables.json', 'r') as f:
    try:
        scaled_features = json.load(f)
    # An empty or malformed file makes json.load raise ValueError
    # (json.JSONDecodeError is a subclass); fall back to no scalings.
    except ValueError:
        scaled_features = {}

In [114]:
# Replace the randomly initialised weights with the ones loaded from disk.
(network.weights_input_to_hidden,
 network.weights_hidden_to_output) = weights_in, weights_out


In [115]:
#scaled_features

In [123]:
# Read the raw measurements and preview the first rows.
data_path = 'rct.csv'
df = pd.read_csv(filepath_or_buffer=data_path)
df.head(n=5)

Unnamed: 0,grade,label,rct,tsi,stfi,caliper,moisture,basisweight,cull
0,NL050550,4/16/2017 2:36,123.0,4.88,,15.56,0.00,0.0,119.5
1,NL050550,4/16/2017 4:09,127.0,5.15,,15.56,0.00,0.0,119.5
2,NL250740,1/7/2017 15:55,151.0,4.53,,19.24,0.00,0.0,149.5
3,NL110550,2/19/2017 16:20,121.0,4.53,18.1,9.38,6.21,0.0,119.5
4,NL050550,4/16/2017 0:59,123.0,#DIV/0!,,15.67,#DIV/0!,0.0,119.5


In [124]:
# Keep a row only when all three conditions hold:
#   * its moisture text does not contain the spreadsheet error '#DIV/0!'
#   * its tsi text does not contain '#DIV/0!'
#   * its stfi value is finite (not NaN or infinite)
# A missing moisture/tsi entry makes str.contains return NaN, which is not
# equal to False, so such rows are dropped as well.
moisture_ok = df['moisture'].str.contains('#DIV/0!').eq(False)
tsi_ok = df['tsi'].str.contains('#DIV/0!').eq(False)
stfi_ok = np.isfinite(df['stfi'])
df = df[moisture_ok & tsi_ok & stfi_ok]



Unnamed: 0,grade,label,rct,tsi,stfi,caliper,moisture,basisweight,cull
3,NL110550,2/19/2017 16:20,121.0,4.53,18.1,9.38,6.21,0.0,119.5
179,NL110550,3/28/2017 3:17,125.0,4.9,26.95,15.3,6.07,51.63,119.5
206,NL110550,1/4/2017 15:36,129.0,4.89,23.08,14.92,6.49,51.84,119.5
234,NL110550,3/8/2017 16:31,123.0,4.89,27.33,15.42,6.14,51.94,119.5
276,NL110550,3/5/2017 1:27,129.0,5.27,28.32,15.37,6.25,52.1,119.5


In [125]:
# Convert the remaining measurement columns to floats in a single pass.
df = df.astype({'tsi': float, 'moisture': float, 'cull': float})

In [126]:

# One-hot encode each categorical field and append the indicator columns to df.
dummy_fields = ['grade']
for each in dummy_fields:
    dummies = pd.get_dummies(df[each], prefix=each, drop_first=False)
    df = pd.concat((df, dummies), axis='columns')

# The modelling frame omits the timestamp label and the raw categorical field.
fields_to_drop = ['label', 'grade']
data = df.drop(columns=fields_to_drop)
data.head()

34

In [127]:
quant_features = ['rct', 'tsi', 'stfi', 'caliper', 'moisture', 'basisweight', 'cull']

# Standardise every quantitative column to zero mean and unit standard
# deviation, remembering [mean, std] per column so values can be converted
# back to their original units later.
scaled_features = {}
for column in quant_features:
    column_mean = data[column].mean()
    column_std = data[column].std()
    scaled_features[column] = [column_mean, column_std]
    data.loc[:, column] = (data[column] - column_mean) / column_std

# Separate the target column from the model inputs.
target_fields = ['rct']
targets = data[target_fields]
features = data.drop(columns=target_fields)
test_features, test_targets = data.drop(columns=target_fields), data[target_fields]

# The training set is the full data; the validation set is every row from
# position 200 onwards.
train_features, train_targets = features, targets
val_features, val_targets = features[200:], targets[200:]

In [128]:
# Mean and standard deviation used to standardise the target; needed to
# convert network outputs and targets back to original units.
mean, std = scaled_features['rct']
value = []  # not used in this cell; kept in case another cell reads it

testFeatures = features.reset_index(drop=True)
testTargets = targets.reset_index(drop=True)

# One batched forward pass instead of a Python loop over rows. The cast to
# float guards against mixed float/bool (one-hot) columns producing an
# object array that np.exp cannot handle. Column 0 is the first output node,
# matching the original per-row `prediction[0]`.
predictions = np.asarray(network.run(testFeatures.values.astype(float)))[:, 0] * std + mean
xs = list(predictions)

# Targets converted back to original units.
ys = testTargets['rct'] * std + mean

# `ys` carries a fresh 0..n-1 index while `data` still has the row labels
# left over from filtering. Assigning the Series directly would align on
# those labels and scramble / NaN the values, so assign positionally.
data['rct'] = ys.values
data['prediction'] = xs

In [129]:
# Write the frame, including the prediction column, to disk.
data.to_csv(path_or_buf='t1predic_grades.csv')