In [41]:
import json
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [42]:
class NeuralNetwork(object):
    """Fully connected network with one sigmoid hidden layer and a linear output layer.

    Attributes:
        input_nodes, hidden_nodes, output_nodes: layer sizes given at construction.
        weights_input_to_hidden: array of shape (input_nodes, hidden_nodes).
        weights_hidden_to_output: array of shape (hidden_nodes, output_nodes).
        lr: learning rate applied to the averaged weight step in `train`.
        activation_function: element-wise sigmoid used by the hidden layer.
    """

    def __init__(self, input_nodes, hidden_nodes, output_nodes, learning_rate):
        """Store the layer sizes and draw random initial weights.

        Args:
            input_nodes: number of input features.
            hidden_nodes: number of hidden units.
            output_nodes: number of outputs.
            learning_rate: step size used by `train`.
        """
        # Set number of nodes in input, hidden and output layers.
        self.input_nodes = input_nodes
        self.hidden_nodes = hidden_nodes
        self.output_nodes = output_nodes

        # Initialize weights from zero-mean normals whose standard deviation is
        # (number of units feeding the layer) ** -0.5.
        self.weights_input_to_hidden = np.random.normal(
            0.0, self.input_nodes**-0.5, (self.input_nodes, self.hidden_nodes))

        self.weights_hidden_to_output = np.random.normal(
            0.0, self.hidden_nodes**-0.5, (self.hidden_nodes, self.output_nodes))
        self.lr = learning_rate

        # Sigmoid activation for the hidden layer.
        self.activation_function = lambda x: 1 / (1 + np.exp(-x))

    def train(self, features, targets):
        """Run one batch gradient-descent update on both weight matrices.

        The per-record weight steps are accumulated over the batch, averaged
        over the number of records, scaled by `self.lr` and added to the
        weights in place.

        Args:
            features: 2-D array-like, one record per row (ndarray, nested list
                or DataFrame), with `input_nodes` values per row.
            targets: array-like with one target entry per record.

        An empty batch leaves the weights untouched (averaging over zero
        records would otherwise write NaN into every weight).
        """
        # Coerce up front so DataFrames/lists iterate row by row; iterating a
        # DataFrame directly would yield column labels instead of records.
        features = np.asarray(features, dtype=float)
        targets = np.asarray(targets, dtype=float)

        n_records = features.shape[0]
        if n_records == 0:
            return

        delta_weights_i_h = np.zeros(self.weights_input_to_hidden.shape)
        delta_weights_h_o = np.zeros(self.weights_hidden_to_output.shape)
        for X, y in zip(features, targets):
            # Forward pass.
            hidden_inputs = np.dot(X, self.weights_input_to_hidden)  # signals into hidden layer
            hidden_outputs = self.activation_function(hidden_inputs)  # signals from hidden layer
            final_outputs = np.dot(hidden_outputs, self.weights_hidden_to_output)  # linear output

            # Output layer error is the difference between desired target and actual output.
            error = y - final_outputs

            # The output activation is the identity, so its derivative is 1 and
            # the output error term equals the raw error.
            output_error_term = error

            # Propagate the error back through the hidden-to-output weights and
            # scale by the sigmoid derivative h * (1 - h).
            hidden_error = np.dot(self.weights_hidden_to_output, output_error_term)
            hidden_error_term = hidden_error * hidden_outputs * (1 - hidden_outputs)

            # Weight step (input to hidden): outer product of X and the hidden error term.
            delta_weights_i_h += hidden_error_term * X[:, None]

            # Weight step (hidden to output): outer product of hidden outputs and output error term.
            delta_weights_h_o += output_error_term * hidden_outputs[:, None]

        # Apply the averaged gradient-descent step.
        self.weights_hidden_to_output += self.lr * delta_weights_h_o / n_records
        self.weights_input_to_hidden += self.lr * delta_weights_i_h / n_records

    def run(self, features):
        """Forward pass: return the network output for `features`.

        Args:
            features: a single record (1-D) or a batch (2-D, one record per
                row); the last axis must match the rows of
                `weights_input_to_hidden`.

        Returns:
            The output-layer signals (no activation is applied to the output).
        """
        hidden_inputs = np.dot(features, self.weights_input_to_hidden)  # signals into hidden layer
        hidden_outputs = self.activation_function(hidden_inputs)  # signals from hidden layer

        # Output layer is linear, so its input is also its output.
        final_outputs = np.dot(hidden_outputs, self.weights_hidden_to_output)

        return final_outputs

In [43]:
# Network configuration. The randomly initialised weights created here are
# replaced by weights loaded from disk in a later cell.
learning_rate = 0.001
hidden_nodes = 3200
output_nodes = 1
N_i = 6  # number of input nodes

network = NeuralNetwork(
    input_nodes=N_i,
    hidden_nodes=hidden_nodes,
    output_nodes=output_nodes,
    learning_rate=learning_rate,
)

In [44]:
# Restore previously saved network weights and scaling statistics from disk.
#
# SECURITY: pickle.load can execute arbitrary code while unpickling. Only load
# weight files that were produced by this project; never files from an
# untrusted source.
def _load_pickle(path):
    """Return the object unpickled from the binary file at `path`."""
    with open(path, 'rb') as f:
        return pickle.load(f)


weights_in = _load_pickle('t3weight_in_no_grades')
weights_out = _load_pickle('t3weight_out_no_grades')

# NOTE(review): a later cell rebuilds `scaled_features` from the data itself,
# so the values read here appear to be overwritten before use - confirm which
# set of statistics is meant to be used.
with open('t3variables.json', 'r') as f:
    try:
        scaled_features = json.load(f)
    # An empty (or otherwise unparsable) file raises ValueError; fall back to no scalings.
    except ValueError:
        scaled_features = {}

# Swap the randomly initialised weights for the saved ones.
network.weights_input_to_hidden = weights_in
network.weights_hidden_to_output = weights_out

In [45]:
# Read the raw measurements; the path is relative to the notebook's working directory.
data_path = 't3rct.csv'
df = pd.read_csv(filepath_or_buffer=data_path)

# Bare last expression so the notebook renders a preview of the first rows.
df.head()

Unnamed: 0,grade,label,rct,tsi,stfi,caliper,moisture,basisweight,cull
0,NL050550,4/16/2017 2:36,123,4.88,,15.56,0,0.0,119.5
1,NL050550,4/16/2017 4:09,127,5.15,,15.56,0,0.0,119.5
2,NL250740,1/7/2017 15:55,151,4.53,,19.24,0,0.0,149.5
3,NL110550,2/19/2017 16:20,121,4.53,18.1,9.38,6.21,0.0,119.5
4,NL050550,4/16/2017 0:59,123,#DIV/0!,,15.67,#DIV/0!,0.0,119.5


In [46]:
# Cast the measurement columns to text so rows carrying the spreadsheet error
# marker '#DIV/0!' can be found with string matching.
for col in ['tsi', 'stfi', 'moisture', 'rct', 'caliper', 'basisweight', 'cull']:
    df[col] = df[col].astype(str)

# Drop every row that has the error marker in any of those columns.
for col in ['moisture', 'tsi', 'stfi', 'rct', 'caliper', 'basisweight', 'cull']:
    df = df[df[col].str.contains('#DIV/0!') == False]

# Drop rows whose stfi text contains 'nan'.
df = df[df.stfi.str.contains('nan') == False]

# Keep only rows in which no cell compares equal to 0.
# NOTE(review): the seven columns above hold strings at this point, so they
# presumably never compare equal to the integer 0; only columns that were not
# cast can make this filter drop a row - confirm that is the intent.
no_zero_cells = (df != 0).all(axis=1)
df = df.loc[no_zero_cells, :]


In [47]:
# Convert the cleaned text columns back to floating point, one column at a time.
for col in ('tsi', 'moisture', 'stfi', 'cull', 'basisweight', 'caliper', 'rct'):
    df[col] = df[col].astype(float)


In [48]:
# One-hot encoding of 'grade' (pd.get_dummies) is disabled; 'grade' is dropped
# together with 'label' and is not used as a feature.
fields_to_drop = ['label', 'grade']
data = df.drop(fields_to_drop, axis='columns')

In [50]:
quant_features = ['rct', 'tsi', 'stfi', 'caliper', 'moisture', 'basisweight', 'cull']

# Standardise each quantitative column to zero mean / unit standard deviation,
# remembering [mean, std] per column so values can be converted back later.
scaled_features = {}
for col in quant_features:
    mean = data[col].mean()
    std = data[col].std()
    scaled_features[col] = [mean, std]
    data.loc[:, col] = (data[col] - mean) / std

# The first 1000 rows stay in `data`; everything after them becomes `test_data`.
data, test_data = data[:1000], data[1000:]

# Separate the target column from the remaining feature columns.
target_fields = ['rct']
features = data.drop(target_fields, axis='columns')
targets = data[target_fields]
test_features = test_data.drop(target_fields, axis='columns')
test_targets = test_data[target_fields]

In [51]:
# Scaling statistics of the target, used to convert standardised values back
# to the original 'rct' scale.
mean, std = scaled_features['rct']

# Kept in case later cells use them.
# NOTE(review): despite the names, these are index-reset copies of `features`
# and `targets` (the first-1000-row split), not of `test_features` /
# `test_targets`, and nothing in this cell reads them.
testFeatures = features.reset_index(drop=True)
testTargets = targets.reset_index(drop=True)

# One batched forward pass over all rows instead of a Python-level loop over
# iterrows(). Column 0 is the first network output, i.e. the value the
# per-row version picked with prediction[0].
xs = (network.run(features.values)[:, 0] * std + mean).tolist()

# Targets converted back to the original scale.
ys = targets['rct'] * std + mean

# `data` is a slice of the scaled frame; take an explicit copy before adding
# columns so pandas does not emit SettingWithCopyWarning.
data = data.copy()
data['rct'] = ys
data['prediction'] = xs

In [52]:
# Write the frame (including its index) to CSV in the working directory.
data.to_csv(path_or_buf='t3comparison.csv')