In [1]:
# Import libraries
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import pickle

In [2]:
def load_data(fname):
    f = open(fname, "rb")
    data = pickle.load(f)
    f.close()
    return data

In [3]:
# Loading object file is faster than loading xlsx file
data = load_data("wagedata.dat")

In [11]:
# Variables to train model
data_vars = ["educ","exper","expersq","KWW","black","smsa","enroll"]
# Remove NaN rows 
data_ = data[data_vars + ["lwage"]].dropna()
# Just use some variable in data
X_data = data_[data_vars]
# Add ones to first column of matrix
# X = np.concatenate((np.ones(shape=(data_.shape[0],1)) , np.array(X_data)), axis=1)
X = X_data.T
# Log wage is the output
Y = np.array(data_["lwage"]).reshape(len(data_["lwage"]), 1).T
# Split data to train and test, shuffle the data
# X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2, random_state = 1)

# print("X shape: {0} \nY shape: {1}".format(X_train.shape, Y_train.shape))

In [36]:
# cost or loss function  
def cost(Y, Yhat):    
    N = X.shape[1]
    return 0.5 * np.linalg.norm(Y - Yhat) ** 2 / N

In [42]:
d0 = X.shape[0]
d1 = h = 3 # size of hidden layer 
d2 = C = 1
# initialize parameters randomely 
W1 = 0.01*np.random.randn(d0, d1)
b1 = np.zeros((d1, 1))
W2 = 0.01*np.random.randn(d1, d2)
b2 = np.zeros((d2, 1))

# X = X.T # each column of X is a data point 
# Y = convert_labels(y, C)
N = X.shape[1]
eta = 1 # learning rate 
lastloss = -1
for i in range(10000):
    ## Feedforward 
    Z1 = np.dot(W1.T, X) + b1 
    A1 = np.maximum(Z1, 0)
    Z2 = np.dot(W2.T, A1) + b2
    # import pdb; pdb.set_trace()  # breakpoint 035ab9b5 //
    Yhat = Z2
    
    # compute the loss: average cross-entropy loss
    loss = cost(Y, Yhat)
    # print loss after each 1000 iterations
    if i %1000 == 0: 
        print("iter %d, loss: %f" %(i, loss))
        if loss == lastloss:
            break
        lastloss = loss
    
    # backpropagation
    E2 = (Yhat - Y )/N
    dW2 = np.dot(A1, E2.T)
    db2 = np.sum(E2, axis = 1, keepdims = True)
    E1 = np.dot(W2, E2)
    E1[Z1 <= 0] = 0 # gradient of ReLU 
    dW1 = np.dot(X, E1.T)
    print(E2)
    db1 = np.sum(E1, axis = 1, keepdims = True)
    
    # Gradient Descent update 
    # import pdb; pdb.set_trace()  # breakpoint 47741f63 //
    W1 += -eta*dW1
    b1 += -eta*db1
    W2 += -eta*dW2
    b2 += -eta*db2

iter 0, loss: 19.714499
[[-0.00213035 -0.00208433 -0.00222226 ... -0.00209711 -0.00221788
  -0.00211385]]
[[0.05222607 0.02405252 0.05835958 ... 0.01339293 0.03986163 0.01635481]]
[[-0.0264226  -0.02637859 -0.0265152  ... -0.02639167 -0.02651143
  -0.02640813]]
[[-1.43214866e-05  2.96905992e-05 -1.06918177e-04 ...  1.66156374e-05
  -1.03152403e-04  1.49225617e-07]]
[[-1.43214866e-05  2.96905992e-05 -1.06918177e-04 ...  1.66156374e-05
  -1.03152403e-04  1.49225617e-07]]
[[-1.43214866e-05  2.96905992e-05 -1.06918177e-04 ...  1.66156374e-05
  -1.03152403e-04  1.49225617e-07]]
[[-1.43214866e-05  2.96905992e-05 -1.06918177e-04 ...  1.66156374e-05
  -1.03152403e-04  1.49225617e-07]]
[[-1.43214866e-05  2.96905992e-05 -1.06918177e-04 ...  1.66156374e-05
  -1.03152403e-04  1.49225617e-07]]
[[-1.43214866e-05  2.96905992e-05 -1.06918177e-04 ...  1.66156374e-05
  -1.03152403e-04  1.49225617e-07]]
[[-1.43214866e-05  2.96905992e-05 -1.06918177e-04 ...  1.66156374e-05
  -1.03152403e-04  1.49225617e-0

In [25]:
print(X.shape)
print(W1.shape)
print(b1.shape)
print(W2.shape)
print(b2.shape)

(7, 2963)
(100, 2963)
(100, 1)
(100, 1)
(1, 1)


In [41]:
Z1 = np.dot(W1.T, X) + b1 
A1 = np.maximum(Z1, 0)
Z2 = np.dot(W2.T, A1) + b2
# import pdb; pdb.set_trace()  # breakpoint 035ab9b5 //
Yhat = Z2
print(np.linalg.norm(Y - Y.mean()) ** 2)
train_score = 1 - np.linalg.norm(Y - Yhat) ** 2 / np.linalg.norm(Y - Y.mean()) ** 2
print("Train R-square: {0}".format(train_score))

577.1072967645975
Train R-square: 0.0


In [50]:
# first neural network with keras tutorial
from numpy import loadtxt
from keras.models import Sequential
from keras.layers import Dense
# load the dataset
#dataset = loadtxt('pima-indians-diabetes.csv', delimiter=',')
# split into input (X) and output (y) variables
#X = dataset[:,0:8]
#y = dataset[:,8]
# define the keras model
model = Sequential()
model.add(Dense(20, input_shape=(7,), activation='relu'))
model.add(Dense(1, activation='linear'))
# compile the keras model
model.compile(loss='mean_squared_error', optimizer='adam', metrics=['accuracy'])
# fit the keras model on the dataset
model.fit(X.T, Y.T, epochs=150, batch_size=10)

Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
Epoch 75/150
Epoch 76/150
Epoch 77/150
Epoch 78

ValueError: Data cardinality is ambiguous:
  x sizes: 7
  y sizes: 1
Make sure all arrays contain the same number of samples.

In [51]:
# evaluate the keras model
_, accuracy = model.evaluate(X.T, Y.T)
print('Accuracy: %.2f' % (accuracy*100))

Accuracy: 0.00
