# linregex1.py — multivariate linear regression fit with minibatch gradient descent
import numpy as np
from toolz import identity, pluck
import linreg as lr
import glm
import metrics
from utility import Scaler
from ml_util import train_test_split
from out_utils import plot_cost, plot_errors
# Load the data: every column except the last is a feature, the last is the target.
# (Renamed from `input`, which shadowed the Python builtin.)
raw = np.loadtxt('./data/Folds.csv', delimiter=',', skiprows=1)
Z = raw[:, :-1]   # feature matrix (all rows, all but last column)
y = raw[:, -1]    # target vector (last column)
# Materialize the pairs: zip() returns a one-shot iterator in Python 3 and
# would be silently exhausted if train_test_split (or anything downstream)
# iterated it more than once.
data = list(zip(Z, y))
# Split into a train set and test set (33% held out for testing)
train_data, test_data = train_test_split(data, 0.33)
# Scale the training data
scale = Scaler()
Z_train, y_train = zip(*train_data)
scale.fit(Z_train)
X_train = scale.transform(Z_train)
scaledtrain_data = list(zip(X_train, y_train))
# Scale the testing data using the same scaling parameters
# used for the training data (never fit the scaler on test data)
Z_test, y_test = zip(*test_data)
X_test = scale.transform(Z_test)
print('****Minibatch Gradient Descent****')
print('\n--Training--\n')
# Hyperparameters passed through to glm.fit:
#   eta         - learning rate
#   epochs      - number of passes over the training data
#   minibatches - number of minibatches per epoch
#   adaptive    - presumably a per-epoch learning-rate decay factor; confirm in glm.fit
hyperparam = {'eta': 0.3,
              'epochs': 300,
              'minibatches': 1,
              'adaptive': 0.99}
print('\nHyperparameters\n')  # fixed user-facing typo: was 'Hyperparamters'
for k, v in hyperparam.items():
    print(k, '\t', v)
print('\nNumber of Training Examples: ', X_train.shape[0], '\n')
# Fit the coefficients by minimizing the linear-regression cost lr.J
# using its gradient lr.gradJ over the scaled training pairs.
h_thetaf, cost = glm.fit(lr.J,
                         lr.gradJ,
                         hyperparam,
                         scaledtrain_data)
plot_cost(cost)
# Map the coefficients back to the original (unscaled) feature space for reporting.
h_thetad = scale.denormalize(h_thetaf)
print('Coefficients\t', h_thetaf)
for i, h_theta in enumerate(h_thetad):
    print('h_theta' + str(i), '\t', h_theta)
# In-sample predictions and fit quality (identity link => plain linear regression).
yp_train = glm.predict(identity, X_train, h_thetaf)
plot_errors(y_train, yp_train)
corr_train = metrics.r2(X_train, y_train, h_thetaf)
print('R**2\t', corr_train)
# Evaluate the fitted coefficients on the held-out, pre-scaled test set.
print('\n--Testing--')
test_predictions = glm.predict(identity, X_test, h_thetaf)
plot_errors(y_test, test_predictions)
# Coefficient of determination on unseen data.
test_r2 = metrics.r2(X_test, y_test, h_thetaf)
print('R**2\t', test_r2)