-
Notifications
You must be signed in to change notification settings - Fork 0
/
regressionTrain.scala
121 lines (108 loc) · 4.18 KB
/
regressionTrain.scala
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import BIDMat.{Mat, FMat, DMat, IMat, CMat, BMat, CSMat, SMat, SDMat, GMat, GIMat, GSMat, HMat}
import BIDMat.MatFunctions._
import BIDMat.SciFunctions._
import BIDMat.Solvers._
import BIDMat.Plotting._
import scala.collection.mutable.ListBuffer
import scala.math
// Script entry points: run the small dense sanity test first, then the full
// sparse train/test job. (Forward references to the objects defined below —
// legal because the script body is wrapped and objects are initialized lazily.)
lineTest.main(Array())
trainAndTest.main(Array())
// Linear least-squares regression trained by batch gradient descent over
// blocks of sparse examples.
//
// XList : example blocks. Each column of a block is an example, each row is a feature.
// YList : label blocks. Row i of YList(k) is the label for column i of XList(k).
// a     : the gradient-descent step size (learning rate).
// t     : convergence threshold on the summed absolute gradient per pass.
class trainer(XList: ListBuffer[SMat], YList: ListBuffer[FMat], a: Double, t: Double) {
  println("checking that num examples matches num labels")
  if ( XList.length != YList.length ) { println("ERROR: num examples does not match num labels") }

  // Number of features, taken from the first block; all blocks must agree.
  var numWeights = XList(0).nrows
  println("checking that all example blocks have same nrows")
  for ( X <- XList ) { if ( X.nrows != numWeights ) { println("ERROR: all X blocks do not have same nrows") } }

  val THRESHOLD: Double = t
  var ALPHA: Double = a
  // Learned weight column vector, one entry per feature.
  var w: FMat = zeros(numWeights, 1)

  // Mean gradient of the squared-error loss over one block:
  //   grad = (2 / ncols) * X * (X' * w - Y)
  def gradients(X: SMat, Y: FMat): FMat = {
    // fixed typo in message: "to not match" -> "do not match"
    if ( X.ncols != Y.nrows ) { println("ERROR: block dimensions do not match") }
    //val combo = (w.t * X).t
    val combo = X Tmult(w, null)  // X' * w; X is sparse, w is a COLUMN!!!
    val diff = combo - Y          // per-example residuals
    val twice_diff = diff * 2.0f
    //var gs = X * twice_diff
    var gs = X SMult(sparse(twice_diff), null)
    gs = gs /@ X.ncols            // average over the block's examples
    // DO RIDGE REGULARIZATION HERE
    gs
  }

  // Element-wise absolute gradient for a block; used as the convergence measure.
  def error(X: SMat, Y: FMat): FMat = abs(gradients(X, Y))

  // Predict the label of a single example given as a ROW vector.
  def predict(x: FMat): Float = (x * w)(0, 0)

  // --- training (runs at construction time) ---
  println("zipping examples")
  val examples = XList.zip(YList)
  var iters = 0
  var err: FMat = zeros(numWeights, 1)
  println("calculating initial err")
  for ( (e, l) <- examples ) {
    err += error(e, l)
  }
  var errScore: Float = sum(err, 1)(0, 0)

  // One full pass over all blocks per iteration; stop once the summed
  // absolute gradient drops below THRESHOLD.
  println("off to the races")
  while ( errScore > THRESHOLD ) {
    err = zeros(numWeights, 1)
    for ( (e, l) <- examples ) {
      val gs = gradients(e, l)
      w -= gs * ALPHA
      err += abs(gs)
    }
    iters += 1
    errScore = sum(err, 1)(0, 0)
    println("===============================================================")
    println("Trained " + iters + " iterations") // fixed missing space before "iterations"
    println("Sum of gradients: " + errScore)
    println("largest weight: " + maxi(w, 1)(0, 0))
    println("smallest weight: " + mini(w, 1)(0, 0))
    println("weight 1: " + w(0, 0))
    println("weight 2: " + w(1, 0))
    println("===============================================================")
  }
}
object lineTest {
  // Sanity check: fit a tiny dense 3x3 system with the gradient-descent
  // trainer and compare the learned weights against the direct
  // least-squares solution (X \\ Y).
  def main(args: Array[String]) {
    val X: FMat = (1 \ 2 \ 3) on (1 \ 1 \ 1) on (1 \ 1 \ 1)
    val Y: FMat = 1 on 2 on 3
    println("X:\n" + X + "\nY:\n" + Y)
    // Wrap the single example/label pair into one-element block lists.
    val xBlocks = new ListBuffer[SMat]()
    xBlocks += sparse(X)
    val yBlocks = new ListBuffer[FMat]()
    yBlocks += Y
    val classifier = new trainer(xBlocks, yBlocks, 0.001, 0.0001)
    println("Learned weights:\n" + classifier.w)
    println("Weights should be:\n" + X \\ Y)
  }
}
object trainAndTest {
  // Load the sparse count blocks and labels from disk, train a regression
  // model, then print predictions for the first block alongside the true labels.
  def main(args: Array[String]) {
    println("loading data")
    val xList: ListBuffer[SMat] = new ListBuffer()
    for ( blockNum <- 1 to 975 ) {
      val block: SMat = load("CountsOut/CountsStemmedX" + blockNum + ".mat", blockNum + "CountsStemmedX")
      xList += block
    }
    //val lastBlock: SMat = load("CountsOut/CountsStemmedLastX.mat", "LastCountsStemmedX")
    //xList += lastBlock
    var y: IMat = load("CountsOut/CountsY.mat", "CountsY")
    y = y.t
    // Chop the labels into blocks of 1000; a trailing partial block is
    // dropped, matching the commented-out last X block above.
    val yList: ListBuffer[FMat] = new ListBuffer()
    for ( start <- 0 to y.ncols by 1000 if start + 999 < y.ncols ) {
      yList += FMat(y(?, start to start + 999).t)
    }
    println("creating and training classifier")
    val classifier = new trainer(xList, yList, 0.000001, 0.001)
    // Spot-check: predict every example in the first block.
    var testX = full(xList(0))
    var testY = yList(0)
    for ( col <- 0 until testX.ncols ) {
      println("classifier predicted: " + classifier.predict(testX(?, col).t))
      println("actually was: " + testY(col, 0))
    }
  }
}