-
Notifications
You must be signed in to change notification settings - Fork 4
/
scriptSVD500.R
92 lines (80 loc) · 2.82 KB
/
scriptSVD500.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
library("rjson")
library("irlba")
# Load the business ids from the business dataset, which holds one JSON
# record per line.
#
# Produces:
#   numberOfBusinesses - number of records (lines) in the file
#   businesses         - 1 x numberOfBusinesses character matrix of the
#                        `business_id` field of each record, in file order
businessDatasetFile <- "Dataset/yelp_academic_dataset_business.json"
# Passing the path (rather than an already-open connection) makes readLines()
# open and close its own connection, so no handle is left open if parsing
# fails later and nothing needs to be closed by hand.
businessDatasetFileLines <- readLines(businessDatasetFile)
numberOfBusinesses <- length(businessDatasetFileLines)
# vapply() handles an empty file (zero iterations) and checks that every
# record yields exactly one character id. USE.NAMES = FALSE stops the raw
# JSON lines from being attached as names of the result.
businesses <- matrix(
  vapply(
    businessDatasetFileLines,
    function(line) fromJSON(line, method = "C")$business_id,
    character(1),
    USE.NAMES = FALSE
  ),
  nrow = 1,
  ncol = numberOfBusinesses
)
# Drop the raw lines before the next large allocation.
remove("businessDatasetFile", "businessDatasetFileLines")
gc()
# Load the user ids from the user dataset, which holds one JSON record per
# line.
#
# Produces:
#   numberOfUsers - number of records (lines) in the file
#   users         - 1 x numberOfUsers character matrix of the `user_id`
#                   field of each record, in file order
userDatasetFile <- "Dataset/yelp_academic_dataset_user.json"
# Passing the path (rather than an already-open connection) makes readLines()
# open and close its own connection, so no handle is left open if parsing
# fails later and nothing needs to be closed by hand.
userDatasetFileLines <- readLines(userDatasetFile)
numberOfUsers <- length(userDatasetFileLines)
# vapply() handles an empty file (zero iterations) and checks that every
# record yields exactly one character id. USE.NAMES = FALSE stops the raw
# JSON lines from being attached as names of the result.
users <- matrix(
  vapply(
    userDatasetFileLines,
    function(line) fromJSON(line, method = "C")$user_id,
    character(1),
    USE.NAMES = FALSE
  ),
  nrow = 1,
  ncol = numberOfUsers
)
# Drop the raw lines before the next large allocation.
remove("userDatasetFile", "userDatasetFileLines")
gc()
# Build the dense user x business ratings matrix from the review dataset,
# which holds one JSON record per line, and report how long it took.
#
# Reads:    users, businesses, numberOfUsers, numberOfBusinesses
# Produces:
#   numberOfReviews - number of records (lines) in the review file
#   reviews         - numberOfUsers x numberOfBusinesses numeric matrix;
#                     cell [u, b] holds the `stars` value of the review by
#                     user u for business b, 0 where there is no review
#   ptm             - elapsed time for this section
ptm <- proc.time()
reviewDatasetFile <- "Dataset/yelp_academic_dataset_review.json"
# readLines() on a path opens and closes its own connection, so no handle is
# left open if parsing fails part-way through.
reviewDatasetFileLines <- readLines(reviewDatasetFile)
numberOfReviews <- length(reviewDatasetFileLines)
reviews <- matrix(0, nrow = numberOfUsers, ncol = numberOfBusinesses)

# Keep only the three fields that are needed from each record so the parsed
# records do not all have to be held in memory at once.
reviewUserIds <- character(numberOfReviews)
reviewBusinessIds <- character(numberOfReviews)
reviewStars <- numeric(numberOfReviews)
reviewJSON <- NULL
for (i in seq_len(numberOfReviews)) {
  reviewJSON <- fromJSON(reviewDatasetFileLines[i], method = "C")
  reviewUserIds[i] <- reviewJSON$user_id
  reviewBusinessIds[i] <- reviewJSON$business_id
  reviewStars[i] <- reviewJSON$stars
}

# Resolve every id to its position in one hashed lookup each, instead of a
# linear scan of all users and all businesses per review. match() returns the
# first position of an id and NA when the id is absent.
reviewUserIndex <- match(reviewUserIds, users)
reviewBusinessIndex <- match(reviewBusinessIds, businesses)
reviewIsKnown <- !is.na(reviewUserIndex) & !is.na(reviewBusinessIndex)
numberOfSkippedReviews <- sum(!reviewIsKnown)
if (numberOfSkippedReviews > 0) {
  message(
    numberOfSkippedReviews,
    " review(s) skipped: user_id or business_id not found"
  )
}
# Two-column (row, col) matrix indexing assigns in file order, so when the
# same user reviews the same business more than once the last review wins.
reviews[cbind(
  reviewUserIndex[reviewIsKnown],
  reviewBusinessIndex[reviewIsKnown]
)] <- reviewStars[reviewIsKnown]

# Drop every temporary so only the results stay in the workspace.
remove(
  "reviewDatasetFile", "reviewDatasetFileLines", "reviewJSON",
  "reviewUserIds", "reviewBusinessIds", "reviewStars",
  "reviewUserIndex", "reviewBusinessIndex", "reviewIsKnown",
  "numberOfSkippedReviews"
)
gc()
ptm <- proc.time() - ptm
cat("Time taken to build the reviews matrix\n")
print(ptm)
# Truncated SVD of the reviews matrix: ask irlba for 500 left and 500 right
# singular vectors, print the elapsed time, then checkpoint every object in
# the workspace (hidden names included) to SVD500.RData.
ptm <- proc.time()
reviewsSVDRank500 <- irlba(reviews, nv = 500, nu = 500)
ptm <- proc.time() - ptm
cat("Time taken to perform SVD using irlba", "\n", sep = "")
print(ptm)
gc()
save(file = "SVD500.RData", list = ls(all.names = TRUE))
# Rank sweep: for every rank i available in the SVD, measure how well the
# rank-i approximation reproduces the nonzero entries of `reviews`, and
# remember the rank with the smallest error.
#
# Reads:    reviews, reviewsSVDRank500 (components u, d, v)
# Produces:
#   Wlocations - (row, col) positions of the nonzero entries of `reviews`
#   minNorm    - smallest Frobenius norm of the residual over those entries
#   minRank    - rank at which minNorm was reached (0 if no rank was tried)
#   ptm        - elapsed time of the sweep
# and saves the whole workspace to SVD500withLRA.RData.
Wlocations <- which(reviews != 0, arr.ind = TRUE)
minRank <- 0
# Start from Inf so the first computed norm is always accepted, whatever the
# scale of the data.
minNorm <- Inf
ptm <- proc.time()

observedRows <- Wlocations[, 1]
observedCols <- Wlocations[, 2]
# Residual at the observed entries for the rank-0 approximation (all zeros).
observedResidual <- reviews[Wlocations]
# Sweep only over ranks for which u, d and v all have a component.
maxRank <- min(
  length(reviewsSVDRank500$d),
  ncol(reviewsSVDRank500$u),
  ncol(reviewsSVDRank500$v)
)

for (i in seq_len(maxRank)) {
  # The rank-i approximation is the rank-(i - 1) one plus d[i] * u_i v_i^T,
  # whose (r, c) entry is d[i] * u[r, i] * v[c, i]. Subtracting that term from
  # the running residual avoids forming the dense nrow x ncol product, and
  # scales each singular vector by its own singular value (multiplying the
  # whole u matrix by the vector d would recycle d down the rows instead).
  observedResidual <- observedResidual -
    reviewsSVDRank500$d[i] *
      reviewsSVDRank500$u[observedRows, i] *
      reviewsSVDRank500$v[observedCols, i]
  fNorm <- sqrt(sum(observedResidual^2))
  cat("Fnorm for Rank ", i, " = ", fNorm, "\n")
  if (minNorm > fNorm) {
    minNorm <- fNorm
    minRank <- i
  }
  remove("fNorm")
}

# Drop the sweep's temporaries so they are not written into the saved
# workspace.
remove("observedRows", "observedCols", "observedResidual", "maxRank")
ptm <- proc.time() - ptm
print(ptm)
save(list = ls(all.names = TRUE), file = "SVD500withLRA.RData")