Skip to content

Commit

Permalink
Cleanup for release
Browse files Browse the repository at this point in the history
  • Loading branch information
mewo2 committed Jul 30, 2012
1 parent 8eabc27 commit 282672a
Show file tree
Hide file tree
Showing 5 changed files with 37 additions and 25 deletions.
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# EMI Music Hackathon entry

This is the code for the 3rd place entry in the [EMI Music Hackathon][hackathon]. It's really not something you should aspire to. The approach is described in more detail [here][blogpost].

[hackathon]: http://www.kaggle.com/c/MusicHackathon
[blogpost]: http://mewo2.com/kaggle/2012/07/29/emi-music-hackathon---how-i-did-it/
7 changes: 5 additions & 2 deletions blend.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,15 @@ train <- read.csv('data/train.csv');

ratings <- train$Rating;

# preds <- c('lm', 'svd', 'rf', 'demo', 'gbm', 'knn', 'rfbya', 'svdslow', 'lmbya2', 'lmbyt2', 'rfq1');
preds <- c('svd', 'rf', 'rfbya', 'svdslow', 'lmbya2', 'lmbyt2', 'rfq1');
preds <- c('lm', 'svd', 'rf', 'demo', 'gbm', 'knn', 'rfbya', 'svdslow', 'lmbya', 'lmbyt');
# preds <- c('svd', 'rf', 'rfbya', 'svdslow', 'lmbya2', 'lmbyt2', 'rfq1');

trains <- sapply(preds, function (name) read.csv(paste('predictions/', name, '.csv.cross', sep=''))$x)/100;
tests <- sapply(preds, function (name) read.csv(paste('predictions/', name, '.csv', sep=''))$x)/100;

# Print one line per base model: its name followed by the RMSE of its
# predictions (the column of `trains` read from that model's '.csv.cross'
# file) against the training ratings. `trains` was divided by 100 when it was
# loaded, so each column is scaled back up by 100 before scoring.
# seq_along() is used instead of 1:length() so that an empty `preds` vector
# produces no iterations rather than iterating over c(1, 0).
for (i in seq_along(preds)) {
  cat(preds[i], rmse(trains[,i] * 100, ratings), '\n');
}
mix <- nnet(trains, ratings, size=5, decay=0.1, maxit=500, linout=T, reltol=0, abstol=0, skip=T);
# mix <- lm(ratings ~ ., data=as.data.frame(trains))
print(summary(mix));
Expand Down
2 changes: 1 addition & 1 deletion funk.R
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ funksvd <- function (users, tracks, ratings, nfeats=16) {
# return(list(baseline=baseline,ufeats=ufeats, tfeats=tfeats));
}

funkpred <- function(train, test, ratings, nfeats=32) {
svdpred <- function(train, test, ratings, nfeats=16) {
sv <- funksvd(train$User, train$Track, ratings, nfeats);
pred <- sapply(1:nrow(test),
function (i) {
Expand Down
2 changes: 1 addition & 1 deletion funk.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ void funksvd(int* pn, int* users, int* tracks, double* ratings, int* pnfeats, in
int ntrack = *pntrack;
int min_loops = 30;
double min_improve = 1e-6;
double k = 5;
double k = 4;
double lrate = 1e-4;

int i = 0;
Expand Down
45 changes: 24 additions & 21 deletions predict.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,20 +16,14 @@ testfeats <- cleaned[[2]];
ratings <- cleaned[[3]];

# Fit a random forest to the training features and predict the test set.
#
# Arguments:
#   trainfeats - training features passed to randomForest() as the predictors
#   testfeats  - test features passed to predict()
#   ratings    - response values passed to randomForest()
# Returns a list with:
#   pred - predictions for `testfeats`
#   cv   - rf$predicted, the fitted forest's stored predictions for the
#          training rows (presumably the out-of-bag predictions -- confirm
#          against the randomForest documentation)
rfpred <- function (trainfeats, testfeats, ratings) {
# NOTE(review): the next two lines both assign `rf` and differ only in
# sampsize (1000 vs 50000); the second overwrites the first. This looks like
# the removed and added lines of a diff shown together -- confirm which one
# is meant to be live.
rf <- randomForest(trainfeats, ratings, do.trace=T, sampsize=1000, ntree=100);
rf <- randomForest(trainfeats, ratings, do.trace=T, sampsize=50000, ntree=100);
pred <- predict(rf, testfeats);
cv <- rf$predicted;
return(list(pred=pred, cv=cv));
}

svdpred <- function (trainfeats, testfeats, ratings, n=64) {
library(irlba);
mu <- mean(ratings);
spmat <- sparseMatrix(i=trainfeats$User + 1, j=trainfeats$Track + 1, x=ratings - mu);
sv <- irlba(spmat, nu=n, nv=n);
rec <- sv$u %*% (sv$d * t(sv$v));
pred <- sapply(1:nrow(testfeats), function (i) rec[testfeats$User[i] + 1, testfeats$Track[i] + 1]) + mu;
return(list(pred=pred));
svdslowpred <- function (trainfeats, testfeats, ratings) {
svdpred(trainfeats, testfeats, ratings, n=64);
}

svmpred <- function (trainfeats, testfeats, ratings) {
Expand Down Expand Up @@ -171,7 +165,7 @@ rfqpred <- function (train, test, ratings) {
return(list(pred=pred, cv=cv));
}

nndemopred <- function (train, test, ratings) {
demopred <- function (train, test, ratings) {
mu <- mean(ratings);
ages <- c(train$AGE, test$AGE)
ages <- cut(ages, quantile(ages, c(0, 0.2, 0.4, 0.6, 0.8, 1.0)));
Expand Down Expand Up @@ -229,19 +223,28 @@ nnpred <- function (train, test, ratings, k=16) {
return(list(pred=pred));
}
source('funk.R');
# s <- sample(nrow(testfeats), 100000)
# pred <- cross.val(remove.global(lmbytrackpred), 4, trainfeats, testfeats, ratings);
pred <- remove.global(rfqpred)(trainfeats, testfeats, ratings);

cat('Estimated RMSE: ', rmse(ratings, pred$cv), '\n');
save.pred <- function (name, pred) {
cat('Estimated RMSE: ', rmse(ratings, pred$cv), '\n');

argv <- commandArgs(T);
if (length(argv) == 0) {
filename <- 'predictions/scratch.csv';
} else {
filename <- argv[1];
filename <- paste('predictions/', name, '.csv', sep='');

write.csv(pred$pred, filename, row.names=F, quote=F);
write.csv(pred$cv, paste(filename, '.cross', sep=''), row.names=F, quote=F);
}

write.csv(pred$pred, filename, row.names=F, quote=F);
write.csv(pred$cv, paste(filename, '.cross', sep=''), row.names=F, quote=F);
# Driver: build each base model's predictions and hand them to save.pred()
# under the given name. Every model except 'rf' is wrapped in
# remove.global() and run through cross.val() with 10 as its second argument
# (presumably the number of folds -- confirm against cross.val's definition).

# Per-track and per-artist models.
save.pred('lmbyt', cross.val(remove.global(lmbytrackpred), 10, trainfeats, testfeats, ratings));
save.pred('lmbya', cross.val(remove.global(lmbyartistpred), 10, trainfeats, testfeats, ratings));
save.pred('rfbya', cross.val(remove.global(rfbyartistpred), 10, trainfeats, testfeats, ratings));

# 'rf' calls rfpred() directly, without cross.val() or remove.global().
save.pred('rf', rfpred(trainfeats, testfeats, ratings));
save.pred('gbm', cross.val(remove.global(gbmpred), 10, trainfeats, testfeats, ratings));
save.pred('lm', cross.val(remove.global(lmpred), 10, trainfeats, testfeats, ratings));

# SVD-based models.
save.pred('svd', cross.val(remove.global(svdpred), 10, trainfeats, testfeats, ratings));
save.pred('svdslow', cross.val(remove.global(svdslowpred), 10, trainfeats, testfeats, ratings));

# Saved as 'knn' and 'demo' but produced by nnpred and demopred respectively.
save.pred('knn', cross.val(remove.global(nnpred), 10, trainfeats, testfeats, ratings));
save.pred('demo', cross.val(remove.global(demopred), 10, trainfeats, testfeats, ratings));

# Signal that every model has been processed.
cat('Done\n')

0 comments on commit 282672a

Please sign in to comment.