# File: 2-Adjusted_3perc_MedianRepl.R (47 lines, 36 loc, 1.83 KB)
#################################
# #
# Winton Stock Market Challenge #
# #
#################################
# ASSUMPTION: The required data files are downloaded from competition site and made available locally.
# COMPETITION SITE URL: https://www.kaggle.com/c/the-winton-stock-market-challenge
# House-keeping: remove all (non-hidden) objects from the workspace, then
# trigger a garbage collection.
rm(list = ls())
gc()
# Make the competition folder the working directory; every relative path used
# by this script is resolved against it.
setwd("C:/courses/kaggle/Winton")
# Load the training and test sets from their CSV files, keeping text columns as
# character vectors rather than factors.
trainall <- read.csv(file = "train.csv", stringsAsFactors = FALSE) # 40000 x 211
testall <- read.csv(file = "test_2.csv", stringsAsFactors = FALSE) # 120000 x 147
# Scale values by 3%: strictly positive values are multiplied by 1.03, all
# other values (negative or zero) are multiplied by 0.97.
#
# The function is vectorised: `x` may be a numeric vector of any length
# (including zero), and NA elements yield NA instead of raising the
# "missing value where TRUE/FALSE needed" error a scalar `if` would produce.
#
# Args:
#   x: numeric scalar or vector.
# Returns:
#   Numeric vector of the same length as `x` holding the adjusted values.
myfunc <- function(x) {
  # Pick the factor element by element; a plain `if` only accepts a single
  # TRUE/FALSE condition and therefore cannot handle vectors.
  x * ifelse(x > 0, 1.03, 0.97)
}
# Read the submission CSV file and impute the result
# NOTE: The three lines of code below took a very long time to impute the data and save it to a CSV file,
# so they have been commented out and the results are exported to a separate text file instead.
# That text file is then read by a Java program, which writes the CSV file for submission.
### submission <- read.csv("sample_submission_2.csv", stringsAsFactors=FALSE) # 120000 x 147
### sample_submission$Predicted <- as.numeric(replicate(120000, apply(trainall[, 147:208],
### FUN=median, MARGIN = 2)))
# Calculate the per-column median of training columns 147:208 (62 columns).
# NOTE(review): the test set is described in this script as having 147 columns.
# If those mirror the first 147 training columns, column 147 would be an input
# rather than a target and the intended range may be 148:209 -- confirm against
# the competition data description before changing it.
medianvalue <- apply(trainall[, 147:208], FUN = median, MARGIN = 2)
# Compute the 3% adjusted median. The medians are passed one at a time so this
# works whether myfunc() accepts only a scalar or a whole vector.
medianvalue <- vapply(medianvalue, myfunc, numeric(1))
# Build one "_<k>,<value>" line per median, as required by the submission file.
# The index is derived from the number of medians: the previous hard-coded
# seq(1:61) was one short of the 62 selected columns, so paste0() recycled it
# and labelled the last value "_1" instead of "_62".
df <- data.frame(
  line = paste0("_", seq_along(medianvalue), ",", medianvalue),
  stringsAsFactors = FALSE
)
# Name the column with the text that the original as.data.frame() call derived
# from its argument expression, so the header line of result.txt should stay
# unchanged for the downstream reader.
names(df) <- 'paste0("_", seq(1:61), ",", medianvalue)'
# Write the lines to the intermediary text file consumed by the Java program
write.table(df, "result.txt", row.names = FALSE, quote = FALSE)