In [1]:
library(grid)
library(rpart)
library(rpart.plot)
library(partykit)
library(lattice)
library(ggplot2)
library(caret)
library(dplyr)


Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union



In [2]:
set.seed(1)

In [3]:
invalids <- c('Time', 'Longitude', 'Latitude', 'Brake', 'Accel', 'flag', 'Curve100', 'Curve150', 'TimeHeadway')
valuables <- c("RoadType","CurveAverage","Speed", "Curve100", "Curve150","MaxSpeed","RiskFactor","Curve","DistSignal","Pitch","AheadDistance","AverageVelocity","TimeToCollision","AccelerationSpeed", "Engine", "SteeringAngle", "TimeHeadway", "Jerk", "LaneCount", "DiffAvgSpeed", "EmptinessOfRoad", "RoadFactor")

In [4]:
printf <- function(...) cat(sprintf(...))

In [50]:
trainAndPredict <- function(expr, train, test, verbose=FALSE) {        
    fit <- rpart(expr, data=train)
    p <- predict(fit, newdata=test, cp=0.018)
    predictedFlags <- colnames(p)[max.col(p, ties.method = "first")]

    showMatrix(test, predictedFlags)

    nerr <- sum(predictedFlags != test$flag)
    printf("Err: %f\n", nerr / nrow(test))
}

In [51]:
showMatrix <- function(test, predictedFlags) {
    predictedRed <- test[predictedFlags == "Red", ]
    predictedBlue <- test[predictedFlags == "Blue", ]
    print(predictedRed %>% group_by(flag, AccelOrBrake) %>% summarize(count=n()))
    print(predictedBlue %>% group_by(flag, AccelOrBrake) %>% summarize(count=n()))
}

In [52]:
createFormula <- function(invalids, allFeatures) {
    isValidColumn <- function(x) ! x %in% invalids
    
    features <- Filter(isValidColumn, allFeatures)
    return(paste("flag ~ ", paste(features, collapse=" + ")))
}

# Predict Red

In [53]:
train <- read.csv("../data/middle/sp5.csv", stringsAsFactors=FALSE)

In [54]:
train <- train[, c(valuables, "flag")]

In [55]:
test <- read.csv("../data/middle/sp6.csv", stringsAsFactors=FALSE)

In [56]:
test$AccelOrBrake[test$flag == "RedA"] <- "Accel"
test$AccelOrBrake[test$flag == "RedB"] <- "Brake"
test$AccelOrBrake[test$flag == "BlueA"] <- "Accel"
test$AccelOrBrake[test$flag == "BlueB"] <- "Brake"

In [57]:
train$flag[train$flag == "RedA"] <- "Red"
train$flag[train$flag == "RedB"] <- "Red"
train$flag[train$flag == "BlueA"] <- "Blue"
train$flag[train$flag == "BlueB"] <- "Blue"
train$flag <- as.factor(train$flag)

In [58]:
test$flag[test$flag == "RedA"] <- "Red"
test$flag[test$flag == "RedB"] <- "Red"
test$flag[test$flag == "BlueA"] <- "Blue"
test$flag[test$flag == "BlueB"] <- "Blue"
test$flag <- as.factor(test$flag)

In [59]:
allFeatures <- c(colnames(train))
expr <- createFormula(invalids, allFeatures)

In [60]:
test %>% group_by(flag, AccelOrBrake) %>% summarize(count=n())

flag,AccelOrBrake,count
Blue,Accel,57
Blue,Brake,148
Red,Accel,181
Red,Brake,90


In [61]:
result <- trainAndPredict(expr, train, test, verbose=TRUE)

Source: local data frame [4 x 3]
Groups: flag [?]

    flag AccelOrBrake count
  <fctr>        <chr> <int>
1   Blue        Accel    24
2   Blue        Brake    68
3    Red        Accel    92
4    Red        Brake    50
Source: local data frame [4 x 3]
Groups: flag [?]

    flag AccelOrBrake count
  <fctr>        <chr> <int>
1   Blue        Accel    33
2   Blue        Brake    80
3    Red        Accel    89
4    Red        Brake    40
Err: 0.464286
