In [1]:
library(grid)
library(rpart)
library(rpart.plot)
library(partykit)
library(lattice)
library(ggplot2)
library(caret)

In [2]:
# Fix the random seed so the randomised steps later in this file (such as the
# fold assignment made by createFolds() inside CV()) are repeatable.
set.seed(1)

In [3]:
# Japanese display names for the numeric RoadType codes. The codes are
# zero-based, so the label for code k is jRoadType[k + 1].
jRoadType <- c(
    "都市間高速",  # 0: inter-city expressway
    "都市高速",  # 1: urban expressway
    "有料道路",  # 2: toll road
    "国道",  # 3: national road
    "県道",  # 4: prefectural road
    "主要地方道",  # 5: major local road
    "一般道1、一般道2、一般道3",  # 6: general roads 1-3
    "その他"  # 7: other
)

In [4]:
# C-style printf: formats the arguments with sprintf() and writes the result
# to standard output with cat(). No newline is appended automatically.
printf <- function(...) {
    formatted <- sprintf(...)
    cat(formatted)
}

In [5]:
# Names of columns that isValidColumn() rejects. `flag` is used as the model
# response and `RoadType` as the grouping variable elsewhere in this file.
invalids <- c("Time", "Longitude", "Latitude", "Brake", "Accel", "RoadType", "flag")

# Report whether column name(s) may be used as model features.
#
# Args:
#   x: character vector of column names (one name or several).
# Returns:
#   Logical vector of the same length as x: FALSE for names listed in
#   `invalids`, TRUE otherwise. The test is vectorised, so the function works
#   both element-by-element (e.g. inside Filter()) and on a whole vector,
#   instead of failing in `if` when x has length other than one.
isValidColumn <- function(x) {
    !(x %in% invalids)
}

In [6]:
# Estimate by cross-validation how well a classification tree predicts `flag`
# on one subset of the data.
#
# Args:
#   dfx:     data frame with a `flag` column plus every predictor named in
#            `formula`.
#   target:  the `flag` value whose share of the rows is reported.
#   formula: model formula, given as a formula object or as a string.
#            Defaults to the global `expr`, which this function previously
#            read implicitly, so existing two-argument calls are unaffected.
# Returns:
#   Numeric vector of length 2:
#   c(share of rows whose flag equals `target`,
#     mean misclassification rate over the held-out folds).
CV <- function(dfx, target, formula = expr) {
    # Count matches directly so rows with an NA flag are not counted as target
    # (logical row-indexing would turn each NA into an extra row).
    rate <- sum(dfx$flag == target, na.rm = TRUE) / nrow(dfx)

    model <- as.formula(formula)
    folds <- createFolds(dfx$flag)

    # One held-out error rate per fold; vapply avoids growing a vector in a loop.
    errs <- vapply(folds, function(ids) {
        train <- dfx[-ids, ]
        test <- dfx[ids, ]
        fit <- rpart(model, data = train, method = "class")
        # predict() yields one probability column per class; pick the most
        # probable class for every row (the first column wins ties).
        p <- predict(fit, newdata = test)
        predicted <- colnames(p)[max.col(p, ties.method = "first")]
        mean(predicted != test$flag)
    }, numeric(1))

    return(c(rate, mean(errs)))
}

# Predict Red A

In [7]:
# Load the dataset. Text columns stay character so that `flag` can be
# relabelled before it is converted to a factor.
df <- read.csv("../data/middle/sp.csv", stringsAsFactors=FALSE)

In [8]:
# Collapse the RedB, BlueA and BlueB labels into a single "ELSE" label, then
# store the label as a factor for classification.
df$flag[df$flag %in% c("RedB", "BlueA", "BlueB")] <- "ELSE"
df$flag <- as.factor(df$flag)

In [9]:
# Candidate predictors: every column of df that isValidColumn() accepts.
allFeatures <- colnames(df)
features <- allFeatures[
    vapply(allFeatures, isValidColumn, logical(1), USE.NAMES = FALSE)
]

In [10]:
# Build the model formula as text, "flag ~ <feature> + <feature> + ...";
# CV() hands this string to rpart().
expr <- paste("flag ~ ", paste(features, collapse=" + "))

In [11]:
# Distinct RoadType codes present in the data; each one is modelled
# separately in the loops below.
roadTypes <- unique(df$RoadType)

In [12]:
# For each road type, cross-validate a tree that separates "RedA" from the
# rest, and collect in `poss` the road types where the tree is actually useful.
poss <- c()
for (i in roadTypes) {
    printf("RoadType: %d (%s)\n", i, jRoadType[i+1])
    dfx <- df[df$RoadType == i, ]
    # res[1] = share of RedA rows, res[2] = mean cross-validated error rate.
    res <- CV(dfx, "RedA")
    printf("RedA/all = %f\n", res[1])
    printf("CV value: %f\n", res[2])
    printf("\n")

    # With the two labels used here, always guessing the majority class has
    # error min(share, 1 - share). Comparing against the raw share alone is too
    # lenient whenever RedA is the majority (share > 0.5). isTRUE() skips a
    # road type whose estimate is NaN/NA instead of aborting the loop.
    if (isTRUE(res[2] < min(res[1], 1 - res[1]))) {
        poss <- c(poss, jRoadType[i+1])
    }
}
poss

RoadType: 7 (その他)
RedA/all = 0.185185
CV value: 0.083333

RoadType: 6 (一般道1、一般道2、一般道3)
RedA/all = 0.250000
CV value: 0.368750

RoadType: 4 (県道)
RedA/all = 0.346154
CV value: 0.601667

RoadType: 5 (主要地方道)
RedA/all = 0.311037
CV value: 0.327519

RoadType: 3 (国道)
RedA/all = 0.485893
CV value: 0.476494

RoadType: 0 (都市間高速)
RedA/all = 0.617284
CV value: 0.481944

RoadType: 2 (有料道路)
RedA/all = 0.446809
CV value: 0.433333



# Predict Red B

In [13]:
# Load a fresh copy of the dataset with the original flag values, for the
# RedB relabelling that follows.
df2 <- read.csv("../data/middle/sp.csv", stringsAsFactors=FALSE)

In [14]:
# Collapse the RedA, BlueA and BlueB labels into a single "ELSE" label, then
# store the label as a factor for classification.
df2$flag[df2$flag %in% c("RedA", "BlueA", "BlueB")] <- "ELSE"
df2$flag <- as.factor(df2$flag)

In [15]:
# For each road type, cross-validate a tree that separates "RedB" from the
# rest, and collect in `poss` the road types where the tree is actually useful.
poss <- c()
for (i in roadTypes) {
    printf("RoadType: %d (%s)\n", i, jRoadType[i+1])
    dfx <- df2[df2$RoadType == i, ]
    # res[1] = share of RedB rows, res[2] = mean cross-validated error rate.
    res <- CV(dfx, "RedB")
    printf("RedB/all = %f\n", res[1])
    printf("CV value: %f\n", res[2])
    printf("\n")

    # With the two labels used here, always guessing the majority class has
    # error min(share, 1 - share). Comparing against the raw share alone is too
    # lenient whenever RedB is the majority (share > 0.5). isTRUE() skips a
    # road type whose estimate is NaN/NA instead of aborting the loop.
    if (isTRUE(res[2] < min(res[1], 1 - res[1]))) {
        poss <- c(poss, jRoadType[i+1])
    }
}
poss

RoadType: 7 (その他)
RedB/all = 0.740741
CV value: 0.133333

RoadType: 6 (一般道1、一般道2、一般道3)
RedB/all = 0.225000
CV value: 0.278971

RoadType: 4 (県道)
RedB/all = 0.173077
CV value: 0.250000

RoadType: 5 (主要地方道)
RedB/all = 0.204013
CV value: 0.261194

RoadType: 3 (国道)
RedB/all = 0.134796
CV value: 0.193848

RoadType: 0 (都市間高速)
RedB/all = 0.061728
CV value: 0.059722

RoadType: 2 (有料道路)
RedB/all = 0.212766
CV value: 0.175000



# Predict Reds

In [16]:
# Load a fresh copy of the dataset with the original flag values, for the
# combined Red relabelling that follows.
df3 <- read.csv("../data/middle/sp.csv", stringsAsFactors=FALSE)

In [17]:
# Merge RedA and RedB into one "Red" label and BlueA and BlueB into "ELSE",
# then store the label as a factor for classification.
df3$flag[df3$flag %in% c("RedA", "RedB")] <- "Red"
df3$flag[df3$flag %in% c("BlueA", "BlueB")] <- "ELSE"
df3$flag <- as.factor(df3$flag)

In [18]:
# For each road type, cross-validate a tree that separates "Red" from the
# rest, and collect in `poss` the road types where the tree is actually useful.
poss <- c()
for (i in roadTypes) {
    printf("RoadType: %d (%s)\n", i, jRoadType[i+1])
    dfx <- df3[df3$RoadType == i, ]
    # res[1] = share of Red rows, res[2] = mean cross-validated error rate.
    res <- CV(dfx, "Red")
    printf("Red/all = %f\n", res[1])
    printf("CV value: %f\n", res[2])
    printf("\n")

    # With the two labels used here, always guessing the majority class has
    # error min(share, 1 - share). Comparing against the raw share alone is too
    # lenient whenever Red is the majority (share > 0.5). isTRUE() skips a
    # road type whose estimate is NaN/NA instead of aborting the loop.
    if (isTRUE(res[2] < min(res[1], 1 - res[1]))) {
        poss <- c(poss, jRoadType[i+1])
    }
}
poss

RoadType: 7 (その他)
Red/all = 0.925926
CV value: 0.066667

RoadType: 6 (一般道1、一般道2、一般道3)
Red/all = 0.475000
CV value: 0.388848

RoadType: 4 (県道)
Red/all = 0.519231
CV value: 0.600000

RoadType: 5 (主要地方道)
Red/all = 0.515050
CV value: 0.442462

RoadType: 3 (国道)
Red/all = 0.620690
CV value: 0.417217

RoadType: 0 (都市間高速)
Red/all = 0.679012
CV value: 0.370040

RoadType: 2 (有料道路)
Red/all = 0.659574
CV value: 0.273333



# Data

In [23]:
# Load the dataset with its original, un-relabelled flag values for the
# overall class summary.
sp <- read.csv("../data/middle/sp.csv", stringsAsFactors=FALSE)

In [24]:
# Split the data into one data frame per flag value. `%in%` is used rather
# than `==` because a logical row index containing NA yields an all-NA row,
# so any row with a missing flag would otherwise be added to every subset and
# inflate the nrow()-based shares computed from them.
redA <- sp[sp$flag %in% "RedA", ]
redB <- sp[sp$flag %in% "RedB", ]
blueA <- sp[sp$flag %in% "BlueA", ]
blueB <- sp[sp$flag %in% "BlueB", ]

In [25]:
# Print the overall share of rows, across all road types, flagged RedA,
# flagged RedB, and flagged either of the two.
printf("RedA/All: %f\n", nrow(redA) / nrow(sp))
printf("RedB/All: %f\n", nrow(redB) / nrow(sp))
printf("Red/All: %f\n", (nrow(redA) + nrow(redB)) / nrow(sp) )

RedA/All: 0.387817
RedB/All: 0.186802
Red/All: 0.574619
