# Apply Random Forest

In [1]:
library(ggplot2)
library(caret)
library(e1071)
library(dplyr)
library(randomForest)

Loading required package: lattice

Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union

randomForest 4.6-12
Type rfNews() to see new features/changes/bug fixes.

Attaching package: ‘randomForest’

The following object is masked from ‘package:dplyr’:

    combine

The following object is masked from ‘package:ggplot2’:

    margin



In [2]:
set.seed(1)

In [3]:
# 利用する特徴量を列挙
valuables <- c("RoadType","CurveAverage","Speed", "Curve100", "Curve150", "MaxSpeed","RiskFactor","Curve","DistSignal","Pitch","AheadDistance","AverageVelocity","TimeToCollision","AccelerationSpeed", "Engine", "SteeringAngle", "TimeHeadway", "Jerk", "LaneCount", "DiffAvgSpeed", "EmptinessOfRoad", "RoadFactor")

In [4]:
printf <- function(...) cat(sprintf(...))

In [5]:
showMatrix <- function(test, predictedFlags) {
    # test のうち、Red, Blue と判断したものを取得
    predictedRed <- test[predictedFlags == "Red", ]
    predictedBlue <- test[predictedFlags == "Blue", ]
    
    # これらのうち、Red or Blue, Accel or Brake でグループ分けし、それらの個数を出力
    printf("Red\n")
    print(predictedRed %>% group_by(flag, AccelOrBrake) %>% summarize(count=n()))
    printf("Blue\n")
    print(predictedBlue %>% group_by(flag, AccelOrBrake) %>% summarize(count=n()))
    printf("\n")
}

# Predict Red

In [6]:
train <-  read.csv("../data/middle/sp5.csv", stringsAsFactors=FALSE)

In [7]:
test <- read.csv("../data/middle/sp6.csv", stringsAsFactors=FALSE)

In [8]:
test$AccelOrBrake[test$flag == "RedA"] <- "Accel"
test$AccelOrBrake[test$flag == "RedB"] <- "Brake"
test$AccelOrBrake[test$flag == "BlueA"] <- "Accel"
test$AccelOrBrake[test$flag == "BlueB"] <- "Brake"

In [9]:
train$flag[train$flag == "RedA"] <- "Red"
train$flag[train$flag == "RedB"] <- "Red"
train$flag[train$flag == "BlueA"] <- "Blue"
train$flag[train$flag == "BlueB"] <- "Blue"
train$flag <- as.factor(train$flag)

In [10]:
test$flag[test$flag == "RedA"] <- "Red"
test$flag[test$flag == "RedB"] <- "Red"
test$flag[test$flag == "BlueA"] <- "Blue"
test$flag[test$flag == "BlueB"] <- "Blue"
test$flag <- as.factor(test$flag)

In [11]:
# 使用する特徴量だけ抜き取る
train <- train[, c(valuables, "flag")]
test <- test[, c(valuables, "AccelOrBrake", "flag")]

In [12]:
set.seed(1)

In [13]:
# 全データを使って表を作成 
fit <- randomForest(flag ~ ., data=train, mtry=3, ntree=500)
p <- predict(fit, newdata=test)

showMatrix(test, p)

printf("\n")
nerr <- sum(p != test$flag)
printf("Err: %f", nerr / nrow(test))

Red
Source: local data frame [4 x 3]
Groups: flag [?]

    flag AccelOrBrake count
  <fctr>        <chr> <int>
1   Blue        Accel    30
2   Blue        Brake    52
3    Red        Accel    99
4    Red        Brake    50
Blue
Source: local data frame [4 x 3]
Groups: flag [?]

    flag AccelOrBrake count
  <fctr>        <chr> <int>
1   Blue        Accel    27
2   Blue        Brake    96
3    Red        Accel    82
4    Red        Brake    40


Err: 0.428571

In [14]:
# 各道路タイプごとに表を作成
for (i in c(0, 2, 3, 4, 5, 6, 7))  {
    printf("RoadType: %d\n", i)
    X <- train[train$RoadType == i, ]
    y <- test[test$RoadType == i, ]
    fit <- randomForest(flag ~ ., data=X, mtry=3, ntree=300)
    p <- predict(fit, newdata=y)

    showMatrix(test, p)
    
    printf("\n")
    nerr <- sum(p != y$flag)
    printf("Err: %f\n\n",  nerr / nrow(y))
}

RoadType: 0
Red
Source: local data frame [4 x 3]
Groups: flag [?]

    flag AccelOrBrake count
  <fctr>        <chr> <int>
1   Blue        Accel    51
2   Blue        Brake   128
3    Red        Accel   158
4    Red        Brake    80
Blue
Source: local data frame [4 x 3]
Groups: flag [?]

    flag AccelOrBrake count
  <fctr>        <chr> <int>
1   Blue        Accel     6
2   Blue        Brake    20
3    Red        Accel    23
4    Red        Brake    10


Err: 0.395833

RoadType: 2
Red
Source: local data frame [4 x 3]
Groups: flag [?]

    flag AccelOrBrake count
  <fctr>        <chr> <int>
1   Blue        Accel    47
2   Blue        Brake   125
3    Red        Accel   154
4    Red        Brake    74
Blue
Source: local data frame [4 x 3]
Groups: flag [?]

    flag AccelOrBrake count
  <fctr>        <chr> <int>
1   Blue        Accel    10
2   Blue        Brake    23
3    Red        Accel    27
4    Red        Brake    16


Err: 0.315789

RoadType: 3
Red
Source: local data frame [4 x 3]