# Arctic Project in Random Forest (Testing)

## Import libraries

In [None]:
library(MASS)
library(tidyverse)
library(randomForest)

## Load data

In [None]:
# Read the Arctic dataset from a CSV file in the working directory.
# Character columns are kept as plain strings instead of being converted to
# factors. FALSE is spelled out because the alias F can be reassigned.
arctic <- read.csv("arctic_data.csv", stringsAsFactors = FALSE)

## Data segmentation

In [None]:
# Assign every row of `arctic` to one of 10 folds.
# cut() splits the row indices 1..n into 10 equal-width intervals and, with
# labels = FALSE, returns the integer interval code for each row. Each fold is
# therefore a contiguous block of rows in file order (rows are not shuffled).
# seq_len() is used instead of seq(1, n) so that an empty data frame is not
# turned into the two-element sequence c(1, 0).
folds <- cut(seq_len(nrow(arctic)), breaks = 10, labels = FALSE)

## Prediction

In [None]:
# Out-of-fold predictions: for each fold, fit a random forest on the rows of
# all other folds and predict Extent for the held-out rows. The result is a
# data frame with one column per fold (V1, V2, ...) and one row per held-out
# observation within a fold.
# NOTE(review): randomForest is stochastic and no seed is set in the visible
# code, so predictions differ between runs; call set.seed() beforehand if
# reproducible numbers are needed.

# Model formula; identical for every fold, so it is built once.
rf_formula <- Extent ~ Rainfall + Daylight + Population + CO2 + Ozone +
    OceanTemperature_NorthernHemisphere +
    LandTemperature_NorthernHemisphere +
    MinTemperature_NorthSlopeAlaska +
    GDP_WORLD

# Fold labels actually present in `folds`, in ascending order.
fold_ids <- sort(unique(folds))

prediction <- as.data.frame(
    # vapply() (rather than sapply()) requires every fold to return the same
    # number of predictions as the first fold. Unequal folds then fail here
    # with a clear message instead of sapply() silently returning a list.
    vapply(fold_ids, FUN = function(i) {
        # A logical mask keeps the train/test split correct even if a fold
        # were empty (negative indexing with an empty vector drops all rows).
        is_test <- folds == i
        test <- arctic[is_test, ]
        train <- arctic[!is_test, ]

        # print(test) # if needed

        # random forest (mtry = 5 and ntree = 200)
        model <- randomForest(rf_formula, data = train, mtry = 5, ntree = 200)

        # print(summary(model)) # if needed

        # predictions for the held-out rows
        predict(model, test)
    }, FUN.VALUE = numeric(sum(folds == fold_ids[1])))
)

## Table gathering and merging

In [None]:
# Stack the per-fold prediction columns into long format: one row per
# prediction, with the originating column name in `fold`.
# gather() appends the columns one after another (all of the first column,
# then all of the second, ...). Because the folds are contiguous blocks of
# rows in file order, the stacked predictions line up row-for-row with
# `arctic`, which the cbind() below depends on.
pred_gather <- gather(data = prediction, key = "fold", value = "prediction",
                      everything())

# Attach the observation index and the true Extent to each prediction.
# Columns are selected by name (they are read by name further down) rather
# than by position, so reordering the CSV columns cannot silently pick the
# wrong ones.
result <- cbind(arctic[, c("observation", "Extent")], pred_gather)

## Calculate Mean Squared Error

In [None]:
# Per-row squared error between the observed and the predicted Extent.
# NOTE: despite its name, the "R^2" column holds squared errors, not a
# coefficient of determination; the name is kept so that any existing
# references to the column keep working.
result["R^2"] <- (result$Extent - result$prediction)^2

# Mean squared error. mean() divides by the actual number of rows in `result`
# instead of a hard-coded row count.
mse <- mean(result[["R^2"]])
print(mse)

## Plot line chart (Prediction vs True)

In [None]:
# Line chart comparing the observed Extent (black) with the out-of-fold
# predictions (red) along the observation index.

# figure size
# NOTE(review): repr.plot.* are presumably read by the notebook's plotting
# backend; they are not base R graphics options.
options(repr.plot.width = 15, repr.plot.height = 7.5)

# observed values; the default x-axis is suppressed (xaxt = "n") because
# custom tick labels are drawn below. lwd is numeric, not a string.
plot(result$observation, result$Extent, type = "l", ylim = c(0, 1.5), lwd = 2,
     xlab = "Date", ylab = "Value", xaxt = "n", cex.lab = 1.5)

# attributes of prediction lines
lines(result$observation, result$prediction, lty = 1, col = "red", lwd = 2)

# x-axis labels: one tick every 60 observations, labelled in 5-year steps
# NOTE(review): assumes `observation` is a monthly index with 1 = Jan 1980;
# confirm against the data.
tick_at <- seq(1, 481, by = 60)
tick_labels <- paste("Jan", seq(1980, 2020, by = 5))
axis(1, at = tick_at, labels = tick_labels)

# title
title(main = list("Random Forest", cex = 2, col = "black", font = 2))

# legend
legend("topright", inset = .05, c("Prediction", "True"),
       bty = "n", lty = c(1, 1), col = c("red", "black"), lwd = c(2, 2),
       cex = 1.5)

In [None]:
# Scatter plot of predicted against observed Extent with a y = x reference
# line; points on the line are perfect predictions.

# figure size
options(repr.plot.width = 15, repr.plot.height = 10)

plot(result$Extent, result$prediction, xlim = c(0, 1.0), ylim = c(0, 1.2),
     xlab = "True value", ylab = "Predicted value", cex.lab = 1.5)

# Dashed red identity line on [0, 1]. add = TRUE draws it into the existing
# plot, so the axes and axis labels are not drawn a second time on top of the
# first ones (as happens with par(new = TRUE) followed by a fresh plot).
curve(x + 0, from = 0, to = 1, add = TRUE, col = "red", lwd = 2, lty = 2)

# title
title(main = list("Random Forest", cex = 2, col = "black", font = 2))