# Model Selection : Neural network (nnet)

In [24]:
library(nnet)
library(caret)

Loading required package: lattice
Loading required package: ggplot2
Registered S3 methods overwritten by 'ggplot2':
  method         from 
  [.quosures     rlang
  c.quosures     rlang
  print.quosures rlang


In [5]:
# Confusion matrix function
confusion_matrix <- function(valid, predi, posLabel="yes"){
 cm <- table(valid, predi)
 print("Confusion matrix")
 print(cm)
 error_rate <- 1-sum(diag(cm))/sum(cm)
 print(paste("Error rate =", error_rate))
}

In [9]:
# Data
training_set <- read.csv("../Data/PreProcess/processed_training_data_split.csv")
validation_set <- read.csv("../Data/PreProcess/processed_verification_data_split.csv")

In [21]:
training_set

id,population,permit,construction_year,quality_group,quantity,Internal,Lake.Nyasa,Lake.Rukwa,Lake.Tanganyika,...,X18,X19,X20,X21,X24,X40,X60,X80,X90,X99
functional,-0.07262067,0.6889358,-1.2105682,0.3440186,0.81768317,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
functional,-0.08339009,0.6889358,0.0000000,0.3440186,-0.08578228,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
functional,-0.61825111,0.6889358,-1.2105682,-3.1172670,0.81768317,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
non functional,-0.07624876,0.6889358,0.0000000,0.3440186,0.81768317,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
non functional,-0.07624876,0.6889358,0.0000000,-0.8097433,0.81768317,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
non functional,0.48793255,0.6889358,0.0000000,-3.1172670,-0.08578228,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
functional,-0.08339009,0.6889358,0.0000000,0.3440186,0.81768317,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
functional,0.13555158,-1.5302158,-1.4092612,-3.1172670,0.81768317,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
functional,-0.07262067,-1.5302158,0.9750546,0.3440186,-0.98924773,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
functional,-0.61825111,0.6889358,-1.2105682,0.3440186,0.81768317,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [11]:
# Columns to drop in training and validation dataset
columns_to_drop<-c("X.1","X")
training_set<-training_set[,!(names(training_set) %in% columns_to_drop)] # Drop the desired columns
validation_set<-validation_set[,!(names(validation_set) %in% columns_to_drop)]# Drop the desired columns

In [12]:
training_set$id = factor(training_set$id)

### Neural network with hidden layer (11 neurons)

In [20]:
# Neural network with 11 neurons in hidden layer
nn11 <- nnet(id ~ ., data = training_set, skip = FALSE, size = 11, maxit = 1000)

# weights:  927
initial  value 66204.080071 
iter  10 value 34481.280137
iter  20 value 30815.018919
iter  30 value 29586.994969
iter  40 value 28818.841057
iter  50 value 28350.780435
iter  60 value 28001.448916
iter  70 value 27607.157022
iter  80 value 27301.699064
iter  90 value 27125.203922
iter 100 value 27006.832700
iter 110 value 26927.081800
iter 120 value 26867.073680
iter 130 value 26780.935413
iter 140 value 26709.988253
iter 150 value 26658.067320
iter 160 value 26600.309217
iter 170 value 26531.330151
iter 180 value 26452.916375
iter 190 value 26378.383750
iter 200 value 26306.345765
iter 210 value 26225.391926
iter 220 value 26148.685362
iter 230 value 26072.648742
iter 240 value 25999.354929
iter 250 value 25949.788533
iter 260 value 25918.761423
iter 270 value 25890.645404
iter 280 value 25871.880892
iter 290 value 25860.823450
iter 300 value 25851.256137
iter 310 value 25843.264279
iter 320 value 25840.360782
iter 330 value 25837.150833
iter 340 value 25834.910806
ite

In [30]:
# Confusion matrix
pred_nn11 <- predict(nn11, newdata = validation_set, type = "class")

confusionMatrix(table(pred_nn11, validation_set$id))

Confusion Matrix and Statistics

                         
pred_nn11                 functional functional needs repair non functional
  functional                    5774                     583           1423
  functional needs repair         98                     113             30
  non functional                 651                     221           3217

Overall Statistics
                                         
               Accuracy : 0.7518         
                 95% CI : (0.744, 0.7594)
    No Information Rate : 0.5386         
    P-Value [Acc > NIR] : < 2.2e-16      
                                         
                  Kappa : 0.5247         
                                         
 Mcnemar's Test P-Value : < 2.2e-16      

Statistics by Class:

                     Class: functional Class: functional needs repair
Sensitivity                     0.8852                       0.123228
Specificity                     0.6410                       0.988564
Pos Pr

### NN runs:
- No hidden layer: 0.31 error rate
- 2 neurons hidden layer: 0.49 error rate
- 11 neurons hidden layer: 0.25 error rate

In [19]:
save(nn11, file="4-Models/nn_model_classifier.RData")