<a href="https://colab.research.google.com/github/stephenfrein/csc8491/blob/main/Neural_Network.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the packages needed later for model tuning (caret) and for
# model explanation (lime) in a single call
install.packages(c("caret", "lime"))

In [None]:
# Load the concrete data set from its hosted CSV file, then inspect it:
# column types, the first few rows, and per-column summary statistics
concrete_raw <- read.csv(file = "https://csc8491.s3.amazonaws.com/concrete.csv")
str(object = concrete_raw)
head(x = concrete_raw)
summary(object = concrete_raw)

In [None]:
# Apply min-max normalization to the entire data frame.
# A neural network works best when its inputs share a common small range,
# so every column is rescaled to [0, 1] with the helper below.

# Min-max normalization of a numeric vector.
#
# x: numeric vector to rescale.
# Returns a vector the same length as x in which min(x) maps to 0 and
# max(x) maps to 1. A constant vector (max == min) would otherwise divide
# zero by zero and produce NaN, so it is mapped to all zeros instead.
# Missing values are not removed: if x contains NA the result is all NA.
normalize <- function(x) {
  bounds <- range(x)
  span <- bounds[2] - bounds[1]
  # isTRUE() keeps the guard safe when span is NA (x contains missing values)
  if (isTRUE(span == 0)) {
    return(numeric(length(x)))
  }
  (x - bounds[1]) / span
}

# lapply ("list apply") runs normalize over every column - a data frame is a
# list of column vectors - and as.data.frame turns the result back into a
# data frame. The input is concrete_raw, the object created by read.csv.
concrete_norm <- as.data.frame(lapply(concrete_raw, normalize))
# confirm that the range is now between zero and one
summary(concrete_raw$strength)
summary(concrete_norm$strength)

In [None]:
# Create training and test data: a random 75% of the rows are used for
# training and the remaining rows are held out for testing
train_pct <- 0.75
set.seed(123) # fixed seed so the same rows are drawn on every run
# seq_len() stays correct for an empty data frame (1:0 would yield c(1, 0));
# floor() makes the requested sample size an explicit whole number
train <- sample(
  seq_len(nrow(concrete_norm)),
  size = floor(train_pct * nrow(concrete_norm))
)
concrete_train <- concrete_norm[train, ]
concrete_test <- concrete_norm[-train, ]

In [None]:
# Install and attach the neuralnet package
install.packages("neuralnet")
library(neuralnet)
# Fit the simplest ANN: a single hidden neuron, modelling strength from
# all of the other columns in the training data
net <- neuralnet(strength ~ ., data = concrete_train, hidden = 1)
# the rep argument is needed here for plot to render in Colab
plot(net, rep = "best")



In [None]:
# Feed columns 1-8 of the test data through the single-neuron network
net_results <- compute(net, concrete_test[, 1:8])
# the predicted strength values are held in the net.result element
predicted_strength <- net_results[["net.result"]]
# correlation between the predicted and the actual strength values
cor(predicted_strength, concrete_test[["strength"]])

In [None]:
# Fit a more complex topology: same formula and data, but 5 hidden neurons
net2 <- neuralnet(strength ~ ., data = concrete_train, hidden = 5)
plot(net2, rep = "best")




In [None]:
# Feed columns 1-8 of the test data through the 5-neuron network
net_results2 <- compute(net2, concrete_test[, 1:8])
# the predicted strength values are held in the net.result element
predicted_strength2 <- net_results2[["net.result"]]
# correlation between the predicted and the actual strength values
cor(predicted_strength2, concrete_test[["strength"]])

In [None]:
print("Linear model: ")
# Comparison model: linear regression of strength on all other columns,
# fitted on the training data and scored on the test data by correlation
lm.fit <- lm(strength ~ ., data = concrete_train)
summary(lm.fit)
lm_preds <- predict(lm.fit, newdata = concrete_test[, 1:8])
cor(lm_preds, concrete_test[["strength"]])

print("Tree model: ")
install.packages("tree")
library(tree)
# Comparison model: regression tree of strength on all other columns
tree.fit <- tree(strength ~ ., data = concrete_train)
summary(tree.fit)
# draw the tree, then label its splits and leaves
plot(tree.fit)
text(tree.fit, pretty = 0)
# predict with the tree itself (tree.fit), not the linear model, so the
# correlation below really measures the tree's performance
tree_preds <- predict(tree.fit, newdata = concrete_test[, 1:8])
cor(tree_preds, concrete_test$strength)

print("Random forest: ")
install.packages("randomForest")
library(randomForest)
# Comparison model: random forest of strength on all other columns;
# importance = TRUE is requested so importance() can be reported below
rf <- randomForest(strength ~ ., data = concrete_train, importance = TRUE)
rf
importance(rf)
rf_preds <- predict(rf, newdata = concrete_test[, 1:8])
cor(rf_preds, concrete_test[["strength"]])


In [None]:
# Tune an nnet model with a grid search run by caret
library(caret)
# Candidate settings: every combination of three decay values and three
# hidden-layer sizes (decay fights overfitting)
my.grid <- expand.grid(
  .decay = c(0.01, 0.001, 0.0001),
  .size = c(8, 10, 12)
)
my.grid
# maxit and trace are not caret arguments; they are supplied here as
# extra options for the underlying fit
caret.net.fit <- train(
  strength ~ .,
  data = concrete_train,
  method = "nnet",
  maxit = 1000,
  tuneGrid = my.grid,
  trace = FALSE
)
caret.net.fit
# score the tuned model on the test data by correlation with actual strength
caret.net.predict <- predict(caret.net.fit, newdata = concrete_test[, 1:8])
cor(caret.net.predict, concrete_test[["strength"]])

In [None]:
install.packages("lime")
library(lime)
# Build the explainer from the predictor columns only (1-8); the outcome
# column is left out so that strength is not offered to lime as a feature
explainer <- lime(concrete_train[, 1:8], caret.net.fit)
# show what's happening with the first 10 examples in the test set,
# limited to the 5 most significant features for each example
explanation <- explain(concrete_test[1:10, 1:8], explainer, n_features = 5)
plot_features(explanation)
