**Import libraries**

In [None]:
library(dplyr)
library(summarytools)
library(neuralnet)
library(ggplot2)
library(naniar)
library(GGally)
library(caret)

**Deriving dataset path**

In [None]:
# List what is available under the input directory, then read the diabetes
# CSV into the data frame `df` used by every later cell.
list.files(path = "../input")
df <- read.csv("../input/pima-indians-diabetes-database/diabetes.csv")

In [None]:
# Preview the first six rows of the data.
head(df, n = 6L)

In [None]:
# Preview the last six rows of the data.
tail(df, n = 6L)

In [None]:
# Compact overview of the column types and the first few values of each column.
str(object = df)

In [None]:
# Number of rows followed by number of columns.
c(nrow(df), ncol(df))

In [None]:
# Count missing (TRUE) versus present (FALSE) cells over the whole data frame.
table(is.na(df))

**Descriptives**

In [None]:
# Descriptive statistics for df (summarytools), printed in the "rmarkdown"
# table style.
descr(df, style = "rmarkdown")

**Missing values**

In [None]:
# Per-variable summary of missing values (naniar).
miss_var_summary(df)

**Exploratory data analysis**

In [None]:


# Pairwise plot matrix for the 2nd, 4th and 7th columns of df.
# NOTE(review): aes(color = "red") maps the constant string "red" to the
# colour aesthetic instead of setting the colour to red, so the plots use the
# default palette with a single group labelled "red" -- confirm this is
# the intended look.
ggpairs(
  df,
  columns = c(2, 4, 7),
  mapping = aes(color = "red")
)

In [None]:
# Scatterplot of Insulin (y) against Glucose (x), drawn with purple points.
ggplot(data = df, mapping = aes(x = Glucose, y = Insulin)) +
  geom_point(colour = "purple") +
  theme_classic() +
  labs(title = "scatterplot of Insulin vs Glucose")

In [None]:
# Boxplot of Age. The title names the variable that is actually mapped
# to x (Age), so the caption matches the data shown.
ggplot(df, aes(x = Age)) +
  geom_boxplot(fill = "purple") +
  labs(title = "Age boxplot") +
  theme_classic()

In [None]:
# Treat Outcome as a categorical variable from here on (used for faceting,
# fill colours and as the classification target).
df[["Outcome"]] <- as.factor(df[["Outcome"]])

In [None]:
# Age histograms with 30 bins, one panel per Outcome level.
ggplot(data = df, mapping = aes(x = Age)) +
  geom_histogram(bins = 30, fill = "purple", colour = "blue") +
  facet_wrap(~ Outcome) +
  theme_classic() +
  labs(title = "Distribution of age by diabetic status")

In [None]:
# Histogram of Age with bars filled by Outcome and a centred, bold blue title.
# bins = 30 is stated explicitly: it is the value geom_histogram() falls back
# to anyway, but spelling it out avoids the "pick a better value" message and
# matches the faceted histogram of Age drawn earlier.
ggplot(df, aes(x = Age, fill = Outcome)) +
  geom_histogram(bins = 30) +
  labs(title = "Histogram of Age by diabetic status") +
  theme(
    plot.title = element_text(
      color = "blue",
      size = 12,
      face = "bold",
      hjust = 0.5
    )
  )


**Classification: Neural networks**

**i.Scale data**

In [None]:
# Min-max normalisation: learn the "range" transformation from df with caret
# and apply it to df to obtain df_scale.
# NOTE(review): the transformation is estimated on all rows of df, including
# the rows that are later assigned to df_test -- confirm that letting the
# test rows influence the scaling is acceptable.
scale <- preProcess(df, method = "range")
df_scale <- predict(scale, newdata = df)

**ii.Splitting data**

In [None]:
# Reproducible random split: two thirds of the rows of df_scale go to the
# training set, the remaining rows form the test set.
set.seed(seed = 2)
train_idx <- sample.int(nrow(df_scale), size = 2/3 * nrow(df_scale))
df_test <- df_scale[-train_idx, ]
df_train <- df_scale[train_idx, ]


In [None]:
# Class counts of Outcome in the training set.
summary(df_train[["Outcome"]])

In [None]:
# Class counts of Outcome in the test set.
summary(df_test[["Outcome"]])

**iii.Neural network**

In [None]:
# Fix the random number generator state before the model is fitted.
set.seed(seed = 2)

In [None]:
# Fit a neural network that predicts Outcome from every other column of
# df_train: 5 hidden units, sum-of-squared-errors loss, 2 repetitions, and at
# most 1e6 training steps per repetition.
# NOTE(review): linear.output = TRUE means the activation function is not
# applied to the output neurons; the classification example in the neuralnet
# documentation uses linear.output = FALSE -- confirm TRUE is intended here.
model <- neuralnet(
  formula = Outcome ~ .,
  data = df_train,
  hidden = 5,
  threshold = 0.01,
  stepmax = 1e6,
  rep = 2,
  err.fct = "sse",
  linear.output = TRUE
)


In [None]:
# Visualise the fitted network, using the best of the trained repetitions.
plot(x = model, rep = "best")

In [None]:
# Show the result matrix stored on the fitted model, one column per repetition.
model[["result.matrix"]]

**iv.Prediction on test data**

In [None]:
# Predict on the held-out rows and cross-tabulate actual vs predicted classes.
# predict() returns one column of scores per Outcome level; the predicted class
# is the level whose column holds the largest score in each row.
# NOTE(review): no `rep` is passed to predict(), while the network plot uses
# rep = "best" -- confirm the predictions come from the intended repetition.
pred <- predict(model, df_test, all.units = FALSE)

# Translate the winning column index back to the Outcome labels so the table
# columns carry the same labels as its rows. Keeping the full set of levels
# guarantees a column for every class even if one is never predicted.
outcome_levels <- levels(df_test$Outcome)
pred_class <- factor(
  outcome_levels[apply(pred, 1, which.max)],
  levels = outcome_levels
)
table(actual = df_test$Outcome, predicted = pred_class)

- True negatives: 143
- True positives: 50


**END**