# Preprocess Data

In [1]:
# Load the dividend dataset; the relative path resolves against the working directory.
df <- read.csv(file = "dividendinfo.csv")

In [2]:
# Preview the first six rows (head()'s default) of the raw data.
head(df, n = 6L)

dividend,fcfps,earnings_growth,de,mcap,current_ratio
0,2.75,-19.25,1.11,545,0.9240557
1,4.96,0.83,1.09,630,1.4685722
1,2.78,1.09,0.19,562,1.9758751
0,0.43,12.97,1.7,388,1.9419977
1,2.94,2.44,1.83,684,2.487467
1,3.9,-6.29,0.46,621,1.7832525


# Normalization

In [3]:
# Min-max rescale a numeric vector onto [0, 1].
#
# x: numeric vector.
# Returns a numeric vector of the same length where the smallest value maps
# to 0 and the largest to 1. A vector whose values are all equal has no
# spread to rescale, so it maps to all zeros instead of NaN (0 / 0).
# NA values are not removed: any NA in x makes every result NA, as before.
normalize <- function(x) {
    bounds <- range(x)
    span <- bounds[2] - bounds[1]
    # Guard the constant-vector case; is.na() keeps NA input on the
    # original propagate-NA path instead of erroring inside if().
    if (!is.na(span) && span == 0) {
        return(rep(0, length(x)))
    }
    (x - bounds[1]) / span
}

In [4]:
# Apply normalize() to every column of df and reassemble the data frame.
minmaxdf <- as.data.frame(lapply(X = df, FUN = normalize))

In [5]:
# Preview the first six rows (head()'s default) of the rescaled data.
head(minmaxdf, n = 6L)

dividend,fcfps,earnings_growth,de,mcap,current_ratio
0,0.54361055,0.0,0.26717557,0.6350575,0.3177037
1,0.99188641,0.3383319,0.26208651,0.7571839,0.5052078
1,0.54969574,0.3427127,0.03307888,0.6594828,0.6798973
0,0.07302231,0.5428812,0.4173028,0.4094828,0.6682316
1,0.5821501,0.3654591,0.45038168,0.8347701,0.8560637
1,0.77687627,0.2183656,0.10178117,0.7442529,0.6135678


# Train & Test Split

In [6]:
# Split by row position: rows 1-160 are used for training, rows 161-200 for
# testing.
# NOTE(review): rows are taken in file order (no shuffling), and minmaxdf was
# rescaled using all rows before this split - confirm both are intended.
train_df <- minmaxdf[seq_len(160), ]
test_df <- minmaxdf[seq(from = 161, to = 200), ]

# Building Model - Training set

In [9]:
library(neuralnet)

In [13]:
# Fit a neural network that predicts dividend from the five other columns.
#   hidden = c(2, 1)       two hidden layers, with 2 and 1 neurons
#   threshold = 0.01       stopping threshold passed to neuralnet()
#   linear.output = FALSE  activation function is applied to the output too
# NOTE(review): no set.seed() call is visible before this fit, so the fitted
# weights may differ between runs - confirm whether reproducibility matters.
nn <- neuralnet(
    formula = dividend ~ fcfps + earnings_growth + de + mcap + current_ratio,
    data = train_df,
    hidden = c(2, 1),
    threshold = 0.01,
    linear.output = FALSE
)

In [17]:
# Show the fit summary: error, reached threshold, steps and fitted weights.
nn[["result.matrix"]]

0,1
error,1.008319
reached.threshold,0.006904156
steps,1367.0
Intercept.to.1layhid1,-3.331013
fcfps.to.1layhid1,-5.28651
earnings_growth.to.1layhid1,-0.6962974
de.to.1layhid1,12.21447
mcap.to.1layhid1,4.03429
current_ratio.to.1layhid1,-4.629061
Intercept.to.1layhid2,3.699422


# Test set

In [23]:
# Keep only the five predictor columns of the test rows; the dividend label
# is left out. drop = FALSE keeps the result a data frame.
temp_test_set <- test_df[, c("fcfps", "earnings_growth", "de", "mcap",
                             "current_ratio"), drop = FALSE]

In [24]:
# Run the test-set predictors through the trained network.
# NOTE(review): recent neuralnet releases document compute() as deprecated in
# favour of predict() - confirm against the installed version before changing,
# since the code below reads the $net.result element of this result.
nn_test <- compute(x = nn, covariate = temp_test_set)

In [28]:
# Pair each test row's true dividend label with the network's output.
results <- data.frame(
    actual = test_df[["dividend"]],
    prediction = nn_test[["net.result"]]
)

In [29]:
# Display the actual-vs-predicted table for the test rows.
results

Unnamed: 0,actual,prediction
161,0,3.094185e-11
162,1,1.0
163,0,6.103907e-10
164,0,1.041836e-11
165,0,6.046956e-11
166,1,1.0
167,1,1.0
168,1,1.0
169,0,1.285911e-11
170,1,8.957441e-07


# Confusion Matrix

In [30]:
# Round both columns of results to 0 or 1 so the network's continuous outputs
# become class labels (values below 0.5 round to 0, values above round to 1).
# vapply() pins the result shape, unlike sapply().
roundedresults <- vapply(results, round, numeric(nrow(results)), digits = 0)
roundedresultsdf <- data.frame(roundedresults)
# Cross-tabulate actual vs predicted labels. with() resolves the column names
# for this one call only, so nothing is left attached to the search path.
with(roundedresultsdf, table(actual, prediction))

      prediction
actual  0  1
     0 17  0
     1  5 18

Overall, the model achieves an accuracy of 87.5% (35 of 40 test cases correct) in predicting whether a stock pays a dividend.