-
Notifications
You must be signed in to change notification settings - Fork 0
/
R Case Study on the Variability of Slave Transactions
57 lines (44 loc) · 1.94 KB
/
R Case Study on the Variability of Slave Transactions
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#1) Open the dataset
# Absolute, machine-specific location of the CSV file; point this at your
# own copy of the data before running the script.
path <- "C:/Users/mikad/Desktop/RStudio/slavery.csv"
# Read the CSV into the data frame that every later section works on.
dat <- read.csv(file = path)
#2) Dataset Manipulation
# Show the column names produced by read.csv() (already a data.frame, so no
# extra conversion is required).
names(dat)

#Cleaning the dataset and removing textual data
# Numeric columns retained for the analysis.
keep_cols <- c(
  "Slave.Age",
  "Transaction.Number.of.Adult.Slaves",
  "Transaction.Number.of.Child.Slaves",
  "Transaction.Number.of.Total.Slaves.Purchased",
  "Transaction.Sale.Details.Discount.Rate",
  "Transaction.Sale.Details.Predicted.Interest.Rate",
  "Transaction.Sale.Details.Price"
)

# Fail early if any expected column is absent. Without this check the
# `%in%` filter below would silently keep fewer columns, and the positional
# summaries and PCA later in the script would run on the wrong variables.
missing_cols <- setdiff(c(keep_cols, "Transaction.Date"), names(dat))
if (length(missing_cols) > 0) {
  stop(
    "Required column(s) not found in the dataset: ",
    paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}

# Reduce each transaction date (parsed as month/day/year) to its integer year.
dat$Transaction.Date
parsed_dates <- as.Date(dat$Transaction.Date, format = "%m/%d/%Y")
Dates_extracted <- as.integer(format(parsed_dates, "%Y"))
Dates_extracted

# Values that do not match the expected format become NA and are removed by
# na.omit() below; report how many so the loss of rows is not silent.
unparsed <- is.na(parsed_dates) & !is.na(dat$Transaction.Date)
if (any(unparsed)) {
  warning(
    sum(unparsed),
    " transaction date(s) could not be parsed as %m/%d/%Y;",
    " the corresponding rows will be dropped",
    call. = FALSE
  )
}

# Keep only the selected columns (in their original file order);
# drop = FALSE guarantees the result stays a data.frame.
dat <- dat[, names(dat) %in% keep_cols, drop = FALSE]
dat$Dates <- Dates_extracted
names(dat)
# Remove every row that has a missing value in any remaining column.
dat <- na.omit(dat)
#3) Pre-PCA dataset presentation
# Summary statistics on the original scale, shown four columns at a time.
summary(dat[, seq_len(4)])
summary(dat[, 5:8])

# Standardise every column (centre, then divide by its standard deviation)
# so that all variables are on a comparable scale for PCA.
scaled.dat <- scale(dat, center = TRUE, scale = TRUE)
# Sanity check: the per-column standard deviations after scaling.
apply(scaled.dat, MARGIN = 2, FUN = sd)

# The same summaries, now on the standardised data.
summary(scaled.dat[, seq_len(4)])
summary(scaled.dat[, 5:8])

# Pairs plot of the standardised variables: scatterplots of all pairs in the
# lower triangle, density estimates along the diagonal, and pair-wise
# correlations in the upper triangle.
library(GGally)
ggpairs(data.frame(scaled.dat))
#4) Perform PCA
# Fit the principal components on the standardised data, then print the
# component summary and the loadings (rotation matrix).
pr.out <- prcomp(scaled.dat)
summary(pr.out)
pr.out[["rotation"]]

# Proportion of variance explained (PVE) by each principal component; the
# variance of a component is its standard deviation squared.
pr.var <- pr.out$sdev^2
pve <- pr.var / sum(pr.var)
plot(
  pve,
  xlab = "Principal Component",
  ylab = "Proportion of Variance Explained",
  ylim = c(0, 1),
  type = "b"
)

# Cumulative proportion of variance explained, with the principal components
# in decreasing order of variance.
plot(
  cumsum(pve),
  xlab = "Principal Component",
  ylab = "Cumulative proportion of Variance Explained",
  ylim = c(0, 1),
  type = "b"
)

# Biplot with the first two principal components as axes: observations are
# drawn at their scores and the arrows show the variable loadings.
biplot(pr.out, scale = 0)