# R Packages

In [None]:
# Install the packages this notebook depends on. Packages that are already
# available are skipped, so re-running the cell does not download them again.
required_pkgs <- c("ISLR", "ISLR2", "caret", "pROC")
is_installed <- vapply(
  required_pkgs,
  requireNamespace,
  logical(1),
  quietly = TRUE
)
missing_pkgs <- required_pkgs[!is_installed]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}

In [None]:
# Attach the packages used throughout the notebook.
library("ISLR")
library("ISLR2")
library("caret")
library("pROC")
# ggplot() is called in the plotting cells; attach ggplot2 explicitly instead
# of relying on another package attaching it as a side effect.
library("ggplot2")

# ISLR :: Smarket

In [None]:
# Take the Smarket data set from the ISLR package (namespace-qualified, since
# ISLR2 ships a data set with the same name) and list its columns.
df_smarket <- ISLR::Smarket
colnames(df_smarket)

In [None]:
# Binomial GLM (logistic regression) of Direction on every other column
# except Today, fitted on all rows of df_smarket.
smarket_glm <- glm(
  formula = Direction ~ . - Today,
  family = binomial,
  data = df_smarket
)
# Show the model formula, then the coefficient table and fit statistics.
formula(smarket_glm)
summary(smarket_glm)

## Train/Test Split and Exploratory Plots

In [None]:
# Reproducible 70/30 split of df_smarket into a training partition (_ml) and
# a hold-out partition (_test).
set.seed(123)
# A fractional size is truncated to a whole number of rows.
particao <- sample.int(
  nrow(df_smarket),
  size = trunc(0.7 * nrow(df_smarket))
)
df_smarket_test <- df_smarket[-particao, ]
df_smarket_ml <- df_smarket[particao, ]
# Relative frequency of each Direction class in the two partitions.
prop.table(table(df_smarket_ml[["Direction"]]))
prop.table(table(df_smarket_test[["Direction"]]))

In [None]:
# Scatterplot matrix of every column, points coloured by Direction.
pairs(df_smarket, col = df_smarket[["Direction"]])

In [None]:
# Overlaid histograms of Lag4, one per Direction class. The title and x-axis
# label name the variable that is actually plotted (Lag4).
ggplot(df_smarket, aes(Lag4, fill = Direction)) +
  geom_histogram(bins = 30, alpha = .8, position = "identity") +
  labs(
    title = "Distribuição de Lag4 por Direction",
    x = "Lag4",
    y = "Contagem"
  )

## Prediction

In [None]:
# Fit the evaluation model on the training partition only. smarket_glm was
# fitted on every row of df_smarket, including the hold-out rows, so scoring
# df_smarket_test with it would report optimistic in-sample performance.
smarket_glm_ml <- glm(
  Direction ~ . - Today,
  data = df_smarket_ml,
  family = binomial
)

# Predicted values for the hold-out rows; type = "response" returns them on
# the probability scale instead of the link (logit) scale.
pred_smarket_glm <- predict(
  smarket_glm_ml,
  newdata = df_smarket_test,
  type = "response"
)
summary(pred_smarket_glm)

In [None]:
# Turn the predicted probabilities into class labels with a 0.5 cut-off,
# then report the share of hold-out rows where label and Direction agree
# (TRUE) or disagree (FALSE).
answer_smarket_glm <- ifelse(pred_smarket_glm >= 0.5, "Up", "Down")
prop.table(table(df_smarket_test[["Direction"]] == answer_smarket_glm))

# ISLR :: Default

In [None]:
# Take the Default data set from the ISLR package and preview its first rows.
df_default <- ISLR::Default
head(df_default, n = 6L)

In [None]:
# Binomial GLM (logistic regression) with student as the response and every
# other column of df_default as a predictor.
default_glm <- glm(
  formula = student ~ .,
  family = binomial,
  data = df_default
)
# Show the model formula, then the coefficient table and fit statistics.
formula(default_glm)
summary(default_glm)

## Train/Test Split and Exploratory Plots

In [None]:
# Reproducible 70/30 split of df_default into a training partition (_ml) and
# a hold-out partition (_test).
set.seed(123)
# A fractional size is truncated to a whole number of rows.
particao <- sample.int(
  nrow(df_default),
  size = trunc(0.7 * nrow(df_default))
)
df_default_test <- df_default[-particao, ]
df_default_ml <- df_default[particao, ]
# Relative frequency of each student class in the two partitions.
prop.table(table(df_default_ml[["student"]]))
prop.table(table(df_default_test[["student"]]))

In [None]:
# Scatterplot matrix of every column, points coloured by student.
pairs(df_default, col = df_default[["student"]])

In [None]:
# Overlaid histograms of income, one per student class. The title and x-axis
# label name the variable that is actually plotted (income).
ggplot(df_default, aes(income, fill = student)) +
  geom_histogram(bins = 30, alpha = .8, position = "identity") +
  labs(
    title = "Distribuição de income por student",
    x = "income",
    y = "Contagem"
  )

# ISLR2 :: Smarket

In [None]:
# Take the Smarket data set from the ISLR2 package and preview the rows of
# that newly loaded copy (df_smarket2), not the ISLR copy held in df_smarket.
df_smarket2 <- ISLR2::Smarket
head(df_smarket2)

In [None]:
# Binomial GLM (logistic regression) of Direction on every other column
# except Today, fitted on all rows of df_smarket2.
smarket2_glm <- glm(
  formula = Direction ~ . - Today,
  family = binomial,
  data = df_smarket2
)
# Show the model formula, then the coefficient table and fit statistics.
formula(smarket2_glm)
summary(smarket2_glm)

## Train/Test Split and Exploratory Plots

In [None]:
# Reproducible 70/30 split of df_smarket2 into a training partition (_ml)
# and a hold-out partition (_test).
set.seed(123)
# A fractional size is truncated to a whole number of rows.
particao <- sample.int(
  nrow(df_smarket2),
  size = trunc(0.7 * nrow(df_smarket2))
)
df_smarket2_test <- df_smarket2[-particao, ]
df_smarket2_ml <- df_smarket2[particao, ]
# Relative frequency of each Direction class in the two partitions.
prop.table(table(df_smarket2_ml[["Direction"]]))
prop.table(table(df_smarket2_test[["Direction"]]))

In [None]:
# Scatterplot matrix of every column, points coloured by Direction.
pairs(df_smarket2, col = df_smarket2[["Direction"]])

In [None]:
# Overlaid histograms of Lag1, one per Direction class. The title and x-axis
# label name the variable that is actually plotted (Lag1).
ggplot(df_smarket2, aes(Lag1, fill = Direction)) +
  geom_histogram(bins = 30, alpha = .8, position = "identity") +
  labs(
    title = "Distribuição de Lag1 por Direction",
    x = "Lag1",
    y = "Contagem"
  )

# ISLR2 :: Weekly

In [None]:
# Take the Weekly data set from the ISLR2 package and preview its first rows.
df_weekly <- ISLR2::Weekly
head(df_weekly, n = 6L)

In [None]:
# Binomial GLM (logistic regression) of Direction on every other column
# except Today, fitted on all rows of df_weekly.
weekly_glm <- glm(
  formula = Direction ~ . - Today,
  family = binomial,
  data = df_weekly
)
# Show the model formula, then the coefficient table and fit statistics.
formula(weekly_glm)
summary(weekly_glm)

## Train/Test Split and Exploratory Plots

In [None]:
# Reproducible 70/30 split of df_weekly into a training partition (_ml) and
# a hold-out partition (_test).
set.seed(123)
# A fractional size is truncated to a whole number of rows.
particao <- sample.int(
  nrow(df_weekly),
  size = trunc(0.7 * nrow(df_weekly))
)
df_weekly_test <- df_weekly[-particao, ]
df_weekly_ml <- df_weekly[particao, ]
# Relative frequency of each Direction class in the two partitions.
prop.table(table(df_weekly_ml[["Direction"]]))
prop.table(table(df_weekly_test[["Direction"]]))

In [None]:
# Scatterplot matrix of every column, points coloured by Direction.
pairs(df_weekly, col = df_weekly[["Direction"]])

In [None]:
# Overlaid histograms of Lag1, one per Direction class. The title and x-axis
# label name the variable that is actually plotted (Lag1).
ggplot(df_weekly, aes(Lag1, fill = Direction)) +
  geom_histogram(bins = 30, alpha = .8, position = "identity") +
  labs(
    title = "Distribuição de Lag1 por Direction",
    x = "Lag1",
    y = "Contagem"
  )

# ISLR2 :: Caravan

In [None]:
# Take the Caravan data set from the ISLR2 package and preview its first rows.
df_caravan <- ISLR2::Caravan
head(df_caravan, n = 6L)

In [None]:
# Binomial GLM (logistic regression) with Purchase as the response and every
# other column of df_caravan as a predictor.
caravan_glm <- glm(
  formula = Purchase ~ .,
  family = binomial,
  data = df_caravan
)
# Coefficient table and fit statistics.
summary(caravan_glm)

## Train/Test Split and Exploratory Plots

In [None]:
# Reproducible 70/30 split of df_caravan into a training partition (_ml) and
# a hold-out partition (_test).
set.seed(123)
# A fractional size is truncated to a whole number of rows.
particao <- sample.int(
  nrow(df_caravan),
  size = trunc(0.7 * nrow(df_caravan))
)
df_caravan_test <- df_caravan[-particao, ]
df_caravan_ml <- df_caravan[particao, ]
# Relative frequency of each Purchase class in the two partitions.
prop.table(table(df_caravan_ml[["Purchase"]]))
prop.table(table(df_caravan_test[["Purchase"]]))

In [None]:
# Scatterplot matrix restricted to a handful of columns, points coloured by
# Purchase.
subset_cols <- c(
  "Purchase",
  "MOSTYPE",
  "MAANTHUI",
  "MGEMOMV",
  "MGEMLEEF"
)
pairs(df_caravan[subset_cols], col = df_caravan[["Purchase"]])

In [None]:
# Overlaid histograms of MGEMLEEF, one per Purchase class. The title and
# x-axis label name the variable that is actually plotted (MGEMLEEF).
ggplot(df_caravan, aes(MGEMLEEF, fill = Purchase)) +
  geom_histogram(bins = 30, alpha = .8, position = "identity") +
  labs(
    title = "Distribuição de MGEMLEEF por Purchase",
    x = "MGEMLEEF",
    y = "Contagem"
  )