# Banking Stress Testing 
- title: "Banking_StressTesting"
- author: SP Tian
- date: April, 2019 
- output: html_document

## Import dataset
income - Income_Hist_Data
macro - Supervisory Severely Adverse Domestic
> How to get the foreign data? Exchange rates? 

In [None]:
library(readxl)
library(zoo)
library(fpp2)

In [None]:
# Load the bank income history (Excel workbook) and the macroeconomic
# scenario table (CSV) from the shared input folder.
income <- read_excel("../input/Income_Hist_Data.xlsx")
macroeco <- read.csv(
  "../input/SupervisorySeverelyAdverseDomestic.csv",
  check.names = FALSE,      # keep the raw headers; they are cleaned later
  stringsAsFactors = FALSE, # leave text columns as character
  na.strings = ""           # empty cells are read as NA
)

In [None]:
# ---- Income data: build a year-quarter date index -------------------------
# The unnamed first column of the workbook is auto-named by readxl; depending
# on the installed readxl/tibble version the name is "..1" or "...1". Accept
# both so the rename cannot silently miss, and fail early if no date column
# exists afterwards.
colnames(income)[colnames(income) %in% c("..1", "...1")] <- "date"
stopifnot("date" %in% colnames(income))

# Raw labels are split on "Q": the text before it is the year, the text
# after it is the quarter number.
income_parts <- strsplit(income$date, "Q")
income$year <- as.numeric(vapply(income_parts, function(x) x[1], character(1)))
income$quarter <- vapply(income_parts, function(x) x[2], character(1))
income$date <- as.yearqtr(paste(income$year, income$quarter, sep = " Q"))

# Drop columns 10 and 11 by position.
# NOTE(review): presumably these are the helper year/quarter columns added
# just above (i.e. the workbook has 9 columns) -- confirm against the file.
income[, c(10, 11)] <- NULL

# ---- Macro data: normalise the column names --------------------------------
# Lower-case, spaces -> dots, parentheses removed.
clean_names <- tolower(colnames(macroeco))
clean_names <- gsub(" ", ".", clean_names)
clean_names <- gsub("[()]", "", clean_names) # Or "\\(|\\)"
colnames(macroeco) <- clean_names

# Names starting with a digit are awkward in formulas; give the treasury
# series short syntactic names.
colnames(macroeco)[colnames(macroeco) == "3-month.treasury.rate"] <- "three"
colnames(macroeco)[colnames(macroeco) == "5-year.treasury.yield"] <- "five"
colnames(macroeco)[colnames(macroeco) == "10-year.treasury.yield"] <- "ten"

# ---- Macro data: build the same year-quarter date index --------------------
# Raw labels are split on a space: first token is the quarter, second the
# year. They are re-pasted as "<year> <quarter>" before parsing.
macro_parts <- strsplit(macroeco$date, " ")
macro_quarter <- vapply(macro_parts, function(x) x[1], character(1))
macro_year <- as.numeric(vapply(macro_parts, function(x) x[2], character(1)))
macroeco$date <- as.yearqtr(paste(macro_year, macro_quarter))
colnames(macroeco)

# Remove the temporaries so only `income` and `macroeco` remain.
rm(clean_names, income_parts, macro_parts, macro_quarter, macro_year)

## Data Exploration 

### Summary table of macroeconomic variables 

In [None]:
# Show per-column summary statistics of the cleaned macroeconomic table.
summary(macroeco)

> REPORT: Explanation of units for each column 

### Report NAs

In [None]:
# Number of missing values in each macroeconomic column.
vapply(macroeco, function(column) sum(is.na(column)), integer(1))

### Time-series Drawing of macroeconomic variables

In [None]:
# Convert the macro table into a multivariate time series starting 1976 Q1.
# The rows are quarterly (the date column is parsed with as.yearqtr), so the
# series needs frequency = 4; with the default frequency of 1 each quarter
# is indexed as a separate year and year-based axis limits select the wrong
# observations.
variable_ts <- ts(macroeco, start = c(1976, 1), frequency = 4)
colnames(variable_ts)

In [None]:
# Alternatively, using a loop?
# Real GDP growth, x-axis limited to 2010-2019, with 2016-2019 shaded.
# ymax must be Inf (not -Inf) for the band to have any height, and alpha
# must be the decimal 0.3: "alpha = 0,3" parses as alpha = 0 plus a stray
# positional argument 3, which makes the band invisible.
autoplot(variable_ts[, "real.gdp.growth"]) +
  scale_x_continuous(limits = c(2010, 2019)) +
  ggtitle("Real GDP Growth") +
  annotate(
    "rect",
    xmin = 2016, xmax = 2019, ymin = -Inf, ymax = Inf,
    fill = "lightblue", alpha = 0.3
  )

In [None]:
# Nominal GDP growth, x-axis limited to 2010-2019, with 2016-2019 shaded.
# ymax must be Inf (not -Inf) for the band to have any height, and alpha
# must be the decimal 0.3: "alpha = 0,3" parses as alpha = 0 plus a stray
# positional argument 3, which makes the band invisible.
autoplot(variable_ts[, "nominal.gdp.growth"]) +
  scale_x_continuous(limits = c(2010, 2019)) +
  ggtitle("Nominal GDP Growth") +
  annotate(
    "rect",
    xmin = 2016, xmax = 2019, ymin = -Inf, ymax = Inf,
    fill = "lightblue", alpha = 0.3
  )

In [None]:
# Real disposable income growth, x-axis limited to 2010-2019, with
# 2016-2019 shaded.
# ymax must be Inf (not -Inf) for the band to have any height, and alpha
# must be the decimal 0.3: "alpha = 0,3" parses as alpha = 0 plus a stray
# positional argument 3, which makes the band invisible.
autoplot(variable_ts[, "real.disposable.income.growth"]) +
  scale_x_continuous(limits = c(2010, 2019)) +
  ggtitle("Real Disposable Income Growth") +
  annotate(
    "rect",
    xmin = 2016, xmax = 2019, ymin = -Inf, ymax = Inf,
    fill = "lightblue", alpha = 0.3
  )

In [None]:
# Unemployment rate, x-axis limited to 2010-2019, with 2016-2019 shaded.
# ymax must be Inf (not -Inf) for the band to have any height, and alpha
# must be the decimal 0.3: "alpha = 0,3" parses as alpha = 0 plus a stray
# positional argument 3, which makes the band invisible.
autoplot(variable_ts[, "unemployment.rate"]) +
  scale_x_continuous(limits = c(2010, 2019)) +
  ggtitle("Unemployment Rate") +
  annotate(
    "rect",
    xmin = 2016, xmax = 2019, ymin = -Inf, ymax = Inf,
    fill = "lightblue", alpha = 0.3
  )

In [None]:
# CPI inflation rate, x-axis limited to 2010-2019, with 2016-2019 shaded.
# ymax must be Inf (not -Inf) for the band to have any height, and alpha
# must be the decimal 0.3: "alpha = 0,3" parses as alpha = 0 plus a stray
# positional argument 3, which makes the band invisible.
autoplot(variable_ts[, "cpi.inflation.rate"]) +
  scale_x_continuous(limits = c(2010, 2019)) +
  ggtitle("CPI Inflation Rate") +
  annotate(
    "rect",
    xmin = 2016, xmax = 2019, ymin = -Inf, ymax = Inf,
    fill = "lightblue", alpha = 0.3
  )

In [None]:
# 3-month treasury series ("three"), x-axis limited to 2010-2019, with
# 2016-2019 shaded.
# ymax must be Inf (not -Inf) for the band to have any height, and alpha
# must be the decimal 0.3: "alpha = 0,3" parses as alpha = 0 plus a stray
# positional argument 3, which makes the band invisible.
autoplot(variable_ts[, "three"]) +
  scale_x_continuous(limits = c(2010, 2019)) +
  ggtitle("Three Month Treasury Yield") +
  annotate(
    "rect",
    xmin = 2016, xmax = 2019, ymin = -Inf, ymax = Inf,
    fill = "lightblue", alpha = 0.3
  )

In [None]:
# 5-year treasury yield ("five"), x-axis limited to 2010-2019, with
# 2016-2019 shaded.
# ymax must be Inf (not -Inf) for the band to have any height, and alpha
# must be the decimal 0.3: "alpha = 0,3" parses as alpha = 0 plus a stray
# positional argument 3, which makes the band invisible.
autoplot(variable_ts[, "five"]) +
  scale_x_continuous(limits = c(2010, 2019)) +
  ggtitle("5 Year Treasury Yield") +
  annotate(
    "rect",
    xmin = 2016, xmax = 2019, ymin = -Inf, ymax = Inf,
    fill = "lightblue", alpha = 0.3
  )

In [None]:
# 10-year treasury yield ("ten"), x-axis limited to 2010-2019, with
# 2016-2019 shaded.
# ymax must be Inf (not -Inf) for the band to have any height, and alpha
# must be the decimal 0.3: "alpha = 0,3" parses as alpha = 0 plus a stray
# positional argument 3, which makes the band invisible.
autoplot(variable_ts[, "ten"]) +
  scale_x_continuous(limits = c(2010, 2019)) +
  ggtitle("10 Year Treasury Yield") +
  annotate(
    "rect",
    xmin = 2016, xmax = 2019, ymin = -Inf, ymax = Inf,
    fill = "lightblue", alpha = 0.3
  )

In [None]:
# BBB corporate yield, x-axis limited to 2010-2019, with 2016-2019 shaded.
# ymax must be Inf (not -Inf) for the band to have any height, and alpha
# must be the decimal 0.3: "alpha = 0,3" parses as alpha = 0 plus a stray
# positional argument 3, which makes the band invisible.
autoplot(variable_ts[, "bbb.corporate.yield"]) +
  scale_x_continuous(limits = c(2010, 2019)) +
  ggtitle("BBB Corporate Yield") +
  annotate(
    "rect",
    xmin = 2016, xmax = 2019, ymin = -Inf, ymax = Inf,
    fill = "lightblue", alpha = 0.3
  )

In [None]:
# Mortgage rate, x-axis limited to 2010-2019, with 2016-2019 shaded.
# ymax must be Inf (not -Inf) for the band to have any height, and alpha
# must be the decimal 0.3: "alpha = 0,3" parses as alpha = 0 plus a stray
# positional argument 3, which makes the band invisible.
autoplot(variable_ts[, "mortgage.rate"]) +
  scale_x_continuous(limits = c(2010, 2019)) +
  ggtitle("Mortgage Rate") +
  annotate(
    "rect",
    xmin = 2016, xmax = 2019, ymin = -Inf, ymax = Inf,
    fill = "lightblue", alpha = 0.3
  )

In [None]:
# Prime rate, x-axis limited to 2010-2019, with 2016-2019 shaded.
# ymax must be Inf (not -Inf) for the band to have any height, and alpha
# must be the decimal 0.3: "alpha = 0,3" parses as alpha = 0 plus a stray
# positional argument 3, which makes the band invisible.
autoplot(variable_ts[, "prime.rate"]) +
  scale_x_continuous(limits = c(2010, 2019)) +
  ggtitle("Prime Rate") +
  annotate(
    "rect",
    xmin = 2016, xmax = 2019, ymin = -Inf, ymax = Inf,
    fill = "lightblue", alpha = 0.3
  )

In [None]:
# Plot the full Dow Jones total stock market index level series
# (no axis limits or shading, unlike the rate plots).
autoplot(variable_ts[, "dow.jones.total.stock.market.index.level"])

In [None]:
# Plot the full house price index level series.
autoplot(variable_ts[, "house.price.index.level"])

In [None]:
# Plot the full commercial real estate price index level series.
autoplot(variable_ts[, "commercial.real.estate.price.index.level"])

In [None]:
# Plot the full market volatility index level series.
autoplot(variable_ts[, "market.volatility.index.level"])

### Correlation Table

In [None]:
# Pairwise correlations of every column except the first (the date),
# computed on complete rows only.
cor(macroeco[-1], use = "na.or.complete")

## Data Manipulation 
> problem of using mean - should we find another way? 

### Lagged difference of all price index levels 

In [None]:
# Quarter-over-quarter change in the stock index level; the first row has
# no predecessor, so it is padded with NA to keep the column length.
macroeco$diff.stock <- c(
  NA,
  diff(macroeco$dow.jones.total.stock.market.index.level)
)

In [None]:
# Average one-step change of the house price index level.
mean(diff(macroeco$house.price.index.level))

In [None]:
# Quarter-over-quarter change in the house price index level. The first row
# has no predecessor; it is filled with the mean of the remaining
# differences. The mean is computed here rather than pasted in as a literal
# (previously 0.6773, apparently copied by hand from the mean printed in the
# cell above) so it cannot go stale when the input data changes.
macroeco$diff.house <- c(NA, diff(macroeco$house.price.index.level, lag = 1))
macroeco$diff.house[1] <- mean(macroeco$diff.house[-1])

In [None]:
# Average one-step change of the commercial real estate price index level.
mean(diff(macroeco$commercial.real.estate.price.index.level))

In [None]:
# Quarter-over-quarter change in the commercial real estate price index
# level. The first row has no predecessor; it is filled with the mean of the
# remaining differences. The mean is computed here rather than pasted in as
# a literal (previously 0.8576, apparently copied by hand from the mean
# printed in the cell above) so it cannot go stale when the data changes.
macroeco$diff.real.estate <- c(
  NA,
  diff(macroeco$commercial.real.estate.price.index.level, lag = 1)
)
macroeco$diff.real.estate[1] <- mean(macroeco$diff.real.estate[-1])

In [None]:
# Quarter-over-quarter change in the market volatility index level; the
# first row has no predecessor, so it is padded with NA.
macroeco$diff.market <- c(
  NA,
  diff(macroeco$market.volatility.index.level)
)

### Draw New Time-series on Lagged data

In [None]:
# Plot the differenced stock index. ts() is called without start/frequency,
# so the x-axis is the observation number rather than a calendar date.
autoplot(ts(macroeco$diff.stock))

In [None]:
# Plot the differenced house price index. ts() is called without
# start/frequency, so the x-axis is the observation number.
autoplot(ts(macroeco$diff.house))

In [None]:
# Plot the differenced commercial real estate price index. ts() is called
# without start/frequency, so the x-axis is the observation number.
autoplot(ts(macroeco$diff.real.estate))

In [None]:
# Plot the differenced market volatility index. ts() is called without
# start/frequency, so the x-axis is the observation number.
autoplot(ts(macroeco$diff.market))

> Illustration of all data and units chosen in the macroeconomic variables dataset 

In [None]:
# Remove columns 14 to 17 by position, then list what is left.
# NOTE(review): presumably these are the four raw index-level columns that
# the diff.* columns replace -- confirm the column order of the CSV.
macroeco <- macroeco[, -(14:17)]
colnames(macroeco)

### Aggregated difference of income file
### Split up file for separate banks

In [None]:
# The first 68 rows of the income table go to `citi`, all remaining rows to
# `jpmg`.
# NOTE(review): assumes the workbook stacks the two banks in that order with
# exactly 68 rows for the first one -- confirm against the file.
citi <- income[seq_len(68), ]
jpmg <- income[-seq_len(68), ]

In [None]:
# Quarter-over-quarter change of each reported income-statement item for
# Citi. The first quarter has no predecessor and is set to NA.
# NOTE(review): "total.interes.income.reported" is spelled without the final
# "t"; presumably that matches the workbook header -- confirm.
citi$int.income    <- c(NA, diff(citi$total.interes.income.reported))
citi$int.exp       <- c(NA, diff(citi$total.interest.expense.reported))
citi$nonint.income <- c(NA, diff(citi$total.noninterest.income.reported))
citi$nonint.exp    <- c(NA, diff(citi$total.noninterest.expense.reported))
citi$loss          <- c(NA, diff(citi$provision.losses)) # what's missing here?

In [None]:
# Quarter-over-quarter change of each reported income-statement item for
# JPMG. The first quarter has no predecessor and is set to NA.
# NOTE(review): "total.interes.income.reported" is spelled without the final
# "t"; presumably that matches the workbook header -- confirm.
jpmg$int.income    <- c(NA, diff(jpmg$total.interes.income.reported))
jpmg$int.exp       <- c(NA, diff(jpmg$total.interest.expense.reported))
jpmg$nonint.income <- c(NA, diff(jpmg$total.noninterest.income.reported))
jpmg$nonint.exp    <- c(NA, diff(jpmg$total.noninterest.expense.reported))
jpmg$loss          <- c(NA, diff(jpmg$provision.losses))

In [None]:
# Drop columns 2 to 9 by position from both bank tables.
# NOTE(review): presumably these are the raw reported columns, leaving the
# date plus the differenced columns -- confirm the workbook layout.
citi <- citi[, -(2:9)]
jpmg <- jpmg[, -(2:9)]

### Merge database by "date"

In [None]:
# Join the macro variables onto each bank table by quarter. all.y = TRUE
# keeps every bank row; macro rows with no matching bank quarter are dropped
# (all.x defaults to FALSE).
citimacroeco <- merge(x = macroeco, y = citi, by = "date", all.y = TRUE)
jpmgmacroeco <- merge(x = macroeco, y = jpmg, by = "date", all.y = TRUE)

## Regression Model

### Here: Regression Model!!!
> * int income ~ diff.real.estate, diff.stock?, five yr 
> * int exp ~ diff.stock, diff.house, five yr
> * non int income ~ cpi.inflation.rate + diff.stock + real.gdp
> * non int exp ~ cpi.inflation.rate + diff.real.estate 
> * loss ~ unemployment.rate + diff.real.estate 

In [None]:
# Citi: keep only rows without any NA and convert to a ts object (default
# start/frequency), then regress the change in interest income on the change
# in the commercial real estate index, the change in the stock index and the
# 5-year treasury yield.
tsciti <- ts(na.omit(citimacroeco))
intinc_tsmodel1 <- tslm(int.income ~ diff.real.estate + diff.stock + five, data = tsciti)
# Print the coefficient table and fit statistics.
summary(intinc_tsmodel1)

In [None]:
# Residual diagnostics for the Citi interest-income model.
checkresiduals(intinc_tsmodel1)

In [None]:
# Forecast Citi interest income from the fitted model using the merged
# Citi/macro table as the predictor values, then plot the result.
# h is set to 8 for consistency with every other forecast in this notebook
# (it was 1 here only). Per the forecast package documentation for lm-type
# models, h is ignored when newdata is supplied, so this does not change
# the forecasts.
fc1 <- forecast(intinc_tsmodel1, newdata = citimacroeco, h = 8)
autoplot(fc1) + ggtitle("Forecasts for Citigroup Interest Income")

In [None]:
# Display the fc1 forecast object.
fc1

In [None]:
# Citi: regress the change in interest expense on the change in the stock
# index, the change in the house price index and the 5-year treasury yield.
intexp_tsmodel1 <- tslm(int.exp ~ diff.stock + diff.house + five, data = tsciti)
# Print the coefficient table and fit statistics.
summary(intexp_tsmodel1)

In [None]:
# Residual diagnostics for the Citi interest-expense model.
checkresiduals(intexp_tsmodel1)

In [None]:
# Forecast Citi interest expense and plot it. The forecast must come from
# the interest-expense model (intexp_tsmodel1); it previously reused the
# interest-income model, so the plot titled "Interest Expense" actually
# showed interest-income forecasts.
fc2 <- forecast(intexp_tsmodel1, newdata = citimacroeco, h = 8)
autoplot(fc2) + ggtitle("Forecasts for Citigroup Interest Expense")

In [None]:
# Display the fc2 forecast object.
fc2

In [None]:
# Citi: regress the change in non-interest income on CPI inflation and real
# GDP growth.
citi_noninc <- tslm(nonint.income ~ cpi.inflation.rate + real.gdp.growth, data = tsciti)
# Print the coefficient table and fit statistics.
summary(citi_noninc)

In [None]:
# Residual diagnostics for the Citi non-interest-income model.
checkresiduals(citi_noninc)

In [None]:
# Forecast Citi non-interest income using the merged Citi/macro table as
# predictor values, then plot it with a descriptive title.
fc3 <- forecast(object = citi_noninc, newdata = citimacroeco, h = 8)
autoplot(fc3) +
  labs(title = "Forecasts for Citigroup Non-interest Income")

In [None]:
# Display the fc3 forecast object.
fc3

In [None]:
# Citi: regress the change in non-interest expense on CPI inflation and the
# change in the commercial real estate index.
citi_nonexp <- tslm(nonint.exp ~ cpi.inflation.rate + diff.real.estate, data = tsciti)
# Print the coefficient table and fit statistics.
summary(citi_nonexp)

In [None]:
# Residual diagnostics for the Citi non-interest-expense model.
checkresiduals(citi_nonexp)

In [None]:
# Forecast Citi non-interest expense and plot it. The forecast must come
# from the non-interest-expense model (citi_nonexp); it previously reused
# citi_noninc, so the plot titled "Non-interest Expense" actually showed
# non-interest-income forecasts.
fc4 <- forecast(citi_nonexp, newdata = citimacroeco, h = 8)
autoplot(fc4) + ggtitle("Forecasts for Citigroup Non-interest Expense")

In [None]:
# Display the fc4 forecast object.
fc4

In [None]:
# Citi: regress the change in loss provisions on the unemployment rate and
# the change in the commercial real estate index.
citi_loss <- tslm(loss ~ unemployment.rate + diff.real.estate, data = tsciti)
# Print the coefficient table and fit statistics.
summary(citi_loss)

In [None]:
# Residual diagnostics for the Citi loss model.
checkresiduals(citi_loss)

In [None]:
# Forecast Citi loss provisions using the merged Citi/macro table as
# predictor values, then plot it with a descriptive title.
fc5 <- forecast(object = citi_loss, newdata = citimacroeco, h = 8)
autoplot(fc5) +
  labs(title = "Forecasts for Citigroup Loss")

In [None]:
# Display the fc5 forecast object.
fc5

In [None]:
# JPMG: keep only rows without any NA and convert to a ts object (default
# start/frequency), then regress the change in interest income on the change
# in the commercial real estate index and CPI inflation.
# Note: this reuses the name intinc_tsmodel1, overwriting the Citi
# interest-income model fitted earlier in the notebook.
tsjpmg <- ts(na.omit(jpmgmacroeco))
intinc_tsmodel1 <- tslm(int.income ~ diff.real.estate + cpi.inflation.rate, data = tsjpmg)
# Print the coefficient table and fit statistics.
summary(intinc_tsmodel1)

In [None]:
# Residual diagnostics for the interest-income model; when cells are run in
# order, intinc_tsmodel1 is the JPMG fit at this point.
checkresiduals(intinc_tsmodel1)

In [None]:
# Forecast JPMG interest income using the merged JPMG/macro table as
# predictor values, then plot it with a descriptive title.
fc6 <- forecast(object = intinc_tsmodel1, newdata = jpmgmacroeco, h = 8)
autoplot(fc6) +
  labs(title = "Forecasts for JPMG Interest Income")

In [None]:
# Display the fc6 forecast object.
fc6

In [None]:
# JPMG: regress the change in interest expense on the change in the stock
# index, the change in the house price index and the 5-year treasury yield.
# Note: this reuses the name intexp_tsmodel1, overwriting the Citi
# interest-expense model fitted earlier in the notebook.
intexp_tsmodel1 <- tslm(int.exp ~ diff.stock + diff.house + five, data = tsjpmg)
# Print the coefficient table and fit statistics.
summary(intexp_tsmodel1)

> Not that good 

In [None]:
# Residual diagnostics for the interest-expense model; when cells are run in
# order, intexp_tsmodel1 is the JPMG fit at this point.
checkresiduals(intexp_tsmodel1)

In [None]:
# Forecast JPMG interest expense using the merged JPMG/macro table as
# predictor values, then plot it with a descriptive title.
fc7 <- forecast(object = intexp_tsmodel1, newdata = jpmgmacroeco, h = 8)
autoplot(fc7) +
  labs(title = "Forecasts for JPMG Interest Expense")

In [None]:
# Display the fc7 forecast object.
fc7

In [None]:
# JPMG: regress the change in non-interest income on CPI inflation and real
# GDP growth.
jpmg_noninc <- tslm(nonint.income ~ cpi.inflation.rate + real.gdp.growth, data = tsjpmg)
# Print the coefficient table and fit statistics.
summary(jpmg_noninc)

In [None]:
# Residual diagnostics for the JPMG non-interest-income model.
checkresiduals(jpmg_noninc)

In [None]:
# Forecast JPMG non-interest income using the merged JPMG/macro table as
# predictor values, then plot it with a descriptive title.
fc8 <- forecast(object = jpmg_noninc, newdata = jpmgmacroeco, h = 8)
autoplot(fc8) +
  labs(title = "Forecasts for JPMG Non-interest Income")

In [None]:
# Display the fc8 forecast object.
fc8

In [None]:
# JPMG: regress the change in non-interest expense on CPI inflation and the
# change in the commercial real estate index.
jpmg_nonexp <- tslm(nonint.exp ~ cpi.inflation.rate + diff.real.estate, data = tsjpmg)
# Print the coefficient table and fit statistics.
summary(jpmg_nonexp)

> Not that good

In [None]:
# Residual diagnostics for the JPMG non-interest-expense model.
checkresiduals(jpmg_nonexp)

In [None]:
# Forecast JPMG non-interest expense using the merged JPMG/macro table as
# predictor values, then plot it with a descriptive title.
fc9 <- forecast(object = jpmg_nonexp, newdata = jpmgmacroeco, h = 8)
autoplot(fc9) +
  labs(title = "Forecasts for JPMG Non-interest Expense")

In [None]:
# Display the fc9 forecast object.
fc9

In [None]:
# JPMG: regress the change in loss provisions on the unemployment rate and
# the change in the commercial real estate index.
jpmg_loss <- tslm(loss ~ unemployment.rate + diff.real.estate, data = tsjpmg)
# Print the coefficient table and fit statistics.
summary(jpmg_loss)

In [None]:
# Residual diagnostics for the JPMG loss model.
checkresiduals(jpmg_loss)

In [None]:
# Forecast JPMG loss provisions using the merged JPMG/macro table as
# predictor values, then plot it with a descriptive title.
fc10 <- forecast(object = jpmg_loss, newdata = jpmgmacroeco, h = 8)
autoplot(fc10) +
  labs(title = "Forecasts for JPMG Loss")

In [None]:
# Display the fc10 forecast object.
fc10