# In [None]:
# Clean the environment: remove every object from the global workspace so the
# analysis starts from a known state.
rm(list = ls())
library(rstudioapi)

# Set the working directory to the folder that contains this script, so that
# the relative paths to the Excel files resolve. The active-document path is
# only available inside RStudio and is empty for an unsaved document; in both
# cases the current working directory is kept instead of raising an error.
current_path <- if (rstudioapi::isAvailable()) {
  getActiveDocumentContext()$path
} else {
  ""
}
if (isTRUE(nzchar(current_path))) {
  setwd(dirname(current_path))
}
print(getwd())

## Load the dataset
library(readxl)
serie <- read_excel("dataset_finale.xlsx")
serie$Date <- as.Date(serie$Date)
# View() opens a data viewer, so it is only called in an interactive session.
if (interactive()) {
  View(serie)
}

# Create one two-column dataset (date + indicator) for each univariate
# time-series model. Columns are selected by name rather than by position, so
# a reordered spreadsheet cannot silently feed the wrong indicator to a model.
dataset1 <- serie[, c("Date", "CPI_net_food_energy")]            # CPI
dataset2 <- serie[, c("Date", "Financial_rate_consumer_loans")]  # financial rate
dataset3 <- serie[, c("Date", "Fiducia_Consumatori_Usa")]        # consumer confidence

# In [None]:
# Time-series analysis for CPI (column CPI_net_food_energy)

library(tseries)
# Convert the CPI column into a ts object with 4 observations per period
# (quarterly data).
# NOTE(review): no `start` is supplied, so the time index begins at 1 rather
# than at a calendar quarter; consider adding `start` if calendar labels are
# wanted on the plots and forecasts.
CPI.ts <- ts(dataset1$CPI_net_food_energy, frequency = 4)

library(ggplot2)
ggplot(dataset1, aes(Date, CPI_net_food_energy)) +
  geom_line() +
  scale_x_date('Quarter') +
  theme_bw()

# Summary of the series and of its time index
summary(CPI.ts)
start(CPI.ts)
end(CPI.ts)
time(CPI.ts)
frequency(CPI.ts) # frequency 4 = quarterly
# Plot the series and overlay a linear trend line.
ts.plot(CPI.ts, main= " Valori trimestrali dell'indice CPI USA (Q1'91 - Q4'21)")
# A single abline() call draws the regression line; the original wrapped it in
# a second abline() that only received the NULL returned by the first.
abline(reg = lm(CPI.ts ~ time(CPI.ts)), col = "red")

# Multiplicative decomposition into trend, seasonal and random components
decomposed_res <- decompose(CPI.ts, type = "mult")
plot(decomposed_res)
# Author's reading of the plot: there is no seasonality effect in the data

# Verify stationarity of the series: autocorrelation plot + ADF test
acf(CPI.ts)
adf.test(CPI.ts, alternative = "stationary") # recorded result: THE SERIES IS NOT STATIONARY

# First difference of the log series, then repeat the ADF test
CIP.diff <- diff(log(CPI.ts), differences = 1)
adf.test(CIP.diff, alternative = "stationary") # recorded result: THE SERIES IS STATIONARY

# ACF and PACF of the differenced series, side by side on a new device
dev.new()
par(mfrow = c(1, 2))
acf(CIP.diff)
pacf(CIP.diff)

library(forecast)

# Model selection ----
# Chosen specification: ARIMA(1,1,3) fitted on the CPI series in levels.
# With d = 1, include.constant = TRUE adds a drift term.
mod1 <- Arima(
  CPI.ts,
  order = c(1, 1, 3),
  include.constant = TRUE
)
summary(mod1)
coef(mod1)
# Residual diagnostics (plots + Ljung-Box test)
checkresiduals(mod1)
# Author's conclusion: ARIMA(1,1,3) is the best model

# Observed series (red) against the in-sample fitted values (blue)
ts.plot(CPI.ts, col = "red")
lines(fitted(mod1), col = "blue")
# Open a fresh graphics device so the forecast plot does not reuse this one.
dev.new()

# Forecast ----
# Four quarters ahead
CPI_2022 <- forecast(mod1, h = 4)
CPI_2022
plot(CPI_2022)

# In [None]:
# Time-series analysis for financial rates (column
# Financial_rate_consumer_loans)

# Drop rows 123 and 124 before building the series.
# NOTE(review): presumably the financial rate is missing for these last two
# quarters -- confirm against the spreadsheet. The indices are hard-coded, so
# this line must be run exactly once on the freshly created dataset2.
dataset2 <- dataset2[-c(123, 124), ]
# Convert the rate column into a ts object with 4 observations per period
# (quarterly data).
FR.ts <- ts(dataset2$Financial_rate_consumer_loans, frequency = 4)

library(ggplot2)
ggplot(dataset2, aes(Date, Financial_rate_consumer_loans)) +
  geom_line() +
  scale_x_date('Quarter') +
  theme_bw()

# Summary of the series and of its time index
summary(FR.ts)
start(FR.ts)
end(FR.ts)
time(FR.ts)
frequency(FR.ts) # frequency 4 = quarterly
# Plot the series and overlay a linear trend line.
ts.plot(FR.ts, main= "Tasso Finanziario su Prestiti Personali presso Banche Commerciali USA, Prestito 24 Mesi (Q1'91 - Q4'21)")
# A single abline() call draws the regression line; the original wrapped it in
# a second abline() that only received the NULL returned by the first.
abline(reg = lm(FR.ts ~ time(FR.ts)), col = "red")

# Multiplicative decomposition into trend, seasonal and random components
decomposed_res <- decompose(FR.ts, type = "mult")
plot(decomposed_res)
# Author's reading of the plot: there is no seasonality effect in the data

# Verify stationarity of the series: autocorrelation plot + ADF test
acf(FR.ts)
adf.test(FR.ts, alternative = "stationary") # recorded result: THE SERIES IS NOT STATIONARY

# First difference of the log series, then repeat the ADF test
FR.diff <- diff(log(FR.ts), differences = 1)
adf.test(FR.diff, alternative = "stationary") # recorded result: THE SERIES IS STATIONARY

# ACF and PACF of the differenced series, side by side on a new device
dev.new()
par(mfrow = c(1, 2))
acf(FR.diff)
pacf(FR.diff)

library(forecast)

# Model selection ----
# Chosen specification: ARIMA(1,1,0) fitted on the financial-rate series in
# levels. With d = 1, include.constant = TRUE adds a drift term.
mod2 <- Arima(
  FR.ts,
  order = c(1, 1, 0),
  include.constant = TRUE
)
summary(mod2)
coef(mod2)
# Residual diagnostics (plots + Ljung-Box test)
checkresiduals(mod2)
# Author's conclusion: ARIMA(1,1,0) is the best model

# Forecast ----
# Six quarters ahead.
# NOTE(review): presumably two more steps than the other series because the
# last two observations were dropped from this one -- confirm.
FR_2022 <- forecast(mod2, h = 6)
FR_2022

# In [None]:
# Time-series analysis for consumer confidence (column
# Fiducia_Consumatori_Usa)

# Convert the confidence column into a ts object with 4 observations per
# period (quarterly data).
FC.ts <- ts(dataset3$Fiducia_Consumatori_Usa, frequency = 4)

library(ggplot2)
ggplot(dataset3, aes(Date, Fiducia_Consumatori_Usa)) +
  geom_line() +
  scale_x_date('Quarter') +
  theme_bw()

# Summary of the series and of its time index
summary(FC.ts)
start(FC.ts)
end(FC.ts)
time(FC.ts)
frequency(FC.ts) # frequency 4 = quarterly
# Plot the series and overlay a linear trend line.
ts.plot(FC.ts, main= "Valori trimestrali dell'indice di fiducia dei consumatori, USA (Q1'91 - Q4'2022)")
# A single abline() call draws the regression line; the original wrapped it in
# a second abline() that only received the NULL returned by the first.
abline(reg = lm(FC.ts ~ time(FC.ts)), col = "red")

# Multiplicative decomposition into trend, seasonal and random components
decomposed_res <- decompose(FC.ts, type = "mult")
plot(decomposed_res)
# Author's reading of the plot: there is no seasonality effect in the data

# Verify stationarity of the series: autocorrelation plot + ADF test
acf(FC.ts)
adf.test(FC.ts, alternative = "stationary") # recorded result: THE SERIES IS NOT STATIONARY

# First difference of the log series, then repeat the ADF test
FC.diff <- diff(log(FC.ts), differences = 1)
adf.test(FC.diff, alternative = "stationary") # recorded result: THE SERIES IS STATIONARY

# ACF and PACF of the differenced series, side by side on a new device
dev.new()
par(mfrow = c(1, 2))
acf(FC.diff)
pacf(FC.diff)

# Model selection ----
# Chosen specification: ARIMA(1,1,0), i.e. an AR(1) on the first-differenced
# consumer-confidence series. With d = 1, include.constant = TRUE adds a
# drift term.
mod3 <- Arima(
  FC.ts,
  order = c(1, 1, 0),
  include.constant = TRUE
)
summary(mod3)
coef(mod3)
# Residual diagnostics (plots + Ljung-Box test)
checkresiduals(mod3)
# Author's conclusion: ARIMA(1,1,0) is the best model

# Forecast ----
# Four quarters ahead
FC_2022 <- forecast(mod3, h = 4)
FC_2022


# In [None]:
# OLS regression for revenue

dataset_reg <- read_excel("dataset_x_ols.xlsx")
# View() opens a data viewer, so it is only called in an interactive session.
if (interactive()) {
  View(dataset_reg)
}
# Split the data: rows 125-128 are held out for the out-of-sample prediction,
# every other row is used to estimate the model.
dataset_reg1 <- dataset_reg[-c(125:128), ]
dataset_reg2 <- dataset_reg[c(125:128), ]

library(car)
# Revenue (Ricavi) regressed on the three macro indicators
risultati <- lm(data = dataset_reg1, formula = Ricavi~CPI_net_food_energy+
                  Financial_rate_consumer_loans+Fiducia_Consumatori_Usa)
summary(risultati)
anova(risultati)

# Multicollinearity check (variance inflation factors)
vif(risultati)
# VIF values > 10 indicate problematic multicollinearity. Author's recorded
# result: all values are below 10.

# Residual normality check: normal Q-Q plot of the standardized residuals.
# The residuals must NOT be passed through pnorm() first: that maps them to
# (approximately) uniform values on [0, 1], so the Q-Q plot against the normal
# distribution would bend away from the reference line even for perfectly
# normal residuals.
library(stats)
std_residuals <- as.numeric(scale(residuals(risultati)))
qqnorm(std_residuals)
grid()
qqline(std_residuals,
       lwd = 2,
       col = "red"
)
# Mean of the raw residuals; for an OLS fit with an intercept it is 0 up to
# floating-point error. (The former check rounded the mean of pnorm-transformed
# residuals, which is about 0.5 and rounds to 0 regardless of the fit.)
round(mean(residuals(risultati)), 6)

# Heteroscedasticity check (non-constant variance score test).
# Author's recorded result: ok
ncvTest(risultati)

library(olsrr)
# Battery of normality tests on the residuals
ols_test_normality(risultati)
# Values recorded by the author from a previous run:
# Shapiro-Wilk = 0.9897
# Kolmogorov-Smirnov = 0.0456


# MULTIVARIATE LINEAR REGRESSION - TEST
# Predict revenue for the held-out rows. `interval = "confidence"` returns 95%
# confidence bounds for the fitted mean (not prediction intervals for new
# observations).

# Call the predict() generic rather than the predict.lm method directly.
my.predict <- predict(risultati, newdata = dataset_reg2, se.fit = TRUE,
                      interval = "confidence", level = 0.95)
my.predict$fit

# Values recorded by the author from a previous run:
#             fit      lwr [95%]  upr[95%]
#         --------------------------------
# Q1-2022 | 108645.0 | 97852.96 | 119437.0
# Q2-2022 | 106323.0 | 96004.95 | 116641.0
# Q3-2022 | 108633.1 | 98025.47 | 119240.8
# Q4-2022 | 108757.3 | 98223.58 | 119291.0


# 1. Assemble the plotting data: the held-out rows without their second column
#    plus the fitted value and its lower/upper bounds. The three prediction
#    columns are taken by name from the `fit` matrix, so the result no longer
#    depends on how many columns the spreadsheet has.
# NOTE(review): column 2 is presumably the observed revenue (Ricavi) -- confirm.
pred_bounds <- as.data.frame(my.predict$fit)  # columns: fit, lwr, upr
mydata <- cbind(
  dataset_reg2[, -2],
  Ricavi_previsti = pred_bounds$fit,
  fit.lwr = pred_bounds$lwr,
  fit.upr = pred_bounds$upr
)
# View() opens a data viewer, so it is only called in an interactive session.
if (interactive()) {
  View(mydata)
}

# 2. Predicted revenue by date, with a linear smoother and its own band
library("ggplot2")
p <- ggplot(mydata, aes(Date, Ricavi_previsti)) +
  geom_point() +
  stat_smooth(method = lm)

# 3. Add the lower and upper 95% confidence bounds as dashed red lines
p + geom_line(aes(y = fit.lwr), color = "red", linetype = "dashed")+
  geom_line(aes(y = fit.upr), color = "red", linetype = "dashed")

