In [None]:
library(ggplot2) # powerful visualization library
library(dplyr) # powerful data wrangling library

In [None]:
## Sales data (user-given): 16 observed values followed by 4 missing (NA) periods
t <- 1:20
s <- c(2.8, 2.1, 4, 4.5, 3.8, 3.2, 4.8, 5.4, 4, 3.6, 5.5, 5.8, 4.3, 3.9, 6, 6.4,
       NA, NA, NA, NA)
# Name the columns while building the data frame instead of relabelling afterwards
salesData <- data.frame(time = t, sales = s)
salesData

In [None]:
## Plot the sales data
# Raw series: light-blue connecting line with default points on top
p1 <- ggplot(data = salesData, mapping = aes(x = time, y = sales)) +
  geom_line(colour = 'light blue') +
  geom_point() +
  labs(x = 'Time', y = 'Sales') +
  theme_minimal()
p1

In [None]:
## Fit a linear regression model to the sales data
# Scatter of the observations with the least-squares line drawn in red.
# Colours are set on the individual layers: ggplot() itself has no colour
# argument and silently ignores one, so none is passed to it.
p2 <- ggplot(salesData, aes(x = time, y = sales)) +
  geom_point() +
  stat_smooth(method = 'lm', col = 'red', se = FALSE) # se = FALSE: no confidence band
p2

In [None]:
## Calculate a moving average of the data
k <- 4 # number of consecutive observations averaged in each window

s <- salesData$sales # sales values
mavg <- rep(NA, length(s)) # stays NA wherever no full window is available
n_windows <- max(length(s) - k + 1, 0) # how many complete windows fit in s

for (first in seq_len(n_windows)) {
  # The window starting at `first` is stored at position first + k - 2,
  # so the first average, mean(s[1:k]), lands at position k - 1.
  mavg[first + k - 2] <- mean(s[first:(first + k - 1)])
}

mavg

In [None]:
# Spot-check one window by hand: the mean of s over positions 15 .. 15 + k - 1
print(mean(s[15:(15 + k - 1)]))

In [None]:
## Superimpose plot of moving averages over original time series
mSalesData <- data.frame(salesData, mavg = mavg)
p3 <- ggplot(data = mSalesData, mapping = aes(x = time, y = sales)) +
  # original series
  geom_line(colour = 'light blue') +
  geom_point() +
  # moving average in orange; x is inherited from the plot-level mapping
  geom_line(aes(y = mavg), colour = 'orange') +
  geom_point(aes(y = mavg), colour = 'orange') +
  labs(x = 'Time', y = 'Sales') +
  theme_minimal()
p3

In [None]:
## Calculate seasonality-plus-irregular component of the data
k <- 4 # number of consecutive observations averaged in each window

s <- salesData$sales # sales values
si <- rep(NA, length(s)) # stays NA wherever no full window is available
n_windows <- max(length(s) - k + 1, 0) # how many complete windows fit in s

for (first in seq_len(n_windows)) {
  pos <- first + k - 2 # position the window average is attributed to (starts at k - 1)
  # Ratio of the observation to its moving average: Seasonality + Irregular Component
  si[pos] <- s[pos] / mean(s[first:(first + k - 1)])
}

si

In [None]:
## Superimpose plot of seasonality over original time series
smSalesData <- data.frame(mSalesData, si = si)
p4 <- ggplot(data = smSalesData, mapping = aes(x = time, y = sales)) +
  # original series
  geom_line(colour = 'light blue') +
  geom_point() +
  # seasonality-plus-irregular ratio in orange; x is inherited from the plot-level mapping
  geom_line(aes(y = si), colour = 'orange') +
  geom_point(aes(y = si), colour = 'orange') +
  labs(x = 'Time', y = 'Sales') +
  theme_minimal()
p4

In [None]:
eps <- rnorm(100, mean = 0, sd = 1) # random noise
mu <- 2 # constant mean; only used by the commented-out stationary alternative below
t_idx <- seq_along(eps) # time index 1, 2, ..., length(eps)

# The process, that is the time series. Alternative models kept for experimentation:
#Y_t = mu + eps                                      # constant mean + noise
#Y_t = 1.5*t_idx + eps                               # linear trend + noise
#Y_t = 5.5*sin((2*pi/10)*t_idx) + eps                # period-10 sine + noise
#Y_t = 1.5*t_idx * 5.5*sin((2*pi/10)*t_idx) * eps    # multiplicative combination
# Active model: linear trend + period-10 sine + noise
Y_t <- 1.5 * t_idx + 5.5 * sin((2 * pi / 10) * t_idx) + eps

# plotting the time series
# The active model has a linear trend, so the series is not stationary; the
# title and the axis label (Y, not X) describe what is actually plotted.
ts.plot(Y_t,
        main = "Example of a non-stationary time series (trend + seasonality + noise)",
        ylab = expression(Y[t]))

In [None]:
# Autocorrelation function of Y_t, computed up to lag 20
acf(Y_t, lag.max = 20)

In [None]:
# Load the car sales dataset
# NOTE(review): absolute path, presumably a Colab upload location; adjust when running elsewhere
file <- '/content/carsales.csv'
csData <- read.csv(file = file, stringsAsFactors = FALSE, header = TRUE)
# Replace whatever header text the CSV carries with two fixed column names
names(csData) <- c('Date', 'Sales')
head(csData)

In [None]:
# Sales for March 1994
# Pick the Sales value(s) whose Date string equals '1994-03-01'
csData[['Sales']][which(csData[['Date']] == '1994-03-01')]

In [None]:
# Convert the Sales column into a monthly (frequency = 12) time series object
# NOTE(review): start/end are hard-coded to Jan 1992 - Oct 2021; confirm they match the rows in the CSV
csData.ts <- ts(
  csData$Sales,
  start = c(1992, 1),
  end = c(2021, 10),
  frequency = 12
)

In [None]:
# Class of the time series object
class(csData.ts)

In [None]:
# Print the full series
print(csData.ts)

In [None]:
# Time of the first observation
start(csData.ts)

In [None]:
# Time of the last observation
end(csData.ts)

In [None]:
# Time stamp of every observation
time(csData.ts)

In [None]:
# Number of observations per unit of time (set to 12 when the series was created)
frequency(csData.ts)

In [None]:
# Time interval between consecutive observations
deltat(csData.ts)

In [None]:
# First five values, selected by position
csData.ts[1:5]

In [None]:
# Extract the sub-series from period 3 to period 4 of 1994 and check its class
tmp = window(csData.ts, start=c(1994, 3), end=c(1994, 4))
class(tmp) 

In [None]:
# Display the extracted sub-series
tmp

In [None]:
# Plot the series between Jan 1992 and Oct 2021 as a thick blue line
ts.plot(
  window(x = csData.ts, start = c(1992, 1), end = c(2021, 10)),
  main = "Car Sales Data",
  ylab = "Sales",
  col = "blue",
  lwd = 2
)

In [None]:
# Number of entries in the Sales column
length(csData$Sales)

In [None]:
## Calculate a centred moving average (trend estimate) of the data
k <- 12 # seasonal period; each window covers k + 1 observations (k/2 on either side)

# Weights of the centred 2 x k moving average: the two end points of the window
# get 1/(2k) and the k - 1 interior points get 1/k, so the weights sum to 1.
ma_weights <- c(1 / (2 * k), rep(1 / k, k - 1), 1 / (2 * k))

# Rows without a full window (the first and last k/2 rows) stay NA.
csData['SalesT'] <- NA_real_

# stats::filter() rejects a filter that is longer than the series, so smooth
# only when at least k + 1 observations are available.
if (nrow(csData) > k) {
  # Explicit stats:: prefix because dplyr is attached and masks filter().
  csData['SalesT'] <- as.numeric(
    stats::filter(csData$Sales, filter = ma_weights,
                  method = "convolution", sides = 2)
  )
}

In [None]:
# Show the first 20 rows of csData
head(csData, 20)

In [None]:
# Plot the SalesT column as a time series
plot.ts(csData$SalesT)

In [None]:
# Add another column to the dataframe containing the seasonality and the
# noise component: the ratio of each Sales value to its SalesT value
csData[['SalesSN']] <- with(csData, Sales / SalesT)

In [None]:
# Line plot (type = 'l') of the SalesSN column against its index
plot(csData$SalesSN, type = 'l')

In [None]:
# Convert the Sales, SalesT and SalesSN columns into monthly time series
# objects and bind them column-wise into one multivariate series.
# The tmp*.ts names are kept unchanged: cbind() on ts objects takes its
# column names from the argument names.
tmp1.ts <- ts(csData$Sales, start = c(1992, 1), end = c(2021, 10), frequency = 12)
tmp2.ts <- ts(csData$SalesT, start = c(1992, 1), end = c(2021, 10), frequency = 12)
tmp3.ts <- ts(csData$SalesSN, start = c(1992, 1), end = c(2021, 10), frequency = 12)
csData.ts <- cbind(tmp1.ts, tmp2.ts, tmp3.ts)

In [None]:
# Average for January
# Every 12th row starting at row 1 (rows 1, 13, 25, ...)
ind <- seq(from = 1, to = length(csData.ts[, 1]), by = 12)
print(ind)
print(length(csData.ts[, 1]))
# Mean of column 3 over those rows, ignoring missing values
mean(x = csData.ts[ind, 3], na.rm = TRUE)

In [None]:
# Scratch check of the per-month average for a single month (m = 1)
csData['SalesS'] <- NA
m <- 1
# The vector must exist before one of its elements can be assigned;
# without this line the assignment below fails with "object not found".
monthly_average <- rep(0, 12)
ind <- seq(m, length(csData.ts[, 1]), by = 12) # every 12th row starting at row m
monthly_average[m] <- mean(csData.ts[ind, 3], na.rm = TRUE)
# Display the SalesS entries of those rows (still NA: nothing is written to them in this cell)
csData[ind, 'SalesS']

In [None]:
## Calculate the average seasonal-plus-noise component of every month
monthly_average <- rep(0, 12) # one slot per calendar month
csData['SalesS'] <- NA

for (m in seq_len(12)) {
  # Rows belonging to month m: m, m + 12, m + 24, ...
  ind <- seq(from = m, to = length(csData.ts[, 1]), by = 12)
  # Average of column 3 of csData.ts over those rows, ignoring missing values
  monthly_average[m] <- mean(csData.ts[ind, 3], na.rm = TRUE)
  # Write that month's average into every row of the month
  csData[ind, 'SalesS'] <- monthly_average[m]
}

In [None]:
# Add a column to store the noise (irregular component).
# SalesSN is a ratio (Sales / SalesT), i.e. a multiplicative seasonal-times-noise
# term, so the seasonal index SalesS is removed by division rather than
# subtraction. Values close to 1 indicate little irregular variation.
csData['Noise'] <- csData$SalesSN / csData$SalesS

In [None]:
# Show the first 20 rows of csData
head(csData, 20)

In [None]:
# Convert the Sales, SalesT, SalesS and Noise columns into monthly time series
# objects and bind them column-wise into one multivariate series.
# The tmp*.ts names are kept unchanged: cbind() on ts objects takes its
# column names from the argument names.
tmp1.ts <- ts(csData$Sales, start = c(1992, 1), end = c(2021, 10), frequency = 12)
tmp2.ts <- ts(csData$SalesT, start = c(1992, 1), end = c(2021, 10), frequency = 12)
tmp3.ts <- ts(csData$SalesS, start = c(1992, 1), end = c(2021, 10), frequency = 12)
tmp4.ts <- ts(csData$Noise, start = c(1992, 1), end = c(2021, 10), frequency = 12)
csData.ts <- cbind(tmp1.ts, tmp2.ts, tmp3.ts, tmp4.ts)

In [None]:
# Plot the multi-column time series object
plot(csData.ts)

In [None]:
# Built-in classical decomposition of the first column of csData.ts,
# with the default additive model spelled out
plot(decompose(csData.ts[, 1], type = "additive"))

In [None]:
# Autocorrelation function of the first column of csData.ts
# NOTE(review): lag.max = 1 restricts the output to lags 0 and 1; confirm this
# is intended, since a larger value is needed to see anything beyond the first lag
acf(csData.ts[, 1], lag.max = 1)