# Assignment 1: Time Series 
- Florencia Luque
- Simon Schmetz

In [None]:
import pandas as pd
import numpy as np  
import matplotlib.pyplot as plt

In [None]:
# Load the workbook and replace the Spanish column headers (which carry four
# leading spaces) with short snake_case English names in a single chained step.
data_raw = pd.read_excel('data/INE_IPC.xlsx').rename(
    columns={
        'Unnamed: 0': 'time',
        '    Índice general': 'general_index',
        '    01 Alimentos y bebidas no alcohólicas': 'food_beverages',
        '    02 Bebidas alcohólicas y tabaco': 'alc_tobacco',
        '    03 Vestido y calzado': 'clothing_footwear',
        '    04 Vivienda, agua, electricidad, gas y otros combustibles': 'housing_utilities',
        '    05 Muebles, artículos del hogar y artículos para el mantenimiento corriente del hogar': 'furniture_household',
        '    06 Sanidad': 'health',
        '    07 Transporte': 'transport',
        '    08 Comunicaciones': 'communications',
        '    09 Ocio y cultura': 'leisure_culture',
        '    10 Enseñanza': 'education',
        '    11 Restaurantes y hoteles': 'restaurants_hotels',
        '    12 Otros bienes y servicios': 'other_goods_services',
    }
)

# The time labels are parsed as: four-digit year, a literal "M", month number.
data_raw['time'] = pd.to_datetime(data_raw['time'], format='%YM%m')

# All later cells work on a copy so the parsed raw table stays as loaded.
data = data_raw.copy()

In [None]:
### Functions

# plot lineplot and histogram

def plot_eda(data, feature, color='skyblue', transform_func=None):
    """Plot a feature over time and its histogram with a normal-density overlay.

    Draws a two-row figure. The top panel is a line plot of the series against
    ``data['time']``. The bottom panel is a density-normalised histogram
    (30 bins, missing values dropped) with the normal density implied by the
    sample mean and standard deviation drawn in red.

    Args:
        data: DataFrame holding a ``'time'`` column and the ``feature`` column.
        feature: Name of the column to plot.
        color: Matplotlib colour used for the line and the histogram bars.
        transform_func: Optional callable applied to ``data[feature]`` before
            plotting, e.g. ``lambda s: s.diff()``. It must return a pandas
            Series of the same length so the result still lines up with
            ``data['time']``. With the default ``None`` the column is plotted
            unchanged.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    series = data[feature]
    if transform_func is not None:
        series = transform_func(series)
    label = feature.replace('_', ' ').title()

    plt.figure(figsize=(10, 7))

    # Line plot of the feature over time
    plt.subplot(2, 1, 1)
    plt.plot(data['time'], series, marker='.', color=color)
    plt.xlabel('Time')
    plt.ylabel(label)
    plt.title(f'{label} Over Time')
    plt.grid(True)

    # Histogram of the feature
    plt.subplot(2, 1, 2)
    plt.hist(series.dropna(), bins=30, edgecolor='k', color=color, density=True)

    mean = series.mean()
    std = series.std()
    # A constant series has std == 0 and a series with fewer than two
    # observations has std == NaN; the density formula would divide by that,
    # so the overlay is drawn only when the spread is usable.
    if np.isfinite(std) and std > 0:
        x = np.linspace(mean - 3*std, mean + 3*std, 100)
        p = np.exp(-0.5 * ((x - mean) / std)**2) / (std * np.sqrt(2 * np.pi))
        plt.plot(x, p, color='red', linewidth=2)

    plt.xlabel(label)
    plt.ylabel('Density')
    plt.title(f'Histogram + Normal Distribution (sample mean/variance) of {label}')
    plt.grid(True)

    # Keeps the top panel's x-label from running into the bottom panel's title.
    plt.tight_layout()
    plt.show()


# statistical tests
from scipy.stats import jarque_bera
from statsmodels.tsa.stattools import adfuller

def stat_tests(data, feature):
    """Print normality and stationarity test results for one feature.

    Runs the Jarque-Bera test (normality) and the augmented Dickey-Fuller
    test (stationarity) on ``data[feature]`` and prints each test's statistic
    and p-value.

    Args:
        data: DataFrame containing the ``feature`` column.
        feature: Name of the column to test.

    Returns:
        None. Results are written to standard output.
    """
    # Missing values (for example the leading NaN left by differencing) do not
    # yield a meaningful result in either test, so only observed values are
    # tested. This matches the histogram in plot_eda, which also drops them.
    series = data[feature].dropna()

    print("Testing for Stationarity and Normality of Feature:", feature)

    ### Perform Jarque-Bera test (normality test)
    jb_stat, jb_p_value = jarque_bera(series)

    # Print the test statistic and p-value
    print('\nJarque-Bera Statistic:', jb_stat)
    print('p-value:', jb_p_value)

    ### Perform Dickey-Fuller test (Stationarity test)
    result = adfuller(series)

    # Print the test statistic and p-value (first two entries of the result)
    print('\nADF Statistic:', result[0])
    print('p-value:', result[1])

    print("\n\n")


# plot Autocorrelations
from statsmodels.graphics.tsaplots import plot_acf
def plot_autocorrelations(data, feature, lags=40):
    """Plot the autocorrelation function of one feature.

    Args:
        data: DataFrame containing the ``feature`` column.
        feature: Name of the column whose autocorrelation is plotted.
        lags: Number of lags passed to ``plot_acf``.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    # plot_acf opens its own figure unless it is handed an Axes; creating the
    # Axes here and passing it in makes the 10x7 size apply to the ACF plot
    # and avoids leaving an empty extra figure behind.
    _, ax = plt.subplots(figsize=(10, 7))
    plot_acf(data[feature], lags=lags, alpha=0.05, ax=ax)
    ax.set_xlabel('Lags')
    ax.set_ylabel('Autocorrelation')
    ax.set_title(f'Autocorrelation of {feature.replace("_", " ").title()}')
    ax.grid(True)
    plt.show()

## Untransformed Data

In [None]:
# Preview the first rows of the renamed, date-parsed table
data.head()

In [None]:
# Line plot and histogram of the untransformed general index
plot_eda(data, 'general_index')

In [None]:
# Jarque-Bera and Dickey-Fuller tests on the untransformed general index
stat_tests(data,"general_index")

In [None]:
# Autocorrelation plot of the untransformed general index
plot_autocorrelations(data, 'general_index')

## Deterministic Transform

In [None]:
# deterministic transform: fit a straight line in time and subtract it

from sklearn.linear_model import LinearRegression

# Regressor: each observation's month as an integer monthly-period ordinal,
# shaped as a single-column 2-D array for the estimator.
month_periods = data['time'].dt.to_period('M')
X = month_periods.map(lambda period: period.ordinal).to_numpy().reshape(-1, 1)
y = data['general_index']

# Fit the linear trend once and reuse the fitted values below.
model = LinearRegression().fit(X, y)
trend = model.predict(X)


# Show the series together with the fitted trend line
plt.figure(figsize=(10, 7))
plt.plot(data['time'], y, color="skyblue", label='Actual Data')
plt.plot(data['time'], trend, color='lightcoral', linewidth=2, label='Fitted Line')
plt.title('Linear Regression Fit')
plt.xlabel('Time')
plt.ylabel('General Index')
plt.grid(True)
plt.legend()
plt.show()


# The detrended series is what remains after removing the fitted line
data['general_index_transformed'] = data['general_index'] - trend

plot_eda(data, 'general_index_transformed')

In [None]:
# Autocorrelation plot of the linearly detrended general index
plot_autocorrelations(data, 'general_index_transformed')

## Stochastic transform

In [None]:
# Stochastic transform: add the first difference of the general index as a new
# column, then drop the rows where that difference is missing (the first row
# has no predecessor to difference against).
data = (
    data
    .assign(general_index_diff=lambda frame: frame['general_index'].diff())
    .dropna(subset=['general_index_diff'])
)

In [None]:
# Line plot and histogram of the first-differenced general index
plot_eda(data, 'general_index_diff')

In [None]:
# Jarque-Bera and Dickey-Fuller tests on the first-differenced general index
stat_tests(data,"general_index_diff")

In [None]:
# Autocorrelation plot of the first-differenced general index
plot_autocorrelations(data, 'general_index_diff')