Unemployment is a very real problem in virtually every country. A small unemployment percentage is normal and healthy for the economy, but there are times when it becomes problematic.
What could affect unemployment in the Netherlands? I will be using linear regression to try to predict the Dutch (NL) unemployment rate using monthly data gathered since 1996-02. The model will be far from perfect, since there is not a large amount of data gathered so meticulously every month, but as George E. P. Box said, "all models are wrong, but some are useful."

# IMPORTS 

In [1]:
import pandas as pd
import numpy as np
import matplotlib as mp
import seaborn as sb
import os

In [2]:
# `sys` is imported here because the imports cell does not import it and
# sys.path is used on the next statement.
import sys

# Make the project's code directory importable so that the local
# econometrics_modules module can be found by the imports below.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# not find the module on another machine unless the path is adjusted.
sys.path.append('/Users/luizasemeniuk/chomage/chomage_project/code')

import econometrics_modules
from econometrics_modules import pull_data  # function that pulls data from Eurostat

In [3]:
from econometrics_modules import clean_dataframe , is_year_month_format

# IMPORTING DATASETS FROM EUROSTAT:

In [4]:
# Price index: pull the Eurostat dataset with code 'prc_hicp_mmor'.
# The second argument is the output file name; the file is later read back
# from data/price_index.csv, so pull_data presumably saves into data/ — confirm.
pull_data('prc_hicp_mmor', 'price_index.csv')

In [5]:
# Unemployment rate: pull the Eurostat dataset with code 'ei_lmhr_m'.
# The file is later read back from data/unemployment_rate.csv
# (pull_data presumably saves into data/ — confirm).
pull_data('ei_lmhr_m', 'unemployment_rate.csv')

In [6]:
# Economic sentiment indicator: pull the Eurostat dataset with code
# 'ei_bssi_m_r2'. The file is later read back from
# data/sentiment_indicator.csv (pull_data presumably saves into data/ — confirm).
pull_data('ei_bssi_m_r2', 'sentiment_indicator.csv')

In [7]:
# Retail sale: pull the Eurostat dataset with code 'ei_bsrt_m_r2'.
# The file is later read back from data/retail_sale.csv
# (pull_data presumably saves into data/ — confirm).
pull_data('ei_bsrt_m_r2', 'retail_sale.csv')

In [8]:
# Employment expectations over the next 3 months.
# NOTE(review): this uses the same Eurostat code ('ei_bsrt_m_r2') as the
# retail-sale download. Presumably intentional, because the cleaning step
# selects a different row for each series (2 for retail_sale, 6 for empl_exp)
# — confirm that both indicators really live in this one dataset.
pull_data('ei_bsrt_m_r2', 'empl_exp.csv')

# DATA FORMATTING AND CLEANING

In [9]:
# Price index - formatting.
# Load the raw download and pass it to clean_dataframe together with a row
# position (5) and the series name. The return value is not used; the series
# is re-read from data/price_index.csv further down, so clean_dataframe
# presumably writes the cleaned data back to that file — confirm.
# NOTE(review): 5 is presumably the row of the raw table to keep — confirm
# against econometrics_modules.clean_dataframe.
price_index = pd.read_csv('data/price_index.csv')
clean_dataframe(price_index, 5 ,'price_index')

In [10]:
# Unemployment rate - formatting.
# Load the raw download and pass it to clean_dataframe with row position 8
# and the series name. The return value is not used; the series is re-read
# from data/unemployment_rate.csv further down (clean_dataframe presumably
# writes the cleaned data back to that file — confirm).
unemployment_rate = pd.read_csv('data/unemployment_rate.csv')
clean_dataframe(unemployment_rate, 8, 'unemployment_rate')

In [11]:
# Economic sentiment indicator - formatting.
# Load the raw download and pass it to clean_dataframe with row position 0
# and the series name. The return value is not used; the series is re-read
# from data/sentiment_indicator.csv further down (clean_dataframe presumably
# writes the cleaned data back to that file — confirm).
sentiment_indicator = pd.read_csv('data/sentiment_indicator.csv')
clean_dataframe(sentiment_indicator, 0, 'sentiment_indicator')

In [12]:
# Retail sale - formatting.
# Load the raw download and pass it to clean_dataframe with row position 2
# and the series name. The return value is not used; the series is re-read
# from data/retail_sale.csv further down (clean_dataframe presumably writes
# the cleaned data back to that file — confirm).
retail_sale = pd.read_csv('data/retail_sale.csv')
clean_dataframe(retail_sale, 2, 'retail_sale')

In [13]:
# Employment expectations over the next 3 months - formatting.
# Load the raw download and pass it to clean_dataframe with row position 6
# and the series name. The raw file comes from the same Eurostat code as
# retail_sale; only the row position (6 here, 2 there) differs.
# The return value is not used; the series is re-read from data/empl_exp.csv
# further down (clean_dataframe presumably writes the cleaned data back to
# that file — confirm).
empl_exp = pd.read_csv('data/empl_exp.csv')
clean_dataframe(empl_exp, 6, 'empl_exp')

# CREATING THE DATASET FOR THE LEAST SQUARES MODEL

In [14]:
# Reload the five series from disk so that the versions updated by
# clean_dataframe are the ones used from here on. The files are read in the
# order listed, and each result is bound to the matching name on the left.
(
    empl_exp,
    price_index,
    retail_sale,
    sentiment_indicator,
    unemployment_rate,
) = map(
    pd.read_csv,
    (
        'data/empl_exp.csv',
        'data/price_index.csv',
        'data/retail_sale.csv',
        'data/sentiment_indicator.csv',
        'data/unemployment_rate.csv',
    ),
)

In [16]:
# Stack the five frames row-wise into one frame. The order of the list
# fixes the row order of the result, which the row labels assigned later
# rely on.
dataset = pd.concat(
    [
        empl_exp,
        price_index,
        retail_sale,
        sentiment_indicator,
        unemployment_rate,
    ],
    axis='index',
)

In [17]:
# Display the stacked frame: one row per indicator, one column per month.
dataset

Unnamed: 0,1996-02-01,1996-03-01,1996-04-01,1996-05-01,1996-06-01,1996-07-01,1996-08-01,1996-09-01,1996-10-01,1996-11-01,...,2024-02-01,2024-03-01,2024-04-01,2024-05-01,2024-06-01,2024-07-01,2024-08-01,2024-09-01,2024-10-01,2024-11-01
0,1.7,2.7,2.7,2.7,2.7,2.7,1.7,4.7,0.7,2.7,...,1.8,4.6,3.4,-1.6,-0.3,-1.2,0.8,-0.2,0.0,-5.1
0,0.5,1.2,0.3,-0.4,-0.5,-0.3,-0.2,1.1,0.3,-0.2,...,0.9,0.6,1.1,0.3,0.2,1.3,0.4,-0.9,0.6,-1.0
0,12.5,9.1,14.5,11.8,16.5,16.8,17.1,17.5,14.5,17.8,...,5.7,-1.6,2.7,-2.4,-6.1,-1.8,0.0,-2.4,0.3,6.1
0,-6.4,0.6,6.6,6.1,5.6,0.6,0.6,0.1,-4.4,-4.9,...,6.5,7.4,9.0,6.8,10.7,11.7,11.2,14.7,12.1,12.0
0,8.7,8.6,8.0,7.8,7.8,7.0,7.0,7.3,7.3,7.3,...,3.9,3.7,3.6,3.5,3.7,3.5,3.7,3.9,3.7,3.5


In [18]:
# Replace the row labels with short indicator names. The order must match the
# order of the frames passed to pd.concat:
# empl_exp, price_index, retail_sale, sentiment_indicator, unemployment_rate.
dataset.index = ['EmplExp', 'PriceIndex', 'RetailIndic', 'EconSntIdic', 'UnemplRate'] 

In [19]:
# Flip rows and columns so that each row is a month and each column is an
# indicator.
dataset = dataset.T

In [20]:
# Define the dataset file path (data/dataset.csv, built with os.path.join).
file_path = os.path.join('data', 'dataset.csv')

# Save the dataset to the CSV file.
# NOTE(review): index=False leaves the row index out of the file. After the
# transpose the index holds the month labels (1996-02-01, ...), so the saved
# CSV contains only the five indicator columns and no date column — confirm
# this is intended before relying on dataset.csv for time-based analysis.
dataset.to_csv(file_path, index=False)

In [21]:
# Display the transposed frame: one row per month, one column per indicator.
dataset

Unnamed: 0,EmplExp,PriceIndex,RetailIndic,EconSntIdic,UnemplRate
1996-02-01,1.7,0.5,12.5,-6.4,8.7
1996-03-01,2.7,1.2,9.1,0.6,8.6
1996-04-01,2.7,0.3,14.5,6.6,8.0
1996-05-01,2.7,-0.4,11.8,6.1,7.8
1996-06-01,2.7,-0.5,16.5,5.6,7.8
...,...,...,...,...,...
2024-07-01,-1.2,1.3,-1.8,11.7,3.5
2024-08-01,0.8,0.4,0.0,11.2,3.7
2024-09-01,-0.2,-0.9,-2.4,14.7,3.9
2024-10-01,0.0,0.6,0.3,12.1,3.7


# EXPLORATORY DATA ANALYSIS