In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from scipy.stats import pearsonr
# Read the semicolon-separated CPI export. The first seven lines of the file are
# skipped and the column names are supplied by hand: a product code, a product
# label, and one column per year from 2019 to 2023.
data = pd.read_csv(
    'data/61111-0003_F.csv',
    sep=';',
    encoding='ISO-8859-1',
    skiprows=7,
    names=[
        'consumption_code',
        'consumption_label',
        '2019',
        '2020',
        '2021',
        '2022',
        '2023',
    ],
)

In [3]:
# Preview the first five rows to check that the hand-assigned column names line
# up with the values read from the file.
data.head()

Unnamed: 0,consumption_code,consumption_label,2019,2020,2021,2022,2023
0,CC13-01,Food and non-alcoholic beverages,97.9,100.0,103.1,116.0,130.3
1,CC13-011,Food,97.8,100.0,103.1,116.9,131.4
2,CC13-0111,Bread and cereals,99.2,100.0,103.3,116.8,135.9
3,CC13-01111,"Rice, including rice preparations",98.7,100.0,102.6,115.7,135.5
4,CC13-01112,Flour and other cereals,100.2,100.0,104.1,134.2,157.6


In [4]:
# Summarise dtypes and non-null counts. The recorded output shows the year
# columns typed as object and only 441 of the 444 rows populated, so the values
# need numeric conversion and the empty rows need dropping before any analysis.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 444 entries, 0 to 443
Data columns (total 7 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   consumption_code   444 non-null    object
 1   consumption_label  441 non-null    object
 2   2019               441 non-null    object
 3   2020               441 non-null    object
 4   2021               441 non-null    object
 5   2022               441 non-null    object
 6   2023               441 non-null    object
dtypes: object(7)
memory usage: 24.4+ KB


In [None]:
# Build a numeric "product x year" table from the raw frame.
#
# The file is in wide format: one row per product (or product group) and one
# column per year. There is no 'Date' column and no per-product columns, so the
# analysis below works row-wise on the year columns. `data` itself is left
# untouched so this cell can be re-run safely.
YEAR_COLUMNS = ['2019', '2020', '2021', '2022', '2023']
FOOD_CODE = 'CC13-011'      # labelled "Food" in the preview of the table
DAIRY_KEYWORD = 'dairy'     # assumes the dairy group's label contains "dairy" - TODO confirm
MEAT_KEYWORD = 'meat'       # assumes the meat group's label contains "meat" - TODO confirm
FORECAST_YEAR = 2025

# Rows without a label carry no values (3 of the 444 rows per data.info()); drop them.
products = data.dropna(subset=['consumption_label'])
labels = products['consumption_label'].str.strip()

# The year columns are read as strings (dtype object); anything that is not a
# number becomes NaN instead of raising.
cpi = products[YEAR_COLUMNS].apply(pd.to_numeric, errors='coerce')
cpi.columns = cpi.columns.astype(int)


def first_match(mask, what):
    """Return the yearly CPI series of the first product selected by ``mask``.

    Parameters
    ----------
    mask : pandas.Series of bool
        Row selector aligned with ``products``.
    what : str
        Human-readable description of the selection, used in the error message.

    Returns
    -------
    pandas.Series
        CPI values indexed by year (int).

    Raises
    ------
    KeyError
        If no row is selected.
    """
    hits = mask[mask].index
    if hits.empty:
        raise KeyError(f'No product found for {what}')
    return cpi.loc[hits[0]]


food_cpi = first_match(
    products['consumption_code'].str.strip() == FOOD_CODE,
    f'code {FOOD_CODE}',
)
# NOTE(review): the first label containing the keyword is used; check that this
# is the product group and not one of its sub-items.
dairy_cpi = first_match(
    labels.str.contains(DAIRY_KEYWORD, case=False, regex=False),
    f'label containing {DAIRY_KEYWORD!r}',
)
meat_cpi = first_match(
    labels.str.contains(MEAT_KEYWORD, case=False, regex=False),
    f'label containing {MEAT_KEYWORD!r}',
)

# 1. How have food prices evolved over the last five years?
# Plot the CPI for food over the last five years
fig, ax = plt.subplots()
food_cpi.plot(ax=ax, marker='o')
ax.set(title='Food CPI, 2019-2023', xlabel='Year', ylabel='CPI')
ax.set_xticks(food_cpi.index)
plt.show()

# 2. For which product (or group of products) CPI has been stable for the last five years?
# Calculate the standard deviation of the CPI for each product over the last five years
# The product with the smallest standard deviation has the most stable CPI
# Only products with a value in every year are compared, so that a short
# history does not look artificially stable.
complete = cpi.dropna()
stability = labels.loc[complete.std(axis=1).idxmin()]
print(f'The most stable product is: {stability}')

# 3. What is the correlation between the CPI for dairy products and the CPI for meat products?
# Calculate the correlation coefficient
# pearsonr cannot handle NaN, so keep only the years present in both series.
pair = pd.concat([dairy_cpi, meat_cpi], axis=1, keys=['dairy', 'meat']).dropna()
corr, _ = pearsonr(pair['dairy'], pair['meat'])
print(f'The correlation between dairy and meat is: {corr}')

# 4. For which product (or group of products) CPI has been increased and decreased the most?
# Calculate the difference between the maximum and minimum CPI for each product
# The product with the largest difference has the most increase and decrease
diff = complete.max(axis=1) - complete.min(axis=1)
most_change = labels.loc[diff.idxmax()]
print(f'The product with the most change is: {most_change}')

# 5. Can we predict future trends in food prices based on historical CPI data?
# Fit a linear regression model to the data and make a prediction for the next year
# With at most five annual observations this is a rough linear extrapolation only.
observed = food_cpi.dropna()
model = LinearRegression()
X = observed.index.to_numpy().reshape(-1, 1)
y = observed.to_numpy()
model.fit(X, y)
prediction = model.predict([[FORECAST_YEAR]])
print(f'The predicted CPI for food in 2025 is: {prediction[0]}')
