# Visualization of results

In [None]:
# Data manipulation
import pandas as pd # data manipulation and dataframes
import numpy as np # arrays manipulation and mathematical operations

# Data visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Datetime
from datetime import datetime

# Progress bars (tqdm: "te quiero demasiado")
from tqdm import tqdm

In [None]:
# Load both price datasets; the first CSV column becomes the index.
initial = pd.read_csv('../eda/initial.csv', index_col=0)  # 2021-03-06 / 2022-08-18
scraped = pd.read_csv('../eda/scraped.csv', index_col=0)  # 2023-07-10

# Tag every row with the dataset it came from.
initial['hue'] = 'initial'
scraped['hue'] = 'scraped'

# Drop, in place and from both frames, the rows of the excluded categories.
for frame in (scraped, initial):
    for unwanted in ('solidario', 'navidad', 'internacional'):
        frame.drop(frame[frame['category'] == unwanted].index, inplace=True)

# Rows with missing values are discarded from `initial` only.
initial.dropna(axis=0, inplace=True)

# Stack both datasets and collect the distinct dates and categories.
frames = [initial, scraped]
concatenation = pd.concat(frames)

unique_dates = concatenation['insert_date'].unique()
unique_categories = concatenation['category'].unique()

In [None]:
# Overall average price of each dataset.
# Series.mean() skips missing prices (the builtin sum() would turn a single
# NaN into a NaN average), yields NaN instead of raising ZeroDivisionError on
# an empty column, and matches how the per-date and per-category averages
# are computed elsewhere in this notebook.
mean_21_22 = initial['price'].mean()
mean_23 = scraped['price'].mean()

In [None]:
# Average price over all products for every distinct insert date, kept as two
# parallel lists (dates / mean prices) in the order of `unique_dates`.
mean_price_evolution = {
    'dates': list(unique_dates),
    'mean_prices': [
        concatenation.loc[concatenation['insert_date'] == day, 'price'].mean()
        for day in tqdm(unique_dates)
    ],
}

In [None]:
# Bar chart of the average price per date, with the overall average of each
# dataset drawn as a dotted horizontal reference line.
plt.figure(figsize=(20, 8))

bar_plot = sns.barplot(
    x=mean_price_evolution['dates'],
    y=mean_price_evolution['mean_prices'],
    palette="mako",
)

bar_plot.set_title('History of the evolution of the average price of the DIA supermarket')
bar_plot.set_xlabel('Date')
bar_plot.set_ylabel('Average price')
plt.xticks(rotation=90)

# Blue: average of `initial`; red: average of `scraped`.
bar_plot.axhline(mean_21_22, color='blue', linewidth=2, linestyle='dotted')
bar_plot.axhline(mean_23, color='red', linewidth=2, linestyle='dotted')

# Show only one x tick label out of every five.
for position, tick_label in enumerate(bar_plot.get_xticklabels()):
    tick_label.set_visible(position % 5 == 0)

plt.show()

In [None]:
# Average price per category in the `initial` dataset, as parallel lists in
# the order of `unique_categories`.
mean_price_by_categories = {
    'categories': list(unique_categories),
    'mean_prices': [
        initial.loc[initial['category'] == category_name, 'price'].mean()
        for category_name in tqdm(unique_categories)
    ],
}

In [None]:
# Average price per category in the `scraped` dataset, as parallel lists in
# the order of `unique_categories`.
mean_price_by_categories2 = {
    'categories': list(unique_categories),
    'mean_prices': [
        scraped.loc[scraped['category'] == category_name, 'price'].mean()
        for category_name in tqdm(unique_categories)
    ],
}

In [None]:
# Turn the per-category averages of each dataset into a frame with columns
# mean_prices / category / hue; the scalar 'hue' value is broadcast to every row.
df1 = pd.DataFrame({
    'mean_prices': mean_price_by_categories['mean_prices'],
    'category': mean_price_by_categories['categories'],
    'hue': 'initial',
})

df2 = pd.DataFrame({
    'mean_prices': mean_price_by_categories2['mean_prices'],
    'category': mean_price_by_categories2['categories'],
    'hue': 'scraped',
})

# Long-format table with both datasets stacked, used for the grouped bar chart.
frames = [df1, df2]
df12 = pd.concat(frames)



In [None]:
# Grouped bar chart: average price per category, one bar per dataset ('hue').
plt.figure(figsize = (20,8))

sns.barplot(df12, x = 'category', y = 'mean_prices', hue = 'hue', palette = "mako")

plt.title('Average price by category')
plt.xlabel('Category')
plt.ylabel('Average price')
plt.xticks(rotation = 90)

# Render explicitly, like every other plotting cell, instead of relying on a
# trailing semicolon and the notebook's automatic figure display.
plt.show()

In [None]:
# Average price of the "charcuteria_y_quesos" category for every date in
# `unique_dates`, kept as two parallel lists.
# The category filter does not depend on the date, so it is applied once here
# rather than being re-evaluated over the whole frame on every iteration.
category_rows = concatenation[concatenation['category'] == 'charcuteria_y_quesos']

price_evolution_expensive_categories = {
    'dates': list(unique_dates),
    'mean_prices': [
        # Dates with no rows for this category give a NaN mean.
        category_rows.loc[category_rows['insert_date'] == d, 'price'].mean()
        for d in tqdm(unique_dates)
    ],
}

In [None]:
# Line chart of the average price over time for "charcuteria_y_quesos".
plt.figure(figsize = (20,8))

# No `palette` is passed: seaborn only uses a palette to colour the levels of
# a `hue` variable, and this plot draws a single line without one.
line_plot = sns.lineplot(x = price_evolution_expensive_categories['dates'],
                         y = price_evolution_expensive_categories['mean_prices'],
                         linewidth = 2)

plt.title('History of the evolution of the average price of the "charcuteria_y_quesos" category')
plt.xlabel('Date')
plt.ylabel('Average price')
plt.xticks(rotation = 90)

# Show only one x tick label out of every five.
for index, label in enumerate(line_plot.get_xticklabels()):
    label.set_visible(index % 5 == 0)

plt.show()

In [None]:
# Side-by-side box plots of "charcuteria_y_quesos" prices:
# left panel from `initial`, right panel from `scraped`.
fig, ax = plt.subplots(1, 2, figsize=(20, 8))

panels = ((initial, '2021-2022'), (scraped, '2023'))
for axis, (frame, period) in zip(ax, panels):
    axis.boxplot(frame.loc[frame['category'] == 'charcuteria_y_quesos', 'price'])
    axis.set_title(period)

plt.show()

In [None]:
# Average price of the "cervezas_vinos_y_bebidas_con_alcohol" category for
# every date in `unique_dates`, kept as two parallel lists.
# The category filter does not depend on the date, so it is applied once here
# rather than being re-evaluated over the whole frame on every iteration.
category_rows = concatenation[concatenation['category'] == 'cervezas_vinos_y_bebidas_con_alcohol']

price_evolution_expensive_categories2 = {
    'dates': list(unique_dates),
    'mean_prices': [
        # Dates with no rows for this category give a NaN mean.
        category_rows.loc[category_rows['insert_date'] == d, 'price'].mean()
        for d in tqdm(unique_dates)
    ],
}

In [None]:
# Line chart of the average price over time for
# "cervezas_vinos_y_bebidas_con_alcohol".
plt.figure(figsize = (20,8))

# No `palette` is passed: seaborn only uses a palette to colour the levels of
# a `hue` variable, and this plot draws a single line without one.
line_plot = sns.lineplot(x = price_evolution_expensive_categories2['dates'],
                         y = price_evolution_expensive_categories2['mean_prices'],
                         linewidth = 2)

plt.title('History of the evolution of the average price of the "cervezas_vinos_y_bebidas_con_alcohol" category')
plt.xlabel('Date')
plt.ylabel('Average price')
plt.xticks(rotation = 90)

# Show only one x tick label out of every five.
for index, label in enumerate(line_plot.get_xticklabels()):
    label.set_visible(index % 5 == 0)

plt.show()

In [None]:
# Side-by-side box plots of "cervezas_vinos_y_bebidas_con_alcohol" prices:
# left panel from `initial`, right panel from `scraped`.
fig, ax = plt.subplots(1, 2, figsize=(20, 8))

panels = ((initial, '2021-2022'), (scraped, '2023'))
for axis, (frame, period) in zip(ax, panels):
    axis.boxplot(frame.loc[frame['category'] == 'cervezas_vinos_y_bebidas_con_alcohol', 'price'])
    axis.set_title(period)

plt.show()

In [None]:
# Average price of the "mascotas" category for every date in `unique_dates`,
# kept as two parallel lists.
# The category filter does not depend on the date, so it is applied once here
# rather than being re-evaluated over the whole frame on every iteration.
category_rows = concatenation[concatenation['category'] == 'mascotas']

price_evolution_expensive_categories3 = {
    'dates': list(unique_dates),
    'mean_prices': [
        # Dates with no rows for this category give a NaN mean.
        category_rows.loc[category_rows['insert_date'] == d, 'price'].mean()
        for d in tqdm(unique_dates)
    ],
}

In [None]:
# Line chart of the average price over time for "mascotas".
plt.figure(figsize = (20,8))

# No `palette` is passed: seaborn only uses a palette to colour the levels of
# a `hue` variable, and this plot draws a single line without one.
line_plot = sns.lineplot(x = price_evolution_expensive_categories3['dates'],
                         y = price_evolution_expensive_categories3['mean_prices'],
                         linewidth = 2)

plt.title('History of the evolution of the average price of the "mascotas" category')
plt.xlabel('Date')
plt.ylabel('Average price')
plt.xticks(rotation=90)

# Show only one x tick label out of every five.
for index, label in enumerate(line_plot.get_xticklabels()):
    label.set_visible(index % 5 == 0)

plt.show()

In [None]:
# Side-by-side box plots of "mascotas" prices:
# left panel from `initial`, right panel from `scraped`.
fig, ax = plt.subplots(1, 2, figsize=(20, 8))

panels = ((initial, '2021-2022'), (scraped, '2023'))
for axis, (frame, period) in zip(ax, panels):
    axis.boxplot(frame.loc[frame['category'] == 'mascotas', 'price'])
    axis.set_title(period)

plt.show()

In [None]:
# Average price of the "aceites_salsas_y_especias" category for every date in
# `unique_dates`, kept as two parallel lists.
# The category filter does not depend on the date, so it is applied once here
# rather than being re-evaluated over the whole frame on every iteration.
category_rows = concatenation[concatenation['category'] == 'aceites_salsas_y_especias']

price_evolution_expensive_categories4 = {
    'dates': list(unique_dates),
    'mean_prices': [
        # Dates with no rows for this category give a NaN mean.
        category_rows.loc[category_rows['insert_date'] == d, 'price'].mean()
        for d in tqdm(unique_dates)
    ],
}

In [None]:
# Line chart of the average price over time for "aceites_salsas_y_especias".
plt.figure(figsize =(20,8))

# No `palette` is passed: seaborn only uses a palette to colour the levels of
# a `hue` variable, and this plot draws a single line without one.
line_plot = sns.lineplot(x = price_evolution_expensive_categories4['dates'],
                         y = price_evolution_expensive_categories4['mean_prices'],
                         linewidth = 2)

plt.title('History of the evolution of the average price of the "aceites_salsas_y_especias" category')
plt.xlabel('Date')
plt.ylabel('Average price')
plt.xticks(rotation=90)

# Show only one x tick label out of every five.
for index, label in enumerate(line_plot.get_xticklabels()):
    label.set_visible(index % 5 == 0)

plt.show()

In [None]:
# Side-by-side box plots of "aceites_salsas_y_especias" prices:
# left panel from `initial`, right panel from `scraped`.
fig, ax = plt.subplots(1, 2, figsize=(20, 8))

panels = ((initial, '2021-2022'), (scraped, '2023'))
for axis, (frame, period) in zip(ax, panels):
    axis.boxplot(frame.loc[frame['category'] == 'aceites_salsas_y_especias', 'price'])
    axis.set_title(period)

plt.show()

In [None]:
# Average price of the "congelados" category for every date in `unique_dates`,
# kept as two parallel lists.
# The category filter does not depend on the date, so it is applied once here
# rather than being re-evaluated over the whole frame on every iteration.
category_rows = concatenation[concatenation['category'] == 'congelados']

price_evolution_expensive_categories5 = {
    'dates': list(unique_dates),
    'mean_prices': [
        # Dates with no rows for this category give a NaN mean.
        category_rows.loc[category_rows['insert_date'] == d, 'price'].mean()
        for d in tqdm(unique_dates)
    ],
}

In [None]:
# Line chart of the average price over time for "congelados".
plt.figure(figsize =(20,8))

# No `palette` is passed: seaborn only uses a palette to colour the levels of
# a `hue` variable, and this plot draws a single line without one.
line_plot = sns.lineplot(x = price_evolution_expensive_categories5['dates'],
                         y = price_evolution_expensive_categories5['mean_prices'],
                         linewidth = 2)

plt.title('History of the evolution of the average price of the "congelados" category')
plt.xlabel('Date')
plt.ylabel('Average price')
plt.xticks(rotation = 90)

# Show only one x tick label out of every five.
for index, label in enumerate(line_plot.get_xticklabels()):
    label.set_visible(index % 5 == 0)

plt.show()

In [None]:
# Side-by-side box plots of "congelados" prices:
# left panel from `initial`, right panel from `scraped`.
fig, ax = plt.subplots(1, 2, figsize=(20, 8))

panels = ((initial, '2021-2022'), (scraped, '2023'))
for axis, (frame, period) in zip(ax, panels):
    axis.boxplot(frame.loc[frame['category'] == 'congelados', 'price'])
    axis.set_title(period)

plt.show()