<a href="https://colab.research.google.com/github/luciacasass/UFV-VisualizacionDatos/blob/main/EjerciciosClase/Clase_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Visualización de Datos

Lucía Casas Sierra

In [11]:
import pandas as pd
import warnings
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

warnings.filterwarnings("ignore")

# Carga de datos
df = pd.read_csv('datos_ejercicio_ventas.csv')
print(df.head())

countries = df['COUNTRY'].unique()
brands = df['SUBBRAND'].unique()

df['DATE'] = pd.to_datetime(df['YEAR'].astype(str) + '-' + df['MONTH'].astype(str) + '-01')
date_range_actual = [df['DATE'][df['SCENARIO']=='actual'].dt.date.min().strftime('%Y-%m'),
                     df['DATE'][df['SCENARIO']=='actual'].dt.date.max().strftime('%Y-%m')]
date_range_forecast = [df['DATE'][df['SCENARIO']=='AI_forecast'].dt.date.min().strftime('%Y-%m'),
                       df['DATE'][df['SCENARIO']=='AI_forecast'].dt.date.max().strftime('%Y-%m')]

print('\nLista de países:\n', countries)
print('\nLista de marcas:\n', brands)
print()
print('Espacio temporal para datos reales de ventas: ', date_range_actual)
print('Espacio temporal para predicciones de ventas: ', date_range_forecast)

         COUNTRY        SUBBRAND  YEAR  MONTH     SCENARIO FORECAST  \
0       Portugal     Lipton (L3)  2023     12  AI_forecast  AI_P02F   
1  Great Britain     Lipton (L3)  2023     12  AI_forecast  AI_P10F   
2          Spain  Pepsi Max (L3)  2023     12  AI_forecast  AI_P09F   
3  Great Britain        7up (L3)  2024     12  AI_forecast  AI_P10F   
4        Hungary     Lipton (L3)  2023      9  AI_forecast  AI_P03F   

   FORECAST_YEAR         AMOUNT  
0         2023.0  754356.237194  
1         2023.0  560030.558029  
2         2023.0   88501.980847  
3         2023.0  363224.511516  
4         2023.0  396176.120491  

Lista de países:
 ['Portugal' 'Great Britain' 'Spain' 'Hungary' 'Norway' 'Denmark'
 'Netherlands' 'Italy' 'Czech']

Lista de marcas:
 ['Lipton (L3)' 'Pepsi Max (L3)' '7up (L3)' 'Pepsi Regular (L3)'
 'Mountain Dew (L3)' '7up Free (L3)']

Espacio temporal para datos reales de ventas:  ['2023-01', '2024-08']
Espacio temporal para predicciones de ventas:  ['2023-01', '2

In [12]:
df_actual = df[df['SCENARIO'] == 'actual']
df_forecast = df[df['SCENARIO'] == 'AI_forecast']

print('VENTAS REALES:')
print('Número de filas: ', len(df_actual))
print(f'Representa el {(len(df_actual)/len(df)*100):.2f}% del archivo original')

print('\nDATOS DE PRONÓSTICO:')
print('Número de filas: ', len(df_forecast))
print(f'Representa el {(len(df_forecast)/len(df)*100):.2f}% del archivo original')

VENTAS REALES:
Número de filas:  900
Representa el 4.82% del archivo original

DATOS DE PRONÓSTICO:
Número de filas:  17766
Representa el 95.18% del archivo original


In [16]:
# Horizontes de predicción
df_forecast['FORECAST_MONTH'] = pd.to_numeric(df_forecast['FORECAST'].str.extract('-(\d+)')[0])
df_forecast['FORECAST_MONTH'] = df_forecast['FORECAST_MONTH'].fillna(1).astype(int)
df_forecast['FORECAST_YEAR'] = df_forecast['FORECAST_YEAR'].astype(int)

df_forecast['FORECAST_DATE'] = pd.to_datetime(
    df_forecast['FORECAST_YEAR'].astype(str) + '-' +
    df_forecast['FORECAST_MONTH'].astype(str) + '-01'
)
# De acuerdo al conocimiento de la base de datos, se resta un mes
df_forecast['FORECAST_DATE'] = df_forecast['FORECAST_DATE'] - pd.DateOffset(months=1)

df_forecast['HORIZON_MONTHS'] = (
    (df_forecast['DATE'].dt.year - df_forecast['FORECAST_DATE'].dt.year) * 12
    + (df_forecast['DATE'].dt.month - df_forecast['FORECAST_DATE'].dt.month)
)

print(df_forecast.head())

subbrand_horizon_months = df_forecast.groupby('SUBBRAND')['HORIZON_MONTHS'].max().reset_index()
print(subbrand_horizon_months.to_string(index=False))

         COUNTRY        SUBBRAND  YEAR  MONTH     SCENARIO FORECAST  \
0       Portugal     Lipton (L3)  2023     12  AI_forecast  AI_P02F   
1  Great Britain     Lipton (L3)  2023     12  AI_forecast  AI_P10F   
2          Spain  Pepsi Max (L3)  2023     12  AI_forecast  AI_P09F   
3  Great Britain        7up (L3)  2024     12  AI_forecast  AI_P10F   
4        Hungary     Lipton (L3)  2023      9  AI_forecast  AI_P03F   

   FORECAST_YEAR         AMOUNT       DATE  FORECAST_MONTH FORECAST_DATE  \
0           2023  754356.237194 2023-12-01               1    2022-12-01   
1           2023  560030.558029 2023-12-01               1    2022-12-01   
2           2023   88501.980847 2023-12-01               1    2022-12-01   
3           2023  363224.511516 2024-12-01               1    2022-12-01   
4           2023  396176.120491 2023-09-01               1    2022-12-01   

   HORIZON_MONTHS  
0              12  
1              12  
2              12  
3              24  
4               

### Distribución de ventas reales
#### Por país

In [26]:

sales_by_country = df_actual.groupby('COUNTRY')['AMOUNT'].sum().reset_index()
sales_by_country.columns = ['COUNTRY', 'TOTAL_SALES']

# Create a bar graph using Plotly Express
fig = px.bar(
    sales_by_country,
    x='COUNTRY',
    y='TOTAL_SALES',
    color_discrete_sequence=['#489fb5'],
    title='Total Sales by Country'
)

# Show the plot
fig.show()

#### Por mes y año

In [27]:
sales_by_month_year = df_actual.groupby(['YEAR', 'MONTH'])['AMOUNT'].sum().reset_index()

# Rename the columns for clarity
sales_by_month_year.columns = ['YEAR', 'MONTH', 'TOTAL_SALES']

# Create a new column for easy display of month and year together (optional, for better labeling)
sales_by_month_year['YEAR_MONTH'] = sales_by_month_year['YEAR'].astype(str) + '-' + sales_by_month_year['MONTH'].astype(str).str.zfill(2)

# Create a bar graph using Plotly Express
fig = px.bar(
    sales_by_month_year,
    x='YEAR_MONTH',  # Use the year-month for the x-axis
    y='TOTAL_SALES',
    color_discrete_sequence=['#489fb5'],
    title='Total Sales by Year and Month',
    barmode='group',  # Group the bars by country for each year-month
    height=600  # Adjust height for better readability
)

# Customize axis labels for better readability
fig.update_layout(
    xaxis_title='Year-Month',
    yaxis_title='Total Sales'
)

# Show the plot
fig.show()

#### Por marca

In [28]:
sales_by_country = df_actual.groupby('SUBBRAND')['AMOUNT'].sum().reset_index()
sales_by_country.columns = ['SUBBRAND', 'TOTAL_SALES']

# Create a bar graph using Plotly Express
fig = px.bar(
    sales_by_country,
    x='SUBBRAND',
    y='TOTAL_SALES',
    color_discrete_sequence=['#489fb5'],
    title='Total Sales by SubBrand'
)

# Show the plot
fig.show()