In [1]:
import pandas as pd
import plotly.graph_objects as go

### **Loading data**

In [2]:
# Load the chocolate sales CSV (relative path) into the working DataFrame.
csv_path = "../datasets/chocolate-sales/Chocolate Sales.csv"
df = pd.read_csv(csv_path)

# Report (rows, columns) before previewing the first records.
print(df.shape)
df.head()

(1094, 6)


Unnamed: 0,Sales Person,Country,Product,Date,Amount,Boxes Shipped
0,Jehu Rudeforth,UK,Mint Chip Choco,04-Jan-22,"$5,320",180
1,Van Tuxwell,India,85% Dark Bars,01-Aug-22,"$7,896",94
2,Gigi Bohling,India,Peanut Butter Cubes,07-Jul-22,"$4,501",91
3,Jan Morforth,Australia,Peanut Butter Cubes,27-Apr-22,"$12,726",342
4,Jehu Rudeforth,UK,Peanut Butter Cubes,24-Feb-22,"$13,685",184


In [3]:
# Strip the currency symbol and thousands separators, then convert to float.
# Casting to str first keeps this cell safe to re-run: once 'Amount' is already
# numeric, calling the .str accessor directly would raise AttributeError.
df['Amount'] = (
    df['Amount']
    .astype(str)
    .str.replace('[$,]', '', regex=True)
    .astype(float)
)

# Parse the date column into datetimes; format='mixed' lets pandas infer the
# format of each value individually.
# NOTE(review): the previewed rows all look like '%d-%b-%y' (e.g. 04-Jan-22);
# if the whole file follows that pattern an explicit format would be stricter
# -- confirm against the full dataset before changing it.
df["Date"] = pd.to_datetime(df["Date"], format='mixed')

df.head()

Unnamed: 0,Sales Person,Country,Product,Date,Amount,Boxes Shipped
0,Jehu Rudeforth,UK,Mint Chip Choco,2022-01-04,5320.0,180
1,Van Tuxwell,India,85% Dark Bars,2022-08-01,7896.0,94
2,Gigi Bohling,India,Peanut Butter Cubes,2022-07-07,4501.0,91
3,Jan Morforth,Australia,Peanut Butter Cubes,2022-04-27,12726.0,342
4,Jehu Rudeforth,UK,Peanut Butter Cubes,2022-02-24,13685.0,184


In [4]:
# Count the missing values in each column.
missing_per_column = df.isna().sum()
missing_per_column

Sales Person     0
Country          0
Product          0
Date             0
Amount           0
Boxes Shipped    0
dtype: int64

## **Exploratory data analysis**

In [5]:
def plot_bar_with_amount(title: str, df: pd.DataFrame, x_col: str, y_col: str, x_angle: int = 0,
                         orient: str = 'v', height: int = 600) -> None:
    """Draw a single-series bar chart of sales amounts and display it.

    Args:
        title: Chart title.
        df: Frame containing the columns named by ``x_col`` and ``y_col``.
        x_col: Column plotted along the x axis.
        y_col: Column plotted along the y axis.
        x_angle: Tick-label angle applied to the x axis.
        orient: Forwarded to ``go.Bar(orientation=...)``. With ``'v'`` the
            amounts are read from ``y_col``; with any other value they are
            read from ``x_col``.
        height: Figure height passed to the layout.

    Returns:
        None. The figure is rendered through ``fig.show()``.
    """
    amount_axis_title = 'Total Sales Amount (in $)'

    # Work out which column holds categories and which holds amounts, and
    # which plotly axis letter ('x' / 'y') each one is bound to.
    if orient == 'v':
        category_col, amount_col = x_col, y_col
        category_axis, amount_axis = 'x', 'y'
        x_title, y_title = x_col, amount_axis_title
    else:
        category_col, amount_col = y_col, x_col
        category_axis, amount_axis = 'y', 'x'
        x_title, y_title = amount_axis_title, y_col

    # Hover box: category first, then the amount as whole dollars.
    hover = (
        f"<b>{category_col}:</b> %{{{category_axis}}}<br>"
        f"<b>{amount_col}:</b> $%{{{amount_axis}:,.0f}}<extra></extra>"
    )

    # Labels printed inside the bars: thousands separators, no decimals.
    bar_labels = [format(amount, ',.0f') for amount in df[amount_col].values]

    bars = go.Bar(
        x=df[x_col],
        y=df[y_col],
        orientation=orient,
        marker_color='darkblue',
        text=bar_labels,
        textposition='inside',
        textfont=dict(size=10),
        hovertemplate=hover,
    )
    fig = go.Figure(data=[bars])

    # Configure the chart layout.
    dark_hover_box = dict(
        bgcolor='#333333',
        font_size=14,
        font_color='white',
        bordercolor='#333333',
    )
    fig.update_layout(
        title=title,
        template='plotly_white',
        xaxis=dict(title=x_title, tickangle=x_angle, showgrid=False),
        yaxis=dict(title=y_title, showgrid=False),
        hoverlabel=dark_hover_box,
        height=height,
    )

    # Display the chart.
    fig.show()

### **Total sales by country**

In [6]:
# Total sales amount per country, largest first.
sales_by_country = (
    df.groupby('Country')['Amount']
    .sum()
    .sort_values(ascending=False)
    .reset_index()
)

plot_bar_with_amount(
    title='Total Chocolate Sales by Country',
    df=sales_by_country,
    x_col='Country',
    y_col='Amount',
    orient='v',
)

### **Total sales by product**

In [7]:
# Total sales amount per product, sorted ascending for the horizontal chart.
sales_by_product = (
    df.groupby('Product')['Amount']
    .sum()
    .sort_values()
    .reset_index()
)

# Horizontal bars: amounts go on the x axis, product names on the y axis.
plot_bar_with_amount(
    title='Total Chocolate Sales by Product',
    df=sales_by_product,
    x_col='Amount',
    y_col='Product',
    orient='h',
    height=900,
)

### **Salesperson's sales performance**

In [8]:
# Total sales per sales person, keeping only the ten highest totals.
salesperson_performance = (
    df.groupby('Sales Person')['Amount']
    .sum()
    .sort_values(ascending=False)
    .reset_index()
    .head(10)
)

# Names are long, so the x tick labels are rotated by -45 degrees.
plot_bar_with_amount(
    title='Top 10 Best Performing Sales Person',
    df=salesperson_performance,
    x_col='Sales Person',
    y_col='Amount',
    x_angle=-45,
)

### **Sales trend over time**

In [9]:
def plot_line(title: str, df: pd.DataFrame, date_col: str, y_col: str, set_ylim: bool = False) -> None:
    """Draw a line chart of ``y_col`` against ``date_col`` and display it.

    Args:
        title: Chart title.
        df: Frame containing the columns named by ``date_col`` and ``y_col``.
        date_col: Column plotted on the x axis; also used as the x-axis title.
        y_col: Column plotted on the y axis.
        set_ylim: When True, the y axis is pinned to
            ``[0, 1.2 * df[y_col].max()]``; otherwise no range is set.

    Returns:
        None. The figure is rendered through ``fig.show()``.
    """
    # Lines only; switch mode to 'lines+markers' (and add a marker dict) to
    # also draw the individual points.
    trend = go.Scatter(
        x=df[date_col],
        y=df[y_col],
        mode='lines',
        line=dict(color='darkblue', width=2),
        hovertemplate=f'<b>{date_col}:</b> %{{x}}<br><b>{y_col}:</b> $%{{y:,.0f}}<extra></extra>',
    )
    fig = go.Figure(data=[trend])

    # Optionally anchor the y axis at zero with 20% headroom above the peak.
    if set_ylim:
        y_axis_options = dict(showgrid=False, range=[0, 1.2 * df[y_col].max()])
    else:
        y_axis_options = dict(showgrid=False)

    dark_hover_box = dict(
        bgcolor='#333333',
        font_size=14,
        font_color='white',
        bordercolor='#333333',
    )
    fig.update_layout(
        title=title,
        xaxis_title=date_col,
        yaxis_title='Total Sales Amount (in $)',
        template='plotly_white',
        xaxis=dict(tickangle=0, showgrid=False),
        yaxis=y_axis_options,
        hoverlabel=dark_hover_box,
        hovermode='x',
        height=400,
    )

    fig.show()

In [10]:
# Total sales amount for each distinct date.
sales_over_time = (
    df.groupby('Date')['Amount']
    .sum()
    .reset_index()
)

plot_line(
    title='Chocolate Sales Over Time',
    df=sales_over_time,
    date_col='Date',
    y_col='Amount',
)

### **Sales trend over time (Monthly)**

In [14]:
# Bucket the rows by month-start frequency ('MS') on the Date column and
# total the amounts in each bucket.
month_start_buckets = pd.Grouper(key='Date', freq='MS')
monthly_sales = (
    df.groupby(month_start_buckets)['Amount']
    .sum()
    .reset_index()
)

# set_ylim=True anchors the y axis at zero for this chart.
plot_line(
    title='Monthly Chocolate Sales',
    df=monthly_sales,
    date_col='Date',
    y_col='Amount',
    set_ylim=True,
)

### **Distribution of Boxes Shipped**

In [12]:
# Histogram trace of the 'Boxes Shipped' column.
boxes_histogram = go.Histogram(
    x=df['Boxes Shipped'],
    nbinsx=5,
    marker=dict(color='darkblue', line=dict(color='black', width=0)),
    opacity=1,
)

fig = go.Figure(data=[boxes_histogram])

dark_hover_box = dict(
    bgcolor="#333333",
    font_size=14,
    font_color="white",
    bordercolor="#333333",
)
fig.update_layout(
    title='Distribution of Boxes Shipped',
    xaxis_title='Number of Boxes Shipped',
    yaxis_title='Frequency',
    template='plotly_white',
    xaxis=dict(tickangle=0, showgrid=False),
    yaxis=dict(showgrid=False),
    bargap=0.15,  # spacing between bars
    height=400,
    hoverlabel=dark_hover_box,
)

fig.show()