In [1]:
import pandas as pd
import numpy as np
import warnings

warnings.filterwarnings('ignore')


In [2]:
from bokeh.io import output_notebook, show
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, ColorBar
from bokeh.transform import factor_cmap
from bokeh.palettes import Viridis256

In [3]:
# Read the sales CSV (path is relative to the notebook's working directory).
sales_csv_path = '../../data/company_sales/sales.csv'
sales_df = pd.read_csv(sales_csv_path)

In [4]:
# Columns: overwrite the CSV header positionally with the names used in the
# rest of the notebook. The list length must equal the number of columns in
# the file, otherwise pandas raises on assignment.
column_names = [
    'orderNumber', 'orderLineNumber', 'orderDate', 'shippedDate',
    'requiredDate', 'customerNumber', 'employeeNumber', 'productCode',
    'status', 'comments', 'quantityOrdered', 'priceEach', 'sales_amount',
    'origin',
]
sales_df.columns = column_names

# Print dtypes and non-null counts as a quick sanity check of the load.
sales_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3001 entries, 0 to 3000
Data columns (total 14 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   orderNumber      3001 non-null   int64  
 1   orderLineNumber  3001 non-null   int64  
 2   orderDate        3001 non-null   object 
 3   shippedDate      2859 non-null   object 
 4   requiredDate     3001 non-null   object 
 5   customerNumber   3001 non-null   int64  
 6   employeeNumber   3001 non-null   int64  
 7   productCode      3001 non-null   object 
 8   status           3001 non-null   object 
 9   comments         759 non-null    object 
 10  quantityOrdered  3001 non-null   int64  
 11  priceEach        3001 non-null   float64
 12  sales_amount     3001 non-null   float64
 13  origin           3001 non-null   object 
dtypes: float64(2), int64(5), object(7)
memory usage: 328.4+ KB


In [5]:
# Configure Bokeh so that the show() calls in the cells below render their
# figures inline in this notebook. No data is loaded here.
output_notebook()


In [16]:


# 1. Bar Plot: Top 10 Products by Sales
# Sum sales_amount per product and keep the ten largest totals
# (nlargest returns them in descending order).
top_products = sales_df.groupby("productCode")["sales_amount"].sum().nlargest(10)
product_codes = top_products.index.tolist()
source = ColumnDataSource(data=dict(products=product_codes, sales=top_products.values))

# factor_cmap pairs factors[i] with palette[i]. Passing all 256 Viridis
# colours would therefore use only the first ten, which are almost the same
# dark shade, so pick evenly spaced entries across the whole palette instead.
palette_positions = np.linspace(0, len(Viridis256) - 1, num=len(product_codes))
bar_palette = [Viridis256[int(pos)] for pos in palette_positions]

p = figure(
    x_range=product_codes,
    title="Top 10 Products by Sales",
    x_axis_label="Product Code",
    y_axis_label="Total Sales Amount",
    toolbar_location=None,
    tools="",
)
p.vbar(
    x='products',
    top='sales',
    width=0.9,
    source=source,
    line_color='white',
    fill_color=factor_cmap('products', palette=bar_palette, factors=product_codes),
)
p.xgrid.grid_line_color = None
# Bars must start at zero so their heights are comparable.
p.y_range.start = 0
show(p)

In [18]:

# 2. Histogram: Sales Amount Distribution
# Bin sales_amount into 30 equal-width bins; `edges` has one more entry than
# `hist` (the left edge of every bin plus the right edge of the last one).
hist, edges = np.histogram(sales_df["sales_amount"], bins=30)
p = figure(
    title="Sales Amount Distribution",
    x_axis_label="Sales Amount",
    y_axis_label="Count",
    toolbar_location=None,
    tools="",
)
# One rectangle per bin, spanning [left edge, right edge] x [0, count].
p.quad(top=hist, bottom=0, left=edges[:-1], right=edges[1:], fill_color="purple", line_color="white")
# Anchor the bars on the axis instead of leaving the default range padding
# below zero (matches the bar chart above).
p.y_range.start = 0
show(p)

In [19]:
# 3. Scatter Plot: Quantity Ordered vs Sales Amount
# One semi-transparent red marker per row of sales_df.
p = figure(
    title="Quantity Ordered vs Sales Amount",
    x_axis_label='Quantity Ordered',
    y_axis_label='Sales Amount',
)
p.scatter(
    x=sales_df["quantityOrdered"],
    y=sales_df["sales_amount"],
    size=5,
    color="red",
    alpha=0.5,
)
show(p)

In [20]:
# 4. Boxplot: Sales Amount by Order Status
# The previous version put one whole array of raw values per category into
# the `top` column of a vbar, which needs a single scalar per bar, so no box
# plot was drawn. This version computes the box statistics explicitly.
from bokeh.models import FactorRange

# Keep the statuses in order of first appearance, as before.
categories = sales_df["status"].unique().tolist()

# Per-status quartiles, aligned to `categories`.
by_status = sales_df.groupby("status")["sales_amount"]
q1 = by_status.quantile(0.25).reindex(categories)
q2 = by_status.quantile(0.50).reindex(categories)
q3 = by_status.quantile(0.75).reindex(categories)
iqr = q3 - q1

# Tukey fences (1.5 * IQR beyond the box), mapped back onto each row so rows
# can be classified as inliers or outliers relative to their own status.
row_lower_fence = sales_df["status"].map(q1 - 1.5 * iqr)
row_upper_fence = sales_df["status"].map(q3 + 1.5 * iqr)
is_outlier = (sales_df["sales_amount"] < row_lower_fence) | (sales_df["sales_amount"] > row_upper_fence)

# Whiskers reach the most extreme observation that is still inside the fences.
whiskers = (
    sales_df[~is_outlier]
    .groupby("status")["sales_amount"]
    .agg(["min", "max"])
    .reindex(categories)
)
outliers = sales_df[is_outlier]

source = ColumnDataSource(data=dict(
    categories=categories,
    q1=q1.tolist(),
    q2=q2.tolist(),
    q3=q3.tolist(),
    lower=whiskers["min"].tolist(),
    upper=whiskers["max"].tolist(),
))

p = figure(
    x_range=FactorRange(*categories),
    title="Sales Amount by Order Status",
    x_axis_label="Order Status",
    y_axis_label="Sales Amount",
)
# Whisker stems: from the box edges out to the whisker ends.
p.segment(x0='categories', y0='q3', x1='categories', y1='upper', source=source, line_color="black")
p.segment(x0='categories', y0='q1', x1='categories', y1='lower', source=source, line_color="black")
# The box is drawn as two stacked bars so the shared edge marks the median.
p.vbar(x='categories', bottom='q2', top='q3', width=0.7, source=source, fill_color="blue", line_color="black")
p.vbar(x='categories', bottom='q1', top='q2', width=0.7, source=source, fill_color="blue", line_color="black")
# Individual observations outside the fences.
if not outliers.empty:
    p.scatter(
        x=outliers["status"].tolist(),
        y=outliers["sales_amount"].tolist(),
        size=4,
        color="black",
        alpha=0.4,
    )
p.xgrid.grid_line_color = None
show(p)

In [21]:
# 5. Heatmap: Correlation Matrix
import numpy as np
from bokeh.models import LinearColorMapper

# Pairwise correlations between the numeric columns of interest.
corr = sales_df[["orderNumber", "orderLineNumber", "customerNumber", "quantityOrdered", "priceEach", "sales_amount"]].corr()
corr_matrix = corr.values
labels = list(corr.columns)
n_labels = len(labels)

colors = Viridis256
mapper = LinearColorMapper(palette=colors, low=corr_matrix.min(), high=corr_matrix.max())
p = figure(title="Correlation Heatmap", x_range=labels, y_range=list(reversed(labels)))

# A categorical range with n factors spans 0..n (factor i is centred at
# i + 0.5), so the image must be exactly n wide and n high for each cell to
# sit under its tick label; the previous dw=dh=10 overshot the 6 labels.
# Bokeh draws image row 0 at the bottom, where the reversed y_range holds the
# LAST label, so the rows are flipped to keep each row next to its own label.
heat_image = np.ascontiguousarray(corr_matrix[::-1])
p.image(image=[heat_image], x=0, y=0, dw=n_labels, dh=n_labels, color_mapper=mapper)

# Legend for the colour scale, so the colours can be read as correlation values.
p.add_layout(ColorBar(color_mapper=mapper), 'right')
show(p)
