In [15]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from bokeh.io import output_notebook, show
from bokeh.plotting import figure
from bokeh.layouts import column, row
from bokeh.models import ColumnDataSource, HoverTool, Legend, BasicTicker, ColorBar, LinearColorMapper
from bokeh.transform import factor_cmap
from bokeh.palettes import Category10, Viridis256

# Enable Bokeh output in the notebook so the show() call at the end of this
# cell renders the layout inline instead of writing a standalone HTML file.
output_notebook()

# Create sample data: one synthetic sales record per calendar day of 2023.
np.random.seed(42)
date_range = pd.date_range(start='2023-01-01', end='2023-12-31', freq='D')
n_samples = len(date_range)

# Every draw below advances the same seeded global generator, so the calls
# must stay in exactly this order for the data set to be reproducible.
model_draws = np.random.choice(['Model A', 'Model B', 'Model C', 'Model D'], n_samples)
unit_draws = np.random.randint(1, 50, n_samples)  # upper bound is exclusive: 1..49
price_draws = np.random.uniform(500, 2000, n_samples).round(2)
rating_draws = np.random.uniform(3, 5, n_samples).round(1)
region_draws = np.random.choice(['North', 'South', 'East', 'West'], n_samples)

data = dict(
    Date=date_range,
    Model=model_draws,
    Units_Sold=unit_draws,
    Price=price_draws,
    Customer_Rating=rating_draws,
    Region=region_draws,
)

# Revenue is derived per row as units sold times unit price.
df = pd.DataFrame(data).assign(
    Revenue=lambda frame: frame['Units_Sold'] * frame['Price']
)

# 1. Monthly Sales Trend
# Sum revenue per calendar month, then convert each month period back to a
# timestamp so the values can be placed on a datetime axis.
month_of_sale = df['Date'].dt.to_period('M')
monthly_sales = df.groupby(month_of_sale)['Revenue'].sum().reset_index()
monthly_sales['Date'] = monthly_sales['Date'].dt.to_timestamp()

p1 = figure(
    title="Monthly Sales Trend",
    x_axis_label="Month",
    y_axis_label="Total Revenue",
    x_axis_type="datetime",
    width=800,
    height=400,
)
p1.line(monthly_sales['Date'], monthly_sales['Revenue'], line_width=2)

# The line is given raw sequences rather than a named data source, so the
# tooltip addresses its coordinates through the @x / @y fields.
trend_hover = HoverTool(
    tooltips=[("Date", "@x{%F}"), ("Revenue", "@y{$0,0.00}")],
    formatters={"@x": "datetime"},
)
p1.add_tools(trend_hover)

import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from bokeh.io import output_notebook, show
from bokeh.plotting import figure
from bokeh.layouts import column, row
from bokeh.models import ColumnDataSource, HoverTool, Legend, BasicTicker, ColorBar, LinearColorMapper, LabelSet
from bokeh.transform import factor_cmap, cumsum
from bokeh.palettes import Category10, Viridis256
from math import pi

# Enable Bokeh output in the notebook
# NOTE(review): output_notebook() has already been called earlier in this
# cell, so this second call looks redundant -- confirm before removing it.
output_notebook()

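# NOTE: the import block above repeats the imports at the top of this cell (adding LabelSet, cumsum and pi); `df` is the DataFrame built in the data-generation code above.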

# 2. Sales by Model (Pie Chart)
from math import pi, cos, sin

# Total revenue per model, rescaled so that the 'Revenue' column of
# model_sales_percent holds each model's percentage share of the total.
model_sales = df.groupby('Model')['Revenue'].sum()
model_sales_percent = model_sales.div(model_sales.sum()).mul(100).reset_index()

# Sweep of each wedge in radians: the model's share of a full turn (2*pi).
share_total = model_sales_percent['Revenue'].sum()
model_sales_percent['angle'] = (
    model_sales_percent['Revenue'].div(share_total).mul(2).mul(pi)
)

# One palette colour per model row, plus a preformatted percentage string
# used by the tooltip and the labels.
model_sales_percent['color'] = Category10[len(model_sales_percent)]
model_sales_percent['percent'] = [
    f"{share:.1f}%" for share in model_sales_percent['Revenue']
]

# Calculate label positions
def calc_label_position(angle, radius):
    """Convert a polar coordinate to a Cartesian ``(x, y)`` pair.

    Args:
        angle: Angle in radians, measured from the positive x-axis.
        radius: Distance from the origin.

    Returns:
        A tuple ``(x, y)`` with ``x = cos(angle) * radius`` and
        ``y = sin(angle) * radius``.
    """
    x_coord = cos(angle) * radius
    y_coord = sin(angle) * radius
    return x_coord, y_coord

radius = 0.5        # wedge radius; also where each connecting line starts
label_radius = 0.8  # distance from the centre at which labels are anchored

# Angle through the middle of each wedge: the running total of the sweeps
# minus half of the wedge's own sweep.
mid_angles = model_sales_percent['angle'].cumsum() - model_sales_percent['angle'] / 2
model_sales_percent['label_angle'] = mid_angles

# Cartesian end points of the connecting lines: on the wedge edge and at
# the label anchor.
edge_points = [calc_label_position(theta, radius) for theta in mid_angles]
anchor_points = [calc_label_position(theta, label_radius) for theta in mid_angles]
model_sales_percent['x'] = [point[0] for point in edge_points]
model_sales_percent['y'] = [point[1] for point in edge_points]
model_sales_percent['label_x'] = [point[0] for point in anchor_points]
model_sales_percent['label_y'] = [point[1] for point in anchor_points]

source = ColumnDataSource(model_sales_percent)

p2 = figure(
    title="Sales Distribution by Model",
    width=500,
    height=500,
    x_range=(-1, 1),
    y_range=(-1, 1),
    tools="hover",
    tooltips="@Model: @percent",
    toolbar_location=None,
)

# Each wedge runs from the cumulative angle of the preceding rows to the
# cumulative angle including its own row.
p2.wedge(
    x=0,
    y=0,
    radius=radius,
    start_angle=cumsum('angle', include_zero=True),
    end_angle=cumsum('angle'),
    line_color="white",
    fill_color='color',
    source=source,
)

# Add labels
labels = LabelSet(
    x='label_x',
    y='label_y',
    text='percent',
    angle=0,
    text_font_size="9pt",
    text_align="center",
    source=source,
)
p2.add_layout(labels)

# Add connecting lines
p2.segment(x0='x', y0='y', x1='label_x', y1='label_y', line_color="black", source=source)

# Add model names; they share the percentage label's anchor and are lifted
# by y_offset so the two texts do not overlap.
model_labels = LabelSet(
    x='label_x',
    y='label_y',
    text='Model',
    angle=0,
    text_font_size="9pt",
    text_align="center",
    y_offset=10,
    source=source,
)
p2.add_layout(model_labels)

# A pie chart needs no axes, grid or frame outline.
p2.axis.axis_label = None
p2.axis.visible = False
p2.grid.grid_line_color = None
p2.outline_line_color = None

# Sections 3-5 below build the remaining charts (regional bar chart, rating/price scatter, sales heatmap) from the same `df`.









# 3. Regional Sales Comparison (Bar Plot)
# Total revenue per region; the region names serve both as the categorical
# x-range and as the factors of the colour mapping.
regional_sales = df.groupby('Region')['Revenue'].sum().reset_index()
source = ColumnDataSource(regional_sales)
region_names = regional_sales['Region']

p3 = figure(
    x_range=region_names,
    title="Total Sales by Region",
    toolbar_location=None,
    tools="",
)

region_fill = factor_cmap('Region', palette=Category10[4], factors=region_names)
p3.vbar(
    x='Region',
    top='Revenue',
    width=0.9,
    source=source,
    line_color='white',
    fill_color=region_fill,
)

# Bars start at zero, and vertical grid lines are hidden.
p3.y_range.start = 0
p3.xgrid.grid_line_color = None
p3.xaxis.axis_label = "Region"
p3.yaxis.axis_label = "Total Revenue"

# 4. Customer Rating vs Price (Scatter Plot)
# One marker per daily record: x = price, y = rating, marker size = units
# sold, colour = model.
source = ColumnDataSource(df)

p4 = figure(title="Customer Rating vs Price", x_axis_label="Price", y_axis_label="Customer Rating",
            width=800, height=400)

# Sort the factors so each model gets the same Category10 colour as in the
# pie chart, whose rows come from a groupby and are therefore alphabetical.
# unique() alone returns the models in order of first appearance.
model_names = sorted(df['Model'].unique())

# legend_group makes Bokeh build one legend entry per distinct 'Model'
# value, each tied to a matching data point. Listing the single scatter
# renderer once per model by hand gives entries that all point at the same
# renderer, so they cannot show distinct colours.
scatter = p4.scatter('Price', 'Customer_Rating', size='Units_Sold',
                     color=factor_cmap('Model', palette=Category10[4], factors=model_names),
                     alpha=0.6, legend_group='Model', source=source)

p4.add_tools(HoverTool(tooltips=[
    ("Model", "@Model"),
    ("Price", "@Price{$0,0.00}"),
    ("Rating", "@Customer_Rating"),
    ("Units Sold", "@Units_Sold")
]))

# Move the automatically created legend out of the plot frame to the right
# so it does not cover any markers.
legend = p4.legend[0]
p4.add_layout(legend, 'right')

# 5. Sales Heatmap (Units Sold by Day and Month)
# Adds 'Month' and 'Day' helper columns to df, then pivots to a
# day-of-month x month-name table of summed units.
df['Month'] = df['Date'].dt.month_name()
df['Day'] = df['Date'].dt.day
heatmap_data = df.pivot_table(values='Units_Sold', index='Day', columns='Month', aggfunc='sum')

months = ['January', 'February', 'March', 'April', 'May', 'June',
          'July', 'August', 'September', 'October', 'November', 'December']
days = list(range(1, 32))

# Expand the pivot to the full 31 x 12 grid and use 0 for every cell without
# data. This covers month/day labels missing from the pivot as well as dates
# that do not exist (e.g. February 30), which the pivot holds as NaN. A plain
# membership test on the pivot's index/columns would let those NaN values
# through to min()/max() below and into the tooltips.
heatmap_grid = heatmap_data.reindex(index=days, columns=months).fillna(0)

# Flatten month by month into parallel lists. Day labels are strings because
# the y-axis is categorical; counts are plain ints.
data = {
    'Month': [month for month in months for _ in days],
    'Day': [str(day) for _ in months for day in days],
    'Units_Sold': [int(heatmap_grid.at[day, month]) for month in months for day in days],
}

source = ColumnDataSource(data=data)

# Days are reversed so that day 1 is at the top of the chart.
p5 = figure(title="Sales Heatmap: Units Sold by Day and Month",
            x_range=months, y_range=list(reversed([str(day) for day in days])),
            x_axis_location="above", width=900, height=400,
            tools="hover", toolbar_location=None,
            tooltips=[('Date', '@Month @Day'), ('Units Sold', '@Units_Sold')])

# Create color mapper spanning the observed range of cell values
color_mapper = LinearColorMapper(palette=Viridis256, low=min(data['Units_Sold']), high=max(data['Units_Sold']))

# Add rect glyphs: one unit-sized cell per (month, day) pair
p5.rect(x='Month', y='Day', width=1, height=1, source=source,
        line_color=None, fill_color={'field': 'Units_Sold', 'transform': color_mapper})

# Add color bar
color_bar = ColorBar(color_mapper=color_mapper, label_standoff=12, border_line_color=None, location=(0, 0))
p5.add_layout(color_bar, 'right')

p5.xaxis.axis_label = 'Month'
p5.yaxis.axis_label = 'Day'

# Show all plots: trend on top, pie and bar side by side, then scatter and
# heatmap underneath.
dashboard = column(p1, row(p2, p3), p4, p5)
show(dashboard)

# Print summary statistics
print(df.describe())

total_revenue = df['Revenue'].sum().round(2)
per_model = df.groupby('Model')
per_region = df.groupby('Region')

print("\nTotal Revenue: $", total_revenue)
print("Best Selling Model:", per_model['Units_Sold'].sum().idxmax())
print("Highest Rated Model:", per_model['Customer_Rating'].mean().idxmax())
# This ranks regions by total revenue.
print("Most Profitable Region:", per_region['Revenue'].sum().idxmax())

                      Date  Units_Sold        Price  Customer_Rating  \
count                  365  365.000000   365.000000       365.000000   
mean   2023-07-02 00:00:00   26.443836  1263.350767         4.032055   
min    2023-01-01 00:00:00    1.000000   503.890000         3.000000   
25%    2023-04-02 00:00:00   16.000000   912.100000         3.500000   
50%    2023-07-02 00:00:00   27.000000  1294.760000         4.000000   
75%    2023-10-01 00:00:00   37.000000  1611.460000         4.500000   
max    2023-12-31 00:00:00   49.000000  1996.540000         5.000000   
std                    NaN   13.725337   416.199019         0.585247   

            Revenue         Day  
count    365.000000  365.000000  
mean   33711.071397   15.720548  
min      577.450000    1.000000  
25%    16101.670000    8.000000  
50%    30191.360000   16.000000  
75%    48995.060000   23.000000  
max    97830.460000   31.000000  
std    22120.676637    8.808321  

Total Revenue: $ 12304541.06
Best Selling Mo