In [8]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Read the Superstore sales workbook from its local path
file_path = 'C:\\Users\\loydt\\Downloads\\Projects\\Superstore Sales Dataset.xlsx'
data = pd.read_excel(file_path)

# Keep only the rows whose State is one of the five states listed here
# (the states this analysis treats as the high-sales markets)
states_of_interest = ['Washington', 'California', 'New York', 'Florida', 'Pennsylvania']
in_selected_state = data['State'].isin(states_of_interest)
state = data[in_selected_state]

# Narrow the frame to the columns used for the market-insight exploration
insight_columns = [
    'Segment', 'State', 'City', 'Region', 'Ship Mode',
    'Order Date', 'Category', 'Sub-Category', 'Product Name', 'Sales',
]
high_sales_states = state[insight_columns]

# Per-category summary statistics of Sales. Each result is a two-column
# frame (Category, Sales) with one row per category.
sales_by_category = high_sales_states.groupby('Category')['Sales']
mean_sales = sales_by_category.mean().reset_index()
median_sales = sales_by_category.median().reset_index()
max_sales = sales_by_category.max().reset_index()
min_sales = sales_by_category.min().reset_index()

# One panel per distinct category, laid out side by side in a single row
# with a shared y-axis; each panel is titled with its category
categories = high_sales_states['Category'].unique()
panel_count = len(categories)
fig = make_subplots(
    rows=1,
    cols=panel_count,
    shared_yaxes=True,
    subplot_titles=categories,
)

# Histogram bar colours, assigned to categories by position
colors = ['red', 'blue', 'green']

# Index the per-category summary tables by category once, so the loop below
# does plain label lookups instead of re-filtering every table on each pass
mean_by_category = mean_sales.set_index('Category')['Sales']
median_by_category = median_sales.set_index('Category')['Sales']
max_by_category = max_sales.set_index('Category')['Sales']
min_by_category = min_sales.set_index('Category')['Sales']

# For each category: draw its Sales histogram in its own panel, mark the mean
# and median on that panel, and print the summary statistics
for i, category in enumerate(categories):
    category_data = high_sales_states[high_sales_states['Category'] == category]

    # Histogram of the individual sale amounts for this category. The colour
    # index wraps around so that more categories than colours does not raise
    # IndexError.
    fig.add_trace(
        go.Histogram(x=category_data['Sales'], name=category,
                     opacity=0.7, marker_color=colors[i % len(colors)],
                     showlegend=False),
        row=1, col=i + 1
    )

    # Look up the summary statistics for this category
    mean_value = mean_by_category[category]
    median_value = median_by_category[category]
    max_value = max_by_category[category]
    min_value = min_by_category[category]

    # Mark the mean with a vertical line spanning the full panel height.
    # The histogram's y-axis is a bin count, so the marker must not be sized
    # from a Sales amount: doing that stretches the shared y-axis to the scale
    # of the largest sale and flattens every histogram. add_vline anchors the
    # line to the panel instead of to data coordinates on the y-axis.
    fig.add_vline(
        x=mean_value,
        line_color='white', line_width=2, line_dash='dash',
        annotation_text=f'Mean: {round(mean_value, 2)}',
        annotation_position='top right',
        annotation_font_color='white',
        row=1, col=i + 1
    )

    # Mark the median the same way; its label sits at mid-height so it does
    # not collide with the mean label when the two lines are close together
    fig.add_vline(
        x=median_value,
        line_color='yellow', line_width=2, line_dash='dash',
        annotation_text=f'Median: {round(median_value, 2)}',
        annotation_position='right',
        annotation_font_color='yellow',
        row=1, col=i + 1
    )

    # Print min, mean, median, and max values for each category
    print(f"Category: {category}")
    print(f" - Min Sales: {round(min_value, 2)}")
    print(f" - Mean Sales: {round(mean_value, 2)}")
    print(f" - Median Sales: {round(median_value, 2)}")
    print(f" - Max Sales: {round(max_value, 2)}\n")

# Figure-wide appearance: title, dark theme, fixed height, legend visible
layout_options = dict(
    title_text='Sales Distribution by Category',
    template='plotly_dark',
    height=400,
    showlegend=True,
)
fig.update_layout(**layout_options)

# Legend-only proxy traces (they carry no data points) that explain the
# dashed line styles used for the mean and the median
for legend_label, line_color in (('Mean', 'white'), ('Median', 'yellow')):
    legend_proxy = go.Scatter(
        x=[None],
        y=[None],
        mode='lines',
        name=legend_label,
        line=dict(color=line_color, width=2, dash='dash'),
    )
    fig.add_trace(legend_proxy)

# Render the finished figure
fig.show()


Category: Office Supplies
 - Min Sales: 0.85
 - Mean Sales: 119.35
 - Median Sales: 29.2
 - Max Sales: 8187.65

Category: Furniture
 - Min Sales: 2.78
 - Mean Sales: 358.25
 - Median Sales: 183.97
 - Max Sales: 4416.17

Category: Technology
 - Min Sales: 0.99
 - Mean Sales: 501.38
 - Median Sales: 194.38
 - Max Sales: 22638.48

