# In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats
import ipywidgets as widgets
from sklearn.linear_model import LinearRegression
from IPython.display import display, clear_output, HTML
import numpy as np

# === Styling for Presentation ===
# CSS injected into the notebook page: enlarges form controls and cell output
# text, and fixes the widths of the dashboard's button and dropdowns.
_PRESENTATION_CSS = '''
<style>
select, input, button {
    font-size: 22px !important;
    padding: 10px 20px !important;
    width: 700px !important;
}
.output {
    font-size: 22px !important;
}
.widget-button {
    height: 60px !important;
    width: 350px !important;
    font-weight: bold !important;
}
.widget-dropdown {
    width: 700px !important;
}
</style>
'''

display(HTML(_PRESENTATION_CSS))

# === Load & Prepare Data ===
def _load_sales(csv_path):
    """Read the sales CSV and add the derived date columns used by the dashboard.

    Adds, in this order:
      * 'Order Date'  - parsed to datetime (replaces the raw column in place)
      * 'Year-Month'  - the order's calendar month as a string period
      * 'MonthIndex'  - 1-based month counter starting at January of the
                        earliest order year
    """
    frame = pd.read_csv(csv_path)
    frame['Order Date'] = pd.to_datetime(frame['Order Date'])

    order_dates = frame['Order Date']
    frame['Year-Month'] = order_dates.dt.to_period('M').astype(str)

    years = order_dates.dt.year
    frame['MonthIndex'] = (years - years.min()) * 12 + order_dates.dt.month
    return frame


df = _load_sales('Sales Dataset.csv')

# Single shared output pane; every analysis function renders into it.
output_area = widgets.Output(
    layout=widgets.Layout(width='100%', border='1px solid #ccc', padding='10px'),
)

def display_table(df):
    """Render a DataFrame as an HTML table without its index.

    The opening ``<table`` tag is rewritten to carry an inline style
    (22px font, full width, small top margin) so it matches the rest of
    the presentation-sized output.
    """
    styled_open_tag = '<table style="font-size:22px; width:100%; margin-top: 10px;"'
    markup = df.to_html(classes='table', index=False)
    display(HTML(markup.replace('<table', styled_open_tag)))

# === Analysis Functions ===
def profit_by_category():
    """Show the mean profit of each category, highest first, in the output pane."""
    with output_area:
        clear_output()
        mean_profit = df.groupby('Category')['Profit'].mean()
        ranked = mean_profit.sort_values(ascending=False).reset_index()
        display(HTML("<div style='font-size:22px; font-weight:500;'>Average profit by category:</div>"))
        display_table(ranked)

def anova_by_category():
    """Run a one-way ANOVA of profit across categories and show a boxplot.

    Renders into the shared output pane: a boxplot of profit per category,
    followed by the F-statistic, the p-value and a significance verdict at
    the 0.05 level. If the data holds fewer than two categories there is
    nothing to compare, so a short notice is shown instead of the test.
    """
    with output_area:
        clear_output()
        # One pass over the frame; groupby also skips rows whose category is
        # missing, which would otherwise produce an empty sample.
        samples = [profits for _, profits in df.groupby('Category')['Profit']]
        if len(samples) < 2:
            # f_oneway raises when given fewer than two samples.
            display(HTML("<div style='font-size:22px;'>ANOVA needs at least two categories to compare.</div>"))
            return
        f_stat, p_val = stats.f_oneway(*samples)
        verdict = 'significant' if p_val < 0.05 else 'not significant'

        sns.set_context("talk", font_scale=1.4)
        plt.figure(figsize=(14, 8))
        sns.boxplot(x='Category', y='Profit', data=df)
        plt.title("Profit Distribution by Category", fontsize=24)
        plt.xlabel("Category", fontsize=18)
        plt.ylabel("Profit", fontsize=18)
        plt.xticks(rotation=45, fontsize=16)
        plt.yticks(fontsize=16)
        plt.show()
        display(HTML(f"""
    <div style='font-size:22px; margin-top: 20px;'>
        ANOVA F-Statistic: {f_stat:.2f}, P-Value: {p_val:.4f}<br>
        Conclusion: profit difference across categories is statistically {verdict}.
    </div>
    """))

def monthly_profit_linear_regression():
    """Plot total profit per month together with a fitted linear trend line.

    Profit is summed per 'MonthIndex', a straight line is fitted with
    MonthIndex as the only feature, and both series are drawn against the
    calendar month. The slope of the fit is printed below the chart.
    """
    with output_area:
        clear_output()
        # Take each point's calendar month from that month's own orders.
        # Generating a date range from the first order date mislabels the
        # axis when that date is not the 1st of a month, and again whenever
        # a month has no orders at all.
        monthly = (
            df.groupby('MonthIndex')
            .agg(Profit=('Profit', 'sum'), Month=('Order Date', 'min'))
            .reset_index()
        )
        # Snap every date to the first day of its month for a clean x-axis.
        monthly['Month'] = monthly['Month'].dt.to_period('M').dt.to_timestamp()

        X = monthly[['MonthIndex']]
        y = monthly['Profit']
        model = LinearRegression()
        model.fit(X, y)
        y_pred = model.predict(X)

        sns.set_context("talk", font_scale=1.4)
        plt.figure(figsize=(14, 8))
        plt.plot(monthly['Month'], y, marker='o', label='Actual')
        plt.plot(monthly['Month'], y_pred, linestyle='--', color='red', label='Trend Line')
        plt.title("Monthly Profit Trend with Linear Regression", fontsize=24)
        plt.xlabel("Time", fontsize=18)
        plt.ylabel("Total Profit", fontsize=18)
        plt.xticks(rotation=30, fontsize=14)
        plt.yticks(fontsize=14)
        plt.legend()
        plt.show()

        display(HTML(f"<div style='font-size:22px;'>Linear Regression Coefficient: {model.coef_[0]:.2f}</div>"))
        display(HTML("<div style='font-size:22px;'>→ Indicates whether profit is increasing or decreasing over time.</div>"))

def top_profit_cities():
    """Draw a bar chart of the ten cities with the highest mean profit."""
    with output_area:
        clear_output()
        mean_by_city = df.groupby('City')['Profit'].mean()
        top_ten = mean_by_city.sort_values(ascending=False).head(10)

        sns.set_context("talk", font_scale=1.4)
        plt.figure(figsize=(14, 8))
        top_ten.plot(kind='bar', color='skyblue')
        plt.title("Top 10 Cities by Average Profit", fontsize=24)
        plt.ylabel("Avg Profit", fontsize=18)
        plt.xticks(rotation=45, fontsize=14)
        plt.yticks(fontsize=14)
        plt.tight_layout()
        plt.show()

        caption = (
            "<div style='font-size:22px;'>The highest profit comes from the cities "
            "at the top of this list, where average profit exceeds others "
            "significantly.</div>"
        )
        display(HTML(caption))

def outlier_orders():
    """List orders whose profit lies more than 2 standard deviations from the mean.

    Shows the number of such orders and a table of the five with the
    highest z-scores. Z-scores are computed on a local frame, so the shared
    ``df`` is not given an extra column each time the view is opened.
    Missing profits are left out of the mean and standard deviation rather
    than turning every z-score into NaN.
    """
    with output_area:
        clear_output()
        z_scores = stats.zscore(df['Profit'].to_numpy(dtype=float), nan_policy='omit')
        scored = df[['Order ID', 'Profit']].assign(**{'Z-Score': z_scores})
        # Rows with a NaN z-score compare False and so are never reported.
        outliers = scored[scored['Z-Score'].abs() > 2]
        display(HTML(f"<div style='font-size:22px;'>Number of profit outliers: {len(outliers)}</div>"))
        display_table(outliers.sort_values(by='Z-Score', ascending=False).head())
        display(HTML("<div style='font-size:22px;'>→ These are orders with unusually high or low profit, beyond 2 standard deviations.</div>"))

def test_significance_vs_50():
    """Run a one-sample t-test of profit against 50 and report the outcome.

    Prints the t-statistic and p-value, then a verdict line using a 0.05
    significance threshold.
    """
    with output_area:
        clear_output()
        t_stat, p_val = stats.ttest_1samp(df['Profit'], 50)
        display(HTML(f"<div style='font-size:22px;'>T-Statistic: {t_stat:.2f}, P-Value: {p_val:.4f}</div>"))

        if p_val < 0.05:
            verdict = "<div style='font-size:22px;'>→ Profit is significantly different from $50.</div>"
        else:
            verdict = "<div style='font-size:22px;'>→ Not significantly different from $50.</div>"
        display(HTML(verdict))

# === Widgets ===
def _wide_dropdown(**dropdown_kwargs):
    """Create a 700px-wide dropdown; each call gets its own Layout object."""
    return widgets.Dropdown(
        layout=widgets.Layout(width='700px', height='50px'),
        style={'description_width': 'initial'},
        **dropdown_kwargs,
    )


# Top-level focus area chosen by the user.
main_choice = _wide_dropdown(
    options=[
        'Category performance',
        'Monthly trends',
        'Geographic insights',
        'Profit anomalies',
    ],
)

# Question list; starts empty and is filled from the selected focus area.
follow_up = _wide_dropdown()

# Runs the analysis for the currently selected question.
button = widgets.Button(
    description="Show Insights",
    button_style='success',
    layout=widgets.Layout(width='350px', height='60px'),
    style=widgets.ButtonStyle(font_weight='bold'),
)

def update_followup_options(*args):
    """Fill the question dropdown with the questions for the selected focus.

    Accepts and ignores any positional arguments so it can be used both as
    a widget observer callback and as a plain call. An unrecognised focus
    value leaves the current question list untouched.
    """
    questions_by_focus = {
        "Category performance": [
            "Which categories are most profitable?",
            "Does profit differ by category?",
        ],
        "Monthly trends": [
            "Is profit increasing or decreasing over time?",
            "Is the average profit significantly different from $50?",
        ],
        "Geographic insights": [
            "Which cities are most profitable?",
        ],
        "Profit anomalies": [
            "Which orders had unusual profits?",
        ],
    }
    questions = questions_by_focus.get(main_choice.value)
    if questions is not None:
        follow_up.options = questions

# Keep the question list in sync with the focus dropdown, and populate it
# once up front so it is not empty before the first change event.
main_choice.observe(update_followup_options, names='value')
update_followup_options()

def on_button_click(b):
    """Run the analysis that matches the selected question.

    ``b`` is the clicked button (unused). A question with no registered
    handler is ignored.
    """
    handlers = {
        "Which categories are most profitable?": profit_by_category,
        "Does profit differ by category?": anova_by_category,
        "Is profit increasing or decreasing over time?": monthly_profit_linear_regression,
        "Which cities are most profitable?": top_profit_cities,
        "Which orders had unusual profits?": outlier_orders,
        "Is the average profit significantly different from $50?": test_significance_vs_50,
    }
    handler = handlers.get(follow_up.value.strip())
    if handler is not None:
        handler()

button.on_click(on_button_click)


def _labelled_row(caption, control):
    """Place a bold caption to the left of a control on a single row."""
    label = widgets.HTML(f"<b style='font-size:22px; width:180px;'>{caption}</b>")
    return widgets.HBox([label, control])


# Dashboard layout, top to bottom: title, focus picker, question picker,
# centred action button, then the shared output pane.
_title = widgets.HTML("<h2 style='font-size:32px; margin-bottom: 20px;'>Business Insights Dashboard</h2>")
_button_row = widgets.HBox(
    [button],
    layout=widgets.Layout(justify_content='center', margin='20px 0'),
)

ui = widgets.VBox(
    [
        _title,
        _labelled_row("Focus:", main_choice),
        _labelled_row("Question:", follow_up),
        _button_row,
        output_area,
    ],
    layout=widgets.Layout(width='1000px'),
)

display(ui)