# DV Lab 06 

### Perform the following Tasks in Seaborn

##### Import the required libraries, load the data from the CSV file "ecom_data.csv", and perform the following tasks on it.

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Read the CSV data into the shared `data` frame used by every cell below.
# The path is relative, so the file is looked up in the current working directory.
# The later cells read the "price", "quantity", "category" and
# "customer_country" columns from it.
data = pd.read_csv("ecom_data.csv")

#### Create a bar chart showing the average order value (price multiplied by quantity) for each customer country, colored by the dominant product category in that country.

In [None]:
# Average order value per country: the mean of (price * quantity) over that
# country's order rows. Multiplying the summed price by the summed quantity
# (as a previous version did) is not an average and grows with the row count.
order_value = (data["price"] * data["quantity"]).rename("average_order_value")
avg_order_value = order_value.groupby(data["customer_country"]).mean().to_frame()


def _most_frequent(values):
    """Return the most frequent value in *values*, or None when there is none."""
    modes = values.mode()
    # Series.mode() can return several values on a tie; take the first one.
    return modes.iat[0] if not modes.empty else None


# Dominant category per country = most frequent category among its orders.
# SeriesGroupBy has no .mode(), so the per-group mode goes through .agg().
avg_order_value["dominant_category"] = (
    data.groupby("customer_country")["category"].agg(_most_frequent)
)
country_summary = avg_order_value.reset_index()

# Create the bar chart with colour-coding by dominant category.
ax = sns.barplot(
    data=country_summary,
    x="customer_country",
    y="average_order_value",
    hue="dominant_category",
    dodge=False,  # one full-width bar per country; hue only picks the colour
)
# seaborn has no title/xlabel/ylabel functions; labels belong to the Axes.
ax.set_title("Average Order Value per Customer Country (Dominant Category)")
ax.set_xlabel("Customer Country")
ax.set_ylabel("Average Order Value")
legend = ax.get_legend()
if legend is not None:
    legend.set_title("Dominant Category")
ax.tick_params(axis="x", rotation=45)  # Rotate x-axis labels for better readability
plt.show()


#### Explore the joint distribution of price and quantity, considering the potential interaction between these variables.

In [None]:
# Kernel density estimation gives a smooth view of the joint distribution;
# the marginal axes show each variable's own distribution.
grid = sns.jointplot(data=data, x="price", y="quantity", kind="kde")

# jointplot is figure-level and returns a JointGrid, so the title and axis
# labels are set through the grid (seaborn itself has no title/xlabel/ylabel).
grid.set_axis_labels("Product Price", "Quantity Sold")
grid.figure.suptitle("Joint Distribution of Product Price and Quantity Sold")
grid.figure.subplots_adjust(top=0.93)  # leave room so the title clears the top marginal plot
plt.show()

#### Create a stacked bar chart visualizing the total quantity sold of each product category across different customer countries. Include error bars (standard deviation) to represent the variability in sales.

In [None]:
# Group once; the totals and the spread both come from the same groups.
quantity_groups = data.groupby(["category", "customer_country"])["quantity"]

# Total quantity per category-country combination.
category_quantity = quantity_groups.sum()

# Standard deviation of the individual order quantities within each
# combination, used as the error-bar length for that bar segment.
category_quantity_std = quantity_groups.std()

# Unstack to a category x country table; combinations with no orders count as 0.
category_quantity_unstacked = category_quantity.unstack(fill_value=0)
# A group with a single order has an undefined (NaN) std; draw no error bar there.
category_quantity_std_unstacked = (
    category_quantity_std.unstack()
    .reindex_like(category_quantity_unstacked)
    .fillna(0)
)

# sns.barplot cannot stack bars, so the stacked chart is drawn through pandas'
# matplotlib backend, with seaborn supplying the colour palette.
ax = category_quantity_unstacked.plot(
    kind="bar",
    stacked=True,
    yerr=category_quantity_std_unstacked,  # columns match the plotted frame
    capsize=3,
    color=sns.color_palette(n_colors=category_quantity_unstacked.shape[1]),
)
ax.set_title("Total Quantity Sold per Category by Customer Country (with Error Bars)")
ax.set_xlabel("Category")  # the x axis carries the table's index, i.e. the categories
ax.set_ylabel("Total Quantity Sold")
ax.legend(title="Customer Country")
ax.tick_params(axis="x", rotation=45)  # Rotate x-axis labels for better readability
plt.show()


#### Visualize the distribution of order quantity using a kernel density estimation (KDE) plot. Highlight a specific threshold quantity (e.g., orders with more than 3 items) using a vertical line.

In [None]:
# Quantity above which an order is highlighted on the plot.
threshold_quantity = 3

# kdeplot returns the matplotlib Axes it drew on; the reference line, title
# and labels are Axes methods (seaborn has no axvline/title/xlabel functions).
ax = sns.kdeplot(data=data, x="quantity")
ax.axvline(
    x=threshold_quantity,
    color="red",
    linestyle="dashed",
    label=f"Threshold ({threshold_quantity} items)",
)
ax.set_title("Distribution of Order Quantity (Threshold Highlighted)")
ax.set_xlabel("Order Quantity")
ax.legend()
plt.show()


#### Show the distribution of product prices across different categories, highlighting potential outliers.

In [None]:
# One box per category; showmeans=True adds a marker for each category's mean
# price next to the median line.
ax = sns.boxplot(data=data, x="category", y="price", showmeans=True)

# boxplot returns the matplotlib Axes; seaborn has no title/xlabel/ylabel functions.
ax.set_title("Distribution of Product Prices by Category")
ax.set_xlabel("Category")
ax.set_ylabel("Product Price")
ax.tick_params(axis="x", rotation=45)  # Rotate x-axis labels for better readability
plt.show()


### Perform the following Tasks in Plotly

#### Plot the total sales per category with an interactive bar chart where users can hover over bars to see details (category and total sales). Additionally, display the cumulative total sales as a line on the same chart.

In [None]:
import plotly.graph_objects as go

# Total sales per category. A row's sales value is price * quantity, so the
# quantity has to be included; summing the price column alone would only add
# up unit prices.
category_totals = (data["price"] * data["quantity"]).groupby(data["category"]).sum()
sorted_totals = category_totals.sort_values(ascending=False)

# Running total over the categories, largest first.
cumulative_totals = sorted_totals.cumsum()

# Each trace gets its own name and hover text: a single template shared by
# both traces would label the cumulative line as "Total Sales".
fig = go.Figure()
fig.add_trace(
    go.Bar(
        x=sorted_totals.index,
        y=sorted_totals.values,
        name="Total Sales",
        texttemplate="%{y:.2f}",  # value label on each bar, rounded for display
        hovertemplate="Category: %{x}<br>Total Sales: %{y:.2f}<extra></extra>",
    )
)
fig.add_trace(
    go.Scatter(
        x=sorted_totals.index,
        y=cumulative_totals.values,
        mode="lines",
        line=dict(color="red"),
        name="Cumulative Total",
        hovertemplate="Category: %{x}<br>Cumulative Total: %{y:.2f}<extra></extra>",
    )
)

# Customize layout
fig.update_layout(
    title="Total Sales per Category (Cumulative Totals)",
    xaxis_title="Category",
    yaxis_title="Total Sales",
)

fig.show()


#### Create a box plot visualizing the distribution of product prices across different categories. Add annotations to highlight outliers or interesting patterns in the data.

In [None]:
# graph_objects traces take the data arrays directly; go.Box has no `data=`
# argument and would reject the column names passed as plain strings.
fig = go.Figure()
fig.add_trace(go.Box(x=data["category"], y=data["price"], name="Price"))

# Identify potential outliers with the 1.5 * IQR rule (adjust the factor as needed).
# transform() broadcasts each category's quartile back onto its rows, so the
# fences line up with data["price"] row by row. Comparing the per-category
# quantile Series against the row-indexed prices directly raises an error.
prices_by_category = data.groupby("category")["price"]
q1 = prices_by_category.transform(lambda prices: prices.quantile(0.25))
q3 = prices_by_category.transform(lambda prices: prices.quantile(0.75))
iqr = q3 - q1
is_outlier = (data["price"] < (q1 - 1.5 * iqr)) | (data["price"] > (q3 + 1.5 * iqr))

# Repeated (category, price) pairs would stack identical annotations on top of
# each other, so each distinct outlier point is annotated once.
outliers = data.loc[is_outlier, ["category", "price"]].drop_duplicates()

# Add annotations for outliers
for row in outliers.itertuples(index=False):
    fig.add_annotation(
        x=row.category,
        y=row.price,
        text=f"Outlier (${row.price:.2f})",
        showarrow=True,
        arrowhead=7,
        arrowcolor="red",
    )

# Customize layout
fig.update_layout(
    title="Distribution of Product Prices by Category (Outliers Highlighted)",
    xaxis_title="Category",
    yaxis_title="Product Price",
)

fig.show()


#### Visualize the average order value (price multiplied by quantity) and total quantity sold per category using a grouped bar chart with separate y-axes for each metric.

In [None]:
# Per-category totals. Selecting several columns from a groupby needs a list
# (double brackets); a bare tuple of names is rejected by current pandas.
avg_order_value = data.groupby("category")[["price", "quantity"]].sum()

# Average order value = mean of (price * quantity) over the category's rows,
# not the product of the summed price and the summed quantity.
order_value = data["price"] * data["quantity"]
avg_order_value["average_order_value"] = order_value.groupby(data["category"]).mean()

# Create the grouped bar chart with dual y-axes. Each trace is bound to its
# own y axis, and distinct offset groups keep the two bars side by side
# instead of overlapping once the second axis overlays the first.
fig = go.Figure()
fig.add_trace(
    go.Bar(
        x=avg_order_value.index,
        y=avg_order_value["average_order_value"],
        name="Average Order Value",
        yaxis="y",
        offsetgroup=1,
    )
)
fig.add_trace(
    go.Bar(
        x=avg_order_value.index,
        y=avg_order_value["quantity"],
        name="Total Quantity Sold",
        yaxis="y2",
        offsetgroup=2,
    )
)

# Configure y-axes: the left axis carries the average order value, the right
# (overlaid) axis carries the total quantity.
fig.update_layout(
    title="Average Order Value & Total Quantity Sold by Category",
    xaxis_title="Category",
    barmode="group",
    yaxis=dict(title="Average Order Value"),
    yaxis2=dict(
        title="Total Quantity Sold",
        overlaying="y",
        side="right",
        range=[0, avg_order_value["quantity"].max() * 1.1],  # 10% headroom above the tallest bar
    ),
)

fig.show()


#### Create an animated scatter plot visualizing the relationship between product price and quantity sold. Implement brushing functionality to allow users to select a range of data points on the plot and see the corresponding distribution in a separate histogram.

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# --- Part 1: animated scatter -------------------------------------------------
# The points are revealed cumulatively. The number of frames is capped because
# one frame per row stores 1 + 2 + ... + n points and makes the figure
# quadratic in the size of the data.
MAX_FRAMES = 50
row_count = len(data)
frame_step = max(1, -(-row_count // MAX_FRAMES))  # ceiling division
frame_ends = list(range(frame_step, row_count, frame_step)) + [row_count]

# Animation frames must be go.Frame objects attached to the figure; bare
# go.Scatter traces kept in a list are never played.
frames = [
    go.Frame(
        data=[
            go.Scatter(
                x=data["price"].iloc[:end],
                y=data["quantity"].iloc[:end],
                mode="markers",
            )
        ],
        name=str(end),
    )
    for end in frame_ends
]


def _padded_range(values, padding=0.05):
    """Return [low, high] covering *values* with a small margin on both sides."""
    low, high = values.min(), values.max()
    margin = (high - low) * padding or 1  # fall back to 1 when all values are equal
    return [low - margin, high + margin]


first_end = frame_ends[0]
fig = go.Figure(
    data=[
        go.Scatter(
            x=data["price"].iloc[:first_end],
            y=data["quantity"].iloc[:first_end],
            mode="markers",
        )
    ],
    frames=frames,
)
fig.update_layout(
    title="Product Price vs. Quantity Sold (Animated with Brushing)",
    # Axes are not re-scaled while frames play, so fix them to the full data range.
    xaxis=dict(title="Product Price", range=_padded_range(data["price"])),
    yaxis=dict(title="Quantity Sold", range=_padded_range(data["quantity"])),
    updatemenus=[
        dict(
            type="buttons",
            direction="left",
            buttons=[
                dict(
                    label="Play",
                    method="animate",
                    # args[0]=None plays every frame in order.
                    args=[
                        None,
                        dict(
                            frame=dict(duration=100, redraw=False),
                            transition=dict(duration=0),
                            fromcurrent=True,
                        ),
                    ],
                )
            ],
        )
    ],
)

# --- Part 2: brushing ---------------------------------------------------------
# Selection callbacks only exist on go.FigureWidget, and FigureWidget does not
# support animation frames, so brushing lives in its own widget: box/lasso
# select points on the left and the histogram on the right follows.
# NOTE(review): FigureWidget needs a Jupyter environment with widget support.
brush_layout = make_subplots(rows=1, cols=2)
brush_layout.add_trace(
    go.Scatter(x=data["price"], y=data["quantity"], mode="markers", name="Orders"),
    row=1,
    col=1,
)
brush_layout.add_trace(
    go.Histogram(x=data["quantity"], name="Brushed Data Distribution"),
    row=1,
    col=2,
)
brush_layout.update_xaxes(title_text="Product Price", row=1, col=1)
brush_layout.update_yaxes(title_text="Quantity Sold", row=1, col=1)
brush_layout.update_xaxes(title_text="Quantity Sold", row=1, col=2)
brush_layout.update_yaxes(title_text="Frequency", row=1, col=2)
brush_layout.update_layout(dragmode="select")  # drag draws a selection box
brush_fig = go.FigureWidget(brush_layout)

# Rows currently selected by the brush; the whole data set until something is selected.
brush_data = data


def update_brush(trace, point_info, state):
    """Refresh the histogram from the points selected on the scatter trace.

    Called by plotly with the trace, the selected points and the selector.
    ``point_info.point_inds`` holds the positions of the selected points in
    the trace, which match the row positions of ``data``. An empty selection
    falls back to the full data set.
    """
    global brush_data
    selected_positions = point_info.point_inds
    brush_data = data.iloc[selected_positions] if selected_positions else data
    with brush_fig.batch_update():
        brush_fig.data[1].x = brush_data["quantity"]


# Brushing functionality using a callback on the scatter trace.
brush_fig.data[0].on_selection(update_brush)

fig.show()
brush_fig  # last expression of the cell, so the notebook renders the live widget


#### Create a stacked bar chart showing the total quantity sold for each product category across different customer countries. Include tooltips that display the category name, country name, and total quantity when hovering over a bar segment.

In [None]:
import plotly.graph_objects as go

# Calculate total quantity per category-country combination
category_quantity = data.groupby(["category", "customer_country"])["quantity"].sum()

# Unstack to a category x country table; combinations with no orders count as 0.
category_quantity_unstacked = category_quantity.unstack(fill_value=0)
categories = category_quantity_unstacked.index

# One trace per country, stacked over the categories on the x axis.
fig = go.Figure(
    data=[
        go.Bar(
            x=categories,
            y=category_quantity_unstacked[country],  # the column's values, not its label
            name=str(country),
            hovertemplate="Category: %{x}<br>Country: %{customdata[0]}<br>Total Quantity: %{y:.0f}",
            # customdata is per point: one [country] row for every bar segment
            # of this trace, so %{customdata[0]} resolves to the country name.
            customdata=[[country]] * len(categories),
        )
        for country in category_quantity_unstacked.columns
    ]
)

# Customize layout
fig.update_layout(
    title="Total Quantity Sold per Category by Customer Country (Stacked)",
    barmode="stack",  # without this the bars are grouped side by side
    xaxis_title="Category",  # the x axis carries the categories
    yaxis_title="Total Quantity Sold",
    legend_title_text="Customer Country",
)

fig.show()
