# DV Lab 06 

### Perform the Following Tasks in Seaborn

##### Import the required libraries, load the data from the CSV file "sales_data.csv", and perform the following tasks on it.

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Load the sales records into a DataFrame. The path is relative, so the CSV
# must sit in the working directory the notebook is run from.
data = pd.read_csv("sales_data.csv")

#### Plot the total sales per category across all orders.

In [None]:
# Sum the "price" column within each category; the result is a Series
# indexed by category.
# NOTE(review): this treats each row's price as its sales amount. If a row
# can carry a quantity greater than one, revenue would be price * quantity --
# confirm the intended definition of "total sales".
category_totals = data.groupby("category")["price"].sum()

# seaborn has no module-level title/xlabel/ylabel helpers. The plotting
# function returns the matplotlib Axes it drew on, and that Axes owns the
# title and the axis labels.
ax = sns.lineplot(x=category_totals.index, y=category_totals.values)
ax.set_title("Total Sales per Category")
ax.set_xlabel("Category")
ax.set_ylabel("Total Sales")
plt.show()

#### Visualize the relationship between product price and quantity sold. Add a regression line to show the general trend.

In [None]:
# Scatter of price against quantity with a fitted regression line.
# seaborn has no module-level title/xlabel/ylabel helpers, so the labels are
# set on the matplotlib Axes that regplot returns.
ax = sns.regplot(x="price", y="quantity", data=data)
ax.set_title("Product Price vs. Quantity Sold (Regression Line)")
ax.set_xlabel("Product Price")
ax.set_ylabel("Quantity Sold")
plt.show()

#### Explore the distribution of product price within each category using violin plots. Group the plots by customer country using a facet grid.

In [None]:
# One violin panel per customer country (a facet grid), each showing the
# price distribution within every category.
#
# Why catplot instead of a single violinplot with hue/split:
#   * the task asks for a facet grid grouped by country;
#   * split=True is meant for a hue with exactly two levels;
#   * `showmeans` is a matplotlib Axes.violinplot argument, not a seaborn
#     one. The default inner box already marks the median and quartiles.
grid = sns.catplot(
    data=data,
    x="category",
    y="price",
    col="customer_country",
    kind="violin",
    col_wrap=3,  # wrap panels so many countries do not form one long row
)

# A figure-level title; y > 1 keeps it clear of the per-panel titles.
grid.figure.suptitle(
    "Distribution of Product Price by Category and Customer Country",
    y=1.02,
)

# Rotate x-axis labels on every panel for better readability.
for panel in grid.axes.flat:
    panel.tick_params(axis="x", labelrotation=45)

plt.show()


#### Create a heatmap visualizing the correlation matrix between product price, quantity, and a potential additional numerical variable (e.g., shipping cost, discount applied). Apply hierarchical clustering to order categories and countries based on their correlations.

In [None]:
# Pairwise correlations between the three numeric columns.
correlation_matrix = data[["price", "quantity", "shipping_cost"]].corr()

# clustermap draws the same annotated heatmap as sns.heatmap but also applies
# hierarchical clustering to reorder rows and columns (with dendrograms),
# which is what the task and the title ask for. A plain heatmap does no
# clustering.
grid = sns.clustermap(correlation_matrix, annot=True, cmap="coolwarm", center=0)

# clustermap returns a grid object rather than an Axes, so the title goes on
# the figure. seaborn itself has no title() function.
grid.figure.suptitle("Correlation Matrix (Hierarchical Clustering)")
plt.show()


#### Explore the relationships between all numerical variables in your data (price, quantity, shipping cost, etc.) using a pairplot. This provides a quick overview of pairwise correlations and potential outliers.

In [None]:
# Columns to compare pairwise; adjust the names to match the dataset.
numeric_columns = ["price", "quantity", "shipping_cost"]
numeric_data = data[numeric_columns]

# Grid of pairwise plots over the selected columns.
sns.pairplot(numeric_data)
plt.show()


### Perform the Following Tasks in Plotly 

#### Plot the total sales per category using an interactive bar chart where users can hover over bars to see details (category and total sales).

In [None]:
import plotly.graph_objects as go

# Total of the "price" column for each category (Series indexed by category).
category_totals = data.groupby("category")["price"].sum()

# Build the bar trace on its own: bar labels come from `text`, and the hover
# box content is fixed by the template on the trace itself.
sales_bars = go.Bar(
    x=category_totals.index,
    y=category_totals.values,
    text=category_totals.values,
    hovertemplate="Category: %{x}<br>Total Sales: %{y:.2f}",
)

fig = go.Figure(data=[sales_bars])

# Figure title and axis captions.
fig.update_layout(
    title="Total Sales per Category",
    xaxis_title="Category",
    yaxis_title="Total Sales",
)

fig.show()


#### Create a scatter plot visualizing the relationship between product price and quantity sold, color-coded by customer country.

In [None]:
# One marker trace per customer country.
# marker.color only accepts colour values or numbers, so passing the raw
# country strings is rejected by Plotly. Giving each country its own named
# trace lets Plotly assign a distinct colour per country and produces the
# legend that `legend_title` below is meant to caption.
fig = go.Figure()
for country, country_rows in data.groupby("customer_country"):
    fig.add_trace(
        go.Scatter(
            x=country_rows["price"],
            y=country_rows["quantity"],
            mode="markers",
            name=str(country),
            marker=dict(size=10),
        )
    )

# Customize layout
fig.update_layout(
    title="Product Price vs. Quantity Sold (by Customer Country)",
    xaxis_title="Product Price",
    yaxis_title="Quantity Sold",
    legend_title="Customer Country",
)

fig.show()


#### Plot a scatter plot visualizing the relationship between product price and quantity sold, color-coded by customer country.

In [None]:
import plotly.graph_objects as go

# Scatter plot colour-coded by customer country: one marker trace per country.
# marker.color only accepts colour values or numbers, so passing the raw
# country strings is rejected by Plotly. Giving each country its own named
# trace lets Plotly assign a distinct colour per country and produces the
# legend that `legend_title` below is meant to caption.
fig = go.Figure()
for country, country_rows in data.groupby("customer_country"):
    fig.add_trace(
        go.Scatter(
            x=country_rows["price"],
            y=country_rows["quantity"],
            mode="markers",
            name=str(country),
            marker=dict(size=10),
        )
    )

# Customize layout
fig.update_layout(
    title="Product Price vs. Quantity Sold (by Customer Country)",
    xaxis_title="Product Price",
    yaxis_title="Quantity Sold",
    legend_title="Customer Country",
)

fig.show()


#### Visualize the average order value per country on a world map, colored by the average value.

In [None]:
import plotly.graph_objects as go

# A choropleth matches regions by identifier, so no latitude/longitude
# columns are needed.

# Average order value per country: the mean of price * quantity over that
# country's rows. (Multiplying the summed price by the summed quantity is
# not an average and grows with the number of rows.)
# NOTE(review): assumes each row is one order whose value is
# price * quantity -- confirm against the CSV schema.
order_value = data["price"] * data["quantity"]
avg_order_value = (
    order_value.groupby(data["customer_country"])
    .mean()
    .rename("average_order_value")
    .to_frame()
)

# Create the choropleth map
fig = go.Figure(
    data=[
        go.Choropleth(
            locations=avg_order_value.index,  # Country identifiers from the index
            z=avg_order_value["average_order_value"],  # Value that drives the colour
            text=avg_order_value.index,  # Shown on hover
            colorscale="Viridis",
            colorbar_title="Average Order Value",
            # NOTE(review): "ISO-3" expects three-letter country codes in
            # `customer_country`. If the column holds full country names,
            # switch this to "country names". Both modes use Plotly's
            # built-in base map, so no custom GeoJSON is required.
            locationmode="ISO-3",
        )
    ]
)

# Customize layout. `projection` is a nested object, so the projection name
# goes under its `type` key rather than being passed as a bare string.
fig.update_layout(
    title="Average Order Value by Country (World Map)",
    geo=dict(showframe=False, projection=dict(type="natural earth")),
)

fig.show()


#### Visualize the relationship between product price, quantity sold, and a third numerical variable (e.g., shipping cost) using a 3D scatter plot. Color-code the data points based on the customer country.

In [None]:
# One 3D marker trace per customer country.
# marker.color only accepts colour values or numbers, so passing the raw
# country strings is rejected by Plotly. Giving each country its own named
# trace lets Plotly assign a distinct colour per country and produces the
# legend that `legend_title` below is meant to caption.
fig = go.Figure()
for country, country_rows in data.groupby("customer_country"):
    fig.add_trace(
        go.Scatter3d(
            x=country_rows["price"],
            y=country_rows["quantity"],
            z=country_rows["shipping_cost"],  # Requires a "shipping_cost" column
            mode="markers",
            name=str(country),
            marker=dict(size=5),
        )
    )

# Customize layout
fig.update_layout(
    title="Product Price vs. Quantity vs. Shipping Cost (by Customer Country)",
    scene=dict(xaxis_title="Price", yaxis_title="Quantity", zaxis_title="Shipping Cost"),
    legend_title="Customer Country",
)

fig.show()
