In [3]:
# Install required library
!pip install plotly

# Import libraries
import io

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from google.colab import files

# Upload dataset (make sure you select OnlineRetail.csv)
uploaded = files.upload()

# Load CSV
# Colab renames a re-uploaded file (e.g. "OnlineRetail (1).csv"), so reading the
# fixed name "OnlineRetail.csv" can silently load an older copy from disk.
# Parse the bytes returned by the upload instead (first file if several were
# selected); fall back to the file on disk only when nothing was uploaded.
if uploaded:
    csv_source = io.BytesIO(next(iter(uploaded.values())))
else:
    csv_source = "OnlineRetail.csv"
raw_df = pd.read_csv(csv_source, encoding="ISO-8859-1")

# Data Cleaning
# Built as one chain from raw_df so the cell can be re-run without reloading:
#   1. drop exact duplicate rows
#   2. drop rows without a CustomerID
#   3. parse InvoiceDate; coerce Quantity / UnitPrice to numbers
#      (unparseable values become NaN)
#   4. add Sales = Quantity * UnitPrice (uses the coerced columns from step 3)
df = (
    raw_df
    .drop_duplicates()
    .dropna(subset=["CustomerID"])
    .assign(
        InvoiceDate=lambda frame: pd.to_datetime(frame["InvoiceDate"]),
        Quantity=lambda frame: pd.to_numeric(frame["Quantity"], errors="coerce"),
        UnitPrice=lambda frame: pd.to_numeric(frame["UnitPrice"], errors="coerce"),
        Sales=lambda frame: frame["Quantity"] * frame["UnitPrice"],
    )
)

# Preview cleaned dataset
df.head()




Saving OnlineRetail.csv to OnlineRetail (1).csv


Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country,Sales
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01 08:26:00,2.55,17850.0,United Kingdom,15.3
1,536365,71053,WHITE METAL LANTERN,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom,20.34
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,2010-12-01 08:26:00,2.75,17850.0,United Kingdom,22.0
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom,20.34
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom,20.34


In [4]:
# Total sales per country, sorted from the largest total to the smallest.
sales_country = (
    df.groupby("Country")["Sales"]
    .sum()
    .reset_index()
    .sort_values("Sales", ascending=False)
)

# Horizontal bar chart of the ten best-selling countries.
fig = px.bar(sales_country.head(10), x="Sales", y="Country", orientation="h",
             title="Top Countries by Total Sales")
# Plotly places the first row of a horizontal bar chart at the bottom, which
# would put the top seller last; order the categories by total so the largest
# bar is drawn at the top.
fig.update_layout(yaxis={"categoryorder": "total ascending"})
fig.show()


In [5]:
# Ten product descriptions with the highest total sales, largest first.
top_products = (
    df.groupby("Description")["Sales"]
    .sum()
    .reset_index()
    .sort_values("Sales", ascending=False)
    .head(10)
)

fig = px.bar(top_products, x="Sales", y="Description", orientation="h",
             title="Top 10 Products by Sales")
# Plotly places the first row of a horizontal bar chart at the bottom, which
# would put the best-selling product last; order the categories by total so
# the largest bar is drawn at the top.
fig.update_layout(yaxis={"categoryorder": "total ascending"})
fig.show()


In [6]:
# Daily totals: group rows by the calendar date of the invoice timestamp and
# sum Sales within each date.
sales_time = (
    df.groupby(df["InvoiceDate"].dt.date)["Sales"]
    .sum()
    .reset_index()
)

fig = px.line(
    data_frame=sales_time,
    x="InvoiceDate",
    y="Sales",
    title="Sales Trend Over Time",
)
# Range slider under the x-axis lets the reader zoom into a date window.
fig.update_xaxes(rangeslider_visible=True)
fig.show()


In [7]:
# Tag every row with the hour of its invoice timestamp; the column is kept on df.
df["Hour"] = df["InvoiceDate"].dt.hour

# Sum Sales within each hour; as_index=False returns Hour as a regular column.
hourly_sales = df.groupby("Hour", as_index=False)["Sales"].sum()

fig = px.line(
    data_frame=hourly_sales,
    x="Hour",
    y="Sales",
    title="Sales by Hour of Day",
)
fig.show()
