<a href="https://colab.research.google.com/github/moushumipriya/Global-E-commerce-Sales-Analysis/blob/main/Global_E_commerce_Sales_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


Load Data & Inspect

In [None]:
# Load the raw transactions; the file is decoded as ISO-8859-1 (Latin-1).
df = pd.read_csv("data.csv", encoding='ISO-8859-1')

# Only the final expression of a cell is rendered automatically, so the
# preview is shown explicitly with display() (provided by IPython/Jupyter).
display(df.head())

# info() prints its column/dtype/non-null summary directly to stdout.
df.info()

# Summary statistics, rendered as the cell's output value.
df.describe()


Data Cleaning

In [None]:
# Drop exact duplicate rows. The name is rebound to the returned frame
# rather than mutating in place, which keeps the step chain-friendly.
df = df.drop_duplicates()

# Count missing values per column on the de-duplicated data. An expression in
# the middle of a cell is not rendered, so the counts are kept in a variable
# and shown as the last line of this cell.
missing_counts = df.isnull().sum()

# Keep only rows that have both a CustomerID and an InvoiceNo
df = df.dropna(subset=['CustomerID', 'InvoiceNo'])

# Missing-value counts as they were before the dropna above
missing_counts


Exploratory Data Analysis (EDA)

1. Top-selling products

In [None]:
# Total quantity per product description, largest first; keep the top ten
units_by_product = df.groupby('Description')['Quantity'].sum()
top_products = units_by_product.sort_values(ascending=False).head(10)

# Bar chart with quantity on the x-axis and product description on the y-axis
fig, ax = plt.subplots(figsize=(10,6))
sns.barplot(x=top_products.values, y=top_products.index, ax=ax)
ax.set_title("Top 10 Selling Products")
ax.set_xlabel("Quantity Sold")
plt.show()


Country-wise sales

In [None]:
# Total quantity per country, largest first; keep the top ten
units_by_country = df.groupby('Country')['Quantity'].sum()
country_sales = units_by_country.sort_values(ascending=False).head(10)

# Bar chart with quantity on the x-axis and country on the y-axis
fig, ax = plt.subplots(figsize=(10,6))
sns.barplot(x=country_sales.values, y=country_sales.index, ax=ax)
ax.set_title("Top 10 Countries by Sales Quantity")
ax.set_xlabel("Quantity Sold")
plt.show()


Monthly Revenue Trend

In [None]:
# Parse the invoice timestamps so the .dt accessor can be used below
df['InvoiceDate'] = pd.to_datetime(df['InvoiceDate'])

# Line-level revenue: quantity multiplied by unit price
df['Revenue'] = df['Quantity'] * df['UnitPrice']

# Sum revenue within each monthly period of the invoice date
invoice_month = df['InvoiceDate'].dt.to_period('M')
monthly_revenue = df.groupby(invoice_month)['Revenue'].sum()

# Series.plot creates the figure and returns its Axes, which is labelled here
ax = monthly_revenue.plot(kind='line', figsize=(12,6), marker='o')
ax.set_title("Monthly Revenue Trend")
ax.set_xlabel("Month")
ax.set_ylabel("Revenue")
plt.show()


Customer Purchase Frequency

In [None]:
# Number of distinct invoice numbers recorded for each customer
customer_freq = df.groupby('CustomerID')['InvoiceNo'].nunique()

# Histogram of per-customer order counts, drawn on an explicit Axes
fig, ax = plt.subplots(figsize=(10,6))
sns.histplot(customer_freq, bins=50, kde=False, ax=ax)
ax.set_title("Customer Purchase Frequency")
ax.set_xlabel("Number of Orders")
ax.set_ylabel("Number of Customers")
plt.show()
