<a href="https://colab.research.google.com/github/mosesj1706/global-electronics-eda/blob/main/EDA_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ===============================================
# 📊 Global Electronics - Exploratory Data Analysis
# ===============================================

# ---- STEP 1: Setup ----
!pip install pandas numpy matplotlib seaborn --quiet

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

# Where the cleaned CSV files live
project_path = "/content/global-electronics-eda"
cleaned_path = os.path.join(project_path, "Cleaned_CSV_files")

# Read the five cleaned tables in one pass. The filenames are listed in the
# same order as the names on the left-hand side, so each DataFrame is bound
# to the file it was read from.
customers, products, sales, stores, rates = (
    pd.read_csv(os.path.join(cleaned_path, csv_name))
    for csv_name in (
        "Customers_clean.csv",
        "Products_clean.csv",
        "Sales_clean.csv",
        "Stores_clean.csv",
        "Exchange_Rates_clean.csv",
    )
)

print("✅ Data loaded")

# ---- STEP 2: Customer Analysis ----
print("\n--- Customer Analysis ---")
print(customers.head())

# One count plot per customer attribute: (column, plot title, sort bars by
# descending frequency?). Gender keeps seaborn's default bar order; the other
# two are ordered by how often each value occurs.
customer_plots = [
    ("Gender", "Customer Distribution by Gender", False),
    ("AgeGroup", "Customer Distribution by Age Group", True),
    ("Continent", "Customers by Continent", True),
]

for column, plot_title, by_frequency in customer_plots:
    bar_order = customers[column].value_counts().index if by_frequency else None
    sns.countplot(data=customers, x=column, order=bar_order)
    plt.title(plot_title)
    plt.show()

# ---- STEP 3: Sales Analysis ----
print("\n--- Sales Analysis ---")

# Parse order dates. Values that cannot be parsed become NaT (errors='coerce')
# and are then left out of the monthly grouping below, so say how many.
sales['Order Date'] = pd.to_datetime(sales['Order Date'], errors='coerce')
missing_dates = int(sales['Order Date'].isna().sum())
if missing_dates:
    print(f"Warning: {missing_dates} sales rows have no valid Order Date "
          "and are excluded from the monthly trend")
sales['YearMonth'] = sales['Order Date'].dt.to_period("M")

# Attach the USD unit price to every sales line.
# validate="many_to_one" makes pandas raise if a ProductKey appears more than
# once in `products`; without it such duplicates would silently duplicate
# sales rows and inflate every revenue figure computed afterwards.
sales = sales.merge(
    products[['ProductKey', 'Unit Price USD']],
    on="ProductKey",
    how="left",
    validate="many_to_one",
)

# Revenue per sales line, in USD (quantity x USD unit price).
# NOTE(review): assumes 'Unit Price USD' is already numeric in the cleaned
# CSV - confirm against the cleaning step.
sales['Revenue'] = sales['Quantity'] * sales['Unit Price USD']

# A left merge leaves the price (and therefore Revenue) as NaN for products
# that are not in `products`; those rows contribute nothing to the sums.
missing_prices = int(sales['Unit Price USD'].isna().sum())
if missing_prices:
    print(f"Warning: {missing_prices} sales rows have no unit price "
          "and contribute no revenue")

# Sales trend
monthly_sales = sales.groupby('YearMonth')['Revenue'].sum().reset_index()
# Periods are converted to strings so seaborn can use them as x labels
monthly_sales['YearMonth'] = monthly_sales['YearMonth'].astype(str)
sns.lineplot(data=monthly_sales, x="YearMonth", y="Revenue", marker="o")
plt.xticks(rotation=45)
plt.title("Monthly Sales Trend")
plt.show()

# ---- STEP 4: Product Analysis ----
print("\n--- Product Analysis ---")

# Product attributes used for labelling, one row per ProductKey so the
# merges below can never duplicate rows.
product_info = products[['ProductKey', 'Product Name', 'Category']].drop_duplicates('ProductKey')

# Top 10 products by revenue
top_products = sales.groupby('ProductKey')['Revenue'].sum().reset_index()
top_products = top_products.merge(product_info, on="ProductKey", how="left")
top_products = top_products.sort_values('Revenue', ascending=False).head(10)

sns.barplot(data=top_products, x="Revenue", y="Product Name")
plt.title("Top 10 Products by Revenue")
plt.show()

# Revenue by category
# `sales` only had ProductKey / Unit Price USD merged into it earlier in this
# script, so it is not guaranteed to carry a Category column and grouping it
# directly can raise KeyError. Take Category from `products` instead; only
# ProductKey and Revenue are selected from `sales` so an existing Category
# column (if any) cannot collide with the merged one.
sales_with_category = sales[['ProductKey', 'Revenue']].merge(
    product_info[['ProductKey', 'Category']],
    on="ProductKey",
    how="left",
)
category_sales = sales_with_category.groupby('Category')['Revenue'].sum().reset_index()
sns.barplot(data=category_sales, x="Category", y="Revenue")
plt.title("Revenue by Product Category")
plt.xticks(rotation=45)
plt.show()

# ---- STEP 5: Store Analysis ----
print("\n--- Store Analysis ---")

# Total revenue per store, joined with the store's country and floor area
store_perf = sales.groupby('StoreKey')['Revenue'].sum().reset_index()
store_perf = store_perf.merge(stores[['StoreKey','Country','Square Meters']], on="StoreKey", how="left")

# Treat a floor area of 0 as missing so the ratio becomes NaN instead of
# +/-inf; infinite values make histogram binning fail.
floor_area = store_perf['Square Meters'].replace(0, np.nan)
store_perf['Revenue per SqM'] = store_perf['Revenue'] / floor_area

# Unique label per store. Plotting by Country alone merges every store of the
# same country into a single bar (seaborn's barplot aggregates repeated
# categories), so the chart would not show ten individual stores.
store_perf['Store'] = (
    store_perf['Country'].astype(str)
    + " (Store "
    + store_perf['StoreKey'].astype(str)
    + ")"
)

# Top stores by revenue
top_stores = store_perf.sort_values('Revenue', ascending=False).head(10)
sns.barplot(data=top_stores, x="Revenue", y="Store")
plt.title("Top 10 Stores by Revenue")
plt.show()

# Revenue per square meter (stores without a usable floor area are left out)
sns.histplot(store_perf['Revenue per SqM'].dropna(), bins=20, kde=True)
plt.title("Distribution of Revenue per Square Meter")
plt.show()

# ---- STEP 6: Currency Impact ----
print("\n--- Currency Impact ---")

# Total revenue per order currency code.
# NOTE(review): Revenue is built from 'Unit Price USD', so these totals are in
# USD regardless of the currency code; the `rates` table loaded at the top is
# not applied here - confirm whether a local-currency conversion was intended.
currency_sales = sales.groupby('Currency Code', as_index=False)['Revenue'].sum()

sns.barplot(x="Currency Code", y="Revenue", data=currency_sales)
plt.title("Revenue by Currency")
plt.show()

print("\n🎉 EDA complete – plots and insights generated!")
