# Data Overview

In [1]:
#Importing the required libraries
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import seaborn as sns


In [2]:
# LOADING
# Read the three source CSV files into separate DataFrames. The paths are
# relative, so they resolve against the current working directory.
customers = pd.read_csv("csv files/Customers.csv")  # Customers.csv dataset
products = pd.read_csv("csv files/Products.csv")  # Products.csv dataset
transactions = pd.read_csv("csv files/Transactions.csv")  # Transactions.csv dataset

In [3]:
# MERGING
# Attach the customer columns to every transaction row. The join type is left
# at the pandas default (inner), so transactions whose CustomerID is absent
# from `customers` are dropped.
transactions_customers = pd.merge(transactions, customers, on="CustomerID")

# Attach the product columns the same way to obtain the combined table that
# all later cells analyse.
full_data = pd.merge(transactions_customers, products, on="ProductID")

# Data Checks

In [None]:
# Preview the first rows of the merged table to see its columns and sample values.
full_data.head()

In [None]:
# Look at the last rows to confirm the merged table ends as expected.
full_data.tail()

In [None]:
# Summary statistics of the numeric columns; useful for spotting outliers or
# unusual distributions.
full_data.describe()

In [None]:
# Print each column's dtype and non-null count, plus overall frame metadata.
full_data.info()

In [None]:
# Count how many rows fall into each distinct Category value.
full_data.value_counts(subset="Category")

In [None]:
# Total number of elements (rows x columns) in the merged table.
full_data.size

In [None]:
# Dimensions of the merged table as (number of rows, number of columns).
full_data.shape

In [None]:
# Count missing values per column; all zeros means the data is complete.
full_data.isna().sum()

In [13]:
# Convert both date columns to pandas datetimes so that the `.dt` accessor
# and time-based grouping can be used in the analysis below.
for date_column in ("TransactionDate", "SignupDate"):
    full_data[date_column] = pd.to_datetime(full_data[date_column])

# Exploratory Data Analysis

### 1. Monthly Transaction Trend

In [None]:
# Monthly transaction counts reveal seasonal patterns in customer activity;
# peaks may correspond to holiday seasons or promotional periods.

sns.set(style="whitegrid", palette="muted", font_scale=1.2)  # global plot style
fig, ax = plt.subplots(figsize=(10, 6))

# Map every transaction to its calendar month, then count rows per month.
transaction_month = full_data["TransactionDate"].dt.to_period("M")
transactions_per_month = full_data.groupby(transaction_month).size()

transactions_per_month.plot(kind="line", marker="o", ax=ax)
ax.set_title("Monthly Transaction Trend", fontsize=14)
ax.set_xlabel("Month")
ax.set_ylabel("Number of Transactions")
ax.grid(True)
plt.xticks(rotation=45)  # tilt the month labels for readability
plt.show()


### 2. Most Popular Product Categories

In [None]:
# Transactions per product category: shows which categories are bought most
# often, which helps focus inventory and promotions.

# value_counts() returns the categories ordered from most to least frequent.
popular_categories = full_data["Category"].value_counts()

fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(
    x=popular_categories.index,
    y=popular_categories.values,
    palette="viridis",
    ax=ax,
)
ax.set_title("Most Popular Product Categories", fontsize=14)
ax.set_xlabel("Product Category")
ax.set_ylabel("Number of Transactions")
plt.xticks(rotation=45)  # tilt the category labels for readability
plt.show()

### 3. Total Sales by Region


In [None]:
# Total sales per region: the strongest regions can be targeted for
# expansion, while weaker ones may need localized promotions.

# Sum TotalValue within each region and order the regions from highest to lowest.
region_totals = full_data.groupby("Region")["TotalValue"].sum()
sales_by_region = region_totals.sort_values(ascending=False)

fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(
    x=sales_by_region.index,
    y=sales_by_region.values,
    palette="coolwarm",
    ax=ax,
)
ax.set_title("Total Sales by Region", fontsize=14)
ax.set_xlabel("Region")
ax.set_ylabel("Total Sales")
plt.show()

### 4. Average Order Value by Region

In [None]:
# Average order value per region: regions with a higher average may have more
# affluent customers, which can guide premium product offerings.

# Mean TotalValue per transaction row in each region, highest first.
region_means = full_data.groupby("Region")["TotalValue"].mean()
avg_order_by_region = region_means.sort_values(ascending=False)

fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(
    x=avg_order_by_region.index,
    y=avg_order_by_region.values,
    palette="magma",
    ax=ax,
)
ax.set_title("Average Order Value by Region", fontsize=14)
ax.set_xlabel("Region")
ax.set_ylabel("Average Order Value")
plt.show()

### 5. Customer Segmentation by Region

In [None]:
# Number of distinct customers per region, which helps allocate resources for
# regional marketing strategies.
#
# `full_data` holds one row per transaction, so counting rows per region
# (e.g. with value_counts on 'Region') would report transactions and
# over-count customers who bought more than once. Counting unique CustomerID
# values gives the figure the chart's title and y-axis label describe.
# Customers without any transaction do not appear in `full_data` and are
# therefore not included.

plt.figure(figsize=(10, 6))
customers_by_region = (
    full_data.groupby('Region')['CustomerID']
    .nunique()
    .sort_values(ascending=False)  # largest region first
)
# NOTE(review): seaborn >= 0.13 reportedly warns when `palette` is passed
# without `hue`; confirm against the installed version before changing the call.
sns.barplot(x=customers_by_region.index, y=customers_by_region.values, palette="cubehelix")
plt.title("Customer Segmentation by Region", fontsize=14)
plt.xlabel("Region")
plt.ylabel("Number of Customers")
plt.xticks(rotation=45)
plt.show()

### 6. Top 10 Most Purchased Products

In [None]:
# Ten product names that appear in the most transactions; useful for deciding
# what to promote, restock, or bundle.

# value_counts() orders names by frequency, so the first ten are the top ten.
product_frequency = full_data["ProductName"].value_counts()
top_products = product_frequency.head(10)

fig, ax = plt.subplots(figsize=(10, 6))
# Product names go on the y-axis so the bars are horizontal and the labels
# stay readable.
sns.barplot(
    y=top_products.index,
    x=top_products.values,
    palette="plasma",
    ax=ax,
)
ax.set_title("Top 10 Most Purchased Products", fontsize=14)
ax.set_xlabel("Number of Transactions")
ax.set_ylabel("Product Name")
plt.show()

### COMPREHENSIVE INSIGHT

This exploratory data analysis uncovers opportunities to refine marketing approaches, inventory management, and regional strategies. By capitalizing on seasonal trends, leveraging regional market strengths, and optimizing product performance, the business can drive revenue growth and foster deeper customer engagement. 

Thank you.