# Exploratory Data Analysis (EDA)

In [None]:

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

def _as_datetime(frame, column):
    """Overwrite `column` of `frame` in place with its `pd.to_datetime` conversion."""
    frame[column] = pd.to_datetime(frame[column])


# Load the three raw tables; paths are relative to the working directory.
customers_df = pd.read_csv('Customers.csv')
products_df = pd.read_csv('Products.csv')
transactions_df = pd.read_csv('Transactions.csv')

# Convert the date columns to datetimes so the `.dt` accessor can be used on
# them later (e.g. for bucketing transactions by month).
_as_datetime(customers_df, 'SignupDate')
_as_datetime(transactions_df, 'TransactionDate')


def _top_by_revenue(frame, key, limit=10):
    """Sum `TotalValue` per `key` and return the `limit` largest totals, highest first."""
    totals = frame.groupby(key)['TotalValue'].sum()
    return totals.sort_values(ascending=False).head(limit)


# --- Frequency tables -------------------------------------------------------
# Rows per Region in the customers table and per Category in the products table.
region_counts = customers_df['Region'].value_counts()
category_counts = products_df['Category'].value_counts()

# --- Revenue over time ------------------------------------------------------
# Bucket each transaction into its calendar month (stored as a new column on
# transactions_df), then total TotalValue within each month.
transactions_df['TransactionMonth'] = transactions_df['TransactionDate'].dt.to_period('M')
monthly_transactions = (
    transactions_df
    .groupby('TransactionMonth')['TotalValue']
    .sum()
)

# --- Revenue leaders --------------------------------------------------------
# Ten highest TotalValue sums by product and by customer.
product_revenue = _top_by_revenue(transactions_df, 'ProductID')
customer_revenue = _top_by_revenue(transactions_df, 'CustomerID')

def _draw_panel(slot, series, title, x_label, y_label, **plot_options):
    """Plot `series` in cell `slot` (1-based) of the 2x2 grid and label it.

    `plot_options` are forwarded unchanged to `series.plot`.
    """
    plt.subplot(2, 2, slot)
    series.plot(**plot_options)
    plt.title(title)
    plt.xlabel(x_label)
    plt.ylabel(y_label)


# One 16x10 figure holding four overview panels in a 2x2 grid.
plt.figure(figsize=(16, 10))

# Top row: categorical counts.
_draw_panel(
    1, region_counts,
    'Number of Customers by Region', 'Region', 'Count',
    kind='bar', color='skyblue',
)
_draw_panel(
    2, category_counts,
    'Number of Products by Category', 'Category', 'Count',
    kind='bar', color='orange',
)

# Bottom row: revenue over time and revenue by product.
_draw_panel(
    3, monthly_transactions,
    'Monthly Transaction Revenue', 'Month', 'Total Revenue (USD)',
    kind='line', marker='o', color='green',
)
_draw_panel(
    4, product_revenue,
    'Top 10 Products by Revenue', 'ProductID', 'Total Revenue (USD)',
    kind='bar', color='purple',
)

plt.tight_layout()
plt.show()
    