# Importing required libraries

In [1]:
import pandas as pd

# Loading the datasets

In [2]:
# Read the three source CSV files into DataFrames, in the same order as the
# names on the left-hand side. File names are relative paths, so they are
# resolved against the current working directory.
customers_df, products_df, transactions_df = (
    pd.read_csv(csv_name)
    for csv_name in ('Customers.csv', 'Products.csv', 'Transactions.csv')
)


# 1. Checking for missing values

In [3]:
# Pair each report heading with the frame it describes; the leading "\n" on
# the later headings inserts a blank line between consecutive sections.
missing_value_sections = (
    ("Missing values in Customers dataset:", customers_df),
    ("\nMissing values in Products dataset:", products_df),
    ("\nMissing values in Transactions dataset:", transactions_df),
)

# Print the heading followed by the per-column count of null entries.
for heading, frame in missing_value_sections:
    print(heading)
    print(frame.isnull().sum())

Missing values in Customers dataset:
CustomerID      0
CustomerName    0
Region          0
SignupDate      0
dtype: int64

Missing values in Products dataset:
ProductID      0
ProductName    0
Category       0
Price          0
dtype: int64

Missing values in Transactions dataset:
TransactionID      0
CustomerID         0
ProductID          0
TransactionDate    0
Quantity           0
TotalValue         0
Price              0
dtype: int64


# 2. Checking for duplicate rows

In [4]:
# Count the rows flagged by DataFrame.duplicated() in each dataset first,
# then report the three totals.
customer_duplicates = customers_df.duplicated().sum()
product_duplicates = products_df.duplicated().sum()
transaction_duplicates = transactions_df.duplicated().sum()

print("\nDuplicate rows in Customers dataset:", customer_duplicates)
print("Duplicate rows in Products dataset:", product_duplicates)
print("Duplicate rows in Transactions dataset:", transaction_duplicates)



Duplicate rows in Customers dataset: 0
Duplicate rows in Products dataset: 0
Duplicate rows in Transactions dataset: 0


# 3. Converting date columns to datetime format

In [5]:
# Each pair names a frame and the column in it that holds date strings.
# The column is overwritten on the same frame with its parsed datetime form.
for frame, date_column in (
    (customers_df, 'SignupDate'),
    (transactions_df, 'TransactionDate'),
):
    frame[date_column] = pd.to_datetime(frame[date_column])


# 4. Summary statistics for numeric (and datetime) columns in Products and Transactions datasets

In [6]:

# sep="\n" puts the heading and the describe() table on separate lines, which
# yields the same text as printing them with two consecutive print() calls.
print("\nSummary statistics for Products dataset:", products_df.describe(), sep="\n")
print("\nSummary statistics for Transactions dataset:", transactions_df.describe(), sep="\n")



Summary statistics for Products dataset:
            Price
count  100.000000
mean   267.551700
std    143.219383
min     16.080000
25%    147.767500
50%    292.875000
75%    397.090000
max    497.760000

Summary statistics for Transactions dataset:
                     TransactionDate     Quantity   TotalValue       Price
count                           1000  1000.000000  1000.000000  1000.00000
mean   2024-06-23 15:33:02.768999936     2.537000   689.995560   272.55407
min              2023-12-30 15:29:12     1.000000    16.080000    16.08000
25%       2024-03-25 22:05:34.500000     2.000000   295.295000   147.95000
50%       2024-06-26 17:21:52.500000     3.000000   588.880000   299.93000
75%              2024-09-19 14:19:57     4.000000  1011.660000   404.40000
max              2024-12-28 11:00:00     4.000000  1991.040000   497.76000
std                              NaN     1.117981   493.144478   140.73639


# 5. Analyzing the price range of products

In [7]:

# Bind the Price column once and reuse it for all three statistics.
product_prices = products_df['Price']

print("\nPrice range in Products dataset:")
print("Minimum Price:", product_prices.min())
print("Maximum Price:", product_prices.max())
print("Mean Price:", product_prices.mean())


Price range in Products dataset:
Minimum Price: 16.08
Maximum Price: 497.76
Mean Price: 267.55170000000004


# 6. Analyzing transaction value and quantity

In [8]:
# The two transaction columns being summarised.
total_values = transactions_df['TotalValue']
quantities = transactions_df['Quantity']

# Each entry pairs an output label with the bound method that computes the
# statistic; the order of entries is the order in which lines are printed.
transaction_summary = (
    ("Minimum Total Value:", total_values.min),
    ("Maximum Total Value:", total_values.max),
    ("Mean Total Value:", total_values.mean),
    ("Minimum Quantity:", quantities.min),
    ("Maximum Quantity:", quantities.max),
    ("Mean Quantity:", quantities.mean),
)

print("\nTransaction details:")
for label, compute in transaction_summary:
    print(label, compute())


Transaction details:
Minimum Total Value: 16.08
Maximum Total Value: 1991.04
Mean Total Value: 689.9955600000001
Minimum Quantity: 1
Maximum Quantity: 4
Mean Quantity: 2.537
