In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

# Override pandas' default display limits (column count, line width,
# cell width) with None so wide frames are easier to read when printed.
for option_name in ('display.max_columns', 'display.width', 'display.max_colwidth'):
    pd.set_option(option_name, None)

# Plot styling: seaborn-flavoured dark grid style sheet plus the "husl" palette.
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")

# Opening banner: title framed above and below by a 70-character rule.
print("=" * 70, "SUPPLY CHAIN MANAGEMENT - DATA EXPLORATION", "=" * 70, sep="\n")

# Load the dataset.
# pandas is already imported as `pd` in the import block at the top of this
# cell, so it is not imported a second time here.
# NOTE(review): the path looks like a Colab Google Drive mount; presumably
# Drive must be mounted before this cell runs. Confirm, and consider moving
# the location into a configurable constant.
path = "/content/drive/MyDrive/Supply Chain Project/supply_chain_data.csv"
df = pd.read_csv(path)


# [1] Dimensions of the loaded frame: (rows, columns).
print("\n[1] Dataset Shape:")
print(f"    Rows: {df.shape[0]}")
print(f"    Columns: {df.shape[1]}")

# [2] Preview of the leading rows.
print("\n[2] First 5 Rows:")
print(df.head())

# [3] Column names, non-null counts and dtypes.
print("\n[3] Dataset Information:")
# DataFrame.info() writes its report to stdout itself and returns None;
# wrapping it in print() appended a stray "None" line after the report.
df.info()

# [4] Descriptive statistics as computed by DataFrame.describe().
print("\n[4] Statistical Summary:")
print(df.describe())

print("\n[5] Missing Values Check:")
# Per-column count of null cells; `missing` stays a module-level name.
missing = df.isna().sum()
if missing.any():
    # At least one column has nulls: list only the affected columns.
    print(missing.loc[missing.gt(0)])
else:
    print("    ✓ No missing values found!")

# [6] One line per column with its dtype.
print("\n[6] Data Types:")
for col, column_dtype in df.dtypes.items():
    print(f"    {col}: {column_dtype}")

# [7] One line per column with its number of distinct values
# (nunique() leaves missing values out of the count by default).
print("\n[7] Unique Values Count:")
for col, unique_count in df.nunique().items():
    print(f"    {col}: {unique_count} unique values")

# Frequency tables for the categorical columns, printed as sections [8]..[14].
# Each entry pairs the heading text with the column it summarises; the section
# number is derived from the entry's position, so adding or reordering a
# column only requires editing this list.
distribution_sections = [
    ("Product Type", 'Product type'),
    ("Customer Demographics", 'Customer demographics'),
    ("Supplier", 'Supplier name'),
    ("Location", 'Location'),
    ("Shipping Carrier", 'Shipping carriers'),
    ("Transportation Mode", 'Transportation modes'),
    ("Inspection Results", 'Inspection results'),
]
for section_no, (heading, column) in enumerate(distribution_sections, start=8):
    print(f"\n[{section_no}] {heading} Distribution:")
    # A column absent from the frame raises KeyError here, as the
    # hand-written per-column version did.
    print(df[column].value_counts())

# [15] Count rows flagged by DataFrame.duplicated(), i.e. rows that repeat
# an earlier row across all columns.
print("\n[15] Duplicate Rows Check:")
duplicates = df.duplicated().sum()
print(
    f"    ⚠ Found {duplicates} duplicate rows"
    if duplicates
    else "    ✓ No duplicate rows found!"
)

# Closing banner: blank line, then the title framed by 70-character rules.
# NOTE(review): the text repeats the section title rather than marking
# completion; confirm the wording is intended.
print("\n" + "=" * 70, "DATA EXPLORATION", "=" * 70, sep="\n")

SUPPLY CHAIN MANAGEMENT - DATA EXPLORATION

[1] Dataset Shape:
    Rows: 100
    Columns: 24

[2] First 5 Rows:
  Product type   SKU      Price  Availability  Number of products sold  \
0     haircare  SKU0  69.808006            55                      802   
1     skincare  SKU1  14.843523            95                      736   
2     haircare  SKU2  11.319683            34                        8   
3     skincare  SKU3  61.163343            68                       83   
4     skincare  SKU4   4.805496            26                      871   

   Revenue generated Customer demographics  Stock levels  Lead times  \
0        8661.996792            Non-binary            58           7   
1        7460.900065                Female            53          30   
2        9577.749626               Unknown             1          10   
3        7766.836426            Non-binary            23          13   
4        2686.505152            Non-binary             5           3   

   Order q