# Amazon Supply Chain Data Analysis

This notebook demonstrates how to download a supply chain dataset from Kaggle, perform exploratory data analysis, and create beautiful visuals relevant to Amazon's supply chain optimization.

In [5]:
# Install Kaggle if not already installed
!pip install -q kaggle

# Download dataset from Kaggle (requires Kaggle API key)
# Replace 'your-dataset' with the actual dataset name or URL
!kaggle datasets download -d stuti24/amazon-supply-chain-dataset



403 Client Error: Forbidden for url: https://www.kaggle.com/api/v1/datasets/metadata/stuti24/amazon-supply-chain-dataset


## Option 2: Extract a manually downloaded archive

In [None]:
import os
import zipfile

# Archive expected in the working directory, and the folder it is unpacked into.
zip_path = 'amazon-supply-chain-dataset.zip'
extract_to = './data'

# Make sure the destination folder is present before extracting
# (exist_ok=True makes this a no-op when the folder already exists).
os.makedirs(extract_to, exist_ok=True)

# Open the archive read-only and unpack every member into the destination.
with zipfile.ZipFile(zip_path, mode='r') as archive:
    archive.extractall(path=extract_to)

In [None]:
import os

# Archive name expected in the current working directory.
zip_path = 'amazon-supply-chain-dataset.zip'

if not os.path.exists(zip_path):
    # Nothing to extract: report the missing archive instead of raising.
    print(f"File not found: {zip_path}. Please check your Kaggle download step for errors.")
else:
    # zipfile is only needed when there is an archive to unpack.
    import zipfile

    with zipfile.ZipFile(zip_path, mode='r') as archive:
        archive.extractall(path='./data')
    print("Unzipped successfully.")

## Move to the Global Superstore dataset


In [None]:
# Install Kaggle if not already installed
!pip install -q kaggle
!kaggle datasets download -d vivek468/superstore-dataset-final
import zipfile
with zipfile.ZipFile('superstore-dataset-final.zip', 'r') as zip_ref:
    zip_ref.extractall('./data')

## Import Libraries and Load Data

In [None]:
import os

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

# Folder the extraction cells unpack archives into.
DATA_DIR = './data'

# CSV files to try, in priority order. The Amazon file is the original target
# of this notebook; the second entry covers the Superstore archive that the
# notebook downloads as a replacement.
# NOTE(review): 'Sample - Superstore.csv' is the presumed file name inside
# superstore-dataset-final.zip -- confirm against the extracted archive.
CANDIDATE_FILES = [
    'Amazon_Supply_Chain_Dataset.csv',
    'Sample - Superstore.csv',
]

# Pick the first candidate that actually exists on disk.
candidate_paths = [os.path.join(DATA_DIR, name) for name in CANDIDATE_FILES]
csv_path = next((path for path in candidate_paths if os.path.exists(path)), None)
if csv_path is None:
    # Fail with an actionable message that shows what was extracted, if anything.
    found = sorted(os.listdir(DATA_DIR)) if os.path.isdir(DATA_DIR) else []
    raise FileNotFoundError(
        f"None of {CANDIDATE_FILES} found in {DATA_DIR}. Files present: {found}"
    )

# Load the main dataset. Retry with Latin-1 when the file is not valid UTF-8,
# since Latin-1 can decode any byte sequence.
try:
    df = pd.read_csv(csv_path)
except UnicodeDecodeError:
    df = pd.read_csv(csv_path, encoding='latin-1')
df.head()

## Visual 1: Order Volume Trends (Line Plot)

In [None]:
# Plot the number of rows per calendar month of 'Order Date' (each row counts
# as one order). Guarded like the other visuals so a dataset without the
# column produces a message instead of a KeyError.
if 'Order Date' in df.columns:
    # Parse the column to datetimes so the .dt accessor is available.
    df['Order Date'] = pd.to_datetime(df['Order Date'])

    # Bucket rows by month and count them.
    order_trends = (
        df.groupby(df['Order Date'].dt.to_period('M'))
        .size()
        .reset_index(name='Order Count')
    )
    # Convert monthly periods back to timestamps for plotting on the x-axis.
    order_trends['Order Date'] = order_trends['Order Date'].dt.to_timestamp()

    plt.figure(figsize=(12,6))
    sns.lineplot(data=order_trends, x='Order Date', y='Order Count', color='#232f3e')
    plt.title('Monthly Order Volume Trends')
    plt.xlabel('Month')
    plt.ylabel('Order Count')
    plt.grid(True, linestyle='--', alpha=0.5)
    plt.show()
else:
    print('Order Date column not found in dataset.')

## Visual 2: Fulfillment Center Distribution (Map)

In [None]:
# Draw one marker per row on a world map, but only when both coordinate
# columns are available in the data.
geo_columns = {'Latitude', 'Longitude'}
if not geo_columns.issubset(df.columns):
    print('Latitude/Longitude columns not found in dataset.')
else:
    # Collect the plot settings first so the call itself stays short.
    map_options = dict(
        lat='Latitude',
        lon='Longitude',
        scope='world',
        title='Amazon Fulfillment Center Locations',
        color_discrete_sequence=['#ff9900'],
    )
    fig = px.scatter_geo(df, **map_options)
    fig.show()

## Visual 3: Delivery Speed Distribution (Histogram)

In [None]:
# Histogram (20 bins, with a KDE overlay) of the 'Delivery Speed' column;
# prints a notice when the column is absent.
if 'Delivery Speed' not in df.columns:
    print('Delivery Speed column not found in dataset.')
else:
    speed_fig, speed_ax = plt.subplots(figsize=(10, 5))
    sns.histplot(
        df['Delivery Speed'],
        bins=20,
        color='#ff9900',
        kde=True,
        ax=speed_ax,
    )
    speed_ax.set_title('Delivery Speed Distribution')
    speed_ax.set_xlabel('Delivery Speed (days)')
    speed_ax.set_ylabel('Frequency')
    plt.show()

## Visual 4: Inventory Turnover (Bar Plot)

In [None]:
# Bar chart of the mean 'Inventory Turnover' per 'Product Category';
# prints a notice when either column is absent.
required_columns = ['Product Category', 'Inventory Turnover']
missing_columns = [name for name in required_columns if name not in df.columns]

if missing_columns:
    print('Product Category or Inventory Turnover column not found in dataset.')
else:
    # Average turnover within each category, flattened back to two columns.
    turnover = (
        df.groupby('Product Category')['Inventory Turnover']
        .mean()
        .reset_index()
    )

    turnover_fig, turnover_ax = plt.subplots(figsize=(12, 6))
    sns.barplot(
        data=turnover,
        x='Product Category',
        y='Inventory Turnover',
        palette=['#232f3e', '#ff9900'],
        ax=turnover_ax,
    )
    turnover_ax.set_title('Average Inventory Turnover by Product Category')
    # Tilt the category labels so long names do not overlap.
    plt.xticks(rotation=45)
    plt.show()