<a href="https://colab.research.google.com/github/sonalee88/nd-to-end-data-processing-pipeline-shipment-JSON-data/blob/main/swift_shipment_tracking_sonali_kumari.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# Install Git (if not already installed in Colab environment)
# `-y` answers the apt confirmation prompt automatically so the cell runs unattended.
!apt-get install git -y

# Set the global Git commit identity (user.email / user.name).
# These are identity settings, not authentication credentials; replace them with your own.
!git config --global user.email "guptasonalee88@gmail.com"
!git config --global user.name "sonalee88"


Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
git is already the newest version (1:2.34.1-1ubuntu1.12).
0 upgraded, 0 newly installed, 0 to remove and 34 not upgraded.


In [4]:
# Clone the project repository into the current working directory.
# Replace the URL with your own repository when reusing this notebook.
!git clone https://github.com/sonalee88/nd-to-end-data-processing-pipeline-shipment-JSON-data.git


Cloning into 'nd-to-end-data-processing-pipeline-shipment-JSON-data'...
remote: Enumerating objects: 3, done.[K
remote: Counting objects: 100% (3/3), done.[K
remote: Compressing objects: 100% (2/2), done.[K
remote: Total 3 (delta 0), reused 0 (delta 0), pack-reused 0 (from 0)[K
Receiving objects: 100% (3/3), done.


In [5]:
# Change the notebook's working directory to the cloned repository folder.
# The folder name must match the directory created by the git clone above.
%cd nd-to-end-data-processing-pipeline-shipment-JSON-data


/content/nd-to-end-data-processing-pipeline-shipment-JSON-data


In [6]:
# Open Colab's file-upload prompt and keep the call's result in `uploaded`.
# NOTE(review): the rename step that follows hard-codes the file name
# "Swift Assignment 4 - Dataset.json" — upload the dataset under that name.
from google.colab import files
uploaded = files.upload()


Saving Swift Assignment 4 - Dataset.json to Swift Assignment 4 - Dataset.json


In [7]:
import os

# Give the uploaded dataset a short, space-free name used by the rest of the notebook.
uploaded_name = "Swift Assignment 4 - Dataset.json"
dataset_name = "shipment_dataset.json"

if os.path.exists(uploaded_name):
    # os.replace overwrites a stale target left behind by a previous run.
    os.replace(uploaded_name, dataset_name)
elif not os.path.exists(dataset_name):
    # Neither the upload nor an already-renamed copy is present: fail with an actionable message.
    raise FileNotFoundError(
        f"Expected '{uploaded_name}' (or an already renamed '{dataset_name}') "
        "in the current directory; run the upload cell first."
    )
# Otherwise the rename already happened on an earlier run, so re-running this cell is a no-op.


In [9]:
### 🔹  Import Required Libraries
import json
import pandas as pd
from datetime import datetime

In [10]:
### 🔹  Define Helper Function for IST Conversion
def to_ist(dt_str):
    """Convert a date/time value to a timezone-aware timestamp in Asia/Kolkata.

    Args:
        dt_str: A value ``pd.to_datetime`` can parse (typically an ISO-8601
            string). It is parsed with ``utc=True`` before conversion.

    Returns:
        The parsed timestamp converted to the 'Asia/Kolkata' timezone, or
        ``None`` when ``dt_str`` is falsy (``None``, empty string, ...).
    """
    if dt_str:
        utc_value = pd.to_datetime(dt_str, utc=True)
        return utc_value.tz_convert('Asia/Kolkata')
    return None

In [11]:
### 🔹  Process and Flatten Shipment JSON Data
def _nested(mapping, key):
    """Return ``mapping[key]``, substituting an empty dict when it is missing or null."""
    # `.get(key, {})` only covers a missing key; `or {}` also covers an explicit JSON null.
    return mapping.get(key) or {}


def _first_event_pincode(events, event_type):
    """Return the address postal code of the first event whose eventType matches, else None."""
    return next(
        (
            _nested(event, "address").get("postalCode")
            for event in events
            if event.get("eventType") == event_type
        ),
        None,
    )


def flatten_track_detail(detail):
    """Flatten one ``trackDetails`` entry into a single output row.

    Args:
        detail: Dict for one tracked shipment as loaded from the dataset JSON.

    Returns:
        Dict with one key per output column (tracking number, payment type,
        pickup/delivery timestamps in IST, days taken, weight, pickup/drop
        location fields and the number of delivery attempts).

    Raises:
        KeyError: If an "OD"/"DL" event has no ``timestamp["$numberLong"]``.
    """
    events = detail.get("events") or []

    # The shipment is COD when any special-handling entry has type "COD".
    is_cod = any(h.get("type") == "COD" for h in detail.get("specialHandlings") or [])
    payment_type = "COD" if is_cod else "Prepaid"

    # Map each date type to its raw value; entries with missing keys no longer raise KeyError.
    date_map = {
        d.get("type"): d.get("dateOrTimestamp")
        for d in detail.get("datesOrTimes") or []
    }
    pickup_dt_ist = to_ist(date_map.get("ACTUAL_PICKUP"))
    delivery_dt_ist = to_ist(date_map.get("ACTUAL_DELIVERY"))
    # pd.notna rejects both None and NaT instead of relying on Timestamp truthiness.
    if pd.notna(pickup_dt_ist) and pd.notna(delivery_dt_ist):
        # `.days` keeps whole elapsed days only (partial days are dropped).
        days_taken = (delivery_dt_ist - pickup_dt_ist).days
    else:
        days_taken = None

    # Delivery attempts = number of distinct IST calendar dates carrying an "OD" or "DL" event.
    # NOTE(review): presumably OD = out for delivery and DL = delivered — confirm the event codes.
    attempt_dates = set()
    for event in events:
        if event.get("eventType") in ("OD", "DL"):
            # The timestamp is stored under "$numberLong" and parsed as epoch milliseconds.
            ts = int(event["timestamp"]["$numberLong"])
            attempt_dates.add(
                pd.to_datetime(ts, unit="ms", utc=True).tz_convert("Asia/Kolkata").date()
            )

    shipper = _nested(detail, "shipperAddress")
    destination = _nested(detail, "destinationAddress")

    return {
        "Tracking number": detail.get("trackingNumber"),
        "Payment type": payment_type,
        "Pickup Date Time in IST": pickup_dt_ist,
        "Delivery Date Time in IST": delivery_dt_ist,
        "Days taken for delivery": days_taken,
        "Shipment weight": _nested(detail, "shipmentWeight").get("value"),
        # Pincodes come from the first "PU" / "DL" event rather than the address blocks.
        "Pickup Pincode": _first_event_pincode(events, "PU"),
        "Pickup City": shipper.get("city"),
        "Pickup State": shipper.get("stateOrProvinceCode"),
        "Drop Pincode": _first_event_pincode(events, "DL"),
        "Drop City": destination.get("city"),
        "Drop State": destination.get("stateOrProvinceCode"),
        "Number of delivery attempts needed": len(attempt_dates),
    }


# Read the dataset with an explicit encoding so the result does not depend on the platform default.
with open("shipment_dataset.json", "r", encoding="utf-8") as file:
    data = json.load(file)

# One output row per trackDetails entry across all top-level records.
records = [
    flatten_track_detail(detail)
    for record in data
    for detail in record.get("trackDetails") or []
]


In [16]:
### 🔹 Create DataFrame and Save Output
# Turn the list of flattened shipment dicts into a table (one row per dict).
df = pd.DataFrame(data=records)

# Persist the table to CSV without the positional index column.
df.to_csv(path_or_buf="shipment_tracking_output.csv", index=False)

# Display the first five rows as the cell's output.
df.head(n=5)

Unnamed: 0,Tracking number,Payment type,Pickup Date Time in IST,Delivery Date Time in IST,Days taken for delivery,Shipment weight,Pickup Pincode,Pickup City,Pickup State,Drop Pincode,Drop City,Drop State,Number of delivery attempts needed
0,391128701026,COD,2020-03-16 15:44:00+05:30,2020-03-20 13:37:00+05:30,3,14.0,560048,Bangalore,KA,122001,Gurgaon,HR,1
1,390901883808,Prepaid,2020-03-06 16:07:00+05:30,2020-03-09 19:50:00+05:30,3,14.0,560048,Bangalore,KA,560034,Bangalore,KA,1
2,391128749178,Prepaid,2020-03-16 15:44:00+05:30,2020-03-19 15:29:00+05:30,2,14.0,560048,Bangalore,KA,380028,Ahmedabad,GJ,1
3,390807986805,Prepaid,2020-03-03 16:19:00+05:30,2020-03-07 14:24:00+05:30,3,14.0,560048,Bangalore,KA,110088,New Delhi,DL,1
4,390948921190,COD,2020-03-09 15:12:00+05:30,2020-03-13 14:44:00+05:30,3,14.0,560048,Bangalore,KA,110009,Delhi,DL,1


In [15]:
### 🔹  Generate Summary Statistics
def _mean_median_mode(series):
    """Return [mean, median, first mode] of a Series; the mode entry is None when no mode exists."""
    modes = series.mode()
    first_mode = None if modes.empty else modes.iloc[0]
    return [series.mean(), series.median(), first_mode]


# Values are listed days-first, attempts-second, in the same order as the metric labels.
metric_labels = ["Mean Days", "Median Days", "Mode Days", "Mean Attempts", "Median Attempts", "Mode Attempts"]
metric_values = (
    _mean_median_mode(df["Days taken for delivery"])
    + _mean_median_mode(df["Number of delivery attempts needed"])
)

summary = {"Metric": metric_labels, "Value": metric_values}
summary_df = pd.DataFrame(summary)
summary_df.to_csv("shipment_tracking_summary.csv", index=False)
summary_df


Unnamed: 0,Metric,Value
0,Mean Days,3.262626
1,Median Days,3.0
2,Mode Days,3.0
3,Mean Attempts,1.222222
4,Median Attempts,1.0
5,Mode Attempts,1.0


In [14]:
### 🔹 Download Final Output Files
from google.colab import files

# Trigger a browser download for each generated CSV: the per-shipment table first, then the summary.
for csv_name in ("shipment_tracking_output.csv", "shipment_tracking_summary.csv"):
    files.download(csv_name)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>