# Data Cleaning and Feature Engineering

Data source: https://www.kaggle.com/datasets/apoorvwatsky/supply-chain-shipment-pricing-data

Reporter: Trinh Nguyen

### Introduction
This notebook focuses on cleaning, standardizing, and preparing the raw logistics dataset for analysis.  
Key objectives include data validation, handling inconsistencies, and creating derived features required for downstream exploratory and performance analysis.


### Data cleaning

In [22]:
# Load data
import pandas as pd

# Read the raw delivery-history export using the latin1 encoding.
df_raw = pd.read_csv("../Data/SCMS_Delivery_History_Dataset_20150929.csv", encoding="latin1")

# Standardize column names: trim whitespace, turn spaces and slashes into
# underscores, then lowercase. regex=False makes both replacements plain literal
# substitutions regardless of the installed pandas version's default.
df_raw.columns = (
    df_raw.columns
    .str.strip()
    .str.replace(" ", "_", regex=False)
    .str.replace("/", "_", regex=False)
    .str.lower()
)

# All cleaning below happens on a copy, so df_raw keeps the values as loaded
# (only its column labels are standardized).
df = df_raw.copy()
list(df.columns)

['id',
 'project_code',
 'pq_#',
 'po___so_#',
 'asn_dn_#',
 'country',
 'managed_by',
 'fulfill_via',
 'vendor_inco_term',
 'shipment_mode',
 'pq_first_sent_to_client_date',
 'po_sent_to_vendor_date',
 'scheduled_delivery_date',
 'delivered_to_client_date',
 'delivery_recorded_date',
 'product_group',
 'sub_classification',
 'vendor',
 'item_description',
 'molecule_test_type',
 'brand',
 'dosage',
 'dosage_form',
 'unit_of_measure_(per_pack)',
 'line_item_quantity',
 'line_item_value',
 'pack_price',
 'unit_price',
 'manufacturing_site',
 'first_line_designation',
 'weight_(kilograms)',
 'freight_cost_(usd)',
 'line_item_insurance_(usd)']

In [23]:
# Import necessary libraries

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

In [24]:
# Convert date columns
date_cols = ['pq_first_sent_to_client_date', 'po_sent_to_vendor_date', 'scheduled_delivery_date',
             'delivered_to_client_date', 'delivery_recorded_date']

# Keep the original text of the PO / PQ date columns, because they also carry
# status placeholders (e.g. "Date Not Captured", "Pre-PQ Process", "N/A - From RDC")
# that are lost once the column is parsed.
# The text is read from df_raw rather than df so that re-running this cell never
# overwrites the *_raw copies with already-parsed datetimes.
df["po_sent_to_vendor_date_raw"] = df_raw["po_sent_to_vendor_date"]
df["pq_first_sent_to_client_date_raw"] = df_raw["pq_first_sent_to_client_date"]

# errors="coerce" turns anything that is not a parseable date into NaT.
for col in date_cols:
    df[col] = pd.to_datetime(df_raw[col], errors="coerce")

df[date_cols]

  df[col] = pd.to_datetime(df[col], errors='coerce')
  df[col] = pd.to_datetime(df[col], errors='coerce')
  df[col] = pd.to_datetime(df[col], errors='coerce')
  df[col] = pd.to_datetime(df[col], errors='coerce')
  df[col] = pd.to_datetime(df[col], errors='coerce')


Unnamed: 0,pq_first_sent_to_client_date,po_sent_to_vendor_date,scheduled_delivery_date,delivered_to_client_date,delivery_recorded_date
0,NaT,NaT,2006-06-02,2006-06-02,2006-06-02
1,NaT,NaT,2006-11-14,2006-11-14,2006-11-14
2,NaT,NaT,2006-08-27,2006-08-27,2006-08-27
3,NaT,NaT,2006-09-01,2006-09-01,2006-09-01
4,NaT,NaT,2006-08-11,2006-08-11,2006-08-11
...,...,...,...,...,...
10319,2014-10-16,NaT,2015-07-31,2015-07-15,2015-07-20
10320,2014-10-24,NaT,2015-07-31,2015-08-06,2015-08-07
10321,2014-08-12,NaT,2015-08-31,2015-08-25,2015-09-03
10322,2015-07-01,NaT,2015-09-09,2015-08-04,2015-08-11


In [25]:
# Date logic checks
# A row is flagged when the PO was sent to the vendor after either the actual or
# the scheduled delivery date. Comparisons against NaT evaluate to False, so rows
# without a parsed PO date are never flagged.
po_sent = df["po_sent_to_vendor_date"]
delivered_before_po = df["delivered_to_client_date"].lt(po_sent)
scheduled_before_po = df["scheduled_delivery_date"].lt(po_sent)

df["invalid_delivery"] = delivered_before_po | scheduled_before_po
df[df["invalid_delivery"]]

Unnamed: 0,id,project_code,pq_#,po___so_#,asn_dn_#,country,managed_by,fulfill_via,vendor_inco_term,shipment_mode,...,pack_price,unit_price,manufacturing_site,first_line_designation,weight_(kilograms),freight_cost_(usd),line_item_insurance_(usd),po_sent_to_vendor_date_raw,pq_first_sent_to_client_date_raw,invalid_delivery
430,4190,116-ZA-T01,Pre-PQ Process,SCMS-14050,ASN-1252,South Africa,PMO - US,Direct Drop,DDP,,...,5.99,0.1,"BMS Meymac, France",Yes,Weight Captured Separately,Freight Included in Commodity Cost,7.67,11/12/2007,Pre-PQ Process,True
454,4432,106-HT-T01,Pre-PQ Process,SCMS-22780,ASN-1680,Haiti,PMO - US,Direct Drop,DDP,Air,...,85.0,0.85,Inverness Japan,Yes,Weight Captured Separately,Freight Included in Commodity Cost,4.08,4/28/2008,Pre-PQ Process,True
2698,13148,103-DO-T30,FPQ-14469,SCMS-244720,ASN-26962,Dominican Republic,PMO - US,Direct Drop,DDP,Air,...,32.44,0.2,ABBVIE Labs North Chicago US,Yes,Weight Captured Separately,Freight Included in Commodity Cost,152.4,6/23/2014,6/13/2014,True
3387,25539,116-ZA-T30,FPQ-15950,SCMS-277981,ASN-32394,South Africa,PMO - US,Direct Drop,DDP,Truck,...,4.62,0.05,ABBVIE (Abbott) France,Yes,Weight Captured Separately,Freight Included in Commodity Cost,0.01,5/29/2015,2/20/2015,True
4906,52710,116-ZA-T30,FPQ-13973,SCMS-238571,ASN-26885,South Africa,PMO - US,Direct Drop,DDP,Truck,...,22.06,0.07,ABBVIE (Abbott) Logis. UK,Yes,Weight Captured Separately,Freight Included in Commodity Cost,0.05,6/26/2014,4/30/2014,True


In [26]:
# Clean numeric columns
# Only columns that pandas already typed as numeric are selected here; text
# columns holding numbers mixed with notes are not part of this selection.
num_cols = df.select_dtypes(include="number").columns
print(num_cols)

# Re-coerce every selected column in one vectorized pass.
df[num_cols] = df[num_cols].apply(pd.to_numeric, errors="coerce")

df[num_cols].head(20)

Index(['id', 'unit_of_measure_(per_pack)', 'line_item_quantity',
       'line_item_value', 'pack_price', 'unit_price',
       'line_item_insurance_(usd)'],
      dtype='object')


Unnamed: 0,id,unit_of_measure_(per_pack),line_item_quantity,line_item_value,pack_price,unit_price,line_item_insurance_(usd)
0,1,30,19,551.0,29.0,0.97,
1,3,240,1000,6200.0,6.2,0.03,
2,4,100,500,40000.0,80.0,0.8,
3,15,60,31920,127360.8,3.99,0.07,
4,16,60,38000,121600.0,3.2,0.05,
5,23,240,416,2225.6,5.35,0.02,
6,44,90,135,4374.0,32.4,0.36,
7,45,60,16667,60834.55,3.65,0.06,
8,46,60,273,532.35,1.95,0.03,
9,47,120,2800,115080.0,41.1,0.34,


In [27]:
# Clean and normalize categorical columns
cat_cols = df.select_dtypes(include="object").columns

# Trim leading/trailing whitespace from every text column.
for text_col in cat_cols:
    df[text_col] = df[text_col].str.strip()

# Title-case the main grouping columns.
# NOTE(review): str.title() also capitalizes the letter after an apostrophe
# (the displayed data shows "Côte D'Ivoire") - confirm this is acceptable
# for downstream joins and labels.
for text_col in ("country", "shipment_mode", "vendor"):
    df[text_col] = df[text_col].str.title()

In [28]:
# Extract numeric values from Weight & Freight
# Both source columns mix numbers with free-text notes (e.g. "See DN-4307 (ID#:83920)",
# "Freight Included in Commodity Cost"); anything non-numeric becomes NaN here.
df["weight_kg_numeric"] = pd.to_numeric(df["weight_(kilograms)"], errors="coerce")
df["freight_cost_usd_numeric"] = pd.to_numeric(df["freight_cost_(usd)"], errors="coerce")

# ASN/DN number that a "See ..." weight note points to (NaN when there is no such note).
# NOTE(review): this column is informational only - the lookups below join on the
# row's own asn_dn_#, not on this reference.
df["asn_reference"] = df["weight_(kilograms)"].str.extract(r"See (ASN-\d+|DN-\d+)", expand=False)

# One numeric value per shipment: the first row of each asn_dn_# that has a number.
asn_weight_map = (
    df.loc[df["weight_kg_numeric"].notna(), ["asn_dn_#", "weight_kg_numeric"]]
    .drop_duplicates(subset="asn_dn_#")
)
asn_freight_map = (
    df.loc[df["freight_cost_usd_numeric"].notna(), ["asn_dn_#", "freight_cost_usd_numeric"]]
    .drop_duplicates(subset="asn_dn_#")
)

# Attach the shipment-level values to every line item. Because both frames share the
# value column name, pandas suffixes them: *_x = the row's own parsed value,
# *_y = the shipment-level lookup. validate guards against accidental row duplication.
df = df.merge(asn_weight_map, on="asn_dn_#", how="left", validate="many_to_one")
df = df.merge(asn_freight_map, on="asn_dn_#", how="left", validate="many_to_one")

# A row's own parsed number must never be replaced by another row's value from the
# same shipment: keep the own value when present and use the lookup only as a fallback.
df["weight_kg_numeric_y"] = df["weight_kg_numeric_x"].fillna(df["weight_kg_numeric_y"])
df["freight_cost_usd_numeric_y"] = df["freight_cost_usd_numeric_x"].fillna(df["freight_cost_usd_numeric_y"])
df

Unnamed: 0,id,project_code,pq_#,po___so_#,asn_dn_#,country,managed_by,fulfill_via,vendor_inco_term,shipment_mode,...,freight_cost_(usd),line_item_insurance_(usd),po_sent_to_vendor_date_raw,pq_first_sent_to_client_date_raw,invalid_delivery,weight_kg_numeric_x,freight_cost_usd_numeric_x,asn_reference,weight_kg_numeric_y,freight_cost_usd_numeric_y
0,1,100-CI-T01,Pre-PQ Process,SCMS-4,ASN-8,Côte D'Ivoire,PMO - US,Direct Drop,EXW,Air,...,780.34,,Date Not Captured,Pre-PQ Process,False,13.0,780.34,,13.0,780.34
1,3,108-VN-T01,Pre-PQ Process,SCMS-13,ASN-85,Vietnam,PMO - US,Direct Drop,EXW,Air,...,4521.5,,Date Not Captured,Pre-PQ Process,False,358.0,4521.50,,358.0,4521.50
2,4,100-CI-T01,Pre-PQ Process,SCMS-20,ASN-14,Côte D'Ivoire,PMO - US,Direct Drop,FCA,Air,...,1653.78,,Date Not Captured,Pre-PQ Process,False,171.0,1653.78,,171.0,1653.78
3,15,108-VN-T01,Pre-PQ Process,SCMS-78,ASN-50,Vietnam,PMO - US,Direct Drop,EXW,Air,...,16007.06,,Date Not Captured,Pre-PQ Process,False,1855.0,16007.06,,1855.0,16007.06
4,16,108-VN-T01,Pre-PQ Process,SCMS-81,ASN-55,Vietnam,PMO - US,Direct Drop,EXW,Air,...,45450.08,,Date Not Captured,Pre-PQ Process,False,7590.0,45450.08,,7590.0,45450.08
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10319,86818,103-ZW-T30,FPQ-15197,SO-50020,DN-4307,Zimbabwe,PMO - US,From RDC,N/A - From RDC,Truck,...,See DN-4307 (ID#:83920),705.79,N/A - From RDC,10/16/2014,False,,,DN-4307,25880.0,46111.55
10320,86819,104-CI-T30,FPQ-15259,SO-50102,DN-4313,Côte D'Ivoire,PMO - US,From RDC,N/A - From RDC,Truck,...,See DN-4313 (ID#:83921),161.71,N/A - From RDC,10/24/2014,False,,,DN-4313,4426.0,14734.92
10321,86821,110-ZM-T30,FPQ-14784,SO-49600,DN-4316,Zambia,PMO - US,From RDC,N/A - From RDC,Truck,...,Freight Included in Commodity Cost,5284.04,N/A - From RDC,8/12/2014,False,,,,85128.0,
10322,86822,200-ZW-T30,FPQ-16523,SO-51680,DN-4334,Zimbabwe,PMO - US,From RDC,N/A - From RDC,Truck,...,Freight Included in Commodity Cost,134.03,N/A - From RDC,7/1/2015,False,1392.0,,,1392.0,


In [29]:
# Rows that still have no freight cost after the lookup by asn_dn_#.
freight_missing = df["freight_cost_usd_numeric_y"].isna()
df[freight_missing]

Unnamed: 0,id,project_code,pq_#,po___so_#,asn_dn_#,country,managed_by,fulfill_via,vendor_inco_term,shipment_mode,...,freight_cost_(usd),line_item_insurance_(usd),po_sent_to_vendor_date_raw,pq_first_sent_to_client_date_raw,invalid_delivery,weight_kg_numeric_x,freight_cost_usd_numeric_x,asn_reference,weight_kg_numeric_y,freight_cost_usd_numeric_y
6,44,110-ZM-T01,Pre-PQ Process,SCMS-139,ASN-130,Zambia,PMO - US,Direct Drop,DDU,Air,...,Freight Included in Commodity Cost,,Date Not Captured,Pre-PQ Process,False,328.0,,,328.0,
9,47,110-ZM-T01,Pre-PQ Process,SCMS-165,ASN-199,Zambia,PMO - US,Direct Drop,CIP,Air,...,Freight Included in Commodity Cost,,11/13/2006,Pre-PQ Process,False,643.0,,,643.0,
10,60,110-ZM-T01,Pre-PQ Process,SCMS-221,ASN-223,Zambia,PMO - US,Direct Drop,CIP,Air,...,Freight Included in Commodity Cost,,12/1/2006,Pre-PQ Process,False,643.0,,,643.0,
12,62,102-NG-T01,Pre-PQ Process,SCMS-230,ASN-144,Nigeria,PMO - US,Direct Drop,EXW,Air,...,Invoiced Separately,,Date Not Captured,Pre-PQ Process,False,,,,,
15,68,113-ZW-T01,Pre-PQ Process,SCMS-308,ASN-285,Zimbabwe,PMO - US,Direct Drop,CIP,Air,...,Freight Included in Commodity Cost,,1/10/2007,Pre-PQ Process,False,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10190,86604,102-DO-T30,FPQ-12902,SO-46510,DN-3325,Dominican Republic,PMO - US,From RDC,N/A - From RDC,Air,...,Invoiced Separately,0.00,N/A - From RDC,8/29/2013,False,,,,,
10300,86797,114-UG-T30,FPQ-15639,SO-50670,DN-4215,Uganda,PMO - US,From RDC,N/A - From RDC,Truck,...,Freight Included in Commodity Cost,196.88,N/A - From RDC,12/22/2014,False,,,,8221.0,
10321,86821,110-ZM-T30,FPQ-14784,SO-49600,DN-4316,Zambia,PMO - US,From RDC,N/A - From RDC,Truck,...,Freight Included in Commodity Cost,5284.04,N/A - From RDC,8/12/2014,False,,,,85128.0,
10322,86822,200-ZW-T30,FPQ-16523,SO-51680,DN-4334,Zimbabwe,PMO - US,From RDC,N/A - From RDC,Truck,...,Freight Included in Commodity Cost,134.03,N/A - From RDC,7/1/2015,False,1392.0,,,1392.0,


In [30]:
# Drop and rename weight and freight fields
# The original text columns, the row-level parses (*_x) and the helper reference
# column are removed; the merged *_y columns become the final fields.
superseded_cols = [
    "weight_(kilograms)",
    "freight_cost_(usd)",
    "weight_kg_numeric_x",
    "freight_cost_usd_numeric_x",
    "asn_reference",
]
final_names = {
    "weight_kg_numeric_y": "weight_kg",
    "freight_cost_usd_numeric_y": "freight_cost_usd",
}

df = df.drop(columns=superseded_cols).rename(columns=final_names)
df

Unnamed: 0,id,project_code,pq_#,po___so_#,asn_dn_#,country,managed_by,fulfill_via,vendor_inco_term,shipment_mode,...,pack_price,unit_price,manufacturing_site,first_line_designation,line_item_insurance_(usd),po_sent_to_vendor_date_raw,pq_first_sent_to_client_date_raw,invalid_delivery,weight_kg,freight_cost_usd
0,1,100-CI-T01,Pre-PQ Process,SCMS-4,ASN-8,Côte D'Ivoire,PMO - US,Direct Drop,EXW,Air,...,29.00,0.97,Ranbaxy Fine Chemicals LTD,Yes,,Date Not Captured,Pre-PQ Process,False,13.0,780.34
1,3,108-VN-T01,Pre-PQ Process,SCMS-13,ASN-85,Vietnam,PMO - US,Direct Drop,EXW,Air,...,6.20,0.03,"Aurobindo Unit III, India",Yes,,Date Not Captured,Pre-PQ Process,False,358.0,4521.50
2,4,100-CI-T01,Pre-PQ Process,SCMS-20,ASN-14,Côte D'Ivoire,PMO - US,Direct Drop,FCA,Air,...,80.00,0.80,ABBVIE GmbH & Co.KG Wiesbaden,Yes,,Date Not Captured,Pre-PQ Process,False,171.0,1653.78
3,15,108-VN-T01,Pre-PQ Process,SCMS-78,ASN-50,Vietnam,PMO - US,Direct Drop,EXW,Air,...,3.99,0.07,"Ranbaxy, Paonta Shahib, India",Yes,,Date Not Captured,Pre-PQ Process,False,1855.0,16007.06
4,16,108-VN-T01,Pre-PQ Process,SCMS-81,ASN-55,Vietnam,PMO - US,Direct Drop,EXW,Air,...,3.20,0.05,"Aurobindo Unit III, India",Yes,,Date Not Captured,Pre-PQ Process,False,7590.0,45450.08
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10319,86818,103-ZW-T30,FPQ-15197,SO-50020,DN-4307,Zimbabwe,PMO - US,From RDC,N/A - From RDC,Truck,...,3.60,0.06,"Mylan, H-12 & H-13, India",No,705.79,N/A - From RDC,10/16/2014,False,25880.0,46111.55
10320,86819,104-CI-T30,FPQ-15259,SO-50102,DN-4313,Côte D'Ivoire,PMO - US,From RDC,N/A - From RDC,Truck,...,6.52,0.11,Hetero Unit III Hyderabad IN,No,161.71,N/A - From RDC,10/24/2014,False,4426.0,14734.92
10321,86821,110-ZM-T30,FPQ-14784,SO-49600,DN-4316,Zambia,PMO - US,From RDC,N/A - From RDC,Truck,...,9.99,0.33,Cipla Ltd A-42 MIDC Mahar. IN,No,5284.04,N/A - From RDC,8/12/2014,False,85128.0,
10322,86822,200-ZW-T30,FPQ-16523,SO-51680,DN-4334,Zimbabwe,PMO - US,From RDC,N/A - From RDC,Truck,...,6.52,0.11,Mylan (formerly Matrix) Nashik,Yes,134.03,N/A - From RDC,7/1/2015,False,1392.0,


In [31]:
# Weight and freight consistency checks
# Flag rows that carry a positive freight cost but no usable weight (missing or zero).
# In Python `&` binds tighter than `|`, so the two weight conditions are grouped
# explicitly; without the grouping every zero-weight row would be flagged
# regardless of its freight cost.
has_freight = df["freight_cost_usd"] > 0
no_usable_weight = df["weight_kg"].isna() | (df["weight_kg"] == 0)

df["inconsistency_check"] = has_freight & no_usable_weight
df.loc[df["inconsistency_check"]]

Unnamed: 0,id,project_code,pq_#,po___so_#,asn_dn_#,country,managed_by,fulfill_via,vendor_inco_term,shipment_mode,...,unit_price,manufacturing_site,first_line_designation,line_item_insurance_(usd),po_sent_to_vendor_date_raw,pq_first_sent_to_client_date_raw,invalid_delivery,weight_kg,freight_cost_usd,inconsistency_check
114,1115,116-ZA-T01,Pre-PQ Process,SCMS-47500,ASN-4332,South Africa,PMO - US,Direct Drop,DDP,Truck,...,0.33,"Aurobindo Unit III, India",Yes,38.75,4/2/2009,Pre-PQ Process,False,,3134.85,True
257,2481,116-ZA-T01,Pre-PQ Process,SCMS-47500,ASN-4332,South Africa,PMO - US,Direct Drop,DDP,Truck,...,0.07,"BMS Meymac, France",No,0.59,4/2/2009,Pre-PQ Process,False,,3134.85,True
378,3745,116-ZA-T01,Pre-PQ Process,SCMS-41050,ASN-3635,South Africa,PMO - US,Direct Drop,DDP,Air,...,0.08,"MSD Midrand, J'burg, SA",Yes,1.67,Date Not Captured,Pre-PQ Process,False,,4230.75,True
501,5049,116-ZA-T01,Pre-PQ Process,SCMS-41160,ASN-3760,South Africa,PMO - US,Direct Drop,DDP,Air,...,0.22,GSK Aranda,Yes,12.85,Date Not Captured,Pre-PQ Process,False,,1521.4,True
629,6400,116-ZA-T01,Pre-PQ Process,SCMS-41050,ASN-3635,South Africa,PMO - US,Direct Drop,DDP,Air,...,0.02,"Aurobindo Unit III, India",No,0.16,Date Not Captured,Pre-PQ Process,False,,4230.75,True
631,6402,116-ZA-T01,Pre-PQ Process,SCMS-41160,ASN-3760,South Africa,PMO - US,Direct Drop,DDP,Air,...,0.02,GSK Mississauga (Canada),No,1.66,Date Not Captured,Pre-PQ Process,False,,1521.4,True
643,6437,116-ZA-T01,Pre-PQ Process,SCMS-47500,ASN-4332,South Africa,PMO - US,Direct Drop,DDP,Truck,...,0.12,"Aurobindo Unit III, India",No,27.75,4/2/2009,Pre-PQ Process,False,,3134.85,True
644,6438,116-ZA-T01,Pre-PQ Process,SCMS-47500,ASN-4332,South Africa,PMO - US,Direct Drop,DDP,Truck,...,0.01,"Aurobindo Unit III, India",No,1.63,4/2/2009,Pre-PQ Process,False,,3134.85,True
645,6439,116-ZA-T01,Pre-PQ Process,SCMS-47500,ASN-4332,South Africa,PMO - US,Direct Drop,DDP,Truck,...,0.04,"Aurobindo Unit III, India",No,1.47,4/2/2009,Pre-PQ Process,False,,3134.85,True
700,7007,108-VN-T01,Pre-PQ Process,SCMS-19630,ASN-1718,Vietnam,PMO - US,Direct Drop,DDU,Truck,...,0.69,Gilead(Nycomed) Oranienburg DE,Yes,65.15,3/21/2008,Pre-PQ Process,False,,1401.49,True


In [32]:
# Create logistics KPIs
delivered = df["delivered_to_client_date"]

# Day counts: PO sent -> delivered, and delivered relative to the scheduled date
# (negative = early, positive = late).
df["lead_time_days"] = delivered.sub(df["po_sent_to_vendor_date"]).dt.days
df["schedule_delay_days"] = delivered.sub(df["scheduled_delivery_date"]).dt.days

# NOTE(review): a missing delay compares as False, i.e. it would count as not on time.
df["on_time"] = df["schedule_delay_days"].le(0)

# Zero weights are treated as missing so the ratio yields NaN instead of inf.
nonzero_weight = df["weight_kg"].replace(0, np.nan)
df["freight_per_kg"] = df["freight_cost_usd"].div(nonzero_weight)

# Time features
df["delivered_month"] = delivered.dt.to_period("M").astype(str)
df["delivered_year"] = delivered.dt.year

In [33]:
# Missing values summary
# Share of missing entries per column, largest first (top 15).
missing_share = df.isna().mean()
missing_share.sort_values(ascending=False).head(15)

po_sent_to_vendor_date          0.555211
lead_time_days                  0.555211
pq_first_sent_to_client_date    0.259686
freight_per_kg                  0.177451
freight_cost_usd                0.172995
dosage                          0.168152
weight_kg                       0.155366
shipment_mode                   0.034870
line_item_insurance_(usd)       0.027799
schedule_delay_days             0.000000
line_item_value                 0.000000
pack_price                      0.000000
unit_price                      0.000000
manufacturing_site              0.000000
first_line_designation          0.000000
dtype: float64

In [34]:
# Logical consistency checks
# Negative lead time?
# Printed explicitly: only the last expression of a cell is displayed, so a bare
# count placed before the describe() call below would never be shown.
negative_lead_time_rows = int((df["lead_time_days"] < 0).sum())
print("Rows with negative lead time:", negative_lead_time_rows)

# Freight per kg extremely high?
df["freight_per_kg"].describe(percentiles=[.9, .95, .99])

count     8492.000000
mean        31.163999
std        496.770891
min          0.000539
50%          6.324641
90%         27.564314
95%         59.078545
99%        329.722450
max      31087.705000
Name: freight_per_kg, dtype: float64

In [35]:
# Data quality summary
# Gather the figures first, then print the report in one place.
missing_counts = df.isna().sum().sort_values(ascending=False)
invalid_date_rows = df["invalid_delivery"].sum()
freight_weight_mismatches = df["inconsistency_check"].sum()

print("Final dataset shape:", df.shape)

print("\nMissing values (top 10):")
print(missing_counts.head(10))

print("\nInvalid date sequences:", invalid_date_rows)
print("Freight-weight mismatches:", freight_weight_mismatches)

Final dataset shape: (10324, 43)

Missing values (top 10):
po_sent_to_vendor_date          5732
lead_time_days                  5732
pq_first_sent_to_client_date    2681
freight_per_kg                  1832
freight_cost_usd                1786
dosage                          1736
weight_kg                       1604
shipment_mode                    360
line_item_insurance_(usd)        287
schedule_delay_days                0
dtype: int64

Invalid date sequences: 5
Freight-weight mismatches: 46


In [36]:
# Format date columns
# NOTE(review): the export format is day-first with a two-digit year; a reader that
# assumes month-first will misinterpret values such as "02/06/06" - confirm that
# every consumer of the exported data expects this layout.
export_date_format = "%d/%m/%y"
date_cols = [
    "pq_first_sent_to_client_date",
    "po_sent_to_vendor_date",
    "scheduled_delivery_date",
    "delivered_to_client_date",
    "delivery_recorded_date",
]

# Only columns that are still datetime-typed are converted, so re-running this
# cell after the conversion leaves the already-formatted strings untouched.
still_datetime = [
    name for name in date_cols
    if name in df.columns and pd.api.types.is_datetime64_any_dtype(df[name])
]
for name in still_datetime:
    df[name] = df[name].dt.strftime(export_date_format)

preview_cols = [
    "po_sent_to_vendor_date", "delivered_to_client_date",
    "lead_time_days", "schedule_delay_days", "on_time",
    "freight_cost_usd", "weight_kg", "freight_per_kg",
]
df[preview_cols].head(10)


Unnamed: 0,po_sent_to_vendor_date,delivered_to_client_date,lead_time_days,schedule_delay_days,on_time,freight_cost_usd,weight_kg,freight_per_kg
0,,02/06/06,,0,True,780.34,13.0,60.026154
1,,14/11/06,,0,True,4521.5,358.0,12.629888
2,,27/08/06,,0,True,1653.78,171.0,9.671228
3,,01/09/06,,0,True,16007.06,1855.0,8.629143
4,,11/08/06,,0,True,45450.08,7590.0,5.988153
5,,28/09/06,,0,True,5920.42,504.0,11.746865
6,,08/01/07,,0,True,,328.0,
7,,24/11/06,,0,True,6212.41,1478.0,4.203254
8,,07/12/06,,0,True,4861.14,479.0,10.148518
9,13/11/06,30/01/07,78.0,0,True,,643.0,


In [37]:
# Write the cleaned dataset next to the raw file; the row index is not exported.
cleaned_csv_path = "../Data/cleaned_supply_chain_data.csv"
df.to_csv(cleaned_csv_path, index=False)