In [1]:
# Load the Shiprocket vs INCREFF warehouse pricing sheet (path is relative to
# the notebook's working directory).
import pandas as pd

# header=0: the first line of the CSV supplies the column names.
cloud_warehouse_compersion_chart_df = pd.read_csv(
    "Cloud Warehouse Compersion Chart.csv",
    header=0,
)

# Show the freshly loaded frame as the cell output.
cloud_warehouse_compersion_chart_df

Unnamed: 0,index,Shiprocket,Unnamed: 1,INCREFF
0,0,Heads,Price (Per Unit),Price (Per Unit)
1,1,Inbound (Fresh Stock and RTO),₹4.00,4
2,2,Outbound,₹7.00,11
3,3,Storage Fee/Cft,₹25.00,Rs 0.15/- Per Day
4,4,Customer Return with Detailed QC,₹6.00,15.5
5,5,(A) SCOPE OF WORK and STANDARD OPERATING PROCE...,,
6,6,Inward,Unloading,• Boxes counted and weighed. Data captured in ...
7,7,,Validation,• GRN will be initiated against a valid PO doc...
8,8,,QC,• No QC to be done (Outer Visual Inspection an...
9,9,,GRN,• Scanning (Goods will have scannable EAN cod...


In [2]:
# Share of missing cells in each column, expressed as a percentage:
# isna() marks missing cells, mean() gives the per-column fraction.
cloud_warehouse_compersion_chart_df.isna().mean().mul(100)

index          0.0
Shiprocket    58.0
Unnamed: 1    18.0
INCREFF       44.0
dtype: float64

In [3]:
# Trim the raw sheet down to the pricing rows:
#   1. discard the "index" column,
#   2. discard row 0, which only holds the "Heads" / "Price (Per Unit)" sub-header,
#   3. renumber the remaining rows from 0.
cloud_warehouse_compersion_chart_df = (
    cloud_warehouse_compersion_chart_df
    .drop(columns=["index"])
    .drop(index=0)
    .reset_index(drop=True)
)

# Keep only the first four rows; everything from position 4 onwards
# (the scope-of-work / SOP text below the price table) is removed.
cloud_warehouse_compersion_chart_df = cloud_warehouse_compersion_chart_df.drop(
    index=cloud_warehouse_compersion_chart_df.index[4:]
)

In [4]:
# Replace the three column labels positionally: the first column becomes the
# cost-head label, the next two are the per-provider price columns.
cloud_warehouse_compersion_chart_df.columns = pd.Index(
    ["Cost Head", "Shiprocket", "INCREFF"]
)

In [5]:
# Display the frame to check its rows and column labels before cleaning the price text.
cloud_warehouse_compersion_chart_df

Unnamed: 0,Cost Head,Shiprocket,INCREFF
0,Inbound (Fresh Stock and RTO),₹4.00,4
1,Outbound,₹7.00,11
2,Storage Fee/Cft,₹25.00,Rs 0.15/- Per Day
3,Customer Return with Detailed QC,₹6.00,15.5


In [6]:
# Shiprocket prices: delete the rupee sign and any commas, then trim
# surrounding whitespace so only the bare number text remains.
cloud_warehouse_compersion_chart_df["Shiprocket"] = (
    cloud_warehouse_compersion_chart_df["Shiprocket"]
    .str.replace(r"[₹,]", '', regex=True)
    .str.strip()
)

# INCREFF prices: delete the "Rs" prefix, the "/-" suffix, the "Per Day"
# qualifier and any commas, then trim surrounding whitespace.
# NOTE(review): removing "Per Day" discards the unit of the storage fee --
# confirm it is comparable with the Shiprocket storage figure.
cloud_warehouse_compersion_chart_df["INCREFF"] = (
    cloud_warehouse_compersion_chart_df["INCREFF"]
    .str.replace(r"Rs|/-|Per Day|,", '', regex=True)
    .str.strip()
)

In [7]:
# Parse both price columns as numbers. errors="coerce" turns any value that
# still cannot be parsed into NaN instead of raising.
for price_col in ("Shiprocket", "INCREFF"):
    cloud_warehouse_compersion_chart_df[price_col] = pd.to_numeric(
        cloud_warehouse_compersion_chart_df[price_col],
        errors="coerce",
    )

In [8]:
# Keep only rows with at least one usable price: how="all" removes a row only
# when BOTH Shiprocket and INCREFF are NaN.
cloud_warehouse_compersion_chart_df_cleaned = (
    cloud_warehouse_compersion_chart_df
    .dropna(subset=["Shiprocket", "INCREFF"], how="all")
    .reset_index(drop=True)
)

# Normalise the Cost Head text: collapse every run of whitespace (spaces,
# tabs, line breaks) into a single space and trim both ends.
# astype(str) alone would turn a missing label into the literal text "nan",
# so positions that were missing before cleaning are masked back to missing.
cost_head_raw = cloud_warehouse_compersion_chart_df_cleaned["Cost Head"]
cloud_warehouse_compersion_chart_df_cleaned["Cost Head"] = (
    cost_head_raw
    .astype(str)
    .str.replace(r"\s+", ' ', regex=True)
    .str.strip()
    .where(cost_head_raw.notna())
)

# Show the cleaned frame as the cell output.
cloud_warehouse_compersion_chart_df_cleaned

Unnamed: 0,Cost Head,Shiprocket,INCREFF
0,Inbound (Fresh Stock and RTO),4.0,4.0
1,Outbound,7.0,11.0
2,Storage Fee/Cft,25.0,0.15
3,Customer Return with Detailed QC,6.0,15.5


In [10]:
# Persist the cleaned pricing table to MySQL.

import os

from sqlalchemy import create_engine

# Connection URL: can be overridden through the ECOMMERCE_SALES_DB_URL
# environment variable so credentials do not have to live in the notebook.
# The default is the original local URL, so existing setups keep working.
db_url = os.environ.get(
    "ECOMMERCE_SALES_DB_URL",
    "mysql+pymysql://root@localhost:3306/e-commerce_sales",
)
engine = create_engine(db_url)

try:
    # Write the frame to table cloud_warehouse_compersion_chart.
    # if_exists="replace" drops an existing table of that name before
    # inserting; index=False keeps the DataFrame index out of the table.
    cloud_warehouse_compersion_chart_df_cleaned.to_sql(
        name="cloud_warehouse_compersion_chart", con=engine, if_exists="replace", index=False)
finally:
    # Release the engine's pooled connections even when the write fails.
    engine.dispose()