# Products Data Preprocessing

## Import Required Libraries

In [None]:
# pandas supplies the DataFrame type and the CSV read/write calls used in the
# cells below; numpy is imported alongside it for numeric utilities.
import pandas as pd
import numpy as np
# Status message so the notebook output shows that the imports completed.
print("Libraries imported successfully")

## Load Raw Data

In [None]:
# Read the raw products CSV into a DataFrame and report its (rows, columns) shape
raw_products_path = "products_data.csv"
products = pd.read_csv(raw_products_path)
print("Original Products Data Shape:", products.shape)

## Data Inspection

In [None]:
# Preview the leading rows of the raw data under a caption line
print("First few rows of the data:", products.head(), sep="\n")

In [None]:
# Show the dtype pandas assigned to each column of the raw data
column_types = products.dtypes
print("Data Types:")
print(column_types)

In [None]:
# Count the null entries in every column of the raw data
null_counts = products.isna().sum()
print("Missing Values:", null_counts, sep="\n")

In [None]:
# Summarise the raw data with describe() and print it under a caption line
raw_summary = products.describe()
print("Basic Statistics:", raw_summary, sep="\n")

## Data Cleaning

In [None]:
# Clean an independent deep copy so the raw `products` frame is left as loaded
products_cleaned = products.copy(deep=True)

In [None]:
# Clean Product_Price: strip "?" and "," characters, then convert to numeric.
# NOTE(review): the "?" is presumably a currency symbol that was mis-decoded
# when the CSV was written or read -- confirm the source file's encoding.
raw_price = products_cleaned["Product_Price"]
if not pd.api.types.is_numeric_dtype(raw_price):
    # regex=False forces literal matching: "?" is a regex metacharacter and the
    # default value of `regex` differs between pandas versions.
    raw_price = raw_price.str.replace("?", "", regex=False).str.replace(",", "", regex=False)
# If read_csv already parsed the column as numbers, the string cleanup above is
# skipped (the .str accessor would raise AttributeError on a numeric column).
# errors="coerce" turns any value that still is not a number into NaN.
products_cleaned["Product_Price"] = pd.to_numeric(raw_price, errors="coerce")
print("Product_Price cleaned and converted to numeric")

In [None]:
# Handle missing Product_ID: give every row without an ID its own new ID.
# Filling all gaps with the single value max + 1 would hand the same ID to
# every such row, so consecutive IDs are assigned after the current maximum.
id_missing = products_cleaned["Product_ID"].isna()
highest_id = products_cleaned["Product_ID"].max()
if pd.isna(highest_id):
    # No usable ID exists at all (empty or all-missing column): start from 1.
    highest_id = 0
# cumsum() over the missing mask numbers the missing rows 1, 2, 3, ... in row
# order; fillna aligns on the index, so only the missing rows take these values.
replacement_ids = highest_id + id_missing.cumsum()
products_cleaned["Product_ID"] = products_cleaned["Product_ID"].fillna(replacement_ids)
print("Missing Product_IDs handled")

## Verification

In [None]:
# Re-count the null entries per column on the cleaned frame
remaining_nulls = products_cleaned.isna().sum()
print("Missing Values After Cleaning:", remaining_nulls, sep="\n")

In [None]:
# Summarise the cleaned frame with describe() and print it under a caption line
cleaned_summary = products_cleaned.describe()
print("Cleaned Data Statistics:", cleaned_summary, sep="\n")

## Save Cleaned Data

In [None]:
# Write the cleaned frame to CSV, omitting the DataFrame index from the output
cleaned_products_path = "products_cleaned.csv"
products_cleaned.to_csv(cleaned_products_path, index=False)
print("Cleaned products data saved to products_cleaned.csv")