In [None]:
import pandas as pd
import numpy as np
from scipy import stats

# Load the raw order-history export into a DataFrame
df = pd.read_csv('order_history_kaggle_data.csv')

# Preview the first rows to sanity-check the parse
print("Initial Data Overview:")
print(df.head())

# Count missing values in each column
print("\nMissing values per column:")
print(df.isnull().sum())

# Drop rows with missing values (optional).
# NOTE(review): with its default arguments dropna() removes every row that has
# a missing value in ANY column - confirm that is intended before relying on
# the row counts below.
# .copy() makes df_cleaned an independent frame so that later column
# assignments on it are unambiguous.
df_cleaned = df.dropna().copy()

# Data types and basic info.
# DataFrame.info() writes its report directly and returns None, so it must not
# be wrapped in print() (that emitted a stray "None" line after the report).
print("\nDataset Info After Cleaning:")
df_cleaned.info()

# Basic statistics for numerical columns
print("\nDescriptive statistics:")
print(df_cleaned.describe())

# NOTE(review): the data preview captured in this notebook shows columns such
# as 'Restaurant name' and 'Order Placed At'; confirm that the column names
# tested below actually exist in this dataset, otherwise these steps are
# silently skipped.

# Frequency of each product category, reported only when the column is present
if 'product_category' in df_cleaned.columns:
    category_counts = df_cleaned['product_category'].value_counts()
    print("\nProduct category distribution:")
    print(category_counts)

# Parse 'order_date' into datetime values when the column is present
if 'order_date' in df_cleaned.columns:
    parsed_order_dates = pd.to_datetime(df_cleaned['order_date'])
    df_cleaned['order_date'] = parsed_order_dates

# Summary statistics and outlier count for the 'price' column, when present
if 'price' in df_cleaned.columns:
    prices = df_cleaned['price']

    # Report each statistic as soon as it is computed
    print("\nPrice Statistics:")
    price_mean = np.mean(prices)
    print(f"Mean: {price_mean:.2f}")
    price_median = np.median(prices)
    print(f"Median: {price_median:.2f}")
    price_std = np.std(prices)  # called with NumPy's default arguments
    print(f"Standard Deviation: {price_std:.2f}")

    # Flag rows whose absolute z-score exceeds 3 as outliers
    z_scores = np.abs(stats.zscore(prices))
    outlier_mask = z_scores > 3
    outliers = df_cleaned[outlier_mask]
    outlier_count = len(outliers)
    print(f"\nNumber of Outliers in Price: {outlier_count}")

# Mean price for each product category, highest first; runs only when both
# columns are present
if {'product_category', 'price'}.issubset(df_cleaned.columns):
    category_means = df_cleaned.groupby('product_category')['price'].mean()
    avg_price_by_category = category_means.sort_values(ascending=False)
    print("\nAverage Price by Product Category:")
    print(avg_price_by_category)

# Write the cleaned frame to disk without the index column
df_cleaned.to_csv('cleaned_order_history.csv', index=False)


Initial Data Overview:
   Restaurant ID Restaurant name   Subzone       City    Order ID  \
0       20320607           Swaad  Sector 4  Delhi NCR  6168884918   
1       20320607           Swaad  Sector 4  Delhi NCR  6170707559   
2       20320607           Swaad  Sector 4  Delhi NCR  6169375019   
3       20320607           Swaad  Sector 4  Delhi NCR  6151677434   
4       20320607           Swaad  Sector 4  Delhi NCR  6167540897   

               Order Placed At Order Status         Delivery Distance  \
0  11:38 PM, September 10 2024    Delivered  Zomato Delivery      3km   
1  11:34 PM, September 10 2024    Delivered  Zomato Delivery      2km   
2  03:52 PM, September 10 2024    Delivered  Zomato Delivery     <1km   
3  03:45 PM, September 10 2024    Delivered  Zomato Delivery      2km   
4  03:04 PM, September 10 2024    Delivered  Zomato Delivery      2km   

                                      Items in order  ... Rating Review  \
0  1 x Grilled Chicken Jamaican Tender, 1 x Gril