# In [2]:
import pandas as pd

# 1. Read the purchase records from disk into a DataFrame
source_csv = "customer_purchase.csv"
df = pd.read_csv(source_csv)

# 2. Apply category-specific discounts
# Multiplier applied to Price for each discounted category. A category that is
# not listed here (including a missing one) keeps its original price.
discount_multipliers = {'Electronics': 0.9, 'Groceries': 0.95}

# Look the multiplier up per row and multiply column-wise instead of calling a
# lambda through df.apply(axis=1): the row-wise form runs Python code for every
# row, and on an empty frame it hands back a DataFrame rather than a Series, so
# the column assignment breaks. Unmatched categories map to NaN, which
# fillna(1.0) turns into "no discount".
price_multiplier = df['Category'].map(discount_multipliers).fillna(1.0)
df['Discounted_Price'] = df['Price'] * price_multiplier

# 3. Use map() to label each region as Urban or Rural
urban_regions = ('New York', 'Los Angeles', 'Chicago')
rural_regions = ('Rural South', 'Rural Midwest', 'Rural West')

# Build the lookup from the two groups above; urban entries first, then rural.
region_map = {name: 'Urban' for name in urban_regions}
region_map.update({name: 'Rural' for name in rural_regions})

# Regions that are not keys of region_map come out as NaN.
region_labels = df['Region'].map(region_map)
df['Region_Type'] = region_labels

# 4. Replace the Date_of_Purchase column with its pd.to_datetime() conversion
purchase_dates = pd.to_datetime(df['Date_of_Purchase'])
df['Date_of_Purchase'] = purchase_dates

# 5. Calculate total revenue per transaction
# Multiply the two columns directly rather than through df.apply(axis=1): the
# element-wise product is the same, it avoids running a lambda once per row,
# and it still yields a Series when the frame has no rows (the row-wise apply
# returns a DataFrame in that case, which cannot be assigned to one column).
df['Total_Revenue'] = df['Discounted_Price'] * df['Quantity']

# 6. Summarise each Category: summed revenue, mean discounted price, summed quantity
per_category = df.groupby('Category')
category_group = per_category.agg(
    Total_Revenue=pd.NamedAgg(column='Total_Revenue', aggfunc='sum'),
    Average_Price=pd.NamedAgg(column='Discounted_Price', aggfunc='mean'),
    Total_Quantity_Sold=pd.NamedAgg(column='Quantity', aggfunc='sum'),
)
# Move Category out of the index so it is an ordinary column again.
category_group = category_group.reset_index()

# 7. Summarise each Customer_ID: summed revenue and number of purchases
# Output column name -> (source column, aggregation). 'count' tallies the
# non-null Product entries in each group.
customer_metrics = {
    'Total_Amount_Spent': ('Total_Revenue', 'sum'),
    'Total_Purchases': ('Product', 'count'),
}
per_customer = df.groupby('Customer_ID')
customer_group = per_customer.agg(**customer_metrics).reset_index()

# 8. Pivot table: summed Total_Revenue with one row per Region and one column per Category
pivot_table = pd.pivot_table(
    df,
    values='Total_Revenue',
    index='Region',
    columns='Category',
    aggfunc='sum',
)

# 9. Order customers from highest to lowest total spending
spend_column = 'Total_Amount_Spent'
sorted_customers = customer_group.sort_values(spend_column, ascending=False)

# 10. Sort transactions by Product name
# Product is made the index, the index is sorted, and then it is turned back
# into a column; as a result Product ends up as the first column and the row
# index is renumbered.
indexed_by_product = df.set_index('Product')
ordered_by_product = indexed_by_product.sort_index()
sorted_products = ordered_by_product.reset_index()

# 11. Top-selling product based on quantity sold
# Total the quantity per product once and reuse it for both the product label
# and its quantity, instead of repeating the same groupby/sum for each.
quantity_by_product = df.groupby('Product')['Quantity'].sum()
top_selling_product = quantity_by_product.idxmax()
top_selling_quantity = quantity_by_product.max()

# 12. Keep the customers whose total spending is strictly above the mean
average_spending = customer_group['Total_Amount_Spent'].mean()
above_average = customer_group['Total_Amount_Spent'].gt(average_spending)
high_spenders = customer_group.loc[above_average]

# 13. Attach each customer's totals to their transactions and write the result to CSV
# Left join on Customer_ID so every row of df is kept.
final_dataset = pd.merge(df, customer_group, how='left', on='Customer_ID')
output_csv = "final_aggregated_customer_data.csv"
final_dataset.to_csv(output_csv, index=False)
