### Operations: apply, aggregation, and descriptive statistics

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the sales dataset; the relative path is resolved against the current working directory.
df = pd.read_csv('sales_dataset_comprehensive.csv')

In [3]:
# Preview the first rows (head() defaults to 5) to check which columns were loaded.
df.head()

Unnamed: 0,Date,Product,Category,Region,Salesperson,Customer_ID,Quantity,Unit_Price,Discount_Rate,Subtotal,...,Total_Amount,Customer_Age,Customer_Satisfaction,Payment_Method,Order_Priority,Month,Year,Quarter,Day_of_Week,Month_Name
0,2020-01-01 00:00:00.000000000,Headphones,Accessories,West,Sales_Person_15,Customer_0577,2,79.22,0.068,158.45,...,147.74,20,4.5,Bank Transfer,Low,1,2020,1,Wednesday,January
1,2020-01-01 07:00:33.846769353,Keyboard,Accessories,Central,Sales_Person_19,Customer_0581,15,46.69,0.074,700.42,...,648.43,31,3.0,Credit Card,High,1,2020,1,Wednesday,January
2,2020-01-01 14:01:07.693538707,Scanner,Accessories,East,Sales_Person_22,Customer_0952,4,182.63,0.063,730.51,...,684.3,37,2.5,Bank Transfer,High,1,2020,1,Wednesday,January
3,2020-01-01 21:01:41.540308061,Tablet,Electronics,South,Sales_Person_10,Customer_0009,2,355.84,0.041,711.69,...,682.49,57,4.9,Credit Card,Medium,1,2020,1,Wednesday,January
4,2020-01-02 04:02:15.387077415,Monitor,Electronics,Central,Sales_Person_12,Customer_0236,2,283.01,0.206,566.02,...,449.4,70,4.3,Bank Transfer,Medium,1,2020,1,Thursday,January


#### DataFrame.apply() - Row-wise operations

In [None]:
def calculate_profit_margin(row, cost_ratio=0.6):
    """Return the profit margin of one sales row as a percentage.

    Cost is estimated as ``cost_ratio`` times the row's ``Subtotal``; profit is
    the row's ``Total_Amount`` minus that estimated cost.

    Args:
        row: Mapping-like object (for example a DataFrame row) that exposes the
            keys ``'Subtotal'`` and ``'Total_Amount'``.
        cost_ratio: Fraction of ``Subtotal`` assumed to be cost. Defaults to
            0.6, the value that used to be hard-coded.

    Returns:
        float: ``profit / Total_Amount * 100``, or ``0.0`` when ``Total_Amount``
        is not greater than zero (this also avoids dividing by zero).
    """
    estimated_cost = row['Subtotal'] * cost_ratio
    total_amount = row['Total_Amount']
    # `not (x > 0)` also catches NaN, which then falls back to 0.0 like any non-positive total.
    if not total_amount > 0:
        return 0.0
    return ((total_amount - estimated_cost) / total_amount) * 100

In [None]:
# DataFrame.apply() - Row-wise operations
print("Using DataFrame.apply() - Calculate profit margin:")


# NOTE(review): this redefines calculate_profit_margin from the previous cell and
# shadows it once this cell runs; keep a single definition if the cells are merged.
def calculate_profit_margin(row, cost_ratio=0.6):
    """Return the profit margin of one sales row as a percentage.

    Cost is estimated as ``cost_ratio`` times the row's ``Subtotal``; profit is
    the row's ``Total_Amount`` minus that estimated cost.

    Args:
        row: Mapping-like object (for example a DataFrame row) that exposes the
            keys ``'Subtotal'`` and ``'Total_Amount'``.
        cost_ratio: Fraction of ``Subtotal`` assumed to be cost. Defaults to
            0.6, the value that used to be hard-coded.

    Returns:
        float: ``profit / Total_Amount * 100``, or ``0.0`` when ``Total_Amount``
        is not greater than zero (this also avoids dividing by zero).
    """
    estimated_cost = row['Subtotal'] * cost_ratio
    total_amount = row['Total_Amount']
    # `not (x > 0)` also catches NaN, which then falls back to 0.0 like any non-positive total.
    if not total_amount > 0:
        return 0.0
    return ((total_amount - estimated_cost) / total_amount) * 100

# Row-wise apply: axis='columns' is the named form of axis=1, so the function
# receives one row at a time.
df['Profit_Margin'] = df.apply(calculate_profit_margin, axis='columns')
print(df[['Product', 'Total_Amount', 'Profit_Margin']].head())

# Row-wise lambda: total amount divided by the quantity on the same row.
print("\nApply function to every row - Revenue per unit:")
df['Revenue_Per_Unit'] = df.apply(
    lambda sale: sale['Total_Amount'] / sale['Quantity'],
    axis='columns',
)
print(df[['Product', 'Quantity', 'Total_Amount', 'Revenue_Per_Unit']].head())

# Series.apply() - Element-wise operations
print("\nUsing Series.apply() - Categorize customers by age:")
def age_category(age):
    """Map an age to its age-group label.

    The brackets use exclusive upper bounds: below 25 is 'Young', below 45 is
    'Adult', below 65 is 'Middle-aged'; any value that is below none of the
    bounds is labelled 'Senior'.
    """
    brackets = ((25, 'Young'), (45, 'Adult'), (65, 'Middle-aged'))
    # Bounds are listed in ascending order, so the first one that exceeds the age wins.
    for upper_bound, label in brackets:
        if age < upper_bound:
            return label
    return 'Senior'

# Element-wise: label each customer's age with age_category, then count the labels.
df['Age_Group'] = df['Customer_Age'].apply(age_category)
print(df['Age_Group'].value_counts())

# aggregate() - several statistics from a single call (agg is its alias)
print("\nUsing aggregate() - Multiple statistics for Total_Amount:")
amount_stats = df['Total_Amount'].aggregate(
    ['count', 'mean', 'median', 'std', 'min', 'max']
)
print(amount_stats.round(2))

# DataFrame.mean() - restricted to the numeric columns
print(f"\nDataFrame mean values:")
numerical_means = df.select_dtypes(include='number').mean()
print(numerical_means.round(2))

# Series.mean() - one mean of Total_Amount per Region group
print(f"\nSeries mean by Region:")
region_means = df.groupby('Region')['Total_Amount'].mean()
print(region_means.round(2))

# Mean Absolute Deviation
# Series.mad() was deprecated in pandas 1.5 and removed in pandas 2.0, so calling it
# raises AttributeError on current versions. The statistic is computed explicitly
# instead: the mean of the absolute deviations from the mean.
def mean_absolute_deviation(series):
    """Return the mean absolute deviation of ``series`` around its own mean.

    Args:
        series: Numeric pandas Series.

    Returns:
        The mean of ``|x - mean(x)|``; missing values are skipped because
        ``Series.mean()`` skips them by default.
    """
    return (series - series.mean()).abs().mean()


print(f"\nMean Absolute Deviation:")
print(f"Total Amount MAD: {mean_absolute_deviation(df['Total_Amount']):.2f}")
print(f"Customer Age MAD: {mean_absolute_deviation(df['Customer_Age']):.2f}")

# Same statistic for another series, kept in a variable before printing
satisfaction_mad = mean_absolute_deviation(df['Customer_Satisfaction'])
print(f"Customer Satisfaction MAD: {satisfaction_mad:.2f}")

# sem() - standard error of the mean for two numeric columns
print(f"\nStandard Error of Mean:")
total_amount_sem = df['Total_Amount'].sem()
print(f"Total Amount SEM: {total_amount_sem:.2f}")
customer_age_sem = df['Customer_Age'].sem()
print(f"Customer Age SEM: {customer_age_sem:.2f}")

# value_counts() - frequency of each distinct value in a column
print(f"\nValue counts analysis:")
print(f"Product distribution:")
product_counts = df['Product'].value_counts()
print(product_counts)

print(f"\nPayment Method distribution:")
payment_counts = df['Payment_Method'].value_counts()
print(payment_counts)

# Index.value_counts() - the same counting, called on an Index built from a column
print(f"\nMonth distribution (using Index):")
month_index = pd.Index(df['Month'])
month_counts = month_index.value_counts()
# sort_index() orders the counts by month number rather than by frequency
print(month_counts.sort_index())

# Lambda functions with apply
print(f"\nApplying Lambda functions:")
# Compute the 80th-percentile cut-off once. Calling quantile() inside the lambda
# recomputed the same value for every element of the column.
high_value_threshold = df['Total_Amount'].quantile(0.8)
df['High_Value_Flag'] = df['Total_Amount'].apply(
    lambda amount: 'High' if amount > high_value_threshold else 'Normal'
)
print(df['High_Value_Flag'].value_counts())

# Satisfaction bands are tested from the highest threshold down:
# >= 4.5 'Excellent', >= 3.5 'Good', >= 2.5 'Average', otherwise 'Poor'.
df['Satisfaction_Level'] = df['Customer_Satisfaction'].apply(
    lambda x: 'Excellent' if x >= 4.5 else 'Good' if x >= 3.5 else 'Average' if x >= 2.5 else 'Poor'
)
print(df['Satisfaction_Level'].value_counts())