In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Use seaborn's "whitegrid" style for the plots in this notebook.
sns.set_style(style="whitegrid")


In [None]:
# Load the customer dataset. The path is relative to the working directory,
# so point it at wherever your copy of the CSV lives.
df = pd.read_csv(filepath_or_buffer="ecommerce_customer_data.csv")

# Print the first five rows as a quick check that the file loaded as expected.
print(df.head(n=5))


In [None]:
# Report how many missing values each column has before any cleaning.
print(df.isnull().sum())

# Inclusive age bounds for the rows kept in the analysis; ages outside this
# range are treated as outliers and removed.
MIN_AGE, MAX_AGE = 15, 100

# Build the cleaned frame in a single non-mutating chain (no `inplace=True`),
# so the frame object produced by the load step is never modified in place:
#   1. drop every row that has a missing value in any column;
#   2. keep only customers whose age is within MIN_AGE..MAX_AGE (both ends
#      included, matching `>= MIN_AGE` and `<= MAX_AGE`).
# The result is bound back to `df` because the plotting cells below read `df`.
df = (
    df
    .dropna()
    .loc[lambda frame: frame['Age'].between(MIN_AGE, MAX_AGE)]
)


In [None]:
# Plain histogram of customer ages: 20 bins, no density curve.
# histplot returns the Axes it drew on, so the labels are set on that object.
ax = sns.histplot(data=df['Age'], bins=20, kde=False)
ax.set_title("Customer Age Distribution")
ax.set_xlabel("Age")
ax.set_ylabel("Number of Customers")
plt.show()


In [None]:
# Same 20-bin age histogram, this time with a KDE curve drawn on top.
ax = sns.histplot(data=df['Age'], bins=20, kde=True)
ax.set(
    title="Customer Age Distribution with KDE",
    xlabel="Age",
    ylabel="Frequency",
)
plt.show()


In [None]:
# Age histogram with KDE again, using 40 bins instead of 20 for a finer view.
ax = sns.histplot(data=df['Age'], kde=True, bins=40)
ax.set_title("Age Distribution with More Bins")
ax.set_xlabel("Age")
ax.set_ylabel("Frequency")
plt.show()


In [None]:
# Age histogram split by the 'Gender' column (one colour per value),
# 30 bins, with a KDE curve for each group.
ax = sns.histplot(data=df, x='Age', hue='Gender', kde=True, bins=30)
ax.set(
    title="Customer Age Distribution by Gender",
    xlabel="Age",
    ylabel="Frequency",
)
plt.show()


In [None]:
# Overlay one age density curve per gender on the same axes.
# Each curve is estimated from that gender's rows only, drawn in its own
# colour, and labelled so it appears in the legend.
for gender, colour in (('Male', 'blue'), ('Female', 'red')):
    rows_for_gender = df[df['Gender'] == gender]
    ax = sns.kdeplot(data=rows_for_gender, x='Age', label=gender, color=colour)

ax.set_title("Age Distribution KDE: Male vs Female")
ax.set_xlabel("Age")
ax.set_ylabel("Density")
ax.legend()
plt.show()


In [None]:
# Gender-split age histogram (30 bins, KDE) with more descriptive axis labels.
ax = sns.histplot(data=df, x='Age', hue='Gender', bins=30, kde=True)
ax.set_title("Customer Age Distribution by Gender")
ax.set_xlabel("Customer Age")
ax.set_ylabel("Number of Customers")
plt.show()


In [None]:
# Gender-split age histogram drawn with the 'Set2' colour palette.
ax = sns.histplot(
    data=df,
    x='Age',
    hue='Gender',
    bins=30,
    kde=True,
    palette='Set2',
)
ax.set(
    title="Age Distribution by Gender (Custom Colors)",
    xlabel="Age",
    ylabel="Frequency",
)
plt.show()


In [None]:
# Draw the final gender-split age histogram on an explicit 8x5 inch figure
# and write it to disk as a 300-dpi PNG.
fig, ax = plt.subplots(figsize=(8, 5))
sns.histplot(
    data=df,
    x='Age',
    hue='Gender',
    bins=30,
    kde=True,
    palette='Set2',
    ax=ax,
)
ax.set_title("Final Age Distribution by Gender")
ax.set_xlabel("Customer Age")
ax.set_ylabel("Frequency")

# Tighten the layout before saving so labels are not clipped in the image.
fig.tight_layout()
fig.savefig("age_distribution_by_gender.png", dpi=300)

# Close the figure once it has been saved.
plt.close(fig)


In [None]:
# Re-draw the final gender-split age histogram and display it.
ax = sns.histplot(
    data=df,
    x='Age',
    hue='Gender',
    palette='Set2',
    bins=30,
    kde=True,
)
ax.set_title("Final Age Distribution by Gender")
ax.set_xlabel("Customer Age")
ax.set_ylabel("Frequency")
plt.show()
