# Customer Behavior Statistics Project

In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats


In [None]:

# Load the customer-behaviour dataset (path is relative to the working
# directory) and preview its first five rows as the cell output.
df = pd.read_csv(filepath_or_buffer="customer_behavior.csv")
df.head(n=5)


In [None]:

# Print the frame summary, then show how many missing values each column has.
df.info()
df.isna().sum()


In [None]:

# Central-tendency measures of the purchase amounts.
purchase_amount = df["PurchaseAmount"]

mean_val, median_val = purchase_amount.mean(), purchase_amount.median()
# mode() returns a Series (there may be ties); keep the entry labelled 0.
mode_val = purchase_amount.mode()[0]

mean_val, median_val, mode_val


In [None]:

# Flag outliers with the 1.5 * IQR rule: anything strictly below
# Q1 - 1.5 * IQR or strictly above Q3 + 1.5 * IQR.
Q1 = df["PurchaseAmount"].quantile(0.25)
Q3 = df["PurchaseAmount"].quantile(0.75)
IQR = Q3 - Q1

lower_fence = Q1 - 1.5 * IQR
upper_fence = Q3 + 1.5 * IQR
below_lower = df["PurchaseAmount"].lt(lower_fence)
above_upper = df["PurchaseAmount"].gt(upper_fence)
outliers = df[below_lower | above_upper]

# Number of rows flagged as outliers.
len(outliers)


In [None]:

# Box-and-whisker plot of the purchase amounts.
plt.boxplot(x=df["PurchaseAmount"])
plt.title(label="Boxplot of Purchase Amount")
plt.show()


In [None]:

# Shape of the purchase-amount distribution: (skewness, kurtosis).
df["PurchaseAmount"].skew(), df["PurchaseAmount"].kurt()


In [None]:

# Histogram of the purchase amounts using 30 bins.
plt.hist(x=df["PurchaseAmount"], bins=30)
plt.title(label="Distribution of Purchase Amount")
plt.show()


In [None]:

# Compare purchase amounts between genders with an independent two-sample
# t-test that does not assume equal variances (Welch's test).
is_male = df["Gender"] == "Male"
is_female = df["Gender"] == "Female"
male = df.loc[is_male, "PurchaseAmount"]
female = df.loc[is_female, "PurchaseAmount"]

stats.ttest_ind(male, female, equal_var=False)


In [None]:

# Chi-square test of independence between product category and churn,
# run on their cross-tabulated counts.
contingency = pd.crosstab(index=df["ProductCategory"], columns=df["Churn"])
stats.chi2_contingency(observed=contingency)


In [None]:

# One-way ANOVA: does the mean purchase amount differ across regions?
# Build one array of purchase amounts per region, then pass them all to the test.
purchases_by_region = df.groupby("Region")["PurchaseAmount"]
region_groups = [amounts.values for _, amounts in purchases_by_region]
stats.f_oneway(*region_groups)


In [None]:

# Average purchase amount for each campaign group.
df.groupby("CampaignGroup")["PurchaseAmount"].agg("mean")


In [None]:

# Bar chart of the mean purchase amount per campaign group.
campaign_means = df.groupby("CampaignGroup")["PurchaseAmount"].mean()
campaign_means.plot.bar()
plt.title("Campaign Performance")
plt.show()


In [None]:

# Shapiro-Wilk normality test on a reproducible random subsample of the
# purchase amounts (at most 500 rows).
# Series.sample draws without replacement and raises ValueError when asked for
# more rows than exist, so cap the request at the size of the frame.
shapiro_n = min(500, len(df))
sample = df["PurchaseAmount"].sample(shapiro_n, random_state=42)
stats.shapiro(sample)


In [None]:

# Central Limit Theorem demo: draw 1000 random samples of 50 purchase amounts
# each and plot the histogram of their means.
# One seeded generator is shared by all draws so the samples differ from each
# other but the resulting figure is identical from run to run.
clt_rng = np.random.RandomState(42)
sample_means = [
    df["PurchaseAmount"].sample(50, random_state=clt_rng).mean()
    for _ in range(1000)
]

plt.hist(sample_means, bins=30)
plt.title("Central Limit Theorem")
plt.show()


In [None]:

# 95% t-based confidence interval for the mean purchase amount.
# Drop missing values once so the degrees of freedom, the mean and the standard
# error all come from the same observations: Series.mean() skips NaN, whereas
# len(df) counts those rows and scipy.stats.sem propagates NaN by default,
# which would turn the whole interval into (nan, nan).
purchase_amounts = df["PurchaseAmount"].dropna()
stats.t.interval(
    0.95,  # confidence level; positional because its keyword name changed across SciPy releases
    len(purchase_amounts) - 1,  # degrees of freedom
    loc=purchase_amounts.mean(),
    scale=stats.sem(purchase_amounts),
)
