# Member vs Normal Customer Proportions Analysis

This notebook analyzes the difference in proportions between Member and Normal customers.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import norm, f

In [None]:
# Load the supermarket sales dataset
df = pd.read_csv("../data/supermarket_sales.csv")

In [None]:
df.head()

### Difference of proportion of Member and Normal

In [None]:
members = df[df['Customer type'] == 'Member']
normals = df[df['Customer type'] == 'Normal']

In [None]:
phat1 = len(members)/len(df["Customer type"]) #proportion of member customers
phat2 = len(normals)/len(df["Customer type"]) #proportion of normal customers

In [None]:
n1 = len(members) #sample size for member customers (no. member customers)
n2 = len(normals) #sample size for normal customers (no. normal customers)

#we know that p^1 & p^2 follows normal with (p^1, sqrt(p^1q^1/n)) and (p^2, sqrt(p^2q^2/n))

In [None]:
qhat1 = 1 - phat1
qhat2 = 1 - phat2

In [None]:
#now the difference p1 - p2 also follows normal with (p^1 - p^2, sqrt(p^1q^1/n1  + p^2q^2/n2 ))
#lets calculate the actual values
meanofp1p2 = phat1 - phat2
stddev =np.sqrt(phat1*(qhat2)/n1 + phat2*(qhat2)/n2)
#now the point estimator is p^1 - p^2
print(f"std dev is: {stddev}, n1 = {n1}, n2 = {n2}, phat1 = {phat1}, phat2 = {phat2}")
thetahatofp1p2 = phat1 - phat2
print(f"Point estimate is: {thetahatofp1p2}")

std dev is: 0.03165438358268883, n1 = 501, n2 = 499, phat1 = 0.501, phat2 = 0.499
Point estimate is: 0.0020000000000000018


In [None]:
#now the interval estimator .. n is large so the estimator is (p^1-p^2 +- Z(0.025) * sqrt(p^1q^1/n1  + p^2q^2/n2 ) )
from scipy.stats import norm
zalphaovertwo = norm.ppf(1-0.025)
print(zalphaovertwo)
upperlimit = meanofp1p2 + (zalphaovertwo*(stddev))
lowerlimit = meanofp1p2 - (zalphaovertwo*(stddev))
intervalestimateofp1p2 = (float(upperlimit), float(lowerlimit))
print(intervalestimateofp1p2)

1.959963984540054
(0.06404145177488607, -0.06004145177488607)


In [None]:
#Since the interval estimator is inconclusive we will use hypothesis testing

pivotalquantityZ = (phat1 - phat2)/(np.sqrt((phat1*qhat1/n1)+ (phat2*qhat2/n2)))
zalpha = norm.ppf(1-0.05)
zalpha
if(pivotalquantityZ < -zalpha):

    print(f"Since Z = {pivotalquantityZ}< {-zalpha}, then we reject H0")
else:
    print(f"Since Z= {pivotalquantityZ} > {-zalpha}, then we accept H0, therefore we conclude that the proportion of members in general is higher than the normal")


Since Z= 0.06324555320336765 > -1.6448536269514722, then we accept H0, therefore we conclude that the proportion of members in general is higher than the normal
