# Variance Comparison Between Yangon and Naypyitaw

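This notebook compares the variance of total sales between two cities, Yangon and Naypyitaw, using an F-test (an F-distribution confidence interval for the ratio of the two variances).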

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import norm, f

In [None]:
# Read the supermarket sales CSV (path is relative to this notebook) into a DataFrame.
df = pd.read_csv(filepath_or_buffer="../data/supermarket_sales.csv")

In [None]:
# Preview the first five rows to sanity-check the load.
df.head(n=5)

### Variance comparison in two cities

In [None]:
# Split the sales records by city and pull out each city's "Total" column.
cityY = df[df['City'] == 'Yangon']
cityN = df[df['City'] == 'Naypyitaw']
total_cityY = cityY["Total"]
total_cityN = cityN["Total"]

# Sample variances of "Total" per city. Series.var() uses ddof=1 by default
# (the n-1 estimator), the same convention as Series.std(); calling it directly
# avoids taking a square root and then squaring the result again.
SvarY = total_cityY.var()
SvarN = total_cityN.var()

In [None]:
# Point estimate and 95% confidence interval for the variance ratio
# sigma_1^2 / sigma_2^2, where sample 1 is Yangon and sample 2 is Naypyitaw.
#
# Uses `f` (scipy.stats), which is imported in the notebook's import cell,
# and SvarY / SvarN / total_cityY / total_cityN from the previous cell.

# Sample variances
s1_sq = SvarY   # sample variance of "Total" in Yangon (sample 1)
s2_sq = SvarN   # sample variance of "Total" in Naypyitaw (sample 2)

# Sample sizes. count() ignores missing values, exactly like the variance
# computation does, so the degrees of freedom match the observations that
# actually entered s1_sq and s2_sq (len() would also count NaN rows).
m1 = int(total_cityY.count())  # sample size for city 1
m2 = int(total_cityN.count())  # sample size for city 2

# A variance needs at least two observations; fail loudly instead of
# printing NaN results (e.g. when a city name matches no rows).
if m1 < 2 or m2 < 2:
    raise ValueError(
        f"Need at least 2 observations per city to compare variances "
        f"(got m1 = {m1}, m2 = {m2})."
    )

alpha = 0.05  # significance level -> 95% confidence interval

# Degrees of freedom of the numerator (sample 1) and denominator (sample 2)
df1 = m1 - 1
df2 = m2 - 1

# F critical values
F_lower = f.ppf(alpha / 2, df1, df2)        # F_{alpha/2}
F_upper = f.ppf(1 - alpha / 2, df1, df2)    # F_{1-alpha/2}

# Point estimate of sigma_1^2 / sigma_2^2
ratio_hat = s1_sq / s2_sq

# Confidence interval: dividing by the upper critical value gives the lower
# limit and dividing by the lower critical value gives the upper limit.
lower_limit = ratio_hat / F_upper
upper_limit = ratio_hat / F_lower
print(f"df1 = {df1}, df2 = {df2}, m1 = {m1}, m2 ={m2} , s1_sq = {s1_sq}, s2_sq = {s2_sq}")
print(f"Interval estimate is : {float(lower_limit), float(upper_limit)}")
print(f"Point estimate is S1^2/ S2^2: {ratio_hat}")

# Interpretation (sample 1 = Yangon, sample 2 = Naypyitaw):
# - A ratio below 1 means the sample variance of Naypyitaw (sample 2) is larger
#   than the sample variance of Yangon (sample 1).
# - In the recorded run the point estimate S1^2 / S2^2 is about 0.7748 and the
#   95% confidence interval (0.6246, 0.9606) lies entirely below 1, so we
#   conclude that the variance of total sales in Naypyitaw is significantly
#   larger than the variance in Yangon at the 5% level.
# NOTE(review): the F-based interval assumes both samples are approximately
# normally distributed -- confirm this is reasonable for "Total".


df1 = 339, df2 = 327, m1 = 340, m2 =328 , s1_sq = 53657.100965952595, s2_sq = 69253.64345528162
Interval estimate is : (0.6246352383662521, 0.9606364285052473)
Point estimate is S1^2/ S2^2: 0.7747910187656768
