# Hypothesis Testing

This notebook performs statistical hypothesis testing to determine if there is a significant difference in the number of charging stations between the European and Asian sides of Istanbul.


In [1]:
# Import required libraries
import pandas as pd
from scipy.stats import chi2
import numpy as np


## Step 1: Load Cleaned Data


In [2]:
# Read the cleaned CSV file
df = pd.read_csv('cleaned_stations.csv')

print(f"Total number of stations: {len(df)}")
print(f"\nFirst few rows:")
print(df.head())


Total number of stations: 2933

First few rows:
  ISTASYON_NO                                                AD  \
0    ŞRJ/2140  KMO FENERKÖY KUZEY OHT1, KMO FENERKÖY KUZEY OHT2   
1    ŞRJ/2672                                      CAROUSEL AVM   
2    ŞRJ/2803                            SERENİTY COMFORT HOTEL   
3    ŞRJ/2804                                        MAYİ HOTEL   
4    ŞRJ/2859                             THE İSTANBUL REZİDANS   

                                               ADRES  \
0  Ali Paşa Mahallesi Merkez Saray Sokağı  Y: 351...   
1  Zeytinlik Mahallesi Halit Uşaklıgil Caddesi  N...   
2  Mahmutbey Mahallesi Payami Safa Caddesi  No:16...   
3  Mahmutbey Mahallesi Payami Safa Caddesi  No:25...   
4  Sümer Mahallesi Pr.dr. Turan Güneş Caddesi  No...   

                                      operator    brand   LATITUDE  LONGITUDE  \
0  BEEFULL ENERJİ TEKNOLOJİLERİ ANONİM ŞİRKETİ  beefull  41.121918  28.228961   
1  BEEFULL ENERJİ TEKNOLOJİLERİ ANONİM ŞİRKETİ  be

## Step 2: Display Side Distribution


In [3]:
# Count stations by side
side_counts = df['side'].value_counts()
print("Number of stations by side:")
print(side_counts)
print(f"\nEurope: {side_counts.get('Europe', 0)} stations")
print(f"Asia: {side_counts.get('Asia', 0)} stations")


Number of stations by side:
side
Europe    1605
Asia      1328
Name: count, dtype: int64

Europe: 1605 stations
Asia: 1328 stations


## Step 3: Define Statistical Hypotheses


In [4]:
# Statistical Hypotheses:
# H0 (Null Hypothesis): There is no significant difference in the number of stations 
#                        between the European and Asian sides.
# H1 (Alternative Hypothesis): There is a significant difference in the number of 
#                              stations between the two sides.

print("H0: There is no significant difference in the number of stations between the European and Asian sides.")
print("H1: There is a significant difference in the number of stations between the two sides.")
print("\nSignificance level (alpha) = 0.05")


H0: There is no significant difference in the number of stations between the European and Asian sides.
H1: There is a significant difference in the number of stations between the two sides.

Significance level (alpha) = 0.05


## Step 4: Perform Chi-Square Test


In [5]:
# Prepare data for chi-square test
# We'll use a chi-square goodness-of-fit test to compare observed vs expected counts
# Under H0, we expect equal distribution (50-50)

observed = side_counts.values
total = observed.sum()
expected = np.array([total / 2, total / 2])  # Expected equal distribution

print("Observed counts:", observed)
print("Expected counts (under H0):", expected)

# Perform chi-square test
chi2_stat = np.sum((observed - expected) ** 2 / expected)
dof = len(observed) - 1  # degrees of freedom

# Calculate p-value using chi-square distribution
p_value = 1 - chi2.cdf(chi2_stat, dof)

print(f"\nChi-square statistic: {chi2_stat:.4f}")
print(f"Degrees of freedom: {dof}")
print(f"P-value: {p_value:.6f}")


Observed counts: [1605 1328]
Expected counts (under H0): [1466.5 1466.5]

Chi-square statistic: 26.1606
Degrees of freedom: 1
P-value: 0.000000


## Step 5: Interpretation


In [6]:
# Interpret the results
alpha = 0.05

print("=" * 60)
print("HYPOTHESIS TEST RESULTS")
print("=" * 60)
print(f"Test Statistic (Chi-square): {chi2_stat:.4f}")
print(f"P-value: {p_value:.6f}")
print(f"Significance Level (alpha): {alpha}")
print("=" * 60)

if p_value < alpha:
    print("\n✓ Reject H0: There is a significant difference in the number of")
    print("  charging stations between the European and Asian sides of Istanbul.")
    print(f"  (p-value = {p_value:.6f} < {alpha})")
else:
    print("\n✗ Fail to reject H0: No significant difference is detected in the")
    print("  number of charging stations between the two sides.")
    print(f"  (p-value = {p_value:.6f} >= {alpha})")

print("\n" + "=" * 60)


HYPOTHESIS TEST RESULTS
Test Statistic (Chi-square): 26.1606
P-value: 0.000000
Significance Level (alpha): 0.05

✓ Reject H0: There is a significant difference in the number of
  charging stations between the European and Asian sides of Istanbul.
  (p-value = 0.000000 < 0.05)

