# Import Libraries

In [2]:
from ecofuture.preproc.chiplets import load_chiplets
import glob
import pathlib
import itertools
import random
from tqdm import tqdm
from collections import Counter
from scipy.stats import chi2_contingency


# Sampling Chips 

In [3]:
# Pick every 100th instance number: 1, 101, ..., 12901 (130 values).
subset_instance_nums = [*range(1, 13000, 100)]

# Directory handed to load_chiplets as the chiplet source.
chiplet_root = pathlib.Path("/data/projects/punim1932/Data/chiplets/level4")

# Load only the sampled instances of subset "3" for the "level4" measurement.
chiplets = load_chiplets(
    chiplet_dir=chiplet_root,
    subset_nums=["3"],
    measurement="level4",
    subset_instance_nums=subset_instance_nums,
)

In [4]:
# Reduce every loaded chiplet to a plain (year, data, position) tuple,
# keeping the order in which load_chiplets produced them.
chiplet_data = [(c.year, c.data, c.position) for c in chiplets]

# Hypothesis Testing

In [38]:
# Significance level: each chi-square p-value in the test loop is compared
# against this threshold.
alpha = 0.3

In [26]:
# Map each position to the list of value-count tables (one Counter per
# chiplet at that position), appended in the order chiplet_data is iterated.
# NOTE(review): the year is not kept here, yet the later pairwise test compares
# neighbouring list entries -- presumably chiplet_data is year-ordered; confirm.
position_distributions = {}
for _year, pixels, position in chiplet_data:
    # Count how often each value occurs in the flattened chiplet data.
    value_counts = Counter(pixels.flatten())
    position_distributions.setdefault(position, []).append(value_counts)

In [41]:
# For every position, run a chi-square test between each pair of neighbouring
# distributions and tally how many of them fall below the significance level.
failed_hypothesis = 0
total_hypothesis = 0
for distributions in position_distributions.values():
    # Pair each distribution with the one that follows it in the list.
    for earlier, later in zip(distributions, distributions[1:]):
        # One row per value seen in either distribution; the two columns are
        # that value's count in the earlier and in the later distribution.
        contingency_table = [
            [earlier.get(tfid, 0), later.get(tfid, 0)]
            for tfid in set(earlier.keys()).union(later.keys())
        ]

        # Only the p-value is needed from the chi-square result.
        _, p_value, _, _ = chi2_contingency(contingency_table)

        # A p-value below alpha counts as a failed hypothesis.
        if p_value < alpha:
            failed_hypothesis += 1
        total_hypothesis += 1

In [42]:
failed_hypothesis/total_hypothesis * 100 # Percentage of tests with p_value < alpha; recorded at alpha = 0.05

99.94871794871794

In [24]:
failed_hypothesis/total_hypothesis * 100 # Percentage of tests with p_value < alpha; recorded at alpha = 0.3

99.94871794871794

In [None]:
# Define the significance level for the hypothesis test: a chi-square p-value
# below alpha is counted as a failed hypothesis in the test loop.
alpha = 0.05