In [None]:
import math
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

def sum_chi_squared_values(chi_square_table_list):
    """Add up the chi-squared contribution of every row in the table.

    Each row must be a 4-item sequence whose last item is the row's
    chi-squared term; the first three items are ignored here.
    Returns 0 for an empty table.
    """
    running_total = 0

    # Explicit accumulation (rather than sum()) keeps plain left-to-right
    # float addition.
    for _point, _observed, _expected, contribution in chi_square_table_list:
        running_total = running_total + contribution

    return running_total


def create_chi_square_table_list(merged_data):
    """Extend each (label, observed, expected) row with its chi-squared term.

    The term is (observed - expected)^2 / expected. Rows come back in the
    same order as 4-tuples (label, observed, expected, chi_squared).
    An expected frequency of zero raises ZeroDivisionError.
    """
    return [
        (label, observed, expected, ((observed - expected) ** 2) / expected)
        for label, observed, expected in merged_data
    ]

def find_expected_freq(lst,lst_length,len):
    """Attach the same expected frequency to every (value, observed) pair.

    The expected frequency is (1 / lst_length) * len, i.e. the sample size
    spread evenly over lst_length classes.

    lst        -- iterable of (value, observed_frequency) pairs
    lst_length -- number of classes to spread the sample over
    len        -- total sample size (NOTE: this parameter shadows the
                  builtin len() inside this function; the name is kept so
                  existing callers are unaffected)

    Returns a list of (value, observed_frequency, expected_frequency).
    """
    return [
        (value, observed, (1 / lst_length) * len)
        for value, observed in lst
    ]


def merge_chi_intervals(data, thresh):
    """Pool adjacent classes so that small expected frequencies are absorbed.

    data   -- list of (label, observed, expected) tuples, in class order
    thresh -- minimum expected frequency a class should have

    A class whose expected frequency is below ``thresh`` is merged into the
    class being accumulated just before it. In addition, if the class being
    accumulated is itself still below ``thresh`` (which can only happen for
    the leading class), the next class is merged into it; previously such a
    leading class was emitted unpooled.

    A merged row has label ``(previous_label, new_label)`` (labels nest when
    more than two classes are pooled) and summed observed and expected
    frequencies. Inputs with zero or one row are returned unchanged. If the
    whole table sums to less than ``thresh`` the single pooled row is still
    returned.
    """
    if len(data) <= 1:
        return data

    merged_data = []
    current_interval = data[0]

    for interval in data[1:]:
        label, observed, expected = interval
        # Pool when either side of the boundary is too small: the incoming
        # class, or the class accumulated so far (leading-class case).
        if expected < thresh or current_interval[2] < thresh:
            current_interval = (
                (current_interval[0], label),
                current_interval[1] + observed,
                current_interval[2] + expected,
            )
        else:
            merged_data.append(current_interval)
            current_interval = interval

    merged_data.append(current_interval)

    return merged_data

def count_frequency(data):
    """Count how many times each item occurs in ``data``.

    Returns a plain dict mapping item -> occurrence count, with keys in
    order of first appearance.
    """
    tally = {}
    for element in data:
        tally[element] = tally.get(element, 0) + 1
    return tally

def make_list(file_path):
    """Read a header-less CSV and return all of its cells as one flat list.

    Cells are taken row by row, left to right. The smallest and largest
    values are printed as a side effect before the list is returned.
    """
    frame = pd.read_csv(file_path, header=None)
    flattened = [cell for row in frame.values.tolist() for cell in row]

    print("minimum value i.e parameter1:", min(flattened))
    print("maximum value i.e parameter2:", max(flattened))
    return flattened

def mean_continuous_distribution(data):
    """Return the arithmetic mean of ``data`` (sum divided by length).

    Raises ZeroDivisionError when ``data`` is empty.
    """
    total = sum(data)
    count = len(data)
    return total / count

def variance_continuous_distribution(data):
    """Return the population variance of ``data``.

    This is the mean of squared deviations from the arithmetic mean,
    dividing by len(data) rather than len(data) - 1.
    Raises ZeroDivisionError when ``data`` is empty.
    """
    count = len(data)
    centre = sum(data) / count

    accumulated = 0
    for value in data:
        deviation = value - centre
        accumulated += deviation ** 2

    return accumulated / count


# Chi-square goodness-of-fit check of the sample against a discrete uniform
# distribution. Steps: load data -> observed frequencies -> equal expected
# frequencies -> pool small classes -> chi-squared statistic -> decision.

# Load the sample as one flat list of values.
x=make_list("/content/3.csv")
print(len(x))
print(x)

mean=mean_continuous_distribution(x)
variance=variance_continuous_distribution(x)

print("mean:", mean)
print("variance:" ,variance)

# Observed frequency of each distinct value, ordered by value.
# (Named frequency_by_value rather than `dict` so the builtin dict type is
# not shadowed for the rest of the notebook.)
frequency_by_value = count_frequency(x)
sorted_dict = {k: frequency_by_value[k] for k in sorted(frequency_by_value)}
print(sorted_dict)

lst = [(interval[0],interval[1]) for interval in sorted_dict.items()]
print(lst)

# Every distinct observed value gets the same expected frequency:
# sample size / number of distinct observed values.
expected_freq_list=find_expected_freq(lst,len(lst),len(x))
print(expected_freq_list)

# Pool classes whose expected frequency is below the threshold.
threshold=5
expected_freq_table=merge_chi_intervals(expected_freq_list,threshold)
print(expected_freq_table)

chi_square_table_list = create_chi_square_table_list(expected_freq_table)
print( chi_square_table_list)

total_chi_squared = sum_chi_squared_values(chi_square_table_list)
print("Total chi-squared value:", total_chi_squared)

# Degrees of freedom = classes - estimated parameters - 1; the two
# parameters are the minimum and maximum reported by make_list.
parameters=2
degree_of_freedom=len(chi_square_table_list)-parameters-1
significance_level=0.05
print(degree_of_freedom)

# NOTE(review): this critical value is hard-coded. 12.592 is the tabulated
# chi-square value for 6 degrees of freedom at the 0.05 level; it is NOT
# recomputed from degree_of_freedom or significance_level, so it must be
# updated by hand if either of those differs.
chi_square_table_value_at_given_significance_level_with_dof=12.592

if total_chi_squared < chi_square_table_value_at_given_significance_level_with_dof:
   print("we accept the hypothesis i.e given data follows the descrete uniform distribution")
else:
   print("we reject the hypothesis i.e given data doesn't follow the descrete uniform distribution")



minimum value i.e parameter1: 0
maximum value i.e parameter2: 8
500
[0, 1, 4, 4, 1, 4, 3, 3, 3, 4, 1, 1, 2, 3, 4, 0, 2, 4, 1, 2, 1, 0, 2, 4, 3, 1, 3, 3, 3, 2, 2, 1, 2, 1, 0, 1, 1, 4, 3, 2, 1, 2, 4, 1, 0, 2, 1, 3, 1, 3, 2, 3, 3, 1, 1, 1, 3, 3, 2, 2, 2, 4, 3, 3, 1, 3, 3, 2, 2, 2, 2, 1, 5, 5, 5, 2, 3, 1, 3, 1, 3, 2, 3, 4, 6, 3, 6, 3, 4, 2, 2, 2, 2, 2, 3, 3, 4, 0, 2, 3, 1, 2, 0, 1, 6, 2, 3, 1, 2, 1, 1, 1, 1, 1, 1, 0, 0, 2, 2, 0, 3, 1, 2, 3, 3, 6, 3, 4, 3, 2, 2, 3, 4, 1, 1, 4, 3, 2, 5, 3, 2, 0, 1, 3, 1, 2, 2, 3, 1, 2, 4, 4, 3, 1, 2, 0, 0, 4, 1, 4, 3, 5, 2, 1, 5, 0, 2, 3, 2, 2, 4, 0, 2, 3, 0, 3, 3, 5, 1, 4, 2, 1, 3, 6, 2, 0, 3, 3, 3, 1, 1, 0, 2, 2, 3, 0, 1, 1, 1, 3, 1, 3, 1, 5, 5, 3, 4, 3, 2, 2, 2, 2, 1, 5, 3, 1, 0, 5, 4, 1, 3, 1, 2, 1, 3, 1, 2, 2, 5, 4, 2, 2, 3, 3, 2, 1, 3, 0, 2, 3, 2, 1, 1, 4, 3, 6, 6, 2, 2, 0, 3, 4, 2, 1, 4, 4, 5, 4, 7, 3, 2, 1, 3, 3, 1, 1, 2, 2, 1, 3, 2, 3, 2, 4, 2, 1, 1, 4, 4, 1, 4, 2, 4, 3, 0, 2, 1, 1, 5, 6, 3, 1, 1, 6, 5, 3, 3, 3, 4, 0, 1, 1, 2, 0, 2, 0, 3, 1, 2, 4, 1