In [None]:
import math

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.stats import chi2

def make_list(file_path):
    """Read a header-less CSV and return all of its cells as one flat list.

    The file is parsed with ``pandas.read_csv(..., header=None)`` and the
    cells are emitted row by row, left to right.

    Args:
        file_path: Anything ``pandas.read_csv`` accepts as its first argument.

    Returns:
        A flat ``list`` holding every cell value of the CSV.
    """
    rows = pd.read_csv(file_path, header=None).values.tolist()
    return [cell for row in rows for cell in row]

def mean_continuous_distribution(data):
    """Return the arithmetic mean of ``data`` (sum divided by element count).

    Raises:
        ZeroDivisionError: If ``data`` is empty.
    """
    total = sum(data)
    count = len(data)
    return total / count

def variance_continuous_distribution(data,mean):
    """Return the mean squared deviation of ``log(value)`` from ``mean``.

    NOTE(review): despite the generic name, every element is passed through
    ``np.log`` before the deviation is taken, so ``mean`` is presumably the
    mean of the log-values -- confirm against the intended caller.

    Args:
        data: Sized iterable of numbers; each is passed to ``np.log``.
        mean: Centre value subtracted from each log-value.

    Returns:
        Sum of squared deviations divided by ``len(data)``.
    """
    accumulated = 0
    for value in data:
        deviation = np.log(value) - mean
        accumulated += deviation ** 2
    return accumulated / len(data)



def find_interval_boundaries(expo_function_string,n_intervals):
    """Build ``n_intervals`` contiguous half-open intervals covering [0, inf).

    The boundary function is evaluated at ``1 .. n_intervals - 1``; each
    result becomes the upper bound of one interval and the lower bound of
    the next.  The first interval starts at 0 and the last one ends at
    ``float('inf')``.  Each computed upper bound is printed.

    Args:
        expo_function_string: Either a callable taking the interval index,
            or a string holding a Python expression that evaluates to such
            a callable (e.g. ``"lambda x: ..."``).
        n_intervals: Number of intervals to produce; must be at least 1.

    Returns:
        A list of ``(lower_bound, upper_bound)`` tuples.

    Raises:
        ValueError: If ``n_intervals`` is smaller than 1.
    """
    if n_intervals < 1:
        raise ValueError("n_intervals must be at least 1")

    if callable(expo_function_string):
        f = expo_function_string
    else:
        # NOTE: eval executes arbitrary code -- only pass strings built by
        # trusted code (never raw user input); prefer passing a callable.
        f = eval(expo_function_string)

    interval_boundaries = []
    lower_bound = 0
    for i in range(1, n_intervals):
        upper_bound = f(i)
        print("upper bound is", upper_bound)
        interval_boundaries.append((lower_bound, upper_bound))
        lower_bound = upper_bound

    # The open-ended tail interval; with a single interval this is (0, inf),
    # a case that previously crashed on indexing an empty list.
    interval_boundaries.append((lower_bound, float('inf')))

    return interval_boundaries


def sum_chi_squared_values(chi_square_table_list):
    """Add up the chi-squared contribution (fifth field) of every table row.

    Args:
        chi_square_table_list: Iterable of 5-element rows whose last element
            is that row's chi-squared contribution.

    Returns:
        The running total of the contributions (``0`` for an empty table).
    """
    running_total = 0
    for row in chi_square_table_list:
        # Rows must have exactly five fields; only the last one is used.
        _lower, _upper, _observed, _expected, contribution = row
        running_total += contribution
    return running_total


def create_chi_square_table_list(merged_data):
    """Append the chi-squared contribution to every interval row.

    Args:
        merged_data: Iterable of
            ``(lower_bound, upper_bound, frequency, expected_freq)`` rows.

    Returns:
        A list of ``(lower_bound, upper_bound, frequency, expected_freq,
        chi_squared)`` tuples, where
        ``chi_squared = (frequency - expected_freq) ** 2 / expected_freq``.

    Raises:
        ZeroDivisionError: If a row has ``expected_freq == 0``.
    """
    return [
        (low, high, observed, expected, ((observed - expected) ** 2) / expected)
        for low, high, observed, expected in merged_data
    ]


def find_expected_freq(lst,expected_freq):
    """Attach the same expected frequency to every interval row.

    Args:
        lst: Iterable of ``(lower_bound, upper_bound, frequency)`` rows.
        expected_freq: Value appended to each row as its fourth field.

    Returns:
        A list of ``(lower_bound, upper_bound, frequency, expected_freq)``
        tuples, in the input order.
    """
    return [
        (low, high, observed, expected_freq)
        for low, high, observed in lst
    ]


def find_freq_table(data, interval_boundaries):
    """Count how many values of ``data`` fall into each interval.

    A value ``x`` belongs to the first interval for which
    ``lower_bound <= x < upper_bound``.  Values that match no interval are
    ignored.

    Every interval appears in the result, including those with no
    observations (count ``0``).  Omitting empty intervals would drop their
    contribution from a chi-square statistic and shrink the interval count
    used for the degrees of freedom.

    Args:
        data: Iterable of numeric observations.
        interval_boundaries: Sequence of hashable
            ``(lower_bound, upper_bound)`` tuples.

    Returns:
        A dict mapping each interval tuple to its observed count, in the
        order the intervals were given.
    """
    freq_table = dict.fromkeys(interval_boundaries, 0)

    for x in data:
        for interval in interval_boundaries:
            lower_bound, upper_bound = interval
            if lower_bound <= x < upper_bound:
                freq_table[interval] += 1
                break  # an observation is counted in at most one interval

    return freq_table


def func_alpha(alpha, second_term, data):
    """Evaluate the function whose root ``newton_raphson_calc`` searches for.

    Computes, with ``n = len(data)``::

        n / alpha + second_term - n * sum(x**alpha * ln x) / sum(x**alpha)

    Args:
        alpha: Current value of the parameter being solved for.
        second_term: Precomputed additive term (the caller in this file
            passes the sum of ``ln x`` over ``data``).
        data: Sized iterable of numbers; each is passed to ``np.log``.

    Returns:
        The value of the expression above.
    """
    sample_size = len(data)
    weighted_log_sum = sum((x ** alpha) * np.log(x) for x in data)
    power_sum = sum((x ** alpha) for x in data)
    return sample_size / alpha + second_term - sample_size * weighted_log_sum / power_sum

def func_alpha_derivative(n, data, alpha):
    """Evaluate the derivative of ``func_alpha`` with respect to ``alpha``.

    With ``S0 = sum(x**alpha)``, ``S1 = sum(x**alpha * ln x)`` and
    ``S2 = sum(x**alpha * (ln x)**2)`` the result is::

        -n / alpha**2 - n * S2 / S0 + n * S1**2 / S0**2

    Args:
        n: Multiplier used in every term (the caller in this file passes
            ``len(data)``).
        data: Iterable of numbers; each is passed to ``np.log``.
        alpha: Point at which the derivative is evaluated.

    Returns:
        The value of the expression above.
    """
    power_sum = sum((x ** alpha) for x in data)
    log_weighted_sum = sum((x ** alpha) * np.log(x) for x in data)
    log_sq_weighted_sum = sum((x ** alpha) * (np.log(x) ** 2) for x in data)

    curvature_term = n * log_sq_weighted_sum / power_sum
    squared_ratio_term = n * (log_weighted_sum ** 2) / (power_sum ** 2)
    return -(n / (alpha ** 2)) - curvature_term + squared_ratio_term


def newton_raphson_calc(alpha_0,data,tolerance=0.0000001,max_iterations=1000):
    """Find a root of ``func_alpha`` by Newton-Raphson iteration.

    Starting from ``alpha_0`` the estimate is updated with
    ``alpha - func_alpha(alpha) / func_alpha_derivative(alpha)`` until two
    successive estimates differ by less than ``tolerance``.

    Args:
        alpha_0: Initial guess.
        data: Sized iterable of numbers; each is passed to ``np.log``.
        tolerance: Stop once ``abs(current - previous)`` drops below this.
        max_iterations: Upper limit on update steps, so that a diverging or
            oscillating iteration cannot loop forever.

    Returns:
        The converged estimate.  If the update produces NaN the NaN is
        returned, because the convergence comparison is then false.

    Raises:
        RuntimeError: If no convergence is reached within ``max_iterations``.
    """
    n = len(data)
    # The sum of logs does not depend on alpha, so compute it only once.
    second_term = sum(np.log(x) for x in data)

    current = alpha_0
    for _ in range(max_iterations):
        previous = current
        current = previous - (func_alpha(previous, second_term, data) / func_alpha_derivative(n, data, previous))
        # Written as "not >=" so a NaN step terminates the loop as well.
        if not abs(current - previous) >= tolerance:
            return current

    raise RuntimeError(
        f"Newton-Raphson did not converge within {max_iterations} iterations"
    )

def find_beta(alpha, data):
    """Return ``(mean of x**alpha over data) ** (1 / alpha)``.

    Args:
        alpha: Exponent applied to every element; its reciprocal is applied
            to the averaged result.
        data: Sized iterable of numbers.

    Returns:
        The computed value ``beta``.
    """
    sample_size = len(data)
    power_sum = sum((x ** alpha) for x in data)
    mean_power = (1 / sample_size) * power_sum
    return mean_power ** (1 / alpha)

def weibull_function_string(alpha,beta,p):
    """Return Python source text for a one-argument lambda.

    The generated lambda computes
    ``beta * (-math.log(1 - x * p)) ** (1 / alpha)`` with the given values
    embedded as literals.  The text refers to ``math``, so that module must
    be importable wherever the string is evaluated.

    Args:
        alpha: Value substituted into the exponent ``1/alpha``.
        beta: Value substituted as the leading factor.
        p: Value multiplied with the lambda's argument ``x``.

    Returns:
        The lambda expression as a ``str``.
    """
    template = "lambda x: ({beta}* (-math.log(1-x*{p}))**(1/{alpha}))"
    return template.format(alpha=alpha, beta=beta, p=p)


# Driver: fit a two-parameter Weibull distribution (alpha, beta) to a sample
# and run a chi-square goodness-of-fit test over equal-probability intervals.
#
# To test measured data instead of the synthetic sample below, load it with:
# x=make_list("/content/n7.csv")

x=np.random.weibull(2,20000)
mean=np.mean(x)
variance=np.var(x)

print("mean value is :",mean)
print("variance value is :",variance)

# mean / standard deviation only serves as the Newton-Raphson starting guess.
alpha_0=mean/math.sqrt(variance)

alpha=newton_raphson_calc(alpha_0,x)
beta=find_beta(alpha,x)

print("alpha value is :",alpha)
print("beta value is :",beta)

# Both alpha and beta are estimated from the sample, so two degrees of
# freedom are lost in addition to the usual one.
parameters=2
significance_level=0.05

n_intervals = int(input("Enter the number of intervals: "))
if n_intervals < parameters + 2:
    # Fewer intervals would leave no positive degrees of freedom.
    raise ValueError(
        f"need at least {parameters + 2} intervals for a chi-square test "
        f"with {parameters} estimated parameters, got {n_intervals}"
    )
print("the number of intervals are",n_intervals)

expected_freq=len(x)/n_intervals
print("expected frequency is:",expected_freq)

# Probability mass of each interval (not a statistical p-value).
p_value = 1/n_intervals
print("p_value is :",p_value)

pdf_str=weibull_function_string(alpha,beta,p_value)

interval_list=find_interval_boundaries(pdf_str,n_intervals)
print(interval_list)


l=find_freq_table(x,interval_list)#l is a dictionary so i have converted it into a list
sorted_dict = {k: l[k] for k in sorted(l)}
print(sorted_dict)

# Walk the interval list itself so that intervals without any observation
# still enter the table with a frequency of 0.
lst = [(lower, upper, l.get((lower, upper), 0)) for lower, upper in interval_list]
print(lst)

expected_freq_list=find_expected_freq(lst,expected_freq)
print(expected_freq_list)

chi_square_table_list = create_chi_square_table_list(expected_freq_list)
print( chi_square_table_list)

total_chi_squared = sum_chi_squared_values(chi_square_table_list)
print("Total chi-squared value:", total_chi_squared)

degree_of_freedom=len(chi_square_table_list)-parameters-1
print(degree_of_freedom)

# Critical value for the chosen significance level and the actual degrees of
# freedom, instead of a constant that is only valid for one interval count.
chi_square_table_value_at_given_significance_level_with_dof=chi2.ppf(1-significance_level,degree_of_freedom)

if total_chi_squared < chi_square_table_value_at_given_significance_level_with_dof:
    print("we accept the hypothesis i.e given data follows the weibull distribution")
else:
    print("we reject the hypothesis i.e given data doesn't follow the weibull distribution")


mean value is : 0.8889598461754602
variance value is : 0.21579371549025744
alpha value is : 1.9996992225172994
beta value is : 1.0029852530644137
Enter the number of intervals: 5
the number of intervals are 5
expected frequency is: 4000.0
p_value is : 0.2
upper bound is 0.4737374605921059
upper bound is 0.7168180704935934
upper bound is 0.9600820259719396
upper bound is 1.2724689813081032
[(0, 0.4737374605921059), (0.4737374605921059, 0.7168180704935934), (0.7168180704935934, 0.9600820259719396), (0.9600820259719396, 1.2724689813081032), (1.2724689813081032, inf)]
{(0, 0.4737374605921059): 4018, (0.4737374605921059, 0.7168180704935934): 3904, (0.7168180704935934, 0.9600820259719396): 4080, (0.9600820259719396, 1.2724689813081032): 4050, (1.2724689813081032, inf): 3948}
[(0, 0.4737374605921059, 4018), (0.4737374605921059, 0.7168180704935934, 3904), (0.7168180704935934, 0.9600820259719396, 4080), (0.9600820259719396, 1.2724689813081032, 4050), (1.2724689813081032, inf, 3948)]
[(0, 0.4737