In [1]:
import pandas as pd
import numpy as np
from scipy.stats import norm

# Function to calculate Wilson/Brown confidence interval
def wilson_brown_confidence_interval(n, p, alpha=0.05):
    """Return the Wilson score confidence interval for a proportion.

    Args:
        n: Number of observations the proportion is based on.
        p: Observed proportion.
        alpha: Significance level; the interval has nominal coverage
            ``1 - alpha`` (two-sided).

    Returns:
        A ``(lower_bound, upper_bound)`` tuple.
    """
    # Two-sided critical value of the standard normal distribution.
    z = norm.ppf(1 - alpha / 2)
    z_squared = z**2

    # Both bounds share the same centre, half-width and rescaling factor.
    rescale = 1 + z_squared / n
    midpoint = p + z_squared / (2 * n)
    half_width = z * np.sqrt(p * (1 - p) / n + z_squared / (4 * n**2))

    return (midpoint - half_width) / rescale, (midpoint + half_width) / rescale

# Function to normalize data
def normalize_data(data, method):
    """Normalize a DataFrame by its column, row, or grand totals.

    The method name is matched ignoring case and surrounding whitespace, so
    ``"Column Total"``, ``"column total"`` and ``"Column total"`` all select
    the same normalization.

    Args:
        data: DataFrame of values to normalize.
        method: One of ``"Column Total"``, ``"Row Total"``, ``"Grand Total"``
            or ``"All"``.

    Returns:
        The normalized DataFrame for a single method; for ``"All"``, a dict
        mapping ``'Column Total'``, ``'Row Total'`` and ``'Grand Total'`` to
        the corresponding normalized DataFrames. An unrecognised method
        returns an unmodified copy of ``data``.
    """
    # Canonical label -> normalization; insertion order fixes the key order
    # of the dict returned for "All".
    normalizers = {
        'Column Total': lambda frame: frame.div(frame.sum(axis=0), axis=1),
        'Row Total': lambda frame: frame.div(frame.sum(axis=1), axis=0),
        'Grand Total': lambda frame: frame.div(frame.values.sum()),
    }

    requested = str(method).strip().casefold()

    if requested == "all":
        return {label: normalize(data)
                for label, normalize in normalizers.items()}

    for label, normalize in normalizers.items():
        if label.casefold() == requested:
            return normalize(data)

    # Unknown method: keep the historical behaviour of handing back a copy
    # of the input rather than raising.
    return data.copy()

# Function to calculate confidence intervals
def calculate_confidence_intervals(data, percentage):
    """Compute a Wilson confidence interval at the requested confidence level.

    Args:
        data: DataFrame of numeric values.
        percentage: Confidence level in percent, strictly between 0 and 100
            (e.g. ``95``).

    Returns:
        ``(lower_bound, upper_bound)`` from
        ``wilson_brown_confidence_interval``, or ``(None, None)`` when
        ``percentage`` is outside the open interval (0, 100).
    """
    if not 0 < percentage < 100:
        return None, None

    # Convert the confidence level into the significance level expected by
    # the interval function, so the requested percentage is actually applied
    # instead of the default alpha.
    alpha = 1 - percentage / 100

    n = data.sum().sum()  # Sum of every cell, used as the observation count
    # NOTE(review): p is the mean cell value (grand total / number of cells).
    # It is only a valid proportion if it lies in [0, 1], and it is not
    # obviously consistent with n above — confirm the intended definitions.
    p = data.values.sum() / (data.shape[0] * data.shape[1])

    return wilson_brown_confidence_interval(n, p, alpha=alpha)

# Read data from Excel
# NOTE: this is a relative path, so it is resolved against the current
# working directory of the process running the script.
data = pd.read_excel("Samuel_Project/sample_input.xlsx")

# Ask for normalization method.
# str.title() capitalizes every word ("column total" -> "Column Total");
# str.capitalize() would lowercase the second word and the multi-word
# options would never match.
method = input("Choose normalization method (Column Total/Row Total/Grand Total/All): ").strip().title()

# Normalize data
normalized_data = normalize_data(data, method)

# Ask if user wants to calculate confidence intervals
calculate_ci = input("Do you want to calculate confidence intervals (yes/no)? ").strip().lower()

if calculate_ci == "yes":
    # Ask for percentage for confidence interval calculation
    percentage = float(input("Enter the percentage for confidence interval calculation (e.g., 95): "))
    # Both bounds are None when the percentage is not strictly between 0 and 100.
    lower_bound, upper_bound = calculate_confidence_intervals(data, percentage)
    print("Lower Bound:", lower_bound)
    print("Upper Bound:", upper_bound)

# Output the normalized data: a dict holds one table per normalization
# method, anything else is a single table.
if isinstance(normalized_data, dict):
    for key, value in normalized_data.items():
        print("Normalized Data -", key)
        print(value)
        print()
else:
    print("Normalized Data")
    print(normalized_data)


FileNotFoundError: [Errno 2] No such file or directory: 'Samuel_Project/sample_input.xlsx'