In [None]:
import pandas as pd

def calculate_institutional_ownership_proxies(df):
    """Derive the two institutional-ownership proxies from raw holdings data.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'date', 'holders', 'shares_held' and
        'out_shares'. The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        Columns 'date', 'IO_NO' (copy of 'holders') and 'IO_RATIO'
        ('shares_held' divided by 'out_shares'), restricted to rows where all
        three input columns are present.

    Raises
    ------
    KeyError
        If any of the required columns is missing.
    """
    # Rows lacking any input are dropped first so both proxies cover the
    # same set of observations.
    complete = df.dropna(subset=['holders', 'shares_held', 'out_shares'])

    # assign() builds a new frame instead of writing columns into the result
    # of dropna(), which pandas may flag with SettingWithCopyWarning.
    proxies = complete.assign(
        IO_NO=complete['holders'],
        IO_RATIO=complete['shares_held'] / complete['out_shares'],
    )

    return proxies[['date', 'IO_NO', 'IO_RATIO']]

def process_excel_file(file_path):
    """Compute ownership proxies for every usable sheet of an Excel workbook.

    Each sheet is read into a DataFrame. Sheets that are empty or have no
    'date' column (after stripping whitespace from string headers) are skipped
    with a message. For the remaining sheets the 'date' column is parsed with
    the format '%m/%y'; values that cannot be parsed become NaT and their rows
    are dropped before the proxies are calculated.

    Parameters
    ----------
    file_path : str or path-like
        Location of the workbook to read.

    Returns
    -------
    dict
        Maps sheet name to the DataFrame returned by
        ``calculate_institutional_ownership_proxies`` for that sheet. Sheets
        that were skipped or raised during processing are absent.
    """
    results = {}

    # ExcelFile keeps the workbook open; the context manager guarantees the
    # handle is released even if reading a sheet fails.
    with pd.ExcelFile(file_path) as xls:
        for sheet_name in xls.sheet_names:
            df = pd.read_excel(xls, sheet_name=sheet_name)
            if df.empty:
                print(f"Sheet {sheet_name} is empty, skipping.")
                continue

            # Strip only string labels: the .str accessor would turn
            # non-string labels into NaN (or raise on all-numeric headers).
            df = df.rename(
                columns=lambda label: label.strip() if isinstance(label, str) else label
            )
            if 'date' not in df.columns:
                print(f"Sheet {sheet_name} does not contain 'date' column, skipping.")
                continue

            print(f"Processing sheet {sheet_name}")
            print(df.head())

            # Best effort per sheet: a failure is reported and the remaining
            # sheets are still processed.
            try:
                print(f"Date column before parsing for {sheet_name}:\n{df['date'].head()}")

                df['date'] = pd.to_datetime(df['date'], format='%m/%y', errors='coerce')

                print(f"Date column after parsing for {sheet_name}:\n{df['date'].head()}")

                # Rows whose date could not be parsed carry no usable period.
                df = df.dropna(subset=['date'])

                print(f"After date parsing and dropping NA, data for {sheet_name}:\n{df.head()}")
                results[sheet_name] = calculate_institutional_ownership_proxies(df)
            except Exception as e:
                print(f"Error processing sheet {sheet_name}: {e}")
                continue

    return results

def save_to_excel(results, output_path):
    """Write every DataFrame in ``results`` to its own sheet of one workbook.

    Parameters
    ----------
    results : dict
        Maps a sheet name to the DataFrame stored under that name.
    output_path : str or path-like
        Destination workbook; opened through ``pd.ExcelWriter``.
    """
    with pd.ExcelWriter(output_path) as workbook:
        for sheet_label in results:
            # The frame's index is written as well (to_excel default).
            results[sheet_label].to_excel(workbook, sheet_name=sheet_label)

# Input workbook with the raw ownership data and the workbook that receives
# the per-sheet proxies. Both are absolute, machine-specific paths.
file_path = r"C:\UU\THESIS\AMX\HYPOTHESIS3\ownership\DELETED_OWNERSHIP.xlsx"
output_path = r"C:\UU\THESIS\AMX\HYPOTHESIS3\ownership\DELETED2_institutional_ownership_proxies.xlsx"

# Compute the proxies for every usable sheet, then write one output sheet per
# entry of the returned dict.
results = process_excel_file(file_path)
save_to_excel(results, output_path)

print(f"Calculated institutional ownership proxies saved to {output_path}")


In [None]:

import pandas as pd

# Workbook holding the per-sheet ownership proxies.
file_path = r"C:\UU\THESIS\AMX\HYPOTHESIS3\ownership\DELETED2_institutional_ownership_proxies.xlsx"
# sheet_name=None loads every sheet, so `df` is a dict keyed by sheet name
# rather than a single DataFrame.
# NOTE(review): if this workbook was written with its index column, each
# sheet presumably carries an extra 'Unnamed: 0' column that is averaged
# downstream - confirm.
df = pd.read_excel(file_path, sheet_name=None)

def transform_to_yearly(data):
    """Average all columns of ``data`` per calendar year.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a 'date' column convertible by ``pd.to_datetime``; the
        remaining columns are averaged. The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        One row per year-end bin between the first and last valid date, with
        the yearly mean of each column and an added integer 'year' column.
        The date index is discarded.
    """
    yearly = (
        # assign() works on a copy, so the caller's 'date' column keeps its
        # original values and dtype.
        data.assign(date=pd.to_datetime(data['date'], errors='coerce'))
        # Unparseable dates become NaT; drop them so they cannot enter the
        # resampling index.
        .dropna(subset=['date'])
        .set_index('date')
        # An explicit year-end offset replaces the string alias 'Y', which
        # recent pandas releases deprecate.
        .resample(pd.offsets.YearEnd())
        .mean()
    )
    yearly['year'] = yearly.index.year
    return yearly.reset_index(drop=True)

# Apply the yearly aggregation to every sheet, keeping the sheet names as keys.
yearly_data = {sheet_name: transform_to_yearly(sheet_data) for sheet_name, sheet_data in df.items()}

# Write one sheet per entry; index=False because the yearly frames carry
# only a default integer index.
output_path = r"C:\UU\THESIS\AMX\HYPOTHESIS3\ownership\DELETED2_yearly_institutional_ownership_proxies.xlsx"
with pd.ExcelWriter(output_path) as writer:
    for sheet_name, data in yearly_data.items():
        data.to_excel(writer, sheet_name=sheet_name, index=False)

# Bare expression: shows the output location as the cell result.
output_path


In [None]:
import pandas as pd


# Workbook with the yearly proxies, one sheet per entry.
# NOTE(review): the code below groups on an 'event' column; presumably that
# column is added to this workbook outside the notebook - confirm.
file_path = r"C:\UU\THESIS\AMX\HYPOTHESIS3\ownership\DELETED2_yearly_institutional_ownership_proxies.xlsx"
# sheet_name=None loads every sheet, so `df` is a dict keyed by sheet name.
df = pd.read_excel(file_path, sheet_name=None)


def calculate_yearly_means(data):
    """Average every column of ``data`` per value of its 'event' column.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain an 'event' column; its values are cast to int before
        grouping. The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        Indexed by the integer 'event' value, holding the mean of each
        remaining column within that group.
    """
    # Rows without an event value cannot belong to any group, and a missing
    # value would make the integer cast raise, so they are removed first.
    with_event = data.dropna(subset=['event'])

    # assign() returns a new frame, leaving the caller's 'event' column as is.
    with_event = with_event.assign(event=with_event['event'].astype(int))

    return with_event.groupby('event').mean()

# Per-sheet means by event value, stacked into one frame whose index is 'event'.
aggregated_data = pd.concat([calculate_yearly_means(sheet_data) for sheet_name, sheet_data in df.items()])

# Average the per-sheet means for each event value across all sheets
# (an unweighted mean of means: every sheet counts equally).
overall_yearly_means = aggregated_data.groupby('event').mean()

output_path = r"C:\UU\THESIS\AMX\HYPOTHESIS3\ownership\DELETED2_overall_yearly_means.xlsx"
overall_yearly_means.to_excel(output_path)

# Bare expression: shows the output location as the cell result.
output_path


In [None]:
import pandas as pd
from scipy.stats import mannwhitneyu, wilcoxon, binomtest, ttest_rel
import numpy as np

# Workbook with yearly proxies including an 'event' column; all sheets are
# stacked into one frame in sheet order.
file_path = r"C:\UU\THESIS\AMX\HYPOTHESIS3\ownership\DELETED_yearly_institutional_ownership_proxies2.xlsx"
# The context manager releases the workbook handle once every sheet is read.
with pd.ExcelFile(file_path) as xls:
    data = pd.concat([pd.read_excel(xls, sheet_name=sheet) for sheet in xls.sheet_names])

# Event-relative years that are compared against each other.
periods_deleted = {
    't-5': -5,
    't': 0,
    't+5': 5
}

# One frame per event year. Rows keep the order in which the sheets were read:
# the paired tests below match observations by position, and sorting on the
# constant 'event' column (default sort is not stable) could shuffle that order.
# NOTE(review): positional pairing presumably relies on every sheet providing
# a row for each of the three event years - confirm.
df_t_minus_5 = data[data['event'] == periods_deleted['t-5']].reset_index(drop=True)
df_t = data[data['event'] == periods_deleted['t']].reset_index(drop=True)
df_t_plus_5 = data[data['event'] == periods_deleted['t+5']].reset_index(drop=True)

columns_to_test = ['IO_NO']

# Column-oriented result tables, one per test; every list in a table grows
# by one entry per (measure, period) comparison.
results = {
    'mann_whitney': {'Measure': [], 'Period': [], 'Statistic': [], 'p-value': []},
    'wilcoxon': {'Measure': [], 'Period': [], 'Statistic': [], 't-value': [], 'p-value': []},
    'sign_test': {'Measure': [], 'Period': [], 'Statistic': [], 'p-value': []},
    'paired_ttest': {'Measure': [], 'Period': [], 'Statistic': [], 'p-value': []}
}

def perform_wilcoxon_test(df1, df2, column):
    """Wilcoxon signed-rank test on positionally paired values of ``column``.

    Missing values are dropped from each frame independently and the two
    samples are then paired by position, truncated to the shorter one.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        Frames that both contain ``column``.
    column : str
        Name of the numeric column to compare.

    Returns
    -------
    tuple
        ``(statistic, t_value, p_value)`` where ``t_value`` is the normal
        approximation ``(statistic - mu) / sigma`` of the signed-rank
        statistic. ``(None, None, None)`` when there are no pairs or when
        every paired difference is zero.
    """
    data1 = df1[column].dropna().values
    data2 = df2[column].dropna().values

    # wilcoxon() requires equally long samples; truncate to the shorter one.
    n_pairs = min(len(data1), len(data2))
    if n_pairs == 0:
        return (None, None, None)
    data1 = data1[:n_pairs]
    data2 = data2[:n_pairs]

    # wilcoxon() discards zero differences by default, so only non-zero
    # differences count towards n; with none left there is nothing to rank.
    n = int(((data1 - data2) != 0).sum())
    if n == 0:
        return (None, None, None)

    stat, p_value = wilcoxon(data1, data2)

    # Normal approximation of the signed-rank statistic (no tie correction).
    mu_w = n * (n + 1) / 4
    sigma_w = np.sqrt(n * (n + 1) * (2 * n + 1) / 24)
    t_value = (stat - mu_w) / sigma_w

    return stat, t_value, p_value


def perform_sign_test(df1, df2, column):
    """Two-sided sign test on positionally paired values of ``column``.

    Missing values are dropped from each frame independently; the samples are
    then paired by position and truncated to the shorter one.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        Frames that both contain ``column``.
    column : str
        Name of the numeric column to compare.

    Returns
    -------
    tuple
        ``(n_positive - n_negative, p_value)`` from a binomial test of the
        positive count against p = 0.5, or ``(None, None)`` when no paired
        difference is non-zero.
    """
    first = df1[column].dropna().values
    second = df2[column].dropna().values

    # Pair by position; surplus trailing values of the longer sample are ignored.
    n_pairs = min(len(first), len(second))
    deltas = first[:n_pairs] - second[:n_pairs]

    # Ties (zero differences) carry no sign and are left out of the count.
    wins = (deltas > 0).sum()
    losses = (deltas < 0).sum()
    informative = wins + losses
    if informative == 0:
        return (None, None)

    outcome = binomtest(wins, informative, 0.5, alternative='two-sided')
    return (wins - losses, outcome.pvalue)

# The two comparisons every test is run on: (label, earlier frame, later frame).
comparisons = [
    ('t-5 vs t', df_t_minus_5, df_t),
    ('t vs t+5', df_t, df_t_plus_5),
]


def _append_result(test_name, measure, period, values):
    """Append one row to the column-oriented table ``results[test_name]``."""
    table = results[test_name]
    table['Measure'].append(measure)
    table['Period'].append(period)
    for key, value in values.items():
        table[key].append(value)


def _paired_ttest(df1, df2, column):
    """Paired t-test on positionally paired values; (None, None) below two pairs."""
    first = df1[column].dropna().values
    second = df2[column].dropna().values
    # Each comparison is truncated on its own: reusing a sample that was cut
    # for another comparison can hand ttest_rel two arrays of unequal length.
    n_pairs = min(len(first), len(second))
    if n_pairs <= 1:
        return (None, None)
    stat, p_value = ttest_rel(first[:n_pairs], second[:n_pairs])
    return (stat, p_value)


for column in columns_to_test:
    for period, earlier, later in comparisons:
        # Mann-Whitney U test: run on the NaN-free samples that the emptiness
        # guard inspects, so missing values cannot turn the result into NaN.
        # A row is recorded only when both samples are non-empty.
        sample_earlier = earlier[column].dropna()
        sample_later = later[column].dropna()
        if not sample_earlier.empty and not sample_later.empty:
            stat, p_value = mannwhitneyu(sample_earlier, sample_later, alternative='two-sided')
            _append_result('mann_whitney', column, period,
                           {'Statistic': stat, 'p-value': p_value})

        # Wilcoxon signed-rank test
        stat, t_value, p_value = perform_wilcoxon_test(earlier, later, column)
        _append_result('wilcoxon', column, period,
                       {'Statistic': stat, 't-value': t_value, 'p-value': p_value})

        # Sign test
        stat, p_value = perform_sign_test(earlier, later, column)
        _append_result('sign_test', column, period,
                       {'Statistic': stat, 'p-value': p_value})

        # Paired t-test (a row with None values is recorded when it cannot run)
        stat, p_value = _paired_ttest(earlier, later, column)
        _append_result('paired_ttest', column, period,
                       {'Statistic': stat, 'p-value': p_value})


results_mann_whitney_df = pd.DataFrame(results['mann_whitney'])
results_wilcoxon_df = pd.DataFrame(results['wilcoxon'])
results_sign_test_df = pd.DataFrame(results['sign_test'])
results_paired_ttest_df = pd.DataFrame(results['paired_ttest'])

print(results_mann_whitney_df)

print(results_wilcoxon_df)

print(results_sign_test_df)

print(results_paired_ttest_df)

# One sheet per test in a single results workbook.
output_path = r"C:\UU\THESIS\AMX\HYPOTHESIS3\ownership\DELETED_IO_NO_significance_test_results_updated.xlsx"
with pd.ExcelWriter(output_path) as writer:
    results_mann_whitney_df.to_excel(writer, sheet_name='Mann-Whitney')
    results_wilcoxon_df.to_excel(writer, sheet_name='Wilcoxon')
    results_sign_test_df.to_excel(writer, sheet_name='Sign Test')
    results_paired_ttest_df.to_excel(writer, sheet_name='Paired T-Test')


