In [5]:
import numpy as np
import pandas as pd
from scipy.stats import fisher_exact, contingency
import statsmodels.stats.proportion as smp
import csv
import statsmodels.api as sm

In [6]:
def calculate_odds_ratios(data, group_col, attr_cols):
    group1_data = data[data[:, group_col] == 1]
    group2_data = data[data[:, group_col] == 0]
    
    if group1_data.shape[0] == 0 or group2_data.shape[0] == 0:
        print("Error: One of the groups has no data.")
        return
    
    odds_ratios = []
    p_values = []
    conf_ints = []
    
    for attr_col in attr_cols:
        group1_attr = group1_data[:, attr_col]
        group2_attr = group2_data[:, attr_col]
        
        group1_positive = np.sum(group1_attr == 1)
        group1_total = group1_positive + np.sum(group1_attr == 0)
        group2_positive = np.sum(group2_attr == 1)
        group2_total = group2_positive + np.sum(group2_attr == 0)
        
        y = np.concatenate([group1_attr, group2_attr])
        X = np.concatenate([np.ones((group1_total, 1)), np.zeros((group2_total, 1))])
        X = sm.add_constant(X)
        logit = sm.Logit(y, X)
        result = logit.fit()
        odds_ratio = np.exp(result.params[1])
        p_value = result.pvalues[1]
        conf_int = result.conf_int(alpha=0.05, cols=None)[1]
        
        odds_ratios.append(odds_ratio)
        p_values.append(p_value)
        conf_ints.append(conf_int)
    
    return odds_ratios, p_values, conf_ints

# Example usage
with open('../data/Attribute Odds Ratio Data.csv', 'r') as f:
        reader = csv.reader(f)
        headers = next(reader)
        data = [row for row in reader]

data = np.array(data, dtype=int)
group_col = 0
attr_cols = np.arange(1, 12)

odds_ratios, p_values, conf_ints = calculate_odds_ratios(data, group_col, attr_cols)

for i, attr_col in enumerate(attr_cols):
    print("Attribute:", headers[attr_col])
    print("Odds Ratio:", odds_ratios[i])
    print("Confidence Interval:", conf_ints[i])
    print("P-Value:", p_values[i])
    print("\n")

Optimization terminated successfully.
         Current function value: 0.444313
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.651966
         Iterations 5
Optimization terminated successfully.
         Current function value: 0.616405
         Iterations 5
Optimization terminated successfully.
         Current function value: 0.176892
         Iterations 7
Optimization terminated successfully.
         Current function value: 0.475736
         Iterations 5
Optimization terminated successfully.
         Current function value: 0.670256
         Iterations 5
Optimization terminated successfully.
         Current function value: 0.583926
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.122737
         Iterations 9
Optimization terminated successfully.
         Current function value: 0.587780
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.376505
  

In [7]:
import statsmodels.stats.contingency_tables as ct

def contingency_table(data, group_col, attribute_col):
    groups = data[group_col].unique()
    contingency_table = [[data[(data[group_col] == groups[0]) & (data[attribute_col] == 1)].shape[0],
                          data[(data[group_col] == groups[0]) & (data[attribute_col] == 0)].shape[0]],
                         [data[(data[group_col] == groups[1]) & (data[attribute_col] == 1)].shape[0],
                          data[(data[group_col] == groups[1]) & (data[attribute_col] == 0)].shape[0]]]
    return contingency_table

def odds_ratio(data, group_col, attribute_col):
    contingency_table_results = contingency_table(data, group_col, attribute_col)
    result = ct.Table2x2(contingency_table_results)
    return result.oddsratio, result.oddsratio_confint()

data = pd.read_csv("../data/Attribute Odds Ratio Data.csv")
group_col = "ASD"
attribute_cols = data.columns.tolist()
attribute_cols.remove(group_col)

for attribute_col in attribute_cols:
    odds_ratio_res, confidence_interval_res = odds_ratio(data, group_col, attribute_col)
    print(f"Attribute: {attribute_col}")
    print(f"Odds ratio: {odds_ratio_res}")
    print(f"Confidence interval: {confidence_interval_res}")
    print()

Attribute: premature
Odds ratio: 1.2217889073079558
Confidence interval: (1.0407292540790245, 1.4343482016768785)

Attribute: verbal
Odds ratio: 3.1680390850544082
Confidence interval: (2.784525389745717, 3.604374261191022)

Attribute: sleep
Odds ratio: 2.3299653194611176
Confidence interval: (2.0338434235711667, 2.6692017325303175)

Attribute: hearing
Odds ratio: 1.2578593272171255
Confidence interval: (0.9340832260973595, 1.6938641470714164)

Attribute: vision
Odds ratio: 0.8438327778516382
Confidence interval: (0.7295603919959401, 0.9760038576501774)

Attribute: noise
Odds ratio: 2.178231886691619
Confidence interval: (1.928085434957744, 2.460831904113272)

Attribute: motor
Odds ratio: 3.62322075501874
Confidence interval: (3.1063081356792392, 4.226151452527419)

Attribute: metabolic
Odds ratio: 3.93708408953418
Confidence interval: (2.304390390049981, 6.726564732691391)

Attribute: gi
Odds ratio: 3.47243857797491
Confidence interval: (2.9838323115789978, 4.041054730527937)

Attribu