In [9]:
import numpy as np
from scipy import stats
import pandas as pd

def read_tsv_file(tsv_file):
    """
    Load a tab-separated values file into a pandas DataFrame.

    Parameters:
        tsv_file (str): File path of the input TSV file.

    Returns:
        pandas.DataFrame: The parsed contents of the TSV file.
    """
    # Tab is the only delimiter; every other read_csv option stays at its default.
    return pd.read_csv(tsv_file, sep='\t')

def do_ttest(tsv_file, g1, g2, alpha=0.05):
    """
    Run an independent two-sample t-test on two columns of a TSV file.

    The file is loaded with read_tsv_file, the two named columns are compared
    with scipy.stats.ttest_ind, and the outcome is printed.

    Parameters:
        tsv_file (str): File path of the input TSV file.
        g1 (str): Name of the column containing data for group 1.
        g2 (str): Name of the column containing data for group 2.
        alpha (float): Significance threshold used for the printed verdict.
            Defaults to 0.05.

    Returns:
        tuple: (abs_t_statistic, p_value), where abs_t_statistic is the
            absolute value of the t-statistic (the sign is discarded).

    Raises:
        KeyError: If g1 or g2 is not a column of the loaded table.
    """
    data = read_tsv_file(tsv_file)
    # Keep the column names (g1, g2) and the column data under separate names.
    group1 = data[g1]
    group2 = data[g2]

    t_statistic, p_value = stats.ttest_ind(group1, group2)
    abs_t_statistic = abs(t_statistic)

    print("T-statistic:", abs_t_statistic)
    print("P-value:", p_value)

    if np.isnan(p_value):
        # ttest_ind propagates NaN by default (e.g. missing values in a column);
        # a NaN p-value must not be reported as "no significant difference".
        print("Result: The test could not be evaluated (p-value is NaN).")
    elif p_value < alpha:
        print("Result: There is a significant difference between the two groups.")
    else:
        print("Result: There is no significant difference between the two groups.")

    return abs_t_statistic, p_value


def do_ttest_on_dicts(list_dict, tsv_file=None):
    '''
    Run a t-test for every dictionary in a list and attach the results.

    Parameters:
        list_dict (list of dicts): List of dictionaries with 'g1' and 'g2' keys.
            When tsv_file is None, the values are the two samples themselves
            (array-like data for the t-test). When tsv_file is given, the
            values are column names looked up in that file.
        tsv_file (str, optional): File path of a TSV file. If provided, each
            test is delegated to do_ttest(tsv_file, g1, g2), which also prints
            its result. Defaults to None.

    Returns:
        new_dict_list (list of dicts): Copies of the input dictionaries with
            added 't_statistic' (absolute value) and 'p_value' keys. The input
            dictionaries are left unmodified.
    '''
    new_dict_list = []
    for entry in list_dict:
        g1 = entry['g1']
        g2 = entry['g2']
        if tsv_file is None:
            # g1/g2 hold the data directly; report the absolute t-statistic
            # so both branches return the same kind of value as do_ttest.
            t_statistic, p_value = stats.ttest_ind(g1, g2)
            t_statistic = abs(t_statistic)
        else:
            # do_ttest needs the file path as its first argument.
            t_statistic, p_value = do_ttest(tsv_file, g1, g2)
        # Build a new dict instead of mutating the caller's entry.
        new_dict_list.append({**entry, 't_statistic': t_statistic, 'p_value': p_value})
    return new_dict_list

def write_as_tsv_file(new_dict_list, outfile):
    """
    Save a list of dictionaries as a tab-separated file via pandas.

    Parameters:
        new_dict_list (list): List of dictionaries to be written to the file.
        outfile (str): Name of the output TSV file.

    Returns:
        str: The same outfile value that was passed in.
    """
    # Build the table and write it without the index column.
    pd.DataFrame(new_dict_list).to_csv(outfile, sep='\t', index=False)
    return outfile




In [10]:
# Input feature table and the two columns to compare.
# NOTE(review): this is an absolute, user-specific path; the cell will not run
# on another machine. Consider a path relative to a configurable data directory.
tsv_file = '/Users/raziashekh/Documents/special course/prototyping_feature_table.tsv' 
group1_column = 'Sample1_A_Ac-C18_POS_UMETAB404_NBC01469_ISP2_20_B7.mzML'
group2_column = 'Sample1_B'


# do_ttest prints the result and returns (absolute t-statistic, p-value).
t_statistic, p_value = do_ttest(tsv_file, group1_column, group2_column)


# One result record: the compared column names plus the test outcome.
list_dict = [{'g1': group1_column, 'g2': group2_column, 't_statistic': t_statistic, 'p_value': p_value}]


# Written relative to the current working directory; write_as_tsv_file returns
# the filename, which the notebook shows as this cell's output.
output_filename = "output_data.tsv"
write_as_tsv_file(list_dict, output_filename)

T-statistic: 2.4326625329624174
P-value: 0.01588599534648828
Result: There is a significant difference between the two groups.


'output_data.tsv'