In [1]:
# DEFINE PATHS AND READ DATA
import os
import numpy as np

def read_xvg(file_path):
    """
    Reads an .xvg file and returns the data as a numpy array.
    Assumes the file is formatted correctly.
    """
    data = []
    with open(file_path, 'r') as file:
        for line in file:
            if not line.startswith(('#', '@')):
                values = line.split()
                data.append([float(value) for value in values])
    return np.array(data)

# Base directories for the two conditions
base_dir_protein = '/home/mariona/project/1-Apo-FRAP'
base_dir_ternary = '/home/mariona/project/2-Frap-Lig'

# Subdirectories for replicas
replica_dirs = ['Replica1', 'Replica2', 'Replica3']

# File name for the hydrogen bond data
file_name = 'hbond_pp.xvg'

# Function to get full file paths for a given base directory
def get_file_paths(base_dir):
    paths = [os.path.join(base_dir, replica_dir, file_name) for replica_dir in replica_dirs]
    for path in paths:
        print(f"Checking path: {path}")
        if not os.path.exists(path):
            print(f"File not found: {path}")
    return paths

# Get file paths for both conditions
file_paths_protein = get_file_paths(base_dir_protein)
file_paths_ternary = get_file_paths(base_dir_ternary)

# Read the data from files
data_protein = [read_xvg(file_path) for file_path in file_paths_protein if os.path.exists(file_path)]
data_ternary = [read_xvg(file_path) for file_path in file_paths_ternary if os.path.exists(file_path)]

Checking path: /home/mariona/project/1-Apo-FRAP/Replica1/hbond_pp.xvg
Checking path: /home/mariona/project/1-Apo-FRAP/Replica2/hbond_pp.xvg
Checking path: /home/mariona/project/1-Apo-FRAP/Replica3/hbond_pp.xvg
Checking path: /home/mariona/project/2-Frap-Lig/Replica1/hbond_pp.xvg
Checking path: /home/mariona/project/2-Frap-Lig/Replica2/hbond_pp.xvg
Checking path: /home/mariona/project/2-Frap-Lig/Replica3/hbond_pp.xvg


In [2]:
# Calculate the MEAN and STANDARD DEVIATION for each replica
means_protein = [np.mean(data) for data in data_protein]
stds_protein = [np.std(data) for data in data_protein]

means_ternary = [np.mean(data) for data in data_ternary]
stds_ternary = [np.std(data) for data in data_ternary]
    
print("Means Protein:", means_protein)
print("Standard Deviations Protein:", stds_protein)
print("Means Ternary:", means_ternary)
print("Standard Deviations Ternary:", stds_ternary)

Means Protein: [83337.32040025866, 83337.74785837617, 83337.3316866996]
Standard Deviations Protein: [144336.22764509937, 144335.98085769342, 144336.2211293799]
Means Ternary: [83336.59820803584, 83337.21522902875, 83336.70315260362]
Standard Deviations Ternary: [144336.64459952162, 144336.28836460118, 144336.58400929568]


In [3]:
# Perform the T-TEST and interpret the results
from scipy.stats import ttest_ind

# Perform Welch's t-test
t_stat, p_val = ttest_ind(means_protein, means_ternary, equal_var=False)

# Output the t-statistic and p-value
print(f"T-statistic: {t_stat}")
print(f"P-value: {p_val}")

# Interpretation of the p-value
if p_val < 0.05:
    print("Result: SIGNIFICANT")
else:
    print("Result: NOT SIGNIFICANT")

T-statistic: 2.650252487993983
P-value: 0.062119630822666874
Result: NOT SIGNIFICANT
