In [33]:
import os
import glob

import matplotlib.pyplot as plt
import pandas as pd
from scipy.stats import f_oneway
import scipy.stats as stats

In [34]:
# Absolute locations of the three measurement directories, assigned in one
# statement. Note that none of these paths ends with a path separator.
measurements1, measurements2, measurements3 = (
    '/home/konsternacja/Git/master-thesis1/fluorCAM/measurement1',
    '/home/konsternacja/Git/master-thesis1/fluorCAM/measurement2',
    '/home/konsternacja/Git/master-thesis1/fluorCAM/measurement3',
)

In [35]:
def flatten_sum(matrix):
    """Flatten one level of nesting into a single new list.

    Parameters
    ----------
    matrix : iterable of iterables
        The rows to concatenate, e.g. a list of lists.

    Returns
    -------
    list
        Every element of every row, in row order. An empty ``matrix``
        yields an empty list.
    """
    # A single pass over the elements; the previous ``sum(matrix, [])`` copied
    # the accumulated list once per row and only accepted rows of type list.
    return [item for row in matrix for item in row]

In [36]:
# Ordered (file-name marker, plant label, inoculation label) rules. The first
# marker contained in a report's base name decides how its samples are labelled.
_REPORT_GROUPS = (
    ('wteminus', 'WT', 'E-'),
    ('wteplus', 'WT', 'E+'),
    ('70eminus', 'AT5G45470', 'E-'),
    ('70eplus', 'AT5G45470', 'E+'),
)


def _classify_report(base_name):
    """Return the (plant, inoculation) labels for a report's base file name."""
    for marker, plant, inoculation in _REPORT_GROUPS:
        if marker in base_name:
            return plant, inoculation
    # Unrecognised file names are still included, just without labels.
    return None, None


def combine_reports(directory_path):
    """Merge every ``*.TXT`` report in a directory into one table.

    Each report is read as tab-separated text, skipping its first two lines
    and using its first column as the row labels. The data columns of all
    reports are placed side by side, the result is transposed, and two label
    columns derived from each report's file name are appended. The combined
    table is also written to ``concatenated_file.tsv`` inside
    ``directory_path`` (tab-separated, without the index).

    Parameters
    ----------
    directory_path : str
        Directory holding the reports. A trailing path separator is optional.

    Returns
    -------
    pandas.DataFrame
        One row per data column of the input reports (integer index, in
        sorted file-name order). The columns are the reports' row labels
        followed by ``'plant'`` and ``'inoculation'``. Measurement columns
        keep the dtype pandas parsed them with.

    Raises
    ------
    ValueError
        If ``directory_path`` contains no ``*.TXT`` files.
    """
    # os.path.join makes the trailing separator optional; sorting makes the
    # row order independent of the file system's directory listing order.
    report_paths = sorted(glob.glob(os.path.join(directory_path, '*.TXT')))
    if not report_paths:
        raise ValueError(
            "No '*.TXT' report files found in {!r}".format(directory_path)
        )

    frames = []
    plants = []
    inoculations = []
    for report_path in report_paths:
        base_name = os.path.splitext(os.path.basename(report_path))[0]
        report = pd.read_csv(report_path, sep='\t', skiprows=2, index_col=0)
        plant, inoculation = _classify_report(base_name)

        frames.append(report)
        # One label per data column, because each column becomes one output row.
        plants.extend([plant] * len(report.columns))
        inoculations.extend([inoculation] * len(report.columns))

    # Side-by-side concatenation aligned on the row labels; ignore_index
    # renumbers the columns 0..n-1 so they become a clean integer index below.
    measurements = pd.concat(frames, ignore_index=True, axis=1)

    # Transposing keeps the row labels as column names, so no report has to be
    # re-read to recover them, and the label columns are attached by name.
    df_for_analysis = measurements.T.assign(plant=plants, inoculation=inoculations)

    output_path = os.path.join(directory_path, 'concatenated_file.tsv')
    df_for_analysis.to_csv(output_path, sep='\t', index=False)
    return df_for_analysis

In [37]:
# Build one combined table per measurement directory. The paths come from the
# measurements1..3 constants defined earlier instead of being repeated here;
# the '/' is appended because those constants carry no trailing separator.
df1 = combine_reports(measurements1 + '/')
df2 = combine_reports(measurements2 + '/')
df3 = combine_reports(measurements3 + '/')