In [185]:
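# Read the .csv output files from a SmartRoot analysis, concatenate them into one table and tidy it.
# Analysis and plotting are planned but not implemented yet (the analyse_data call in main is commented out).
# Run in the qPCR conda environment.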

In [186]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import glob 
import sys
import argparse




In [187]:
# #define arguments
# parser = argparse.ArgumentParser(description='Analyse SmartRoot output')
# parser.add_argument('-i', '--input', help='input directory', required=True)
# parser.add_argument('-o', '--output', help='output directory', required=True)





In [188]:
def concat_csv_recursive(PATH, EXT):
    """Recursively collect files matching ``EXT`` under ``PATH`` into one DataFrame.

    Parameters
    ----------
    PATH : str
        Root directory; it and every sub-directory below it are searched.
    EXT : str
        Glob pattern applied to the file names in each directory, e.g. ``'*.csv'``.

    Returns
    -------
    pandas.DataFrame
        The rows of every matching file stacked on top of each other with a
        fresh ``0..n-1`` index. An empty DataFrame is returned when no file
        matches.
    """
    # Sort the paths so the row order does not depend on the order in which the
    # filesystem happens to list directories.
    csv_files = sorted(
        file
        for path, _subdirs, _fnames in os.walk(PATH)
        # escape the directory part so '[', '*' or '?' in folder names are taken literally
        for file in glob.glob(os.path.join(glob.escape(path), EXT))
        # a directory whose name matches the pattern is not a table
        if os.path.isfile(file)
    )
    if not csv_files:
        # pd.concat raises on an empty list; keep the "nothing found" result an empty frame
        return pd.DataFrame()
    # read every file once and concatenate in a single call instead of
    # re-copying the accumulated frame on every iteration
    frames = [pd.read_csv(file) for file in csv_files]
    return pd.concat(frames, ignore_index=True)

In [189]:
def sort_data(df,output_location):
    """Tidy the concatenated SmartRoot table and export the per-plant rows.

    Steps, in order:
      1. remove spaces from the column names;
      2. sort the rows by ``image`` and drop fully duplicated rows;
      3. split ``image`` on ``'_'`` into ``sample_name`` (part 0),
         ``nitrate_concentration`` (part 1) and ``plate`` (part 2);
      4. write the rows with ``root_order == 0`` and a non-null ``length`` to
         ``<output_location>/single_plant_data.tsv`` (tab separated, no index).

    Parameters
    ----------
    df : pandas.DataFrame
        Concatenated SmartRoot output. Must contain the columns ``image``,
        ``root_order`` and ``length`` (spaces in the headers are ignored).
        The frame passed in is not modified.
    output_location : str
        Existing directory that receives ``single_plant_data.tsv``.

    Returns
    -------
    pandas.DataFrame
        The full tidied table (all root orders), keeping the original row labels.

    Raises
    ------
    KeyError
        If one of the required columns is missing, e.g. because no input file
        was found and ``df`` is empty.
    """
    # Clean the headers BEFORE any column is looked up by name, so a header
    # such as ' image' is found. Non-string labels are left untouched.
    df = df.rename(columns=lambda name: name.replace(' ', '') if isinstance(name, str) else name)

    missing = [col for col in ('image', 'root_order', 'length') if col not in df.columns]
    if missing:
        raise KeyError(f'SmartRoot table is missing required column(s): {missing}')

    # a stable sort keeps rows of the same image in their input order
    df = df.sort_values(by=['image'], kind='mergesort').drop_duplicates(keep='first')

    # split the image name once and reuse the parts for the three new columns
    image_parts = df['image'].str.split('_')
    df = df.assign(
        nitrate_concentration=image_parts.str[1],
        sample_name=image_parts.str[0],
        plate=image_parts.str[2],
    )

    # one row per plant: keep root_order == 0 rows that have a measured length
    # NOTE(review): presumably root_order 0 marks the primary root - confirm against SmartRoot docs
    df_plant = df[(df['root_order'] == 0) & df['length'].notnull()]
    df_plant.to_csv(os.path.join(output_location, 'single_plant_data.tsv'), sep='\t', index=False)

    # TODO: per-plant traits still to be derived
    #   PR  = primary root length (cm)
    #   LR  = lateral root number (visible from scan)
    #   LRL = total lateral root length (all LRs added together - cm)
    #   ALRL = average lateral root length (LRL/LR - cm)
    #   TRL = total root length (PR + LRL)
    #   LRD = lateral root density (LR/PR)
    #   LRL_div_TRL = percentage of LRL contributing to TRL (LRL/TRL)
    # TODO: count the number of plants for each plant line
    # TODO: partition variation across mutants relative to wild type using
    #       principal component analysis of all RSA traits
    # TODO: stats - two-way ANOVA with three phenotypic categories: genotype
    #       effects in both nitrogen conditions (genotype-dependent), genotype
    #       effects in only one condition (nitrogen-condition-dependent) or
    #       genotype by nitrogen condition-dependent effects

    return df

In [190]:
def main(args):
    """Run the SmartRoot pipeline: load, tidy and export the measurements.

    Parameters
    ----------
    args : list
        Currently unused. The input and output locations are fixed below
        until command-line parsing is enabled.

    Side effects
    ------------
    Creates ``../../data/CRISPR_library/smartroot_plots`` if needed and writes
    ``single_plant_data.tsv`` (via ``sort_data``) and ``all_smartroot_data.csv``
    into it.

    Raises
    ------
    FileNotFoundError
        If no table could be loaded from the input directory.
    """
    #input_dir = args.input
    input_dir = '../../data/CRISPR_library/images/rsa_output'
    #output_dir = args.output
    # directory the exported tables (and later the plots) are written to
    output_dir = '../../data/CRISPR_library/smartroot_plots'
    try:
        # makedirs also creates missing parent directories, which os.mkdir does not
        os.makedirs(output_dir)
        print("Directory " , output_dir ,  " created")
    except FileExistsError:
        print("Directory " , output_dir ,  " already exists")

    #read in and concatenate .csv files
    df = concat_csv_recursive(input_dir, '*.csv')
    if df.shape[1] == 0:
        # nothing was loaded at all; fail here with a readable message rather
        # than with a KeyError on the 'image' column further down
        raise FileNotFoundError(f"No '*.csv' SmartRoot output found under {input_dir}")

    #sort data
    df = sort_data(df,output_dir)
    #analyse dataframe and make plots
    #analyse_data(output_dir)

    #save dataframe to csv file
    df.to_csv(f'{output_dir}/all_smartroot_data.csv', index=False)

In [191]:
# Entry point when the cell/script is executed directly.
# main() does not read its argument yet (the paths are set inside it),
# so the contents of sys.argv have no effect on the run.
if __name__ == "__main__":
    main(sys.argv)

Directory  ../../data/CRISPR_library/smartroot_plots  already exists
