# Importing all Libraries

In [29]:
#importing custom functions
import sys
sys.path.append('/Users/ristomartin/Dropbox/UniStuff/DPhil/Experimental/python_analysis/common_functions')
from custom_functions import *
from common_fits import *

#importing standard functions
import os
import pandas as pd
import numpy as np
import scipy.stats as stats
from scipy.stats import iqr
import matplotlib.pyplot as plt
import seaborn as sns
from statannot import add_stat_annotation
import itertools

from scipy.optimize import curve_fit

# Generation of raw datasets from image analysis

## Setting location of directories and constants

In [30]:
#Root location of all experimental data
root = '/Users/ristomartin/Dropbox/UniStuff/DPhil/Experimental/'
#current location (working directory of the notebook); all processed output is written beneath it
location = os.getcwd()
print(location)

#Data set locations
#Hollow fibre micro-fibre analysis
#Effect of polymer concentration & Pyridine concentration with S3 polymer solution
hf_s3_poly_pyrid = root+'ES_PCL_PDO_270219/SEM/bead_free/compiled_data/radius/comb/'
#Effect of Pyridine concentration with S4 polymer solution
#NOTE(review): the name says "s3" although the data set is described as S4; name kept in case other cells use it
#trailing slash added so this path follows the same convention as every other data set location
hf_s3_pyrid = root+'transport/SEM/compiled_data/hollow_fibre_s4_pyridine/'

#Flat sheet membrane micro-fibre analysis
#Effect of Pyridine concentration with S4 polymer solution
fs_s4_pyrid = root+'transport/SEM/compiled_data/flat_sheet_s4_pyridine/'
#Effect of Flowrate with S4 polymer solution
fs_s4_fr = root+'transport/SEM/compiled_data/flat_sheet_s4_flow_rate/'

#test data location within each analysis set
#test_dat_loc = analysis_set+'compiled_data/radius/test/'

#Pierre biolig data
bio_lig = root+'biolig/29_09_20/ImageJ/radius/'

#List of all data sets to be analysed
data_sets = [bio_lig]

#sample key location, built from root so the base directory is only defined in one place
sample_key_name = 'sample_key_17032020'
sample_key_path = root+'sample_keys/'+sample_key_name+'.xlsx'

#Import sample key (kept separate from the path so the name sample_key always refers to the table)
sample_key = pd.read_excel(sample_key_path)

#Set location of processed data
processed = location+'/processed/'
checkdir(processed) #check that processed data directory exists
#location of processed histogram data
hist_dists = processed+'hist_dist/'
checkdir(hist_dists) #check that histogram data directory exists
#location of processed recreated raw distributions
raw_dists = processed+'raw_dist/'
checkdir(raw_dists) #check that processed recreated raw distribution data directory exists
#location of microfibre distribution figures
#figure_loc = processed+'/'+matching+'/'+not_matching
#checkdir(figure_loc) #check that figure directory exists
#location of summary data
#summary_loc = processed+'/summary_loc/'
#checkdir(summary_loc) #check that figure directory exists

#set variables to be considered to separate out distributions
variables = ['mass_pcl','flow_rate'] #['solution_name','pyridine_conc','flow_rate','rotation_speed','poly_wall?']
#create a dictionary of axis titles for each of the variables considered
variable_label = {'solution_name':'Polymer Solution','pyridine_conc':'Pyridine Concentration (PPM)','wire_speed':'Wire Speed (mm/s)',
                  'rotation_speed':'Rotation Speed (degrees/s)','flow_rate':'Flow Rate (ml/hr)','poly_wall?':'Polystyrene wall used',
                  'mass_pcl':'PCL g/ml','flat_or_fibre':'flat_or_fibre'}

#Accepted R^2 value: fitted curves are only drawn when their R^2 exceeds this threshold
accept_r2 = 0.9

/Users/ristomartin/Dropbox/UniStuff/DPhil/Experimental/python_analysis/micro_fibre_dists/fibre_diameter


## Generation of raw data sets to be further processed

In [31]:
#Convert every compiled histogram csv of every data set in data_sets into a normalised
#frequency/diameter table and save it into hist_dists under the same base name
for dat_loc in data_sets:
    #opening each file within each of the data sets to then be processed
    for filename in os.listdir(dat_loc):
        #only compiled csv files are processed
        if not filename.endswith(".csv"):
            continue
        #reads the specified file and opens it as a dataframe
        df = pd.read_csv(os.path.join(dat_loc,filename))

        #Trimming df of rows of 0 frequency
        df = df[df.Sum_of_Frequencies != 0]
        #diameters are twice the tabulated radii
        diameters = df['Radius']*2

        #the per-image frequency columns are the ones whose heading ends with csv
        df_d = [col for col in df if col.endswith('csv')]
        #mean frequency across images, computed once and normalised to percentages
        mean_frequency = df[df_d].mean(axis = 1)
        df = pd.DataFrame((mean_frequency/mean_frequency.sum())*100)

        #give name to column with normal frequencies
        df.columns = ['normal_frequency']
        #add in column of diameters associated with each normal frequency
        df['diameter'] = diameters

        #add metadata to reconstructed data frame, only on the row labelled 1 to minimise size of data frame
        #NOTE(review): if the row labelled 1 was removed by the zero-frequency trim above, this assignment
        #creates a new row holding only metadata (NaN frequency/diameter); the sort below puts it back in order
        key_rows = sample_key.loc[sample_key['filenamer'] == filename]
        for variable in variables:
            df.loc[1, variable] = key_rows[variable].iloc[0]
        #re-sort the dataframe according to its index
        df = df.sort_index(axis=0)

        #save each dataframe as a csv file so may be recalled later
        #os.path.splitext removes only the extension; str.strip('.csv') removed any leading/trailing
        #'.', 'c', 's' or 'v' characters and so corrupted names such as 'fibres.csv'
        df.to_csv(hist_dists+os.path.splitext(filename)[0]+'.csv')

# Separation of distributions by variables

## Initially create a dictionary of all files which have the same parameters used

In [32]:
#Collate the histogram files in hist_dists into sets sharing the same values of `variables`
#NOTE(review): SimilarSetCollation is defined in custom_functions (not shown here); judging by the recorded
#output it returns a dict mapping comma-joined variable values to lists of unique_ids - confirm against its definition
para_comb = SimilarSetCollation(hist_dists,variables,sample_key,'filenamer','.csv')
print(para_comb)

{'0.14,1.0': [424, 423], '0.1,1.0': [417, 418], '0.12,1.0': [421], '0.08,1.0': [425]}


## Combination of distributions with same parameters

In [33]:
#create list of histogram files in target directory
target_files = [x for x in os.listdir(hist_dists) if x.endswith('csv')]

#For each parameter combination pool the recreated diameter distributions of all its samples
for identifier in para_comb:
    unique_ids = para_comb[identifier]
    #nothing to pool (and no unique_id to name the output after) for an empty set
    if len(unique_ids) == 0:
        continue
    #flat list of every recreated diameter for this parameter combination
    pooled_diameters = []
    #cycle through each of the unique samples
    for unique_id in unique_ids:
        #retrieve file name associated with unique_id
        filename = sample_key.loc[sample_key['unique_id'] == unique_id, 'filenamer'].iloc[0]
        #remove any file extension from name
        filename = filename.split('.')[0]
        #histogram files are matched by name fragment as they may carry an extension or number suffix
        filenames = [s for s in target_files if filename in s]
        if len(filenames) == 0:
            #report the gap instead of silently leaving the sample out of the pooled distribution
            print('No histogram file found for unique_id '+str(unique_id)+' ('+filename+')')
            continue
        for name in filenames:
            #open the dataframe associated with file name
            df = pd.read_csv(hist_dists+name, index_col = 0)
            #rows without a frequency or diameter (e.g. a metadata-only row) cannot be recreated
            df = df.dropna(subset=['diameter','normal_frequency'])
            #recreate original distribution: repeat each diameter by its rounded percentage frequency
            for diameter, frequency in zip(df['diameter'], df['normal_frequency']):
                pooled_diameters.extend([diameter] * int(round(frequency)))

    #build the pooled frame once (DataFrame.append no longer exists in pandas >= 2.0)
    complied_df = pd.DataFrame({'data': pooled_diameters})

    #save file out, named after the last unique_id of the set as later cells look it up by that id
    complied_df.to_csv(raw_dists+str(unique_id)+'.csv')

## Creating dictionary of distributions which only vary by one variable

In [34]:
def _matching_name(matching_params):
    """Flatten a list of 'variable-value' labels into the 'variable, value, ...' key string."""
    #strip the list punctuation and split back into the individual 'variable-value' labels
    labels = str(list(matching_params)).replace("'", '').replace(" ", '').replace("[", '').replace("]", '').split(',')
    split_params_name = []
    for label in labels:
        #separate each variable name from its value
        split_params_name.extend(label.split('-'))
    #re-join as a comma separated string, e.g. ['flow_rate-1.0'] -> 'flow_rate, 1.0'
    return str(split_params_name).strip('[]').replace("'",'')


def VariableSep2(data_set_dir,variables,sample_key):
    """Group distribution files into sets which differ in exactly one variable.

    Parameters
    ----------
    data_set_dir : str
        Directory holding '<unique_id>.csv' files; files not ending in '.csv' are ignored.
    variables : list of str
        Column names of sample_key used to separate the distributions.
    sample_key : pandas.DataFrame
        Table with a 'unique_id' column and one column per entry of variables.

    Returns
    -------
    dict
        {matching: {not_matching: [unique_id, ...]}} where matching is the
        'variable, value, ...' string of the variables held constant (ordered by
        variable name) and not_matching is the name of the one variable that varies.
        Only sets containing at least two files are returned; an empty directory
        gives an empty dict.

    Raises
    ------
    ValueError
        If a csv file name does not start with an integer unique id.
    IndexError
        If a unique id is not present in sample_key.
    """
    #Import libraries
    import os

    #dictionary of every unique_id and its list of 'variable-value' labels, in the order of variables
    id_dic = {}
    for filename in os.listdir(data_set_dir):
        #only consider the csv distribution files
        if not filename.endswith(".csv"):
            continue
        #extract unique id from file name
        uni_id = int(filename.split('.')[0])
        #look up the value of every variable for this unique id
        id_dic.setdefault(uni_id, [
            variable+'-'+str(sample_key.loc[sample_key['unique_id'] == uni_id, variable].iloc[0])
            for variable in variables])

    #first collect every file under every combination of n(variables) - 1 values it belongs to
    matched_para_comb = {}
    for uni_id, labels in id_dic.items():
        #leave each variable out in turn: the remaining values are what has to match
        for left_out, not_matching in enumerate(variables):
            kept = [(variable, label)
                    for position, (variable, label) in enumerate(zip(variables, labels))
                    if position != left_out]
            #matching values are keyed in order of variable name
            kept.sort(key=lambda pair: pair[0])
            matching = _matching_name([label for variable, label in kept])
            #the left-out variable is known by position, so no text search of the key is needed
            #(a substring test confused variables such as 'rate' and 'flow_rate')
            files = matched_para_comb.setdefault(matching, {}).setdefault(not_matching, [])
            #append if unique id is not present in list
            if uni_id not in files:
                files.append(uni_id)

    #remove redundant sets containing only a single file, then any matching key left empty
    comparable = {}
    for matching, by_variable in matched_para_comb.items():
        kept_sets = {name: files for name, files in by_variable.items() if len(files) >= 2}
        if kept_sets:
            comparable[matching] = kept_sets

    return(comparable)

In [35]:
#Group the pooled distributions in raw_dists into sets that differ in only one of `variables`;
#the result maps each matching-value key to {varied variable: [unique_ids]} and drives every cell below
matched_para_comb = VariableSep2(raw_dists,variables,sample_key)
print(matched_para_comb)

{'flow_rate, 1.0': {'mass_pcl': [425, 418, 423, 421]}}


## Creating and deleting output files

In [36]:
#NOTE(review): creation_and_destruction comes from custom_functions (not shown here); presumably it prepares the
#processed/<matching>/<not_matching>/ output directories that the cells below write into - confirm against its definition
creation_and_destruction(processed,matched_para_comb)

# Plotting of Data and summary data output

In [37]:
#Setting characteristic y axis labels
#raw strings keep the LaTeX '\mu' literal without relying on Python passing through an invalid escape sequence
charactieristic_label = {'porosity':'Porosity (%)', 'max_wall_thickness':r'Maximum membrane wall thickness ($\mu$m)',
                         'min_wall_thickness':r'Minimum membrane wall thickness ($\mu$m)'}

## Combination of data sets based on matching parameters

In [38]:
#For every set of distributions differing in a single variable, write a summary-statistics table and a
#side-by-side table of the recreated distributions (one column per value of the varied variable)
for matching in matched_para_comb:
    #directory names carry no whitespace
    matchings = matching.replace(' ','')
    for not_matching in matched_para_comb[matching]:
        not_matchings = not_matching.replace(' ','')
        #common stem of both output files of this set
        output_stem = processed+'/'+matchings+'/'+not_matchings+'/'+matchings+'_'+not_matchings
        #holder for the concatenated recreated data sets
        con_df = pd.DataFrame()
        #holder for the summary data, one row per file
        sum_df = pd.DataFrame()
        #values of the varied variable in the order the files are read
        nm_order = []
        #row counter for the summary table
        file_count = 0
        for uni_id in matched_para_comb[matching][not_matching]:
            #the recreated distribution is stored under its unique id
            filename = str(int(uni_id))+'.csv'
            df = pd.read_csv(raw_dists+filename, index_col = 0)

            #value of the varied variable for this sample, taken from the sample key
            nm = sample_key.loc[sample_key['unique_id'] == uni_id, not_matching].iloc[0]
            nm_order.append(nm)

            #summary statistics; the percentile based ones are evaluated on the non-missing values
            observed = df['data'].dropna()
            skew = df['data'].skew()
            summary = {
                'median': np.percentile(observed, 50),
                'IQR': iqr(observed),
                '25_quartile': np.percentile(observed, 25),
                '75_quartile': np.percentile(observed, 75),
                'mean': df['data'].mean(),
                'SD': df['data'].std(),
                'skew': skew,
                #a skewed distribution is better described by its median, otherwise use the mean
                'stat': 'median' if abs(skew) > 0.5 else 'mean',
                'unique_id': uni_id,
                not_matching: nm,
            }
            for column, value in summary.items():
                sum_df.loc[file_count, column] = value

            #the raw data column is relabelled with the value of the varied variable
            char_dat = df.filter(['data'], axis=1).rename(columns={'data':nm})
            #and joined column-wise onto the distributions read so far
            con_df = pd.concat([con_df, char_dat], axis=1, sort=False)
            file_count += 1

        #save out summary data
        sum_df.to_csv(output_stem+'_summary.csv')
        #save out concatenated data
        con_df.to_csv(output_stem+'_concatinated.csv')

## Violin Plots of microfibre diameter distributions

In [39]:
#Draw one violin plot per set of distributions differing in a single variable, annotated with
#pairwise Mann-Whitney comparisons, and save it next to the set's data files

#tick label translations for the categorical variables (keys are the float-formatted codes used as column names)
solution_names = {'0.0':'Trial','1.0':'Initial' ,'2.0':'S1','3.0':'S2','4.0':'S3','5.0':'S4','6.0':'S5'}
poly_wall_names = {'0.0':'No','1.0':'Yes'}

#Routing through each level of the dictionary, initially parsing each of the matching parameter sets
for matching in matched_para_comb:
    #strip out whitespace from matching to find directories
    matchings = matching.replace(' ','')
    #for each of the matching parameter sets route to the corresponding not matching parameters
    for not_matching in matched_para_comb[matching]:
        #strip out whitespace from not_matching to find directories
        not_matchings = not_matching.replace(' ','')
        #directory holding the data of this set, also used for the figure
        fig_dir = processed+'/'+matchings+'/'+not_matchings+'/'

        #open distribution data as dataframe
        print(matchings+not_matchings)
        df = pd.read_csv(fig_dir+matchings+'_'+not_matchings+'_concatinated.csv', index_col = 0)
        print(df.head())

        #convert multiple columns of data into single column with header as variable
        data = pd.melt(df).dropna()
        print(data)

        #order the violins by the magnitude of the varied variable, largest first
        nm_order = np.unique(data['variable'].values).tolist()
        print(nm_order)
        nm_order.sort(key = float, reverse=True)

        #create new template for figure
        fig, ax = plt.subplots()
        #plot violin plot into figure
        v_plt = sns.violinplot( x='variable', y= 'value', cut=0, data = data, order = nm_order, ax = ax)

        #every unordered pair of groups is compared exactly once; the previous nested loops reused
        #the same loop variable and therefore only ever produced neighbouring pairs
        pre_boxPairList = list(itertools.combinations(nm_order, 2))
        #adding statistical annotation whenever there is at least one pair (i.e. two or more groups)
        if len(pre_boxPairList) > 0:
            add_stat_annotation(x='variable', y= 'value', data = data, boxPairList=pre_boxPairList,
                                test='Mann-Whitney', textFormat='star', loc='inside', verbose = 0,order = nm_order, ax = ax)

        #Set plot labels
        #Retrieve x-axis label associated with the varied variable
        xlabel = variable_label[not_matching]

        #Add correct x tick labels, starting from the existing tick labels
        labels = [t.get_text()  for t in ax.get_xticklabels()]
        xlabel_list = []
        for label in labels:
            #categorical variables are stored as codes and cross-referenced with their names
            if not_matching == 'solution_name':
                label = solution_names[label]
            elif not_matching == 'poly_wall?':
                label = poly_wall_names[label]
            xlabel_list.append(label)
        #using list of variable labels update x-axis tick labels
        ax.set_xticklabels(xlabel_list)

        #y-axis label (raw string so the LaTeX '\mu' is not read as an escape sequence)
        ylabel = r'Micro-fibre diameter ($\mu$m)'

        #set the x and y axis labels
        v_plt.set(xlabel=xlabel, ylabel= ylabel)
        #set the xlabels rotation
        v_plt.set_xticklabels(v_plt.get_xticklabels(),rotation = -0)

        #save plot out into the directory of this set
        checkdir(fig_dir)
        fig.savefig(fig_dir+matchings+'_'+not_matchings+'.png',bbox_inches='tight', dpi=300)

        #Close figure to hide previews
        plt.close(fig)

flow_rate,1.0mass_pcl
     0.08    0.1   0.14   0.12
0  0.2410  0.625  0.250  0.625
1  0.4820  1.250  0.250  1.250
2  0.4820  1.250  0.375  1.250
3  0.4820  1.250  0.375  1.250
4  0.7228  1.250  0.375  1.250
    variable    value
0       0.08   0.2410
1       0.08   0.4820
2       0.08   0.4820
3       0.08   0.4820
4       0.08   0.7228
..       ...      ...
697     0.12   8.1250
698     0.12   8.7500
699     0.12   9.3750
700     0.12  10.0000
701     0.12  10.6250

[692 rows x 2 columns]
['0.08', '0.1', '0.12', '0.14']


## Fibre diameter as a function of the varied parameter

In [40]:
#For every set of distributions differing in a single continuous variable, plot the median diameter
#(with quartile error bars) against that variable, fit log and linear curves and store the fit
#parameters in the first row of the set's summary table

#Routing through each level of the dictionary, initially parsing each of the matching parameter sets
for matching in matched_para_comb:
    #strip out whitespace from matching to find directories
    matchings = matching.replace(' ','')
    #for each of the matching parameter sets route to the corresponding not matching parameters
    for not_matching in matched_para_comb[matching]:
        #strip out whitespace from not_matching to find directories
        not_matchings = not_matching.replace(' ','')
        #directory holding the summary data of this set, also used for the figure
        fig_dir = processed+'/'+matchings+'/'+not_matchings+'/'

        #open summary data as dataframe
        df = pd.read_csv(fig_dir+matchings+'_'+not_matchings+'_summary.csv', index_col = 0)

        #categorical variables cannot be fitted
        if 'solution_name' in not_matching or 'flat_or_fibre' in not_matching:
            continue

        #only rows with every plotted quantity present are used, so x, y and errors stay aligned
        valid = df.dropna(subset=[not_matching,'median','25_quartile','75_quartile','SD'])
        #plot continuous summary data and fit curves only if there are more than 3 data points
        if len(valid) <= 3:
            continue

        #Initialise subplots
        fig, ax = plt.subplots()

        #zeros are nudged to 0.1, as in the original analysis (presumably for the log fit - confirm)
        xdata = np.asarray([1.0e-1 if value==0 else value for value in valid[not_matching]])
        ydata = np.asarray([1.0e-1 if value==0 else value for value in valid['median']])
        #asymmetric error bars from the lower and upper quartiles
        quartile_err = (valid['median']-valid['25_quartile'],valid['75_quartile']-valid['median'])

        ax.errorbar(xdata, ydata, yerr=quartile_err, fmt='o')

        #Set plot labels
        #Retrieve x-axis label associated with the varied variable
        xlabel = variable_label[not_matching]
        #y-axis label (raw string so the LaTeX '\mu' is not read as an escape sequence)
        ylabel = r'Micro-fibre diameter ($\mu$m)'
        #set the x and y axis labels; labels are set only once they are defined in this cell, the
        #earlier premature call relied on xlabel left over from the violin plot cell
        ax.set(xlabel=xlabel, ylabel=ylabel)

        ####Fit log curve with all scalars#####
        #standard deviations are passed to the fits as the uncertainties
        yerr = np.asarray([1.0e-1 if value==0 else value for value in valid['SD']])
        #setting bounds to prevent -ve log values
        bounds = (0,-np.inf, -np.inf), (np.inf, np.inf, np.inf)
        #fit the curve
        (m,b,c,r_squared,logcurvex,logcurvey) = logfit(xdata,ydata,yerr,bounds)
        #plot only if the fit is acceptable
        if r_squared > accept_r2:
            ax.plot(logcurvex,logcurvey,'r', linewidth=1)

        #saving log fit data into the first row of the summary table
        key = df.index[0]
        df.loc[key,'m'] = m
        df.loc[key,'b'] = b
        df.loc[key,'c'] = c
        df.loc[key,'log_r_squared'] = r_squared

        ##Fit linear function
        (m,c,r_squared,lincurvex,lincurvey) = linfit(xdata,ydata,yerr,bounds=None)

        #plot only if the fit is acceptable
        if r_squared > accept_r2:
            ax.plot(lincurvex,lincurvey,'b', linewidth=1)

        #saving linear fit data into the first row of the summary table
        df.loc[key,'slope'] = m
        df.loc[key,'intercept'] = c
        df.loc[key,'lin_r_squared'] = r_squared

        #save plot out into the directory of this set
        checkdir(fig_dir)
        fig.savefig(fig_dir+matchings+'_'+not_matchings+'_microfibre_errorbarplt.png',bbox_inches='tight', dpi=300)
        #close this specific figure so figures do not accumulate over the loop
        plt.close(fig)

        #saving updated summary dataframe out
        df.to_csv(fig_dir+matchings+'_'+not_matchings+'_summary.csv')