In [None]:
import pandas as pd
import seaborn as sns
from scipy import stats
import numpy as np
import matplotlib.pyplot as plt
import numpy.matlib
import statannotations
from statannotations.Annotator import Annotator
#pip install --upgrade seaborn==0.11.2
#REQUIRED VERSION 0.11.2

# Functions

In [None]:
import math

from matplotlib.patches import Rectangle
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors


def plot_colortable(colors, *, ncols=4, sort_colors=True):
    """Draw a table of named colour swatches and return the figure.

    Parameters
    ----------
    colors : mapping
        Maps each colour name to a value matplotlib accepts as ``facecolor``
        (for example ``mcolors.XKCD_COLORS``).
    ncols : int, default 4
        Number of swatch/label columns. Rows are filled top to bottom, then
        the next column starts; the figure width scales with this value.
    sort_colors : bool, default True
        When exactly ``True`` the names are ordered by the (hue, saturation,
        value) of the colour each name resolves to; otherwise the mapping's
        own iteration order is kept.

    Returns
    -------
    matplotlib.figure.Figure
        The figure holding the swatch table.
    """
    # Layout constants; figure size is computed from them at 72 dpi.
    cell_width = 212
    cell_height = 22
    swatch_width = 48
    margin = 12

    # Sort colors by hue, saturation and value.
    if sort_colors is True:
        names = sorted(
            colors, key=lambda c: tuple(mcolors.rgb_to_hsv(mcolors.to_rgb(c))))
    else:
        names = list(colors)

    n = len(names)
    nrows = math.ceil(n / ncols)

    # Width follows the requested column count (it used to be fixed at 4,
    # which clipped or padded the table for any other ncols).
    width = cell_width * ncols + 2 * margin
    height = cell_height * nrows + 2 * margin
    dpi = 72

    fig, ax = plt.subplots(figsize=(width / dpi, height / dpi), dpi=dpi)
    fig.subplots_adjust(margin/width, margin/height,
                        (width-margin)/width, (height-margin)/height)
    ax.set_xlim(0, cell_width * ncols)
    # Inverted y-limits so row 0 is drawn at the top.
    ax.set_ylim(cell_height * (nrows-0.5), -cell_height/2.)
    ax.yaxis.set_visible(False)
    ax.xaxis.set_visible(False)
    ax.set_axis_off()

    for i, name in enumerate(names):
        # Column-major placement: fill a column before moving right.
        row = i % nrows
        col = i // nrows
        y = row * cell_height

        swatch_start_x = cell_width * col
        text_pos_x = cell_width * col + swatch_width + 7

        ax.text(text_pos_x, y, name, fontsize=14,
                horizontalalignment='left',
                verticalalignment='center')

        ax.add_patch(
            Rectangle(xy=(swatch_start_x, y-9), width=swatch_width,
                      height=18, facecolor=colors[name], edgecolor='0.7')
        )

    return fig

# Render the swatch table for matplotlib's XKCD colour mapping; the returned
# figure is kept in xkcd_fig. Uncomment the next line to also write it to disk.
xkcd_fig = plot_colortable(mcolors.XKCD_COLORS)
#xkcd_fig.savefig("XKCD_Colors.png")

In [None]:
def FAindex6(rightMeasurements, leftMeasurements):
    """Fluctuating-asymmetry index: variance of the size-scaled R - L differences.

    Based on: https://www.annualreviews.org/doi/pdf/10.1146/annurev.es.17.110186.002135
    Page 395, table 1

    For every pair ``i`` the signed difference ``R_i - L_i`` is divided by the
    pair mean ``(R_i + L_i) / 2``; the index is the (population) variance of
    those ratios as computed by ``np.var``.

    Parameters
    ----------
    rightMeasurements, leftMeasurements : array-like of numbers
        Right- and left-side measurements, paired by position. Neither input
        is modified.

    Returns
    -------
    list
        ``[res, areaNormalized]`` where ``res`` is the variance and
        ``areaNormalized`` is a new float array holding the per-pair ratios.

    Raises
    ------
    ValueError
        If the two inputs do not have the same shape, since the values could
        not be paired one-to-one.
    """
    # Work on float arrays so integer input is not truncated and the caller's
    # data is never written to (the old code aliased and overwrote the right
    # measurements).
    right = np.asarray(rightMeasurements, dtype=float)
    left = np.asarray(leftMeasurements, dtype=float)

    if right.shape != left.shape:
        raise ValueError(
            'FAindex6 needs paired measurements: got %d right and %d left values'
            % (right.size, left.size))

    areaNormalized = (right - left) / ((right + left) / 2)

    res = np.var(areaNormalized)
    return [res, areaNormalized]
    

# Defining initial conditions

In [None]:
# Defining initial folders
# (previous configuration, kept for reference)
#inputFolder = '/Volumes/lmcb4/ym_giulia_s1019_f1222/Katherine_project/Quantitative_wing_analysis/AxioPlan_Images'
#conditionsFolder = ['18Nov2022', '16Dec2022/yw_0.1mm/macro_output', '13Jan2023/yw_0.2mm/macro_output', 
#                    '26Jan2023/yw_0.5mm/Giulia_macro_output']
#conditions = ['straight', '0.1', '0.2', '0.5']
#typeOfAnalysis = 'CPR_output/Output_area.dat'
#sexes = {'F', 'M', ''}

# Root folder of the imaging data; each entry of conditionsFolder is a
# sub-folder of it and is labelled by the entry of `conditions` at the same index.
inputFolder = '/Volumes/lmcb4/ym_giulia_s1019_f1222/Katherine_project/Quantitative_wing_analysis/AxioPlan_Images'
conditionsFolder = ['18Nov2022', '16Dec2022/yw_0.1mm/macro_output', '13Jan2023/yw_0.2mm/macro_output',
                    '26Jan2023/yw_0.3mm/macro_output', '26Jan2023/yw_0.5mm/macro_output']
conditions = ['straight', '0.1', '0.2', '0.3', '0.5']

# The three collections below are lists rather than sets on purpose: the
# iteration order of a set of strings changes between kernel starts, which
# reshuffled column order, hue order and the order of the saved figures.
# Files read (relative to each condition folder).
typesOfAnalysis = ['CPR_output/Fluctuating_asymmetry/Output_area.dat',
                   'CPR_output/Fluctuating_asymmetry/Output_vLength.dat']
# '' stands for "no sex filter" (both sexes pooled).
sexes = ['F', 'M', '']

# Column names to analyse. A name joined with '_' that is not itself a column
# is treated downstream as the sum of its parts (e.g. 'A4_A5' = A4 + A5).
features = ['L1', 'L2', 'L3', 'L4', 'L5', 'L6', 'L7', 'L8', 'L9',
            'L10', 'L11', 'L12', 'L13', 'L14', 'L15', 'L16', 'L17', 'L18',
            'A1', 'A2', 'A3', 'A4', 'A5', 'A6', 'A7', 'A8', 'A4_A5', 'A2_A3',
            'L9_L10_L11', 'L7_L8', 'L12_L13']

## Loading data

In [None]:
# For every condition folder: read each analysis file (tab-separated), tag its
# rows with the condition label, lay the files side by side, and drop columns
# whose values repeat an earlier column. The per-condition tables are then
# stacked into df_analysis.
data = []
for numCondition, folderName in enumerate(conditionsFolder):
    dataCondition = [
        pd.read_csv(inputFolder + '/' + folderName + '/' + typeOfAnalysis, sep='\t')
          .assign(condition=conditions[numCondition])
        for typeOfAnalysis in typesOfAnalysis
    ]

    sideBySide = pd.concat(dataCondition, axis=1)
    # drop_duplicates works on rows, so transpose, de-duplicate, transpose back.
    dfCondition = sideBySide.T.drop_duplicates().T
    data.append(dfCondition)

df_analysis = pd.concat(data)

## Compute measurements

In [None]:
# For every (condition, sex) combination: select the right- and left-wing rows,
# compute the fluctuating-asymmetry index on the 'AWing' column, and build a
# per-wing table of each feature expressed as a percentage of 'AWing'.
# Outputs: results_df (one row per combination) and individualResults_df.
results = [];
individualResults = [];
for numCondition in range(len(conditions)):
    condition = conditions[numCondition]
    for sex in sexes:
        # Boolean mask over all rows of df_analysis for the current condition.
        generalCondition = np.array(df_analysis['condition'] == condition);

        # Two data layouts are supported. This test looks at the whole table
        # (not only the current condition): if any CPFile name contains '_F_',
        # sex and side are read from the file name; otherwise from the
        # 'Sex' and 'Tags' columns.
        if np.array(df_analysis['CPFile'].str.contains('_F_')).any():
            if sex == '':
                # The empty pattern matches every file name, i.e. no sex filter.
                generalCondition = generalCondition & np.array(df_analysis['CPFile'].str.contains(''))
            else:
                generalCondition = generalCondition & np.array(df_analysis['CPFile'].str.contains('_'+sex+'_'))

            # Side is taken from the '_R' / '_L' substring of the file name.
            rightWings = df_analysis[np.array(df_analysis['CPFile'].str.contains('_R')) & generalCondition];
            leftWings = df_analysis[np.array(df_analysis['CPFile'].str.contains('_L')) & generalCondition];
        else:
            generalCondition = generalCondition & np.array(df_analysis['Sex'].str.contains(sex))

            # Side is encoded in 'Tags' as '<condition>_right' / '<condition>_left'.
            rightWings = df_analysis[np.array(df_analysis['Tags'] == condition+'_right') & generalCondition]
            leftWings = df_analysis[np.array(df_analysis['Tags'] == condition+'_left') & generalCondition]

        # Quantify: FAindex6 pairs right and left values by row position.
        # NOTE(review): assumes both selections list the same flies in the same order - TODO confirm.
        [FAindex6_res, areaNormalized] = FAindex6(np.array(rightWings['AWing']), np.array(leftWings['AWing']))

        # Measures normalised by area of the wing.
        # Start each side's table with one [condition, sex, side] label row per wing;
        # the pooled group ('' sex) is labelled 'all' here.
        if sex != '':
            conditionsRepeated_left = np.matlib.repmat([condition, sex, 'left'], leftWings.shape[0], 1)
            conditionsRepeated_right = np.matlib.repmat([condition, sex, 'right'], rightWings.shape[0], 1)
        else:
            conditionsRepeated_left = np.matlib.repmat([condition, 'all', 'left'], leftWings.shape[0], 1)
            conditionsRepeated_right = np.matlib.repmat([condition, 'all', 'right'], rightWings.shape[0], 1)

        for feature in features:
            if feature in leftWings.columns:
                # Feature is a real column: express it as a percentage of AWing.
                wingValues_left = np.array(100*leftWings[feature]/leftWings['AWing']);
                wingValues_right = np.array(100*rightWings[feature]/rightWings['AWing']);
            else:
                # Composite feature such as 'A4_A5': sum the columns named by
                # the '_'-separated parts, then express as a percentage of AWing.
                addFeatures = feature.split('_')
                addingFeatures = leftWings[addFeatures]
                wingValues_left = np.array(100*addingFeatures.sum(axis=1)/leftWings['AWing']);
                addingFeatures = rightWings[addFeatures]
                wingValues_right = np.array(100*addingFeatures.sum(axis=1)/rightWings['AWing']);

            # Append the feature as a new column next to the label columns
            # (the values are converted back to numbers with pd.to_numeric below).
            conditionsRepeated_left = np.column_stack((conditionsRepeated_left, wingValues_left))
            conditionsRepeated_right = np.column_stack((conditionsRepeated_right, wingValues_right))

        # Column names rely on list(features) iterating in the same order as the loop above.
        columnNames = np.append(['condition', 'sex', 'side'], numpy.array(list(features)))
        # NOTE(review): only the left-wing table is stored; conditionsRepeated_right
        # is built above but never used - confirm this is intended.
        newIndividualResults = pd.DataFrame(conditionsRepeated_left, columns=columnNames)
        individualResults.append(newIndividualResults)

        # Save results into a DF; 'n' is the number of selected rows divided by two
        # and 'sex' keeps the raw label ('' for the pooled group).
        newResult_df = pd.DataFrame([[condition, sex, sum(generalCondition)/2, FAindex6_res, areaNormalized]], \
                       columns=['condition', 'sex', 'n', 'FAindex', 'areaNormalised_LR'])
        results.append(newResult_df)

individualResults_df = pd.concat(individualResults)
results_df = pd.concat(results)

# Convert the feature columns to numeric dtype for plotting and statistics.
features_array = numpy.array(list(features))
individualResults_df[features_array] = individualResults_df[features_array].apply(pd.to_numeric)

## Wing features from Wings4 normalised by its total wing area

In [None]:
# Switch read by the feature-plot cell: True annotates the "1g"/"0g" condition
# pairs, False annotates the "straight"/"0.1" pairs.
Pablo=False

In [None]:
# Group comparisons drawn on every box plot, written as
# ((condition, sex), (condition, sex)); which set is used depends on `Pablo`.
if Pablo:
    pairs = [(("1g", "F"), ("1g", "M")),
             (("1g", "F"), ("0g", "F")),
             (("1g", "M"), ("0g", "M")),
             (("0g", "F"), ("0g", "M")),
             (("0g", "all"), ("1g", "all"))]
else:
    # conditions available: ['straight', '0.1', '0.2', '0.3', '0.5']
    pairs = [(("straight", "F"), ("straight", "M")),
             (("straight", "F"), ("0.1", "F")),
             (("straight", "M"), ("0.1", "M")),
             (("0.1", "F"), ("0.1", "M")),
             (("0.1", "all"), ("straight", "all"))]

# One figure per feature: box plot by condition, split by sex, annotated with
# Mann-Whitney significance stars, saved as <inputFolder>/wingFeatures<feature>.png.
for featureName in features_array:
    fig = plt.figure(facecolor='w')
    plotArgs = dict(data=individualResults_df, x="condition", y=featureName, hue="sex")
    ax = sns.boxplot(**plotArgs)

    # Hand the annotator its own copy of the pair list for each figure.
    annotator = Annotator(ax, list(pairs), **plotArgs)
    annotator.configure(test='Mann-Whitney', text_format='star', loc='inside')
    annotator.apply_and_annotate()

    outputPath = inputFolder+'/wingFeatures'+featureName+'.png'
    fig.savefig(outputPath, dpi=300, bbox_inches='tight')

## FAi 6 analysis

In [None]:
#https://www.w3schools.com/colors/colors_xkcd.asp

# Flatten results_df in one pass into two sets of parallel lists:
#  - toDisplay_FAi_*: one entry per (condition, sex) row, for the bar plot below;
#  - toDisplay_condition / _sex / _areaNormalised: one entry per value stored in
#    areaNormalised_LR, consumed by the box-plot cell that follows.
toDisplay_FAi_condition = []
toDisplay_FAi_sex = []
toDisplay_FAi_values = []
toDisplay_sex = []
toDisplay_condition = []
toDisplay_areaNormalised = []
for row in results_df.itertuples():
    # The pooled group is stored with an empty sex label; display it as 'All'.
    rowSex = 'All' if row.sex == '' else row.sex

    toDisplay_FAi_condition.append(row.condition)
    toDisplay_FAi_sex.append(rowSex)
    toDisplay_FAi_values.append(row.FAindex)
    for value in row.areaNormalised_LR:
        toDisplay_sex.append(rowSex)
        toDisplay_condition.append(row.condition)
        toDisplay_areaNormalised.append(value)

df_toDisplay = pd.DataFrame(dict(Condition=toDisplay_FAi_condition, Sex = toDisplay_FAi_sex, FAi=toDisplay_FAi_values))
cax = sns.barplot(data=df_toDisplay, x="Condition", y="FAi", hue="Sex")
# seaborn has no module-level savefig (the old `sns.savefig` call raised
# AttributeError); save through the figure that owns the Axes barplot returned.
cax.figure.savefig(inputFolder+'/FAiIndex.png', dpi=300, bbox_inches='tight')

In [None]:
# Long-format table with one row per value collected from areaNormalised_LR,
# drawn as box plots by condition and sex; the current figure is then saved.
df_toDisplay = pd.DataFrame({
    "Condition": toDisplay_condition,
    "Sex": toDisplay_sex,
    "AreaNormalised": toDisplay_areaNormalised,
})
sns.catplot(kind="box", data=df_toDisplay, x="Condition", y="AreaNormalised", hue="Sex")
plt.savefig(inputFolder+'/WingAreaNormalised.png', dpi=300, bbox_inches='tight')
#plt.ylim([-0.08, 0.08])

# KATHERINE'S OLD NOTEBOOK

In [None]:
# Length tables to load, in order: straight, 0.1mm, 0.2mm, 0.3mm and 0.5mm wings.
# Each entry pairs the tab-separated file with the value written into the
# constant "Channel_Size" column of that table.
channelSources = [
    ('/Volumes/lmcb4/ym_giulia_s1019_f1222/Katherine_project/Quantitative_wing_analysis/AxioPlan_Images/18Nov2022/CPR_output/Output_vLength.dat', 'straight'),
    ('/Volumes/lmcb4/ym_giulia_s1019_f1222/Katherine_project/Quantitative_wing_analysis/AxioPlan_Images/16Dec2022/yw_0.1mm/macro_output/CPR_Output/Output_vLength.dat', 0.1),
    ('/Volumes/lmcb4/ym_giulia_s1019_f1222/Katherine_project/Quantitative_wing_analysis/AxioPlan_Images/13Jan2023/yw_0.2mm/macro_output/CPR_Output/Output_vLength.dat', 0.2),
    ('/Volumes/lmcb4/ym_giulia_s1019_f1222/Katherine_project/Quantitative_wing_analysis/AxioPlan_Images/26Jan2023/yw_0.3mm/Giulia_macro_output/CPR_Output/Output_vLength.dat', 0.3),
    ('/Volumes/lmcb4/ym_giulia_s1019_f1222/Katherine_project/Quantitative_wing_analysis/AxioPlan_Images/26Jan2023/yw_0.5mm/Giulia_macro_output/CPR_Output/Output_vLength.dat', 0.5),
]

loadedFrames = []
for filepath, channelSize in channelSources:
    channelFrame = pd.read_csv(filepath, sep='\t')
    # Adding new column with a constant value
    channelFrame["Channel_Size"] = channelSize
    loadedFrames.append(channelFrame)

df_straight, df_01, df_02, df_03, df_05 = loadedFrames

#Join all the df's together to create a large dataframe, but it includes the channel size
df_all = pd.concat([df_straight, df_01, df_02, df_03, df_05])

In [None]:
# Confirm that the proportions of M and F in the wings sampled is the same,
# because the difference between M and F global size was significant.

def _split_names_by_sex(channel_df):
    """Split every 'CPFile' name on '_' and pick out the female and male wings.

    A file name counts as female (male) once for every '_'-separated token
    that is exactly 'F' ('M').

    Returns a 7-tuple:
        (CPFile column, the names as a list, the names split into token lists,
         female token lists, female count, male token lists, male count)
    """
    cpfile_names = channel_df['CPFile']
    name_list = cpfile_names.to_list()
    split_names = [name.split("_") for name in name_list]
    females = [tokens for tokens in split_names for token in tokens if token == 'F']
    males = [tokens for tokens in split_names for token in tokens if token == 'M']
    return (cpfile_names, name_list, split_names,
            females, len(females), males, len(males))


#straight
(CPFile_name_straight, CPFile_name_list_straight, split_CPFile_name_list_straight,
 females_length_straight, female_count_straight,
 males_length_straight, male_count_straight) = _split_names_by_sex(df_straight)

#0.1
(CPFile_name_01, CPFile_name_list_01, split_CPFile_name_list_01,
 females_length_01, female_count_01,
 males_length_01, male_count_01) = _split_names_by_sex(df_01)

#0.2
(CPFile_name_02, CPFile_name_list_02, split_CPFile_name_list_02,
 females_length_02, female_count_02,
 males_length_02, male_count_02) = _split_names_by_sex(df_02)

#0.3
(CPFile_name_03, CPFile_name_list_03, split_CPFile_name_list_03,
 females_length_03, female_count_03,
 males_length_03, male_count_03) = _split_names_by_sex(df_03)

#0.5
(CPFile_name_05, CPFile_name_list_05, split_CPFile_name_list_05,
 females_length_05, female_count_05,
 males_length_05, male_count_05) = _split_names_by_sex(df_05)

In [None]:
# Rebuild each file name from its '_'-separated tokens, one list per channel
# and sex, so the names can be matched against the 'CPFile' column again.
joined_males_length_straight = ['_'.join(tokens) for tokens in males_length_straight]
joined_females_length_straight = ['_'.join(tokens) for tokens in females_length_straight]

joined_males_length_01 = ['_'.join(tokens) for tokens in males_length_01]
joined_females_length_01 = ['_'.join(tokens) for tokens in females_length_01]

joined_males_length_02 = ['_'.join(tokens) for tokens in males_length_02]
joined_females_length_02 = ['_'.join(tokens) for tokens in females_length_02]

joined_males_length_03 = ['_'.join(tokens) for tokens in males_length_03]
joined_females_length_03 = ['_'.join(tokens) for tokens in females_length_03]

joined_males_length_05 = ['_'.join(tokens) for tokens in males_length_05]
joined_females_length_05 = ['_'.join(tokens) for tokens in females_length_05]

In [None]:
# Report the number of 'M' and 'F' file-name tokens counted for each channel.
# Each f-string interpolates a tuple, so the five counts print as "(a, b, c, d, e)".
print(f'The count of male for straight, 0.1, 0.2, 0.3, and 0.5mm flies is {male_count_straight, male_count_01, male_count_02, male_count_03, male_count_05}, respectively.')
print(f'The count of female for straight, 0.1, 0.2, 0.3, and 0.5mm flies is {female_count_straight, female_count_01, female_count_02, female_count_03, female_count_05}, respectively.')

In [None]:
#creating lists of the lengths associated with each length number and each channel type.
# For every length column L1..L18 this defines the notebook-level names
#   L<i>_straight, L<i>_01, L<i>_02, L<i>_03, L<i>_05  (one Series per channel)
#   data_L<i>                                          (those five, in that order)
# The names are written straight into the namespace instead of being created by
# exec() on generated source text, which is harder to read and to debug.
framesByChannel = {'straight': df_straight, '01': df_01, '02': df_02,
                   '03': df_03, '05': df_05}
notebookNamespace = globals()

for i in range(1, 19):
    lengthColumn = f'L{i}'
    seriesPerChannel = []
    for channelSuffix, channelFrame in framesByChannel.items():
        lengthSeries = channelFrame[lengthColumn]
        notebookNamespace[f'{lengthColumn}_{channelSuffix}'] = lengthSeries
        seriesPerChannel.append(lengthSeries)
    notebookNamespace[f'data_{lengthColumn}'] = seriesPerChannel

In [None]:
#creating lists of the lengths associated with each length number and each channel type.
#Adding males and females separately
# The names are written straight into the namespace instead of being created by
# exec() on generated source text.

channel_list = ['straight', '01', '02', '03', '05']
notebookNamespace = globals()

# Rebind females_length_<channel> / males_length_<channel> to the rows of
# df_<channel> whose 'CPFile' is in the matching joined_*_length_<channel> list.
for channel in channel_list:
    channelFrame = notebookNamespace[f'df_{channel}']
    notebookNamespace[f'females_length_{channel}'] = channelFrame.loc[
        channelFrame['CPFile'].isin(notebookNamespace[f'joined_females_length_{channel}'])]
    notebookNamespace[f'males_length_{channel}'] = channelFrame.loc[
        channelFrame['CPFile'].isin(notebookNamespace[f'joined_males_length_{channel}'])]

# For every length column L1..L18 define L<i>_<channel>_M and L<i>_<channel>_F,
# plus data_L<i> = [straight_M, straight_F, 01_M, 01_F, ..., 05_M, 05_F].
for i in range(1, 19):
    lengthColumn = f'L{i}'
    seriesBySexAndChannel = []
    for channel in channel_list:
        for sexSuffix, tablePrefix in (('M', 'males_length_'), ('F', 'females_length_')):
            lengthSeries = notebookNamespace[tablePrefix + channel][lengthColumn]
            notebookNamespace[f'{lengthColumn}_{channel}_{sexSuffix}'] = lengthSeries
            seriesBySexAndChannel.append(lengthSeries)
    notebookNamespace[f'data_{lengthColumn}'] = seriesBySexAndChannel

    

In [None]:
#Looping through the 18 different lengths and plotting them each on a graph
#With the males and females separately
# The axes are used directly instead of through exec() on generated source text.

fig1, ((ax1, ax2, ax3), (ax4, ax5, ax6), (ax7, ax8, ax9), (ax10, ax11, ax12), 
       (ax13, ax14, ax15), (ax16, ax17, ax18), ) = plt.subplots(6,3, figsize=(30, 60), dpi=100)

# Panel i (1-based, row by row) shows data_L<i>.
lengthAxes = [ax1, ax2, ax3, ax4, ax5, ax6, ax7, ax8, ax9,
              ax10, ax11, ax12, ax13, ax14, ax15, ax16, ax17, ax18]

# fill colours: boxes alternate between the two colours along the x axis
colors = ['steelblue', 'lightgreen', 'steelblue', 'lightgreen', 'steelblue', 'lightgreen', 
          'steelblue', 'lightgreen', 'steelblue', 'lightgreen']

for i, lengthAx in enumerate(lengthAxes, start=1):

    lengthAx.set_title(f'Variability between Length {i}')
    bplot = lengthAx.boxplot(globals()[f'data_L{i}'], patch_artist=True)
    lengthAx.set_xticklabels(['straight', ' ',  '0.1mm', ' ', '0.2mm', ' ', '0.3mm', ' ', '0.5mm', ' '])
    # NOTE(review): kept inside the loop, in its original position; it looks
    # loop-invariant - confirm before moving it ahead of plt.subplots.
    sns.set_style("white")
    lengthAx.yaxis.grid(True)

    for patch, color in zip(bplot['boxes'], colors):
        patch.set_facecolor(color)


plt.savefig("Variability in Lengths 1-18.png")

In [None]:
# Loop through each length and test whether female straight-channel and female
# 0.1mm wings differ (independent two-sample t-test); lengths with p < 0.01 are flagged.
# Author's note: there is no statistically significant differences between male 0.1mm and straight adult wings.
# The Series are looked up by name instead of running exec() on generated source text.

for i in range(1, 19):
    straightName = f'L{i}_straight_F'
    narrowName = f'L{i}_01_F'
    t, p = stats.ttest_ind(globals()[straightName], globals()[narrowName])
    print(f'P-value between {straightName} and {narrowName} = ', p)
    if p < 0.01:
        print(f'L{i} is less than 0.01!')