## Comparison of Raw Data Correction Methods
Methods compared:
1. Brianna's linear model (linb) (only single mutants and wild type grown on one flat [not four flats])

    For sets with many flats:
        
        formula = f'{col_name} ~ Genotype + (1|Column) + (1|Row) + (1|Flat)'
    
    For sets with one flat:
        
        formula = f'{col_name} ~ Genotype + (1|Column) + (1|Row)'

2. Estimation of marginal means for each genotype using lmer in R (single, double, and wild type)

    Per set, per flat:
    
        formula = TSC ~ Subline + (1|Column) + (1|Row)

3. Spatial Analysis with SpATS (single, double, and wild type)

In [1]:
import datatable as dt
import pandas as pd

### Read in the corrected raw datasets

In [2]:
# Results on single mutants (that were grown on only one flat) for the lmer model Brianna ran in python
og_bri = dt.fread('../data/brianna_comparemean_tolmer_df_withrelative.csv').to_pandas()
og_bri.head()

Unnamed: 0,Set,WT_avg,WT_fitlmer,MA_avg,MA_fitlmer,MB_avg,MB_fitlmer,MA,MB,MA/WT,MB/WT
0,845,30.79,30.28,41.66,42.3,31.21,31.23,AT1G06040,AT2G31380,1.396794,1.031056
1,845E,27.94,27.58,27.1,27.04,25.88,26.72,AT1G06040,AT2G31380,0.980661,0.968813
2,133,406.46,408.68,411.25,414.09,369.34,368.95,AT1G18620,AT1G74160,1.013246,0.902789
3,703,340.87,342.38,228.98,228.4,292.24,291.11,AT1G74160,AT1G18620,0.667103,0.850257
4,72,166.93,166.73,161.08,161.47,151.75,151.59,AT3G14020,AT1G54160,0.968407,0.909187


In [3]:
og_bri.loc[og_bri['Set'].str.contains('1'),:] # 1 is a set grown on four flats; these sets were excluded in her analysis

Unnamed: 0,Set,WT_avg,WT_fitlmer,MA_avg,MA_fitlmer,MB_avg,MB_fitlmer,MA,MB,MA/WT,MB/WT
2,133,406.46,408.68,411.25,414.09,369.34,368.95,AT1G18620,AT1G74160,1.013246,0.902789
14,791,61.32,60.7,71.38,71.87,66.67,65.13,AT1G07180,AT2G29990,1.184029,1.073125
19,61,332.35,333.34,335.11,345.25,359.52,351.93,AT1G10450,AT1G59890,1.03573,1.055769
22,71,90.37,90.36,15.25,14.19,90.21,89.16,AT1G10650,AT1G60610,0.15708,0.986719
28,761,101.93,98.62,86.71,90.74,116.02,108.8,AT1G17540,AT1G72760,0.92014,1.103183
33,771,102.53,101.68,104.87,104.32,81.56,82.26,AT1G21380,AT1G76970,1.025938,0.80904
46,741,102.17,103.0,113.92,114.62,100.68,100.79,AT1G52190,AT3G16180,1.112831,0.978543
47,712,180.59,181.53,172.47,171.68,161.08,161.35,AT1G52420,AT3G15940,0.945766,0.88886
49,719,53.5,52.22,65.55,65.9,118.06,116.88,AT1G54130,AT3G14050,1.262093,2.238322
52,812,136.65,136.78,139.11,139.78,136.5,136.14,AT1G66180,AT5G37540,1.021922,0.995302


In [4]:
# Results on single and double mutants for the lmer model I ran in R (should be emulating Brianna's results)
linb = dt.fread('../data/double_mutant_fitness_data_05312024_all_corrected_linear_b.txt').to_pandas()
# linb = linb.loc[:,~linb.columns.str.contains('.y')]
# linb.insert(5, 'Genotype', linb.Subline.str.split('-').str[1])
# linb.to_csv('../data/double_mutant_fitness_data_05312024_all_corrected_linear_b.txt', sep='\t', index=False)
linb.head()

Unnamed: 0,Set,Flat,Column,Row,Number,Genotype,Subline,MA,MB,GN,...,TSC_emmean,TSC_SE,TSC_df,TSC_lower.CL,TSC_upper.CL,SH_emmean,SH_SE,SH_df,SH_lower.CL,SH_upper.CL
0,1,1,4,1,4,MB,001-MB-2,WT,MUT,5.0,...,38.65701,3.52665,15.317155,31.153665,46.160354,-0.045469,1.924941,17.060851,-4.105636,4.014698
1,1,1,6,1,6,DM,001-DM-2,MUT,MUT,5.0,...,40.079134,3.531284,15.320574,32.566074,47.592193,2.960672,1.871493,15.679768,-1.013312,6.934657
2,1,1,8,1,8,MA,001-MA-2,MUT,WT,5.0,...,51.311661,3.67322,16.969088,43.560769,59.062553,4.511428,1.955215,18.003516,0.40373,8.619125
3,1,1,10,1,10,WT,001-WT-2,WT,WT,9.0,...,54.898058,3.625398,15.993182,47.212292,62.583824,,,,,
4,1,1,6,3,26,MB,001-MB-2,WT,MUT,5.0,...,38.65701,3.52665,15.317155,31.153665,46.160354,-0.045469,1.924941,17.060851,-4.105636,4.014698


In [5]:
# Results on single and double mutants for the lmer model ran per set per flat
lin = dt.fread('../data/double_mutant_fitness_data_05312024_all_corrected_linear.txt').to_pandas()
# lin = lin.loc[:,~lin.columns.str.contains('.y')]
# lin.insert(5, 'Genotype', lin.Subline.str.split('-').str[1])
# lin.to_csv('../data/double_mutant_fitness_data_05312024_all_corrected_linear.txt', sep='\t', index=False)
lin.head()

Unnamed: 0,Set,Flat,Column,Row,Number,Genotype,Subline,MA,MB,GN,...,TSC_emmean,TSC_SE,TSC_df,TSC_lower.CL,TSC_upper.CL,SH_emmean,SH_SE,SH_df,SH_lower.CL,SH_upper.CL
0,1,1,4,1,4,MB,001-MB-2,WT,MUT,5.0,...,41.427617,5.251221,36.40158,30.781726,52.073509,-0.840579,4.091802,35.09412,-9.146582,7.465423
1,1,1,6,1,6,DM,001-DM-2,MUT,MUT,5.0,...,38.897783,4.932536,34.149081,28.875275,48.920291,1.274188,3.762168,34.871122,-6.364429,8.912805
2,1,1,8,1,8,MA,001-MA-2,MUT,WT,5.0,...,44.320512,5.53711,38.173321,33.112889,55.528134,7.474766,3.980375,31.819513,-0.634797,15.584329
3,1,1,10,1,10,WT,001-WT-2,WT,WT,9.0,...,44.88664,5.879645,37.370532,32.977333,56.795946,,,,,
4,1,1,6,3,26,MB,001-MB-2,WT,MUT,5.0,...,41.427617,5.251221,36.40158,30.781726,52.073509,-0.840579,4.091802,35.09412,-9.146582,7.465423


In [6]:
# Results on single and double mutants for the spatial analysis model ran per set per flat
spa = dt.fread('../data/double_mutant_fitness_data_05312024_TSC_corrected_SpATS.txt').to_pandas()
spa.head()

Unnamed: 0,Set,Flat,Column,Row,Number,Type,Genotype,Subline,MA,MB,...,WO,FN,SPF,TSC,SH,R,C,geno,weights,fit.TSC$fitted
0,1,1,4,1,4,BORDER,MB,001-MB-2,WT,MUT,...,1.0,2.0,21.666667,65.0,0.0,1,4,MB,True,66.307385
1,1,1,6,1,6,BORDER,DM,001-DM-2,MUT,MUT,...,0.0,0.0,20.333333,61.0,0.0,1,6,DM,True,52.85026
2,1,1,8,1,8,BORDER,MA,001-MA-2,MUT,WT,...,0.0,0.0,15.5,62.0,0.0,1,8,MA,True,47.525405
3,1,1,10,1,10,BORDER,WT,001-WT-2,WT,WT,...,1.0,0.0,12.5,37.5,,1,10,WT,True,46.041855
4,1,1,6,3,26,INSIDE,MB,001-MB-2,WT,MUT,...,0.0,0.0,16.333333,49.0,0.0,3,6,MB,True,52.446085


In [7]:
spa[['Set', 'Flat', 'Genotype', 'TSC', 'fit.TSC$fitted']].groupby(['Set', 'Flat', 'Genotype']).mean()
# For some reason, the spatial model predicts the mean of the genotypes perfectly,
# but if you look at the mean of the sublines, it's not perfect.

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,TSC,fit.TSC$fitted
Set,Flat,Genotype,Unnamed: 3_level_1,Unnamed: 4_level_1
1,1,DM,39.705882,39.705882
1,1,MA,45.307692,45.307692
1,1,MB,41.166667,41.166667
1,1,WT,44.458333,44.458333
1,2,DM,43.616667,43.616667
...,...,...,...,...
845,1,MB,32.216129,32.216129
845,1,WT,30.793651,30.793651
845E,1,MA,27.104478,27.104478
845E,1,MB,25.883333,25.883333


In [8]:
spa[['Set', 'Flat', 'Subline', 'TSC', 'fit.TSC$fitted']].groupby(['Set', 'Flat', 'Subline']).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,TSC,fit.TSC$fitted
Set,Flat,Subline,Unnamed: 3_level_1,Unnamed: 4_level_1
1,1,001-DM-1,45.500000,42.289215
1,1,001-DM-2,45.000000,42.946836
1,1,001-DM-3,20.666667,36.155583
1,1,001-DM-4,43.000000,43.445554
1,1,001-DM-5,43.666667,30.647423
...,...,...,...,...
845E,1,845-MB-3,20.714286,26.906139
845E,1,845-MB-4,32.941176,24.433929
845E,1,845-WT-1,28.272727,29.288220
845E,1,845-WT-2,21.250000,26.535178


In [9]:
og_bri.shape, linb.shape, lin.shape, spa.shape

((119, 11), (26586, 75), (26586, 75), (25795, 26))

In [10]:
# Reshape Brianna's data
og_Bri = og_bri[['Set', 'WT_fitlmer', 'MA_fitlmer', 'MB_fitlmer']].melt(id_vars = 'Set', value_name='TSC_corrected', var_name='Genotype')
og_Bri.Genotype = og_Bri.Genotype.str.split('_').str.get(0)
og_Bri.head()

Unnamed: 0,Set,Genotype,TSC_corrected
0,845,WT,30.28
1,845E,WT,27.58
2,133,WT,408.68
3,703,WT,342.38
4,72,WT,166.73


In [11]:
bri_raw = og_bri[['Set', 'WT_avg', 'MA_avg', 'MB_avg']].melt(id_vars = 'Set', value_name='TSC_avg_raw', var_name='Genotype')
bri_raw.Genotype = bri_raw.Genotype.str.split('_').str.get(0)
bri_raw.head()

Unnamed: 0,Set,Genotype,TSC_avg_raw
0,845,WT,30.79
1,845E,WT,27.94
2,133,WT,406.46
3,703,WT,340.87
4,72,WT,166.93


### Combine the datasets and calculate the correlation between genotype averages per set for Total Seed Count

In [12]:
# Merge corrected values with the mean of the raw data for single mutants
# Averages are not counted with flat considered since Brianna added flat as a random effect in her analysis
corrected = pd.merge(linb[['Set', 'Genotype', 'TSC_emmean']].\
    groupby(['Set', 'Genotype']).mean(), lin[['Set', 'Genotype', 'TSC_emmean']].\
    groupby(['Set', 'Genotype']).mean(), left_index=True, right_index=True, how='left') # Linear model results to compare with Brianna's

corrected = pd.merge(corrected, spa[['Set', 'Genotype', 'fit.TSC$fitted']].\
    groupby(['Set', 'Genotype']).mean(), 
    left_on=['Set', 'Genotype'], right_index=True, how='left') # Spatial analysis results

corrected = pd.merge(corrected, og_Bri, left_index=True,
    right_on=['Set', 'Genotype'], how='left') # Brianna's python results

corrected = pd.merge(bri_raw, corrected, on=['Set', 'Genotype'], how='left') # Brianna's raw mean data

corrected = pd.merge(corrected, lin[['Set', 'Genotype', 'TSC']].groupby(['Set', 'Genotype']).mean(),
    left_on=['Set', 'Genotype'], right_index=True, how='left') # Raw mean data (to compare with Brianna's)

corrected.columns = ['Set', 'Genotype', 'TSC_avg_raw_bri', 'Brianna_rerun', 'Linear',
                     'SpATS', 'Brianna_og', 'TSC_raw_avg']

corrected

Unnamed: 0,Set,Genotype,TSC_avg_raw_bri,Brianna_rerun,Linear,SpATS,Brianna_og,TSC_raw_avg
0,845,WT,30.79,30.338463,30.338463,30.793651,30.28,30.793651
1,845E,WT,27.94,27.575788,27.575788,27.937500,27.58,27.937500
2,133,WT,406.46,409.687943,409.687943,406.461695,408.68,406.461695
3,703,WT,340.87,342.450414,342.450414,340.866006,342.38,340.866006
4,72,WT,166.93,166.184350,166.184350,166.934028,166.73,166.934028
...,...,...,...,...,...,...,...,...
352,724,MB,350.20,350.827143,350.827143,350.197384,351.13,350.197384
353,739,MB,87.73,87.733615,87.733615,87.733615,87.68,87.733615
354,767,MB,26.60,25.789527,25.789527,26.600000,25.05,26.600000
355,754,MB,42.70,42.511234,42.511234,42.701389,42.62,42.701389


In [13]:
corrected.select_dtypes('float').corr(method='pearson')

Unnamed: 0,TSC_avg_raw_bri,Brianna_rerun,Linear,SpATS,Brianna_og,TSC_raw_avg
TSC_avg_raw_bri,1.0,0.459163,0.459163,0.462071,0.999771,0.462071
Brianna_rerun,0.459163,1.0,1.0,0.999799,0.464803,0.999799
Linear,0.459163,1.0,1.0,0.999799,0.464803,0.999799
SpATS,0.462071,0.999799,0.999799,1.0,0.467512,1.0
Brianna_og,0.999771,0.464803,0.464803,0.467512,1.0,0.467512
TSC_raw_avg,0.462071,0.999799,0.999799,1.0,0.467512,1.0


In [14]:
# Since Brianna only did the single mutant data, there are NAs when I combine her results with mine
corrected.select_dtypes('float').dropna().corr(method='pearson')

Unnamed: 0,TSC_avg_raw_bri,Brianna_rerun,Linear,SpATS,Brianna_og,TSC_raw_avg
TSC_avg_raw_bri,1.0,0.459163,0.459163,0.460658,0.99977,0.460658
Brianna_rerun,0.459163,1.0,1.0,0.999799,0.464803,0.999799
Linear,0.459163,1.0,1.0,0.999799,0.464803,0.999799
SpATS,0.460658,0.999799,0.999799,1.0,0.466119,1.0
Brianna_og,0.99977,0.464803,0.464803,0.466119,1.0,0.466119
TSC_raw_avg,0.460658,0.999799,0.999799,1.0,0.466119,1.0


In [15]:
corrected.loc[corrected['Set'].str.contains('845'),:]

Unnamed: 0,Set,Genotype,TSC_avg_raw_bri,Brianna_rerun,Linear,SpATS,Brianna_og,TSC_raw_avg
0,845,WT,30.79,30.338463,30.338463,30.793651,30.28,30.793651
1,845E,WT,27.94,27.575788,27.575788,27.9375,27.58,27.9375
119,845,MA,41.66,42.406116,42.406116,41.664103,42.3,41.664103
120,845E,MA,27.1,27.042495,27.042495,27.104478,27.04,27.104478
238,845,MB,31.21,31.798327,31.798327,32.216129,31.23,32.216129
239,845E,MB,25.88,26.71577,26.71577,25.883333,26.72,25.883333


In [16]:
# Now include "Flat", since I built models per set per flat
corrected = pd.merge(linb[['Set', 'Flat', 'Genotype', 'TSC_emmean']].\
    groupby(['Set', 'Flat', 'Genotype']).mean(),
    lin[['Set', 'Flat', 'Genotype', 'TSC_emmean']].\
    groupby(['Set', 'Flat', 'Genotype']).mean(), 
    left_index=True, right_index=True, how='left') # Results from linear models

corrected = pd.merge(corrected, spa[['Set', 'Flat', 'Genotype', 'fit.TSC$fitted']].\
    groupby(['Set', 'Flat', 'Genotype']).mean(), 
    left_on=['Set', 'Flat', 'Genotype'], right_index=True, how='left') # Spatial analysis results

corrected = pd.merge(corrected, lin[['Set', 'Flat', 'Genotype', 'TSC']].\
    groupby(['Set', 'Flat', 'Genotype']).mean(),
    left_on=['Set', 'Flat', 'Genotype'], right_index=True, how='left') # Raw mean data

corrected.columns = ['Brianna_rerun', 'Linear', 'SpATS', 'TSC_raw_avg']

corrected

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Brianna_rerun,Linear,SpATS,TSC_raw_avg
Set,Flat,Genotype,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,1,DM,40.079134,38.897783,39.705882,39.705882
1,1,MA,51.311661,44.320512,45.307692,45.307692
1,1,MB,38.657010,41.427617,41.166667,41.166667
1,1,WT,54.898058,44.886640,44.458333,44.458333
1,2,DM,40.079134,43.511920,43.616667,43.616667
...,...,...,...,...,...,...
845,1,MB,31.798327,31.798327,32.216129,32.216129
845,1,WT,30.338463,30.338463,30.793651,30.793651
845E,1,MA,27.042495,27.042495,27.104478,27.104478
845E,1,MB,26.715770,26.715770,25.883333,25.883333


In [17]:
corrected.corr(method='pearson')

Unnamed: 0,Brianna_rerun,Linear,SpATS,TSC_raw_avg
Brianna_rerun,1.0,0.993624,0.994228,0.994228
Linear,0.993624,1.0,0.999371,0.999371
SpATS,0.994228,0.999371,1.0,1.0
TSC_raw_avg,0.994228,0.999371,1.0,1.0


Conclusion:

Of the three approaches I used, it seems to not matter which one I go with. 
But, I don't trust the spatial model, so I won't use that one. Instead, I will 
use the "Linear" model built per set per flat, instead of Brianna's re-run, where 
she built a linear model per set and flat was a random variable for those sets 
grown on 4 flats.

Remaining mystery:

I believe my analysis is correct, so I don't know why there is disagreement with 
what Brianna built in python. I made sure to keep the WT genotype as the reference 
level for the three approaches. And the models I ran agree with the average total 
seed count value, which also agrees with Brianna's averages if you see the dataframe, 
but I don't know why the correlation is so low. The correlation of her results 
is only ~.46 with "Brianna_rerun", "Linear", "SpATS", and "TSC_raw_avg".
Brianna said she has to re-run her stuff and clean up her code, so in the mean time, 
I think I will move forward.

Mystery solved:

Brianna used an older version of the dataset, hence why there is disagreement.
She re-ran the model with the updated raw data (that I used) and at least for 
set 845, our numbers agree, so the correlation should go up to .99 if I were to 
include here.

### Compare the averages for the rest of the fitness traits

In [18]:
all_corrected = pd.concat([lin[['Set', 'Flat', 'Genotype', 'GN', 'PG', 'DTB',
       'LN', 'DTF', 'SN', 'WO', 'FN', 'SPF', 'TSC', 'SH']], 
       lin.loc[:,lin.columns.str.endswith('_emmean')]], axis=1)
all_corrected2 = pd.concat([linb[['Set', 'Flat', 'Genotype']], 
       linb.loc[:,linb.columns.str.endswith('_emmean')]], axis=1)
all_corrected = pd.merge(all_corrected, all_corrected2, on=['Set', 'Flat', 'Genotype'], how='left', suffixes=['_lin', '_linb'])
all_corrected = all_corrected.groupby(['Set', 'Flat', 'Genotype']).mean()
all_corrected

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,GN,PG,DTB,LN,DTF,SN,WO,FN,SPF,TSC,...,PG_emmean_linb,DTB_emmean_linb,LN_emmean_linb,DTF_emmean_linb,SN_emmean_linb,WO_emmean_linb,FN_emmean_linb,SPF_emmean_linb,TSC_emmean_linb,SH_emmean_linb
Set,Flat,Genotype,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
1,1,DM,5.235294,104.705882,30.235294,8.764706,33.294118,2.941176,1.588235,0.117647,13.828431,39.705882,...,95.702242,31.808797,8.853605,32.982221,2.917333,1.268865,1.347726e-01,13.135060,40.079134,2.960672e+00
1,1,MA,4.933333,98.666667,29.066667,7.800000,32.333333,2.533333,1.400000,0.000000,16.717949,45.307692,...,101.765558,30.162385,8.239128,32.863066,2.572766,0.719832,4.142706e-02,18.094280,51.311661,4.511428e+00
1,1,MB,4.800000,96.000000,29.266667,8.600000,32.000000,3.133333,1.400000,0.133333,13.583333,41.166667,...,101.819436,30.772401,8.631862,33.338776,2.944599,1.047182,4.605556e-02,13.149070,38.657010,-4.546878e-02
1,1,WT,5.166667,103.333333,26.583333,6.909091,30.416667,2.666667,1.166667,0.000000,16.361111,44.458333,...,103.915498,28.391062,8.055482,31.392208,2.744861,0.581349,6.557994e-02,19.160268,54.898058,2.728458e+00
1,2,DM,4.714286,94.000000,27.761905,8.904762,31.666667,3.523810,1.190476,0.000000,12.359167,43.616667,...,95.702242,31.808797,8.853605,32.982221,2.917333,1.268865,1.347726e-01,13.135060,40.079134,2.960672e+00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
845,1,MB,5.126984,102.539683,26.025641,11.193548,,2.015873,14.571429,3.904762,9.860081,32.216129,...,102.679714,26.025641,11.203412,,1.958552,14.505501,3.773656e+00,9.721296,31.798327,3.070175e+00
845,1,WT,5.328125,106.562500,26.812500,12.312500,,1.888889,14.650794,4.365079,7.866138,30.793651,...,106.870049,26.812500,12.301997,,1.891348,14.709453,4.474796e+00,7.656962,30.338463,-1.642242e-16
845E,1,MA,2.791045,55.820896,41.880597,8.000000,45.507463,1.865672,0.000000,0.000000,14.159204,27.104478,...,55.914537,41.883470,7.972750,45.509007,1.830815,0.000000,2.104208e-18,14.159204,27.042495,-8.226227e-03
845E,1,MB,2.800000,56.000000,41.316667,7.650000,44.933333,1.683333,0.000000,0.033333,15.063889,25.883333,...,55.821778,41.304371,7.747274,44.934294,1.748365,0.000000,3.333333e-02,15.063889,26.715770,8.397377e-01


In [19]:
correlations = all_corrected.corr(method='pearson')
correlations

Unnamed: 0,GN,PG,DTB,LN,DTF,SN,WO,FN,SPF,TSC,...,PG_emmean_linb,DTB_emmean_linb,LN_emmean_linb,DTF_emmean_linb,SN_emmean_linb,WO_emmean_linb,FN_emmean_linb,SPF_emmean_linb,TSC_emmean_linb,SH_emmean_linb
GN,1.0,0.971373,-0.134742,-0.164256,-0.136221,-0.084812,0.208588,-0.040482,-0.139338,-0.088705,...,0.925996,-0.148333,-0.168235,-0.149184,-0.085475,0.193275,-0.043335,-0.135986,-0.089911,0.025456
PG,0.971373,1.0,-0.113067,-0.13477,-0.113736,-0.076281,0.205105,-0.060729,-0.137782,-0.086513,...,0.953646,-0.127299,-0.138847,-0.126704,-0.077237,0.189154,-0.063345,-0.134902,-0.088144,0.020799
DTB,-0.134742,-0.113067,1.0,0.348487,0.955345,-0.210469,0.083474,0.026703,-0.200969,-0.215832,...,-0.129282,0.983882,0.345559,0.948658,-0.203879,0.068497,0.020044,-0.194861,-0.210495,-0.076856
LN,-0.164256,-0.13477,0.348487,1.0,0.379908,0.206624,0.082008,0.085247,0.04652,0.115308,...,-0.140002,0.348783,0.988219,0.386232,0.205394,0.087183,0.085496,0.044074,0.113503,0.015687
DTF,-0.136221,-0.113736,0.955345,0.379908,1.0,-0.218807,0.14968,0.096409,-0.237563,-0.231196,...,-0.129494,0.94988,0.383535,0.982882,-0.212774,0.137799,0.09246,-0.231485,-0.225663,-0.045882
SN,-0.084812,-0.076281,-0.210469,0.206624,-0.218807,1.0,-0.390538,-0.343527,0.769976,0.980694,...,-0.077564,-0.205156,0.20601,-0.214727,0.994001,-0.39142,-0.339676,0.765846,0.974962,0.294658
WO,0.208588,0.205105,0.083474,0.082008,0.14968,-0.390538,1.0,0.648238,-0.631318,-0.43624,...,0.186895,0.068804,0.085766,0.136385,-0.381512,0.969074,0.635895,-0.623715,-0.429235,0.0626
FN,-0.040482,-0.060729,0.026703,0.085247,0.096409,-0.343527,0.648238,1.0,-0.602235,-0.402236,...,-0.073364,0.021412,0.085323,0.094302,-0.341232,0.655327,0.995787,-0.603748,-0.400997,-0.006689
SPF,-0.139338,-0.137782,-0.200969,0.04652,-0.237563,0.769976,-0.631318,-0.602235,1.0,0.823556,...,-0.138859,-0.195562,0.046007,-0.233247,0.766644,-0.63962,-0.600688,0.992924,0.820922,0.183319
TSC,-0.088705,-0.086513,-0.215832,0.115308,-0.231196,0.980694,-0.43624,-0.402236,0.823556,1.0,...,-0.089152,-0.211887,0.11364,-0.228189,0.975669,-0.441009,-0.399745,0.82067,0.994228,0.307135


Conclusion:

Linb (flat included as random effect for sets grown on multiple flats) yields 
genotype averages that are similar (PCC > 0.95) to the raw data averages for all 
the traits. 

Lin (per set per flat) yields genotype averages that are similar only to total 
seed count raw data averages (PCC > 0.99). With LN (PCC = 0.30), with SN 
(PCC = 0.45), with SPF (PCC = 0.50), and with SH (PCC = 0.47). The other traits 
have -0.23 < PCC < 0.09.
