In [1]:
import pandas as pd

In [6]:
# Read the space-delimited map file. It has no header row, so the three
# column names are supplied explicitly.
df = pd.read_csv(
    "pseudo-f2_final.map",
    sep=' ',
    header=None,
    names=["LG", "Marker", "Map_Position"],
)
df

Unnamed: 0,LG,Marker,Map_Position
0,1,Chr01_1291158,0.000000
1,1,Chr01_1859164,0.845821
2,1,Chr01_3458306,2.111085
3,1,Chr01_4285885,2.514744
4,1,Chr01_4815845,4.224636
...,...,...,...
1143,11,Chr11_134368352,292.641298
1144,11,Chr11_134481938,293.506298
1145,11,Chr11_137055939,294.372470
1146,11,Chr11_137886278,295.238708


In [16]:
## Number of markers (rows) in each LG, for LGs numbered 1 through 11
Marker_Number = [len(df.loc[df['LG'] == lg]) for lg in range(1, 12)]

Marker_Number

[100, 102, 97, 83, 118, 123, 86, 110, 84, 103, 142]

In [24]:
# Total distance of each LG (1 through 11): the largest Map_Position in the group.
# max() is used instead of reading the last row so the value does not depend on
# the rows of an LG being sorted by position; an LG with no rows yields NaN
# rather than raising IndexError.
Total_Distance = [
    df.loc[df['LG'] == lg, 'Map_Position'].max()
    for lg in range(1, 12)
]

Total_Distance

[183.30651083804,
 229.793880280927,
 358.960753761999,
 185.845308156627,
 197.484661829885,
 226.996830583856,
 176.016322173679,
 251.298617953276,
 145.295125491273,
 197.644572058557,
 296.552122427241]

In [29]:
# Mean gap between consecutive markers, computed separately for each LG
Average_Distance = []

for lg in range(1, 12):
    lg_positions = df.loc[df['LG'] == lg, 'Map_Position']
    # diff() leaves a NaN in the first row (no previous marker); drop it
    lg_gaps = lg_positions.diff().dropna()
    Average_Distance.append(lg_gaps.mean())

Average_Distance

[1.8515809175559597,
 2.275186933474525,
 3.7391745183541563,
 2.2664061970320364,
 1.6879030925631195,
 1.8606297588840655,
 2.0707802608668118,
 2.30549190782822,
 1.7505436806177472,
 1.9376918829270295,
 2.10320654203717]

In [30]:
# Largest gap between consecutive markers, computed separately for each LG
Maximum_Distance = [
    df.loc[df['LG'] == lg, 'Map_Position'].diff().dropna().max()
    for lg in range(1, 12)
]

Maximum_Distance

[9.162788385477995,
 7.302157221822405,
 54.81467133307899,
 7.717822120248002,
 7.2500813901285,
 6.299046163742602,
 9.4787935558298,
 10.321722156957904,
 9.58888999108003,
 5.9483867915531,
 9.190617712537005]

In [52]:
# Pool every consecutive-marker gap from all LGs into one flat list, so the
# overall mean is taken over individual gaps rather than over per-LG averages.
# Gaps are still computed within each LG, so no gap spans two LGs.
all_distances = [
    gap
    for lg in range(1, 12)
    for gap in df.loc[df['LG'] == lg, 'Map_Position'].diff().dropna().tolist()
]

pd.Series(all_distances).mean()

2.1540850532588918

In [54]:
# Per-LG table: one row per LG (1 through 11), one column per statistic
final = pd.DataFrame(
    {
        'LG': range(1, 12),
        'Marker Number': Marker_Number,
        'Total Distance': Total_Distance,
        'Average Distance': Average_Distance,
        'Maximum Distance': Maximum_Distance,
    }
)

# Whole-map summary row: counts and lengths are summed, the maximum gap is the
# largest per-LG maximum, and the average comes from the pooled list of gaps
# (all_distances) rather than from averaging the per-LG averages.
summary_row = {
    'LG': 'Total',
    'Marker Number': final['Marker Number'].sum(),
    'Total Distance': final['Total Distance'].sum(),
    'Average Distance': pd.Series(all_distances).mean(),
    'Maximum Distance': final['Maximum Distance'].max(),
}

# Stack the summary underneath the per-LG rows and renumber the index
final_df = pd.concat(
    [final, pd.DataFrame([summary_row])],
    ignore_index=True,
)

print(final_df.round(1))

       LG  Marker Number  Total Distance  Average Distance  Maximum Distance
0       1            100           183.3               1.9               9.2
1       2            102           229.8               2.3               7.3
2       3             97           359.0               3.7              54.8
3       4             83           185.8               2.3               7.7
4       5            118           197.5               1.7               7.3
5       6            123           227.0               1.9               6.3
6       7             86           176.0               2.1               9.5
7       8            110           251.3               2.3              10.3
8       9             84           145.3               1.8               9.6
9      10            103           197.6               1.9               5.9
10     11            142           296.6               2.1               9.2
11  Total           1148          2449.2               2.2              54.8

In [41]:
# Export the summary table, rounded to one decimal place, without the index
# column. final_df must already exist, so run the table-building cell first.
final_df.round(decimals=1).to_excel(excel_writer='pseudo-f2_stats.xlsx', index=False)