In [46]:
from pathlib import Path

import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px

In [78]:
# Execute the shared helper notebooks so the names they define become available
# in this notebook. None of the helpers called below (load_data, plotly_zone,
# homology_search, mass_sum, gap_fill, match_dfs, Ladder,
# ladder_complementation, ...) are defined in this notebook itself, so they
# presumably come from these modules -- run this cell before any analysis cell.
# The paths are relative, so the kernel's working directory matters.
%run ../modules/utils.ipynb
%run ../modules/ladder_separation.ipynb
%run ../modules/homology_search.ipynb
%run ../modules/mass_sum.ipynb
%run ../modules/gap_fill.ipynb
%run ../modules/ladder_complementation.ipynb

In [71]:
# Control sample workbook (path is relative to the working directory).
ctrl_sample = "../samples/Phe/tRNA_Phe_Control.xlsx"
df_ctrl = load_data(ctrl_sample)

# Overview plot of the control data; being the last expression, whatever the
# helper returns is also what the cell displays.
plotly_zone(df_ctrl, title="Control Sample Data")

In [74]:
# Homology search on the control data. The result is kept as one object and
# unpacked positionally into the plotting helper, with 'Vol' on the y axis.
bcr = homology_search(df_ctrl)
plotly_basecalling(
    *bcr,
    y="Vol",
    title="Homology Search Result",
)

In [75]:
# Load the degraded sample; `df` is the frame the following cells filter.
df = load_data("../samples/Phe/tRNA_Phe_Deg_1.xlsx")

# Overview plot of the degraded data.
plotly_zone(df, title="Degraded Sample")

In [82]:
"""iloc[:10] takes the first 10 compounds from the list sorted in the descending order of 
intensity. Replace 10 with other numbers if more or less compounds are desired.
"""
# Keep rows whose Mass exceeds 23000, rank them by 'Vol' (highest first) and
# take the leading 10 rows.
df_deg_top = (
    df.loc[df["Mass"] > 23000]
    .sort_values(by="Vol", ascending=False)
    .iloc[:10]
)

# Homology search restricted to that short list, then plotted against 'Vol'.
homo_deg = homology_search(df_deg_top)
plotly_basecalling(
    *homo_deg,
    y="Vol",
    title="Homology Search Result of Degraded Sample",
)

# Element 0 of the search result is the table later used to pick full_mass.
df_homo_deg = homo_deg[0]

In [103]:
# For demonstration purposes the 5´ and 3´ subsets were selected in advance
# and are simply read back from disk here (5´ first, then 3´).
df_5p, df_3p = [
    load_data(path) for path in ("./data/phe5p.xlsx", "./data/phe3p.xlsx")
]

# match_dfs output is shown as the "Overlapped Data" series -- presumably the
# entries the two subsets have in common (confirm against the utils module).
df_common = match_dfs(df_5p, df_3p)
plotly_multi_zones(
    [df_5p, df_3p, df_common],
    title="Manually Divided Data",
    names=["5´ Data", "3´ Data", "Overlapped Data"],
)

In [87]:
# Number of highest-'Vol' rows kept from each of the two subsets.
sampling_num = 1000

# Rank each subset by 'Vol' in descending order and keep the leading rows.
df_5p_top = df_5p.sort_values(by="Vol", ascending=False).head(sampling_num)
df_3p_top = df_3p.sort_values(by="Vol", ascending=False).head(sampling_num)

plotly_zones(
    df_5p_top,
    df_3p_top,
    title="Filtered Data",
    names=["5´ Filtered Data", "3´ Filtered Data"],
)

In [89]:
# Rank of the homology-search entry to use, counted by descending 'Vol':
# 0 selects the top entry, 1 the next one, and so on.
isoform_idx = 0

# Mass of the selected entry; it is handed to mass_sum as `full_mass`.
full_mass = (
    df_homo_deg
    .sort_values(by="Vol", ascending=False)
    .iloc[isoform_idx]["Mass"]
)

# Arguments go in as (3´, 5´) and the two results are unpacked in that same
# order -- NOTE(review): confirm mass_sum's return order against its module.
df_masssum_3p, df_masssum_5p = mass_sum(df_3p_top, df_5p_top, full_mass=full_mass)

plotly_zones(
    df_masssum_5p,
    df_masssum_3p,
    title="MassSum Result",
    names=["5´ Data", "3´ Data"],
)

full_mass 24252.3110919712 sum_value 24270.3


In [99]:
%%capture

# GapFill for the 5´ ladder. The rows of df_5p whose Mass lies strictly within
# 0.1 of full_mass are passed to gap_fill as its third argument.
fullmass_dot = df_5p.loc[
    (df_5p["Mass"] > full_mass - 0.1) & (df_5p["Mass"] < full_mass + 0.1)
]
df_gap_5p = gap_fill(
    df_5p, df_masssum_5p, fullmass_dot, major=True, orientation=5
)

# GapFill for the 3´ ladder. Same selection on df_3p; note that fullmass_dot is
# rebound here, so after this cell it refers to the 3´ selection only.
fullmass_dot = df_3p.loc[
    (df_3p["Mass"] > full_mass - 0.1) & (df_3p["Mass"] < full_mass + 0.1)
]
df_gap_3p = gap_fill(
    df_3p, df_masssum_3p, fullmass_dot, major=True, orientation=3
)

In [100]:
# Build each rough ladder by stacking its MassSum rows on top of its GapFill
# rows. reset_index() renumbers the rows from 0 and keeps the previous index
# as an ordinary column (drop=False is the pandas default, spelled out here).
df_ladder_5p = pd.concat([df_masssum_5p, df_gap_5p], axis=0).reset_index(drop=False)
df_ladder_3p = pd.concat([df_masssum_3p, df_gap_3p], axis=0).reset_index(drop=False)

plotly_zones(
    df_ladder_5p,
    df_ladder_3p,
    title="GapFill Result",
    names=["5´ Data", "3´ Data"],
)

In [59]:
# Wrap both rough ladders in Ladder objects sharing the same full_mass. The
# third argument (5 / 3) presumably marks the ladder's orientation, mirroring
# the `orientation` keyword used for gap_fill -- confirm against the module.
ladders = [
    Ladder(df_ladder_5p, full_mass, 5),
    Ladder(df_ladder_3p, full_mass, 3),
]
ladder1, ladder2 = ladders

# Merge the two ladders into a single result table.
df_ladder_comp = ladder_complementation(ladders)

In [102]:
# Persist the complementation result as an Excel workbook. The output folder
# is created first: nothing visible in this notebook creates it, and to_excel
# does not create missing parent directories, so the cell would otherwise
# fail when the folder is absent.
output_path = Path('../outputs/ladder_comp_res_lite.xlsx')
output_path.parent.mkdir(parents=True, exist_ok=True)

print('Ladder Complementation Result')
df_ladder_comp.to_excel(output_path)

# Display only rows 10..69 (by position) to keep the rendered output short.
df_ladder_comp.iloc[10:70]

Ladder Complementation Result


Unnamed: 0_level_0,Base0,Mass0,Base1,Mass1
position,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
11,C,3678.448436,,20896.960709
12,U,3984.473803,,
13,C,4289.515612,C,20285.859189
14,A,4618.566575,A,19980.842183
15,G,4963.615095,,19651.771913
16,D,5271.655368,,
17,,,,
18,,,,
19,,,G,18345.57711
20,,6614.838079,G,18000.565184
