In [None]:
import numpy as np
import pandas as pd
import itertools
# import matplotlib.pyplot as plt
# import seaborn as sns
import os

import peptide_forest
# Use the fully qualified option name: the bare "max_columns" pattern also
# matches "styler.render.max_columns" in newer pandas releases, which makes
# set_option raise OptionError because the pattern is ambiguous.
pd.set_option("display.max_columns", 1000)
import plotly
import plotly.graph_objs as go
import plotly.offline as offline


In [None]:
# Result table to analyse; it is read from the parent directory of the notebook.
# Alternative inputs kept for reference (currently unused):
# quant_df = pd.read_csv("../data/_quant_new/04854_F1_R8_P0109699E13_TMT10_quant_pots.csv", index_col=0)
# output = "01Apr_E13.csv"
output = "01Apr_E50.csv"
# local_importance_output = "../01Apr_E50.csv_local_importance.csv"
final_df = pd.read_csv("../" + output)

# Directory that receives the HTML plots written further down.
# NOTE(review): there is no separator between the file name and the suffix, so
# the directory is e.g. "01Apr_E50.csvshifted_psms"; kept as-is so existing
# output locations do not change.
base_dir = os.path.basename(output) + "shifted_psms"
# exist_ok avoids the check-then-create race of `os.path.exists` + `os.mkdir`
# and makes re-running the cell a no-op.
os.makedirs(base_dir, exist_ok=True)

In [None]:
# Preview the first rows of the loaded table.
final_df.head()

## Rank comparison for spectra that have been assigned the same identification

In [None]:
# One entry per column whose name contains "q-value": the key is the column
# name with the "q-value_" prefix removed, the value an empty dict that later
# cells fill with per-engine lookups.
engines = {
    column.replace("q-value_", ""): {}
    for column in final_df.columns
    if "q-value" in column
}
engines

In [None]:
# For every engine, store the row label whose q-value lies closest to each
# reference threshold. The series is sorted first, so among equally close
# rows the one that comes first in sorted order is picked.
for engine, closest_rows in engines.items():
    sorted_q_values = final_df[f'q-value_{engine}'].sort_values()
    for label, threshold in (('idxmin@0.01', 0.01), ('idxmin@0.05', 0.05)):
        closest_rows[label] = (sorted_q_values - threshold).abs().idxmin()

TODO: scale plots to be square :)

In [None]:
# For each pair of engines, print how many non-null rank values each engine has
# among the rows where both q-values are below the cutoff.
q_value_cutoff = 0.05
for first, second in itertools.combinations(engines, 2):
    below_first = final_df[f'q-value_{first}'].lt(q_value_cutoff)
    below_second = final_df[f'q-value_{second}'].lt(q_value_cutoff)
    accepted_by_both = final_df.loc[below_first & below_second]
    print(first, accepted_by_both[f'rank_{first}'].count())
    print(second, accepted_by_both[f'rank_{second}'].count())

In [None]:
# `df` is a short alias for the loaded table (same object, not a copy).
df = final_df
# Show every row recorded for one spectrum of interest.
df.loc[df['Spectrum ID'].eq(36155)]

In [None]:
# Display the table ordered by ascending Mascot q-value (returns a new frame;
# `df` itself is left untouched).
df.sort_values(by="q-value_mascot_1_0_0")

In [None]:
# Rank-vs-rank scatter plot for every pair of engines, restricted to rows where
# both engines report a q-value below the cutoff. One HTML file per pair is
# written into `base_dir`.
q_value_cutoff = 0.10

# Both reference lines share the same look.
cutoff_line_style = dict(color="Black", width=1)

# Swap in itertools.permutations(engines.keys(), 2) to also plot the mirrored
# pairs as a sanity check.
for e1, e2 in itertools.combinations(engines.keys(), 2):
    mask = (final_df[f'q-value_{e1}'] < q_value_cutoff) & (final_df[f'q-value_{e2}'] < q_value_cutoff)
    # Filter once per pair instead of re-applying the boolean mask for every
    # column that is read below.
    shared = final_df[mask]

    # Hover text lists a fixed set of q-value columns; all of them must be
    # present in `final_df`, independent of which pair is being plotted.
    hover_text = (
        "Q-values<br />"
        + "Mascot:    " + shared["q-value_mascot_1_0_0"].astype(str) + "<br />"
        + "OMSSA:     " + shared["q-value_omssa_2_1_9"].astype(str) + "<br />"
        + "MSGF+:     " + shared["q-value_msgfplus_v2018_06_28"].astype(str) + "<br />"
        + "X!Tandem:  " + shared["q-value_xtandem_vengeance"].astype(str) + "<br />"
        + "MSFragger: " + shared["q-value_msfragger_20190222"].astype(str) + "<br />"
        + "RF:        " + shared["q-value_RF-reg"].astype(str) + "<br />"
        + "Spectrum ID: " + shared['Spectrum ID'].astype(str)
    )

    data = [
        go.Scattergl(
            x=shared[f'rank_{e1}'],
            y=shared[f'rank_{e2}'],
            mode='markers',
            marker=dict(
                size=3,
                # Points are coloured by the q-value of the x-axis engine.
                color=shared[f'q-value_{e1}'],
                colorscale='rainbow',
                colorbar=dict(title=f'q-value_{e1}'),
                showscale=True,
            ),
            hovertext=hover_text,
            hoverinfo="text",
        ),
    ]
    fig = go.Figure(data=data)

    # Rank of the row whose q-value is closest to 0.01 for each engine; relies
    # on the 'idxmin@0.01' entries stored in `engines` by an earlier cell.
    e1_cutoff_rank = final_df.loc[engines[e1]['idxmin@0.01'], f'rank_{e1}']
    e2_cutoff_rank = final_df.loc[engines[e2]['idxmin@0.01'], f'rank_{e2}']

    # Vertical line at the x-axis engine's cutoff rank.
    fig.add_shape(
        type="line",
        x0=e1_cutoff_rank,
        y0=0,
        x1=e1_cutoff_rank,
        y1=shared[f"rank_{e2}"].max(),
        line=cutoff_line_style,
    )
    # Horizontal line at the y-axis engine's cutoff rank.
    fig.add_shape(
        type="line",
        x0=0,
        y0=e2_cutoff_rank,
        x1=shared[f"rank_{e1}"].max(),
        y1=e2_cutoff_rank,
        line=cutoff_line_style,
    )
    fig.update_layout(
        template="plotly_white",
        title=f"Ranks: {e1} vs {e2}",
        xaxis_title=f"Rank {e1}",
        yaxis_title=f"Rank {e2}",
    )
    # Written to disk rather than displayed inline: fig.show() crashed the
    # browser after a few plots.
    offline.plot(fig, filename=f'{base_dir}/ranks_{e1}_vs_{e2}.html', auto_open=False)

In [None]:
# Show the per-engine lookup, including the 'idxmin@...' entries stored earlier.
engines

In [None]:
# Print, per engine, each stored row label together with the q-value and rank
# found at that row.
for engine, closest_rows in engines.items():
    print(engine)
    for label, row in closest_rows.items():
        q_value = final_df.loc[row, f'q-value_{engine}']
        rank = final_df.loc[row, f'rank_{engine}']
        print(f"{label}: {row: 10d} q-value: {q_value:.5f} - rank: {rank}")
        print()

In [None]:
# Colour-scale names in alphabetical order, grouped here by first letter.
# NOTE(review): presumably the names plotly accepts for `colorscale`; the list
# is not referenced by the visible cells.
plotly_color_scales = """
    aggrnyl agsunset algae amp armyrose
    balance blackbody bluered blues blugrn bluyl brbg brwnyl bugn bupu burg burgyl
    cividis curl
    darkmint deep delta dense
    earth edge electric emrld
    fall
    geyser gnbu gray greens greys
    haline hot hsv
    ice icefire inferno
    jet
    magenta magma matter mint mrybm mygbm
    oranges orrd oryel
    peach phase picnic pinkyl piyg plasma plotly3 portland prgn pubu pubugn puor
    purd purp purples purpor
    rainbow rdbu rdgy rdpu rdylbu rdylgn redor reds
    solar spectral speed sunset sunsetdark
    teal tealgrn tealrose tempo temps thermal tropic turbid twilight
    viridis
    ylgn ylgnbu ylorbr ylorrd
""".split()