[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/thihanmoekyaw/NCSA_Manhattan_Plot/blob/master/example/demo.ipynb)

In [21]:
!pip install git+https://github.com/thihanmoekyaw/NCSA_Manhattan_Plot.git

In [5]:
import ssl
import pandas as pd
import numpy as np
import plotly.graph_objs as go
import re
from dash.dependencies import Input, Output, State
from dash import html, dcc, Dash
# This package
from NCSA_Manhattan_Plot import ManhattanPlot as NMP

In [12]:
# SECURITY NOTE(review): this swaps the standard library's default HTTPS context
# for an unverified one, so certificate checks are disabled for every later
# request in this process that relies on that default. It is kept so the demo
# still loads on hosts without a usable CA bundle; prefer installing proper
# certificates and deleting this line.
ssl._create_default_https_context = ssl._create_unverified_context

# Demo GWAS table hosted alongside the package.
url = "https://raw.githubusercontent.com/thihanmoekyaw/NCSA_Manhattan_Plot/main/example/demo-gwas-data.csv"
df = pd.read_csv(url)

# Positions of the two threshold lines handed to the plot.
# NOTE(review): presumably on the same scale as the neg_log_10_p_val column, and
# a Bonferroni cut-off below the Benjamini-Hochberg one is unusual; confirm
# these demo values.
bonferroni_val = 5
Benjamini_Hochberg_val = 7

# Change the data type of the text columns from object to pandas' string dtype
for text_column in ('var', 'Gene', 'alias', 'Kegg_Pathway'):
    df[text_column] = df[text_column].astype('string')
print(df.dtypes)

# Collect every distinct pathway name. A Kegg_Pathway cell may list several
# names separated by ';'. Missing cells are skipped, and empty fragments (from a
# leading or trailing ';') are dropped so they never become blank dropdown
# entries.
unique_pathways = {
    pathway
    for pname in df['Kegg_Pathway']
    if pd.notnull(pname)
    for pathway in re.split(r';+', pname)
    if pathway
}

# Build the initial figure once, outside the callback function. The threshold
# lines reuse the same named constants the callback uses, so the first render
# and later re-renders cannot drift apart if a threshold is edited.
MahanttanPlot = NMP.NCSA_ManhattanPlot(
    dataframe=df,
    chrm="CHR",
    bp="Position",
    p="neg_log_10_p_val",
    snp="var",
    gene="alias",
    logp=False,
    annotation="Kegg_Pathway",
    genomewideline_value=bonferroni_val,
    suggestiveline_value=Benjamini_Hochberg_val,
    additional_highlight_row="Kegg_Pathway",  # the name of the row in pandas
    additional_highlight_val="RNA transport",  # either a string or a list
)

# Hook for sizing the figure; uncomment the arguments to override the defaults.
MahanttanPlot.update_layout(
    # width=1000,  # set the width of the graph
    # height=700  # set the height of the graph
)

Mahanttan_app = Dash(__name__)

# Sets iterate in arbitrary order, so sort the pathway names once. This keeps
# the dropdown entries and the default selection identical on every run.
pathway_choices = sorted(unique_pathways)

# add html components and figure to app
Mahanttan_app.layout = html.Div([
    html.Div([
        html.Div("For the threshold line, what value would you like to use?", style={'display': 'inline-block', 'paddingRight': '10px'}),
        dcc.RadioItems(
            id='threshold-radio',
            options=[
                {'label': 'Slider value', 'value': 'slider'},
                {'label': 'Bonferroni correction', 'value': 'bonferroni'}
            ],
            value='slider',
            style={
                'display': 'block',
                'verticalAlign': 'middle'
            }
        ),
    ]),
    html.Br(),
    dcc.Dropdown(
        id='pname-dropdown',
        options=[{'label': pname, 'value': pname} for pname in pathway_choices],
        # A multi-select dropdown holds a list; the slice also copes with an
        # empty pathway set instead of raising IndexError.
        value=pathway_choices[:1],
        multi=True
    ),
    html.Br(),
    dcc.Slider(
        id='threshold-slider',
        min=1,
        max=10,
        # Label every selectable position, 1 through 10 inclusive.
        marks={
            i: {'label': str(i)} for i in range(1, 11)
        },
        value=5
    ),
    html.Br(),
    dcc.Graph(id='mahanttan-graph', figure=MahanttanPlot),
    html.Div('''
    note: due to how the Plotly Bio Package works, chromosome X, Y and MT where changed to chromosome 23, 24, 25 respectively.
    The blue line is the Benjamini Hochberg critical p-value and the red line is the Bonferroni value.'''),
    html.Div(id='mahanttan-output'),

])

# Callback function to rebuild the Manhattan plot when the pathway selection,
# the slider value or the threshold mode changes
@Mahanttan_app.callback(
    Output('mahanttan-graph', 'figure'),
    Input('pname-dropdown', 'value'),
    Input('threshold-slider', 'value'),
    Input('threshold-radio', 'value')
)
def update_mahanttan_plot(pname_value, threshold_value, threshold_radio_value):
    """Return a freshly built Manhattan plot for the current control values.

    Args:
        pname_value: Current value of the pathway dropdown, forwarded as
            ``additional_highlight_val``.
        threshold_value: Current value of the threshold slider.
        threshold_radio_value: ``'bonferroni'`` to draw the genome-wide line at
            ``bonferroni_val``; any other value uses the slider value.

    Returns:
        The object returned by ``NMP.NCSA_ManhattanPlot``, which Dash assigns
        to the ``figure`` property of the ``mahanttan-graph`` component.
    """
    # Fall back to the slider for every non-Bonferroni mode so the threshold is
    # always bound, even if the radio reports an unexpected value.
    if threshold_radio_value == 'bonferroni':
        genomewideline_val = bonferroni_val
    else:
        genomewideline_val = threshold_value

    return NMP.NCSA_ManhattanPlot(
        dataframe=df,
        chrm="CHR",
        bp="Position",
        p="neg_log_10_p_val",
        snp="var",
        gene="alias",
        logp=False,
        annotation="Kegg_Pathway",
        genomewideline_value=genomewideline_val,
        suggestiveline_value=Benjamini_Hochberg_val,
        additional_highlight_row="Kegg_Pathway",
        additional_highlight_val=pname_value,
    )

print("Note: this may take a while to load.")
# `run_server(mode=...)` belongs to the old JupyterDash API; core Dash (2.11+)
# takes the notebook display mode through `run(jupyter_mode=...)`.
Mahanttan_app.run(jupyter_mode='inline')

neg_log_10_p_val    float64
var                  object
CHR                   int64
Position              int64
Gene                 object
alias                object
Kegg_Pathway         object
dtype: object
neg_log_10_p_val    float64
var                  string
CHR                   int64
Position              int64
Gene                 string
alias                string
Kegg_Pathway         string
dtype: object
Note: this may take a while to load.
