In [4]:
#Importing necessary libraries
import pandas as pd
import numpy as np
import hvplot.pandas
import holoviews as hv
import panel as pn

# Select Bokeh as the plotting backend for hvplot
hvplot.extension('bokeh')

# Initialise Panel for the notebook and load its 'tabulator' extension
pn.extension('tabulator')


In [5]:
# Read both input tables from the working directory.
# The metadata table is indexed by sample, the count matrix by gene.
metadata = (
    pd.read_csv(r"Deidentified_example_RNAseq_metadata.csv")
    .set_index('sample')
)
norm_counts = (
    pd.read_csv(r"Deidentified_example_RNAseq_normalized_log2_counts.csv")
    .set_index('gene')
)


In [6]:
# Display the sample metadata table (indexed by 'sample')
metadata

Unnamed: 0_level_0,group,subject,Nanodrop_conc_ng_uL,Nanodrop_260_280,Nanodrop_260_230,Qubit_conc_ng_uL,TapeStation_RIN
sample,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Treated1,Treated,1,289.5,2.12,1.33,378,8.9
Control1,Control,1,139.5,2.04,1.56,219,9.0
Treated2,Treated,2,279.6,2.1,1.81,375,8.3
Control2,Control,2,86.29,1.98,0.51,136,9.2
Treated3,Treated,3,310.0,2.12,1.81,393,8.9
Control3,Control,3,136.9,2.07,1.22,216,9.3
Treated4,Treated,4,385.9,2.11,1.72,454,8.1
Control4,Control,4,96.45,2.02,1.16,156,9.2
Treated5,Treated,5,331.8,2.11,1.88,414,8.9
Control5,Control,5,106.5,1.98,1.32,166,9.3


In [7]:
# Display the normalized log2 count matrix (indexed by 'gene', one column per sample)
norm_counts

Unnamed: 0_level_0,Treated1,Control1,Treated2,Control2,Treated3,Control3,Treated4,Control4,Treated5,Control5,Treated6,Control6,Treated7,Control7,Treated8,Control8
gene,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
gene1,4.588156,5.001665,4.411651,4.897417,4.780794,5.273214,4.389454,5.281885,4.856537,5.511054,4.612176,5.120755,4.919128,5.629526,4.689802,5.221003
gene2,8.512063,8.086674,7.776519,7.814994,7.760302,8.179345,7.485532,8.329592,7.877916,8.093512,8.684237,8.205230,8.179778,7.939253,8.529734,7.975964
gene3,5.298770,5.336631,5.539292,5.139642,5.713467,5.438977,5.713640,5.468298,5.552817,5.601555,5.492552,5.522030,5.304491,5.461498,5.453989,5.401339
gene4,4.367575,4.553845,3.621926,4.928725,5.283950,4.800096,4.805433,5.025600,4.681782,4.506041,4.028926,5.058560,4.387220,4.579743,4.368691,4.763321
gene5,-0.140853,0.625136,1.031111,2.141870,0.572455,1.670248,0.452109,1.394757,0.479568,1.039900,0.848696,1.319474,1.730282,1.772388,1.397020,1.084699
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
gene17035,-0.697247,-1.044715,0.428447,-0.738548,0.869436,-1.060145,-1.021822,-0.132490,-0.027392,-1.923575,-0.935576,0.290905,0.026267,0.282063,-1.287478,-0.185390
gene17036,-0.697247,-2.919184,0.317415,-0.738548,-1.749473,-1.712221,-0.732315,-1.898024,-0.815888,-4.245503,-0.405061,-2.767989,-3.342967,-4.110255,-0.187942,-1.465498
gene17037,-2.462781,-2.182218,-2.741478,-0.738548,-4.556828,-2.197648,-4.191747,-2.634990,-4.275320,-1.923575,-4.105501,-4.352951,-3.342967,-2.525292,-2.509871,-0.365962
gene17038,-1.977355,-2.919184,-0.867009,-0.738548,-2.971866,-2.934614,-1.384392,-0.313062,-1.105395,-1.438148,-1.783573,-1.183026,0.357473,-0.022792,-0.007370,-0.365962


In [8]:
# Flip the count matrix so that samples become rows and genes become columns
genes_df = norm_counts.T


In [9]:
# Place the metadata columns and the per-sample gene columns side by side,
# aligned on the sample index.
# NOTE(review): pd.concat defaults to an outer join, so a sample present in only
# one of the two tables gets NaN in the other table's columns — confirm that
# both tables are expected to cover the same samples.
combined_df = pd.concat(objs=[metadata, genes_df], axis="columns")

In [10]:
# Selectors driving Plot 1: one metadata column (grouping) and one gene (values).
# Each option list keeps only the names that exist as columns of combined_df.
sel_widget_metadata = pn.widgets.AutocompleteInput(
    name="Select_metadata",
    options=metadata.columns[metadata.columns.isin(combined_df.columns)].tolist(),
    value='subject',
)
# The mask is built from the same index it filters (norm_counts.index), matching
# the Plot 2 gene selectors, instead of relying on genes_df.columns happening to
# be positionally aligned with norm_counts.index.
sel_widget_genes = pn.widgets.AutocompleteInput(
    name="Select_gene",
    options=norm_counts.index[norm_counts.index.isin(combined_df.columns)].tolist(),
    value='gene1',
)


In [11]:
# Display the gene selector widget inline in the notebook
sel_widget_genes

In [12]:
# Plot 1 - box plot and violin plot, each with the individual points overlaid
@pn.depends(sel_widget_metadata, sel_widget_genes)
def plot_graphs(metadata_var, gene_var):
    """Compare one gene across the levels of one metadata column.

    Parameters
    ----------
    metadata_var : str
        Column of ``combined_df`` used for grouping and colouring.
    gene_var : str
        Column of ``combined_df`` whose values are plotted on the y axis.

    Returns
    -------
    pn.Row
        Two overlays side by side: (box plot * scatter) and (violin plot * scatter).
    """
    subset = combined_df[[metadata_var, gene_var]]
    # Keyword arguments common to every plot in this figure
    shared = dict(y=gene_var, c=metadata_var, cmap='Category20', responsive=True)

    def points():
        # Raw observations, drawn on top of each distribution plot
        return subset.hvplot.scatter(x=metadata_var, title='Scatterplot', **shared)

    boxes = subset.hvplot.box(by=metadata_var, legend=False, title='Boxplot', **shared)
    box_overlay = boxes * points()

    violins = subset.hvplot.violin(by=metadata_var, legend=False, title='Violinplot', **shared)
    violin_overlay = violins * points()

    return pn.Row(box_overlay, violin_overlay)

# Wrap the reactive plot function in a Row so it can be placed in the dashboard;
# plot_graphs is declared (via pn.depends) to depend on the two Plot 1 selectors.
combine_plot1 = pn.Row(plot_graphs)

In [13]:
# Plot 2: gene-vs-gene scatter plot.
# Selectors for the x-axis gene, the y-axis gene and the metadata column used for colouring.
sel_widget_genes1 = pn.widgets.AutocompleteInput(
    options=norm_counts.index[norm_counts.index.isin(combined_df.columns)].tolist(),
    name='X_axis_for_ScatterPlot',
    value='gene1',
)
sel_widget_genes2 = pn.widgets.AutocompleteInput(
    options=norm_counts.index[norm_counts.index.isin(combined_df.columns)].tolist(),
    name='Y_axis_for_ScatterPlot',
    value='gene2',
)
groupby_widget = pn.widgets.AutocompleteInput(
    options=metadata.columns[metadata.columns.isin(combined_df.columns)].tolist(),
    name='Groupby',
    value='group',
)

@pn.depends(sel_widget_genes1, sel_widget_genes2, groupby_widget)
def plot_scatter(gene1, gene2, groupby):
    """Scatter one gene against another, coloured by a metadata column.

    Parameters
    ----------
    gene1 : str
        Column of ``combined_df`` plotted on the x axis.
    gene2 : str
        Column of ``combined_df`` plotted on the y axis.
    groupby : str
        Metadata column used to colour the points.

    Returns
    -------
    The hvplot scatter object for the selected columns; every metadata
    column is included as hover information.
    """
    meta_cols = metadata.columns.tolist()
    # De-duplicate while keeping order: if the same gene is picked for both axes,
    # selecting [gene, gene, ...] would yield a frame with a duplicated column,
    # which cannot be plotted.
    selected_cols = list(dict.fromkeys([gene1, gene2] + meta_cols))
    return combined_df[selected_cols].hvplot.scatter(
        x=gene1,
        y=gene2,
        hover_cols=meta_cols,
        color=groupby,  # can also use color=groupby_widget
        title='ScatterPlot for visualizing correlation between two genes',
        responsive=True,
    )

# Column holding the group-by selector.
# NOTE(review): `controls` is not referenced anywhere else in the visible notebook.
controls=pn.Column(groupby_widget)

# Wrap the reactive scatter function in a Column so it can be placed in the dashboard
combine_plot3=pn.Column(plot_scatter)

In [14]:
# Pane that renders the metadata table inside the dashboard
df_widget = pn.pane.DataFrame(object=metadata)


In [15]:
#Creating dashboard
def create_dashboard():
    """Assemble the "Data Explorer" dashboard.

    The sidebar holds a heading, the ``Designer.png`` image, short usage
    instructions and the five selector widgets. The main grid is laid out as:

    * rows 0-2, columns 0-5:  metadata table (``df_widget``)
    * rows 0-2, columns 6-11: gene-vs-gene scatter plot (``combine_plot3``)
    * rows 3-5, all columns:  box/violin plots (``combine_plot1``)

    Returns
    -------
    pn.template.FastGridTemplate
        The populated template; the caller decides how to show or serve it.
    """
    # Use the same number of grid columns at every responsive breakpoint
    COLS=12
    template = pn.template.FastGridTemplate(
        title="Data Explorer",
        sidebar=[
            pn.pane.Markdown('## Data Explorer'),
            pn.pane.PNG(r'Designer.png', width=300, height=300),
            # Emphasis markers are balanced ("**...**") and "fro" is corrected to
            # "from" so the sidebar text renders as intended.
            pn.pane.Markdown("""
                 
                 
                 **This application lets the user visualize categorical plots and distribution plots for a preloaded dataset.**
                 
                 Please select variables from metadata and gene to plot box and violin plots
                 
                 Please select 2 genes to visualize the correlation between 2 genes
                 
                 
                 """),
            sel_widget_metadata,
            sel_widget_genes,
            sel_widget_genes1,
            sel_widget_genes2,
            groupby_widget,
        ],
        accent_base_color="#88d8b0",
        header_background="#1F3F49",
        row_height=150,
        cols={'lg': COLS, 'md': COLS, 'sm': COLS, 'xs': COLS, 'xxs': COLS}
    )
    # Top half: metadata table on the left, scatter plot on the right
    template.main[:3,:6] = df_widget
    template.main[:3,6:] = combine_plot3
    # Bottom half: box/violin plots across the full width
    template.main[3:6, :] = combine_plot1

    return template


# Build the template and show it; the recorded output reports a server being
# launched on localhost.
# NOTE(review): the recorded output of this cell ends with an AssertionError whose
# cause is not visible in the notebook — investigate before relying on this cell.
dashboard = create_dashboard()
dashboard.show() 

Launching server at http://localhost:56469


AssertionError: 