In [2]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
import dash_bio as dashbio

In [3]:
# Read the tab-separated results table; its first column becomes the row index.
df = pd.read_csv(
    'results/results_airway.tsv',
    sep='\t',
    index_col=0,
)
df.head()

Unnamed: 0,baseMean,log2FoldChange,lfcSE,stat,pvalue,padj,symbol,entrez,ENSEMBL
ENSG00000000003,709.698552,-0.383074,0.100462,-3.813133,0.000137,0.001106,TSPAN6,7105.0,ENSG00000000003
ENSG00000000419,521.040353,0.205022,0.111299,1.842083,0.065463,0.186488,DPM1,8813.0,ENSG00000000419
ENSG00000000457,237.510888,0.036148,0.140619,0.257065,0.797129,0.905422,SCYL3,57147.0,ENSG00000000457
ENSG00000000460,58.018762,-0.091646,0.278242,-0.329375,0.741872,0.877333,C1orf112,55732.0,ENSG00000000460
ENSG00000000971,5825.5137,0.424604,0.089439,4.747404,2e-06,2.5e-05,CFH,3075.0,ENSG00000000971


In [4]:
# Store -log10 of the adjusted p-value so that smaller padj values plot higher.
df['negative_padj'] = -np.log10(df['padj'])
df.head()

Unnamed: 0,baseMean,log2FoldChange,lfcSE,stat,pvalue,padj,symbol,entrez,ENSEMBL,negative_padj
ENSG00000000003,709.698552,-0.383074,0.100462,-3.813133,0.000137,0.001106,TSPAN6,7105.0,ENSG00000000003,2.956396
ENSG00000000419,521.040353,0.205022,0.111299,1.842083,0.065463,0.186488,DPM1,8813.0,ENSG00000000419,0.72935
ENSG00000000457,237.510888,0.036148,0.140619,0.257065,0.797129,0.905422,SCYL3,57147.0,ENSG00000000457,0.043149
ENSG00000000460,58.018762,-0.091646,0.278242,-0.329375,0.741872,0.877333,C1orf112,55732.0,ENSG00000000460,0.056836
ENSG00000000971,5825.5137,0.424604,0.089439,4.747404,2e-06,2.5e-05,CFH,3075.0,ENSG00000000971,4.600889


In [5]:
import plotly.express as px

# Scatter of log2 fold change (x) against the `negative_padj` column (y, also
# used for the colour scale); hovering a point shows its `symbol` value.
fig = px.scatter(
    df,
    x='log2FoldChange',
    y='negative_padj',
    color='negative_padj',
    hover_name='symbol',
)
fig.show()

In [9]:
import plotly.graph_objects as go
import pandas as pd
import numpy as np
from statsmodels.stats import multitest

# Load the results table. It gets its own name so that `data` below only ever
# refers to the list of Plotly traces (the original rebound `data` from a
# DataFrame to a list halfway through the cell).
results_table = pd.read_csv('results/results_airway.tsv', sep='\t')

# Flag the rows whose mean expression is above the filter threshold
filterThreshold = 10
use = results_table['baseMean'] > filterThreshold

# Get the raw p-values
pvalues = results_table['pvalue']

# Perform multiple testing correction on the rows that actually have a p-value.
# A single NaN passed to multipletests(method='fdr_bh') turns every corrected
# value into NaN, so untested rows are left as NaN and the rest are corrected.
tested = pvalues.notna()
pvalues_corrected = np.full(len(pvalues), np.nan)
if tested.any():
    pvalues_corrected[tested.to_numpy()] = multitest.multipletests(
        pvalues[tested], alpha=0.001, method='fdr_bh'
    )[1]

# Both traces share the same binning: 50 bins of width 0.02 across [0, 1]
pvalue_bins = dict(start=0, end=1, size=0.02)

# Histogram trace for rows that do NOT pass the baseMean filter
hist1 = go.Histogram(
    x=pvalues[~use],
    name='do not pass',
    xbins=pvalue_bins,
)

# Histogram trace for rows that pass the baseMean filter
hist2 = go.Histogram(
    x=pvalues[use],
    name='pass',
    xbins=pvalue_bins,
)

# Combine the traces into a data list
data = [hist1, hist2]

# Define the layout
layout = go.Layout(
    title='Histogram of p-values',
    xaxis=dict(
        title='p-value',
        range=[0, 1]
    ),
    yaxis=dict(
        title='frequency'
    ),
    bargap=0.1,
    bargroupgap=0.1
)

# Create the figure
fig = go.Figure(data=data, layout=layout)

# Show the figure
fig.show()


In [55]:
import plotly.express as px

def ma_plot(df, base_mean_col, log2FoldChange_col, pvalue_col, alpha=0.05):
    """Build an MA plot: log10 of the mean column (x) against log2 fold change (y).

    Points are coloured by significance and direction of change, and the
    figure title reports how many rows fall in each class.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding the three columns named below. It is NOT modified; the
        function works on a copy so the cell can be re-run safely.
    base_mean_col : str
        Column that is log10-transformed and drawn on the x axis.
    log2FoldChange_col : str
        Column drawn on the y axis; its sign decides up vs. down.
    pvalue_col : str
        Column compared against ``alpha``.
    alpha : float, default 0.05
        Significance cut-off applied to ``pvalue_col``.

    Returns
    -------
    The Plotly figure returned by ``px.scatter``, with axis titles and a
    summary title set.
    """
    # Work on a copy: the original overwrote the caller's mean column with its
    # log10 (so a second call plotted log10(log10(x))) and added a 'color' column.
    plot_df = df.copy()
    plot_df[base_mean_col] = np.log10(plot_df[base_mean_col])

    # Classify every row once and reuse the masks for colouring and counting.
    significant = plot_df[pvalue_col] < alpha
    up = significant & (plot_df[log2FoldChange_col] > 0)
    down = significant & (plot_df[log2FoldChange_col] < 0)
    non_sig = plot_df[pvalue_col] >= alpha

    # Rows matching none of the masks (missing p-value, or significant with a
    # fold change of exactly 0) keep the empty label, as before.
    plot_df['color'] = np.select(
        [up, down, non_sig],
        ['Up Regulated', 'Down Regulated', 'Non Significant'],
        default='',
    )

    # The map keys must equal the labels above character for character; the
    # original keys ('Upregulated', 'Downregulated') never matched, so the
    # intended red/blue colours were silently ignored.
    # Note: white markers are only visible on a non-white plot background.
    fig = px.scatter(
        plot_df,
        x=base_mean_col,
        y=log2FoldChange_col,
        color='color',
        color_discrete_map={
            'Up Regulated': 'red',
            'Down Regulated': 'blue',
            'Non Significant': '#fff',
        },
    )

    # set x and y axis labels (x values are log10-transformed above)
    fig.update_xaxes(title_text='log10(Base Mean)')
    fig.update_yaxes(title_text='Log2 Fold Change')

    # set title
    up_count = int(up.sum())
    down_count = int(down.sum())
    non_sig_count = int(non_sig.sum())
    fig.update_layout(title=f'MA Plot of Differential Gene Expression ({up_count} up, {down_count} down, {non_sig_count} non-significant)')

    return fig


In [56]:
# Draw the MA plot from the results table, using the raw `pvalue` column
# to decide significance.
fig = ma_plot(
    df,
    base_mean_col='baseMean',
    log2FoldChange_col='log2FoldChange',
    pvalue_col='pvalue',
)
fig.show()

In [62]:

# Load the whole contents of graph.txt into a single string.
with open("graph.txt", "r") as dot_file:
    dot_graph = dot_file.read()

In [65]:
!pip install  graphviz

Collecting graphviz
  Downloading graphviz-0.20.1-py3-none-any.whl (47 kB)
     -------------------------------------- 47.0/47.0 kB 584.1 kB/s eta 0:00:00
Installing collected packages: graphviz
Successfully installed graphviz-0.20.1


In [67]:
# Echo the string currently held in `dot_graph` as the cell output.
dot_graph

'digraph snakemake_dag {\n\tgraph [bb="0,0,562.7,329",\n\t\tbgcolor=white,\n\t\tmargin=0\n\t];\n\tnode [fontname=sans,\n\t\tfontsize=10,\n\t\tlabel="\\N",\n\t\tpenwidth=2,\n\t\tshape=box,\n\t\tstyle=rounded\n\t];\n\tedge [color=grey,\n\t\tpenwidth=2\n\t];\n\t0\t[color="0.42 0.6 0.85",\n\t\theight=0.5,\n\t\tlabel=all,\n\t\tpos="287.51,18",\n\t\tstyle="rounded,dashed",\n\t\twidth=0.75];\n\t1\t[color="0.50 0.6 0.85",\n\t\theight=0.5,\n\t\tlabel=starMapping,\n\t\tpos="240.51,90",\n\t\tstyle="rounded,dashed",\n\t\twidth=1.0694];\n\t1 -> 0\t[pos="e,276.03,36.104 252.13,71.697 257.65,63.474 264.36,53.483 270.44,44.421"];\n\t2\t[color="0.17 0.6 0.85",\n\t\theight=0.5,\n\t\tlabel=starIndexing,\n\t\tpos="504.51,162",\n\t\tstyle="rounded,dashed",\n\t\twidth=1.0694];\n\t2 -> 1\t[pos="e,279.15,105.17 465.88,146.83 462.73,145.83 459.58,144.87 456.51,144 386.05,124.06 363.8,128.38 289.04,107.93"];\n\t13\t[color="0.50 0.6 0.85",\n\t\theight=0.5,\n\t\tlabel=starMapping,\n\t\tpos="335.51,90",\n\t\tstyle

In [66]:
import sys
import os
from graphviz import render, Source

# NOTE(review): this cell reads the DOT path from sys.argv. Inside a Jupyter
# kernel sys.argv presumably holds the kernel's own launch arguments rather
# than a user-supplied path - confirm this is meant to be run as a script.

# Exit with a message on stderr and a non-zero status when the argument is
# missing or wrong (the original printed to stdout and exited with status 0).
if len(sys.argv) < 2:
    sys.exit("Please provide the path of the DOT file as an argument")

input_path = sys.argv[1]

if not os.path.exists(input_path):
    sys.exit("The file path provided does not exist")

# Read the DOT graph from the input file
with open(input_path, "r") as dot_file:
    dot_graph = dot_file.read()

# Render the graph using Graphviz
graph = Source(dot_graph)
graph.format = "png"

# Save the output next to the input, with the same name and a .png extension.
# render() appends ".<format>" to the filename it is given and writes the DOT
# source to that filename, so it must receive the extension-less stem: passing
# "<stem>.png" produced "<stem>.png.png" and put DOT text into "<stem>.png".
output_stem = os.path.splitext(input_path)[0]
output_path = output_stem + ".png"
graph.render(output_stem, view=True)

# Render the same graph a second time to a fixed name. The original rebuilt it
# via `graphviz.Source`, which raises NameError because only `render` and
# `Source` are imported from graphviz; the existing Source object is reused.
graph.render('example_graph', view=True)

ExecutableNotFound: failed to execute WindowsPath('dot'), make sure the Graphviz executables are on your systems' PATH

In [69]:
from graphviz import render, Source

In [70]:
# Wrap the DOT text in a graphviz Source object configured for PNG output.
graph = Source(dot_graph, format="png")

In [71]:
# Render the graph under the base name 'example_graph' and open the result
# in the default viewer. Requires the Graphviz `dot` executable on PATH.
graph.render(filename='example_graph', view=True)

ExecutableNotFound: failed to execute WindowsPath('dot'), make sure the Graphviz executables are on your systems' PATH