In [5]:
# replication of the volcano plot in the shiny sleep app 

import scanpy as sc
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go

# Ignore all future warnings
from warnings import simplefilter
simplefilter(action='ignore', category=FutureWarning)

# Load pval and log2fc sleep data 
df = pd.read_csv('shinysleep-master/version1/data/combined_nobatch_pval_anno.csv')

# choose the cell that you want to visualize and filter the data
cell = "EG_1"
df_cell = df[df['cluster'] == cell]

# add a column to the dataframe that contains the -log10 of the pval
df_cell['-log10(pval)'] = -np.log10(df_cell['pval'])

# create volcano plot with -log10(pval) on the y-axis and log2fc on the x-axis using plotly 

# Create the interactive volcano plot
fig = px.scatter(
    df_cell,
    x='logfoldchange',
    y='-log10(pval)',
    hover_data=['gene'],
    title=f'Volcano Plot of Gene Expression Changes with Sleep/Wake in {cell}',
    labels={
        'logfoldchange': 'Log2 Fold Change',
        '-log10(pval)': '-log10(pval adjusted)'
    },
    color='-log10(pval)',
    color_continuous_scale='Viridis'
)

significance_threshold = -np.log10(0.05)
fig.add_trace(
    go.Scatter(
        x=[min(df_cell['logfoldchange']), max(df_cell['logfoldchange'])],
        y=[significance_threshold, significance_threshold],
        mode="lines",
        line=go.scatter.Line(color="red", dash="dash"),
        showlegend=False
    )
)

fig.show()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [14]:
df = pd.read_csv('shinysleep-master/version1/data/combined_nobatch_pval_anno.csv')
np.unique(df['cluster'])

array(['5-HT', 'ALG', 'CXG', 'EG_1', 'EG_2', 'Oct', 'PAM', 'PB', 'PG',
       'R5', 'Tyr', 'ab', 'abp', 'adPN', 'clock', 'dFB', 'nonPAM',
       'ring_A', 'ring_B', 'y'], dtype=object)

In [11]:
# get differentially expressed genes in eg 

deg = df_cell[(df_cell['pval_adj'] < 0.05)]
negatives = deg[deg['logfoldchange'] < 0]
positives = deg[deg['logfoldchange'] > 0]

negative_genes_sleep = np.array(negatives['gene'].values)
positive_genes_sleep = np.array(positives['gene'].values)

# read in npy with age data
negative_genes_age = np.load("negative_genes_age.npy", allow_pickle=True)
positive_genes_age = np.load("positive_genes_age.npy", allow_pickle=True)

# get intersection of negative_genes and negative_genes_age
neg_intersection = np.intersect1d(negative_genes_sleep, positive_genes_age) # more highly expressed in sleep and old age
pos_intersection = np.intersect1d(positive_genes_sleep, negative_genes_age) # more highly expressed in wake and young age

In [12]:
neg_intersection

array(['Acsl', 'CG31869', 'CG7029', 'CaMKII', 'Galphao', 'Rdl', 'Snap25',
       'Treh', 'ctp', 'nuf', 'sm'], dtype=object)

In [13]:
pos_intersection

array(['CG31324', 'CG7781', 'CG7888', 'CG9377', 'Msr-110', 'Oda', 'Pdp1',
       'RpS27A', 'Sod3'], dtype=object)