# Environment

In [1]:
# Read the DEA results
import pandas as pd
import numpy as np

### Prepare an Excel file with all deregulated genes, including their Fold Change, p-Value, and Reads Expression. (Parameters to exclude: reads < 50 and Fold Change < 0.5).

In [7]:
# Read the DEA file
dea_df = pd.read_csv('/beegfs/scratch/ric.broccoli/kubacki.michal/SRF_CUTandTAG/DATA/DEA_NEU.csv')

# Filter based on criteria:
# 1. Reads (baseMean) >= 50
# 2. Absolute log2FoldChange >= 0.5
# 3. padj > 0.05
filtered_df = dea_df[
    (dea_df['baseMean'] >= 50) & 
    (abs(dea_df['log2FoldChange']) >= 0.5) &
    (dea_df['padj'] > 0.05)
]

# Sort by absolute fold change
filtered_df['abs_log2FC'] = abs(filtered_df['log2FoldChange'])
filtered_df = filtered_df.sort_values('abs_log2FC', ascending=False)

# Drop the helper column
filtered_df = filtered_df.drop('abs_log2FC', axis=1)

# Save to Excel
output_file = 'deregulated_genes_NEU.xlsx'
filtered_df.to_excel(output_file, index=False)

print(f"Number of genes passing filters: {len(filtered_df)}")
print(f"Results saved to {output_file}")

# Display first few rows
filtered_df.head()


Number of genes passing filters: 277
Results saved to deregulated_genes_NEU.xlsx


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['abs_log2FC'] = abs(filtered_df['log2FoldChange'])


Unnamed: 0,gene,baseMean,log2FoldChange,lfcSE,stat,pvalue,padj
10777,Mrc1,62.810688,-2.212044,0.835467,-2.647672,0.008105,0.055136
10065,Stab1,390.918228,-2.17469,0.969861,-2.24227,0.024944,0.126481
9957,Cd5l,125.059724,-2.110901,0.962613,-2.192887,0.028316,0.138304
10119,Hk3,56.060637,-2.10565,0.927547,-2.270127,0.0232,0.119985
10459,Abcc3,59.076599,-1.950015,0.793889,-2.456283,0.014038,0.082795


In [8]:
# Read the DEA file
dea_df = pd.read_csv('/beegfs/scratch/ric.broccoli/kubacki.michal/SRF_CUTandTAG/DATA/DEA_NSC.csv')

# Filter based on criteria:
# 1. Reads (baseMean) >= 50
# 2. Absolute log2FoldChange >= 0.5
# 3. padj > 0.05
filtered_df = dea_df[
    (dea_df['baseMean'] >= 50) & 
    (abs(dea_df['log2FoldChange']) >= 0.5) &
    (dea_df['padj'] > 0.05)
]

# Sort by absolute fold change
filtered_df['abs_log2FC'] = abs(filtered_df['log2FoldChange'])
filtered_df = filtered_df.sort_values('abs_log2FC', ascending=False)

# Drop the helper column
filtered_df = filtered_df.drop('abs_log2FC', axis=1)

# Save to Excel
output_file = 'deregulated_genes_NSC.xlsx'
filtered_df.to_excel(output_file, index=False)

print(f"Number of genes passing filters: {len(filtered_df)}")
print(f"Results saved to {output_file}")

# Display first few rows
filtered_df.head()


Number of genes passing filters: 190
Results saved to deregulated_genes_NSC.xlsx


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['abs_log2FC'] = abs(filtered_df['log2FoldChange'])


Unnamed: 0,gene,baseMean,log2FoldChange,lfcSE,stat,pvalue,padj
5011,mt-Rnr1,128735.6214,-2.205665,1.017778,-2.167139,0.030224,0.073425
3409,Rny3,231.937597,-0.884791,0.627751,-1.409463,0.158698,0.291576
5280,Scarna6,52.264515,-0.861636,0.373226,-2.30862,0.020965,0.053399
5160,5730414N17Rik,50.717258,0.768789,0.341993,2.247971,0.024578,0.061357
1684,Gm42418,106.773519,-0.761254,1.107337,-0.687464,0.491791,0.660379


### metaprofiles - In the metaprofile include -5kb, TSS, Gene Body, 5’ UTR (+5kb)
- showing that Mecp2 is enriched at TSSs
- image of enrichment at CpG islands


![image.png](attachment:image.png)

In [None]:
# Display the PDF file
from IPython.display import IFrame

# Create an IFrame to display the PDF
IFrame("/beegfs/scratch/ric.broccoli/kubacki.michal/SRF_CUTandTAG/Visualization/results/combined_tss_profiles.pdf", width=800, height=600)
