In [None]:
# libraries 
import numpy as np
import pandas as pd
# matplotlib
import matplotlib.pyplot as plt
# seaborn
import seaborn as sns
# sklearn

# biomart
from biomart import BiomartServer


In [None]:
# Load the differential-expression results from the local CSV file

## DEx
de_example = pd.read_csv(filepath_or_buffer='DE/GSE214615_signatureData.csv')
# Preview the first five rows as the cell output
de_example.head(n=5)


In [None]:
# Transform the adjusted p-value to the -log10 scale (used as the volcano plot y-axis).
# NOTE: an adjusted p-value of exactly 0 becomes +inf on this scale.
de_example['-log adj.pvalue'] = -np.log10(de_example['FDR_adjusted_P.value'])
# Keep rows with logCPM >= 1. `.copy()` makes the result an independent frame so the
# column added below is not written to a slice of the original (SettingWithCopyWarning).
de_example = de_example[de_example['logCPM'] >= 1].copy()
# Define thresholds
abs_fc = 1.5  # Cutoff on the absolute value of Log_FoldChange
threshold = 0.05  # Cutoff on the adjusted p-value itself (NOT on the -log10 scale)

# A row is significant when its -log10 adjusted p-value exceeds the cutoff expressed on
# the same -log10 scale. Comparing the -log10 column against the raw 0.05 would accept
# every adjusted p-value below ~0.89.
is_significant = de_example['-log adj.pvalue'] > -np.log10(threshold)

# Add a new column 'color' based on the conditions:
#   'red'   -> significant and Log_FoldChange < -abs_fc
#   'blue'  -> significant and Log_FoldChange >  abs_fc
#   'white' -> everything else (including rows with missing values)
de_example['color'] = np.select(
    [
        is_significant & (de_example['Log_FoldChange'] < -abs_fc),
        is_significant & (de_example['Log_FoldChange'] > abs_fc),
    ],
    ['red', 'blue'],
    default='white',
)

de_example.head()

In [None]:
# Select the 20 rows with the largest -log adj.pvalue; these are the points that get labelled
top_20 = de_example.nlargest(20, '-log adj.pvalue')

# Volcano plot with points colored by the 'color' column
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(data=de_example, x='Log_FoldChange', y='-log adj.pvalue', hue='color', alpha=0.7,
                palette={'red': 'red', 'blue': 'blue', 'white': 'gray'}, ax=ax)

# Annotate each of the top 20 points with its gene symbol
for _, row in top_20.iterrows():
    ax.text(row['Log_FoldChange'], row['-log adj.pvalue'], row['Gene_symbol'],
            fontsize=8, ha='right', va='bottom')

# Threshold lines. The y-axis is on the -log10 scale, so the adjusted p-value cutoff
# has to be transformed before it is drawn; the legend labels are built from the
# actual cutoff values so they cannot drift from where the lines are drawn.
ax.axhline(y=-np.log10(threshold), color='grey', linestyle='--', label=f'FDR={threshold}')
ax.axvline(x=abs_fc, color='grey', linestyle='--', label=f'log2FC={abs_fc}')
ax.axvline(x=-abs_fc, color='grey', linestyle='--', label=f'log2FC={-abs_fc}')

ax.set_title('Volcano Plot of Differential Expression')
ax.set_xlabel('Log2 Fold Change (log2FC)')
ax.set_ylabel('-Log10 Adjusted P-value')

ax.legend(title='Color')
ax.grid(True)
plt.show()

In [None]:
# Export to HPA
# Keep only the rows whose 'color' is something other than 'white'
de_example_filtered = de_example.loc[de_example['color'].ne('white')]
# Collect the Ensembl IDs of the retained rows as a plain Python list
gene_list = de_example_filtered['Ensembl_ID'].to_list()
# Change character to capital