[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/12v1M7eNUXDem2RFR0TIsBe-NBgKjlny4?usp=sharing)
[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.13923663.svg)](https://doi.org/10.5281/zenodo.13923663)

# Volcano Plot for Gene Expression Analysis

---

**Citation Note:**

Shakiba, M. (2024). moneuron/Volcano-Plot: Volcano-Plot. Zenodo.
https://doi.org/10.5281/zenodo.13923663

---

In [None]:
#@title Install Dependencies {display-mode: "form"}
!pip install matplotlib seaborn adjustText --quiet

In [None]:
#@title Import Dependencies {display-mode: "form"}
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from matplotlib.lines import Line2D
from adjustText import adjust_text
from google.colab import files

In [None]:
#@title Upload the GEO .tsv file {display-mode: "form"}
# Ask the user for the input table through Colab's upload helper. The result
# is kept in `uploaded`; its keys are read as file names by the loading step.
uploaded = files.upload()

In [None]:
#@title Load the data from the .tsv file {display-mode: "form"}
# Fail with a clear message when nothing was uploaded, instead of the bare
# IndexError that indexing an empty key list would raise.
if not uploaded:
    raise ValueError("No file was uploaded. Re-run the upload cell and select a .tsv file.")

# Only the first uploaded file is used; any additional files are ignored.
file_path = next(iter(uploaded))  # Get the file name from uploaded files
data = pd.read_csv(file_path, sep='\t')  # Tab-separated table

In [None]:
#@title Define the threshold for significance and genes of interest {display-mode: "form"}

# Input the significance thresholds
# pval_threshold is the upper bound on the p-value; logfc_threshold is the
# minimum absolute log fold change (applied symmetrically as +/- the value).
pval_threshold = 0.05  #@param {type:"number"}
logfc_threshold = 0.58  #@param {type:"number"}

# Input the genes of interest as a comma-separated string
genes_of_interest_input = "MMP2, MMP7, MMP14"  #@param {type:"string"}

# Convert the input string into a list of trimmed symbols. Blank entries are
# dropped so that an empty field or a trailing comma does not produce an
# empty-string "gene".
genes_of_interest = [
    gene.strip() for gene in genes_of_interest_input.split(',') if gene.strip()
]

In [None]:
#@title Define column names {display-mode: "form"}

# Input column names
# Headers of the columns in the uploaded table that hold the p-value, the
# log fold change and the gene symbol. The defaults presumably match the
# uploaded GEO table -- change them if the file uses different headers.
pvalue = "P.Value"  #@param {type:"string"}
logfc = "logFC"  #@param {type:"string"}
symbol = "Gene.symbol"  #@param {type:"string"}

In [None]:
#@title Results {display-mode: "form"}

# Add the -log10(p) column, then keep a single row per gene symbol. Sorting by
# ascending p-value first means the row kept for each symbol is the one with
# its smallest p-value.
neg_log10_p = -np.log10(data[pvalue])
ranked = data.assign(**{'-log10(P.Value)': neg_log10_p}).sort_values(by=pvalue)
data = ranked.drop_duplicates(subset=symbol, keep='first')

# Classify genes based on logFC and p-value
def classify_gene(row):
    """Return the regulation class of one table row.

    Reads the module-level column names ``pvalue`` and ``logfc`` and the
    module-level cut-offs ``pval_threshold`` and ``logfc_threshold``.

    Args:
        row: Mapping-like record (for example a DataFrame row) indexable by
            the p-value and log-fold-change column names.

    Returns:
        ``'Upregulated'`` when the p-value is below the cut-off and the fold
        change is above ``logfc_threshold``; ``'Downregulated'`` when the
        p-value is below the cut-off and the fold change is below
        ``-logfc_threshold``; ``'Non-significant'`` otherwise.
    """
    # Anything that is not strictly below the p-value cut-off (including a
    # missing/NaN p-value, for which the comparison is false) is not significant.
    if not row[pvalue] < pval_threshold:
        return 'Non-significant'

    fold_change = row[logfc]
    if fold_change > logfc_threshold:
        return 'Upregulated'
    if fold_change < -logfc_threshold:
        return 'Downregulated'
    return 'Non-significant'

# Apply classification: one label per row, stored in the 'regulation' column
data['regulation'] = data.apply(classify_gene, axis=1)

# Select the rows for the requested genes once, then split them by class.
interest_rows = data[data[symbol].isin(genes_of_interest)]
upregulated_genes = interest_rows[interest_rows['regulation'] == 'Upregulated']
downregulated_genes = interest_rows[interest_rows['regulation'] == 'Downregulated']
non_significant_genes = interest_rows[interest_rows['regulation'] == 'Non-significant']

# Display results; a class with no matching gene prints nothing
report_sections = (
    ("Upregulated Genes of Interest:", upregulated_genes),
    ("\nDownregulated Genes of Interest:", downregulated_genes),
    ("\nNon-significant Genes of Interest:", non_significant_genes),
)
for header, subset in report_sections:
    if not subset.empty:
        print(header)
        print(subset[symbol].tolist())

# Requested symbols that do not occur in the table (e.g. a typo or a
# different naming scheme) are reported explicitly rather than silently
# missing from every list above. Blank entries are skipped.
found_symbols = set(interest_rows[symbol])
missing_genes = [
    gene for gene in genes_of_interest if gene and gene not in found_symbols
]
if missing_genes:
    print("\nGenes of Interest not found in the data:")
    print(missing_genes)

In [None]:
#@title Plotting settings {display-mode: "form"}

# Input the plot size
# These two values are passed as the figure's `figsize` (width, height) by the
# plotting step.
plot_width = 12  #@param {type:"number"}
plot_height = 10  #@param {type:"number"}

# Input colors for significant and non-significant genes
# Each value is used both for the scatter points of its class and for the
# matching legend entry.
upregulated_color = "crimson"  #@param {type:"string"}
downregulated_color = "navy"  #@param {type:"string"}
non_significant_color = "gray"  #@param {type:"string"}

> Each color must be a color name (or hex code) that Matplotlib accepts. Available colors › [Seaborn Palette](https://miro.medium.com/v2/resize:fit:1400/format:webp/1*zm6zVTIEvf9uZ0cE4n0Ogg.png)

In [None]:
#@title Plotting {display-mode: "form"}

# Create the volcano plot with paper-friendly coloring
plt.figure(figsize=(plot_width, plot_height))  # Large canvas leaves room for label placement

# Map each regulation class to its user-selected color
palette = {'Upregulated': upregulated_color, 'Downregulated': downregulated_color, 'Non-significant': non_significant_color}

# Plot the points: log fold change on x, -log10(p) on y, colored by class
sns.scatterplot(x=logfc, y='-log10(P.Value)', data=data, hue='regulation',
                palette=palette, alpha=0.5, s=80, edgecolor=None)

# Select the rows for the genes of interest in one vectorized pass, so the
# Python-level loop below only visits the few rows that actually get a label
# instead of scanning the whole table.
highlighted = data[data[symbol].isin(genes_of_interest)]

# Prepare text annotations for genes of interest and plot larger dots
texts = []
for _, row in highlighted.iterrows():
    # Plot a larger, more visible dot for genes of interest
    plt.scatter(row[logfc], row['-log10(P.Value)'], s=100, color='white', zorder=5, edgecolor='black')

    texts.append(plt.text(row[logfc], row['-log10(P.Value)'], row[symbol],
                          fontsize=10, color='black', ha='center', va='center',
                          bbox=dict(boxstyle='round,pad=0.3', edgecolor='black', facecolor='lightyellow', alpha=0.8)))

# Adjust text positions to avoid overlaps. Skipped when no gene of interest is
# present in the data, since there is then nothing to place.
# NOTE(review): these keyword names look like the pre-1.0 adjustText API --
# confirm they are still honored by the installed version.
if texts:
    adjust_text(texts,
                force_points=(0.1, 0.25),  # Adjust force between points and texts
                force_text=(0.5, 1),  # Adjust force between texts
                expand_points=(2, 2),  # Expand the area around points
                expand_text=(2, 2),  # Expand the area around texts
                autoalign='xy',  # Use both x and y axes for alignment
                only_move={'points':'xy', 'texts':'xy'},  # Allow movement in both x and y directions
                )

# Plot the significance thresholds and keep the line handles: the legend below
# is built from explicit handles, so these labels only appear if the lines are
# passed to it.
pval_line = plt.axhline(-np.log10(pval_threshold), linestyle='--', color='black', label=f'p-value = {pval_threshold}')
neg_logfc_line = plt.axvline(-logfc_threshold, linestyle='--', color='black', label=f'logFC = {-logfc_threshold}')
pos_logfc_line = plt.axvline(logfc_threshold, linestyle='--', color='black', label=f'logFC = {logfc_threshold}')

# Customize axis labels and title
plt.xlabel('Log2 Fold Change', fontsize=14)
plt.ylabel('-log10(P.Value)', fontsize=14)
plt.title('Volcano Plot of Differential Gene Expression', fontsize=16)

# Customize the legend: solid color swatches for the three classes, followed
# by the labeled threshold lines
custom_lines = [Line2D([0], [0], color=upregulated_color, lw=4, label='Upregulated'),
                Line2D([0], [0], color=downregulated_color, lw=4, label='Downregulated'),
                Line2D([0], [0], color=non_significant_color, lw=4, label='Non-significant')]
legend_handles = custom_lines + [pval_line, neg_logfc_line, pos_logfc_line]
plt.legend(handles=legend_handles, loc='upper right', title='Gene Regulation')

# Improve plot aesthetics
sns.despine(trim=True)
plt.grid(False)  # Remove grid lines for a cleaner look
plt.tight_layout()

# Show the plot
plt.show()