In [None]:
import os
import pandas as pd

def open_gsea_results(output_dir):
    """
    Open and inspect all the GSEA results for a particular dataset.

    Every entry in ``output_dir`` whose name ends with ``_gsea_results.csv`` is
    read with ``pd.read_csv``. The part of the filename before that suffix is
    used as the cell type. A preview (``head()``) of each table is printed.

    Parameters:
    - output_dir: The directory where GSEA results are saved.

    Returns:
    - gsea_data: Dictionary mapping cell type -> DataFrame, filled in sorted
                 filename order (empty if no file matches), or None if
                 output_dir is not an existing directory.
    """
    suffix = "_gsea_results.csv"

    # isdir rather than exists: a path pointing at a regular file would pass an
    # existence check and then make os.listdir raise NotADirectoryError.
    if not os.path.isdir(output_dir):
        print(f"No GSEA results found in the directory: {output_dir}")
        return None

    # Dictionary to store GSEA results for each cell type
    gsea_data = {}

    # os.listdir returns entries in arbitrary order; sorting keeps the printed
    # previews and the dictionary order reproducible between runs.
    for file in sorted(os.listdir(output_dir)):
        if not file.endswith(suffix):
            continue

        # Strip only the trailing suffix. str.replace would also remove any
        # earlier occurrence of the suffix inside the cell type name.
        cell_type = file[: -len(suffix)]

        # Read the CSV file into a DataFrame
        gsea_df = pd.read_csv(os.path.join(output_dir, file))
        print(f"Loaded GSEA results for {cell_type}:")
        print(gsea_df.head())  # Display the first few rows of the DataFrame

        # Store the DataFrame in the dictionary using the cell type as the key
        gsea_data[cell_type] = gsea_df

    return gsea_data

# Example usage: load the GSEA results of every cell type found in the 'pancreas/gsea_results/' directory
output_dir = "pancreas/gsea_results/"  # Directory where the results are saved

gsea_results = open_gsea_results(output_dir)


In [34]:
import os
import pandas as pd

def open_gsea_results(output_dir, cell_type=None):
    """
    Open and inspect the GSEA results for a specific cell type or all cell types in a dataset.

    Result files are the entries of ``output_dir`` whose name ends with
    ``_gsea_results.csv``. The part of the filename before that suffix is the
    cell type. A preview (``head()``) of every loaded table is printed.

    Parameters:
    - output_dir: The directory where GSEA results are saved.
    - cell_type: If provided (non-empty string), only return results for the
                 specified cell type.

    Returns:
    - gsea_data: A DataFrame for the specific cell type if provided, otherwise a dictionary
                 containing GSEA results for each cell type (filled in sorted filename order).
                 None if output_dir is not an existing directory, or if cell_type was
                 requested but has no result file.
    """
    suffix = "_gsea_results.csv"

    # isdir rather than exists: a path pointing at a regular file would pass an
    # existence check and then make os.listdir raise NotADirectoryError.
    if not os.path.isdir(output_dir):
        print(f"No GSEA results found in the directory: {output_dir}")
        return None

    # os.listdir returns entries in arbitrary order; sorting keeps the printed
    # previews and the dictionary order reproducible between runs.
    result_files = sorted(f for f in os.listdir(output_dir) if f.endswith(suffix))

    if cell_type:
        # If a specific cell type is requested, only load that one
        wanted = f"{cell_type}{suffix}"
        result_files = [f for f in result_files if f == wanted]
        if not result_files:
            # Tell the user why they get None instead of failing silently.
            print(f"No GSEA results found for cell type '{cell_type}' in the directory: {output_dir}")
            return None

    # Dictionary to store GSEA results for each cell type if cell_type is not specified
    gsea_data = {}

    for file in result_files:
        # Strip only the trailing suffix. str.replace would also remove any
        # earlier occurrence of the suffix inside the cell type name.
        current_cell_type = file[: -len(suffix)]

        # Read the CSV file into a DataFrame
        gsea_df = pd.read_csv(os.path.join(output_dir, file))
        print(f"Loaded GSEA results for {current_cell_type}:")
        print(gsea_df.head())  # Display the first few rows of the DataFrame

        # If specific cell type is requested, return the DataFrame
        if cell_type:
            return gsea_df

        # Store the DataFrame in the dictionary using the cell type as the key
        gsea_data[current_cell_type] = gsea_df

    # Only reached when no specific cell type was requested
    return gsea_data

# Example usage: open the GSEA results for one specific cell type from the
# 'gastrulation_erythroid/gsea_results/Reactome_2022/' directory
output_dir = "gastrulation_erythroid/gsea_results/Reactome_2022/"  # Directory where the results are saved

# Specify the cell type you're interested in
cell_type = "Erythroid1"

# Retrieve results for the specific cell type.
# NOTE(review): open_gsea_results returns None when the directory or the cell
# type is missing; wrapping the call in pd.DataFrame(...) turns that None into
# an empty DataFrame, so a failed lookup shows up here as an empty table.
gsea_results = pd.DataFrame(open_gsea_results(output_dir, cell_type=cell_type))
gsea_results

# If no specific cell type is provided, it will load results for all cell types
# gsea_results_all = open_gsea_results(output_dir)


Loaded GSEA results for Erythroid1:
   Unnamed: 0     Name                                               Term  \
0           0  prerank                     Heme Biosynthesis R-HSA-189451   
1           1  prerank              Metabolism Of Porphyrins R-HSA-189445   
2           2  prerank     Eukaryotic Translation Elongation R-HSA-156842   
3           3  prerank  Influenza Viral RNA Transcription And Replicat...   
4           4  prerank             Selenocysteine Synthesis R-HSA-2408557   

         ES       NES  NOM p-val  FDR q-val  FWER p-val   Tag % Gene %  \
0  0.972937  1.770901   0.053476   1.000000       0.842     7/7  3.15%   
1  0.947719  1.743613   0.089385   1.000000       0.904     7/8  3.15%   
2 -0.876482 -1.581514   0.000000   0.003382       0.003   52/77  9.13%   
3 -0.856279 -1.579298   0.000000   0.001691       0.003  53/106  9.13%   
4 -0.877465 -1.578361   0.000000   0.001127       0.003   46/72  7.23%   

                                          Lead_genes  
0

Unnamed: 0.1,Unnamed: 0,Name,Term,ES,NES,NOM p-val,FDR q-val,FWER p-val,Tag %,Gene %,Lead_genes
0,0,prerank,Heme Biosynthesis R-HSA-189451,0.972937,1.770901,0.053476,1.000000,0.842,7/7,3.15%,ALAD;HMBS;ALAS2;UROD;ABCG2;FECH;UROS
1,1,prerank,Metabolism Of Porphyrins R-HSA-189445,0.947719,1.743613,0.089385,1.000000,0.904,7/8,3.15%,ALAD;HMBS;ALAS2;UROD;ABCG2;FECH;UROS
2,2,prerank,Eukaryotic Translation Elongation R-HSA-156842,-0.876482,-1.581514,0.000000,0.003382,0.003,52/77,9.13%,RPS2;RPLP0;RPL18A;RPL32;RPS5;RPL19;RPLP1;RPS6;...
3,3,prerank,Influenza Viral RNA Transcription And Replicat...,-0.856279,-1.579298,0.000000,0.001691,0.003,53/106,9.13%,RPS2;RPLP0;RPL18A;RPL32;RPS5;RPL19;RPLP1;RPS6;...
4,4,prerank,Selenocysteine Synthesis R-HSA-2408557,-0.877465,-1.578361,0.000000,0.001127,0.003,46/72,7.23%,RPS2;RPLP0;RPL18A;RPL32;RPS5;RPL19;RPLP1;RPS6;...
...,...,...,...,...,...,...,...,...,...,...,...
977,977,prerank,IRS-related Events Triggered By IGF1R R-HSA-24...,-0.242459,-0.391737,0.991841,1.000000,1.000,3/11,37.91%,HRAS;GRB2;PTPN11
978,978,prerank,PI Metabolism R-HSA-1483255,-0.247152,-0.384604,0.998792,1.000000,1.000,1/8,24.18%,PITPNB
979,979,prerank,RHOQ GTPase Cycle R-HSA-9013406,-0.229618,-0.368505,0.997576,0.999961,1.000,4/10,46.52%,CDC42;SNAP23;WASL;SLC1A5
980,980,prerank,RHOJ GTPase Cycle R-HSA-9013409,-0.224056,-0.358651,0.998770,0.999187,1.000,5/10,49.54%,CDC42;SNAP23;WASL;SLC1A5;RHOJ


In [None]:
import gseapy as gp
# Show the library names gseapy reports (presumably the available gene-set
# libraries, such as the 'Reactome_2022' in the results path used earlier — TODO confirm)
gp.get_library_name()