In [None]:
import pandas as pd
import os
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import numpy as np
import seaborn as sns

# Show every column and never truncate cell text when displaying DataFrames.
# NOTE(review): the next cell sets both options again (10 columns, width 100),
# so these values only stay in effect if that cell is skipped.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)

In [None]:
# Limit DataFrame display to 10 columns and 100 characters per text cell.
# NOTE(review): this replaces the unlimited settings made right after the imports.
pd.set_option('display.max_columns', 10)  # Set the maximum number of columns to display
pd.set_option('display.max_colwidth', 100)  # Set the maximum column width for text data


In [None]:

# Global matplotlib defaults: 200 dpi and 4x4 inch figures.
# Plotting cells further down pass their own figsize to plt.figure().
plt.rcParams['figure.dpi'] = 200
plt.rcParams['figure.figsize'] = (4, 4)
# Optional seaborn publication style, currently disabled
#sns.set(style="white", context="talk")  # 'talk' context increases font sizes


In [None]:
pip list

In [None]:
# ===== Multi-species analysis =====

In [None]:
import pandas as pd
import os

# Cell types analysed. Each name selects the files R2_<cell type>.csv and
# Pval_<cell type>.csv and is used as the column prefix in the combined tables.
cell_types = ["PT", "Podo", "TAL_MD", "DCT_CNT_CD", "EC", "Stromal", "IC", "Immune", "PEC", "DTL_ATL"]

# Directory that contains the R2/ and Pval/ sub-folders
base_path = ".../Atlas/scSPECTRA/species_healthy_disease"
r2_folder = "R2/"
pval_folder = "Pval/"
# Metadata CSV; the cells below read its 'orig_ident', 'species' and 'disease' columns
metadata_path = ".../Atlas/scSPECTRA/multispecies/multispecies_metadata_complete.csv"

def extract_cell_type(gene_set):
    """Return the first entry of ``cell_types`` that ``gene_set`` starts with.

    The string "Unknown" is returned when no cell type is a prefix of ``gene_set``.
    """
    prefix_matches = (candidate for candidate in cell_types if gene_set.startswith(candidate))
    return next(prefix_matches, "Unknown")

# Load the metadata table and pick the sample IDs flagged as diseased
metadata = pd.read_csv(metadata_path)
diseased_rows = metadata['disease'] == "diseased"
unique_samples = metadata.loc[diseased_rows, 'orig_ident'].unique()
# Sample ID -> species (for repeated sample IDs the last row wins)
sample_to_species = dict(zip(metadata['orig_ident'], metadata['species']))

# Empty frames indexed by sample; one block of columns is joined in per cell type
combined_r2 = pd.DataFrame(index=unique_samples)
combined_pval = pd.DataFrame(index=unique_samples)
sample_counts_per_cell_type = {}
sample_counts_per_cell_type_per_species = {
    species: dict.fromkeys(cell_types, 0) for species in metadata['species'].unique()
}

for cell_type in cell_types:
    r2_path = os.path.join(base_path, r2_folder, f"R2_{cell_type}.csv")
    pval_path = os.path.join(base_path, pval_folder, f"Pval_{cell_type}.csv")

    # Keep only the diseased samples and prefix every column with the cell type
    r2_df = pd.read_csv(r2_path, index_col=0)
    r2_df = r2_df.loc[r2_df.index.isin(unique_samples)].add_prefix(f"{cell_type}_")
    pval_df = pd.read_csv(pval_path, index_col=0)
    pval_df = pval_df.loc[pval_df.index.isin(unique_samples)].add_prefix(f"{cell_type}_")

    # Left join: samples absent from this cell type's table get NaN
    combined_r2 = combined_r2.join(r2_df, how='left')
    combined_pval = combined_pval.join(pval_df, how='left')

    # Number of samples with R2 rows for this cell type, overall and per species
    sample_counts_per_cell_type[cell_type] = len(r2_df)
    for sample in r2_df.index:
        species = sample_to_species.get(sample, "Unknown")
        sample_counts_per_cell_type_per_species[species][cell_type] += 1

# Missing p-values are set to 1 so they never pass the significance cutoff
combined_pval = combined_pval.fillna(1)

# A gene set counts as significant in a sample when its p-value is below this cutoff
p_value_threshold = 0.05
significant_matrix = combined_pval < p_value_threshold
significant_counts = significant_matrix.sum()

significant_counts_df = pd.DataFrame({'GeneSet': significant_counts.index, 'Count': significant_counts.values})
significant_counts_df['CellType'] = significant_counts_df['GeneSet'].apply(extract_cell_type)


def _percent_of_total(counts, totals):
    """Return counts / totals * 100 element-wise, with 0 where the total is not positive."""
    positive_totals = totals.where(totals > 0)
    return (counts / positive_totals * 100).fillna(0.0)


# Overall percentage of samples (with data for the gene set's cell type) in which
# the gene set is significant. Cell types without a recorded total use 1.
overall_totals = significant_counts_df['CellType'].map(sample_counts_per_cell_type).fillna(1)
significant_counts_df['NormalizedPercentage'] = _percent_of_total(
    significant_counts_df['Count'], overall_totals
)

# Species of every row of significant_matrix, computed once instead of once per gene set
species_of_sample = pd.Series(
    [sample_to_species.get(sample, "Unknown") for sample in significant_matrix.index],
    index=significant_matrix.index,
)

# Add counts of significant samples per species and the matching normalized percentages
for species in metadata['species'].unique():
    count_col_name = f'Count_{species}'
    norm_perc_col_name = f'NormalizedPercentage_{species}'

    # Column sums over the rows of this species; the order matches the 'GeneSet' column
    rows_of_species = (species_of_sample == species).to_numpy()
    significant_counts_df[count_col_name] = significant_matrix.loc[rows_of_species].sum().to_numpy()

    species_totals = significant_counts_df['CellType'].map(
        sample_counts_per_cell_type_per_species[species]
    ).fillna(1)
    significant_counts_df[norm_perc_col_name] = _percent_of_total(
        significant_counts_df[count_col_name], species_totals
    )

# Sort DataFrame by general normalized percentage
sorted_df = significant_counts_df.sort_values(by='NormalizedPercentage', ascending=False)
print(sorted_df.head())


In [None]:

# Percentage of significant samples a gene set must exceed in every species
high_percentage_threshold = 10

species_percentage_columns = [
    'NormalizedPercentage_mouse',
    'NormalizedPercentage_rat',
    'NormalizedPercentage_human',
]

# Keep gene sets that are above the threshold in all three species.
# .copy() makes conserved_features an independent frame, so adding a column to it
# below is not a chained assignment on a slice of significant_counts_df.
above_threshold_in_all = (
    significant_counts_df[species_percentage_columns] > high_percentage_threshold
).all(axis=1)
conserved_features = significant_counts_df[above_threshold_in_all].copy()

# Average normalized percentage across the three species
conserved_features['AverageNormalizedPercentage'] = (
    conserved_features[species_percentage_columns].mean(axis=1)
)

# Sort by the average and index the sorted frame by gene set name
# (the 'GeneSet' column is kept as a column as well)
sorted_conserved_features = (
    conserved_features
    .sort_values(by='AverageNormalizedPercentage', ascending=False)
    .set_index("GeneSet", drop=False)
)

# Show the 50 highest-ranked conserved gene sets
top_conserved_features = sorted_conserved_features.head(50)  # Adjust number as needed
top_conserved_features

In [None]:
# Number of gene sets that passed the per-species threshold filter
len(conserved_features)

In [None]:

# Save the conserved gene sets as CSV (the filtered frame, not the sorted one).
# NOTE(review): the target directory is the same string as `base_path`; consider reusing it.
conserved_features.to_csv(".../Atlas/scSPECTRA/species_healthy_disease/conserved_hallmark_features.csv")


In [None]:
# Colour assigned to each cell type in the pie chart
cell_colors = {
    "DCT_CNT_CD": "#3182bd",
    "DTL_ATL": "#fdd0a2",
    "EC": "seagreen",
    "IC": "orange",
    "Immune": "#c7e9c0",
    "Podo": "#555555",
    "Stromal": "limegreen",
    "PEC": "#fde725",
    "PT": "darkorchid",
    "TAL_MD": "lightcoral",
}

# Colour for any cell type without an entry in cell_colors (e.g. "Unknown")
fallback_color = "lightgrey"

# Requires 'sorted_conserved_features' from the conserved-feature cell.
# NOTE: despite its name this holds every conserved feature, not only the top 100.
top_100_features = sorted_conserved_features

# Number of conserved gene sets per cell type
cell_type_counts = top_100_features['CellType'].value_counts()

# Exactly one colour per wedge, in wedge order. Skipping unmapped cell types would
# shorten the list and shift the colours of all following wedges.
pie_colors = [cell_colors.get(cell_type, fallback_color) for cell_type in cell_type_counts.index]

# Plotting the pie chart
plt.figure(figsize=(10, 10))  # Adjust the size as needed
# autopct is left at its default (None), which draws no percentage text. An empty
# format string is not equivalent: it would be applied as '' % value, which raises.
cell_type_counts.plot(kind='pie', colors=pie_colors)
plt.title('')
plt.ylabel('')
plt.show()

In [None]:
# Requires 'conserved_features' from the conserved-feature cell.

# Bar colour for each per-species percentage column
custom_palette = {
    'NormalizedPercentage_human': 'royalblue',
    'NormalizedPercentage_mouse': '#FF8000',
    'NormalizedPercentage_rat': 'green'
}
plot_columns = list(custom_palette)

# Recompute the cross-species average on a new frame instead of writing into
# conserved_features in place (it may be a filtered slice of another DataFrame).
conserved_features = conserved_features.assign(
    AverageNormalizedPercentage=conserved_features[plot_columns].mean(axis=1)
)

# Number of highest-ranked gene sets shown in the plot
n_top_features = 8
# NOTE: the variable name says 5, but the 8 highest-ranked gene sets are kept
top_5_features = (
    conserved_features
    .sort_values(by='AverageNormalizedPercentage', ascending=False)
    .head(n_top_features)
)

# Long format for seaborn: one row per (gene set, species column) pair.
# .assign returns a new frame, so no column is set on a slice.
plot_data = (
    top_5_features[plot_columns]
    .assign(Feature=top_5_features['GeneSet'])
    .melt(id_vars=['Feature'], var_name='Species', value_name='NormalizedPercentage')
)

# Optional seaborn publication style, currently disabled
#sns.set(style="white", context="talk")  # 'talk' context increases font sizes

# Wide figure so the grouped bars appear narrower
plt.figure(figsize=(8, 4))

# Grouped bars: one group per gene set, one bar per species
barplot = sns.barplot(x='Feature', y='NormalizedPercentage', hue='Species', data=plot_data, palette=custom_palette)

# Move the y-axis ticks to the right side
plt.gca().yaxis.tick_right()

# Empty axis label and title; tick labels rotated by 90 degrees
plt.xlabel('', fontsize=16)
plt.title('', fontsize=18)
plt.xticks(rotation=90, fontsize=14)
plt.yticks(rotation=90, fontsize=14)

# Remove the legend
plt.legend().remove()

# Tight layout for better spacing
plt.tight_layout()

# Show the plot
plt.show()

In [None]:
# Cell type and gene set to plot; the R2 and p-value tables are read from r2_base_path
r2_base_path = ".../Atlas/scSPECTRA/species_healthy_disease/"
cell_type = "PT"  # Replace with your desired cell type
target_gene_set = "GO.0030574.collagen.catabolic.process.BP"

# Load the R2 and p-value tables of the chosen cell type (rows are sample IDs)
r2_file_path = f"{r2_base_path}/R2/R2_{cell_type}.csv"
pval_file_path = f"{r2_base_path}/Pval/Pval_{cell_type}.csv"
r2_df = pd.read_csv(r2_file_path, index_col=0)
pval_df = pd.read_csv(pval_file_path, index_col=0)

# Sample groups, one per x position (group0 is labelled "diabetic_control" on the x axis)
group0 = [
    "K3076", "K3037", "I3065", "J3036", "N3099", "M3098", "B3039", "N3024",
    "Q3067", "T3097", "J3079", "I3078", "A3038", "H3069", "P3066", "M3096",
    "R3077", "G3068", "E3075", "B3035", "B3025", "I3095", "C3028", "A3026",
    "A3029", "B3027",
]
group1 = ["IRI4h1", "IRI4h2", "IRI4h3"]
group2 = ["IRI12h1b1", "IRI12h1b2", "IRI12h2", "IRI12h3"]
group3 = ["IRI2d1b1", "IRI2d1b2", "IRI2d2b1", "IRI2d2b2", "IRI2d3"]
group4 = ["IRI14d1b1", "IRI14d1b2", "IRI14d2", "IRI14d3"]
group5 = ["IRI6w1b1", "IRI6w1b2", "IRI6w2", "IRI6w3"]

# Samples whose p-value for the target gene set is below 0.05
significant_samples = pval_df.index[pval_df[target_gene_set] < 0.05]

plt.figure(figsize=(10, 6))

# Per-group mean and standard deviation of R2
mean_r2_values = []
std_dev_values = []

time_point_groups = [group0, group1, group2, group3, group4, group5]
for i, group in enumerate(time_point_groups):
    group_r2_values = r2_df.loc[group, target_gene_set]
    mean_r2 = group_r2_values.mean()
    std_dev = group_r2_values.std()
    mean_r2_values.append(mean_r2)
    std_dev_values.append(std_dev)

    # One marker per sample: red when significant, blue otherwise
    for sample in group:
        color = 'red' if sample in significant_samples else 'blue'
        plt.plot(i, group_r2_values.loc[sample], marker='o', markersize=5, color=color)

# Group means joined by a dashed black line
plt.plot(range(len(time_point_groups)), mean_r2_values, marker='_', color='black', linestyle='--', label='Mean R2')

# Axes, labels and ticks
plt.ylim(0, 1)  # Set the y-axis range from 0 to 1
plt.xlabel("Time Points (Groups)")
plt.ylabel("R2 Value")
plt.title(f"{target_gene_set} {cell_type}")
plt.xticks(range(len(time_point_groups)), ["diabetic_control", "4h", "12h", "2d", "14d", "6w"])
#plt.legend(loc='upper left', bbox_to_anchor=(1, 1))
#plt.legend()
plt.grid(False)  # Remove the grid
plt.show()

In [None]:
# Uses r2_base_path, cell_type and target_gene_set as set in the time-course cell above.

# Load the R2 and p-value tables of the chosen cell type (rows are sample IDs)
r2_file_path = f"{r2_base_path}/R2/R2_{cell_type}.csv"
pval_file_path = f"{r2_base_path}/Pval/Pval_{cell_type}.csv"
r2_df = pd.read_csv(r2_file_path, index_col=0)
pval_df = pd.read_csv(pval_file_path, index_col=0)

# Human samples split by harmonized condition
group0 = metadata[(metadata["species"]=="human") & (metadata["condition_harmonized"]=="CKD")].orig_ident.unique().tolist()
group1 = metadata[(metadata["species"]=="human") & (metadata["condition_harmonized"]=="AKI")].orig_ident.unique().tolist()

# Samples excluded by hand
samples_to_remove = ['HK2886']  # Replace with the names of samples you want to remove

group0 = [sample for sample in group0 if sample not in samples_to_remove]
group1 = [sample for sample in group1 if sample not in samples_to_remove]

# r2_df.loc raises KeyError when a requested sample has no row in this cell type's
# table, so such samples are reported and skipped instead of aborting the cell.
missing_samples = [sample for sample in group0 + group1 if sample not in r2_df.index]
if missing_samples:
    print(f"Skipping samples without {cell_type} R2 values: {missing_samples}")
group0 = [sample for sample in group0 if sample not in missing_samples]
group1 = [sample for sample in group1 if sample not in missing_samples]

# Samples whose p-value for the target gene set is below 0.05
significant_samples = pval_df.index[pval_df[target_gene_set] < 0.05]

# x-coordinates of the two groups, placed close together
group_positions = [0.4, 0.6]  # Adjust these values as needed

plt.figure(figsize=(2.5, 6))

# Mean R2 of each group
mean_r2_values = []

for i, group in enumerate([group0, group1]):
    group_r2_values = r2_df.loc[group, target_gene_set]
    mean_r2 = group_r2_values.mean()
    mean_r2_values.append(mean_r2)

    # One marker per sample: red when significant, blue otherwise
    for sample in group:
        x_position = group_positions[i]  # x-coordinate for this group
        color = 'red' if sample in significant_samples else 'blue'
        plt.plot(x_position, group_r2_values.loc[sample], marker='o', markersize=5, color=color)

# Group means as black dashes (no connecting line)
plt.plot(group_positions, mean_r2_values, marker='_', color='black', linestyle='', label='Mean R2')

# Axes, labels and ticks
plt.ylim(0, 1)  # Set the y-axis range from 0 to 1
plt.xlim(0.3, 0.7)  # Set the x-axis range
plt.xlabel("Groups")
plt.ylabel("R2 Value")
plt.title("human")
plt.xticks(group_positions, ["CKD", "AKI"])  # Set custom x-axis ticks
#plt.legend()
plt.grid(False)
plt.show()

In [None]:
# Uses r2_base_path, cell_type and target_gene_set as set in the time-course cell above.

# Load the R2 and p-value tables of the chosen cell type (rows are sample IDs)
r2_file_path = f"{r2_base_path}/R2/R2_{cell_type}.csv"
pval_file_path = f"{r2_base_path}/Pval/Pval_{cell_type}.csv"
r2_df = pd.read_csv(r2_file_path, index_col=0)
pval_df = pd.read_csv(pval_file_path, index_col=0)

# Rat samples split by the second harmonized condition column
group0 = metadata[(metadata["species"]=="rat") & (metadata["condition_harmonized2"]=="H-CKD")].orig_ident.unique().tolist()
group1 = metadata[(metadata["species"]=="rat") & (metadata["condition_harmonized2"]=="DKD")].orig_ident.unique().tolist()

# Samples excluded by hand (none for this plot)
samples_to_remove = []  # Replace with the names of samples you want to remove

group0 = [sample for sample in group0 if sample not in samples_to_remove]
group1 = [sample for sample in group1 if sample not in samples_to_remove]

# r2_df.loc raises KeyError when a requested sample has no row in this cell type's
# table, so such samples are reported and skipped instead of aborting the cell.
missing_samples = [sample for sample in group0 + group1 if sample not in r2_df.index]
if missing_samples:
    print(f"Skipping samples without {cell_type} R2 values: {missing_samples}")
group0 = [sample for sample in group0 if sample not in missing_samples]
group1 = [sample for sample in group1 if sample not in missing_samples]

# Samples whose p-value for the target gene set is below 0.05
significant_samples = pval_df.index[pval_df[target_gene_set] < 0.05]

# x-coordinates of the two groups, placed close together
group_positions = [0.4, 0.6]  # Adjust these values as needed

plt.figure(figsize=(2.5, 6))

# Mean R2 of each group
mean_r2_values = []

for i, group in enumerate([group0, group1]):
    group_r2_values = r2_df.loc[group, target_gene_set]
    mean_r2 = group_r2_values.mean()
    mean_r2_values.append(mean_r2)

    # One marker per sample: red when significant, blue otherwise
    for sample in group:
        x_position = group_positions[i]  # x-coordinate for this group
        color = 'red' if sample in significant_samples else 'blue'
        plt.plot(x_position, group_r2_values.loc[sample], marker='o', markersize=5, color=color)

# Group means as black dashes (no connecting line)
plt.plot(group_positions, mean_r2_values, marker='_', color='black', linestyle='', label='Mean R2')

# Axes, labels and ticks
plt.ylim(0, 1)  # Set the y-axis range from 0 to 1
plt.xlim(0.3, 0.7)  # Set the x-axis range
plt.xlabel("Groups")
plt.ylabel("R2 Value")
plt.title("rat")
plt.xticks(group_positions, ["H-CKD", "DKD"])  # Set custom x-axis ticks
#plt.legend(loc='upper left', bbox_to_anchor=(1, 1))
plt.grid(False)
plt.show()

In [None]:
import pandas as pd
import os

# Cell types whose p-value tables are combined by process_condition below.
# NOTE(review): same list as the `cell_types` defined at the start of the multi-species
# analysis; no paths are defined here (base_path / pval_folder come from that cell).
cell_types = ["PT", "Podo", "TAL_MD", "DCT_CNT_CD", "EC", "Stromal", "IC", "Immune", "PEC", "DTL_ATL"]

# Modified version: samples are split by a categorical key (species) instead of a
# continuous threshold; the parameter names are unchanged.
def process_condition(metadat, value_threshold, above_threshold=True):
    """Combine the p-value tables of all cell types for one group of samples.

    Parameters
    ----------
    metadat : DataFrame with 'species' and 'orig_ident' columns.
    value_threshold : species label used to split the samples.
    above_threshold : if True, select samples whose species differs from
        ``value_threshold``; if False, samples whose species equals it.

    Returns
    -------
    DataFrame indexed by the selected sample IDs with columns named
    ``<cell type>_<original column>``. Samples absent from a cell type's
    table get NaN in that cell type's columns (left join).

    Reads ``cell_types``, ``base_path`` and ``pval_folder`` from the notebook namespace.
    """
    species_column = metadat['species']
    if above_threshold:
        selected_rows = species_column != value_threshold
    else:
        selected_rows = species_column == value_threshold
    unique_samples = metadat[selected_rows]['orig_ident'].unique()

    combined_pval = pd.DataFrame(index=unique_samples)
    for cell_type in cell_types:
        pval_path = os.path.join(base_path, pval_folder, f"Pval_{cell_type}.csv")
        table = pd.read_csv(pval_path, index_col=0)
        # Keep the selected samples and prefix every column with the cell type
        table = table.loc[table.index.isin(unique_samples)].add_prefix(f"{cell_type}_")
        combined_pval = combined_pval.join(table, how='left')
    return combined_pval

# Per-column counts of significant and non-significant p-values
def count_significance(pval_df, threshold=0.05):
    """Count, per column, the p-values below and at-or-above ``threshold``.

    NaN entries fail both comparisons, so they are counted in neither result.

    Returns a tuple ``(sig_count, nonsig_count)`` of Series indexed by column name.
    """
    below_threshold = pval_df.lt(threshold)
    at_or_above_threshold = pval_df.ge(threshold)
    return (
        below_threshold.sum(axis=0, skipna=True),
        at_or_above_threshold.sum(axis=0, skipna=True),
    )

# Both groups are taken from the metadata table loaded at the start of the
# multi-species analysis. `metadat` is bound here so this cell does not depend
# on a later cell having been run first.
metadat = metadata

# Species label used to split the samples (a categorical key, not a numeric threshold)
value_threshold = "human"

# cond1: samples of every species other than `value_threshold`
# cond2: samples of `value_threshold` only
combined_pval_cond1 = process_condition(metadat, value_threshold, above_threshold=True)
combined_pval_cond2 = process_condition(metadat, value_threshold, above_threshold=False)

# Count significance for each condition
sig_count_cond1, nonsig_count_cond1 = count_significance(combined_pval_cond1)
sig_count_cond2, nonsig_count_cond2 = count_significance(combined_pval_cond2)

# One row per (condition, outcome) and one column per gene set. Building the frame
# directly from the count Series keeps an integer dtype where all four Series
# share the same columns.
comparison_df = pd.DataFrame({
    'cond1_sig': sig_count_cond1,
    'cond1_nonsig': nonsig_count_cond1,
    'cond2_sig': sig_count_cond2,
    'cond2_nonsig': nonsig_count_cond2,
}).T.reindex(columns=combined_pval_cond1.columns)

# Display the DataFrame
print(comparison_df)

In [None]:
import pandas as pd
import numpy as np
from scipy.stats import chi2_contingency, fisher_exact
from statsmodels.stats.multitest import multipletests

result_columns = ['Feature', 'Test_statistic', 'p_value', 'degrees_of_freedom', 'Test_type', 'cond1_sig_count', 'cond2_sig_count']
# Fisher's exact test is used when any cell of the 2x2 table is below this count
minimum_count_threshold = 1  # Define the threshold

# One record per gene set; the DataFrame is built once after the loop instead of
# being grown with pd.concat on every iteration.
result_records = []

for feature in comparison_df.columns:
    # 2x2 table: rows are cond1 / cond2, columns are significant / non-significant counts
    contingency_table = comparison_df[[feature]].values.reshape(2, 2)

    # Counts of significant samples for each condition for the current feature
    cond1_sig_count = comparison_df.loc['cond1_sig', feature]
    cond2_sig_count = comparison_df.loc['cond2_sig', feature]

    if np.any(contingency_table < minimum_count_threshold):
        # Fisher's exact test; 'Test_statistic' holds the odds ratio for these rows
        # and 'degrees_of_freedom' stays empty.
        odds_ratio, p = fisher_exact(contingency_table)
        record = {
            'Feature': feature,
            'Test_statistic': odds_ratio,
            'p_value': p,
            'Test_type': 'Fisher',
        }
    else:
        # Chi-square test of independence
        chi_square_stat, p, dof, _ = chi2_contingency(contingency_table)
        record = {
            'Feature': feature,
            'Test_statistic': chi_square_stat,
            'p_value': p,
            'degrees_of_freedom': dof,
            'Test_type': 'Chi-square',
        }

    record['cond1_sig_count'] = cond1_sig_count
    record['cond2_sig_count'] = cond2_sig_count
    result_records.append(record)

chi_square_results = pd.DataFrame(result_records, columns=result_columns)

# Sort the results by p-value in ascending order
sorted_results = chi_square_results.sort_values(by='p_value')

# Display the most significant gene sets
print(sorted_results.head())  # Adjust the number inside head() as needed

In [None]:
# Multiple-testing correction of the raw p-values, in the order of sorted_results
correction_method = 'fdr_bh'
p_values = sorted_results['p_value'].values

# multipletests returns (reject flags, corrected p-values, ...); only the first two are used
rejected, corrected_p_values = multipletests(p_values, alpha=0.05, method=correction_method)[:2]

# Attach the corrected p-values and the rejection flags as new columns
sorted_results = sorted_results.assign(
    corrected_p_value=corrected_p_values,
    rejected_h0=rejected,
)

print(sorted_results.head())  # Adjust the number inside head() as needed


In [None]:
# Save the per-gene-set test results, including the corrected p-values, as CSV.
# NOTE(review): the target directory is the same string as `base_path`; consider reusing it.
sorted_results.to_csv(".../Atlas/scSPECTRA/species_healthy_disease/human_rodent_differences.csv")


In [None]:

# Cell type and gene set to plot (r2_base_path is set in the time-course cell above)
cell_type = "Stromal"  # Replace with your desired cell type
target_gene_set = "GO.0006956.complement.activation.BP"

# Alias of the metadata table, kept for cells that refer to it as `metadat`
metadat = metadata

# Load the R2 and p-value tables of the chosen cell type (rows are sample IDs)
r2_file_path = f"{r2_base_path}/R2/R2_{cell_type}.csv"
pval_file_path = f"{r2_base_path}/Pval/Pval_{cell_type}.csv"
r2_df = pd.read_csv(r2_file_path, index_col=0)
pval_df = pd.read_csv(pval_file_path, index_col=0)

# Diseased samples of each species
group0 = metadata[(metadata['disease']=="diseased") & (metadata['species']=="human")].orig_ident.unique().tolist()
group1 = metadata[(metadata['disease']=="diseased") & (metadata['species']=="mouse")].orig_ident.unique().tolist()
group2 = metadata[(metadata['disease']=="diseased") & (metadata['species']=="rat")].orig_ident.unique().tolist()

# Samples excluded by hand (none for this plot)
samples_to_remove = []  # Replace with the names of samples you want to remove

group0 = [sample for sample in group0 if sample not in samples_to_remove]
group1 = [sample for sample in group1 if sample not in samples_to_remove]
group2 = [sample for sample in group2 if sample not in samples_to_remove]

# r2_df.loc raises KeyError when a requested sample has no row in this cell type's
# table, so such samples are reported and skipped instead of aborting the cell.
missing_samples = [sample for sample in group0 + group1 + group2 if sample not in r2_df.index]
if missing_samples:
    print(f"Skipping samples without {cell_type} R2 values: {missing_samples}")
group0 = [sample for sample in group0 if sample not in missing_samples]
group1 = [sample for sample in group1 if sample not in missing_samples]
group2 = [sample for sample in group2 if sample not in missing_samples]

# Samples whose p-value for the target gene set is below 0.05
significant_samples = pval_df.index[pval_df[target_gene_set] < 0.05]

# x-coordinates of the three groups, placed close together
group_positions = [0.4, 0.6, 0.8]  # Adjust these values as needed

plt.figure(figsize=(2.5, 6))

# Mean R2 of each group
mean_r2_values = []

for i, group in enumerate([group0, group1, group2]):
    group_r2_values = r2_df.loc[group, target_gene_set]
    mean_r2 = group_r2_values.mean()
    mean_r2_values.append(mean_r2)

    # One marker per sample: red when significant, blue otherwise
    for sample in group:
        x_position = group_positions[i]  # x-coordinate for this group
        color = 'red' if sample in significant_samples else 'blue'
        plt.plot(x_position, group_r2_values.loc[sample], marker='o', markersize=5, color=color)

# Group means as black dashes (no connecting line)
plt.plot(group_positions, mean_r2_values, marker='_', color='black', linestyle='', label='Mean R2')

# Axes, labels and ticks
plt.ylim(0, 1)  # Set the y-axis range from 0 to 1
plt.xlim(0.3, 0.9)  # Set the x-axis range
plt.xlabel(cell_type)
plt.ylabel("R2 Value")
plt.title(target_gene_set)
plt.xticks(group_positions, ["Human", "Mouse", "Rat"], rotation = 45)  # Set custom x-axis ticks
#plt.legend()
plt.grid(False)
plt.show()