In [None]:
import pandas as pd
import numpy as np
import scanpy as sc
import anndata as ad
import matplotlib.pyplot as plt

In [None]:
# Read the tab-separated signature table; each column holds the genes of one signature.
gene_signatures = pd.read_csv('metamodules.txt', sep="\t")

# Short label used in this notebook -> column header in the signature table.
# Header line noted for the file: MES2  MES1  AC  OPC  NPC1_sig  NPC2_sig  G1/S  G2/M
signature_columns = {
    'MES1': 'MES1',
    'MES2': 'MES2',
    'NPC1': 'NPC1_sig',
    'NPC2': 'NPC2_sig',
    'AC': 'AC',
    'OPC': 'OPC',
    'G1S': 'G1/S',
    'G2M': 'G2/M',
}

# Columns have unequal lengths, so drop the NaN padding before turning each into a list.
gene_sets = {
    label: gene_signatures[column].dropna().tolist()
    for label, column in signature_columns.items()
}

# Expression data, plus a plain-list copy of its gene names.
adata = sc.read_h5ad('final.h5ad')
var_names_list = adata.var_names.tolist()

In [None]:
# Display the repr of the object loaded from 'final.h5ad' as the cell output.
adata

In [None]:
# Calculate single-cell gene signature scores
def calculate_signature_scores(adata, gene_sets):
    """Score every cell for each gene signature against an expression-matched control set.

    For each signature, the score is the per-cell mean expression of the signature
    genes minus the per-cell mean expression of a control gene set. The control set
    is built gene by gene: for every signature gene, up to 100 genes whose mean
    expression lies within +/- 0.05 of that gene's mean are sampled without
    replacement (the signature gene itself falls inside its own bin and may be drawn).

    Parameters
    ----------
    adata : object exposing ``.var_names``, ``.X`` and ``adata[:, genes]`` column
        selection (an AnnData object in this notebook).
    gene_sets : dict mapping a signature label to a list of gene names.

    Returns
    -------
    dict mapping each signature label to ``avg_expr - avg_control_expr`` as returned
    by ``.X.mean(axis=1)``. Signatures with no gene present in ``adata.var_names``
    are skipped, so their label is absent from the result.

    Notes
    -----
    Sampling uses NumPy's global random state; call ``np.random.seed`` beforehand
    for reproducible scores.
    """
    # Take gene names from the object being scored, not from a notebook-level
    # variable that can go stale when `adata` is reloaded or subset.
    gene_names = list(adata.var_names)
    known_genes = set(gene_names)          # O(1) membership tests
    gene_name_array = np.array(gene_names)

    # Mean expression of every gene over all cells. It does not depend on the
    # signature, so compute it once instead of twice per signature gene.
    gene_means = np.ravel(adata.X.mean(axis=0))

    scores = {}
    for key, genes in gene_sets.items():
        # Keep only the signature genes that are actually measured.
        valid_genes = [gene for gene in genes if gene in known_genes]

        if not valid_genes:
            continue

        # Per-cell average expression of the signature genes.
        avg_expr = adata[:, valid_genes].X.mean(axis=1)

        # Build the expression-matched control gene set.
        control_genes = []
        for gene in valid_genes:
            expr_level = adata[:, gene].X.mean()
            in_bin = (gene_means > expr_level - 0.05) & (gene_means < expr_level + 0.05)
            bin_genes = gene_name_array[in_bin]
            sample_size = min(100, len(bin_genes))
            control_genes.extend(np.random.choice(bin_genes, sample_size, replace=False))

        # Per-cell average expression of the control genes.
        avg_control_expr = adata[:, control_genes].X.mean(axis=1)

        scores[key] = avg_expr - avg_control_expr

    return scores


In [None]:
# Score all cells for every signature in gene_sets (dict keyed by signature label).
# Control genes are drawn with np.random, and no seed is set in the cells above,
# so the scores can differ between runs.
signature_scores = calculate_signature_scores(adata, gene_sets)

In [None]:
# Display the full score dictionary (one entry per signature) as the cell output.
signature_scores


In [None]:
# Assign cells to meta-modules and their hybrids
def assign_meta_modules(signature_scores):
    """Assign each cell a primary meta-module and, when supported, a hybrid partner.

    For every cell, the MES1/MES2 scores are summed into 'MES' and the NPC1/NPC2
    scores into 'NPC'; all other signatures keep their own label. The modules are
    ranked by score: the top one is the primary module, and the runner-up is
    reported as the hybrid partner when it exceeds the third-ranked score by at
    least 0.3.

    Parameters
    ----------
    signature_scores : dict mapping signature label to an indexable sequence of
        per-cell scores. Must contain 'MES1' (used for the cell count) and yield
        at least three modules after collapsing.

    Returns
    -------
    assignments : list of ``(primary_module, hybrid_module)`` tuples;
        ``hybrid_module`` is ``None`` when the rule above is not met.
    both_scores : list of ``(scores, second_highest_score)`` tuples, where
        ``scores`` is the collapsed per-cell score dict.
    """
    mes_keys = ('MES1', 'MES2')
    npc_keys = ('NPC1', 'NPC2')

    assignments = []
    both_scores = []
    for i in range(len(signature_scores['MES1'])):
        # Collapse MES1/MES2 into MES and NPC1/NPC2 into NPC for this cell.
        scores = {}
        mes_total = 0
        npc_total = 0
        for key, values in signature_scores.items():
            value = values[i]
            if key in mes_keys:
                # `a = a + b` rather than `a += b`: the in-place form could write
                # into an array element that still belongs to the caller's data.
                mes_total = mes_total + value
            elif key in npc_keys:
                npc_total = npc_total + value
            else:
                scores[key] = value
        scores['MES'] = mes_total
        scores['NPC'] = npc_total

        # Rank the modules once. Ranking keys (not values) keeps the primary and
        # the runner-up distinct even when their scores tie; looking the runner-up
        # up by value would return the primary module again in that case.
        ranked = sorted(scores, key=scores.get, reverse=True)
        primary_module = ranked[0]
        second_highest_score = scores[ranked[1]]
        third_highest_score = scores[ranked[2]]

        # Hybrid rule: the runner-up must stand out from the rest by >= 0.3.
        hybrid_module = None
        if second_highest_score - third_highest_score >= 0.3:
            hybrid_module = ranked[1]

        assignments.append((primary_module, hybrid_module))
        both_scores.append((scores, second_highest_score))

    return assignments, both_scores
    

In [None]:
# Per-cell (primary, hybrid) labels, plus the collapsed score dicts they were derived from.
assignments,both_scores = assign_meta_modules(signature_scores)

In [None]:
# Produce a plot of expected number of hybrids
def plot_expected_hybrids(assignments, n_permutations=100):
    """Plot observed hybrid counts per primary module against a permutation null.

    The null distribution is obtained by shuffling the "is hybrid" flag across
    cells while the primary labels stay in place, i.e. it assumes that being a
    hybrid is independent of the primary module. Shuffling whole
    (primary, hybrid) rows would leave every per-module count unchanged, so the
    flags are permuted on their own.

    Parameters
    ----------
    assignments : sequence of ``(primary_module, hybrid_module)`` pairs, with a
        falsy ``hybrid_module`` (e.g. ``None``) for non-hybrid cells. Primary
        labels must be one of MES, NPC, AC, OPC, G1S, G2M.
    n_permutations : int, number of shuffles used to estimate the expected counts
        (mean) and their spread (standard deviation). Defaults to 100.

    Returns
    -------
    None. A bar chart (observed) with error-bar markers (expected +/- std) is shown.

    Notes
    -----
    Uses NumPy's global random state.
    """
    modules = ['MES', 'NPC', 'AC', 'OPC', 'G1S', 'G2M']

    primaries = [primary for primary, _ in assignments]
    hybrid_flags = np.array([bool(hybrid) for _, hybrid in assignments], dtype=bool)

    def _count_hybrids(flags):
        """Count flagged cells per primary module."""
        counts = {module: 0 for module in modules}
        for primary, flagged in zip(primaries, flags):
            if flagged:
                counts[primary] += 1
        return counts

    hybrid_counts = _count_hybrids(hybrid_flags)

    expected_hybrid_counts = {module: [] for module in modules}
    for _ in range(n_permutations):
        shuffled_counts = _count_hybrids(np.random.permutation(hybrid_flags))
        for module in modules:
            expected_hybrid_counts[module].append(shuffled_counts[module])

    means = {module: np.mean(expected_hybrid_counts[module]) for module in modules}
    stds = {module: np.std(expected_hybrid_counts[module]) for module in modules}

    fig, ax = plt.subplots()

    # Pass plain lists: matplotlib cannot always interpret dict views
    # (in particular for `yerr`).
    ax.bar(modules, [hybrid_counts[m] for m in modules], label='Observed')
    ax.errorbar(
        modules,
        [means[m] for m in modules],
        yerr=[stds[m] for m in modules],
        fmt='o',
        label='Expected',
    )

    ax.set_xlabel('Meta-module')
    ax.set_ylabel('Number of hybrids')
    ax.legend()

    plt.show()



In [None]:
# Observed vs. permutation-based hybrid counts per primary module.
plot_expected_hybrids(assignments)

In [None]:
import matplotlib.pyplot as plt
from collections import Counter

# Observed (primary, hybrid) assignment per cell
hybrid_list = assignments

# Keep only cells with a hybrid partner. Sorting each pair makes the combination
# order-independent, so (MES, NPC) and (NPC, MES) are counted together.
hybrids = [tuple(sorted(item)) for item in hybrid_list if item[1] is not None]

# Count the occurrences of each hybrid combination
hybrid_counts = Counter(hybrids)

# Percentages are relative to ALL cells, not only to the hybrid ones
total_elements = len(hybrid_list)
hybrid_percentages = {k: (v / total_elements) * 100 for k, v in hybrid_counts.items()}

# Turn each pair into a readable tick label such as "MES,NPC"
# (formatting the tuple itself would print its repr, twice).
hybrid_percentages_str = {",".join(k): v for k, v in hybrid_percentages.items()}

# Create a bar plot
fig, ax = plt.subplots(figsize=(12, 8))
ax.bar(list(hybrid_percentages_str.keys()), list(hybrid_percentages_str.values()))

# Set labels and title
ax.set_xlabel('Hybrid Combinations')
ax.set_ylabel('Percentage (%)')
ax.set_title('Percentage of Hybrid Combinations')

# Rotate x-axis labels for better readability
plt.xticks(rotation=45, ha='right')

# Show the plot
plt.tight_layout()
plt.show()


In [None]:
# Permutation null for the number of hybrids per primary module.
# The "is hybrid" flag is shuffled across cells while the primary labels stay in
# place. Permuting whole (primary, hybrid) rows would leave every per-module
# count unchanged, so the standard deviation would always be 0.
module_names = ['MES', 'NPC', 'AC', 'OPC', 'G1S', 'G2M']
primary_labels = [primary for primary, _ in assignments]
hybrid_flags = np.array([bool(hybrid) for _, hybrid in assignments], dtype=bool)

expected_hybrid_counts = {module: [] for module in module_names}
for _ in range(100):
    shuffled_flags = np.random.permutation(hybrid_flags)
    shuffled_hybrid_counts = {module: 0 for module in module_names}

    for primary, flagged in zip(primary_labels, shuffled_flags):
        if flagged:
            shuffled_hybrid_counts[primary] += 1

    for module in shuffled_hybrid_counts:
        expected_hybrid_counts[module].append(shuffled_hybrid_counts[module])

# Expected count per module and its spread over the 100 shuffles
means = {module: np.mean(expected_hybrid_counts[module]) for module in expected_hybrid_counts}
stds = {module: np.std(expected_hybrid_counts[module]) for module in expected_hybrid_counts}


In [None]:
# Show both summaries. A notebook cell only displays its final expression, so a
# bare `means` on its own line would be evaluated and discarded.
means, stds

In [None]:
import matplotlib.pyplot as plt
from collections import Counter

# Observed assignments and a shuffled null to compare them against
hybrid_list = assignments

# Re-pair every primary label with the hybrid label of a randomly chosen cell.
# Permuting whole (primary, hybrid) rows would only reorder the cells and leave
# each combination's frequency unchanged, so "expected" would equal "observed".
# A shuffled pair can join a module with itself (e.g. MES, MES); such pairs are
# kept rather than redrawn.
shuffled_partner_order = np.random.permutation(len(assignments))
shuffled_assignments = [
    (assignments[cell][0], assignments[partner][1])
    for cell, partner in enumerate(shuffled_partner_order)
]

    
def process_hybrid_list(hybrid_list):
    """Return the percentage of cells carrying each hybrid combination.

    Entries whose second element is None are not hybrids and are left out of the
    counts, but they still count towards the denominator. Each remaining pair is
    sorted so that (A, B) and (B, A) fall into the same combination.

    Returns a dict mapping the sorted pair (a tuple) to its percentage of
    ``len(hybrid_list)``.
    """
    n_total = len(hybrid_list)

    combination_counts = Counter(
        tuple(sorted(pair)) for pair in hybrid_list if pair[1] is not None
    )

    percentages = {}
    for combination, n_cells in combination_counts.items():
        percentages[combination] = (n_cells / n_total) * 100
    return percentages

# Process both lists
percentages_list = process_hybrid_list(hybrid_list)
percentages_shuffles = process_hybrid_list(shuffled_assignments)

# Use ONE ordered list of combinations for both bar series. The two dicts are
# built in order of first appearance, which differs between the observed and the
# shuffled list, so plotting their values positionally would put bars under the
# wrong tick labels. Observed combinations come first, then any that occur only
# in the shuffled data; a missing combination is drawn as 0 %.
combination_keys = list(percentages_list) + [
    k for k in percentages_shuffles if k not in percentages_list
]

# Readable tick labels such as "MES,NPC"
percentages_list_str = {
    ",".join(map(str, k)): percentages_list.get(k, 0.0) for k in combination_keys
}
percentages_shuffles_str = {
    ",".join(map(str, k)): percentages_shuffles.get(k, 0.0) for k in combination_keys
}

# Create a bar plot
fig, ax = plt.subplots(figsize=(12, 8))

# Define the width of the bars
bar_width = 0.35

# Define the positions of the bars
index = range(len(combination_keys))

# Plot the observed percentages
bars1 = ax.bar(index, list(percentages_list_str.values()), bar_width, label='Observed', color='blue')

# Plot the shuffled percentages, offset by bar_width
bars2 = ax.bar([i + bar_width for i in index], list(percentages_shuffles_str.values()), bar_width, label='Expected', color='orange')

# Set labels and title
ax.set_xlabel('Hybrid Combinations')
ax.set_ylabel('Percentage (%)')
ax.set_title('Percentage of Hybrid Combinations')

# Set the x-ticks to be in the middle of the grouped bars
ax.set_xticks([i + bar_width / 2 for i in index])
ax.set_xticklabels(list(percentages_list_str.keys()), rotation=45, ha='right')

# Add a legend
ax.legend()


# Show the plot
plt.tight_layout()
plt.show()


In [None]:
# Inspect what the permutation produces. np.random.permutation shuffles a
# multi-dimensional array along its first axis only, so (assuming `assignments`
# is a list of 2-tuples) the rows are reordered but each (primary, hybrid) pair
# stays intact.
shuffled_assignments = np.random.permutation(assignments)
shuffled_assignments

In [None]:
import matplotlib.pyplot as plt
from collections import Counter

# Toy (primary, hybrid) list for checking the hybrid filter.
# Here the string 'None' (not the None object) marks "no hybrid partner".
hybrid_list = [
    ('MES', 'NPC'),
    ('NPC', 'None'),
    ('NPC', 'None'),
    ('MES', 'None'),
    ('NPC', 'None'),
    ('G1S', 'MES'),
    ('AC', 'MES'),
    ('NPC', 'None'),
    ('NPC', 'None'),
    ('MES', 'NPC'),
    ('MES', 'None'),
    # Add more elements as needed
]

# Keep only hybrid combinations. The placeholder sits in the SECOND element of
# each tuple; comparing the whole tuple against 'None' is always unequal and
# would therefore filter nothing.
hybrids = [item for item in hybrid_list if item[1] != 'None']

# Count the occurrences of each hybrid combination
hybrid_counts = Counter(hybrids)

In [None]:
# Keep only the entries whose second element is not the 'None' placeholder string.
# (The statement was duplicated; once is enough.)
filtered_hybrids = [item for item in hybrid_list if item[1] != 'None']
print(filtered_hybrids)

In [None]:
# Toy (primary, hybrid) pairs; the string 'None' marks "no hybrid partner".
hybrid_list = [
    ('MES', 'NPC'),
    ('NPC', 'None'),
    ('NPC', 'None'),
    ('MES', 'None'),
    ('NPC', 'None'),
    ('G1S', 'MES'),
    ('AC', 'MES'),
    ('NPC', 'None'),
    ('NPC', 'None'),
    ('MES', 'NPC'),
    ('MES', 'None'),
]

# Collect the pairs that do have a partner, preserving their order.
filtered_list = []
for pair in hybrid_list:
    if pair[1] == 'None':
        continue
    filtered_list.append(pair)
print(filtered_list)

In [None]:
def _collapse_by_max(score_table, member_keys):
    """Element-wise maximum over whichever of `member_keys` exist in `score_table`.

    A sub-signature can be absent (calculate_signature_scores skips signatures
    with no measured genes); the remaining one is then used as is. Raises
    KeyError when none of the keys is present.
    """
    available = [score_table[key] for key in member_keys if key in score_table]
    if not available:
        raise KeyError(f"none of {list(member_keys)} found in the signature scores")
    collapsed = available[0]
    for other in available[1:]:
        collapsed = np.maximum(collapsed, other)
    return collapsed


# Collapse the two MES and the two NPC sub-signatures by taking, per cell, the
# higher of the two scores. (A `.get(key, np.zeros_like(scores[key]))` default
# cannot be used for the fallback: it is evaluated eagerly and raises KeyError
# in exactly the case it is meant to handle.)
mes_max = _collapse_by_max(signature_scores, ('MES1', 'MES2'))
npc_max = _collapse_by_max(signature_scores, ('NPC1', 'NPC2'))

# New dictionary with the collapsed MES and NPC entries first
updated_signature_scores = {}
updated_signature_scores['MES'] = mes_max
updated_signature_scores['NPC'] = npc_max

# Carry every other signature over unchanged
for key, value in signature_scores.items():
    if key not in ['MES1', 'MES2', 'NPC1', 'NPC2']:
        updated_signature_scores[key] = value



In [None]:
# Display the collapsed score dictionary (MES, NPC, then the remaining signatures).
updated_signature_scores



In [None]:
import numpy as np
import matplotlib.pyplot as plt

def generate_2d_representation(signature_scores1):
    """Scatter all cells in the two-axis meta-module layout (scores only, no colouring).

    y-axis: D = max(OPC, NPC) - max(AC, MES).
    x-axis: sign(d) * log2(|d| + 1), where d = OPC - NPC if D > 0, else AC - MES.
    Positive x therefore means OPC-like (upper half) or AC-like (lower half).

    NOTE: a function with the same name but a different signature and a mirrored
    x-axis is defined in a later cell of this notebook and replaces this one
    once that cell has run.
    """
    n_cells = len(signature_scores1['MES'])

    D_values, x_values = [], []
    for i in range(n_cells):
        opc_i = signature_scores1['OPC'][i]
        npc_i = signature_scores1['NPC'][i]
        ac_i = signature_scores1['AC'][i]
        mes_i = signature_scores1['MES'][i]

        D = max(opc_i, npc_i) - max(ac_i, mes_i)
        D_values.append(D)

        # The half of the plot the cell falls in decides which pair spans the x-axis.
        x_diff = (opc_i - npc_i) if D > 0 else (ac_i - mes_i)

        # Signed log2 of the magnitude
        x_values.append(np.sign(x_diff) * np.log2(abs(x_diff) + 1))

    fig, ax = plt.subplots()
    ax.scatter(x_values, D_values)

    ax.set_xlabel('log2(|SCopc – SCnpc|+1) or log2(|SCac–SCmes|)')
    ax.set_ylabel('D value')

    # Fixed viewing window: -4..4 on both axes
    ax.set_xlim([-4, 4])
    ax.set_ylim([-4, 4])

    # Dashed black cross through the origin
    for draw_line in (ax.axhline, ax.axvline):
        draw_line(0, color='black', linestyle='--')

    # One label per quadrant
    quadrant_labels = [
        (-0.9, 0.9, 'NPC-like'),
        (0.9, 0.9, 'OPC-like'),
        (-0.9, -0.9, 'MES-like'),
        (0.9, -0.9, 'AC-like'),
    ]
    for label_x, label_y, label_text in quadrant_labels:
        ax.text(label_x, label_y, label_text, fontsize=12, ha='center', va='center')

    plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import scanpy as sc

def generate_2d_representation(signature_scores1, adata, color_by, distance = 0.25):
    """Scatter cells in the two-axis meta-module layout and return the cells near the origin.

    y-axis: D = max(OPC, NPC) - max(AC, MES).
    x-axis: sign(d) * log2(|d| + 1), where d = NPC - OPC if D > 0, else MES - AC.
    Quadrants: OPC-like (top left), NPC-like (top right), AC-like (bottom left),
    MES-like (bottom right).

    Parameters
    ----------
    signature_scores1 : mapping with 'MES', 'NPC', 'AC' and 'OPC' entries holding
        one score per cell, in the same order as the rows of ``adata.obs``.
    adata : AnnData whose ``obs`` supplies the colouring column and the returned rows.
    color_by : name of a column in ``adata.obs``. Numeric columns are drawn with
        the 'viridis' colormap and a colorbar; other columns are treated as
        categorical and use the palette stored in ``adata.uns[f'{color_by}_colors']``.
    distance : half-width of the square around the origin that defines "center"
        cells (both |x| and |D| at most ``distance``).

    Returns
    -------
    The rows of ``adata.obs`` belonging to the center cells.

    Notes
    -----
    For categorical columns ``sc.pl.umap(..., show=False)`` is called only so
    that scanpy stores the palette in ``adata.uns``; that call creates a figure
    of its own.
    """
    opc = np.asarray(signature_scores1['OPC']).ravel()
    npc = np.asarray(signature_scores1['NPC']).ravel()
    ac = np.asarray(signature_scores1['AC']).ravel()
    mes = np.asarray(signature_scores1['MES']).ravel()

    # Vertical position: the stronger of OPC/NPC against the stronger of AC/MES.
    D_values = np.maximum(opc, npc) - np.maximum(ac, mes)

    # Horizontal position: the pair that wins vertically spans the x-axis.
    x_diff = np.where(D_values > 0, npc - opc, mes - ac)

    # Signed log2 of the magnitude
    x_values = np.sign(x_diff) * np.log2(np.abs(x_diff) + 1)

    # Cells inside the square [-distance, distance] x [-distance, distance]
    center_mask = (np.abs(D_values) <= distance) & (np.abs(x_values) <= distance)
    center_cells = adata.obs[center_mask]

    fig, ax = plt.subplots(figsize=(10, 6))

    if pd.api.types.is_numeric_dtype(adata.obs[color_by]):
        # Continuous palette for numerical data
        scatter = ax.scatter(x_values, D_values, c=adata.obs[color_by], cmap='viridis', alpha=0.7)
        plt.colorbar(scatter, ax=ax, label=color_by)
    else:
        # Have scanpy populate adata.uns[f'{color_by}_colors'], then reuse that palette
        sc.pl.umap(adata , color=color_by, show=False)
        cluster_colors = adata.uns[f'{color_by}_colors']

        # Map each category to its colour
        color_map = {category: color for category, color in zip(adata.obs[color_by].cat.categories, cluster_colors)}
        colors = adata.obs[color_by].map(color_map)

        scatter = ax.scatter(x_values, D_values, c=colors, alpha=0.7)

    # Highlight center cells with a red ring
    ax.scatter(x_values[center_mask], D_values[center_mask], edgecolor='red', facecolor='none', s=100, label='Center Cells')

    # Label matches the quantity actually plotted (signed, "+1" in both terms)
    ax.set_xlabel('signed log2(|SCnpc – SCopc|+1) or log2(|SCmes – SCac|+1)')
    ax.set_ylabel('D value')

    # Fixed viewing window: -4..4 on both axes
    ax.set_xlim([-4, 4])
    ax.set_ylim([-4, 4])

    # Dashed black lines at x=0 and y=0
    ax.axhline(0, color='black', linestyle='--')
    ax.axvline(0, color='black', linestyle='--')

    # Quadrant labels
    ax.text(-2.7, 2.7, 'OPC-like', fontsize=12, ha='center', va='center')
    ax.text(2.7, 2.7, 'NPC-like', fontsize=12, ha='center', va='center')
    ax.text(-2.7, -2.7, 'AC-like', fontsize=12, ha='center', va='center')
    ax.text(2.7, -2.7, 'MES-like', fontsize=12, ha='center', va='center')

    ax.legend()
    plt.show()

    return center_cells

# Plot the max-collapsed scores coloured by the 'louvain_res_0.50' column and keep
# the obs rows of the cells within 0.10 of the origin on both axes.
center_cells = generate_2d_representation(
    updated_signature_scores,
    adata,
    color_by='louvain_res_0.50',
    distance=0.10,
)


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Compute the 2-D meta-module coordinates (x, D) of every cell
def find_cluster_distances_and_box_analysis(signature_scores1, adata, color_by, sample_layer='sample', distance=0.25):
    """Return the two-axis meta-module coordinates of every cell.

    D = max(OPC, NPC) - max(AC, MES)
    x = sign(d) * log2(|d| + 1), where d = NPC - OPC if D > 0, else MES - AC

    Parameters
    ----------
    signature_scores1 : mapping with 'MES', 'NPC', 'AC' and 'OPC' entries holding
        one score per cell.
    adata, color_by, sample_layer, distance : accepted for interface
        compatibility; not used by the computation.

    Returns
    -------
    (x_values, D_values) : tuple of 1-D numpy arrays, in the argument order
        expected by ``classify_and_plot``. The coordinates are returned because
        they were otherwise computed and then discarded.
    """
    opc = np.asarray(signature_scores1['OPC']).ravel()
    npc = np.asarray(signature_scores1['NPC']).ravel()
    ac = np.asarray(signature_scores1['AC']).ravel()
    mes = np.asarray(signature_scores1['MES']).ravel()

    # Vertical position: the stronger of OPC/NPC against the stronger of AC/MES.
    D_values = np.maximum(opc, npc) - np.maximum(ac, mes)

    # Horizontal position: the pair that wins vertically spans the x-axis.
    x_diff = np.where(D_values > 0, npc - opc, mes - ac)

    # Signed log2 of the magnitude
    x_values = np.sign(x_diff) * np.log2(np.abs(x_diff) + 1)

    return x_values, D_values

def classify_and_plot(signature_scores1, x_values, D_values):
    """Classify cells as 'Plastic' or by dominant meta-module and plot the percentages.

    A cell is 'Plastic' when both coordinates lie inside the closed square
    [-1, 1] x [-1, 1]; otherwise it receives the label of its highest-scoring
    column among MES, NPC, AC and OPC.

    Parameters
    ----------
    signature_scores1 : pandas DataFrame with columns 'MES', 'NPC', 'AC', 'OPC',
        one row per cell.
    x_values, D_values : array-likes with one coordinate per cell, in row order.

    Returns
    -------
    None. A bar chart of the class percentages is shown.
    """
    x_values = np.asarray(x_values)
    D_values = np.asarray(D_values)

    # Step 1: plastic cells sit inside the box (-1, 1) on both axes
    plastic_mask = (x_values >= -1) & (x_values <= 1) & (D_values >= -1) & (D_values <= 1)

    # Step 2: dominant meta-module of every cell, computed in one vectorised call
    # instead of one `.iloc[i].idxmax()` per row
    dominant_state = signature_scores1[['MES', 'NPC', 'AC', 'OPC']].idxmax(axis=1).to_numpy()

    # Step 3: plastic cells override the dominant-state label
    classifications = pd.Series(np.where(plastic_mask, 'Plastic', dominant_state))

    # Step 4: percentages per class
    percentage_counts = classifications.value_counts(normalize=True) * 100

    # Step 5: bar plot of the percentages
    plt.figure(figsize=(8, 6))
    percentage_counts.plot(kind='bar', color='skyblue', edgecolor='black')
    plt.ylabel('Percentage (%)')
    plt.xlabel('Cell Classification')
    plt.title('Percentage of Plastic, MES, NPC, AC, and OPC Cells')
    plt.ylim(0, 100)
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

# Example usage on synthetic data
# signature_scores1 is a DataFrame with columns ['MES', 'NPC', 'AC', 'OPC'];
# x_values and D_values are numpy arrays with one entry per row.

# Seed BEFORE the first random draw so that the scores are reproducible too,
# not only the coordinates.
np.random.seed(0)

signature_scores1 = pd.DataFrame({
    'MES': np.random.rand(100),
    'NPC': np.random.rand(100),
    'AC': np.random.rand(100),
    'OPC': np.random.rand(100),
})

# Random example coordinates
x_values = np.random.uniform(-2, 2, 100)
D_values = np.random.uniform(-2, 2, 100)

# Classify and plot
classify_and_plot(signature_scores1, x_values, D_values)



In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Compute the 2-D meta-module coordinates (x, D) of every cell
def find_cluster_distances_and_box_analysis(signature_scores1, adata, color_by, sample_layer='sample', distance=0.25):
    """Return the two-axis meta-module coordinates of every cell.

    D = max(OPC, NPC) - max(AC, MES)
    x = sign(d) * log2(|d| + 1), where d = NPC - OPC if D > 0, else MES - AC

    Parameters
    ----------
    signature_scores1 : mapping with 'MES', 'NPC', 'AC' and 'OPC' entries holding
        one score per cell.
    adata, color_by, sample_layer, distance : accepted for interface
        compatibility; not used by the computation.

    Returns
    -------
    (x_values, D_values) : tuple of 1-D numpy arrays, in the argument order
        expected by ``classify_and_plot``. The coordinates are returned because
        they were otherwise computed and then discarded.
    """
    opc = np.asarray(signature_scores1['OPC']).ravel()
    npc = np.asarray(signature_scores1['NPC']).ravel()
    ac = np.asarray(signature_scores1['AC']).ravel()
    mes = np.asarray(signature_scores1['MES']).ravel()

    # Vertical position: the stronger of OPC/NPC against the stronger of AC/MES.
    D_values = np.maximum(opc, npc) - np.maximum(ac, mes)

    # Horizontal position: the pair that wins vertically spans the x-axis.
    x_diff = np.where(D_values > 0, npc - opc, mes - ac)

    # Signed log2 of the magnitude
    x_values = np.sign(x_diff) * np.log2(np.abs(x_diff) + 1)

    return x_values, D_values

def classify_and_plot(signature_scores1, x_values, D_values, sample):
    """Classify cells and plot the class percentages separately for each sample.

    A cell is 'Plastic' when both coordinates lie inside the closed square
    [-1, 1] x [-1, 1]; otherwise it receives the label of its highest-scoring
    column among MES, NPC, AC and OPC. One bar chart is drawn per distinct
    value of ``sample``, in order of first appearance.

    Parameters
    ----------
    signature_scores1 : pandas DataFrame with columns 'MES', 'NPC', 'AC', 'OPC',
        one row per cell.
    x_values, D_values : array-likes with one coordinate per cell, in row order.
    sample : array-like (e.g. pandas Series) with one sample/day label per cell,
        in row order.

    Returns
    -------
    None. The figures are shown.
    """
    x_values = np.asarray(x_values)
    D_values = np.asarray(D_values)
    # Work on positional values. Indexing the classifications by sample label and
    # then masking them with a boolean Series that carries the original row index
    # gives pandas two indexes it cannot align.
    sample_labels = np.asarray(sample)

    # Step 1: plastic cells sit inside the box (-1, 1) on both axes
    plastic_mask = (x_values >= -1) & (x_values <= 1) & (D_values >= -1) & (D_values <= 1)

    # Step 2: dominant meta-module per cell (vectorised)
    dominant_state = signature_scores1[['MES', 'NPC', 'AC', 'OPC']].idxmax(axis=1).to_numpy()

    # Step 3: plastic cells override the dominant-state label
    classifications = pd.Series(np.where(plastic_mask, 'Plastic', dominant_state))

    # Step 4: percentages and one bar plot per day
    for day in pd.unique(sample_labels):
        day_classifications = classifications[sample_labels == day]
        percentage_counts = day_classifications.value_counts(normalize=True) * 100

        plt.figure(figsize=(8, 6))
        percentage_counts.plot(kind='bar', color='skyblue', edgecolor='black')
        plt.ylabel('Percentage (%)')
        plt.xlabel('Cell Classification')
        plt.title(f'Percentage of Plastic, MES, NPC, AC, and OPC Cells for {day}')
        plt.ylim(0, 100)
        plt.grid(axis='y', linestyle='--', alpha=0.7)
        plt.xticks(rotation=45)
        plt.tight_layout()
        plt.show()

# Example usage on synthetic data
# signature_scores1 is a DataFrame with columns ['MES', 'NPC', 'AC', 'OPC'];
# x_values and D_values are numpy arrays with one entry per row;
# sample is a pandas Series holding the day label of every row.

# Seed BEFORE the first random draw so that the scores are reproducible too,
# not only the coordinates and day labels.
np.random.seed(0)

signature_scores1 = pd.DataFrame({
    'MES': np.random.rand(100),
    'NPC': np.random.rand(100),
    'AC': np.random.rand(100),
    'OPC': np.random.rand(100),
})

# Random example coordinates and day labels
x_values = np.random.uniform(-2, 2, 100)
D_values = np.random.uniform(-2, 2, 100)
sample = pd.Series(np.random.choice(['Day 0', 'Day 1', 'Day 2', 'Day 3'], size=100))

# Classify and plot for each day
classify_and_plot(signature_scores1, x_values, D_values, sample)


In [None]:
# Flag the previously selected center cells in adata.obs and show them on the UMAP.
# Every cell starts as 'Other'; the rows listed in center_cells are then switched
# to 'Center'. Only the 'Center' group is highlighted by the plot.
center_flag_column = 'center_cells'
adata.obs[center_flag_column] = 'Other'
adata.obs.loc[center_cells.index, center_flag_column] = 'Center'
sc.pl.umap(
    adata,
    color=center_flag_column,
    groups=['Center'],
    palette=['red', 'lightgrey'],
)


In [None]:
# UMAP coloured by the 'clusters_renamed' column.
# NOTE(review): a disabled alternative previously reloaded `adata` from
# 'time_course_cluster_9_out.h5ad' and rebuilt `var_names_list` here. If that
# dataset is needed, load it in the data-loading cell so the scores computed
# earlier are not mixed with a different object.
sc.pl.umap(adata, color = "clusters_renamed")

In [None]:
# Display the repr of `adata` again; its obs gained a 'center_cells' column above.
adata


In [None]:
# 2-D layout coloured by 'phase' (default distance=0.25). The function returns the
# obs rows of the center cells, which are displayed as the cell output.
# Assumes adata.obs has a 'phase' column; it is not created in this notebook (TODO confirm).
generate_2d_representation(updated_signature_scores, adata, color_by='phase')

In [None]:
# 2-D layout coloured by 'Pop2_score'.
# Assumes this obs column exists; it is not created in this notebook (TODO confirm).
generate_2d_representation(updated_signature_scores, adata, color_by='Pop2_score')


In [None]:
# 2-D layout coloured by 'Pop3_score'.
# Assumes this obs column exists; it is not created in this notebook (TODO confirm).
generate_2d_representation(updated_signature_scores, adata, color_by='Pop3_score')


In [None]:
# 2-D layout coloured by 'Pop4_score'.
# Assumes this obs column exists; it is not created in this notebook (TODO confirm).
generate_2d_representation(updated_signature_scores, adata, color_by='Pop4_score')


In [None]:
# 2-D layout coloured by 'sample'.
# Assumes this obs column exists; it is not created in this notebook (TODO confirm).
generate_2d_representation(updated_signature_scores, adata, color_by='sample')


In [None]:
import numpy as np
import pandas as pd

def calculate_centroids(signature_scores1, adata, cluster_key):
    """Return the mean 2-D meta-module position of every cluster.

    Coordinates use the same convention as the scatter plot drawn by the
    four-argument ``generate_2d_representation`` in this notebook, so that the
    centroids can be read against it:

    Y = D = max(OPC, NPC) - max(AC, MES)
    X = sign(d) * log2(|d| + 1), where d = NPC - OPC if D > 0, else MES - AC

    Parameters
    ----------
    signature_scores1 : mapping with 'MES', 'NPC', 'AC' and 'OPC' entries holding
        one score per cell, in the row order of ``adata.obs``.
    adata : object whose ``obs`` table holds the cluster labels.
    cluster_key : name of the ``adata.obs`` column with the cluster labels.

    Returns
    -------
    pandas DataFrame indexed by cluster, with columns 'X' and 'Y' (group means).

    Raises
    ------
    ValueError : if the number of scores differs from the number of cluster labels.
    """
    # Flatten so that both 1-D and column-shaped score arrays are accepted.
    opc = np.asarray(signature_scores1['OPC']).ravel()
    npc = np.asarray(signature_scores1['NPC']).ravel()
    ac = np.asarray(signature_scores1['AC']).ravel()
    mes = np.asarray(signature_scores1['MES']).ravel()

    # Vertical position: the stronger of OPC/NPC against the stronger of AC/MES.
    D_values = np.maximum(opc, npc) - np.maximum(ac, mes)

    # Horizontal position: NPC and MES on the positive side, matching the scatter plot.
    x_diff = np.where(D_values > 0, npc - opc, mes - ac)

    # Signed log2 of the magnitude
    x_values = np.sign(x_diff) * np.log2(np.abs(x_diff) + 1)

    # Ensure the lengths match
    cluster_labels = adata.obs[cluster_key].values
    if len(x_values) != len(cluster_labels) or len(D_values) != len(cluster_labels):
        raise ValueError("The lengths of x_values, D_values, and cluster labels must match.")

    # One row per cell: cluster label plus its coordinates
    df = pd.DataFrame({
        'cluster': cluster_labels,
        'X': x_values,
        'Y': D_values
    })

    # Centroid = mean position of the cluster's cells
    centroids = df.groupby('cluster').mean()

    return centroids

# Mean 2-D position per 'clusters_renamed' cluster, printed under a heading.
centroids = calculate_centroids(
    updated_signature_scores,
    adata,
    cluster_key='clusters_renamed',
)
print("Centroids for each cluster:", centroids, sep="\n")


In [None]:
import matplotlib.pyplot as plt

def plot_centroids(centroids):
    """Draw one labelled marker per cluster centroid in the 2-D meta-module layout.

    `centroids` is a DataFrame indexed by cluster label with columns 'X' and 'Y'.
    The view is fixed to x in [-0.6, 0.6] and y in [-0.3, 0.3]; centroids outside
    that window are not visible.
    """
    fig, ax = plt.subplots()

    centroid_x = centroids['X']
    centroid_y = centroids['Y']

    # Blue markers for the centroids
    ax.scatter(centroid_x, centroid_y, color='blue', s=100)

    # Red cluster name centred on each marker
    for cluster, x_pos, y_pos in zip(centroids.index, centroid_x, centroid_y):
        ax.text(x_pos, y_pos, cluster, fontsize=12, ha='center', va='center', color='red')

    ax.set_xlabel('log2(|SCopc – SCnpc|+1) or log2(|SCac–SCmes|)')
    ax.set_ylabel('D value')

    # Zoomed-in window around the origin
    ax.set_xlim([-0.6, 0.6])
    ax.set_ylim([-0.3, 0.3])

    # Dashed black cross through the origin
    for draw_line in (ax.axhline, ax.axvline):
        draw_line(0, color='black', linestyle='--')

    plt.show()

# Plot the per-cluster centroids computed by calculate_centroids above.
plot_centroids(centroids)


In [None]:
# 2-D layout coloured by 'MES1'.
# Assumes adata.obs has a 'MES1' column; nothing in this notebook writes the
# signature scores into adata.obs (TODO confirm).
generate_2d_representation(updated_signature_scores, adata, color_by='MES1')


In [None]:
# NOTE(review): identical to the call in the previous cell (color_by='MES1').
# Possibly 'MES2' was intended; confirm or remove.
generate_2d_representation(updated_signature_scores, adata, color_by='MES1')


In [None]:
# 2-D layout coloured by 'AC'.
# Assumes this obs column exists; it is not created in this notebook (TODO confirm).
generate_2d_representation(updated_signature_scores, adata, color_by='AC')


In [None]:
# 2-D layout coloured by 'OPC'.
# Assumes this obs column exists; it is not created in this notebook (TODO confirm).
generate_2d_representation(updated_signature_scores, adata, color_by='OPC')


In [None]:
# 2-D layout coloured by 'NPC1_sig'.
# Assumes this obs column exists; it is not created in this notebook (TODO confirm).
generate_2d_representation(updated_signature_scores, adata, color_by='NPC1_sig')


In [None]:
# 2-D layout coloured by 'NPC2_sig'.
# Assumes this obs column exists; it is not created in this notebook (TODO confirm).
generate_2d_representation(updated_signature_scores, adata, color_by='NPC2_sig')

In [None]:
# 2-D layout coloured by 'G1/S'.
# Assumes this obs column exists; it is not created in this notebook (TODO confirm).
generate_2d_representation(updated_signature_scores, adata, color_by='G1/S')


In [None]:
# 2-D layout coloured by 'G2/M'.
# Assumes this obs column exists; it is not created in this notebook (TODO confirm).
generate_2d_representation(updated_signature_scores, adata, color_by='G2/M')


In [None]:
# NOTE(review): repeats the earlier color_by='AC' call in this notebook; confirm or remove.
generate_2d_representation(updated_signature_scores, adata, color_by='AC')
