In [1]:
# Standard library
import os
import sys

# Point to the location of the project
sys.path.append('..')

# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Import DataManager and auxiliary functions
from src.DataManager import DataManager
from src.DataManager import auxillary_functions
from src.DataManager.auxillary_functions import extract_central_peripheral_all_layers

In [None]:
# Load the data from MasterSheet.xlsx
# The path is relative, so it is resolved against the kernel's current working directory.
data_path = '../test/input/MasterSheet.xlsx'
# dm is the DataManager handle that the later cells query (id_sheet, genotype lookups, extraction).
dm = DataManager(file_path=data_path)

# Display available sheet names and column titles
# NOTE(review): the show_* helpers presumably print their listings themselves — confirm in src.DataManager.
print("Available sheets:")
dm.show_sheetnames()
print("\nColumn titles:")
# This call is the last expression of the cell: if it returns a value, Jupyter renders it as the cell output.
dm.show_column_titles()


In [None]:
# Print the distinct values found in the Genotype and Age columns of the ID sheet
for heading, column in (("Unique genotypes:", 'Genotype'), ("\nUnique ages:", 'Age')):
    print(heading)
    print(dm.id_sheet[column].unique())


In [None]:
# Pull the ID rows for each genotype and report how many samples each one has.
# Each lookup is followed immediately by its own report line.
id_frames = {}
for genotype, description in (('WT', 'Wildtype (WT)'), ('B2', 'B2 knockout')):
    id_frames[genotype] = dm.get_ID_df_with_category(genotype=genotype)
    print(f"{description} samples: {len(id_frames[genotype])}")

# Keep the per-genotype frames available under their usual names
wt_df, b2_df = id_frames['WT'], id_frames['B2']


In [None]:
# Sheet names handed to the central/peripheral extraction
layers_to_use = ['SuperficialDensity', 'IntermediateDensity', 'DeepDensity']
print(f"Extracting data from layers: {layers_to_use}")


def _extract_and_report(genotype, description):
    """Extract one genotype across ``layers_to_use`` and print its row counts.

    Parameters
    ----------
    genotype : str
        Genotype code forwarded to ``extract_central_peripheral_all_layers``.
    description : str
        Human-readable name used in the printed summary.

    Returns
    -------
    The object returned by ``extract_central_peripheral_all_layers``.
    """
    frame = extract_central_peripheral_all_layers(dm, genotype, sheet_names=layers_to_use)
    print(f"\n{description} data extracted: {len(frame)} total samples (across all layers)")
    # Floor division: the figure is exact only if every layer contributed the same number of rows
    print(f"Samples per layer: {len(frame) // len(layers_to_use)} per layer")
    return frame


# Wildtype first, then the B2 knockout
wt_data = _extract_and_report('WT', 'Wildtype')
b2_data = _extract_and_report('B2', 'B2 Knockout')


In [None]:
# Recap of what was extracted, printed before the two frames are merged
extracted = (("WT", wt_data), ("B2", b2_data))

print("Data extraction summary:")
for tag, frame in extracted:
    print(f"{tag} samples: {len(frame)} total (across all layers)")

print(f"\nLayers being analyzed: {layers_to_use}")

# Only the first of the two "Unique layers" lines is preceded by a blank line
for lead, (tag, frame) in zip(("\n", ""), extracted):
    print(f"{lead}Unique layers in {tag} data: {frame['Layer'].unique()}")


In [None]:
# Stack the WT rows above the B2 rows and renumber the index from zero
combined_df = pd.concat((wt_data, b2_data), ignore_index=True)

# Preview of the first rows
print("Combined dataframe:")
print(combined_df.head(10))

# Size and schema of the merged frame
n_rows = len(combined_df.index)
column_names = list(combined_df.columns)
print(f"\nTotal samples: {n_rows}")
print(f"\nDataframe shape: {combined_df.shape}")
print(f"Columns: {column_names}")

# Descriptive statistics as computed by DataFrame.describe()
print("\nSummary statistics:")
print(combined_df.describe())


In [None]:
# Show the first three rows of every layer, restricted to the key columns
preview_columns = ['Age', 'Genotype', 'Layer', 'Central', 'Peripheral']

print("\nSample data from each layer:")
print("="*60)
for layer in combined_df['Layer'].unique():
    print(f"\n{layer}:")
    in_layer = combined_df['Layer'] == layer
    # Row filter and column selection in one .loc call, then keep the top three rows
    print(combined_df.loc[in_layer, preview_columns].head(3).to_string(index=False))


In [None]:
# Both summaries aggregate the same two measurement columns
region_columns = ('Central', 'Peripheral')

# Per Genotype / Layer / Age: mean, standard deviation, and number of rows
print("Data grouped by Genotype, Layer, and Age:")
by_genotype_layer_age = combined_df.groupby(['Genotype', 'Layer', 'Age'])
grouped = by_genotype_layer_age.agg({col: ['mean', 'std', 'count'] for col in region_columns})
print(grouped)

# Per Layer / Genotype (ages pooled): mean and standard deviation only
print("\n" + "="*60)
print("Summary by Layer:")
by_layer_genotype = combined_df.groupby(['Layer', 'Genotype'])
layer_summary = by_layer_genotype.agg({col: ['mean', 'std'] for col in region_columns})
print(layer_summary)


In [None]:
# Optional: Save the combined dataframe to a CSV file
output_path = '../test/output/central_peripheral_analysis.csv'
# to_csv does not create missing parent folders, so make sure the target
# directory exists first (no-op when it is already there).
os.makedirs(os.path.dirname(output_path), exist_ok=True)
combined_df.to_csv(output_path, index=False)
print(f"Data saved to: {output_path}")


In [None]:
# Visualize the data: 2x2 layout (Central/Peripheral rows, WT/B2 columns)
# Each plot shows all 3 layers as different colored lines

fig, axes = plt.subplots(2, 2, figsize=(12, 12))

# Define colors for layers
layer_colors = {
    'Superficial': 'blue',
    'Intermediate': 'orange',
    'Deep': 'green'
}

layers = ['Superficial', 'Intermediate', 'Deep']
genotypes = ['WT', 'B2']

# The extraction step was driven by sheet names ending in "Density", while the
# labels plotted here do not carry that suffix. If the 'Layer' column does not
# contain a plotted label, its line would be silently empty, so say so up front.
present_layers = set(combined_df['Layer'].unique())
missing_layers = [name for name in layers if name not in present_layers]
if missing_layers:
    print(f"Warning: no rows in combined_df for layer(s) {missing_layers}; "
          "those lines will be missing from the figure")

# Calculate global y-axis limits for shared axes
central_min = combined_df['Central'].min()
central_max = combined_df['Central'].max()
peripheral_min = combined_df['Peripheral'].min()
peripheral_max = combined_df['Peripheral'].max()

# Add some padding (10%) to the limits
central_padding = (central_max - central_min) * 0.1
peripheral_padding = (peripheral_max - peripheral_min) * 0.1

# Per-row settings: both panels of a row share the same y-range and the same
# distance annotation in their titles.
y_limits = {
    'Central': (central_min - central_padding, central_max + central_padding),
    'Peripheral': (peripheral_min - peripheral_padding, peripheral_max + peripheral_padding),
}
region_distances = {'Central': '0.4mm', 'Peripheral': '1.6mm'}

# One pass over the grid instead of four copy-pasted sections:
# rows are regions (Central on top), columns are genotypes (WT on the left).
for row_idx, region in enumerate(['Central', 'Peripheral']):
    for col_idx, genotype in enumerate(genotypes):
        ax = axes[row_idx, col_idx]

        for layer in layers:
            data = combined_df[(combined_df['Genotype'] == genotype) & (combined_df['Layer'] == layer)]
            age_stats = data.groupby('Age')[region].agg(['mean', 'std']).reset_index()
            ages = age_stats['Age'].values
            means = age_stats['mean'].values
            # std is NaN for an age with a single row; draw a zero-width band there
            stds = age_stats['std'].fillna(0).values

            ax.plot(ages, means, '-o', label=layer,
                    color=layer_colors[layer], linewidth=2, markersize=6)
            ax.fill_between(ages, means - stds, means + stds,
                            alpha=0.2, color=layer_colors[layer])

        ax.set_xlabel('Age (P days)', fontsize=12)
        ax.set_ylabel('Mean Density', fontsize=12)
        ax.set_title(f'{region} {genotype} Density ({region_distances[region]} from O/N)',
                     fontsize=13, fontweight='bold')
        ax.legend(loc='upper right', frameon=False)
        ax.set_ylim(*y_limits[region])

plt.tight_layout()

# savefig does not create missing folders; make sure the output directory exists
figure_path = '../test/output/central_peripheral_by_genotype.png'
os.makedirs(os.path.dirname(figure_path), exist_ok=True)
plt.savefig(figure_path, dpi=300, bbox_inches='tight')
plt.show()
print(f"Figure saved to: {figure_path}")


In [None]:
# Create a new figure with overlapped WT and B2 by layer (3 columns: Superficial, Intermediate, Deep)
# and by region (2 rows: Central, Peripheral)

fig, axes = plt.subplots(2, 3, figsize=(15, 10))

# Define colors for genotypes (matching the image style)
genotype_colors = {
    'WT': '#D95F4E',  # orange/red color for WT
    'B2': '#2E8B8B'   # teal/cyan color for B2 (β2-nAChR-KO)
}

# Legend text for each genotype code. Attaching the text to the line when it is
# drawn keeps label and line paired even if the plotting order changes.
genotype_labels = {
    'WT': 'WT',
    'B2': 'β2-nAChR-KO'
}

layers = ['Superficial', 'Intermediate', 'Deep']
regions = ['Central', 'Peripheral']
genotypes = ['WT', 'B2']

# The extraction step was driven by sheet names ending in "Density", while the
# labels plotted here do not carry that suffix. If the 'Layer' column does not
# contain a plotted label, its column of panels would be silently empty.
present_layers = set(combined_df['Layer'].unique())
missing_layers = [name for name in layers if name not in present_layers]
if missing_layers:
    print(f"Warning: no rows in combined_df for layer(s) {missing_layers}; "
          "those panels will be empty")

# Iterate through each subplot
for col_idx, layer in enumerate(layers):
    for row_idx, region in enumerate(regions):
        ax = axes[row_idx, col_idx]

        # Plot both genotypes on the same subplot
        for genotype in genotypes:
            # Filter data for this genotype and layer
            data = combined_df[(combined_df['Genotype'] == genotype) &
                               (combined_df['Layer'] == layer)]

            # Mean and standard error of the mean for this region at each age
            age_stats = data.groupby('Age')[region].agg(['mean', 'sem']).reset_index()

            ages = age_stats['Age'].values
            means = age_stats['mean'].values
            # SEM is NaN for an age with a single row; draw a zero-width band there
            sems = age_stats['sem'].fillna(0).values

            # Plot the line with markers
            ax.plot(ages, means, '-o', label=genotype_labels[genotype],
                    color=genotype_colors[genotype], linewidth=2.5,
                    markersize=7, alpha=0.9)

            # Add shaded error bands (using SEM)
            ax.fill_between(ages, means - sems, means + sems,
                            alpha=0.25, color=genotype_colors[genotype])

        # Formatting for each subplot
        ax.set_xlabel('Age (days)', fontsize=11, fontweight='bold')
        ax.set_ylabel('Density (AU)', fontsize=11, fontweight='bold')

        # Set title based on position
        if row_idx == 0:  # Top row - add layer name
            ax.set_title(f'{layer}\n{region}', fontsize=12, fontweight='bold')
        else:  # Bottom row
            ax.set_title(region, fontsize=12, fontweight='bold')

        # Add legend to the first subplot only; entries come from the line labels
        if col_idx == 0 and row_idx == 0:
            ax.legend(loc='upper left', frameon=False, fontsize=10)

        # Fixed x-window and ticks.
        # NOTE(review): ages outside 5–14.5 would fall outside the visible range — confirm against the data.
        ax.set_xlim(5, 14.5)
        ax.set_xticks([6, 8, 10, 12, 14])

        # Minimal styling: no grid, and hide the top and right spines
        ax.grid(False)
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)

plt.tight_layout()

# savefig does not create missing folders; make sure the output directory exists
figure_path = '../test/output/vessel_density_by_layer_overlapped.png'
os.makedirs(os.path.dirname(figure_path), exist_ok=True)
plt.savefig(figure_path, dpi=300, bbox_inches='tight')
plt.show()
print(f"Figure saved to: {figure_path}")
