In [None]:
import pandas as pd
import os
import re

In [None]:
# Select the replicate to process by switching the active pair of paths below;
# the commented-out lines are the alternatives for replicates 1 and 2.
#output_path = "spine_analysis_2024/spine_density_output1/"
#output_path = "spine_analysis_2024/spine_density_output2/"
output_path = "spine_analysis_2024/spine_density_output3/"

#image_dir = "spine_analysis_2024/Spine density REPL1/"
#image_dir = "spine_analysis_2024/Spine density REPL2/"
image_dir = "spine_analysis_2024/Spine density REPL3/"

# A base name is the image file name with everything from ".tif" onwards removed.
# Only TIFF entries are kept, so stray directory entries (hidden OS files,
# subfolders, ...) do not become base names for which no per-image CSV exists.
# os.listdir returns entries in arbitrary order, so the names are sorted to make
# the processing order (and the row order of the results) reproducible.
base_file_names = sorted(
    entry.split(".tif")[0]
    for entry in os.listdir(image_dir)
    if entry.endswith((".tif", ".tiff"))
)

base_file_names

In [None]:
# Column order of the per-image results table
result_columns = ["Total Branch Length",
                  "Endpoints",
                  "Axon Length",
                  "Spine Density per 10um"]

# Rows are collected here (keyed by base name) and turned into a DataFrame once
# after the loop. Building the frame in one go avoids growing it row by row and
# lets pandas infer the column dtypes from the complete set of values. Keying by
# base name means a repeated base name overwrites its earlier row.
rows_by_image = {}

# 'base_file_names' is the list of base names derived from the image files
for base_name in base_file_names:
    # Read the two per-image CSVs that belong to this base name
    branch_info_df = pd.read_csv(f"{output_path}{base_name}_branch_information.csv")
    summary_df = pd.read_csv(f"{output_path}{base_name}_summary.csv")

    # Total branch length: sum of "Branch length" over the Skeleton ID that
    # occurs most often in the branch table
    skeleton_id_counts = branch_info_df["Skeleton ID"].value_counts()
    most_frequent_skeleton_id = skeleton_id_counts.idxmax()
    in_main_skeleton = branch_info_df["Skeleton ID"] == most_frequent_skeleton_id
    total_branch_length = branch_info_df.loc[in_main_skeleton, "Branch length"].sum()

    # Endpoints and axon length are taken from the summary row that has the
    # largest "# Branches" value
    largest_branch_row = summary_df.loc[summary_df["# Branches"].idxmax()]
    endpoints = largest_branch_row["# End-point voxels"]
    axon_len = largest_branch_row["Longest Shortest Path"]
    # Endpoints per 10 units of axon length
    spine_density_per_10um = endpoints / (axon_len / 10)

    # Values are stored in the same order as 'result_columns'
    rows_by_image[base_name] = [total_branch_length,
                                endpoints,
                                axon_len,
                                spine_density_per_10um]

final_df = pd.DataFrame(list(rows_by_image.values()),
                        index=list(rows_by_image.keys()),
                        columns=result_columns)

# final_df now contains all the aggregated data, one row per image base name
    
# Define a function to classify Genotype based on index value
def classify_genotype(index):
    """Return the genotype label found in an index value.

    The labels are tried in a fixed order and the first one that occurs anywhere
    in `index` is returned. Longer labels ('isoG29', 'isoB') are listed ahead of
    the shorter labels they contain ('G29', 'B') so they are not shadowed.
    Returns 'Unknown' when no label is found.
    """
    ordered_labels = ('isoG29', 'G29', 'isoB', 'K2', 'C9', 'B', 'NO')
    for label in ordered_labels:
        # Each label is used directly as the search pattern
        if re.search(label, index):
            return label
    return 'Unknown'

# Derive a genotype label for every row from its index value and store the
# labels in a new "Genotype" column
genotype_labels = [classify_genotype(image_name) for image_name in final_df.index]
final_df['Genotype'] = genotype_labels

final_df

In [None]:
# Write the aggregated table to CSV. The commented-out names are the output
# files for the other replicates.
#results_csv = "spine_density_results_REPL1.csv"
#results_csv = "spine_density_results_REPL2.csv"
results_csv = "spine_density_results_REPL3.csv"
final_df.to_csv(results_csv)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Columns of final_df to compare across genotypes, one panel per column
columns_to_plot = ["Endpoints",
                   "Axon Length",
                   "Spine Density per 10um"]

# One stacked panel per column; the figure height grows by 6 per panel
n_panels = len(columns_to_plot)
fig, axes = plt.subplots(n_panels, 1, figsize=(12, n_panels * 6), squeeze=False)
for ax, column in zip(axes[:, 0], columns_to_plot):
    sns.violinplot(x='Genotype', y=column, data=final_df, ax=ax)
    ax.set_title(f'Violin plot of {column} by Genotype')

fig.tight_layout()
plt.show()