In [1]:
import pandas as pd

def add_volume_ratio(m04_csv_path, oc_csv_path):
    """Add a ``volume_ratio`` column to both CSV files, in place.

    Both files are read, inner-joined on ``image_name``, and for every matched
    image the ratio ``area`` (m04 file) / ``area`` (oc file) is computed. The
    ratio is then written into a ``volume_ratio`` column of each original
    table and both tables are saved back to the paths they were read from.

    Parameters
    ----------
    m04_csv_path : str or path-like
        CSV with at least ``image_name`` and ``area`` columns (numerator).
    oc_csv_path : str or path-like
        CSV with at least ``image_name`` and ``area`` columns (denominator).

    Returns
    -------
    None
        The two CSV files are overwritten as a side effect.

    Notes
    -----
    * Rows whose ``image_name`` has no partner in the other file, and rows
      whose denominator area is 0, get a missing (NaN) ratio.
    * The ``volume_ratio`` column is always (re)written in full, so calling
      the function again on already-processed files gives the same result and
      never leaves stale values behind.
    * If ``image_name`` is duplicated, the last matched pair in merge order
      determines the ratio for that image.
    """
    # Load the datasets
    m04_data = pd.read_csv(m04_csv_path)
    oc_data = pd.read_csv(oc_csv_path)

    # Pair up rows from both files through their shared image identifier.
    merged_data = pd.merge(m04_data, oc_data, on="image_name", suffixes=('_m04', '_oc'))

    # A zero denominator has no meaningful ratio: mask it with NaN (keeps the
    # column numeric) instead of letting the division produce inf.
    oc_area = merged_data['area_oc'].where(merged_data['area_oc'] != 0)
    merged_data['volume_ratio'] = merged_data['area_m04'] / oc_area

    # One ratio per image; the index must be unique for the lookup below.
    ratio_by_image = (
        merged_data
        .drop_duplicates(subset='image_name', keep='last')
        .set_index('image_name')['volume_ratio']
    )

    # Vectorised lookup replaces the per-row .loc scan of both tables.
    m04_data['volume_ratio'] = m04_data['image_name'].map(ratio_by_image)
    oc_data['volume_ratio'] = oc_data['image_name'].map(ratio_by_image)

    # Save the updated datasets back to CSV
    m04_data.to_csv(m04_csv_path, index=False)
    oc_data.to_csv(oc_csv_path, index=False)

# Compute the ratio for the combined m04 / oc tables; both CSVs are rewritten in place.
add_volume_ratio(
    m04_csv_path='/Users/katherineridley/Dropbox (UK Dementia Research Institute)/KRidley/PlaqueStack/Masks/overall_m04_combined.csv',
    oc_csv_path='/Users/katherineridley/Dropbox (UK Dementia Research Institute)/KRidley/PlaqueStack/Masks/overall_oc_combined.csv',
)

In [12]:
from scipy.stats import mannwhitneyu
import pandas as pd

# Columns that are compared between the two genotypes, one test per column.
variables = [
    'area',
    'extent',
    'surface_area',
    'complexity',
    'sphericity',
    'major_axis_length',
    'minor_axis_length',
    'solidity',
    'volume_ratio',
]

# Labels (values of the 'Genotype' column) of the two groups under comparison.
genotype1, genotype2 = 'APPPS1', 'APPPS1xFIRE'

def perform_mannwhitneyu_test(data, genotype1, genotype2, variables, genotype_column='Genotype'):
    """Run a Mann-Whitney U test per variable between two genotype groups.

    Parameters
    ----------
    data : pandas.DataFrame
        Table holding ``genotype_column`` and every column named in
        ``variables``.
    genotype1, genotype2 : str
        Values of ``genotype_column`` identifying the two groups.
    variables : iterable of str
        Column names to test, one test per column.
    genotype_column : str, default 'Genotype'
        Name of the column that holds the group label.

    Returns
    -------
    dict
        ``{variable: {'stat': U statistic, 'p_value': p-value}}``. Both
        entries are NaN when either group has no non-missing observation for
        that variable.

    Notes
    -----
    Missing values are dropped per variable before testing. They are NOT
    replaced with 0: imputing zeros would insert artificial lowest-rank
    observations and bias the rank-based test.
    """
    results = {}

    # Group membership does not depend on the variable; compute it once.
    in_group1 = data[genotype_column] == genotype1
    in_group2 = data[genotype_column] == genotype2

    for variable in variables:
        genotype1_values = data.loc[in_group1, variable].dropna()
        genotype2_values = data.loc[in_group2, variable].dropna()

        if genotype1_values.empty or genotype2_values.empty:
            # The test is undefined without observations in both groups.
            stat, p_value = float('nan'), float('nan')
        else:
            stat, p_value = mannwhitneyu(genotype1_values, genotype2_values)

        results[variable] = {'stat': stat, 'p_value': p_value}
    return results

# --- oc table: load, test every variable, print and persist the outcome ---
data = pd.read_csv(
    '/Users/katherineridley/Dropbox (UK Dementia Research Institute)/KRidley/PlaqueStack/Masks/overall_oc_combined.csv'
)
ocresults = perform_mannwhitneyu_test(
    data=data, genotype1=genotype1, genotype2=genotype2, variables=variables
)
print(ocresults)

# Columns are the tested variables, rows are 'stat' and 'p_value'.
ocdf = pd.DataFrame(data=ocresults)
ocdf.to_csv(
    '/Users/katherineridley/Dropbox (UK Dementia Research Institute)/KRidley/PlaqueStack/Masks/oc_mannwhitneyu_results.csv'
)

# --- m04 table: same steps; note that `data` now refers to the m04 table ---
data = pd.read_csv(
    '/Users/katherineridley/Dropbox (UK Dementia Research Institute)/KRidley/PlaqueStack/Masks/overall_m04_combined.csv'
)
m04results = perform_mannwhitneyu_test(
    data=data, genotype1=genotype1, genotype2=genotype2, variables=variables
)
print(m04results)

m04df = pd.DataFrame(data=m04results)
m04df.to_csv(
    '/Users/katherineridley/Dropbox (UK Dementia Research Institute)/KRidley/PlaqueStack/Masks/m04_mannwhitneyu_results.csv'
)

# Combined table: one row per variable, one column per dataset ('oc', 'm04').
# Each cell holds that variable's {'stat': ..., 'p_value': ...} dict.
results = pd.DataFrame([ocresults, m04results], index=['oc', 'm04']).transpose()
results.to_csv(
    '/Users/katherineridley/Dropbox (UK Dementia Research Institute)/KRidley/PlaqueStack/Masks/mannwhitneyu_results.csv'
)

{'area': {'stat': 1448.0, 'p_value': 0.0022127174518948916}, 'extent': {'stat': 812.0, 'p_value': 0.05700489165291237}, 'surface_area': {'stat': 1484.0, 'p_value': 0.0008333430219923272}, 'complexity': {'stat': 1233.0, 'p_value': 0.16771598728532633}, 'sphericity': {'stat': 895.0, 'p_value': 0.2096534395187294}, 'major_axis_length': {'stat': 1598.0, 'p_value': 2.3107099087365453e-05}, 'minor_axis_length': {'stat': 1368.0, 'p_value': 0.014900726899065438}, 'solidity': {'stat': 607.0, 'p_value': 0.00045554784236190033}, 'volume_ratio': {'stat': 567.0, 'p_value': 0.00013439020015031987}}
{'area': {'stat': 992.0, 'p_value': 0.6196572575922759}, 'extent': {'stat': 707.0, 'p_value': 0.006449911428212244}, 'surface_area': {'stat': 1110.0, 'p_value': 0.6758206452693942}, 'complexity': {'stat': 849.0, 'p_value': 0.10651233860202203}, 'sphericity': {'stat': 749.0, 'p_value': 0.016588602057381486}, 'major_axis_length': {'stat': 1037.0, 'p_value': 0.8850252178413345}, 'minor_axis_length': {'stat':

In [None]:
import plotly.express as px

def visualize_morphological_characteristics(data, variable, genotype_column='Genotype'):
    """Build a violin plot of ``variable`` split by genotype.

    Parameters
    ----------
    data : pandas.DataFrame
        Table containing ``variable`` and ``genotype_column``.
    variable : str
        Column plotted on the y axis.
    genotype_column : str, default 'Genotype'
        Column used both for the x axis and for colouring the violins.

    Returns
    -------
    The plotly figure; an inner box plot and all individual points are drawn,
    and every column of ``data`` is exposed in the hover tooltip.
    """
    plot_title = f"Comparison of {variable} Between Genotypes"
    violin = px.violin(
        data,
        y=variable,
        x=genotype_column,
        color=genotype_column,
        box=True,
        points="all",
        hover_data=data.columns,
    )
    # update_layout returns the same figure, so the call can be chained.
    return violin.update_layout(title=plot_title)

# Example usage for the 'area' column.
# `data` must be a dataframe holding the morphological measurements together with
# the 'Genotype' column - substitute your own dataframe variable if it is named differently.
fig = visualize_morphological_characteristics(data=data, variable='area')
fig.show()