In [None]:
import pandas as pd
import numpy as np

# Load the male-vs-female LC-MS table; the first CSV column becomes the row index.
# The first data row is dropped.
# NOTE(review): presumably that row is a non-lipid annotation row - confirm against the CSV.
mvsf = pd.read_csv("mvsf_lcms.csv", index_col=0).iloc[1:]

## Our new M vs F LCMS dataset has 0 differential lipids across the whole brain between sexes

In [None]:
# The table stores the fold change as Male / Female; inverting it before the
# log gives the female-vs-male direction used in the rest of this notebook.
# NOTE(review): this is a natural log, whereas analyze_lipids below uses log2 - confirm the intended base.
male_over_female_fc = mvsf['FC(Male / Female)']
logFC = np.log(1 / male_over_female_fc)

logFC

In [None]:
# Smallest adjusted p-value in the table.
# Author's note: the value looked odd - possible column mix-up? TODO confirm.
mvsf.loc[:, 'ttest.pValueAdj'].min()

In [None]:
# Log fold changes of the lipids whose *unadjusted* t-test p-value is below 0.05,
# sorted in ascending order.
# NOTE(review): despite the variable name, this mask selects the nominally
# significant lipids (raw p < 0.05) - presumably "not significant after
# adjustment" is what is meant; confirm.
nominal_mask = mvsf['ttest.pValue'] < 0.05
unsignificant = logFC.loc[nominal_mask].sort_values()
unsignificant

## Look at the male vs female AD dataset

In [None]:
# Load the male / female / AD table; the first CSV column becomes the row index.
mvsfad = pd.read_csv("maleVSfemaleVSAD_juljiana.csv", index_col=0)

# Rows from position 3 onwards and columns from position 1 onwards hold the
# measurements (the first three rows are used as sample metadata in the next cell).
lipids = mvsfad.iloc[3:, 1:].copy()

# Label rows by LipidBlast name and columns by sample name.
lipids.index = mvsfad['LipidBlast_name'].iloc[3:]
lipids.columns = mvsfad.loc['Sample name'].iloc[1:]
lipids

In [None]:
# Sample metadata: the first three rows of the table (minus the first column),
# transposed so that each sample is one row.
meta = mvsfad.iloc[:3, 1:].transpose()
meta

In [None]:
# Wild-type (Genotype == "WT") samples only, split by sex; values cast to float.
is_wt = meta['Genotype'] == "WT"
male_wt_samples = meta.loc[(meta['Sex'] == "M") & is_wt, 'Sample name']
female_wt_samples = meta.loc[(meta['Sex'] == "F") & is_wt, 'Sample name']

males = lipids.loc[:, male_wt_samples].astype(float)
females = lipids.loc[:, female_wt_samples].astype(float)

males

In [None]:
import pandas as pd
import numpy as np
from scipy import stats
from statsmodels.stats.multitest import multipletests
import plotly.graph_objects as go
import plotly.express as px
from adjustText import adjust_text
import matplotlib.pyplot as plt

def analyze_lipids(males_df, females_df):
    """
    Compare lipid abundances between two sample groups, one test per lipid.

    Despite the parameter names the function is group-agnostic: the first
    frame is the reference group and the second the comparison group, and the
    fold change is ``comparison mean / reference mean`` (the notebook also
    calls it with WT vs AD samples).

    For every row a two-sample t-test (``scipy.stats.ttest_ind`` with its
    default settings) is run across the sample columns; the p-values are then
    adjusted with the Benjamini-Hochberg procedure. Rows whose p-value is not
    finite (e.g. a NaN measurement) are left out of the adjustment and get a
    NaN ``padj`` instead of being passed to the correction.

    Parameters:
    males_df (pd.DataFrame): lipids as rows, samples as columns (reference group)
    females_df (pd.DataFrame): lipids as rows, samples as columns (comparison
        group). Rows are matched to ``males_df`` by position when both indexes
        are identical (so duplicated lipid names are supported), otherwise by
        label.

    Returns:
    pd.DataFrame: one row per row of ``males_df`` with the columns ``lipid``,
        ``pvalue``, ``fold_change``, ``log2_fold_change``, ``mean_males``,
        ``mean_females`` and ``padj``.

    Raises:
    KeyError: if a lipid of ``males_df`` is missing from ``females_df``.
    ValueError: if the rows of the two frames cannot be matched one-to-one.
    """
    # Pair every reference row with its comparison row. A per-label `.loc`
    # lookup returns several rows when a lipid name is duplicated, so identical
    # indexes are paired positionally instead.
    if females_df.index.equals(males_df.index):
        females_aligned = females_df
    else:
        females_aligned = females_df.loc[males_df.index]
        if len(females_aligned) != len(males_df):
            raise ValueError(
                "Cannot match lipids one-to-one between the two groups "
                "(duplicated lipid names with differing indexes)."
            )

    reference = males_df.to_numpy(dtype=float)
    comparison = females_aligned.to_numpy(dtype=float)

    # Row-wise t-test: one p-value per lipid.
    if reference.shape[0] > 0:
        _, p_values = stats.ttest_ind(reference, comparison, axis=1)
        p_values = np.asarray(p_values, dtype=float)
    else:
        p_values = np.empty(0, dtype=float)

    # Group means (pandas skips NaN measurements) and fold changes.
    mean_males = males_df.astype(float).mean(axis=1).to_numpy()
    mean_females = females_aligned.astype(float).mean(axis=1).to_numpy()
    fold_change = mean_females / mean_males
    log2_fold_change = np.log2(fold_change)

    results_df = pd.DataFrame({
        'lipid': males_df.index.to_numpy(),
        'pvalue': p_values,
        'fold_change': fold_change,
        'log2_fold_change': log2_fold_change,
        'mean_males': mean_males,
        'mean_females': mean_females,
    })

    # Apply Benjamini-Hochberg correction to the finite p-values only, so that
    # an untestable lipid cannot disturb the adjusted values of the others and
    # an empty input does not reach the correction at all.
    padj = np.full(p_values.shape, np.nan)
    testable = np.isfinite(p_values)
    if testable.any():
        padj[testable] = multipletests(p_values[testable], method='fdr_bh')[1]
    results_df['padj'] = padj

    return results_df

# WT males vs WT females; fold changes are reported as female / male.
results = analyze_lipids(males_df=males, females_df=females)

In [None]:
# Smallest Benjamini-Hochberg adjusted p-value across all lipids.
results.loc[:, 'padj'].min()

In [None]:
# Cut-offs: adjusted p-value and absolute log2 fold change.
padj_threshold = 0.1
fc_threshold = 0.1
results_df = results  # alias: the columns added below also appear on `results`

passes_fdr = results_df['padj'] < padj_threshold
passes_effect = results_df['log2_fold_change'].abs() > fc_threshold

results_df['neg_log10_padj'] = -np.log10(results_df['padj'])
results_df['significant'] = passes_fdr & passes_effect

significant_df = results_df.loc[results_df['significant']]
significant_df

# OK, good confirmation: this independent dataset also shows that there are
# fundamentally NO differential lipids "overall" between males and females.
# One more good reason to look locally, i.e. spatially!

## Look at AD changes to incorporate in our sex-specific knowledge base

In [None]:
# Split the lipid matrix into four sample groups by sex and genotype.
# Every genotype other than "WT" goes into the *AD frames.
is_male = meta['Sex'] == "M"
is_female = meta['Sex'] == "F"
is_wt = meta['Genotype'] == "WT"
is_not_wt = meta['Genotype'] != "WT"

males = lipids.loc[:, meta.loc[is_male & is_wt, 'Sample name']].astype(float)
females = lipids.loc[:, meta.loc[is_female & is_wt, 'Sample name']].astype(float)
malesAD = lipids.loc[:, meta.loc[is_male & is_not_wt, 'Sample name']].astype(float)
femalesAD = lipids.loc[:, meta.loc[is_female & is_not_wt, 'Sample name']].astype(float)
malesAD

In [None]:
# WT males vs non-WT males; fold changes are reported as non-WT / WT.
results = analyze_lipids(males, malesAD)

# Cut-offs: adjusted p-value and absolute log2 fold change.
padj_threshold = 0.1
fc_threshold = 0.1
results_df = results  # alias: the columns added below also appear on `results`

passes_fdr = results_df['padj'] < padj_threshold
passes_effect = results_df['log2_fold_change'].abs() > fc_threshold

results_df['neg_log10_padj'] = -np.log10(results_df['padj'])
results_df['significant'] = passes_fdr & passes_effect

significant_df = results_df.loc[results_df['significant']]
significant_df  # Cer 42:2, some PCs we don't have, TGs that we don't have

In [None]:
# WT females vs non-WT females; fold changes are reported as non-WT / WT.
# Observation: far more changes in females than in males - very interesting!
results = analyze_lipids(females, femalesAD)

# Cut-offs: adjusted p-value and absolute log2 fold change.
padj_threshold = 0.1
fc_threshold = 0.1
results_df = results  # alias: the columns added below also appear on `results`

passes_fdr = results_df['padj'] < padj_threshold
passes_effect = results_df['log2_fold_change'].abs() > fc_threshold

results_df['neg_log10_padj'] = -np.log10(results_df['padj'])
results_df['significant'] = passes_fdr & passes_effect

significant_df = results_df.loc[results_df['significant']]
significant_df

In [None]:
# Names of the significant lipids from the previous cell:
# Cer 40:2, HexCer 36:1, HexCer 36:2, HexCer 38:0, HexCer 40:1, HexCer 40:2 and many more!
print(significant_df['lipid'].to_numpy())

In [None]:
# Normalise the names of the significant lipids:
#   * keep only the part before the first '|';
#   * rewrite the '3O' and '2O' tokens as 'O2'
#     (NOTE(review): presumably to match the atlas naming - confirm that '3O' -> 'O2' is intended);
#   * drop one trailing '/0:0' suffix.
# The suffix is removed explicitly: str.rstrip('/0:0') strips any run of the
# characters '/', '0' and ':' and would turn e.g. 'LPC 18:0/0:0' into 'LPC 18'.
f_spec_ad_lipids = [
    name[:-len('/0:0')] if name.endswith('/0:0') else name
    for name in (
        raw.split('|')[0].replace('3O', 'O2').replace('2O', 'O2')
        for raw in significant_df['lipid'].values
    )
]
f_spec_ad_lipids

In [None]:
# Save the normalised lipid names (written with the default integer index).
pd.Series(f_spec_ad_lipids).to_csv(path_or_buf="f_spec_ad_lipids.csv")

In [None]:
# Load the atlas table.
atlas = pd.read_parquet(path="atlas.parquet")

In [None]:
# Lipid names present both in the first 173 atlas columns and in the list above.
# NOTE(review): presumably the first 173 atlas columns are the lipid columns - confirm.
atlas_lipid_names = atlas.columns[:173].values
np.intersect1d(atlas_lipid_names, f_spec_ad_lipids)

# These lipids that we track are in the AD call set in FEMALES!

In [None]:
# Re-label the significant rows with the normalised lipid names, then keep the
# rows whose name also appears among the first 173 atlas columns.
significant_df.index = f_spec_ad_lipids
shared_lipids = np.intersect1d(atlas.columns[:173].values, f_spec_ad_lipids)
significant_df.loc[shared_lipids]

# Lysophospholipids go down, all the others go up...
# Of course I like seeing Cer 40:2;O2... quite lucky...

In [None]:
# Mean log2 fold change per lipid name: rows that share a name after
# normalisation are averaged.
shared_lipids = np.intersect1d(atlas.columns[:173].values, f_spec_ad_lipids)
averaged_df = (
    significant_df.loc[shared_lipids, ['log2_fold_change']]
    .groupby(level=0)
    .mean()
)
averaged_df

In [None]:
# Save the per-lipid mean log2 fold changes.
averaged_df.to_csv(path_or_buf="AD_femalespec_alteredlips.csv")

In [None]:
# Background set: every lipid name in the malesAD frame, normalised the same
# way as the significant names (part before the first '|', '3O' / '2O'
# rewritten as 'O2', one trailing '/0:0' suffix dropped).
# The suffix is removed explicitly: str.rstrip('/0:0') strips any run of the
# characters '/', '0' and ':' and would turn e.g. 'LPC 18:0/0:0' into 'LPC 18'.
# NOTE(review): this overwrites `f_spec_ad_lipids` with the full background
# list; the name is kept because later cells may rely on it, but re-running
# the earlier overlap cells after this one would use the wrong list.
f_spec_ad_lipids = [
    name[:-len('/0:0')] if name.endswith('/0:0') else name
    for name in (
        raw.split('|')[0].replace('3O', 'O2').replace('2O', 'O2')
        for raw in malesAD.index.values
    )
]
pd.Series(f_spec_ad_lipids).to_csv("AD_studyBG.csv")