In [None]:
import pandas as pd
import numpy as np

# Read Data and Connectome

In [None]:
# Input table: read the CSV, then discard the 'Unnamed: 0' column
# (presumably the index written by an earlier to_csv -- TODO confirm).
tau_csv = "./data/Tau_with_Demographics_Normalized.csv"
input_file = pd.read_csv(tau_csv)
input_file = input_file.drop(columns=['Unnamed: 0'])

# Connectome: loaded as-is; its 'Unnamed: 0' column is still present at this point.
connectome_csv = "./data/connectome_mean80_fibercount_normalized.csv"
connectome = pd.read_csv(connectome_csv)

In [None]:
# Cortical parcel names; the right- and left-hemisphere entries use the same
# names in the same order, so they are written once and prefixed below.
_cortical = [
    'bankssts', 'caudalanteriorcingulate', 'caudalmiddlefrontal', 'cuneus',
    'entorhinal', 'fusiform', 'inferiorparietal', 'inferiortemporal',
    'isthmuscingulate', 'lateraloccipital', 'lateralorbitofrontal', 'lingual',
    'medialorbitofrontal', 'middletemporal', 'parahippocampal', 'paracentral',
    'parsopercularis', 'parsorbitalis', 'parstriangularis', 'pericalcarine',
    'postcentral', 'posteriorcingulate', 'precentral', 'precuneus',
    'rostralanteriorcingulate', 'rostralmiddlefrontal', 'superiorfrontal',
    'superiorparietal', 'superiortemporal', 'supramarginal', 'frontalpole',
    'temporalpole', 'transversetemporal', 'insula',
]

# Subcortical structure names, again shared by the Left-/Right- entries.
_subcortical = [
    'Cerebellum-Cortex', 'Thalamus-Proper', 'Caudate', 'Putamen', 'Pallidum',
    'Hippocampus', 'Amygdala', 'Accumbens-area', 'VentralDC',
]

# Full region list, in order: right cortex, left cortex, left subcortical,
# right subcortical (86 names in total).
regions = (
    ['ctx-rh-' + name for name in _cortical]
    + ['ctx-lh-' + name for name in _cortical]
    + ['Left-' + name for name in _subcortical]
    + ['Right-' + name for name in _subcortical]
)

# Synthetic Data Generation

Create bilateral entorhinal cortex seeding.

In [None]:
# Remove RID, not needed here
input_file = input_file.drop(columns='RID')

# Extract index of desired start location
regions = [x for x in connectome.columns if 'entorhinal' in x.split('-')]
regions_idx = [input_file.columns.get_loc(r) for r in regions]

# Adjust for shape of numpy array
first_region_idx = list(input_file.columns).index('ctx-rh-bankssts')
regions_idx = [r-first_region_idx for r in regions_idx]
regions_idx

In [None]:
# Creat all zeros
fake_seed = np.zeros((input_file.shape[0], connectome.shape[1]))

# Replace entorhinal with 1
fake_seed[:,regions_idx] = 1

# Insert into real data
input_file.iloc[:,first_region_idx:first_region_idx+connectome.shape[0]] = fake_seed

# Save
input_file.to_csv("./data/fake_seed_data_forward_model.csv")

# Using Predicted Seeds

### Read seeds

In [None]:
# Seeding table; later cells expect it to contain 'Unnamed: 0', 'RID' and one
# column per seeded region.
seeding_csv = "./data/seeding_patterns.csv"
seeding = pd.read_csv(seeding_csv)

### Remake Connectome

Find the regions used in the seeds and keep only the matching rows and columns of the connectome, dropping all others. Renormalize the connectome to have a maximum value of 1.

In [None]:
# Region names are the seeding columns left after removing 'Unnamed: 0' and RID.
seeding_regions = list(seeding.drop(columns=['Unnamed: 0', 'RID']).columns)

# Remove the 'Unnamed: 0' column once, instead of rebuilding the frame for every
# region inside the comprehension.
connectome_matrix = connectome.drop(columns='Unnamed: 0')

# Column positions of the seeding regions, sorted into connectome order. The same
# positions are used for the rows, which assumes rows and columns of the connectome
# follow one ordering -- TODO confirm.
seeding_connectome_idx = sorted(connectome_matrix.columns.get_loc(sr) for sr in seeding_regions)
seeding_connectome = connectome_matrix.iloc[seeding_connectome_idx, seeding_connectome_idx]

# NOTE(review): the values are taken from the full connectome unchanged; no
# rescaling of the submatrix happens in this cell.
# NOTE(review): `seeding_regions` keeps the column order of the seeding file, while
# `seeding_connectome` is in connectome order -- confirm the two orders agree
# wherever both are used together.

# Save to CSV
seeding_connectome.to_csv('./data/seeding_connectome_mean80_fibercount_normalized.csv')
seeding_connectome

### Drop columns from input data not in seeding

This could be done better, but for now it reformats the data so that the current pipeline can be used without changes. This dataframe modification should be made more elegant to improve usability.

In [None]:
# Every input column that is not named in `regions` is carried along unchanged;
# this only works as intended while `regions` holds the full region list.
extra_features = [col for col in input_file.columns if col not in regions]

# Seeding regions first, then the remaining (non-region) columns.
input_features = seeding_regions + extra_features

# Select those columns, remove RID, and write the result out.
seeding_data = input_file[input_features].drop(columns='RID')
seeding_data.to_csv('./data/seeding_Tau_with_Demographics_Normalized.csv')

### Replace Target Tau with Seeds

In [None]:
# Positions of the seeding regions in the input table, ascending.
# (Not used again in this cell; the lookup raises KeyError if a seeding region
# is missing from `input_file`.)
seeding_input_idx = sorted(input_file.columns.get_loc(sr) for sr in seeding_regions)

# Drop tau: remove every column named in `regions`
tau_free_input = input_file.drop(columns=regions)

# Insert seeds: join the seeding columns onto the remaining columns by RID.
# merge() defaults to an inner join, so only RIDs present in both tables are kept.
seed_table = seeding.drop(columns=['Unnamed: 0'])
seeding_input = tau_free_input.merge(seed_table, on='RID')

# Save
seeding_input.to_csv("./data/seeding_data_forward_model.csv")

seeding_input

seeding_input