# Longitudinal Analysis

### Import packages

In [None]:
import paramiko
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from io import StringIO

### SSH Connection + file read

In [None]:
# Download the baseline and 2-year follow-up DTI FA tables over SFTP.
# Produces csv_content_baseline / csv_content_2yr (raw file contents, which
# the dataframe cell decodes as UTF-8).
import getpass
import os

hostname = 'bridges2.psc.edu'
username = 'amanov'
# Credentials must not live in the notebook. Read the password from the
# BRIDGES2_PASSWORD environment variable, or prompt for it when unset.
# NOTE: a password was previously hard-coded in this cell; treat it as
# compromised and rotate it.
password = os.environ.get('BRIDGES2_PASSWORD') or getpass.getpass(
    f'Password for {username}@{hostname}: '
)

remote_path_baseline = '/ocean/projects/med230011p/shared/data_rep/abcd/baseline/trks/dti_fa.csv'
remote_path_2yr = '/ocean/projects/med230011p/shared/data_rep/abcd/2yrfollowup/trks/dti_fa.csv'

client = paramiko.SSHClient()
# NOTE(review): AutoAddPolicy trusts unknown host keys without verification.
# Consider loading known host keys and rejecting unknown hosts instead.
client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
try:
    client.connect(hostname, username=username, password=password)
    print('SSH connection established')

    sftp = client.open_sftp()
    try:
        print('reading requested files')
        with sftp.open(remote_path_baseline, 'r') as remote_file:
            csv_content_baseline = remote_file.read()

        with sftp.open(remote_path_2yr, 'r') as remote_file:
            csv_content_2yr = remote_file.read()
        print('files read')
    finally:
        # Release the SFTP channel even if a read fails.
        sftp.close()
finally:
    # Always tear down the SSH session, including on connection errors.
    client.close()
print('SSH connection closed')


### Dataframe modification

In [None]:
# Parse the downloaded payloads into DataFrames. The payloads are decoded as
# UTF-8 text and wrapped in an in-memory buffer for pandas to read.
baseline_buffer = StringIO(csv_content_baseline.decode('utf-8'))
data_baseline = pd.read_csv(baseline_buffer)

followup_buffer = StringIO(csv_content_2yr.decode('utf-8'))
data_2yr = pd.read_csv(followup_buffer)

# Give the first column of each table the common name 'ID' so later cells
# can refer to it uniformly (presumably the participant identifier).
data_baseline = data_baseline.rename(columns={data_baseline.columns[0]: 'ID'})
data_2yr = data_2yr.rename(columns={data_2yr.columns[0]: 'ID'})

### The next 2 code cells are **mutually exclusive**. Run only one of them before creating the plot.
_________

OPTION 1: <span style="color:red;">Run code cell below for **FULL** dataset</span>

In [None]:
# Build the long-format table (one row per ID / Time / Tract) for the FULL
# dataset.

# Tract columns are every column except the identifier and the 'Time' label.
# Selecting by name instead of slicing columns[1:] keeps this cell safe to
# re-run: after a first run data_baseline already carries a 'Time' column,
# which would otherwise be picked up and melted as if it were a tract.
tract_names = data_baseline.columns.drop(['ID', 'Time'], errors='ignore')

# Tag each table with its time point before stacking them.
data_baseline['Time'] = 'Baseline'
data_2yr['Time'] = '2 yr'

# Identical reshape settings for both time points.
melt_options = dict(
    id_vars=['ID', 'Time'],
    value_vars=tract_names,
    var_name='Tract',
    value_name='DTI_FA',
)
data_baseline_long = pd.melt(data_baseline, **melt_options)
data_2yr_long = pd.melt(data_2yr, **melt_options)

combined_data = pd.concat([data_baseline_long, data_2yr_long], ignore_index=True)

OPTION 2: <span style="color:red;">Run code cell below for a **randomly sampled** dataset (n=5000 baseline rows by default; edit the sample size in the cell to change it)</span>

In [None]:
# Build the long-format table (one row per ID / Time / Tract) for a random
# sample of baseline rows plus the follow-up rows with matching IDs.

# Tract columns are every column except the identifier and the 'Time' label.
# Selecting by name instead of slicing columns[1:] keeps this cell correct
# when data_baseline already carries a 'Time' column from an earlier run of
# the full-dataset cell.
tract_names = data_baseline.columns.drop(['ID', 'Time'], errors='ignore')

# Cap the sample size at the table length: DataFrame.sample raises
# ValueError when n exceeds the number of rows (sampling without replacement).
sample_size = min(5000, len(data_baseline))
sampled_baseline = data_baseline.sample(n=sample_size, random_state=42)

# Keep only follow-up rows whose ID was sampled at baseline. Sampled IDs with
# no follow-up row remain in the baseline sample. The .copy() makes this an
# independent frame so adding 'Time' below does not raise
# SettingWithCopyWarning.
sampled_2yr = data_2yr[data_2yr['ID'].isin(sampled_baseline['ID'])].copy()

sampled_baseline['Time'] = 'Baseline'
sampled_2yr['Time'] = '2 yr'

sampled_baseline_long = pd.melt(sampled_baseline, id_vars=['ID', 'Time'], value_vars=tract_names, var_name='Tract', value_name='DTI_FA')
sampled_2yr_long = pd.melt(sampled_2yr, id_vars=['ID', 'Time'], value_vars=tract_names, var_name='Tract', value_name='DTI_FA')

combined_data = pd.concat([sampled_baseline_long, sampled_2yr_long], ignore_index=True)

__________

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Average DTI_FA per time point, pooled over all IDs and tracts.
fa_by_time = combined_data.groupby('Time')['DTI_FA']
mean_values = fa_by_time.mean().reset_index()

fig, ax = plt.subplots(figsize=(10, 6))

# Every individual measurement as a faint gray point...
ax.scatter(
    combined_data['Time'],
    combined_data['DTI_FA'],
    alpha=0.3,
    color='gray',
    label='Indiv. DTI FA Vals',
)
# ...with the per-time-point means drawn on top as a connected blue line.
ax.plot(mean_values['Time'], mean_values['DTI_FA'], 'bo-', label='Mean DTI FA')

ax.set_xlabel('Time')
ax.set_ylabel('Fractional Anisotropy (FA)')
ax.set_title('Change in FA vs. Time')
ax.legend()
ax.grid(True)
plt.show()
