# GHR project 

## FA analysis of the thalamo-OFC tracts

> Import libraries

In [79]:
import pandas as pd
import re
import os
pd.set_option('max_rows',5000)
from scipy import stats 
import numpy as np

> Define where the FA data is and read it

In [3]:
dataLoc = '/Volumes/CCNC_3T_2/kcho/ccnc/GHR_project/FA_from_probtrackx'

In [15]:
with open(os.path.join(dataLoc, 'FA.txt'),'r') as f:
    dataList = [x.strip() for x in f.readlines()]

> Cleanse the data

In [129]:
dfDict = {}
for i in dataList:
    '''
    Loop through each lines and split them by space.
    Separate each information and add them to the dictionary.
    Separating side in the key is very important.
    Floatfi(?) the FA column
    '''
    data = i.split(' ')
    subject = data[0]
    group = data[0][:3]
    side = data[1]
    fa = float(data[2])
    dfDict[subject+'_'+side] = [subject,side, group, fa]

# Convert the dictionary into dataframe using index orientation
df = pd.DataFrame.from_dict(dfDict, orient='index')
df.columns = ['subject','side','group','fa']

## Statistics of all subjects

> Left

In [137]:
leftDf = df.groupby('side').get_group('lh')
print stats.ttest_ind(leftDf.groupby('group').get_group('GHR').fa,
               leftDf.groupby('group').get_group('NOR').fa)
leftDf.groupby('group').mean()

Ttest_indResult(statistic=-2.7797762763191138, pvalue=0.0061879068846912933)


Unnamed: 0_level_0,fa
group,Unnamed: 1_level_1
GHR,0.407223
NOR,0.418484


In [138]:
rightDf = df.groupby('side').get_group('rh')
print stats.ttest_ind(rightDf.groupby('group').get_group('GHR').fa.astype(float),
               rightDf.groupby('group').get_group('NOR').fa.astype(float))
rightDf.groupby('group').mean()

Ttest_indResult(statistic=-1.5118658790249624, pvalue=0.13282199087860591)


Unnamed: 0_level_0,fa
group,Unnamed: 1_level_1
GHR,0.401045
NOR,0.407062


## Load matched subject list

> Load matched subject list

In [144]:
matchedDf = pd.read_csv("/Volumes/CCNC_3T_2/kcho/ccnc/GHR_project/python_script/propensity_matched_20151201_missing_IQ_as_0_Education_as_0.csv")
matchedDf.group = matchedDf.group.map({0:'NOR',1:'GHR'})
matchedDf = matchedDf[['Unnamed: 0','folderName','sex','age','IQ','Education','handedness','group']]
matchedDf.columns = ['index','folderName','sex','age','IQ','Education','handedness','group']
matchedDf = matchedDf.set_index('index')
matchedDf.groupby('group').count()[['folderName']]

Unnamed: 0_level_0,folderName
group,Unnamed: 1_level_1
GHR,35
NOR,34


> Select those included in the matched set

In [145]:
df_matched = df.set_index('subject').ix[matchedDf.folderName.unique()].reset_index()

In [148]:
df_matched.subject.unique()

array(['GHR01_LHK', 'GHR02_LJH', 'GHR03_PSJ', 'GHR04_PJS', 'GHR05_LJK',
       'GHR06_LSH', 'GHR07_KPJ', 'GHR08_LEK', 'GHR09_KBM', 'GHR10_KYR',
       'GHR11_KYJ', 'GHR12_SYS', 'GHR13_LYJ', 'GHR14_KJH', 'GHR15_EHK',
       'GHR16_PES', 'GHR17_LJW', 'GHR18_CJY', 'GHR19_KJI', 'GHR20_CCH',
       'GHR21_HSM', 'GHR22_CHJ', 'GHR23_KMJ', 'GHR24_SJH', 'GHR25_JHC',
       'GHR26_KSY', 'GHR27_KSH', 'GHR29_CYR', 'GHR30_SSH', 'GHR31_JJA',
       'GHR32_JEA', 'GHR33_KSJ', 'GHR34_HYR', 'GHR35_JYY', 'GHR36_BHJ',
       'NOR10_KMW', 'NOR13_SMY', 'NOR15_KKY', 'NOR17_YKH', 'NOR18_JJM',
       'NOR20_JJR', 'NOR24_YJA', 'NOR26_YJH', 'NOR27_YKS', 'NOR28_SHM',
       'NOR31_HKO', 'NOR32_LJW', 'NOR34_CES', 'NOR35_SNH', 'NOR36_HYS',
       'NOR48_KTH', 'NOR50_KEJ', 'NOR53_PSH', 'NOR57_CJW', 'NOR59_KDW',
       'NOR60_KSH', 'NOR61_KSH', 'NOR64_SSJ', 'NOR65_LES', 'NOR66_LDY',
       'NOR67_LYE', 'NOR68_SDS', 'NOR76_KKT', 'NOR78_LSM', 'NOR82_PJI',
       'NOR87_YSH', 'NOR88_BKS', 'NOR91_SJY', 'NOR93_BHU'], dtyp

## Group Comparison

> Left comparison

In [150]:
leftDf = df_matched.groupby('side').get_group('lh')
print stats.ttest_ind(leftDf.groupby('group').get_group('GHR').fa.astype(float),
                      leftDf.groupby('group').get_group('NOR').fa.astype(float))
leftDf.groupby('group').mean()

Ttest_indResult(statistic=-2.6634245640219381, pvalue=0.0096784170333936576)


Unnamed: 0_level_0,fa
group,Unnamed: 1_level_1
GHR,0.407277
NOR,0.421191


> Right comparison

In [152]:
rightDf = df_matched.groupby('side').get_group('rh')
print stats.ttest_ind(rightDf.groupby('group').get_group('GHR').fa.astype(float),
                      rightDf.groupby('group').get_group('NOR').fa.astype(float))
rightDf.groupby('group').mean()

Ttest_indResult(statistic=-1.3202033418514245, pvalue=0.19125972445134323)


Unnamed: 0_level_0,fa
group,Unnamed: 1_level_1
GHR,0.397851
NOR,0.405097


### Left and right mean comparison

> Make mean dataframe : (left + right) / 2

In [161]:
df_mean = df_matched.groupby(['subject','group']).mean().reset_index()

> Comparison

In [160]:
stats.ttest_ind(df_mean.groupby('group').get_group('GHR').fa,
                df_mean.groupby('group').get_group('NOR').fa)
df_mean.groupby('group').mean()

Unnamed: 0_level_0,fa
group,Unnamed: 1_level_1
GHR,0.402564
NOR,0.413144
