### This is a dataset of cross-sectional MRI data in young, middle aged, nondemented and demented older adults, found here https://www.kaggle.com/jboysen/mri-and-alzheimers

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Read the cross-sectional OASIS table (read_csv splits on ',' by default)
# and preview its first five rows.
df_cross = pd.read_csv('oasis_cross-sectional.csv')
df_cross.head(n=5)

Unnamed: 0,ID,M/F,Hand,Age,Educ,SES,MMSE,CDR,eTIV,nWBV,ASF,Delay
0,OAS1_0001_MR1,F,R,74,2.0,3.0,29.0,0.0,1344,0.743,1.306,
1,OAS1_0002_MR1,F,R,55,4.0,1.0,29.0,0.0,1147,0.81,1.531,
2,OAS1_0003_MR1,F,R,73,4.0,3.0,27.0,0.5,1454,0.708,1.207,
3,OAS1_0004_MR1,M,R,28,,,,,1588,0.803,1.105,
4,OAS1_0005_MR1,M,R,18,,,,,1737,0.848,1.01,


In [3]:
# Read the longitudinal OASIS table (read_csv splits on ',' by default)
# and preview its first five rows.
df_long = pd.read_csv('oasis_longitudinal.csv')
df_long.head(n=5)

Unnamed: 0,Subject ID,MRI ID,Group,Visit,MR Delay,M/F,Hand,Age,EDUC,SES,MMSE,CDR,eTIV,nWBV,ASF
0,OAS2_0001,OAS2_0001_MR1,Nondemented,1,0,M,R,87,14,2.0,27.0,0.0,1987,0.696,0.883
1,OAS2_0001,OAS2_0001_MR2,Nondemented,2,457,M,R,88,14,2.0,30.0,0.0,2004,0.681,0.876
2,OAS2_0002,OAS2_0002_MR1,Demented,1,0,M,R,75,12,,23.0,0.5,1678,0.736,1.046
3,OAS2_0002,OAS2_0002_MR2,Demented,2,560,M,R,76,12,,28.0,0.5,1738,0.713,1.01
4,OAS2_0002,OAS2_0002_MR3,Demented,3,1895,M,R,80,12,,22.0,0.5,1698,0.701,1.034


In [4]:
# Dimensions of the longitudinal table as (rows, columns).
df_long.shape

(373, 15)

In [5]:
# Column labels of the longitudinal table, checked before renaming 'M/F'.
df_long.columns

Index(['Subject ID', 'MRI ID', 'Group', 'Visit', 'MR Delay', 'M/F', 'Hand',
       'Age', 'EDUC', 'SES', 'MMSE', 'CDR', 'eTIV', 'nWBV', 'ASF'],
      dtype='object')

#### Rename the column 'M/F' to 'Sex'

In [6]:
# Rename 'M/F' to 'Sex'. The result of rename() is rebound to df_long instead of
# using inplace=True, so the frame shown by earlier cells is not mutated behind
# the reader's back and the step can be chained with other transformations.
df_long = df_long.rename(columns={'M/F': 'Sex'})
df_long.head()

Unnamed: 0,Subject ID,MRI ID,Group,Visit,MR Delay,Sex,Hand,Age,EDUC,SES,MMSE,CDR,eTIV,nWBV,ASF
0,OAS2_0001,OAS2_0001_MR1,Nondemented,1,0,M,R,87,14,2.0,27.0,0.0,1987,0.696,0.883
1,OAS2_0001,OAS2_0001_MR2,Nondemented,2,457,M,R,88,14,2.0,30.0,0.0,2004,0.681,0.876
2,OAS2_0002,OAS2_0002_MR1,Demented,1,0,M,R,75,12,,23.0,0.5,1678,0.736,1.046
3,OAS2_0002,OAS2_0002_MR2,Demented,2,560,M,R,76,12,,28.0,0.5,1738,0.713,1.01
4,OAS2_0002,OAS2_0002_MR3,Demented,3,1895,M,R,80,12,,22.0,0.5,1698,0.701,1.034


### Let's see some descriptive stats of this dataset:

In [7]:
# Summary statistics of the numeric columns, computed over every row of df_long
# (all groups pooled together).
df_long.describe()

Unnamed: 0,Visit,MR Delay,Age,EDUC,SES,MMSE,CDR,eTIV,nWBV,ASF
count,373.0,373.0,373.0,373.0,354.0,371.0,373.0,373.0,373.0,373.0
mean,1.882038,595.104558,77.013405,14.597855,2.460452,27.342318,0.290885,1488.128686,0.729568,1.195461
std,0.922843,635.485118,7.640957,2.876339,1.134005,3.683244,0.374557,176.139286,0.037135,0.138092
min,1.0,0.0,60.0,6.0,1.0,4.0,0.0,1106.0,0.644,0.876
25%,1.0,0.0,71.0,12.0,2.0,27.0,0.0,1357.0,0.7,1.099
50%,2.0,552.0,77.0,15.0,2.0,29.0,0.0,1470.0,0.729,1.194
75%,2.0,873.0,82.0,16.0,3.0,30.0,0.5,1597.0,0.756,1.293
max,5.0,2639.0,98.0,23.0,5.0,30.0,2.0,2004.0,0.837,1.587


#### These are values of all the patients tested, with and without dementia. They tell us something about the experimental design and the sample, but not about the differences between the groups of patients with different conditions. So let's analyze the groups separately:

In [9]:
# Summarise the categorical 'Group' column: count, number of distinct labels,
# most frequent label and its frequency.
df_long['Group'].describe()

count             373
unique              3
top       Nondemented
freq              190
Name: Group, dtype: object

#### There are three group categories of patients, the most common being Nondemented. Let's find the other categories:

In [10]:
# List the distinct labels that occur in the 'Group' column.
df_long['Group'].unique()

array(['Nondemented', 'Demented', 'Converted'], dtype=object)

### The 'Demented' group:

In [15]:
# Keep only the rows labelled 'Demented' and preview the first five.
demented = df_long.loc[df_long['Group'] == 'Demented']
demented.head(5)

Unnamed: 0,Subject ID,MRI ID,Group,Visit,MR Delay,Sex,Hand,Age,EDUC,SES,MMSE,CDR,eTIV,nWBV,ASF
2,OAS2_0002,OAS2_0002_MR1,Demented,1,0,M,R,75,12,,23.0,0.5,1678,0.736,1.046
3,OAS2_0002,OAS2_0002_MR2,Demented,2,560,M,R,76,12,,28.0,0.5,1738,0.713,1.01
4,OAS2_0002,OAS2_0002_MR3,Demented,3,1895,M,R,80,12,,22.0,0.5,1698,0.701,1.034
10,OAS2_0007,OAS2_0007_MR1,Demented,1,0,M,R,71,16,,28.0,0.5,1357,0.748,1.293
11,OAS2_0007,OAS2_0007_MR3,Demented,3,518,M,R,73,16,,27.0,1.0,1365,0.727,1.286


In [16]:
# Summary statistics over the rows of the 'Demented' subset. Each row is one MRI
# session (the same 'Subject ID' recurs across visits), so `count` is a number of
# scans rather than a number of distinct subjects.
demented.describe()

Unnamed: 0,Visit,MR Delay,Age,EDUC,SES,MMSE,CDR,eTIV,nWBV,ASF
count,146.0,146.0,146.0,146.0,127.0,144.0,146.0,146.0,146.0,146.0
mean,1.732877,452.541096,76.260274,13.671233,2.771654,24.513889,0.671233,1485.849315,0.716301,1.19689
std,0.790383,513.838511,6.940193,2.898536,1.196483,4.497064,0.296173,173.769461,0.031921,0.137177
min,1.0,0.0,61.0,6.0,1.0,4.0,0.5,1143.0,0.646,0.897
25%,1.0,0.0,71.0,12.0,2.0,22.0,0.5,1357.0,0.69425,1.12
50%,2.0,490.5,76.0,13.0,3.0,26.0,0.5,1476.5,0.712,1.1885
75%,2.0,693.25,81.0,16.0,4.0,28.0,1.0,1566.5,0.737,1.293
max,5.0,2508.0,98.0,20.0,5.0,30.0,2.0,1957.0,0.806,1.535


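#### The 'Demented' group contains 146 MRI sessions (rows). Because each subject is scanned on several visits, this is a count of scans, not of unique patients. Age at scan ranges from 61 to 98 years, with a mean of about 76 years.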

### The 'Nondemented' group:

In [17]:
# Keep only the rows labelled 'Nondemented' and preview the first five.
nondemented = df_long.loc[df_long['Group'] == 'Nondemented']
nondemented.head(5)

Unnamed: 0,Subject ID,MRI ID,Group,Visit,MR Delay,Sex,Hand,Age,EDUC,SES,MMSE,CDR,eTIV,nWBV,ASF
0,OAS2_0001,OAS2_0001_MR1,Nondemented,1,0,M,R,87,14,2.0,27.0,0.0,1987,0.696,0.883
1,OAS2_0001,OAS2_0001_MR2,Nondemented,2,457,M,R,88,14,2.0,30.0,0.0,2004,0.681,0.876
5,OAS2_0004,OAS2_0004_MR1,Nondemented,1,0,F,R,88,18,3.0,28.0,0.0,1215,0.71,1.444
6,OAS2_0004,OAS2_0004_MR2,Nondemented,2,538,F,R,90,18,3.0,27.0,0.0,1200,0.718,1.462
7,OAS2_0005,OAS2_0005_MR1,Nondemented,1,0,M,R,80,12,4.0,28.0,0.0,1689,0.712,1.039


In [18]:
# Summary statistics over the rows of the 'Nondemented' subset. Each row is one MRI
# session (the same 'Subject ID' recurs across visits), so `count` is a number of
# scans rather than a number of distinct subjects.
nondemented.describe()

Unnamed: 0,Visit,MR Delay,Age,EDUC,SES,MMSE,CDR,eTIV,nWBV,ASF
count,190.0,190.0,190.0,190.0,190.0,190.0,190.0,190.0,190.0,190.0
mean,1.968421,670.242105,77.057895,15.142105,2.394737,29.226316,0.005263,1495.5,0.7409,1.191063
std,0.980796,672.807019,8.096104,2.742149,1.047534,0.882722,0.051163,184.888262,0.037705,0.144029
min,1.0,0.0,60.0,8.0,1.0,26.0,0.0,1106.0,0.644,0.876
25%,1.0,0.0,71.0,13.0,2.0,29.0,0.0,1358.25,0.71725,1.07375
50%,2.0,631.0,77.0,16.0,2.0,29.0,0.0,1474.5,0.739,1.19
75%,2.75,1129.75,82.0,18.0,3.0,30.0,0.0,1634.75,0.769,1.2925
max,5.0,2517.0,97.0,23.0,5.0,30.0,0.5,2004.0,0.837,1.587


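#### The 'Nondemented' group contains 190 MRI sessions (rows). Because each subject is scanned on several visits, this is a count of scans, not of unique patients. Age at scan ranges from 60 to 97 years, with a mean of about 77 years.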

### The 'Converted' group:

In [19]:
# Keep only the rows labelled 'Converted' and preview the first five.
converted = df_long.loc[df_long['Group'] == 'Converted']
converted.head(5)

Unnamed: 0,Subject ID,MRI ID,Group,Visit,MR Delay,Sex,Hand,Age,EDUC,SES,MMSE,CDR,eTIV,nWBV,ASF
33,OAS2_0018,OAS2_0018_MR1,Converted,1,0,F,R,87,14,1.0,30.0,0.0,1406,0.715,1.248
34,OAS2_0018,OAS2_0018_MR3,Converted,3,489,F,R,88,14,1.0,29.0,0.0,1398,0.713,1.255
35,OAS2_0018,OAS2_0018_MR4,Converted,4,1933,F,R,92,14,1.0,27.0,0.5,1423,0.696,1.234
36,OAS2_0020,OAS2_0020_MR1,Converted,1,0,M,R,80,20,1.0,29.0,0.0,1587,0.693,1.106
37,OAS2_0020,OAS2_0020_MR2,Converted,2,756,M,R,82,20,1.0,28.0,0.5,1606,0.677,1.093


In [20]:
# Summary statistics over the rows of the 'Converted' subset. Each row is one MRI
# session (the same 'Subject ID' recurs across visits), so `count` is a number of
# scans rather than a number of distinct subjects.
converted.describe()

Unnamed: 0,Visit,MR Delay,Age,EDUC,SES,MMSE,CDR,eTIV,nWBV,ASF
count,37.0,37.0,37.0,37.0,37.0,37.0,37.0,37.0,37.0,37.0
mean,2.027027,771.810811,79.756757,15.459459,1.72973,28.675676,0.256757,1459.27027,0.72373,1.212405
std,1.040472,767.296366,7.425203,2.523166,0.96173,1.564432,0.253356,135.432773,0.03543,0.109295
min,1.0,0.0,65.0,12.0,1.0,24.0,0.0,1264.0,0.666,1.019
25%,1.0,0.0,74.0,14.0,1.0,28.0,0.0,1383.0,0.696,1.106
50%,2.0,706.0,81.0,16.0,1.0,29.0,0.5,1423.0,0.718,1.234
75%,3.0,1422.0,86.0,18.0,2.0,30.0,0.5,1587.0,0.75,1.269
max,5.0,2639.0,92.0,20.0,4.0,30.0,0.5,1722.0,0.799,1.388


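#### The 'Converted' group contains 37 MRI sessions (rows). Because each subject is scanned on several visits, this is a count of scans, not of unique patients. Age at scan ranges from 65 to 92 years, with a mean of about 80 years.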