## Framingham Reshaping

Turning the longitudinal Framingham data into a format suitable for multistate modeling.

We will focus on creating a data set suitable for three states: No disease, Cardiovascular disease (any), and Death (absorbing).

In [3]:
%matplotlib inline
import matplotlib
import pandas as pd
import numpy as np

In [5]:
framingham = pd.read_csv("Datasets/framingham.csv")

In [6]:
framingham

Unnamed: 0,SEX,RANDID,TOTCHOL,AGE,SYSBP,DIABP,CURSMOKE,CIGPDAY,BMI,DIABETES,...,CVD,HYPERTEN,TIMEAP,TIMEMI,TIMEMIFC,TIMECHD,TIMESTRK,TIMECVD,TIMEDTH,TIMEHYP
0,1,2448,195,39,106.0,70.0,0,0,26.97,0,...,1,0,8766,6438,6438,6438,8766,6438,8766,8766
1,1,2448,209,52,121.0,66.0,0,0,.,0,...,1,0,8766,6438,6438,6438,8766,6438,8766,8766
2,2,6238,250,46,121.0,81.0,0,0,28.73,0,...,0,0,8766,8766,8766,8766,8766,8766,8766,8766
3,2,6238,260,52,105.0,69.5,0,0,29.43,0,...,0,0,8766,8766,8766,8766,8766,8766,8766,8766
4,2,6238,237,58,108.0,66.0,0,0,28.5,0,...,0,0,8766,8766,8766,8766,8766,8766,8766,8766
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11622,1,9998212,173,46,126.0,82.0,0,0,19.17,0,...,0,1,8766,8766,8766,8766,8766,8766,8766,0
11623,1,9998212,153,52,143.0,89.0,0,0,25.74,0,...,0,1,8766,8766,8766,8766,8766,8766,8766,0
11624,2,9999312,196,39,133.0,86.0,1,30,20.91,0,...,0,1,8766,8766,8766,8766,8766,8766,8766,4201
11625,2,9999312,240,46,138.0,79.0,1,20,26.39,0,...,0,1,8766,8766,8766,8766,8766,8766,8766,4201


In [8]:
framingham.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11627 entries, 0 to 11626
Data columns (total 38 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   SEX       11627 non-null  int64  
 1   RANDID    11627 non-null  int64  
 2   TOTCHOL   11627 non-null  object 
 3   AGE       11627 non-null  int64  
 4   SYSBP     11627 non-null  float64
 5   DIABP     11627 non-null  float64
 6   CURSMOKE  11627 non-null  int64  
 7   CIGPDAY   11627 non-null  object 
 8   BMI       11627 non-null  object 
 9   DIABETES  11627 non-null  int64  
 10  BPMEDS    11627 non-null  object 
 11  HEARTRTE  11627 non-null  object 
 12  GLUCOSE   11627 non-null  object 
 13  PREVCHD   11627 non-null  int64  
 14  PREVAP    11627 non-null  int64  
 15  PREVMI    11627 non-null  int64  
 16  PREVSTRK  11627 non-null  int64  
 17  PREVHYP   11627 non-null  int64  
 18  TIME      11627 non-null  int64  
 19  PERIOD    11627 non-null  int64  
 20  HDLC      11627 non-null  ob

In [30]:
at_risk = framingham[(framingham["PREVAP"] == 0) & (framingham["PREVCHD"] == 0) & (framingham["PREVMI"] == 0) & (framingham["PREVSTRK"] == 0)] 
at_risk = at_risk[["RANDID", "TIME", "PERIOD", "TIMECVD", "TIMEDTH", "TIMEHYP", "PREVHYP", "AGE", "SEX", "BMI", "CURSMOKE", "DIABETES"]]
at_risk

Unnamed: 0,RANDID,TIME,PERIOD,TIMECVD,TIMEDTH,TIMEHYP,PREVHYP,AGE,SEX,BMI,CURSMOKE,DIABETES
0,2448,0,1,6438,8766,8766,0,39,1,26.97,0,0
1,2448,4628,3,6438,8766,8766,0,52,1,.,0,0
2,6238,0,1,8766,8766,8766,0,46,2,28.73,0,0
3,6238,2156,2,8766,8766,8766,0,52,2,29.43,0,0
4,6238,4344,3,8766,8766,8766,0,58,2,28.5,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
11622,9998212,2333,2,8766,8766,0,1,46,1,19.17,0,0
11623,9998212,4538,3,8766,8766,0,1,52,1,25.74,0,0
11624,9999312,0,1,8766,8766,4201,0,39,2,20.91,1,0
11625,9999312,2390,2,8766,8766,4201,0,46,2,26.39,1,0


In [19]:
f = frm[frm.TIMEDTH == 8766]
f.groupby(["RANDID"]).mean()

Unnamed: 0_level_0,TIME,PERIOD,TIMECVD,TIMEDTH,TIMEHYP,PREVHYP,AGE,SEX,CURSMOKE,DIABETES
RANDID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2448,2314.000000,2.0,6438.0,8766.0,8766.0,0.000000,45.500000,1.0,0.000000,0.000000
6238,2166.666667,2.0,8766.0,8766.0,8766.0,0.000000,52.000000,2.0,0.000000,0.000000
9428,1099.500000,1.5,8766.0,8766.0,8766.0,0.000000,51.000000,1.0,1.000000,0.000000
11252,2119.000000,2.0,8766.0,8766.0,4285.0,0.333333,51.666667,2.0,1.000000,0.000000
11263,2176.333333,2.0,5719.0,8766.0,0.0,1.000000,49.000000,2.0,0.000000,0.333333
...,...,...,...,...,...,...,...,...,...,...
9969773,2163.000000,2.0,7994.0,8766.0,0.0,1.000000,56.000000,2.0,0.000000,1.000000
9978986,2136.000000,2.0,8766.0,8766.0,0.0,1.000000,62.000000,2.0,0.666667,0.000000
9995546,1093.000000,1.5,5209.0,8766.0,735.0,0.500000,55.000000,2.0,0.000000,0.000000
9998212,2290.333333,2.0,8766.0,8766.0,0.0,1.000000,46.000000,1.0,0.000000,0.000000


In [20]:
f2 = frm[frm.TIMEDTH < 8766]
f2.groupby(["RANDID"]).mean()

Unnamed: 0_level_0,TIME,PERIOD,TIMECVD,TIMEDTH,TIMEHYP,PREVHYP,AGE,SEX,CURSMOKE,DIABETES
RANDID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
10552,988.500000,1.5,2089.0,2956.0,0.0,1.000000,64.000000,2.0,1.0,0.0
23727,2263.333333,2.0,5592.0,5592.0,0.0,1.000000,47.000000,2.0,0.0,0.0
24721,2225.333333,2.0,6411.0,6411.0,4408.0,0.333333,45.333333,2.0,1.0,0.0
30928,0.000000,1.0,146.0,146.0,0.0,1.000000,38.000000,2.0,1.0,0.0
33555,0.000000,1.0,1442.0,1442.0,1442.0,0.000000,46.000000,2.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...
9983319,0.000000,1.0,565.0,565.0,0.0,1.000000,68.000000,1.0,0.0,0.0
9984683,1046.500000,1.5,1884.0,4300.0,0.0,1.000000,53.000000,1.0,1.0,0.0
9989287,2195.666667,2.0,7746.0,7746.0,7746.0,0.000000,57.000000,1.0,1.0,0.0
9990894,2212.000000,2.0,6433.0,6433.0,2219.0,0.666667,54.000000,2.0,1.0,0.0


In [21]:
f3 = frm[frm.TIMECVD == 8766 and frm.PREVCVD == 0]
f3.groupby(["RANDID"]).mean()

Unnamed: 0_level_0,TIME,PERIOD,TIMECVD,TIMEDTH,TIMEHYP,PREVHYP,AGE,SEX,CURSMOKE,DIABETES
RANDID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
6238,2166.666667,2.0,8766.0,8766.0,8766.0,0.000000,52.000000,2.0,0.000000,0.000000
9428,1099.500000,1.5,8766.0,8766.0,8766.0,0.000000,51.000000,1.0,1.000000,0.000000
11252,2119.000000,2.0,8766.0,8766.0,4285.0,0.333333,51.666667,2.0,1.000000,0.000000
12629,1106.000000,1.5,8766.0,8766.0,2212.0,0.500000,66.500000,2.0,0.000000,0.000000
12806,2153.000000,2.0,8766.0,8766.0,8679.0,0.000000,51.000000,2.0,1.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...
9960803,1093.500000,1.5,8766.0,8766.0,8766.0,0.000000,50.000000,1.0,0.500000,0.000000
9961615,2296.333333,2.0,8766.0,8766.0,2205.0,0.666667,51.333333,1.0,1.000000,0.333333
9978986,2136.000000,2.0,8766.0,8766.0,0.0,1.000000,62.000000,2.0,0.666667,0.000000
9998212,2290.333333,2.0,8766.0,8766.0,0.0,1.000000,46.000000,1.0,0.000000,0.000000


In [15]:
f2.shape

(6950, 12)