In [1]:
import pandas as pd
import numpy as np
import datetime
import seaborn

In [2]:
# Condition records. CODE is read as text because later cells compare it
# against string literals; START and STOP are parsed as datetimes so that
# durations can be computed from them.
conditions = pd.read_csv(
    "../data-set/conditions.zip",
    compression='zip',
    dtype={'CODE': str},
    parse_dates=['START', 'STOP'],
)

# Patient records, with birth and death dates parsed as datetimes.
patients = pd.read_csv(
    "../data-set/patients.zip",
    compression='zip',
    parse_dates=['BIRTHDATE', 'DEATHDATE'],
)

In [3]:
# Duration of each condition record (STOP minus START). Any row with a
# missing START or STOP ends up with NaT here.
conditions['LENGTH'] = conditions['STOP'].sub(conditions['START'])

A brief summary of how many times each employment-status condition was recorded for each patient

In [4]:
# SNOMED codes of the four employment-status conditions examined in this
# notebook (each one has its own section further down).
EMPLOYMENT_CODES = [
    '160903007',  # full-time employment
    '160904001',  # part-time employment
    '741062008',  # not in labor force
    '73438004',   # unemployment
]

# Keep only the employment-status rows.
employment = conditions[conditions.CODE.isin(EMPLOYMENT_CODES)]

# Count rows per (patient, code) and spread the codes across the columns.
# Patient/code combinations that never occur are left as NaN.
employment_count = (
    employment
    .groupby(['PATIENT', 'CODE'])
    .size()
    .unstack('CODE')
)
employment_count

CODE,160903007,160904001,73438004,741062008
PATIENT,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
00098f6f-9f2e-2c2c-0461-99750eaca295,2.0,,2.0,
000cd94c-9ce3-8f8e-10b2-e5523b6e22d6,1.0,1.0,1.0,
000eb9f8-f23e-29a9-bf44-73cad4e842ad,,1.0,1.0,
001ac12a-1c96-4e88-756d-e6602dd795b9,1.0,1.0,2.0,1.0
0026bb69-c793-4626-38f8-869e3100d576,2.0,3.0,9.0,
...,...,...,...,...
ffe3e229-8fce-acc5-e605-48a418a68e6e,4.0,,3.0,
ffe5ba32-2c4d-8fe9-6958-c6be4e34fd30,2.0,,1.0,1.0
ffe886e6-775d-6684-7988-920458508dd1,7.0,5.0,4.0,
fff4a40b-dcf5-9886-3a97-23a3a6ec392c,2.0,3.0,3.0,1.0


## Full-time employment

SNOMED 160903007

In [5]:
# Number of full-time employment records (SNOMED 160903007) per patient,
# most frequent first.
conditions.loc[conditions['CODE'] == '160903007', 'PATIENT'].value_counts()

3cf6c4af-e750-c78e-17f4-ab9a96458814    102
65320659-2594-e11d-628c-f4c06510e4d9     99
55be1f9e-e66a-1135-9547-c3e86eb22ead     99
ee8458dc-5b37-60fa-cbc7-63807aa520c6     86
cd174e24-9126-64aa-55b2-5a379555cfcc     84
                                       ... 
2fc105bd-a45b-fff0-e416-bcece0d62e52      1
bc74b4a2-c50e-52c4-71c0-7310b68a3d07      1
852c06ba-2862-80f5-6505-d559ccbad118      1
d7f787c3-f61b-eae3-8769-bd1b7c2ea961      1
6478ceb5-c4f7-3857-5c6c-fb0f807dff63      1
Name: PATIENT, Length: 9062, dtype: int64

In [6]:
# Summary statistics for the duration (STOP - START) of full-time
# employment records (SNOMED 160903007).
conditions.loc[conditions['CODE'] == '160903007', 'LENGTH'].describe()

count                          44755
mean     535 days 14:09:42.853312488
std      633 days 10:36:56.465648864
min                 13 days 00:00:00
25%                119 days 00:00:00
50%                371 days 00:00:00
75%                742 days 00:00:00
max               9086 days 00:00:00
Name: LENGTH, dtype: object

## Part-time employment

SNOMED 160904001

In [7]:
# Number of part-time employment records (SNOMED 160904001) per patient,
# most frequent first.
conditions.loc[conditions['CODE'] == '160904001', 'PATIENT'].value_counts()

ee7c0f9e-0a77-3704-e82b-049cb44243f9    41
5f7601d5-b735-81b9-c8d6-2cd9d947b53e    41
e1ab410f-966c-bf08-208b-20c82eaaec6a    37
88580343-3ff9-6f8f-6a1b-86a166bc5157    36
e2f2635c-15df-082f-7e5a-4611bb86024f    35
                                        ..
422008f5-be0e-f44f-f2f5-d22870b6cde1     1
6cd148f0-d0e2-3979-6ade-5216341f73ab     1
5259d617-69bb-0548-420e-76495c9816c3     1
ee637cab-f4a4-129b-0ad5-b444724ae0bb     1
59c43e8f-e60a-c02c-7909-645a999808ba     1
Name: PATIENT, Length: 6353, dtype: int64

In [8]:
# Summary statistics for the duration (STOP - START) of part-time
# employment records (SNOMED 160904001).
conditions.loc[conditions['CODE'] == '160904001', 'LENGTH'].describe()

count                          15180
mean     298 days 19:01:05.454545456
std      335 days 05:36:44.518785556
min                 13 days 00:00:00
25%                 35 days 00:00:00
50%                245 days 00:00:00
75%                371 days 00:00:00
max               3297 days 00:00:00
Name: LENGTH, dtype: object

## Not in labor force

SNOMED 741062008

In [9]:
# Number of not-in-labor-force records (SNOMED 741062008) per patient,
# most frequent first.
conditions.loc[conditions['CODE'] == '741062008', 'PATIENT'].value_counts()

b97686e1-676a-ae68-551a-02c5b26a1afd    25
bdb28728-0f01-e4ca-b743-ba555ff05b47    20
3baac8bc-00cc-f1da-bcd9-0b0c04c9a7db    18
b136d94a-3249-c68d-b46c-d56384baa817    17
ee8458dc-5b37-60fa-cbc7-63807aa520c6    17
                                        ..
c0378345-dc5e-e822-86e9-e69b56c9029f     1
408db693-e96d-fa1e-eb83-c9ed32667b2e     1
356ce8c4-0c81-fbdf-8a26-e8cbccc7aa2f     1
20d3a960-791b-810c-8f66-33d823dec0f8     1
c77fcbcf-414f-44d1-0af8-32a82cc319d6     1
Name: PATIENT, Length: 4438, dtype: int64

In [10]:
# Summary statistics for the duration (STOP - START) of not-in-labor-force
# records (SNOMED 741062008).
conditions.loc[conditions['CODE'] == '741062008', 'LENGTH'].describe()

count                           7750
mean     266 days 06:37:48.696774192
std      291 days 19:00:25.376373444
min                 13 days 00:00:00
25%                 28 days 00:00:00
50%                196 days 00:00:00
75%                371 days 00:00:00
max               2205 days 00:00:00
Name: LENGTH, dtype: object

## Unemployment

SNOMED 73438004

In [11]:
# Number of unemployment records (SNOMED 73438004) per patient,
# most frequent first.
conditions.loc[conditions['CODE'] == '73438004', 'PATIENT'].value_counts()

1bfafda9-3fca-6085-8e32-3402cc035b75    69
edaf7116-e3b0-c4bf-47d2-029c0fa64684    61
d7e7d839-f4a1-50c6-14d0-2be44e7f6d80    61
b3309860-0b28-753f-d879-16266c847868    58
450fb760-07c7-6c1d-264d-42d0afcb20a8    51
                                        ..
7fbcfa8e-780d-ee1e-8ba5-5537f3aa1347     1
5cd985fa-1aef-b80c-f9cc-abf81f18eb32     1
9dff375c-be65-47c8-703f-1bedcd19679d     1
e9ac8f62-2b05-40d2-4592-9d16ed41543d     1
15d5d0c2-6ad2-533f-8597-b075c4eb85ba     1
Name: PATIENT, Length: 9003, dtype: int64

In [12]:
# Summary statistics for the duration (STOP - START) of unemployment
# records (SNOMED 73438004).
conditions.loc[conditions['CODE'] == '73438004', 'LENGTH'].describe()

count                          41581
mean     653 days 16:13:35.218489208
std      695 days 11:55:04.370778104
min                 13 days 00:00:00
25%                267 days 00:00:00
50%                371 days 00:00:00
75%                742 days 00:00:00
max               9086 days 00:00:00
Name: LENGTH, dtype: object