In [1]:
import pandas as pd

# BTOS-4D definition
Create the modified version of the `BTOS` symbols. This modified version has fewer symbols, and some of them are specific to the problem. First, we load the standard ITEM - BTOS association.

In [2]:
# Standard ITEM -> BTOS association table.
imap = pd.read_csv(
    '../mbspbs10pc/data/imap_derived.csv',
    index_col=0,  # the first CSV column is used as the row index
    header=0,
)
imap.head(n=10)

Unnamed: 0,ITEM,BTOS (Heirarchy order),BTOS
0,1,101,A
1,2,101,A
2,3,101,A
3,4,101,A
4,5,103,B
5,6,103,B
6,7,103,B
7,8,103,B
8,9,103,B
9,10,103,B


In [7]:
# Optional sanity check (disabled): uncomment to list the rows of `imap`
# whose BTOS symbol is 'C'.
# imap[imap['BTOS'] == 'C']

Load the modified `btos_details.csv` file.

In [8]:
# Read only the columns that describe each BTOS group and its BTOS-4D symbol.
columns = [
    'BTOS (Heirarchy order)',
    'BTOS NAME',
    'BTOS-4D NAME',
    'BTOS-4D',
    'BTOS',
]
btos_details = pd.read_csv(
    '../mbspbs10pc/data/btos_details.csv',
    usecols=columns,
    header=0,
)
btos_details.head(n=16)

Unnamed: 0,BTOS (Heirarchy order),BTOS,BTOS NAME,BTOS-4D NAME,BTOS-4D
0,101.0,A,Non-referred attendances GP/VR GP,GP,G
1,102.0,M,Non-referred attendances - Enhanced Primary Care,GP,G
2,103.0,B,Non-referred attendances - Other,GP,G
3,110.0,O,Non-referred attendances - Practice Nurse Items,GP,G
4,150.0,P,Other Allied Health,Other Allied Health,L
5,200.0,C,Specialist attendances,Specialist attendances,E
6,300.0,D,Obstetrics,Obstetrics,B
7,400.0,E,Anaesthetics,Surgery,S
8,501.0,N,Pathology Collection Items,Pathology,P
9,502.0,F,Pathology Tests,Pathology,P


Initialize the `'BTOS-4D'` column by left-merging `imap` with the corresponding symbols in `btos_details.csv`; the result is stored in `btos4d`.

In [9]:
# Attach the BTOS-4D symbol to every ITEM by matching on the hierarchy-order
# code. The left join keeps every row of `imap`.
columns = ['ITEM', 'BTOS (Heirarchy order)', 'BTOS-4D']
# `.copy()` makes `btos4d` an independent frame: later cells assign into it,
# and writing into a bare column selection of another frame can trigger
# pandas' SettingWithCopyWarning.
btos4d = pd.merge(imap, btos_details, how='left', on='BTOS (Heirarchy order)')[columns].copy()
btos4d.head(10)

Unnamed: 0,ITEM,BTOS (Heirarchy order),BTOS-4D
0,1,101,G
1,2,101,G
2,3,101,G
3,4,101,G
4,5,103,G
5,6,103,G
6,7,103,G
7,8,103,G
8,9,103,G
9,10,103,G


In [11]:
# Optional sanity check (disabled): uncomment to list the rows of `btos4d`
# whose BTOS-4D symbol is 'E'.
# btos4d[btos4d['BTOS-4D'] == 'E']

- Now create the special diabetes-related symbols. Let's start with `'D'`.

In [12]:
# MBS ITEMs that will be re-labelled with the new BTOS-4D symbol 'D'.
annual_cycle_care = [
    2620, 2622, 2624, 2631, 2633, 2635,
    2517, 2518, 2521, 2522, 2525, 2526,
]
eye_exam = [10915]
allied_health_services_diabetes = [
    81100, 81105, 81110,
    81115, 81120, 81125,
]

# Concatenate the three groups, preserving their order.
to_D = list(annual_cycle_care)
to_D += eye_exam
to_D += allied_health_services_diabetes
print(to_D)

[2620, 2622, 2624, 2631, 2633, 2635, 2517, 2518, 2521, 2522, 2525, 2526, 10915, 81100, 81105, 81110, 81115, 81120, 81125]


In [13]:
# Rows for the 'D' ITEMs as they stand before re-labelling.
btos4d.loc[btos4d['ITEM'].isin(to_D)]

Unnamed: 0,ITEM,BTOS (Heirarchy order),BTOS-4D
2007,2517,101,G
2008,2518,101,G
2010,2521,101,G
2011,2522,101,G
2013,2525,101,G
2014,2526,101,G
2068,2620,103,G
2070,2622,103,G
2071,2624,103,G
2075,2631,103,G


In [14]:
# After
# `.at` only accepts a single row/column label pair; assigning through a
# boolean row mask requires `.loc`.
btos4d.loc[btos4d['ITEM'].isin(to_D), 'BTOS-4D'] = 'D'
btos4d[btos4d['ITEM'].isin(to_D)]

Unnamed: 0,ITEM,BTOS (Heirarchy order),BTOS-4D
2007,2517,101,D
2008,2518,101,D
2010,2521,101,D
2011,2522,101,D
2013,2525,101,D
2014,2526,101,D
2068,2620,103,D
2070,2622,103,D
2071,2624,103,D
2075,2631,103,D


- Now let's do the same for the HbA1c test, which goes to `'H'`.

In [15]:
# The single ITEM for the Hb1c test is re-labelled with the symbol 'H'.
# `to_H` and `hb1c` refer to the same list object.
to_H = hb1c = [66841]
print(to_H)

[66841]


In [16]:
# Rows for the 'H' ITEMs as they stand before re-labelling.
btos4d.loc[btos4d['ITEM'].isin(to_H)]

Unnamed: 0,ITEM,BTOS (Heirarchy order),BTOS-4D
11449,66841,502,P


In [17]:
# After
# `.at` only accepts a single row/column label pair; assigning through a
# boolean row mask requires `.loc`.
btos4d.loc[btos4d['ITEM'].isin(to_H), 'BTOS-4D'] = 'H'
btos4d[btos4d['ITEM'].isin(to_H)]

Unnamed: 0,ITEM,BTOS (Heirarchy order),BTOS-4D
11449,66841,502,H


- Finally, let's do the same for the symbol `'R'`.

In [18]:
# ITEMs for the health assessment of people aged 40-49 years at risk;
# these are re-labelled with the symbol 'R'.
# `to_R` and `health_assessment_risk` refer to the same list object.
to_R = health_assessment_risk = [701, 703, 705, 707]
print(to_R)

[701, 703, 705, 707]


In [19]:
# Rows for the 'R' ITEMs as they stand before re-labelling.
btos4d.loc[btos4d['ITEM'].isin(to_R)]

Unnamed: 0,ITEM,BTOS (Heirarchy order),BTOS-4D
408,701,102,G
410,703,102,G
412,705,102,G
414,707,102,G


In [20]:
# After
# `.at` only accepts a single row/column label pair; assigning through a
# boolean row mask requires `.loc`.
btos4d.loc[btos4d['ITEM'].isin(to_R), 'BTOS-4D'] = 'R'
btos4d[btos4d['ITEM'].isin(to_R)]

Unnamed: 0,ITEM,BTOS (Heirarchy order),BTOS-4D
408,701,102,R
410,703,102,R
412,705,102,R
414,707,102,R


The new vocabulary is done. Let's print out the list of new symbols.

In [23]:
# Collect the distinct BTOS-4D symbols present in the final vocabulary.
symbols = set(btos4d['BTOS-4D'].values)
print('There are {} unique symbols:'.format(len(symbols)))
# Set iteration order is arbitrary, so sort to get the same listing on every
# run. `key=str` keeps the sort well-defined if a non-string value (e.g. a NaN
# left by an unmatched merge row) is present.
for s in sorted(symbols, key=str):
    print('- {}'.format(s))

There are 12 unique symbols:
- B
- E
- D
- G
- I
- H
- L
- O
- P
- S
- R
- T


Let's save the new vocabulary as `data/btos4d.csv`.

In [24]:
# Persist the new vocabulary; the row index is written as the first column.
btos4d.to_csv(path_or_buf='../mbspbs10pc/data/btos4d.csv')

In [25]:
# Quick look at the first five rows of the saved vocabulary.
btos4d.head(n=5)

Unnamed: 0,ITEM,BTOS (Heirarchy order),BTOS-4D
0,1,101,G
1,2,101,G
2,3,101,G
3,4,101,G
4,5,103,G
