In [1]:
import re
import pandas as pd
# Load the raw observation records.
df = pd.read_csv('data_raw.csv')
# Show the column names in file order.
tuple(df.columns.values)

('Bird Species',
 'Distance Bin',
 'Date',
 'Time',
 'Coordinates',
 'Location',
 'Habitat Type',
 'Time Period',
 'Group Name',
 'Remarks')

# Standardise Location
Use the 'Location' column instead of 'Coordinates' because it actually has less variation and fewer typos. Besides, the coordinates entered by some groups are not accurate enough to use.

In [2]:
# One individual in one group typed miscellaneous comments into their
# 'Location' cells. Their rows are identified by the coordinates below.
# Within each consecutive run of rows sharing one of these coordinates, every
# row is given the location text of the first row of that run.
target_coords = (
    '1.303894, 103.775190',
    '1.305994, 103.773311',
    '1.306042, 103.774040',
    '1.306989, 103.773073')

coords = df['Coordinates']
# A new run starts wherever the coordinates differ from the previous row.
run_id = (coords != coords.shift()).cumsum()
# Location of the first row of each run, broadcast to every row of that run.
first_location_in_run = df['Location'].groupby(run_id).transform(
    lambda run: run.iloc[0])

# Write through df.loc with column labels: rows yielded by iterrows() may be
# copies, so assigning to them is not guaranteed to modify df.
in_target = coords.isin(target_coords)
df.loc[in_target, 'Location'] = first_location_in_run[in_target]

In [3]:
# Spot-check rows 965-999 (by position), which include rows with target coordinates.
df.iloc[965:1000]

Unnamed: 0,Bird Species,Distance Bin,Date,Time,Coordinates,Location,Habitat Type,Time Period,Group Name,Remarks
965,Javan Myna,2,1 Mar,09 08,"1.305037, 103.772685",Edge of Town Green near Pizza Hut,On town green,0828 - 0838,Ashy Minivet,
966,Javan Myna,2,1 Mar,09 08,"1.305037, 103.772685",Edge of Town Green near Pizza Hut,On town green closer to signboard,0828 - 0838,Ashy Minivet,
967,Javan Myna,2,1 Mar,09 09,"1.305037, 103.772685",Edge of Town Green near Pizza Hut,NIL(in flight),0828 - 0838,Ashy Minivet,
968,Javan Myna,1,1 Mar,09 12,"1.303894, 103.775190",Road/Parking space behind Create Tower,Road,0912 - 1012,Ashy Minivet,
969,Yellow-vented Bulbul,2,1 Mar,09 12,"1.303894, 103.775190",Road/Parking space behind Create Tower,NIL (heard),0912 - 1012,Ashy Minivet,Heard
970,Javan Myna,1,1 Mar,09 15,"1.303894, 103.775190",Road/Parking space behind Create Tower,Pavement,0912 - 1012,Ashy Minivet,
971,Javan Myna,2,1 Mar,09 19,"1.303894, 103.775190",Road/Parking space behind Create Tower,Tree,0912 - 1012,Ashy Minivet,
972,Javan Myna,1,2 Mar,09 28,"1.305994, 103.773311","30 College Ave E, Singapore 138599",Pavement,0928 - 0938,Ashy Minivet,
973,Yellow-vented Bulbul,2,2 Mar,09 31,"1.305994, 103.773311","30 College Ave E, Singapore 138599",Tree,0928 - 0938,Ashy Minivet,
974,Olive-backed Sunbird,2,2 Mar,09 32,"1.305994, 103.773311","30 College Ave E, Singapore 138599",NIL (heard),0928 - 0938,Ashy Minivet,Heard


In [4]:
# Remove leading and trailing whitespace from every location.
# The whole column is assigned back because rows yielded by iterrows() may be
# copies, so editing them is not guaranteed to change df. `.str.strip()` also
# leaves missing values as they are instead of raising.
df['Location'] = df['Location'].str.strip()

In [5]:
# One person entered '...opposite Engineering E4' and then dragged the cell,
# so the trailing number was auto-incremented (up to E13). Reset every location
# containing that text to the intended E4 wording.
is_dragged = df['Location'].str.contains(
    'In the middle of YIH and CLB bus stop, opposite Engineering E',
    regex=False,  # literal substring test, same as the `in` operator
    na=False)     # missing locations are left alone
df.loc[is_dragged, 'Location'] = (
    'In the middle of YIH and CLB bus stop, opposite Engineering E4')

# Check group sites

In [6]:
# Sanity-check the site data: overall number of distinct locations, then the
# row count and distinct-location count of every group.
unique_location_total = len(set(df['Location']))  # if this is not 200, it is bad!
print('Number of unique locations: {}'.format(unique_location_total))

# Names matched against the 'Group Name' column
# (note that 'Banded Woodpecker ' is matched with a trailing space).
group_names = (
    'Asian Koel',
    'Ashy Minivet',
    'Banded Woodpecker ',
    'Brahminy Kite',
    'Collared Kingfisher',
    'Coppersmith Barbet',
    'Crimson Sunbird',
    'Pacific Swallow',
    'Spotted Dove',
    'Spotted Wood Owl',
)

# One sub-frame per group, in the same order as `group_names`.
groups = tuple(df[(df['Group Name'] == name)] for name in group_names)
(asian_koel, ashy_minivet, banded_woodpecker,
 brahminy_kite, collared_kingfisher, coppersmith_barbet,
 crimson_sunbird, pacific_swallow, spotted_dove, spotted_wood_owl) = groups

print('Number of rows: {}'.format(len(df)))
for group_num, group in enumerate(groups):
    row_total = len(group)
    location_total = len(set(group['Location']))
    print('Number of rows by group {}: {}'.format(group_num+1, row_total))
    print('                locations: {}'.format(location_total))

Number of unique locations: 195
Number of rows: 1722
Number of rows by group 1: 125
                locations: 20
Number of rows by group 2: 99
                locations: 20
Number of rows by group 3: 173
                locations: 19
Number of rows by group 4: 189
                locations: 20
Number of rows by group 5: 132
                locations: 18
Number of rows by group 6: 327
                locations: 20
Number of rows by group 7: 170
                locations: 20
Number of rows by group 8: 217
                locations: 20
Number of rows by group 9: 150
                locations: 20
Number of rows by group 10: 140
                locations: 20


# Remove unidentified and untracked
Remove every row whose bird is not one of the following species:
1. Javan myna
2. Yellow-vented bulbul
3. Rock pigeon
4. Olive-backed sunbird
5. Black-naped oriole

Additionally, fix typos and standardise capitalisation, hyphenation and trailing whitespace, so that we can reliably differentiate between species programmatically.

Also expand rows that count more than one observation, e.g. 'Javan Myna x4', into one row per observation.

In [7]:
# List every distinct raw species label to see which spellings need standardising.
set(df['Bird Species'])

{'"Chweet chweet" then 4 secs pause, repeat',
 '"Parking car sound" bird',
 '-',
 'Asian Glossy Starling',
 'Asian Koel',
 'Asian Koele',
 'Bird of prey(?)',
 'Bird that makes a low-pitched quack',
 'Bird that sounds like monkey',
 'Black-capped Kingfisher',
 'Black-naped Oriole',
 'Black-naped Oriole ',
 'Black-naped oriole (?)',
 'Brahminy Kite',
 'Brahminy Kite ',
 'Brahminy kite',
 'Cinnamon-headed Green Pigeon',
 'Collared Kingfisher',
 'Collared Kingfisher ',
 'Collared Kingfisher x2',
 'Crimson Sunbird',
 'Crow',
 'Eagle',
 'Eagle ',
 'Eurasian Tree Sparrow',
 'Eurasian Tree Sparrow ',
 'Eurasian Tree Sparrow x3',
 'Flame-breasted sunbird',
 'Jarvan Myna',
 'Javan Myna',
 'Javan Myna ',
 'Javan Myna (x2)',
 'Javan Myna x4',
 'Javan Mynah',
 'Javan myna',
 'Jungle Crow',
 'Kingfisher',
 'Koel',
 'Large black bird',
 'Large grey/black bird, suspected pigeon',
 'Large-billed Crow',
 'None ',
 'Olive-backed Sunbird',
 'Olive-backed Sunbird ',
 'Olive-backed Sunbird (?)',
 'Olive-bac

In [8]:
# Patterns that recognise each tracked species despite typos and differences
# in capitalisation in the raw labels.
myna_re = re.compile('[Mm]yna')
oriole_re = re.compile('[Oo]riole')
sunbird_re = re.compile('Ol[i]?ve-backed')
pigeon_re = re.compile('Rock')
bulbul_re = re.compile('[Yy]ellow-[Vv]ented')

# Checked in this order; the first pattern that matches decides the name.
species_patterns = (
    (myna_re, 'Javan myna'),
    (oriole_re, 'Black-naped oriole'),
    (sunbird_re, 'Olive-backed sunbird'),
    (pigeon_re, 'Rock pigeon'),
    (bulbul_re, 'Yellow-vented bulbul'),
)

# A number in the label (e.g. 'Javan Myna x4') is the number of observations
# the row stands for. `\d+` reads the whole number, so 'x12' means 12, not 1.
count_re = re.compile(r'\d+')

# Collect the rows in a list and build the frame once: DataFrame.append was
# removed in pandas 2.0, and appending row by row is quadratic.
tracked_rows = []
for _, row in df.iterrows():
    raw_label = row['Bird Species']
    if not isinstance(raw_label, str):
        # missing label (NaN): cannot be a tracked species
        continue
    standard_name = next(
        (name for pattern, name in species_patterns
         if pattern.search(raw_label)),
        None)
    if standard_name is None:
        # not one of the tracked species
        continue
    count_match = count_re.search(raw_label)
    repeats = int(count_match.group(0)) if count_match else 1
    # Work on a copy so the result never depends on whether iterrows()
    # handed out a view or a copy of the underlying data.
    cleaned = row.copy()
    cleaned['Bird Species'] = standard_name
    tracked_rows.extend([cleaned] * repeats)

# Columns are put in alphabetical order, which is the order the append-based
# construction produced and that positional row indexing relies on.
tracked_only_df = pd.DataFrame(tracked_rows).sort_index(axis=1)

In [9]:
# Confirm which species names remain after filtering and standardising.
set(tracked_only_df['Bird Species'])

{'Black-naped oriole',
 'Javan myna',
 'Olive-backed sunbird',
 'Rock pigeon',
 'Yellow-vented bulbul'}

In [10]:
# Row count after species filtering, with multi-count rows expanded.
len(tracked_only_df)  # 1360, or 1364

1364

In [11]:
# The columns of this frame are in alphabetical order, not the raw file's
# order, so a given position in a row no longer means the same column as in `df`.
tuple(tracked_only_df.columns.values)  # !! indices changed

('Bird Species',
 'Coordinates',
 'Date',
 'Distance Bin',
 'Group Name',
 'Habitat Type',
 'Location',
 'Remarks',
 'Time',
 'Time Period')

# Remove heard-not-seen and fly-bys

In [12]:
# Inspect the distinct 'Distance Bin' values among the tracked species.
set(tracked_only_df['Distance Bin'])
# There is a single observation with no distance bin (NaN);
# it was probably deleted by accident.
# Interpolating, we assign it a distance bin of 1 in a later cell.

{'Heard ',
 nan,
 'in flight',
 '4',
 'Flyby',
 'Did not land',
 'In flight',
 '3',
 'heard',
 'flyby',
 '2',
 'Heard',
 '1',
 'In Flight'}

In [13]:
# Also note that one group recorded fly-bys and heard-not-seen observations
# in the 'Remarks' column, so that column has to be checked as well.
set(tracked_only_df['Remarks'])

{nan,
 'Sunlight was very strong which hindered vision',
 'moving from tree to tree; construction works',
 'in flight',
 'Noisy and barely any sky visibility as it is flanked by two tall buildings',
 'Half blocked by buildings',
 'moving from tree to tree; location has high vehicular traffic',
 'Found in tree',
 'x7 of them (4 were greyish, 3 were darker brown/black in colour, unsure if same species/diff gender)',
 'Heard sound but not observed, 180 degree view',
 'In between two buildings. Little visibility of the sky',
 'Very hot and shaded area was very far away from observer point (distance bin around 2.5). There might have been more birds hiding in the bushes across the YNC field. Also potential recounting of yellow bulbuls because they were within a 5m radius.',
 'Surrounded by tall buildings, no trees around',
 'Black bird in flight, suspected to be Javan Myna',
 'Landed in the construction site away from view, but estimated to be between 10-50m away',
 'roof top of block',
 'Fo

In [14]:
# Matches the wordings used for birds that were only heard, or that flew past
# without landing.
heard_and_inflight_re = re.compile('([Ff]light|[Ff]lyby|Did not land|[Hh]eard)')


def _mentions_heard_or_inflight(value):
    """Return True if the text form of `value` matches heard_and_inflight_re."""
    return bool(heard_and_inflight_re.search(str(value)))


# Work on a copy and address columns by label; rows yielded by iterrows() may
# be copies, and DataFrame.append was removed in pandas 2.0.
candidates = tracked_only_df.copy()

# An observation without a distance bin is assigned bin '1'.
missing_bin = candidates['Distance Bin'].isnull()
for _ in range(int(missing_bin.sum())):
    print('found the null!')
candidates.loc[missing_bin, 'Distance Bin'] = '1'

# A row is dropped when either its distance bin or its remarks say the bird
# was heard only or in flight.
excluded = (
    candidates['Distance Bin'].map(_mentions_heard_or_inflight).astype(bool)
    | candidates['Remarks'].map(_mentions_heard_or_inflight).astype(bool))

# .copy() so that later cells can modify this frame without touching `candidates`.
tracked_inloc_df = candidates[~excluded].copy()

found the null!


In [15]:
# Row count after removing heard-only and in-flight observations.
len(tracked_inloc_df)

999

In [16]:
# Confirm which distance-bin values remain after filtering.
set(tracked_inloc_df['Distance Bin'])

{'1', '2', '3', '4'}

In [17]:
# Intermediate save: 'Time' and 'Time Period' are not standardised yet at this
# point. The same path is written again at the end of the notebook.
tracked_inloc_df.to_csv('data.csv')

In [18]:
# Show the column order of the filtered frame.
tuple(tracked_inloc_df.columns.values)

('Bird Species',
 'Coordinates',
 'Date',
 'Distance Bin',
 'Group Name',
 'Habitat Type',
 'Location',
 'Remarks',
 'Time',
 'Time Period')

# Standardise Time

In [19]:
# List the distinct 'Time' values to see which formats were used.
set(tracked_inloc_df['Time'])

{'07 45',
 '07 50',
 '08 00',
 '08 06',
 '08 11',
 '08 12',
 '08 14',
 '08 18',
 '08 19',
 '08 22',
 '08 23',
 '08 28',
 '08 29',
 '08 30',
 '08 33',
 '08 37',
 '08 46',
 '08 59',
 '0803',
 '0806',
 '0807',
 '0808',
 '0810',
 '0811',
 '0814',
 '0816',
 '0817',
 '0820',
 '0823',
 '0824',
 '0825',
 '0826',
 '0828',
 '0829',
 '0832',
 '0836',
 '0841',
 '0843',
 '0844',
 '0845',
 '0846',
 '0849',
 '0850',
 '0858',
 '0859',
 '09 00',
 '09 01',
 '09 02',
 '09 04',
 '09 06',
 '09 08',
 '09 09',
 '09 12',
 '09 15',
 '09 19',
 '09 21',
 '09 28',
 '09 31',
 '09 35',
 '09 36',
 '09 38',
 '09 40',
 '09 46',
 '09 48',
 '09 51',
 '09 55',
 '0900',
 '0902',
 '0905',
 '0906',
 '0908',
 '0909',
 '0910',
 '0911',
 '0912',
 '0914',
 '0915',
 '0916',
 '0917',
 '0918',
 '0919',
 '0921',
 '0922',
 '0923',
 '0924',
 '0925',
 '0926',
 '0927',
 '0928',
 '0932',
 '0935',
 '0937',
 '0939',
 '0940',
 '0941',
 '0942',
 '0943',
 '0944',
 '0946',
 '0952',
 '0953',
 '0955',
 '0958',
 '10 01',
 '10 02',
 '10 04',
 '10

In [20]:
def normalise_hhmm(raw_time):
    """Return a clock time as a zero-padded 'HHMM' string.

    Recognised inputs (surrounding whitespace is ignored):
      * 'H MM', 'HH MM', 'H:MM', 'HH:MM', 'H.MM', 'HH.MM'
      * 'H.M' / 'HH.M' - a decimal that lost its trailing zero ('9.3' -> '0930')
      * 'HMM'          - three digits without a separator ('935' -> '0935')
      * 'H'            - a bare hour ('9' -> '0900')

    Text in any other form (including text already in 'HHMM' form) is returned
    stripped but otherwise unchanged. Non-string values such as NaN are
    returned as they are.
    """
    if not isinstance(raw_time, str):
        return raw_time
    text = raw_time.strip()

    # hour, one separator (space, colon or dot), two-digit minutes
    match = re.fullmatch(r'(\d{1,2})[ :.](\d{2})', text)
    if match:
        return '{:0>2}{}'.format(*match.groups())

    # hour, dot, single minute digit: the tens digit of the minutes
    match = re.fullmatch(r'(\d{1,2})\.(\d)', text)
    if match:
        return '{:0>2}{}0'.format(*match.groups())

    # three digits: single-digit hour followed by the minutes
    if re.fullmatch(r'\d{3}', text):
        return '0' + text

    # one digit: the hour on its own
    if re.fullmatch(r'\d', text):
        return '0{}00'.format(text)

    return text


# Standardise the whole 'Time' column and assign it back by label; rows yielded
# by iterrows() may be copies, so editing them is not guaranteed to stick.
times_before = tracked_inloc_df['Time']
times_after = times_before.map(normalise_hhmm)
for before, after in zip(times_before, times_after):
    print('{} -> {}'.format(before, after))
# .values: assign by position so the frame's duplicated index labels are irrelevant
tracked_inloc_df['Time'] = times_after.values

09 35 -> 0935
09 40 -> 0940
09 36 -> 0936
10 07 -> 1007
10 36 -> 1036
10 36 -> 1036
10 42 -> 1042
10 38 -> 1038
10 44 -> 1044
10 55 -> 1055
10 55 -> 1055
11 00 -> 1100
10 56 -> 1056
10 57 -> 1057
11 01 -> 1101
11 02 -> 1102
11 04 -> 1104
10 56 -> 1056
10 58 -> 1058
10 59 -> 1059
11 04 -> 1104
11 04 -> 1104
11 04 -> 1104
11 04 -> 1104
11 04 -> 1104
11 04 -> 1104
11 04 -> 1104
11 04 -> 1104
11 04 -> 1104
11 04 -> 1104
11 04 -> 1104
11 04 -> 1104
11 04 -> 1104
11 04 -> 1104
11 04 -> 1104
11 04 -> 1104
11 04 -> 1104
11 04 -> 1104
11 05 -> 1105
10 57 -> 1057
11 02 -> 1102
9:25 -> 0925
9:25 -> 0925
9:25 -> 0925
9:25 -> 0925
9:25 -> 0925
9:25 -> 0925
9:28 -> 0928
9:30 -> 0930
9:30 -> 0930
9:31 -> 0931
9:34 -> 0934
9:35 -> 0935
9:25 -> 0925
9:44 -> 0944
9:47 -> 0947
9:52 -> 0952
9:49 -> 0949
9:49 -> 0949
9:49 -> 0949
9:49 -> 0949
9:49 -> 0949
9:49 -> 0949
9:53 -> 0953
9:53 -> 0953
9:53 -> 0953
9:55 -> 0955
9:57 -> 0957
9:59 -> 0959
9:59 -> 0959
9:59 -> 0959
10:00 -> 1000
10:00 -> 1000
10:00 ->

In [21]:
# Check the distinct 'Time' values after standardisation.
set(tracked_inloc_df['Time'])

{'0740',
 '0745',
 '0750',
 '0800',
 '0803',
 '0806',
 '0807',
 '0808',
 '0810',
 '0811',
 '0812',
 '0813',
 '0814',
 '0816',
 '0817',
 '0818',
 '0819',
 '0820',
 '0822',
 '0823',
 '0824',
 '0825',
 '0826',
 '0828',
 '0829',
 '0830',
 '0831',
 '0832',
 '0833',
 '0834',
 '0835',
 '0836',
 '0837',
 '0838',
 '0841',
 '0842',
 '0843',
 '0844',
 '0845',
 '0846',
 '0847',
 '0848',
 '0849',
 '0850',
 '0852',
 '0853',
 '0855',
 '0857',
 '0858',
 '0859',
 '0900',
 '0901',
 '0902',
 '0904',
 '0905',
 '0906',
 '0907',
 '0908',
 '0909',
 '0910',
 '0911',
 '0912',
 '0913',
 '0914',
 '0915',
 '0916',
 '0917',
 '0918',
 '0919',
 '0920',
 '0921',
 '0922',
 '0923',
 '0924',
 '0925',
 '0926',
 '0927',
 '0928',
 '0930',
 '0931',
 '0932',
 '0934',
 '0935',
 '0936',
 '0937',
 '0938',
 '0939',
 '0940',
 '0941',
 '0942',
 '0943',
 '0944',
 '0945',
 '0946',
 '0947',
 '0948',
 '0949',
 '0950',
 '0951',
 '0952',
 '0953',
 '0954',
 '0955',
 '0957',
 '0958',
 '0959',
 '1000',
 '1001',
 '1002',
 '1004',
 '1005',
 

# Standardise Time Period

In [22]:
# List the distinct 'Time Period' values (start - end ranges in mixed formats).
set(tracked_inloc_df['Time Period'])

{'07 45 - 07 55',
 '07 50 - 08 00',
 '0758-0808',
 '08 00 - 08 10',
 '08 06 - 08 16',
 '08 22 - 08 32',
 '08 37 - 08 47',
 '0805-0815',
 '0805-0816',
 '0807-0817',
 '0811 - 0821',
 '0817-0827',
 '0818-0828',
 '0821-0831',
 '0825-0835',
 '0828 - 0838',
 '0834-0844',
 '0835-0845',
 '0840-0850',
 '0842-0852',
 '0846 - 0856',
 '0850-0900',
 '0853-0903',
 '0859 - 0909',
 '09 00 - 09 10',
 '09 21 - 09 31',
 '09 32 - 09 42',
 '09 35 - 09 45',
 '09.33-09.43',
 '0900-0910',
 '0901-0911',
 '0903 - 0912',
 '0905 - 0915',
 '0906-0916',
 '0910-0920',
 '0912 - 1012',
 '0913-0923',
 '0914-0924',
 '0915-0925',
 '0917-0927',
 '0920 - 0930',
 '0922-0932',
 '0924 - 0934',
 '0928 - 0938',
 '0931-0941',
 '0932-0942',
 '0934-0944',
 '0935-0945',
 '0936-0946',
 '0937-0947',
 '0938-0948',
 '0939 - 0949',
 '0940 - 0950',
 '0940-0950',
 '0949-0959',
 '0951-1001',
 '0953-1003',
 '0956-1006',
 '0956-1007',
 '09:25-09:35',
 '09:27-09:37',
 '09:30 - 09:40',
 '09:46 - 09:56',
 '09:49-09:59',
 '09:50-10:00',
 '10 05 

In [23]:
# Keep only the start of each period: the text before the first '-', with
# surrounding whitespace removed.
# The column is assigned back by label because rows yielded by iterrows() may
# be copies; the .str accessor also passes missing values through.
tracked_inloc_df['Time Period'] = (
    tracked_inloc_df['Time Period'].str.split('-').str[0].str.strip())

In [24]:
# Check the distinct period start times; their formats are still mixed.
set(tracked_inloc_df['Time Period'])

{'07 45',
 '07 50',
 '0758',
 '08 00',
 '08 06',
 '08 22',
 '08 37',
 '0805',
 '0807',
 '0811',
 '0817',
 '0818',
 '0821',
 '0825',
 '0828',
 '0834',
 '0835',
 '0840',
 '0842',
 '0846',
 '0850',
 '0853',
 '0859',
 '09 00',
 '09 21',
 '09 32',
 '09 35',
 '09.33',
 '0900',
 '0901',
 '0903',
 '0905',
 '0906',
 '0910',
 '0912',
 '0913',
 '0914',
 '0915',
 '0917',
 '0920',
 '0922',
 '0924',
 '0928',
 '0931',
 '0932',
 '0934',
 '0935',
 '0936',
 '0937',
 '0938',
 '0939',
 '0940',
 '0949',
 '0951',
 '0953',
 '0956',
 '09:25',
 '09:27',
 '09:30',
 '09:46',
 '09:49',
 '09:50',
 '10 05',
 '10 35',
 '10 55',
 '10.39',
 '1000',
 '1001',
 '1003',
 '1005',
 '1007',
 '1012',
 '1014',
 '1018',
 '1020',
 '1021',
 '1027',
 '1028',
 '1029',
 '1037',
 '1040',
 '1042',
 '1045',
 '1047',
 '1048',
 '1050',
 '1053',
 '1058',
 '1059',
 '10:00',
 '10:05',
 '10:11',
 '10:13',
 '10:16',
 '10:19',
 '10:20',
 '10:21',
 '10:22',
 '10:31',
 '10:33',
 '10:36',
 '10:44',
 '10:45',
 '10:47',
 '10:52',
 '10:53',
 '11.10'

In [25]:
# This applies the same kind of clean-up to 'Time Period' as was done for 'Time'.
def normalise_period_start(raw_time):
    """Return a period start time as a zero-padded 'HHMM' string.

    Recognised inputs (surrounding whitespace is ignored):
      * 'H MM', 'HH MM', 'H:MM', 'HH:MM', 'H.MM', 'HH.MM'
      * 'H.M' / 'HH.M' - a decimal that lost its trailing zero ('9.3' -> '0930')
      * 'HMM'          - three digits without a separator ('935' -> '0935')
      * 'H'            - a bare hour ('9' -> '0900')

    Text in any other form (including text already in 'HHMM' form) is returned
    stripped but otherwise unchanged. Non-string values such as NaN are
    returned as they are.
    """
    if not isinstance(raw_time, str):
        return raw_time
    text = raw_time.strip()

    # hour, one separator (space, colon or dot), two-digit minutes
    match = re.fullmatch(r'(\d{1,2})[ :.](\d{2})', text)
    if match:
        return '{:0>2}{}'.format(*match.groups())

    # hour, dot, single minute digit: the tens digit of the minutes
    match = re.fullmatch(r'(\d{1,2})\.(\d)', text)
    if match:
        return '{:0>2}{}0'.format(*match.groups())

    # three digits: single-digit hour followed by the minutes
    if re.fullmatch(r'\d{3}', text):
        return '0' + text

    # one digit: the hour on its own
    if re.fullmatch(r'\d', text):
        return '0{}00'.format(text)

    return text


# Standardise the whole 'Time Period' column and assign it back by label; rows
# yielded by iterrows() may be copies, so editing them is not guaranteed to stick.
periods_before = tracked_inloc_df['Time Period']
periods_after = periods_before.map(normalise_period_start)
for before, after in zip(periods_before, periods_after):
    print('{} -> {}'.format(before, after))
# .values: assign by position so the frame's duplicated index labels are irrelevant
tracked_inloc_df['Time Period'] = periods_after.values

09 32 -> 0932
09 32 -> 0932
09 32 -> 0932
10 05 -> 1005
10 35 -> 1035
10 35 -> 1035
10 35 -> 1035
10 35 -> 1035
10 35 -> 1035
10 55 -> 1055
10 55 -> 1055
10 55 -> 1055
10 55 -> 1055
10 55 -> 1055
10 55 -> 1055
10 55 -> 1055
10 55 -> 1055
10 55 -> 1055
10 55 -> 1055
10 55 -> 1055
10 55 -> 1055
10 55 -> 1055
10 55 -> 1055
10 55 -> 1055
10 55 -> 1055
10 55 -> 1055
10 55 -> 1055
10 55 -> 1055
10 55 -> 1055
10 55 -> 1055
10 55 -> 1055
10 55 -> 1055
10 55 -> 1055
10 55 -> 1055
10 55 -> 1055
10 55 -> 1055
10 55 -> 1055
10 55 -> 1055
10 55 -> 1055
10 55 -> 1055
10 55 -> 1055
09:25 -> 0925
09:25 -> 0925
09:25 -> 0925
09:25 -> 0925
09:25 -> 0925
09:25 -> 0925
09:25 -> 0925
09:25 -> 0925
09:25 -> 0925
09:25 -> 0925
09:25 -> 0925
09:25 -> 0925
09:25 -> 0925
9:44 -> 0944
9:44 -> 0944
9:44 -> 0944
09:49 -> 0949
09:49 -> 0949
09:49 -> 0949
09:49 -> 0949
09:49 -> 0949
09:49 -> 0949
09:49 -> 0949
09:49 -> 0949
09:49 -> 0949
09:49 -> 0949
09:49 -> 0949
09:49 -> 0949
09:49 -> 0949
09:49 -> 0949
10:00 -> 

In [26]:
# Check the distinct 'Time Period' start times after standardisation.
set(tracked_inloc_df['Time Period'])

{'0740',
 '0745',
 '0750',
 '0758',
 '0800',
 '0802',
 '0805',
 '0806',
 '0807',
 '0810',
 '0811',
 '0814',
 '0817',
 '0818',
 '0821',
 '0822',
 '0825',
 '0828',
 '0834',
 '0835',
 '0837',
 '0840',
 '0842',
 '0844',
 '0845',
 '0846',
 '0847',
 '0850',
 '0853',
 '0859',
 '0900',
 '0901',
 '0903',
 '0905',
 '0906',
 '0908',
 '0910',
 '0912',
 '0913',
 '0914',
 '0915',
 '0917',
 '0919',
 '0920',
 '0921',
 '0922',
 '0924',
 '0925',
 '0927',
 '0928',
 '0930',
 '0931',
 '0932',
 '0933',
 '0934',
 '0935',
 '0936',
 '0937',
 '0938',
 '0939',
 '0940',
 '0944',
 '0945',
 '0946',
 '0949',
 '0950',
 '0951',
 '0953',
 '0956',
 '1000',
 '1001',
 '1003',
 '1005',
 '1007',
 '1011',
 '1012',
 '1013',
 '1014',
 '1016',
 '1018',
 '1019',
 '1020',
 '1021',
 '1022',
 '1027',
 '1028',
 '1029',
 '1031',
 '1033',
 '1035',
 '1036',
 '1037',
 '1039',
 '1040',
 '1042',
 '1044',
 '1045',
 '1047',
 '1048',
 '1050',
 '1052',
 '1053',
 '1055',
 '1058',
 '1059',
 '1100',
 '1102',
 '1104',
 '1110',
 '1114',
 '1118',
 

In [27]:
# Write the cleaned data; this overwrites the 'data.csv' saved earlier in the notebook.
tracked_inloc_df.to_csv('data.csv')