## Background

> A main dataset records about 18,000 cat 'events' via sensors attached to 101 cats in Cornwall, UK during the second half of 2017. A second dataset gives details on each of the cats. One possible question is: did any of the cats meet during this period? An answer: Two cats came within 12m of each other late one night in mid-July.

## Imports & cleaning

In [192]:
# pandas for the tabular data; raise the display limit to 200 columns so wide frames are not truncated
import pandas as pd
pd.set_option('display.max_columns', 200)

# folium is used at the end of the notebook to draw the map
import folium

# geodesic helper on the WGS84 ellipsoid, used to measure the distance between two lat/long fixes
from pyproj import Geod
wgs84_geod = Geod(ellps='WGS84')

# NOTE(review): category=Warning is the base class, so this silences every warning,
# including any pandas warnings raised by the column assignments further down
import warnings
warnings.simplefilter(action = 'ignore', category = Warning)


In [2]:
# load the per-cat reference table (one row per tracked animal)
# NOTE(review): `path` is an absolute, machine-specific location; adjust it before running elsewhere

path = '/Users/compj/Documents/DATA/ALL_DATASETS/cats/'
file = 'cats_uk_ref.csv'

dfr = pd.read_csv(f'{path}{file}')
print(dfr.shape)

dfr.head(2)

(101, 21)


Unnamed: 0,tag-id,animal-id,animal-taxon,deploy-on-date,deploy-off-date,animal-comments,animal-life-stage,animal-reproductive-condition,animal-sex,attachment-type,...,deployment-end-type,deployment-id,duty-cycle,manipulation-comments,manipulation-type,study-site,tag-manufacturer-name,tag-mass,tag-model,tag-readout-method
0,Tommy-Tag,Tommy,Felis catus,2017-06-03 01:02:09.000,2017-06-10 02:10:52.000,Hunt: Yes; prey_p_month: 12.5,11 years,Neutered,m,collar,...,removal,Tommy,3-min fixes,"hrs_indoors: 12.5; n_cats: 2; Food: Dry- Yes, ...",manipulated other,UK,"Mobile Action Technology, Inc.",119.0,i-GotU,tag retreival
1,Athena,Athena,Felis catus,2017-06-24 01:02:13.000,2017-06-30 23:59:32.000,Hunt: Yes; prey_p_month: 3,3 years,Spayed,f,collar,...,removal,Athena,3-min fixes,"hrs_indoors: 7.5; n_cats: 2; Food: Dry- Yes, W...",manipulated other,UK,"Mobile Action Technology, Inc.",119.0,i-GotU,tag retreival


In [3]:
# list the reference-table columns, i.e. the variables recorded for each cat

dfr.columns

Index(['tag-id', 'animal-id', 'animal-taxon', 'deploy-on-date',
       'deploy-off-date', 'animal-comments', 'animal-life-stage',
       'animal-reproductive-condition', 'animal-sex', 'attachment-type',
       'data-processing-software', 'deployment-end-type', 'deployment-id',
       'duty-cycle', 'manipulation-comments', 'manipulation-type',
       'study-site', 'tag-manufacturer-name', 'tag-mass', 'tag-model',
       'tag-readout-method'],
      dtype='object')

In [4]:
# load the events table (one row per GPS fix)
# NOTE(review): `path` is an absolute, machine-specific location; adjust it before running elsewhere

path = '/Users/compj/Documents/DATA/ALL_DATASETS/cats/'
file = 'cats_uk.csv'

df = pd.read_csv(f'{path}{file}')
print(df.shape)
df.head(3)

(18215, 14)


Unnamed: 0,event-id,visible,timestamp,location-long,location-lat,algorithm-marked-outlier,ground-speed,height-above-ellipsoid,manually-marked-outlier,sensor-type,individual-taxon-canonical-name,tag-local-identifier,individual-local-identifier,study-name
0,3395610551,True,2017-06-24 01:03:57.000,-5.113851,50.170315,,684.0,154.67,,gps,Felis catus,Ares,Ares,Pet Cats United Kingdom
1,3395610552,True,2017-06-24 01:11:20.000,-5.113851,50.170315,,936.0,154.67,,gps,Felis catus,Ares,Ares,Pet Cats United Kingdom
2,3395610553,True,2017-06-24 02:58:16.000,-5.11373,50.169876,,2340.0,81.35,,gps,Felis catus,Ares,Ares,Pet Cats United Kingdom


In [5]:
# list the raw event-table column names so they can be given shorter names

df.columns

Index(['event-id', 'visible', 'timestamp', 'location-long', 'location-lat',
       'algorithm-marked-outlier', 'ground-speed', 'height-above-ellipsoid',
       'manually-marked-outlier', 'sensor-type',
       'individual-taxon-canonical-name', 'tag-local-identifier',
       'individual-local-identifier', 'study-name'],
      dtype='object')

In [6]:
# rename cols
#
# The mapping is keyed by the original column name rather than assigned by
# position, so a change in the CSV's column order or count cannot silently
# attach a short name to the wrong data. 'visible' keeps its name.

df = df.rename(columns={
    'event-id': 'event_id',
    'timestamp': 'time',
    'location-long': 'long',
    'location-lat': 'lat',
    'algorithm-marked-outlier': 'algo_outlier',
    'ground-speed': 'speed_ms',
    'height-above-ellipsoid': 'height_m',
    'manually-marked-outlier': 'manual_outlier',
    'sensor-type': 'sensor',
    'individual-taxon-canonical-name': 'individual_taxon_canonical_name',
    'tag-local-identifier': 'tag_name',
    'individual-local-identifier': 'name',
    'study-name': 'study',
})

In [7]:
# single observation in variables: sensor, individual-taxon-canonical-name, study. All occur 18215 times, so DROPPED
# drop 'ground speed' column as data too suspect
#
# .copy() makes the reduced frame an independent object: later cells add and
# overwrite columns on it, which would otherwise be assignments into a slice
# of the original frame.

df = df[['event_id', 'visible', 'time', 'lat', 'long', 'height_m',
         'tag_name', 'name', 'algo_outlier', 'manual_outlier']].copy()

In [8]:
# parse the timestamp strings into datetime64[ns] values

df = df.assign(time=df['time'].astype('datetime64[ns]'))

In [9]:
# add epoch col: each event time expressed as a POSIX timestamp (float seconds)

df['epoch'] = df['time'].apply(lambda stamp: stamp.timestamp())

In [12]:
# look-up table from animal id to sex, taken from the reference dataframe

dd = dfr.loc[:, ['animal-id', 'animal-sex']]
this_dict = dict(dd.itertuples(index=False, name=None))

# attach each event's sex by looking up the cat's name
df['sex'] = df['name'].map(this_dict)

In [15]:
# remove outliers: keep only events flagged neither by the algorithm nor manually

not_algo_outlier = df['algo_outlier'].ne(True)
not_manual_outlier = df['manual_outlier'].ne(True)
df = df[not_algo_outlier & not_manual_outlier]

In [194]:
# the cleaned, reduced events frame: report its size, then preview the first rows

print(df.shape)
df.head(3)

(17866, 12)


Unnamed: 0,event_id,visible,time,lat,long,height_m,tag_name,name,algo_outlier,manual_outlier,epoch,sex
0,3395610551,True,2017-06-24 01:03:57,50.170315,-5.113851,154.67,Ares,Ares,,,1498266000.0,m
1,3395610552,True,2017-06-24 01:11:20,50.170315,-5.113851,154.67,Ares,Ares,,,1498267000.0,m
2,3395610553,True,2017-06-24 02:58:16,50.169876,-5.11373,81.35,Ares,Ares,,,1498273000.0,m


## Did two cats meet at any point?

In [16]:
# what time stamps occur more than once?
# value_counts() lists the most frequent values first, so any repeated timestamps appear at the top

df.time.value_counts()

2017-07-22 23:10:14    2
2017-07-15 00:07:00    2
2017-07-21 00:02:18    2
2017-07-29 09:58:57    2
2017-07-24 00:07:10    2
                      ..
2017-07-20 14:24:20    1
2017-07-20 15:11:12    1
2017-07-20 22:43:38    1
2017-07-20 22:46:57    1
2017-11-27 00:09:29    1
Name: time, Length: 17817, dtype: int64

In [21]:
# collect every timestamp that occurs more than once
#
# Filtering on the count itself replaces a hard-coded slice length, so the list
# stays correct if the cleaning steps above change how many timestamps repeat.
# The order (most frequent first, as returned by value_counts) is unchanged.

time_counts = df.time.value_counts()
times_list = list(time_counts[time_counts > 1].index)

In [24]:
# look at a matching pair: the events that share the timestamp at position 4 of times_list

df[df.time == times_list[4]]

Unnamed: 0,event_id,visible,time,lat,long,height_m,tag_name,name,algo_outlier,manual_outlier,epoch,sex
5605,3459579723,True,2017-07-24 00:07:10,50.438293,-4.6095,149.23,Tom-Tag,Tom,,,1500855000.0,f
6239,3466874756,True,2017-07-24 00:07:10,50.149246,-5.077589,61.92,Bumbles-Tag,Bumbles,,,1500855000.0,f


In [29]:
# keep only the events whose timestamp is one of the repeated timestamps

shares_timestamp = df['time'].isin(times_list)
pairs = df[shares_timestamp].copy()

In [32]:
# order the rows by timestamp so that events sharing a timestamp sit on adjacent rows

pairs = pairs.sort_values('time')

In [165]:
# preview the first six rows of the time-ordered frame
pairs.head(6)

Unnamed: 0,event_id,visible,time,lat,long,height_m,tag_name,name,algo_outlier,manual_outlier,epoch,sex
84,3395610635,True,2017-06-30 07:52:42,50.170071,-5.114016,79.82,Ares,Ares,,,1498809000.0,m
416,3395925965,True,2017-06-30 07:52:42,50.146648,-5.073758,28.05,Lola,Lola,,,1498809000.0,f
1176,3403154374,True,2017-07-02 00:02:02,50.152744,-5.076456,56.04,Coco,Coco,,,1498954000.0,f
1773,3407154381,True,2017-07-02 00:02:02,50.158855,-5.086765,147.56,Nettle-Tag,Nettle,,,1498954000.0,f
2007,3407233488,True,2017-07-09 23:33:51,50.155525,-5.080747,62.33,Carbonel-Tag,Carbonel,,,1499643000.0,m
3569,3434335229,True,2017-07-09 23:33:51,50.406281,-4.232153,35.63,Fonzie-Tag,Fonzie,,,1499643000.0,m


In [96]:
# build a list of pairs of event_ids, one pair per shared timestamp
#
# Events are grouped by timestamp instead of stepping through rows 1&2, 3&4, ...
# a fixed number of times. This covers every shared timestamp (a fixed count of
# 48 stops one pair short when 49 timestamps repeat) and cannot drift out of
# alignment if some timestamp were shared by more than two events.

idlist = []

for _, same_time in pairs.groupby('time'):
    ids = same_time.event_id.tolist()
    # every unordered combination in the group: exactly one pair when the
    # timestamp occurs twice
    idlist.extend([ids[i], ids[j]]
                  for i in range(len(ids))
                  for j in range(i + 1, len(ids)))
    

In [210]:
# which gives, for example, the two event ids stored at position 7

idlist[7]

[3424068549, 3434650508]

In [99]:
# Geodesic distance in meters between two recorded events

def Distance(id1, id2, pairs):
    """Return the WGS84 distance between the events with ids id1 and id2.

    Each event is looked up by its event_id in `pairs`, and the lat/long of
    the two matches are handed to wgs84_geod.inv. Only the first element of
    the resulting distances is returned.
    """
    first = pairs[pairs.event_id == id1]
    second = pairs[pairs.event_id == id2]
    _, _, dist = wgs84_geod.inv(first.long, first.lat, second.long, second.lat)
    return dist[0]


In [166]:
# spot-check the helper on two event ids
# NOTE(review): in the results further down these two ids belong to different timestamp pairs,
# so this looks like a smoke test of the function rather than a same-time distance - confirm
Distance(3669580557, 3669580563, pairs)

8.098774495702012

In [101]:
# return distance between each pair of two points that share a timestamp: assemble results into a list of lists
#
# Each entry is [distance_in_meters, [event_id_1, event_id_2]]. The ids are
# looked up in `pairs`, the frame they were taken from, rather than in the
# full events frame.

dist_list = [[Distance(pair[0], pair[1], pairs), pair] for pair in idlist]


In [102]:
# order the entries from the smallest distance to the largest
dist_list = sorted(dist_list, key=lambda entry: entry[0])

# show the ten closest pairs
dist_list[:10]

[[12.093935837150926, [3424068550, 3411705019]],
 [24.73185196794881, [3669580557, 3669564051]],
 [27.990842098322485, [3669580563, 3669564064]],
 [49.77572199512557, [3459579605, 3459516210]],
 [82.73165352109858, [3669558771, 3669563963]],
 [148.99185925349062, [3507105157, 3507104834]],
 [187.8196191221107, [3544856075, 3544857618]],
 [220.4351567214042, [3466879300, 3467011822]],
 [487.6847762295395, [3459516358, 3459579774]],
 [1002.3981294601922, [3403154374, 3407154381]]]

In [195]:
# the two events that make up the 12m pair

pairs[pairs['event_id'].isin([3424068550, 3411705019])]

Unnamed: 0,event_id,visible,time,lat,long,height_m,tag_name,name,algo_outlier,manual_outlier,epoch,sex
3179,3424068550,True,2017-07-16 00:18:00,50.147152,-5.062538,53.66,Rusty-Tag,Rusty,,,1500164000.0,m
2677,3411705019,True,2017-07-16 00:18:00,50.147228,-5.062417,30.21,Indie-Tag,Indie,,,1500164000.0,m


In [197]:
# Who are these two cats? Pull their rows from the reference dataset

dfr[dfr['animal-id'].isin(['Rusty', 'Indie'])]

Unnamed: 0,tag-id,animal-id,animal-taxon,deploy-on-date,deploy-off-date,animal-comments,animal-life-stage,animal-reproductive-condition,animal-sex,attachment-type,data-processing-software,deployment-end-type,deployment-id,duty-cycle,manipulation-comments,manipulation-type,study-site,tag-manufacturer-name,tag-mass,tag-model,tag-readout-method
17,Indie-Tag,Indie,Felis catus,2017-07-09 01:03:00.000,2017-07-16 09:33:00.000,Hunt: Yes; prey_p_month: 0.5,3 years,Neutered,m,collar,@Trip PC,removal,Indie,3-min fixes,"hrs_indoors: 17.5; n_cats: 2; Food: Dry- Yes, ...",manipulated other,UK,"Mobile Action Technology, Inc.",119.0,i-GotU,tag retreival
19,Rusty-Tag,Rusty,Felis catus,2017-07-09 02:19:00.000,2017-07-16 00:34:00.000,Hunt: Yes; prey_p_month: 3,4 years,Neutered,m,collar,@Trip PC,removal,Rusty,3-min fixes,"hrs_indoors: 17.5; n_cats: 1; Food: Dry- Yes, ...",manipulated other,UK,"Mobile Action Technology, Inc.",119.0,i-GotU,tag retreival


In [173]:
def _on_days(cat_events, days):
    """Return the rows of cat_events whose timestamp falls on one of the given 'YYYY-MM-DD' days."""
    day_labels = cat_events['time'].dt.strftime('%Y-%m-%d')
    return cat_events[day_labels.isin(days)]

# the cats cross overnight July 15-16, so both calendar days are kept
encounter_days = ['2017-07-15', '2017-07-16']

# one events frame per cat
rusty = df[df['name'] == 'Rusty']
indie = df[df['name'] == 'Indie']

# each cat's fixes restricted to those two days
rusty_2d = _on_days(rusty, encounter_days)
indie_2d = _on_days(indie, encounter_days)

# (lat, long) tuples for each cat, ready for plotting
list_rusty_locs = list(zip(rusty_2d['lat'], rusty_2d['long']))
list_indie_locs = list(zip(indie_2d['lat'], indie_2d['long']))


## Rusty & Indie, 18 mins past midnight on July 16, 2017

> The map shows all points recorded for Rusty (red) and Indie (green) on July 15th and 16th, 2017. The larger points show where each cat was at 00:18 on July 16, when their simultaneous fixes were about 12m apart.

In [208]:
def _add_dot(target_map, lat, lon, colour, alpha, size):
    """Add one filled circle marker of the given colour, opacity and radius to target_map."""
    folium.CircleMarker(location=[lat, lon],
                        color=colour,
                        opacity=alpha,
                        fill_opacity=alpha,
                        fill_color=colour,
                        radius=size).add_to(target_map)

# base map centred on Indie's position at the crossing, with the Carto positron tiles
mapit = folium.Map(location=[50.147228, -5.062417], zoom_start=18)
folium.TileLayer('cartodbpositron').add_to(mapit)

# all fixes for the two days as small, faint dots: Rusty in red, then Indie in green
for colour, trail in (('red', list_rusty_locs), ('green', list_indie_locs)):
    for lat, lon in trail:
        _add_dot(mapit, lat, lon, colour, 0.2, 2)

# the two crossing points as larger, fully opaque dots: Rusty first, then Indie
_add_dot(mapit, 50.147152, -5.062538, 'red', 1.0, 4)
_add_dot(mapit, 50.147228, -5.062417, 'green', 1.0, 4)

mapit