### Introduction

[Ocearch.org](https://www.ocearch.org/about/) is an organization that collects data on marine wildlife, including sharks. This notebook pulls data from the ocearch.org tracker and formats it for follow-on analysis.

### Code

#### Pull data

In [2]:
import requests

# Tracker endpoint queried for the shark records; JSON is requested explicitly
# through the Accept header.
url = "http://www.ocearch.org/tracker/ajax/filter-sharks"
headers = {'Accept' : 'application/json'}

# requests applies no timeout by default, so without one this cell could hang
# indefinitely if the server never answers.
resp = requests.get(url, headers=headers, timeout=30)

# Fail right here on a 4xx/5xx status instead of later, when an error page
# would be handed to resp.json().
resp.raise_for_status()
resp

<Response [200]>

#### Check out one row of data

In [13]:
# Decode the JSON payload and show its first record to see which fields exist.
shark_records = resp.json()
shark_records[0]

{'active': '1',
 'cnt_inactive_pings': '0',
 'description': '<p>Oprah was named by expedition leader Chris Fischer. Fischer named her after Oprah Winfrey, one of America&#39;s great philanthropists who has done much with education in Africa.</p>\r\n',
 'dist_24_hours': '0.000',
 'dist_72_hours': '0.000',
 'dist_total': '2816.662',
 'gender': 'Female',
 'id': 3,
 'images': [{'description': None,
   'encrypted_name': '4ffefafc2f699e5837c56cb2043b9798',
   'filename': 'Screen Shot 2013-06-20 at 11.15.57 AM.png',
   'id': '218',
   'is_primary': True},
  {'description': None,
   'encrypted_name': '20cfc54dc392bfbafbfa34b70b00f3fd',
   'filename': 'Screen Shot 2013-06-20 at 10.58.41 AM.jpg',
   'id': '188',
   'is_primary': False},
  {'description': None,
   'encrypted_name': '2ccdb39e8022b9227f63839430cfea1a',
   'filename': 'Screen Shot 2013-06-20 at 11.04.35 AM.jpg',
   'id': '189',
   'is_primary': False},
  {'description': None,
   'encrypted_name': '7b99c58d1b258c5eb5f5d8485af34712',


#### Parse main json response

In [14]:
import pandas as pd

# One DataFrame row per record in the decoded JSON payload.
df = pd.DataFrame(resp.json())

# Fields shown in the preview below; the list is reused later in the notebook.
columns = ['id', 'name', 'gender', 'species', 'weight', 'tagDate']
df.loc[:, columns].head()

Unnamed: 0,id,name,gender,species,weight,tagDate
0,3,Oprah,Female,White Shark (Carcharodon carcharias),686 lb,7 March 2012
1,4,Albertina,Female,White Shark (Carcharodon carcharias),1110 lb,8 March 2012
2,5,Helen,Female,White Shark (Carcharodon carcharias),765 lb,8 March 2012
3,6,Brenda,Female,White Shark (Carcharodon carcharias),1310 lb,8 March 2012
4,7,Madiba,Male,White Shark (Carcharodon carcharias),659 lb,8 March 2012


#### Extract ping data
Each row in the dataframe represents an individual shark, and each shark may have hundreds or thousands of 'pings' associated with it.

In [9]:
# First three ping records stored in the `pings` cell of the row labelled 0.
df.loc[0, 'pings'][:3]

[{'active': '1',
  'datetime': '6 July 2014 1:57:28 PM',
  'id': '36902',
  'latitude': '-34.60661',
  'longitude': '21.15244',
  'tz_datetime': '6 July 2014 1:57:28 PM +0900'},
 {'active': '1',
  'datetime': '23 June 2014 11:40:09 AM',
  'id': '36666',
  'latitude': '-34.78752',
  'longitude': '19.42479',
  'tz_datetime': '23 June 2014 11:40:09 AM +0900'},
 {'active': '1',
  'datetime': '15 June 2014 10:15:44 PM',
  'id': '36500',
  'latitude': '-34.42487',
  'longitude': '21.09754',
  'tz_datetime': '15 June 2014 10:15:44 PM +0900'}]

In [17]:
# Build one DataFrame of pings per row of `df`. Each ping record carries its
# own `id` field; assigning `id` here replaces it with the id of the owning
# row, so every ping is labelled with the shark it belongs to.
ping_frames = [
    pd.DataFrame(shark.pings).assign(id=shark.id)
    for shark in df.itertuples()
]

len(ping_frames)

275

In [24]:
# Preview the first five rows of the ping frame built for the first shark.
ping_frames[0].head(n=5)

Unnamed: 0,active,datetime,id,latitude,longitude,tz_datetime
0,1,6 July 2014 1:57:28 PM,3,-34.60661,21.15244,6 July 2014 1:57:28 PM +0900
1,1,23 June 2014 11:40:09 AM,3,-34.78752,19.42479,23 June 2014 11:40:09 AM +0900
2,1,15 June 2014 10:15:44 PM,3,-34.42487,21.09754,15 June 2014 10:15:44 PM +0900
3,1,3 June 2014 11:23:57 AM,3,-34.70432271674724,20.21013441406251,3 June 2014 11:23:57 AM +0900
4,1,29 May 2014 4:53:57 AM,3,-34.65556,19.37459,29 May 2014 4:53:57 AM +0900


#### Enrich ping data with original data

In [25]:
# Stack the per-shark frames into one long table of pings.
# ignore_index=True gives the result a fresh 0..n-1 index; without it each
# shark's pings restart at 0, leaving duplicated index labels that make
# label-based lookups on `pings` ambiguous.
pings = pd.concat(ping_frames, ignore_index=True)
pings.shape

(75977, 6)

In [26]:
# First five rows of the combined ping table.
pings.iloc[:5]

Unnamed: 0,active,datetime,id,latitude,longitude,tz_datetime
0,1,6 July 2014 1:57:28 PM,3,-34.60661,21.15244,6 July 2014 1:57:28 PM +0900
1,1,23 June 2014 11:40:09 AM,3,-34.78752,19.42479,23 June 2014 11:40:09 AM +0900
2,1,15 June 2014 10:15:44 PM,3,-34.42487,21.09754,15 June 2014 10:15:44 PM +0900
3,1,3 June 2014 11:23:57 AM,3,-34.70432271674724,20.21013441406251,3 June 2014 11:23:57 AM +0900
4,1,29 May 2014 4:53:57 AM,3,-34.65556,19.37459,29 May 2014 4:53:57 AM +0900


In [27]:
# Reminder of the per-shark fields about to be joined onto the pings.
df.head()[columns]

Unnamed: 0,id,name,gender,species,weight,tagDate
0,3,Oprah,Female,White Shark (Carcharodon carcharias),686 lb,7 March 2012
1,4,Albertina,Female,White Shark (Carcharodon carcharias),1110 lb,8 March 2012
2,5,Helen,Female,White Shark (Carcharodon carcharias),765 lb,8 March 2012
3,6,Brenda,Female,White Shark (Carcharodon carcharias),1310 lb,8 March 2012
4,7,Madiba,Male,White Shark (Carcharodon carcharias),659 lb,8 March 2012


In [28]:
# Attach the per-shark fields to every ping, matching on the shared `id` column.
joined = pings.merge(df[columns], on='id')

# The merge is an inner join: a ping whose id has no match would be dropped and
# a duplicated shark id would multiply rows. Either would silently corrupt the
# exported data, so check that every ping survived exactly once.
assert len(joined) == len(pings), (
    "merge changed the number of ping rows: %d -> %d" % (len(pings), len(joined))
)
joined.shape

(75977, 11)

In [29]:
# First five rows of the enriched ping table.
joined.iloc[:5]

Unnamed: 0,active,datetime,id,latitude,longitude,tz_datetime,name,gender,species,weight,tagDate
0,1,6 July 2014 1:57:28 PM,3,-34.60661,21.15244,6 July 2014 1:57:28 PM +0900,Oprah,Female,White Shark (Carcharodon carcharias),686 lb,7 March 2012
1,1,23 June 2014 11:40:09 AM,3,-34.78752,19.42479,23 June 2014 11:40:09 AM +0900,Oprah,Female,White Shark (Carcharodon carcharias),686 lb,7 March 2012
2,1,15 June 2014 10:15:44 PM,3,-34.42487,21.09754,15 June 2014 10:15:44 PM +0900,Oprah,Female,White Shark (Carcharodon carcharias),686 lb,7 March 2012
3,1,3 June 2014 11:23:57 AM,3,-34.70432271674724,20.21013441406251,3 June 2014 11:23:57 AM +0900,Oprah,Female,White Shark (Carcharodon carcharias),686 lb,7 March 2012
4,1,29 May 2014 4:53:57 AM,3,-34.65556,19.37459,29 May 2014 4:53:57 AM +0900,Oprah,Female,White Shark (Carcharodon carcharias),686 lb,7 March 2012


### Write to disk

In [30]:
from IPython.display import FileLink

# Write the enriched ping table to CSV, leaving the DataFrame index out of the
# file, then display a link to the written file.
fname = 'sharks_cleaned.csv'
joined.to_csv(fname, index=False)

FileLink(fname)