In [1]:
import pandas as pd
import numpy as np
import requests
import json
from pprint import pprint
from citipy import citipy
import math
import time

# Google API Key
from config import gkey
# OpenWeatherMap API key
from api_keys import api_key

from sqlalchemy import create_engine

import pymysql
pymysql.install_as_MySQLdb()

# Hide warning messages in notebook
import warnings
warnings.filterwarnings('ignore')

### Store CSV into DataFrame: Vulture Migration Paths

In [2]:
excel_file = "Resources/Turkey vultures in North and South America - migration.xlsx"
vulture_data_df = pd.read_excel(excel_file, index_col=None)
vulture_data_df.head(2)

Unnamed: 0,event-id,visible,timestamp,location-long,location-lat,manually-marked-outlier,sensor-type,individual-taxon-canonical-name,tag-local-identifier,individual-local-identifier,...,animal-life-stage,animal-mass,attachment-type,deployment-comments,deployment-id,duty-cycle,study-site,tag-manufacturer-name,tag-mass,tag-model
0,283203879,True,2003-11-14 16:00:00.000,-75.39717,40.48933,False,gps,Cathartes aura,42500,Butterball,...,adult,2372.0,harness,trapped in Pennsylvania using padded-leg hold ...,42500-Butterball,1 fix per hour,East Coast of North America,Microwave Telemetry,70,PTT100
1,283203880,True,2003-11-14 17:00:00.000,-75.39717,40.48933,False,gps,Cathartes aura,42500,Butterball,...,adult,2372.0,harness,trapped in Pennsylvania using padded-leg hold ...,42500-Butterball,1 fix per hour,East Coast of North America,Microwave Telemetry,70,PTT100


In [3]:
vulture_data_df.columns

Index(['event-id', 'visible', 'timestamp', 'location-long', 'location-lat',
       'manually-marked-outlier', 'sensor-type',
       'individual-taxon-canonical-name', 'tag-local-identifier',
       'individual-local-identifier', 'study-name', 'utm-easting',
       'utm-northing', 'utm-zone', 'study-timezone', 'study-local-timestamp',
       'tag-id', 'animal-id', 'animal-taxon', 'deploy-on-date',
       'deploy-off-date', 'animal-comments', 'animal-life-stage',
       'animal-mass', 'attachment-type', 'deployment-comments',
       'deployment-id', 'duty-cycle', 'study-site', 'tag-manufacturer-name',
       'tag-mass', 'tag-model'],
      dtype='object')

In [4]:
vulture_data_df.keys()

Index(['event-id', 'visible', 'timestamp', 'location-long', 'location-lat',
       'manually-marked-outlier', 'sensor-type',
       'individual-taxon-canonical-name', 'tag-local-identifier',
       'individual-local-identifier', 'study-name', 'utm-easting',
       'utm-northing', 'utm-zone', 'study-timezone', 'study-local-timestamp',
       'tag-id', 'animal-id', 'animal-taxon', 'deploy-on-date',
       'deploy-off-date', 'animal-comments', 'animal-life-stage',
       'animal-mass', 'attachment-type', 'deployment-comments',
       'deployment-id', 'duty-cycle', 'study-site', 'tag-manufacturer-name',
       'tag-mass', 'tag-model'],
      dtype='object')

### Create new data with select columns

In [5]:
# Select columns 
new_vulture_data_df = vulture_data_df[['event-id', 'timestamp', 'location-long', 'location-lat','individual-taxon-canonical-name', 'tag-local-identifier',
       'individual-local-identifier']].copy()
new_vulture_data_df.head()

Unnamed: 0,event-id,timestamp,location-long,location-lat,individual-taxon-canonical-name,tag-local-identifier,individual-local-identifier
0,283203879,2003-11-14 16:00:00.000,-75.39717,40.48933,Cathartes aura,42500,Butterball
1,283203880,2003-11-14 17:00:00.000,-75.39717,40.48933,Cathartes aura,42500,Butterball
2,283203881,2003-11-14 18:00:00.000,-75.33317,40.32467,Cathartes aura,42500,Butterball
3,283203882,2003-11-14 19:00:00.000,-75.35617,40.33983,Cathartes aura,42500,Butterball
4,283203883,2003-11-14 20:00:00.000,-75.4265,40.3155,Cathartes aura,42500,Butterball


### Clean DataFrame

In [6]:
new_vulture_data_df.count()

event-id                           220077
timestamp                          220077
location-long                      220077
location-lat                       220077
individual-taxon-canonical-name    220077
tag-local-identifier               220077
individual-local-identifier        220077
dtype: int64

In [7]:
# drop rows without long and lat
new_vulture_data_df = new_vulture_data_df.dropna(how="any")
new_vulture_data_df.count()

event-id                           220077
timestamp                          220077
location-long                      220077
location-lat                       220077
individual-taxon-canonical-name    220077
tag-local-identifier               220077
individual-local-identifier        220077
dtype: int64

In [8]:
# filter data to only keep turkey vulture (Cathartes aura) data
new_vulture_data_df = new_vulture_data_df.loc[new_vulture_data_df
                                              ["individual-taxon-canonical-name"] == "Cathartes aura", :]
new_vulture_data_df.count()

event-id                           220077
timestamp                          220077
location-long                      220077
location-lat                       220077
individual-taxon-canonical-name    220077
tag-local-identifier               220077
individual-local-identifier        220077
dtype: int64

In [9]:
new_vulture_data_df.dtypes

event-id                             int64
timestamp                           object
location-long                      float64
location-lat                       float64
individual-taxon-canonical-name     object
tag-local-identifier                 int64
individual-local-identifier         object
dtype: object

In [10]:
# check to prevent duplicate loading (inqury for database)

### Connect to local database

In [11]:
database_path = "vulture_etl"
engine = create_engine(f"sqlite:///{database_path}")

### Check for tables

In [12]:
engine.table_names()

['city_df', 'migration_paths', 'vulture_detail']

### Use pandas to load csv converted DataFrame into database

In [13]:
new_vulture_data_df.to_sql(name='migration_paths', con=engine, if_exists='append', index=False)

### Use pandas to import second csv: Vulture Info by name

In [14]:
csv_file = "Resources/Turkey vultures in North and South America-reference-data.csv"
vulture_info_df = pd.read_csv(csv_file, low_memory=False)
vulture_info_df.head()

Unnamed: 0,tag-id,animal-id,animal-taxon,deploy-on-date,deploy-off-date,animal-comments,animal-life-stage,animal-mass,attachment-type,deployment-comments,deployment-id,duty-cycle,study-site,tag-manufacturer-name,tag-mass,tag-model
0,42500,Butterball,Cathartes aura,2003-11-14 16:00:00.000,2004-03-14 20:00:01.000,migratory,adult,2372.0,harness,trapped in Pennsylvania using padded-leg hold ...,42500-Butterball,1 fix per hour,East Coast of North America,Microwave Telemetry,70.0,PTT100
1,52067,Irma,Cathartes aura,2004-09-06 17:00:00.000,2013-03-18 22:00:01.000,non-migratory,adult,2012.0,harness,trapped in Pennsylvania using padded-leg hold ...,52067-Irma,1 fix per hour,East Coast of North America,Microwave Telemetry,70.0,PTT100
2,42500,Schaumboch,Cathartes aura,2004-10-08 15:00:00.000,2006-03-29 17:00:01.000,migratory,adult,1951.0,harness,trapped in Pennsylvania using padded-leg hold ...,42500-Schaumboch,1 fix per hour,East Coast of North America,Microwave Telemetry,70.0,PTT100
3,52069,Disney,Cathartes aura,2004-10-11 14:00:00.000,2011-10-18 23:00:01.000,migratory,adult,2108.0,harness,trapped in Pennsylvania using padded-leg hold ...,52069-Disney,1 fix per hour,East Coast of North America,Microwave Telemetry,70.0,PTT100
4,57954,Prado,Cathartes aura,2005-11-02 15:00:00.000,2009-07-07 00:00:01.000,non-migratory,adult,1710.0,harness,trapped in California using walk-in traps,57954-Prado,1 fix per hour,West Coast of North America,Microwave Telemetry,70.0,PTT100


In [15]:
vulture_info_df.columns

Index(['tag-id', 'animal-id', 'animal-taxon', 'deploy-on-date',
       'deploy-off-date', 'animal-comments', 'animal-life-stage',
       'animal-mass', 'attachment-type', 'deployment-comments',
       'deployment-id', 'duty-cycle', 'study-site', 'tag-manufacturer-name',
       'tag-mass', 'tag-model'],
      dtype='object')

In [16]:
# Select columns 
new_vulture_info_df = vulture_info_df[['tag-id', 'animal-id', 'animal-taxon', 'deploy-on-date',
       'deploy-off-date', 'animal-comments', 'animal-life-stage',
       'animal-mass',  'deployment-comments',
        'study-site']].copy()
new_vulture_info_df.head(1)

Unnamed: 0,tag-id,animal-id,animal-taxon,deploy-on-date,deploy-off-date,animal-comments,animal-life-stage,animal-mass,deployment-comments,study-site
0,42500,Butterball,Cathartes aura,2003-11-14 16:00:00.000,2004-03-14 20:00:01.000,migratory,adult,2372.0,trapped in Pennsylvania using padded-leg hold ...,East Coast of North America


In [17]:
new_vulture_info_df.count()

tag-id                 19
animal-id              19
animal-taxon           19
deploy-on-date         19
deploy-off-date        19
animal-comments        19
animal-life-stage      19
animal-mass            12
deployment-comments    19
study-site             19
dtype: int64

In [18]:
new_vulture_info_df.to_sql(name='vulture_detail', con=engine, if_exists='append', index=False)

### Confirm data has been added by querying
* NOTE: can also check using pgAdmin

In [19]:
pd.read_sql_query('select * from migration_paths', con=engine).head()

Unnamed: 0,event-id,timestamp,location-long,location-lat,individual-taxon-canonical-name,tag-local-identifier,individual-local-identifier
0,283203879,2003-11-14 16:00:00.000,-75.39717,40.48933,Cathartes aura,42500,Butterball
1,283203880,2003-11-14 17:00:00.000,-75.39717,40.48933,Cathartes aura,42500,Butterball
2,283203881,2003-11-14 18:00:00.000,-75.33317,40.32467,Cathartes aura,42500,Butterball
3,283203882,2003-11-14 19:00:00.000,-75.35617,40.33983,Cathartes aura,42500,Butterball
4,283203883,2003-11-14 20:00:00.000,-75.4265,40.3155,Cathartes aura,42500,Butterball


In [20]:
pd.read_sql_query('select * from vulture_detail', con=engine).head()

Unnamed: 0,tag-id,animal-id,animal-taxon,deploy-on-date,deploy-off-date,animal-comments,animal-life-stage,animal-mass,deployment-comments,study-site
0,42500,Butterball,Cathartes aura,2003-11-14 16:00:00.000,2004-03-14 20:00:01.000,migratory,adult,2372.0,trapped in Pennsylvania using padded-leg hold ...,East Coast of North America
1,52067,Irma,Cathartes aura,2004-09-06 17:00:00.000,2013-03-18 22:00:01.000,non-migratory,adult,2012.0,trapped in Pennsylvania using padded-leg hold ...,East Coast of North America
2,42500,Schaumboch,Cathartes aura,2004-10-08 15:00:00.000,2006-03-29 17:00:01.000,migratory,adult,1951.0,trapped in Pennsylvania using padded-leg hold ...,East Coast of North America
3,52069,Disney,Cathartes aura,2004-10-11 14:00:00.000,2011-10-18 23:00:01.000,migratory,adult,2108.0,trapped in Pennsylvania using padded-leg hold ...,East Coast of North America
4,57954,Prado,Cathartes aura,2005-11-02 15:00:00.000,2009-07-07 00:00:01.000,non-migratory,adult,1710.0,trapped in California using walk-in traps,West Coast of North America


In [21]:
engine.table_names()

['city_df', 'migration_paths', 'vulture_detail']

In [22]:
conn = engine.connect()

from sqlalchemy.orm import Session
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()

session = Session(bind=engine)
Base.metadata.create_all(engine)
session.commit()

In [23]:
new_vulture_data_df.head()

Unnamed: 0,event-id,timestamp,location-long,location-lat,individual-taxon-canonical-name,tag-local-identifier,individual-local-identifier
0,283203879,2003-11-14 16:00:00.000,-75.39717,40.48933,Cathartes aura,42500,Butterball
1,283203880,2003-11-14 17:00:00.000,-75.39717,40.48933,Cathartes aura,42500,Butterball
2,283203881,2003-11-14 18:00:00.000,-75.33317,40.32467,Cathartes aura,42500,Butterball
3,283203882,2003-11-14 19:00:00.000,-75.35617,40.33983,Cathartes aura,42500,Butterball
4,283203883,2003-11-14 20:00:00.000,-75.4265,40.3155,Cathartes aura,42500,Butterball


In [24]:
vulture_city = new_vulture_data_df[['location-lat', 'location-long']]
vulture_city.head()

Unnamed: 0,location-lat,location-long
0,40.48933,-75.39717
1,40.48933,-75.39717
2,40.32467,-75.33317
3,40.33983,-75.35617
4,40.3155,-75.4265


# list of cities

In [25]:
lats = new_vulture_data_df['location-lat'].values.tolist()
lngs = new_vulture_data_df['location-long'].values.tolist()
type(lngs)

list

In [26]:
lat_lngs = []
cities = []
lat_lngs = zip(lats, lngs)
lat_lngs_s = set(lat_lngs)
lat_lngs_a = list(lat_lngs_s)
print(lat_lngs_a[0])

(31.75567, -82.64317)


In [30]:
ll_arraycut = lat_lngs_a[::500]
print(ll_arraycut[0])
print(ll_arraycut[0][0])

(31.75567, -82.64317)
31.75567


In [33]:
add_array = []
for i in range(200):
    target_lat = ll_arraycut[i][0]
    target_lng = ll_arraycut[i][1]
    base_url = ('https://maps.googleapis.com/maps/api/geocode/json?latlng={0},{1}&key={2}').format(target_lat, target_lng, gkey)
    geo_data = requests.get(base_url).json()
    
    try:
        res = geo_data["plus_code"]["compound_code"]
        res_arr = res.split(", ")
        add_array.append(res_arr)
    except (KeyError, IndexError):
        pprint("Could not find information")

city_arr = pd.DataFrame(add_array) 
city_arr[0] = city_arr[0].map(lambda x: str(x)[8:])
city_arr = city_arr.rename(columns={0: 'City'})
# city_array = new_array.loc[:, '1':'3']
print(city_arr.head(20))

'Could not find information'
'Could not find information'
'Could not find information'
'Could not find information'
'Could not find information'
                      City                    1          2     3
0                   Denton                   GA        USA  None
1                 Columbia                   SC        USA  None
2                 Leoville                   SK     Canada  None
3             Mountain Top                   PA        USA  None
4          Santa Margarita                   CA        USA  None
5                Skamokawa                   WA        USA  None
6                 Leoville                   SK     Canada  None
7   São Miguel do Araguaia       State of Goiás     Brazil  None
8            Viento Fresco            Venezuela       None  None
9              Upper Tract                   WV        USA  None
10              Las Tapias              Córdoba  Argentina  None
11               Skamokawa                   WA        USA  None
12        

In [34]:
city_arr['Max Temp'] = ""
# print(city_arr)
city_df = city_arr[["City", "Max Temp"]]
print(city_df.head(20))

                      City Max Temp
0                   Denton         
1                 Columbia         
2                 Leoville         
3             Mountain Top         
4          Santa Margarita         
5                Skamokawa         
6                 Leoville         
7   São Miguel do Araguaia         
8            Viento Fresco         
9              Upper Tract         
10              Las Tapias         
11               Skamokawa         
12              Maple Hill         
13                  Bamepa         
14                Tipitapa         
15                    Onyx         
16                Freeland         
17                 Itiratí         
18               El Retiro         
19                  Geddes         


In [35]:
limit = len(city_df)/50
set_var = 1
data_count = 50
dummy_array = [];

base_url = "http://api.openweathermap.org/data/2.5/weather?"
params = {'appid': api_key, 'units': 'Imperial'}

for index, row in city_df.iterrows():
    if len(dummy_array) < data_count and set_var <= math.ceil(limit):
        city_row = row["City"]
        dummy_array.append(city_row)
       
        params['q'] = city_row

        response = requests.get(base_url, params=params).json()
    
    else:
        set_var = set_var + 1
        data_count = data_count + 49
        time.sleep(60)

    try:
        city_df.loc[index, 'Max Temp'] = response['main']['temp_max']
        
    except (KeyError, IndexError):
        pprint("Could not find information")

'Could not find information'
'Could not find information'
'Could not find information'
'Could not find information'
'Could not find information'
'Could not find information'
'Could not find information'
'Could not find information'
'Could not find information'
'Could not find information'
'Could not find information'
'Could not find information'
'Could not find information'
'Could not find information'
'Could not find information'
'Could not find information'
'Could not find information'
'Could not find information'
'Could not find information'
'Could not find information'
'Could not find information'
'Could not find information'
'Could not find information'
'Could not find information'
'Could not find information'
'Could not find information'
'Could not find information'
'Could not find information'
'Could not find information'
'Could not find information'
'Could not find information'
'Could not find information'
'Could not find information'
'Could not find information'
'Could not fin

In [36]:
city_df.head(30)

Unnamed: 0,City,Max Temp
0,Denton,71.01
1,Columbia,66.2
2,Leoville,39.01
3,Mountain Top,73.99
4,Santa Margarita,85.63
5,Skamokawa,57.2
6,Leoville,39.01
7,São Miguel do Araguaia,78.07
8,Viento Fresco,64.03
9,Upper Tract,51.8


In [37]:
city_df.to_sql(name='city_df', con=engine, if_exists='append', index=False)

In [38]:
lat_lngs = zip(lats, lngs)

# Identify nearest city for each lat, lng combination
for lat_lng in lat_lngs:
    city = citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name
    cities.append(city)

In [39]:
len(cities)
cities[-1]

'pottsville'

In [41]:
cities_citypy = new_vulture_data_df[['event-id', 'location-lat', 'location-long']]
cities_citypy["cities"] = pd.DataFrame({"cities" : cities})
cities_citypy = cities_citypy[['event-id', 'location-lat', 'location-long', 'cities']]
cities_citypy.head(30)

Unnamed: 0,event-id,location-lat,location-long,cities
0,283203879,40.48933,-75.39717,emmaus
1,283203880,40.48933,-75.39717,emmaus
2,283203881,40.32467,-75.33317,lansdale
3,283203882,40.33983,-75.35617,lansdale
4,283203883,40.3155,-75.4265,lansdale
5,283203884,40.30383,-75.405,lansdale
6,283203885,40.29567,-75.41133,lansdale
7,283203886,40.29567,-75.41167,lansdale
8,283203887,40.296,-75.41183,lansdale
9,283203888,40.29583,-75.41167,lansdale
