In [1]:
import pandas as pd
from sqlalchemy import create_engine

### Store CSV into DataFrame: Vulture Migration Paths

In [10]:
csv_file = "Resources/Turkey vultures in North and South America - migration.csv"
vulture_data_df = pd.read_csv(csv_file, low_memory=False)
vulture_data_df.head(2)

Unnamed: 0,event-id,visible,timestamp,location-long,location-lat,manually-marked-outlier,sensor-type,individual-taxon-canonical-name,tag-local-identifier,individual-local-identifier,...,animal-life-stage,animal-mass,attachment-type,deployment-comments,deployment-id,duty-cycle,study-site,tag-manufacturer-name,tag-mass,tag-model
0,283203879,True,2003-11-14 16:00:00.000,-75.39717,40.48933,False,gps,Cathartes aura,42500,Butterball,...,adult,2372.0,harness,trapped in Pennsylvania using padded-leg hold ...,42500-Butterball,1 fix per hour,East Coast of North America,Microwave Telemetry,70,PTT100
1,283203880,True,2003-11-14 17:00:00.000,-75.39717,40.48933,False,gps,Cathartes aura,42500,Butterball,...,adult,2372.0,harness,trapped in Pennsylvania using padded-leg hold ...,42500-Butterball,1 fix per hour,East Coast of North America,Microwave Telemetry,70,PTT100


In [12]:
vulture_data_df.columns

Index(['event-id', 'visible', 'timestamp', 'location-long', 'location-lat',
       'manually-marked-outlier', 'sensor-type',
       'individual-taxon-canonical-name', 'tag-local-identifier',
       'individual-local-identifier', 'study-name', 'utm-easting',
       'utm-northing', 'utm-zone', 'study-timezone', 'study-local-timestamp',
       'tag-id', 'animal-id', 'animal-taxon', 'deploy-on-date',
       'deploy-off-date', 'animal-comments', 'animal-life-stage',
       'animal-mass', 'attachment-type', 'deployment-comments',
       'deployment-id', 'duty-cycle', 'study-site', 'tag-manufacturer-name',
       'tag-mass', 'tag-model'],
      dtype='object')

In [3]:
vulture_data_df.keys()

Index(['event-id', 'visible', 'timestamp', 'location-long', 'location-lat',
       'manually-marked-outlier', 'sensor-type',
       'individual-taxon-canonical-name', 'tag-local-identifier',
       'individual-local-identifier', 'study-name', 'utm-easting',
       'utm-northing', 'utm-zone', 'study-timezone', 'study-local-timestamp',
       'tag-id', 'animal-id', 'animal-taxon', 'deploy-on-date',
       'deploy-off-date', 'animal-comments', 'animal-life-stage',
       'animal-mass', 'attachment-type', 'deployment-comments',
       'deployment-id', 'duty-cycle', 'study-site', 'tag-manufacturer-name',
       'tag-mass', 'tag-model'],
      dtype='object')

### Create new data with select columns

In [24]:
# Select columns 
new_vulture_data_df = vulture_data_df[['timestamp', 'location-long', 'location-lat','individual-taxon-canonical-name', 'tag-local-identifier',
       'individual-local-identifier']].copy()
new_vulture_data_df.head()

Unnamed: 0,timestamp,location-long,location-lat,individual-taxon-canonical-name,tag-local-identifier,individual-local-identifier
0,2003-11-14 16:00:00.000,-75.39717,40.48933,Cathartes aura,42500,Butterball
1,2003-11-14 17:00:00.000,-75.39717,40.48933,Cathartes aura,42500,Butterball
2,2003-11-14 18:00:00.000,-75.33317,40.32467,Cathartes aura,42500,Butterball
3,2003-11-14 19:00:00.000,-75.35617,40.33983,Cathartes aura,42500,Butterball
4,2003-11-14 20:00:00.000,-75.4265,40.3155,Cathartes aura,42500,Butterball


### Clean DataFrame

In [5]:
new_vulture_data_df.count()

timestamp                          220077
location-long                      220077
location-lat                       220077
individual-taxon-canonical-name    220077
tag-local-identifier               220077
individual-local-identifier        220077
dtype: int64

In [6]:
# drop rows without long and lat
new_vulture_data_df = new_vulture_data_df.dropna(how="any")
new_vulture_data_df.count()

timestamp                          220077
location-long                      220077
location-lat                       220077
individual-taxon-canonical-name    220077
tag-local-identifier               220077
individual-local-identifier        220077
dtype: int64

In [7]:
# filter data to only keep turkey vulture (Cathartes aura) data
new_vulture_data_df = new_vulture_data_df.loc[new_vulture_data_df
                                              ["individual-taxon-canonical-name"] == "Cathartes aura", :]
new_vulture_data_df.count()

timestamp                          220077
location-long                      220077
location-lat                       220077
individual-taxon-canonical-name    220077
tag-local-identifier               220077
individual-local-identifier        220077
dtype: int64

In [8]:
new_vulture_data_df.dtypes

timestamp                           object
location-long                      float64
location-lat                       float64
individual-taxon-canonical-name     object
tag-local-identifier                 int64
individual-local-identifier         object
dtype: object

In [None]:
# check to prevent duplicate loading (inqury for database)


### Store Info CSV to DataFrame

In [None]:
#new_vulture_data_df

### Connect to local database

In [25]:
database_path = "vulture_etl"
engine = create_engine(f"sqlite:///{database_path}")

### Check for tables

In [27]:
engine.table_names()

[]

### Use pandas to load csv converted DataFrame into database

In [28]:
new_vulture_data_df.to_sql(name='migration_paths', con=engine, if_exists='append', index=False)

### Use pandas to import second csv: Vulture Info by name

In [29]:
csv_file = "Resources/Turkey vultures in North and South America-reference-data.csv"
vulture_info_df = pd.read_csv(csv_file, low_memory=False)
vulture_info_df

Unnamed: 0,tag-id,animal-id,animal-taxon,deploy-on-date,deploy-off-date,animal-comments,animal-life-stage,animal-mass,attachment-type,deployment-comments,deployment-id,duty-cycle,study-site,tag-manufacturer-name,tag-mass,tag-model
0,42500,Butterball,Cathartes aura,2003-11-14 16:00:00.000,2004-03-14 20:00:01.000,migratory,adult,2372.0,harness,trapped in Pennsylvania using padded-leg hold ...,42500-Butterball,1 fix per hour,East Coast of North America,Microwave Telemetry,70.0,PTT100
1,52067,Irma,Cathartes aura,2004-09-06 17:00:00.000,2013-03-18 22:00:01.000,non-migratory,adult,2012.0,harness,trapped in Pennsylvania using padded-leg hold ...,52067-Irma,1 fix per hour,East Coast of North America,Microwave Telemetry,70.0,PTT100
2,42500,Schaumboch,Cathartes aura,2004-10-08 15:00:00.000,2006-03-29 17:00:01.000,migratory,adult,1951.0,harness,trapped in Pennsylvania using padded-leg hold ...,42500-Schaumboch,1 fix per hour,East Coast of North America,Microwave Telemetry,70.0,PTT100
3,52069,Disney,Cathartes aura,2004-10-11 14:00:00.000,2011-10-18 23:00:01.000,migratory,adult,2108.0,harness,trapped in Pennsylvania using padded-leg hold ...,52069-Disney,1 fix per hour,East Coast of North America,Microwave Telemetry,70.0,PTT100
4,57954,Prado,Cathartes aura,2005-11-02 15:00:00.000,2009-07-07 00:00:01.000,non-migratory,adult,1710.0,harness,trapped in California using walk-in traps,57954-Prado,1 fix per hour,West Coast of North America,Microwave Telemetry,70.0,PTT100
5,57955,Sarkis,Cathartes aura,2006-02-25 23:00:00.000,2007-06-02 20:00:01.000,migratory,adult,1810.0,harness,trapped in California using walk-in traps,57955-Sarkis,1 fix per hour,West Coast of North America,Microwave Telemetry,70.0,PTT100
6,57957,Morongo,Cathartes aura,2006-04-10 04:00:00.000,2009-04-05 22:00:01.000,migratory,adult,1750.0,harness,trapped in California using walk-in traps,57957-Morongo,1 fix per hour,West Coast of North America,Microwave Telemetry,70.0,PTT100
7,57956,Rosalie,Cathartes aura,2006-04-11 04:00:00.000,2010-03-28 00:00:01.000,migratory,adult,1975.0,harness,trapped in California using walk-in traps,57956-Rosalie,1 fix per hour,West Coast of North America,Microwave Telemetry,70.0,PTT100
8,65545,Leo,Cathartes aura,2007-09-24 17:00:00.000,2013-03-15 12:00:01.000,migratory,adult,,harness,trapped on their nests in Saskatchewan,65545-Leo,1 fix per hour,Interior of North America,Microwave Telemetry,70.0,PTT100
9,65544,Mac,Cathartes aura,2007-09-27 20:00:00.000,2008-09-13 16:00:01.000,migratory,adult,,harness,trapped on their nests in Saskatchewan,65544-Mac,1 fix per hour,Interior of North America,Microwave Telemetry,70.0,PTT100


In [30]:
vulture_info_df.columns

Index(['tag-id', 'animal-id', 'animal-taxon', 'deploy-on-date',
       'deploy-off-date', 'animal-comments', 'animal-life-stage',
       'animal-mass', 'attachment-type', 'deployment-comments',
       'deployment-id', 'duty-cycle', 'study-site', 'tag-manufacturer-name',
       'tag-mass', 'tag-model'],
      dtype='object')

In [31]:
# Select columns 
new_vulture_info_df = vulture_info_df[['tag-id', 'animal-id', 'animal-taxon', 'deploy-on-date',
       'deploy-off-date', 'animal-comments', 'animal-life-stage',
       'animal-mass',  'deployment-comments',
        'study-site']].copy()
new_vulture_info_df.head(1)

Unnamed: 0,tag-id,animal-id,animal-taxon,deploy-on-date,deploy-off-date,animal-comments,animal-life-stage,animal-mass,deployment-comments,study-site
0,42500,Butterball,Cathartes aura,2003-11-14 16:00:00.000,2004-03-14 20:00:01.000,migratory,adult,2372.0,trapped in Pennsylvania using padded-leg hold ...,East Coast of North America


In [32]:
new_vulture_info_df.count()

tag-id                 19
animal-id              19
animal-taxon           19
deploy-on-date         19
deploy-off-date        19
animal-comments        19
animal-life-stage      19
animal-mass            12
deployment-comments    19
study-site             19
dtype: int64

In [34]:
new_vulture_info_df.to_sql(name='vulture_detail', con=engine, if_exists='append', index=False)

### Confirm data has been added by querying the customer_name table
* NOTE: can also check using pgAdmin

In [35]:
pd.read_sql_query('select * from migration_paths', con=engine).head()

Unnamed: 0,timestamp,location-long,location-lat,individual-taxon-canonical-name,tag-local-identifier,individual-local-identifier
0,2003-11-14 16:00:00.000,-75.39717,40.48933,Cathartes aura,42500,Butterball
1,2003-11-14 17:00:00.000,-75.39717,40.48933,Cathartes aura,42500,Butterball
2,2003-11-14 18:00:00.000,-75.33317,40.32467,Cathartes aura,42500,Butterball
3,2003-11-14 19:00:00.000,-75.35617,40.33983,Cathartes aura,42500,Butterball
4,2003-11-14 20:00:00.000,-75.4265,40.3155,Cathartes aura,42500,Butterball


### Confirm data has been added by querying the customer_location table

In [36]:
pd.read_sql_query('select * from vulture_detail', con=engine).head()

Unnamed: 0,tag-id,animal-id,animal-taxon,deploy-on-date,deploy-off-date,animal-comments,animal-life-stage,animal-mass,deployment-comments,study-site
0,42500,Butterball,Cathartes aura,2003-11-14 16:00:00.000,2004-03-14 20:00:01.000,migratory,adult,2372.0,trapped in Pennsylvania using padded-leg hold ...,East Coast of North America
1,52067,Irma,Cathartes aura,2004-09-06 17:00:00.000,2013-03-18 22:00:01.000,non-migratory,adult,2012.0,trapped in Pennsylvania using padded-leg hold ...,East Coast of North America
2,42500,Schaumboch,Cathartes aura,2004-10-08 15:00:00.000,2006-03-29 17:00:01.000,migratory,adult,1951.0,trapped in Pennsylvania using padded-leg hold ...,East Coast of North America
3,52069,Disney,Cathartes aura,2004-10-11 14:00:00.000,2011-10-18 23:00:01.000,migratory,adult,2108.0,trapped in Pennsylvania using padded-leg hold ...,East Coast of North America
4,57954,Prado,Cathartes aura,2005-11-02 15:00:00.000,2009-07-07 00:00:01.000,non-migratory,adult,1710.0,trapped in California using walk-in traps,West Coast of North America


In [37]:
engine.table_names()

['migration_paths', 'vulture_detail']

In [48]:
conn = engine.connect()

from sqlalchemy.orm import Session
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()

session = Session(bind=engine)
Base.metadata.create_all(engine)

In [50]:

session.commit()

In [51]:
names = session.query(migration_paths)
for name in names:
    print(migration_paths.animal-id)

NameError: name 'migration_paths' is not defined

# list of cities

In [53]:
from citipy import citipy


In [61]:
lats = new_vulture_data_df['location-lat'].values.tolist()
lngs = new_vulture_data_df['location-long'].values.tolist()
type(long)

list

In [56]:
lat_lngs = []
cities = []

In [63]:


lat_lngs = zip(lats, lngs)

# Identify nearest city for each lat, lng combination
for lat_lng in lat_lngs:
    city = citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name
    
    # If the city is unique, then add it to a our cities list
    if city not in cities:
        cities.append(city)


# Print the city count to confirm sufficient count
cities_list=list(cities)
len(cities)

1556