## Holocene Period Volcanoes Data

   1. Import and clean the Holocene-period volcano data

In [1]:
#import modules 
import os

import pandas as pd
import numpy as np 
import folium 
import requests
import matplotlib.cm as cm
import matplotlib.colors as colors


from geopy.geocoders import Nominatim 
from pandas.io.json import json_normalize
from sklearn.cluster import KMeans

In [2]:
# Load the volcano table from the CSV file sitting next to the notebook
df = pd.read_csv(filepath_or_buffer='database.csv')

In [48]:
# Keep only the rows whose Country column is exactly 'United States'
us_eruptions = df.loc[df['Country'].eq('United States')]
us_eruptions.head()

Unnamed: 0,Number,Name,Country,Region,Type,Activity Evidence,Last Known Eruption,Latitude,Longitude,Elevation (Meters),Dominant Rock Type,Tectonic Setting
271,244000,Vailulu'u,United States,New Zealand to Fiji,Submarine,Eruption Observed,2003 CE,-14.215,-169.058,-592,Basalt / Picro-Basalt,Subduction Zone / Oceanic Crust (< 15 km)
272,244001,Ta'u,United States,New Zealand to Fiji,Shield,Evidence Credible,Unknown,-14.23,-169.454,931,Basalt / Picro-Basalt,Subduction Zone / Oceanic Crust (< 15 km)
273,244010,Ofu-Olosega,United States,New Zealand to Fiji,Shield(s),Eruption Observed,1866 CE,-14.175,-169.618,639,Basalt / Picro-Basalt,Subduction Zone / Oceanic Crust (< 15 km)
274,244020,Tutuila,United States,New Zealand to Fiji,Tuff cone(s),Evidence Credible,Unknown,-14.295,-170.7,653,Basalt / Picro-Basalt,Subduction Zone / Oceanic Crust (< 15 km)
661,284133,Fukujin,United States,"Japan, Taiwan, Marianas",Submarine,Eruption Observed,1974 CE,21.93,143.47,-217,Andesite / Basaltic Andesite,Subduction Zone / Crust Thickness Unknown


In [4]:
# Build a slim frame holding only Country, Region and the coordinates.
# Routing 'Country' through the index and back makes it the first column and
# leaves a fresh 0..n-1 index, so later cells can address the first row as 0.
eruption_location = (
    us_eruptions
    .drop(columns=[
        'Number', 'Name', 'Type', 'Activity Evidence', 'Last Known Eruption',
        'Elevation (Meters)', 'Dominant Rock Type', 'Tectonic Setting',
    ])
    .set_index('Country')
    .reset_index()
)
eruption_location.head()

Unnamed: 0,Country,Region,Latitude,Longitude
0,United States,New Zealand to Fiji,-14.215,-169.058
1,United States,New Zealand to Fiji,-14.23,-169.454
2,United States,New Zealand to Fiji,-14.175,-169.618
3,United States,New Zealand to Fiji,-14.295,-170.7
4,United States,"Japan, Taiwan, Marianas",21.93,143.47


In [50]:
#Find the central lat/lon for the US

# geocode() documents its query as a str (or dict), so pass the plain string
# rather than a one-element list.
address = 'United States'
geolocator = Nominatim(user_agent = 'ny_explorer')
location = geolocator.geocode(address)
if location is None:
    # geocode() returns None when nothing matches; fail here with a clear
    # message instead of an AttributeError on the next line.
    raise ValueError('Nominatim returned no result for {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of the United States are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of the United States are 39.7837304, -100.4458825.


#### A map of US volcanic eruptions during the Holocene period.

In [6]:
# Draw every US volcano as a circle marker on a map centred on the geocoded US position
volcano_map = folium.Map(location=[latitude, longitude], zoom_start=4)

marker_rows = zip(eruption_location['Latitude'],
                  eruption_location['Longitude'],
                  eruption_location['Country'])

for lat, lng, country in marker_rows:
    # the popup shows the country name of the row
    popup = folium.Popup('{}'.format(country), parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='brown',
        fill=True,
        fill_color='pink',
        fill_opacity=0.7,
        parse_html=False)
    marker.add_to(volcano_map)

volcano_map

#### Now that we have our volcano data cleaned and mapped, it's time to begin importing farmers market data. 

In [15]:
#Begin by importing foursquare api. 
#This will enable us to gather farmers market data from around the 
#United States. 

#API credentials
# Secrets must not be stored in the notebook: read them from the environment.
# Set FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET before starting the kernel.
CLIENT_ID = os.environ.get("FOURSQUARE_CLIENT_ID", "")
CLIENT_SECRET = os.environ.get("FOURSQUARE_CLIENT_SECRET", "")
VERSION = '20200401'

if not (CLIENT_ID and CLIENT_SECRET):
    # Make the misconfiguration visible right here rather than as a failed API call later.
    print("WARNING: FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; "
          "Foursquare requests will be rejected.")

In [16]:
# Scalar lookup: country of the first row
eruption_location.at[0, 'Country']

'United States'

In [17]:
# Coordinates and country of the first row, used for a trial Foursquare request
eruption_lat = eruption_location.at[0, 'Latitude']
eruption_long = eruption_location.at[0, 'Longitude']
eruption_country = eruption_location.at[0, 'Country']

summary = 'Latitude and longitude values of {} are {}, {}.'.format(
    eruption_country, eruption_lat, eruption_long)
print(summary)

Latitude and longitude values of United States are -14.215, -169.058.


## Time to get our popular volcano venues

In [18]:
#api call
LIMIT = 25  # maximum number of venues requested per Foursquare call
radius = 10000  # search radius sent with the trial request
#query = "Farmers Market" #add query parameter

# Values are substituted into the template in this exact order
explore_args = (CLIENT_ID, CLIENT_SECRET, VERSION, eruption_lat, eruption_long, radius, LIMIT)
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(*explore_args)


In [19]:
# Send the trial request and show the decoded JSON payload
response = requests.get(url)
results = response.json()
results

{'meta': {'code': 200, 'requestId': '5f2dcf13b856fa49bb84f7e3'},
  'headerLocation': 'Current map view',
  'headerFullLocation': 'Current map view',
  'headerLocationGranularity': 'unknown',
  'totalResults': 0,
  'suggestedBounds': {'ne': {'lat': -14.124999909999909,
    'lng': -168.96533051225714},
   'sw': {'lat': -14.30500009000009, 'lng': -169.15066948774285}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': []}]}}

In [20]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Collect Foursquare ``venues/explore`` results around each location.

    Parameters
    ----------
    names, latitudes, longitudes : iterable
        Iterated in parallel; one API request is made per (name, lat, lng).
    radius : int, optional
        Value sent as the ``radius`` query parameter (presumably metres —
        confirm against the Foursquare documentation).

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Region',
        'Region Latitude', 'Region Longitude', 'Venue', 'Venue Latitude',
        'Venue Longitude' and 'Venue Category'. When no venue is returned at
        all, the frame is empty but still has these columns.

    Notes
    -----
    Reads the module-level ``CLIENT_ID``, ``CLIENT_SECRET``, ``VERSION`` and
    ``LIMIT``, and prints each name as a progress indicator.
    """
    venue_columns = ['Region',
                     'Region Latitude',
                     'Region Longitude',
                     'Venue',
                     'Venue Latitude',
                     'Venue Longitude',
                     'Venue Category']

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID,
            CLIENT_SECRET,
            VERSION,
            lat,
            lng,
            radius,
            LIMIT)

        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for v in results:
            venue = v['venue']
            # a venue may carry no category; record None instead of raising IndexError
            categories = venue.get('categories') or []
            venue_rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None))

    # Passing the column names to the constructor (instead of assigning them
    # afterwards) keeps the zero-venue case from raising a length mismatch.
    nearby_venues = pd.DataFrame(venue_rows, columns=venue_columns)

    return(nearby_venues)

In [21]:
# Fetch the venues around every US volcano, labelling each by its region
volcano_venues = getNearbyVenues(
    eruption_location['Region'],
    eruption_location['Latitude'],
    eruption_location['Longitude'],
)



New Zealand to Fiji
New Zealand to Fiji
New Zealand to Fiji
New Zealand to Fiji
Japan, Taiwan, Marianas
Japan, Taiwan, Marianas
Japan, Taiwan, Marianas
Japan, Taiwan, Marianas
Japan, Taiwan, Marianas
Japan, Taiwan, Marianas
Japan, Taiwan, Marianas
Japan, Taiwan, Marianas
Japan, Taiwan, Marianas
Japan, Taiwan, Marianas
Japan, Taiwan, Marianas
Japan, Taiwan, Marianas
Japan, Taiwan, Marianas
Japan, Taiwan, Marianas
Japan, Taiwan, Marianas
Japan, Taiwan, Marianas
Japan, Taiwan, Marianas
Japan, Taiwan, Marianas
Japan, Taiwan, Marianas
Japan, Taiwan, Marianas
Japan, Taiwan, Marianas
Japan, Taiwan, Marianas
Japan, Taiwan, Marianas
Japan, Taiwan, Marianas
Japan, Taiwan, Marianas
Alaska
Alaska
Alaska
Alaska
Alaska
Alaska
Alaska
Alaska
Alaska
Alaska
Alaska
Alaska
Alaska
Alaska
Alaska
Alaska
Alaska
Alaska
Alaska
Alaska
Alaska
Alaska
Alaska
Alaska
Alaska
Alaska
Alaska
Alaska
Alaska
Alaska
Alaska
Alaska
Alaska
Alaska
Alaska
Alaska
Alaska
Alaska
Alaska
Alaska
Alaska
Alaska
Alaska
Alaska
Alaska
Alask

In [22]:
# Show (rows, columns) followed by the first few venues
n_rows, n_cols = volcano_venues.shape
print((n_rows, n_cols))
volcano_venues.head()

(27, 7)


Unnamed: 0,Region,Region Latitude,Region Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Canada and Western USA,46.853,-121.76,paradise view,46.853702,-121.758947,Mountain
1,Canada and Western USA,46.853,-121.76,Comcast Xfinity,46.853028,-121.762765,Business Service
2,Canada and Western USA,46.853,-121.76,Mount Rainier,46.851852,-121.757178,Mountain
3,Canada and Western USA,45.88,-122.08,Observation Peak,45.878133,-122.084091,Trail
4,Canada and Western USA,45.374,-121.695,Pearly Gates,45.372446,-121.696507,Trail


In [23]:
# Number of distinct values in the 'Venue Category' column
category_count = len(volcano_venues['Venue Category'].unique())
print('There are {} uniques categories.'.format(category_count))

There are 12 uniques categories.


In [24]:
# one-hot encode the venue categories: one indicator column per category
volc_oh = pd.get_dummies(volcano_venues[['Venue Category']], prefix="", prefix_sep="")

# re-attach the region each venue belongs to (appended as the last column)
volc_oh['Region'] = volcano_venues['Region']

# rotate the last column to the front so the region leads the frame
all_columns = list(volc_oh.columns)
volc_oh = volc_oh[all_columns[-1:] + all_columns[:-1]]

volc_oh.head()

Unnamed: 0,Region,Beach,Boat or Ferry,Burrito Place,Business Service,Cafeteria,Historic Site,Mexican Restaurant,Mountain,Scenic Lookout,Ski Chalet,Trail,Volcano
0,Canada and Western USA,0,0,0,0,0,0,0,1,0,0,0,0
1,Canada and Western USA,0,0,0,1,0,0,0,0,0,0,0,0
2,Canada and Western USA,0,0,0,0,0,0,0,1,0,0,0,0
3,Canada and Western USA,0,0,0,0,0,0,0,0,0,0,1,0
4,Canada and Western USA,0,0,0,0,0,0,0,0,0,0,1,0


In [25]:
# Mean of each indicator column per region, i.e. the share of venues in each category
eruption_grouped = volc_oh.groupby('Region', as_index=False).mean()
eruption_grouped

Unnamed: 0,Region,Beach,Boat or Ferry,Burrito Place,Business Service,Cafeteria,Historic Site,Mexican Restaurant,Mountain,Scenic Lookout,Ski Chalet,Trail,Volcano
0,Canada and Western USA,0.047619,0.047619,0.047619,0.047619,0.047619,0.0,0.047619,0.333333,0.190476,0.047619,0.142857,0.0
1,Hawaii and Pacific Ocean,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.166667,0.166667,0.0,0.0,0.5


In [26]:
num_top_venues = 5

# For every region, print its most frequent venue categories
for region_name in eruption_grouped['Region']:
    print("----"+region_name+"----")

    # transpose the region's single row into (venue, freq) pairs
    freq_table = eruption_grouped.loc[eruption_grouped['Region'] == region_name].T.reset_index()
    freq_table.columns = ['venue','freq']

    # the first row holds the region name itself, not a frequency
    freq_table = freq_table.iloc[1:].astype({'freq': float}).round({'freq': 2})

    top_rows = freq_table.sort_values('freq', ascending=False).reset_index(drop=True)
    print(top_rows.head(num_top_venues))
    print('\n')

----Canada and Western USA----
            venue  freq
0        Mountain  0.33
1  Scenic Lookout  0.19
2           Trail  0.14
3           Beach  0.05
4   Boat or Ferry  0.05


----Hawaii and Pacific Ocean----
            venue  freq
0         Volcano  0.50
1   Historic Site  0.17
2        Mountain  0.17
3  Scenic Lookout  0.17
4           Beach  0.00




In [27]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of ``row``.

    The first entry of ``row`` is skipped (in this notebook it holds the
    region name); the remaining entries are ordered by value, largest first,
    and the index labels of the leading ``num_top_venues`` are returned as an
    array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

In [28]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Region']
for ind in np.arange(num_top_venues):
    # The first three ranks take 'st'/'nd'/'rd'; every later rank takes 'th'.
    # An explicit bounds check replaces the former bare `except:`, which would
    # also have hidden unrelated errors.
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind+1, suffix))

# create a new dataframe with one row per region
eruption_venues_sorted = pd.DataFrame(columns=columns)
eruption_venues_sorted['Region'] = eruption_grouped['Region']

# fill each region's row with its category names ordered by frequency
for ind in np.arange(eruption_grouped.shape[0]):
    eruption_venues_sorted.iloc[ind, 1:] = return_most_common_venues(eruption_grouped.iloc[ind, :], num_top_venues)

eruption_venues_sorted.head()

Unnamed: 0,Region,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Canada and Western USA,Mountain,Scenic Lookout,Trail,Ski Chalet,Mexican Restaurant,Cafeteria,Business Service,Burrito Place,Boat or Ferry,Beach
1,Hawaii and Pacific Ocean,Volcano,Scenic Lookout,Mountain,Historic Site,Trail,Ski Chalet,Mexican Restaurant,Cafeteria,Business Service,Burrito Place


In [29]:
# set number of clusters
kclusters = 2

# Keep only the numeric category frequencies for clustering.
# `columns=` is used because pandas 2.0 removed the positional `axis` argument
# of DataFrame.drop, so `drop('Region', 1)` raises a TypeError there.
eruption_grouped_clustering = eruption_grouped.drop(columns='Region')

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(eruption_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 1], dtype=int32)

In [30]:
# add clustering labels
# Drop a 'Cluster Labels' column left behind by a previous execution so the
# cell can be re-run; insert() raises if the column already exists.
if 'Cluster Labels' in eruption_venues_sorted.columns:
    eruption_venues_sorted = eruption_venues_sorted.drop(columns='Cluster Labels')
eruption_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# Attach each region's cluster label and ranked venue categories to every
# eruption row. join() keeps all rows of eruption_location, so regions that are
# absent from eruption_venues_sorted get NaN here, which is why
# 'Cluster Labels' shows up as float.
eruption_merged = eruption_location.join(eruption_venues_sorted.set_index('Region'), on='Region')

eruption_merged.tail() # check the last columns!

Unnamed: 0,Country,Region,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
168,United States,Hawaii and Pacific Ocean,19.421,-155.287,1.0,Volcano,Scenic Lookout,Mountain,Historic Site,Trail,Ski Chalet,Mexican Restaurant,Cafeteria,Business Service,Burrito Place
169,United States,Hawaii and Pacific Ocean,19.475,-155.608,1.0,Volcano,Scenic Lookout,Mountain,Historic Site,Trail,Ski Chalet,Mexican Restaurant,Cafeteria,Business Service,Burrito Place
170,United States,Hawaii and Pacific Ocean,19.82,-155.47,1.0,Volcano,Scenic Lookout,Mountain,Historic Site,Trail,Ski Chalet,Mexican Restaurant,Cafeteria,Business Service,Burrito Place
171,United States,Hawaii and Pacific Ocean,19.692,-155.87,1.0,Volcano,Scenic Lookout,Mountain,Historic Site,Trail,Ski Chalet,Mexican Restaurant,Cafeteria,Business Service,Burrito Place
172,United States,Hawaii and Pacific Ocean,20.708,-156.25,1.0,Volcano,Scenic Lookout,Mountain,Historic Site,Trail,Ski Chalet,Mexican Restaurant,Cafeteria,Business Service,Burrito Place


In [34]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=3)

# set color scheme for the clusters: one evenly spaced rainbow colour per cluster
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]
unclustered_color = '#808080'  # grey for rows without a cluster label

# add markers to the map
for lat, lon, poi, cluster in zip(eruption_merged['Latitude'], eruption_merged['Longitude'], eruption_merged['Region'], eruption_merged['Cluster Labels']):
    # 'Cluster Labels' is float and may be NaN, so it cannot index the colour
    # list directly: cast real labels to int and give missing ones the fallback.
    if pd.isna(cluster):
        marker_color = unclustered_color
    else:
        marker_color = rainbow[int(cluster)]

    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [35]:
#Cluster 1 (rows whose cluster label is 0)
# show the Region column (position 1) plus the ranked venue columns (position 5 onwards)
in_first_cluster = eruption_merged['Cluster Labels'] == 0
eruption_merged[in_first_cluster].iloc[:, [1] + list(range(5, eruption_merged.shape[1]))]



Unnamed: 0,Region,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
117,Canada and Western USA,Mountain,Scenic Lookout,Trail,Ski Chalet,Mexican Restaurant,Cafeteria,Business Service,Burrito Place,Boat or Ferry,Beach
118,Canada and Western USA,Mountain,Scenic Lookout,Trail,Ski Chalet,Mexican Restaurant,Cafeteria,Business Service,Burrito Place,Boat or Ferry,Beach
119,Canada and Western USA,Mountain,Scenic Lookout,Trail,Ski Chalet,Mexican Restaurant,Cafeteria,Business Service,Burrito Place,Boat or Ferry,Beach
120,Canada and Western USA,Mountain,Scenic Lookout,Trail,Ski Chalet,Mexican Restaurant,Cafeteria,Business Service,Burrito Place,Boat or Ferry,Beach
121,Canada and Western USA,Mountain,Scenic Lookout,Trail,Ski Chalet,Mexican Restaurant,Cafeteria,Business Service,Burrito Place,Boat or Ferry,Beach
122,Canada and Western USA,Mountain,Scenic Lookout,Trail,Ski Chalet,Mexican Restaurant,Cafeteria,Business Service,Burrito Place,Boat or Ferry,Beach
123,Canada and Western USA,Mountain,Scenic Lookout,Trail,Ski Chalet,Mexican Restaurant,Cafeteria,Business Service,Burrito Place,Boat or Ferry,Beach
124,Canada and Western USA,Mountain,Scenic Lookout,Trail,Ski Chalet,Mexican Restaurant,Cafeteria,Business Service,Burrito Place,Boat or Ferry,Beach
125,Canada and Western USA,Mountain,Scenic Lookout,Trail,Ski Chalet,Mexican Restaurant,Cafeteria,Business Service,Burrito Place,Boat or Ferry,Beach
126,Canada and Western USA,Mountain,Scenic Lookout,Trail,Ski Chalet,Mexican Restaurant,Cafeteria,Business Service,Burrito Place,Boat or Ferry,Beach


In [36]:
#Cluster 2 (rows whose cluster label is 1)
# show the Region column (position 1) plus the ranked venue columns (position 5 onwards)
in_second_cluster = eruption_merged['Cluster Labels'] == 1
eruption_merged[in_second_cluster].iloc[:, [1] + list(range(5, eruption_merged.shape[1]))]


Unnamed: 0,Region,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
167,Hawaii and Pacific Ocean,Volcano,Scenic Lookout,Mountain,Historic Site,Trail,Ski Chalet,Mexican Restaurant,Cafeteria,Business Service,Burrito Place
168,Hawaii and Pacific Ocean,Volcano,Scenic Lookout,Mountain,Historic Site,Trail,Ski Chalet,Mexican Restaurant,Cafeteria,Business Service,Burrito Place
169,Hawaii and Pacific Ocean,Volcano,Scenic Lookout,Mountain,Historic Site,Trail,Ski Chalet,Mexican Restaurant,Cafeteria,Business Service,Burrito Place
170,Hawaii and Pacific Ocean,Volcano,Scenic Lookout,Mountain,Historic Site,Trail,Ski Chalet,Mexican Restaurant,Cafeteria,Business Service,Burrito Place
171,Hawaii and Pacific Ocean,Volcano,Scenic Lookout,Mountain,Historic Site,Trail,Ski Chalet,Mexican Restaurant,Cafeteria,Business Service,Burrito Place
172,Hawaii and Pacific Ocean,Volcano,Scenic Lookout,Mountain,Historic Site,Trail,Ski Chalet,Mexican Restaurant,Cafeteria,Business Service,Burrito Place
