## Deliverable 3. Create a Travel Itinerary Map.

In [1]:
# Dependencies and Setup
import pandas as pd
import requests
import gmaps

import pprint as pp

# Import API key
from config import g_key

# Configure gmaps
gmaps.configure(api_key=g_key)

In [2]:
# 1. Read the WeatherPy_vacation.csv into a DataFrame.
vacation_df = pd.read_csv("../Vacation_Search/WeatherPy_vacation.csv")
vacation_df.head()

Unnamed: 0,City_ID,City,Country,Max Temp,Weather,Lat,Lng,Hotel Name
0,0,Nicoya,Cr,84.13,overcast clouds,10.1483,-85.452,Las Cabinas Del Sueño
1,1,Vaini,To,75.36,scattered clouds,-21.2,-175.2,Keleti Beach Resort
2,3,Kapaa,Us,81.12,scattered clouds,22.0752,-159.319,Sheraton Kauai Coconut Beach Resort
3,6,Port Blair,In,75.24,thunderstorm with light rain,11.6667,92.75,"Welcomhotel By ITC Hotels, Bay Island, Port Blair"
4,24,Isla,Ph,76.17,overcast clouds,15.8939,120.3036,Green Apple Inn


In [3]:
# 2. Using the template add the city name, the country code, the weather description and maximum temperature for the city.
info_box_template = """
<dl>
<dt>Hotel Name</dt><dd>{Hotel Name}</dd>
<dt>City</dt><dd>{City}</dd>
<dt>Country</dt><dd>{Country}</dd>
<dt>Weather</dt><dd>{Weather} and {Max Temp} °F</dd>
</dl>
"""

# 3a. Get the data from each row and add it to the formatting template and store the data in a list.
vacation_info = [info_box_template.format(**row) for index, row in vacation_df.iterrows()]

# 3b. Get the latitude and longitude from each row and store in a new DataFrame.
locations = vacation_df[["Lat", "Lng"]]

In [4]:
figure_layout = {
    'width': '800px',
    'height': '50vh',
    'border': '2px solid black',
    'padding': '2px'
}
fig = gmaps.figure(center=(30.0, 31.0), zoom_level=2, layout = figure_layout)

# 4a. Add a marker layer for each city to the map. 
marker_layer = gmaps.marker_layer(locations, info_box_content=vacation_info)
fig.add_layer(marker_layer)

# 4b. Display the figure
fig

Figure(layout=FigureLayout(border='2px solid black', height='50vh', padding='2px', width='800px'))

### For my city picking, I decided that I should practice a bit and write up some code that, given a particular country and number of destinations, would find the 4 closest hotels via clustering.  Completely extra, and doesn't change anything for the final bit of code, other than a few places where I'll use my list[] of destinations rather than vacation_start = ..., vacation_end = ..., and so on.  It could be a bit more efficient in O() notation (recalculating already-existing distances rather than storing them dynamically), but I thought brute force was okay for this application.

#### Fun fact: Because of the way that picking cities based on random lat/long combinations works, if you try to use the United States, you end up with 4 locations in Hawaii, which is a bit hard to drive between for different islands.  Pacific Ocean selection bias! :)

In [5]:
# From the map above pick 4 cities and create a vacation itinerary route to travel between the four cities.
select_country = "In"
country_df = vacation_df[vacation_df["Country"] == select_country]
#Set the desired number of cities
group_size = 4

country_df["City_ID"] = country_df["City_ID"].astype(str)
country_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys


Unnamed: 0,City_ID,City,Country,Max Temp,Weather,Lat,Lng,Hotel Name
3,6,Port Blair,In,75.24,thunderstorm with light rain,11.6667,92.75,"Welcomhotel By ITC Hotels, Bay Island, Port Blair"
53,206,Kavaratti,In,82.98,overcast clouds,10.5669,72.642,Hotels in Lakshadweep Islands
76,269,Balotra,In,89.6,clear sky,25.8333,72.2333,Hotel City Square
88,311,Gudari,In,82.36,overcast clouds,19.35,83.7833,Hotel maa majhigouri
100,348,Sitarganj,In,84.79,clear sky,28.9333,79.7,OYO 37021 Hotel Sitar International
112,382,Gulbarga,In,80.47,light rain,17.3333,76.8333,OYO 37176 N K Comforts
114,390,Veraval,In,81.72,clear sky,20.9,70.3667,Lords Inn Somnath
136,461,Harindanga,In,84.56,broken clouds,22.0383,88.3214,Hasanur Builders
151,520,Mangrol,In,81.61,clear sky,21.1167,70.1167,Hotel The Royal Honours
154,526,Shahpur,In,77.88,very heavy rain,16.7,76.8333,Ashwini Hotel & Deluxe Lodge


In [6]:
# Class to hold a number of cities in a group, with a centralized latitude/longitude
class my_city_group:
    def __init__ (self, city_ids, lat, lng):
        self.city_ids = []
        for i in city_ids:
            self.city_ids.append(i)
        self.lat = lat
        self.lng = lng
    
    # Return the distance between two centroids of city groups
    def dist(self, city_group2):
        x = self.lng - city_group2.lng
        y = self.lat - city_group2.lat
        return(((x**2) + (y**2))**0.5)
    
    # Combine two city groups into one, merging the city_ID lists into one and using
    # a weighted average to calculate a new centroid.
    def combine(self, city_group2):
        len1 = len(self.city_ids)
        len2 = len(city_group2.city_ids)
        new_ids = []
        for i in self.city_ids:
            new_ids.append(i)
        for i in city_group2.city_ids:
            new_ids.append(i)
        avg_lat = (self.lat * len1 + city_group2.lat * len2) / (len1 + len2)
        avg_lng = (self.lng * len1 + city_group2.lng * len2) / (len1 + len2)
        return my_city_group(new_ids, avg_lat, avg_lng)

def get_closest_cities(df, group_size):
    hotel_count = df["Country"].count()
    # Verify that there are at least group_size number of hotels in the database
    if hotel_count < group_size:
        country = list(df["Country"])[0]
        err = f"ERROR: There are only {hotel_count} hotels in " \
                f"country {country}, but {group_size} were requested."   
        print(err)
        return ""
    cities = []
    # initialize city list
    for i in df.index:
        new_city = my_city_group([df.loc[i, 'City_ID']],
                                  df.loc[i, 'Lat'],
                                  df.loc[i, 'Lng'])
        cities.append(new_city)
    # initialize the current largest group size
    lar_group = 1
    while(lar_group < group_size):
        # Maximum distance is, in theory, sqrt(360^2 + 180^2), assuming we don't account for the international dateline
        min_dist = 403
        # Cycle through each city grouping
        for i, city1 in enumerate(cities):
            # Cycle through each city grouping again
            for j, city2 in enumerate(cities):
                # Ensure we're not comparing the same group
                if i != j:
                    dist = city1.dist(city2)
                    # Find the minimum distance for this cycle
                    if dist < min_dist:
                        min_dist = dist
                        min_cities = [j, i]
        # Make a new group of the two closest city groups, combined
        new_group = cities[min_cities[0]].combine(cities[min_cities[1]])
        lar_group = len(new_group.city_ids)
        # Remove the two previous city groups that were combined before adding the new one
        # Since min_cities is returned as [j ,i], after finding the first combination of those two city groups,
        # the first one (larger index) should be popped first, to preserve the index order for the second
        cities.pop(min_cities[0])
        cities.pop(min_cities[1])
        cities.append(new_group)
    return new_group

In [13]:
# From the map above pick 4 cities and create a vacation itinerary route to travel between the four cities.
cluster = get_closest_cities(country_df, group_size)

# 5. Create DataFrames for each city by filtering the 'vacation_df' using the loc method. 
# Hint: The starting and ending city should be the same city.

stops = [country_df.loc[country_df["City_ID"] == x] for x in cluster.city_ids]
stops
#vacation_start = vacation_df.loc[]
#vacation_end = vacation_df.loc[]
#vacation_stop1 = vacation_df.loc[]
#vacation_stop2 = vacation_df.loc[] 
#vacation_stop3 = vacation_df.loc[]

[    City_ID  City Country  Max Temp          Weather   Lat    Lng  \
 173     571  Puri      In     81.12  overcast clouds  19.8  85.85   
 
                           Hotel Name  
 173  OYO 1577 Hotel Shree Hari Grand  ,
    City_ID    City Country  Max Temp          Weather    Lat      Lng  \
 88     311  Gudari      In     82.36  overcast clouds  19.35  83.7833   
 
               Hotel Name  
 88  Hotel maa majhigouri  ,
     City_ID       City Country  Max Temp     Weather      Lat    Lng  \
 197     661  Gharghoda      In     83.01  few clouds  22.1667  83.35   
 
      Hotel Name  
 197  Gokul Naik  ,
     City_ID        City Country  Max Temp        Weather      Lat      Lng  \
 136     461  Harindanga      In     84.56  broken clouds  22.0383  88.3214   
 
            Hotel Name  
 136  Hasanur Builders  ]

In [8]:
# 6. Get the latitude-longitude pairs as tuples from each city DataFrame using the to_numpy function and list indexing.
lat_lngs = [(x['Lat'].to_numpy()[0], x['Lng'].to_numpy()[0]) for x in stops]
lat_lngs
#start = 
#end = 
#stop1 = 
#stop2 = 
#stop3 = 

[(19.8, 85.85), (19.35, 83.7833), (22.1667, 83.35), (22.0383, 88.3214)]

In [9]:
# 7. Create a direction layer map using the start and end latitude-longitude pairs,
# and stop1, stop2, and stop3 as the waypoints. The travel_mode should be "DRIVING", "BICYCLING", or "WALKING".
fig2 = gmaps.figure(layout = figure_layout)
# Start and end are both the first element of the list, waypoints are the [1:] slice
city_itinerary = gmaps.directions_layer(
        lat_lngs[0], lat_lngs[0], waypoints=lat_lngs[1:],
        travel_mode='DRIVING')
fig2.add_layer(city_itinerary)
fig2

Figure(layout=FigureLayout(border='2px solid black', height='50vh', padding='2px', width='800px'))

In [10]:
# 8. To create a marker layer map between the four cities.
#  Combine the four city DataFrames into one DataFrame using the concat() function.
itinerary_df = pd.concat([x for x in stops],ignore_index=True)
itinerary_df

Unnamed: 0,City_ID,City,Country,Max Temp,Weather,Lat,Lng,Hotel Name
0,571,Puri,In,81.12,overcast clouds,19.8,85.85,OYO 1577 Hotel Shree Hari Grand
1,311,Gudari,In,82.36,overcast clouds,19.35,83.7833,Hotel maa majhigouri
2,661,Gharghoda,In,83.01,few clouds,22.1667,83.35,Gokul Naik
3,461,Harindanga,In,84.56,broken clouds,22.0383,88.3214,Hasanur Builders


In [11]:
# 9 Using the template add city name, the country code, the weather description and maximum temperature for the city. 
info_box_template = """
<dl>
<dt>Hotel Name</dt><dd>{Hotel Name}</dd>
<dt>City</dt><dd>{City}</dd>
<dt>Country</dt><dd>{Country}</dd>
<dt>Weather</dt><dd>{Weather} and {Max Temp} °F</dd>
</dl>
"""

# 10a Get the data from each row and add it to the formatting template and store the data in a list.
hotel_info = [info_box_template.format(**row) for index, row in itinerary_df.iterrows()]

# 10b. Get the latitude and longitude from each row and store in a new DataFrame.
locations = itinerary_df[["Lat", "Lng"]]
locations

Unnamed: 0,Lat,Lng
0,19.8,85.85
1,19.35,83.7833
2,22.1667,83.35
3,22.0383,88.3214


In [12]:
fig4 = gmaps.figure(layout = figure_layout)

# 11a. Add a marker layer for each city to the map.
marker_layer = gmaps.marker_layer(locations, info_box_content=hotel_info)
fig4.add_layer(marker_layer)

# 11b. Display the figure
fig4

Figure(layout=FigureLayout(border='2px solid black', height='50vh', padding='2px', width='800px'))