In [1]:

import restaurants_scraping as restaurant
import hotel_scraping as hotel
import FP_weather_scrapping as weather
import attraction_scraping as attraction
import Sort_Filter as sf
import pandas as pd
import datetime as dt
import packaging as pg
import numpy as np
import calculate_distance as cd

# Category labels handed to sf.sort_n_filter (see data_filter) so it knows
# which kind of listing it is sorting and filtering.
CATEGORY_HOTEL = "hotel"
CATEGORY_RESTAURANT = "restaurant"
CATEGORY_ATTRACTION = "attraction"
# Reference only (not executed): presumably the column layout shared by the
# scraped tables -- TODO confirm against the scraper modules.
# columns = ["Name", "address", "rating", "popularity", "price", "category", "url", "imageurl", "phone", "description"]


def scrap_data(location, start_date, end_date, preference=None):
    """Collect raw restaurant, hotel, attraction and weather data for a location.

    The four project scraper modules are called in the order restaurants,
    hotels, attractions, weather, with a progress line printed before each
    call. Afterwards the hotel, restaurant and attraction results are printed
    with the pandas row/column display limits lifted.

    Args:
        location: Place to search; forwarded unchanged to every scraper.
        start_date: Forwarded to the hotel scraper only.
        end_date: Forwarded to the hotel scraper only.
        preference: Accepted for call compatibility; not used in this function.

    Returns:
        Tuple ``(restaurant_data, hotel_data, attraction_data, weather_data)``.
    """
    print("Scraping restaurants...")
    restaurants = restaurant.scrap(location)

    print("Scarping hotels...")
    hotels = hotel.hotel_main(location, start_date, end_date)

    print("Scraping attractions...")
    attractions = attraction.init(location)

    print("Scrapping weather...")
    forecast = weather.search_weather(location)

    # Temporarily lift pandas' truncation so every row and column is shown.
    untruncated = ('display.max_rows', None,
                   'display.max_columns', None,
                   'expand_frame_repr', False)
    with pd.option_context(*untruncated):
        for scraped in (hotels, restaurants, attractions):
            print(scraped)

    return restaurants, hotels, attractions, forecast


def data_filter(restaurant_data, hotel_data, attraction_data, preference, num_days):
    """Run ``sf.sort_n_filter`` over the hotel, restaurant and attraction data.

    Each dataset is passed to ``sf.sort_n_filter`` together with the shared
    ``preference`` and ``num_days`` and its own category label. The calls are
    made in the order hotel, restaurant, attraction.

    Note that the return order differs from the parameter order.

    Returns:
        Tuple ``(hotel_filtered, restaurant_filtered, attraction_filtered)``.
    """
    labelled_inputs = (
        (hotel_data, CATEGORY_HOTEL),
        (restaurant_data, CATEGORY_RESTAURANT),
        (attraction_data, CATEGORY_ATTRACTION),
    )
    hotels, restaurants, attractions = [
        sf.sort_n_filter(dataset, preference, category, num_days)
        for dataset, category in labelled_inputs
    ]
    return hotels, restaurants, attractions



In [115]:
def init(location, start_date, end_date, preference):
    """Run the whole pipeline: scrape, filter, pair up stops and build routes.

    Steps, in order: compute the trip length from the two dates, scrape all
    sources via ``scrap_data``, sort/filter them via ``data_filter``, combine
    attractions with restaurants via ``cd.package_att_rest`` and finally build
    the routes with ``pg.package``. Intermediate tables are printed in full.

    Args:
        location: Destination, forwarded to ``scrap_data``.
        start_date: Trip start as a ``"YYYY-MM-DD"`` string.
        end_date: Trip end as a ``"YYYY-MM-DD"`` string.
        preference: Sorting preference forwarded to ``scrap_data`` and
            ``data_filter``.

    Returns:
        Tuple ``(routes, hotel_filtered, restaurant_filtered,
        attraction_filtered, weather_data)``.

    Raises:
        ValueError: If either date does not match ``%Y-%m-%d``, or if
            ``end_date`` is earlier than ``start_date``.
    """
    print("Application Start...")
    # get number of days
    start_dt = dt.datetime.strptime(start_date, "%Y-%m-%d").date()
    end_dt = dt.datetime.strptime(end_date, "%Y-%m-%d").date()
    num_days = (end_dt - start_dt).days
    # Fail fast on a reversed date range: a negative day count would otherwise
    # only surface (or be silently misused) after the slow scraping step.
    if num_days < 0:
        raise ValueError(
            "end_date ({}) must not be earlier than start_date ({})".format(end_date, start_date)
        )
    print("Number of days: {}".format(num_days))

    # scrap data
    restaurant_data, hotel_data, attraction_data, weather_data = scrap_data(location, start_date, end_date, preference)

    # filter data
    print("###################################################")
    print("Sorting data with preference 【{}】 and filtering...".format(preference))
    hotel_filtered, restaurant_filtered, attraction_filtered = data_filter(restaurant_data,
                                                                           hotel_data,
                                                                           attraction_data,
                                                                           preference,
                                                                           num_days)
    print("\n\n#########filtered#########")
    with pd.option_context('display.max_rows', None, 'display.max_columns', None, 'expand_frame_repr', False):
        print(hotel_filtered)
        print(restaurant_filtered)
        print(attraction_filtered)

    # combine the filtered attractions and restaurants before building routes
    attraction_restaurant_package = cd.package_att_rest(attraction_filtered, restaurant_filtered)

    with pd.option_context('display.max_rows', None, 'display.max_columns', None, 'expand_frame_repr', False):
        print(attraction_restaurant_package)
    routes = pg.package(hotel_filtered, attraction_restaurant_package, num_days)
    return routes,hotel_filtered,restaurant_filtered,attraction_filtered,weather_data




In [116]:
# Example run: New York, 2019-02-20 to 2019-02-23, with the "popularity"
# preference. init() scrapes every source on each call, so this cell is slow.
routes,hotel_filtered,restaurant_filtered,attraction_filtered,weather_data = init("new york", "2019-02-20", "2019-02-23", "popularity")




Application Start...
Number of days: 3
Scraping restaurants...
Querying https://api.yelp.com/v3/businesses/search ...
Scarping hotels...
Scraping attractions...
Scrapping weather...
                                                                                                 Name                                            address rating popularity  price category url imageurl         phone description
Ameritania at Times Square                                                 Ameritania at Times Square  230 W 54th Street, New York, NY, 10019, United...    8.4       1282   85.0                        866-573-4235            
Grand Hyatt New York                                                             Grand Hyatt New York  109 East 42nd Street, at Grand Central Termina...    8.0       2878  176.0                        866-538-1314            
DoubleTree Suites by Hilton New York City - Tim...  DoubleTree Suites by Hilton New York City - Ti...  1568 Broadway, 47th Street And 7th Av



#########filtered#########
                                                                                Name                                            address rating  popularity  price category url imageurl         phone
Hotel Pennsylvania                                                Hotel Pennsylvania    401 7th Ave, New York, NY, 10001, United States    4.8        6673   76.0                        866-538-9298
YOTEL New York                                                        YOTEL New York  570 Tenth Avenue, New York, NY, 10036, United ...    8.4        5872   96.0                        866-925-4159
Park Central Hotel New York                              Park Central Hotel New York    870 7th Ave, New York, NY, 10019, United States    7.8        3890  125.0                        866-925-4159
The Manhattan at Times Square Hotel              The Manhattan at Times Square Hotel    790 7th Ave, New York, NY, 10019, United States    6.2        3776  111.0                  

Calculating distance for 9 Pell St, New York, NY 10013 <---> Central Park,new york, NY|The National 9/11 Memorial & Museum,new york, NY|Empire State Building,new york, NY|Top of the Rock,new york, NY|The High Line,new york, NY|The Metropolitan Museum of Art,new york, NY|Times Square,new york, NY|Grand Central Terminal,new york, NY|Statue of Liberty,new york, NY|Broadway,new york, NY|Rockefeller Center,new york, NY|American Museum of Natural History,new york, NY|One World Observatory,new york, NY|Bryant Park,new york, NY|Brooklyn Bridge,new york, NY|Staten Island Ferry,new york, NY|Manhattan Skyline,new york, NY|The Museum of Modern Art (MoMA),new york, NY
Calculating distance for 39 W 19th St, New York, NY 10011 <---> Central Park,new york, NY|The National 9/11 Memorial & Museum,new york, NY|Empire State Building,new york, NY|Top of the Rock,new york, NY|The High Line,new york, NY|The Metropolitan Museum of Art,new york, NY|Times Square,new york, NY|Grand Central Terminal,new york, NY|

Calculating distance for 19 Old Fulton St, Brooklyn, NY 11201 <---> Central Park,new york, NY|The National 9/11 Memorial & Museum,new york, NY|Empire State Building,new york, NY|Top of the Rock,new york, NY|The High Line,new york, NY|The Metropolitan Museum of Art,new york, NY|Times Square,new york, NY|Grand Central Terminal,new york, NY|Statue of Liberty,new york, NY|Broadway,new york, NY|Rockefeller Center,new york, NY|American Museum of Natural History,new york, NY|One World Observatory,new york, NY|Bryant Park,new york, NY|Brooklyn Bridge,new york, NY|Staten Island Ferry,new york, NY|Manhattan Skyline,new york, NY|The Museum of Modern Art (MoMA),new york, NY
Calculating distance for 235 Mulberry St, New York, NY 10012 <---> Central Park,new york, NY|The National 9/11 Memorial & Museum,new york, NY|Empire State Building,new york, NY|Top of the Rock,new york, NY|The High Line,new york, NY|The Metropolitan Museum of Art,new york, NY|Times Square,new york, NY|Grand Central Terminal,new

In [117]:
# Display the routes returned by init() (built by pg.package).
routes

Unnamed: 0,day1,day2,day3
route1,"[Stewart Hotel, Broadway, Juliana's Pizza, The...","[Stewart Hotel, Statue of Liberty, Jacob's Pic...","[Stewart Hotel, One World Observatory, Up Thai..."
route2,"[Park Central Hotel New York, American Museum ...","[Park Central Hotel New York, Central Park, Be...","[Park Central Hotel New York, Empire State Bui..."


In [170]:
def route_to_address(routes, hotel_filtered, attraction_filtered, restaurant_filtered):
    """Translate each route's stop names into street addresses.

    ``routes`` is walked row by row; every cell of a row is treated as one
    day's ordered sequence of stop names. The position of a stop decides which
    table its address is read from (via ``.loc[name, "address"]``):

    * positions 0 and 5 -> ``hotel_filtered``
    * positions 1 and 3 -> ``attraction_filtered``
    * positions 2 and 4 -> ``restaurant_filtered``

    Stops at any other position are skipped. Days are labelled ``day1``,
    ``day2``, ... by their order within the row, not by the column names.

    Returns:
        DataFrame built with ``pd.DataFrame.from_dict``: one column per row
        label of ``routes``, one row per ``dayN`` label, each cell holding the
        list of addresses for that day.
    """
    # Stop position within a day -> table that holds that stop's address.
    table_for_position = {
        0: hotel_filtered,
        1: attraction_filtered,
        2: restaurant_filtered,
        3: attraction_filtered,
        4: restaurant_filtered,
        5: hotel_filtered,
    }

    addresses_by_route = {}
    for route_label, day_plans in routes.iterrows():
        addresses_by_day = {}
        for day_number, stops in enumerate(day_plans, start=1):
            addresses_by_day['day{}'.format(day_number)] = [
                table_for_position[position].loc[stops[position], "address"]
                for position in range(len(stops))
                if position in table_for_position
            ]
        addresses_by_route[route_label] = addresses_by_day

    return pd.DataFrame.from_dict(addresses_by_route)

In [171]:
# Show the address version of the routes (columns = routes, rows = days).
route_to_address(routes, hotel_filtered, attraction_filtered, restaurant_filtered)

Unnamed: 0,route1,route2
day1,"[371 7th Ave, New York, NY, 10001, United Stat...","[870 7th Ave, New York, NY, 10019, United Stat..."
day2,"[371 7th Ave, New York, NY, 10001, United Stat...","[870 7th Ave, New York, NY, 10019, United Stat..."
day3,"[371 7th Ave, New York, NY, 10001, United Stat...","[870 7th Ave, New York, NY, 10019, United Stat..."
