In [1]:
# Dependencies and Setup
import json
import os
import time

import pandas as pd
import requests

## Import the Priceline API key
from api_keys import priceline_api_key

In [3]:
# Cities to search for hotels; every entry is a California city, so the
# ", CA" suffix is appended once instead of being repeated in each literal.
city_list = [
    f"{city_name}, CA"
    for city_name in (
        'San Jose',
        'Santa Cruz',
        'Newport Beach',
        'Napa',
        'Santa Monica',
        'Long Beach',
        'Fresno',
        'Monterey',
        'Santa Barbara',
        'Palm Springs',
        'Sacramento',
        'Anaheim',
        'San Diego',
        'Los Angeles',
        'San Francisco',
    )
]

# Initialize an empty list to store DataFrames (one per successfully fetched city)
dfs = []

# The endpoint and headers are identical for every city, so build them once.
url = "https://priceline-com-provider.p.rapidapi.com/v1/hotels/locations"
headers = {
    "X-RapidAPI-Key": priceline_api_key,
    "X-RapidAPI-Host": "priceline-com-provider.p.rapidapi.com"
}

MAX_ATTEMPTS = 5      # upper bound on tries per city, so a city can never loop forever
RETRY_DELAY = 1       # seconds to wait between retries
REQUEST_TIMEOUT = 30  # seconds before a stalled HTTP request is abandoned

# Loop through each city in the list
for city in city_list:
    querystring = {"name": city, "search_type": "HOTEL"}

    for attempt in range(1, MAX_ATTEMPTS + 1):
        try:
            response = requests.get(
                url, headers=headers, params=querystring, timeout=REQUEST_TIMEOUT
            )
        except requests.RequestException as error:
            # Network failure or timeout: report it and move on to the next city.
            print(f"Failed to get data for {city}. Request error: {error}")
            break

        if response.status_code != 200:
            print(f"Failed to get data for {city}. Status code:", response.status_code)
            break  # Stop trying this city on an HTTP error

        # Parse the body once and reuse it for the check, the DataFrame and the printout.
        json_data = response.json()

        # Only accept the payload when it is a non-empty list whose first item is a
        # hotel; an empty list or an unexpected shape is treated as "not HOTEL".
        first_is_hotel = (
            isinstance(json_data, list)
            and len(json_data) > 0
            and isinstance(json_data[0], dict)
            and json_data[0].get('type') == 'HOTEL'
        )

        if first_is_hotel:
            # Create a DataFrame for the current city and append it to the list
            hotel_data = pd.DataFrame(json_data)
            dfs.append(hotel_data)

            # Use json.dumps with 'indent' parameter to pretty-print the JSON data
            json_data_pretty = json.dumps(json_data, indent=4)

            print(f"Hotel data for {city}:")
            print(json_data_pretty)
            break  # Data retrieved; go to the next city

        if attempt < MAX_ATTEMPTS:
            print(f"Response type for {city} is not 'HOTEL'. Retrying...")
            time.sleep(RETRY_DELAY)
    else:
        # Reached only when every attempt returned a non-HOTEL payload.
        print(f"Response type for {city} is not 'HOTEL' after {MAX_ATTEMPTS} attempts. Skipping.")



Response type for San Jose, CA is not 'HOTEL'. Retrying...
Hotel data for San Jose, CA:
[
    {
        "itemName": "Roseview House - San Jose, CA",
        "id": "125194804",
        "cityID": "3000002250",
        "type": "HOTEL",
        "lat": 37.3302,
        "lon": -121.89728,
        "proximity": 0.0,
        "savedTravelStartDate": null,
        "savedTravelEndDate": null,
        "cityName": "San Jose",
        "stateCode": "CA",
        "provinceName": "California",
        "score": 400809.66,
        "radius": 23.9,
        "rank": 1.0,
        "rank2": 97.81724548339844,
        "globalScore": 1.0,
        "globalScoreReducedCityBoost": 1.0,
        "globalScoreWOHotelCountNormalize": 1.0,
        "country": "US",
        "address": "396 W SAN FERNANDO ST",
        "zip": "95112",
        "countryName": "UNITED STATES",
        "countryCode": "US",
        "poiCategoryTypeId": 0,
        "poiCategoryName": null,
        "poiID": null,
        "seType": null,
        "gmtOff

In [4]:
# Stack the per-city frames row-wise into one table, renumbering the index from 0
final_df = pd.concat(objs=dfs, axis=0, ignore_index=True)

# Show the combined frame as the cell output
final_df

Unnamed: 0,itemName,id,cityID,type,lat,lon,proximity,savedTravelStartDate,savedTravelEndDate,cityName,...,poiCategoryTypeId,poiCategoryName,poiID,seType,gmtOffset,entered,highlightedName,displayLine1,displayLine2,fromSavedSearch
0,"Roseview House - San Jose, CA",125194804,3000002250,HOTEL,37.330200,-121.897280,0.0,,,San Jose,...,0,,,,-7.0,san jose ca,,Roseview House,"San Jose, California",False
1,"Boutique Hotel Calle 20 - San Jose, Costa Rica",13453403,3000070002,HOTEL,9.937550,-84.087102,0.0,,,San Jose,...,0,,,,-6.0,san jose ca,,Boutique Hotel Calle 20,"San Jose, Costa Rica",False
2,"Casa Lima B&B - San Jose, Costa Rica",42958406,3000070002,HOTEL,9.945148,-84.125250,0.0,,,San Jose,...,0,,,,-6.0,san jose ca,,Casa Lima B&B,"San Jose, Costa Rica",False
3,"El Mirador Del Valle - San Jose Del Valle, Spain",23075205,5000371869,HOTEL,36.612899,-5.804481,0.0,,,San Jose Del Valle,...,0,,,,2.0,san jose ca,,El Mirador Del Valle,"San Jose Del Valle, Spain",False
4,"La Caseta De Jose - Sueca, Spain",20960403,5000043954,HOTEL,39.248423,-0.260272,0.0,,,Sueca,...,0,,,,2.0,san jose ca,,La Caseta De Jose,"Sueca, Spain",False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145,"Aldrich Hotel - San Francisco, CA",41391104,3000002244,HOTEL,37.785436,-122.412940,0.0,,,San Francisco,...,0,,,,-7.0,san francisco ca,,Aldrich Hotel,"San Francisco, California",False
146,"Hotel Amari - San Francisco, CA",42559,3000002244,HOTEL,37.789009,-122.410939,0.0,,,San Francisco,...,0,,,,-7.0,san francisco ca,,Hotel Amari,"San Francisco, California",False
147,"Elite Inn - San Francisco, CA",31152804,3000002244,HOTEL,37.783166,-122.411225,0.0,,,San Francisco,...,0,,,,-7.0,san francisco ca,,Elite Inn,"San Francisco, California",False
148,"Layne Hotel - San Francisco, CA",2578205,3000002244,HOTEL,37.786397,-122.413120,0.0,,,San Francisco,...,0,,,,-7.0,san francisco ca,,Layne Hotel,"San Francisco, California",False


In [136]:
# Derive tidy name/city/state columns from the API's display fields.
# displayLine2 looks like "San Jose, California"; splitting on ',' leaves a
# leading space on the second part, so each piece is stripped.
final_df['name'] = final_df['displayLine1']
location_parts = final_df['displayLine2'].str.split(',')
final_df['city'] = location_parts.str[0].str.strip()
final_df['state'] = location_parts.str[1].str.strip()

# Columns kept in the exported hotel table
column_to_select = ['name',
                    'id',
                    'city',
                    'state',
                    'lat',
                    'lon',
                    'score',
                    'rank',
                    'rank2',
                    'globalScore',
                    'country'
                    ]

# Keep only rows whose country is 'US' (the search also returns foreign matches)
values_to_keep = 'US'         # value a row must have to be kept
column_to_check = 'country'   # column the value is compared against

filtered_df_country = final_df[final_df[column_to_check] == values_to_keep]

# Of those, keep only rows whose type is 'HOTEL'
values_to_keep = 'HOTEL'   # value a row must have to be kept
column_to_check = 'type'   # column the value is compared against

filtered_df_type = filtered_df_country[filtered_df_country[column_to_check] == values_to_keep]

hotel_data_select = filtered_df_type[column_to_select]

hotel_data_select

Unnamed: 0,name,id,city,state,lat,lon,score,rank,rank2,globalScore,country
0,Roseview House,125194804,San Jose,California,37.330200,-121.897280,400809.66,1.000000,97.817245,1.0,US
22,"Hampton Inn BY Hilton Santa Cruz West, CA",21328006,Santa Cruz,California,36.961063,-122.048920,175204.38,11.810301,49.023651,30.0,US
23,Super 8 BY Wyndham Santa Cruz/Beach Boardwalk ...,1812805,Santa Cruz,California,36.966590,-122.018330,175204.36,11.810301,6.002491,30.0,US
24,La Quinta Inn & Suites BY Wyndham Santa Cruz,39120,Santa Cruz,California,36.964360,-122.024280,175204.36,11.810301,5.927703,30.0,US
25,Comfort Inn Santa Cruz,43582,Santa Cruz,California,36.984950,-122.023220,175204.36,11.810304,66.405663,30.0,US
...,...,...,...,...,...,...,...,...,...,...,...
160,Civic Center Motor Inn,43023,San Francisco,California,37.772861,-122.410534,400658.12,11.862965,97.820938,30.0,US
161,Aldrich Hotel,41391104,San Francisco,California,37.785436,-122.412940,400658.12,11.840854,97.820930,30.0,US
162,Hotel Amari,42559,San Francisco,California,37.789009,-122.410939,400658.12,11.866960,97.820938,1.0,US
163,Layne Hotel,2578205,San Francisco,California,37.786397,-122.413120,400658.06,11.810301,97.820923,30.0,US


In [137]:
# Show the dtype of each column in the selected hotel table
hotel_data_select.dtypes

name            object
id              object
city            object
state           object
lat            float64
lon            float64
score          float64
rank           float64
rank2          float64
globalScore    float64
country         object
dtype: object

In [138]:
# Tally the missing values in each column of the hotel table and print the totals
null_counts = hotel_data_select.isna().sum()

print(null_counts)

name           0
id             0
city           0
state          0
lat            0
lon            0
score          0
rank           0
rank2          0
globalScore    0
country        0
dtype: int64


In [139]:
# Create the "data" folder if needed; exist_ok avoids the separate
# exists-then-mkdir check and the race between the two calls
os.makedirs("data", exist_ok=True)

# Save DataFrame to a CSV file inside the "data" folder
file_path = os.path.join("data", "hotel.csv")
hotel_data_select.to_csv(file_path, index=False)

print("DataFrame saved to 'data/hotel.csv' file.")

DataFrame saved to 'data/hotel.csv' file.


In [50]:
# Query the locations endpoint for airports matching "UNITED STATES"
url = "https://priceline-com-provider.p.rapidapi.com/v1/hotels/locations"

querystring = {"name": "UNITED STATES", "search_type": "AIRPORT"}

headers = {
    "X-RapidAPI-Key": priceline_api_key,
    "X-RapidAPI-Host": "priceline-com-provider.p.rapidapi.com"
}

# A timeout keeps the cell from hanging indefinitely if the service stalls
response = requests.get(url, headers=headers, params=querystring, timeout=30)

if response.status_code == 200:
    # Use json.dumps with 'indent' parameter to pretty-print the JSON data
    json_data = json.dumps(response.json(), indent=4)
    print(json_data)
else:
    print("Failed to get data. Status code:", response.status_code)

[
    {
        "itemName": "Orlando Intl Airport (MCO) , FL",
        "id": "MCO",
        "cityID": "3000003349",
        "type": "AIRPORT",
        "lat": 28.43113982264317,
        "lon": -81.3092428479283,
        "proximity": 0.0,
        "savedTravelStartDate": null,
        "savedTravelEndDate": null,
        "cityName": "Orlando",
        "stateCode": "FL",
        "provinceName": "Florida",
        "score": 388071.66,
        "radius": 33.7,
        "rank": 56.13621,
        "rank2": 86.16417694091797,
        "globalScore": 78.25274,
        "globalScoreReducedCityBoost": 62.42079,
        "globalScoreWOHotelCountNormalize": 48.595905,
        "country": "US",
        "address": null,
        "zip": null,
        "countryName": "UNITED STATES",
        "countryCode": "US",
        "poiCategoryTypeId": 0,
        "poiCategoryName": null,
        "poiID": null,
        "seType": null,
        "gmtOffset": -4.0,
        "entered": "united states",
        "highlightedName": nul

In [51]:
# Parse the raw body of the airport search response as JSON
dictionary_data = json.loads(response.text)

# Build the airport table from the parsed payload
airport_data = pd.DataFrame(data=dictionary_data)

# Show the table as the cell output
airport_data

Unnamed: 0,itemName,id,cityID,type,lat,lon,proximity,savedTravelStartDate,savedTravelEndDate,cityName,...,poiCategoryTypeId,poiCategoryName,poiID,seType,gmtOffset,entered,highlightedName,displayLine1,displayLine2,fromSavedSearch
0,"Orlando Intl Airport (MCO) , FL",MCO,3000003349,AIRPORT,28.43114,-81.309243,0.0,,,Orlando,...,0,,,,-4.0,united states,,Orlando - Orlando Intl Airport (MCO),Florida,False
1,"Miami Intl Airport (MIA) , FL",MIA,3000003311,AIRPORT,25.795076,-80.282961,0.0,,,Miami,...,0,,,,-4.0,united states,,Miami - Miami Intl Airport (MIA),Florida,False
2,"Hollywood Intl Airport (FLL) , FL",FLL,3000003192,AIRPORT,26.076856,-80.151238,0.0,,,Fort Lauderdale,...,0,,,,-4.0,united states,,Fort Lauderdale - Hollywood Intl Airport (FLL),Florida,False
3,Hartsfield-Jackson Atlanta Intl Airport (ATL) ...,ATL,3000003496,AIRPORT,33.640644,-84.422466,0.0,,,Atlanta,...,0,,,,-4.0,united states,,Atlanta - Hartsfield-Jackson Atlanta Intl Airp...,Georgia,False
4,"Phoenix Sky Harbor Intl Airport (PHX) , AZ",PHX,3000001349,AIRPORT,33.435468,-112.003795,0.0,,,Phoenix,...,0,,,,-7.0,united states,,Phoenix - Phoenix Sky Harbor Intl Airport (PHX),Arizona,False


In [52]:
# List the column labels of the airport table
airport_data.columns

Index(['itemName', 'id', 'cityID', 'type', 'lat', 'lon', 'proximity',
       'savedTravelStartDate', 'savedTravelEndDate', 'cityName', 'stateCode',
       'provinceName', 'score', 'radius', 'rank', 'rank2', 'globalScore',
       'globalScoreReducedCityBoost', 'globalScoreWOHotelCountNormalize',
       'country', 'address', 'zip', 'countryName', 'countryCode',
       'poiCategoryTypeId', 'poiCategoryName', 'poiID', 'seType', 'gmtOffset',
       'entered', 'highlightedName', 'displayLine1', 'displayLine2',
       'fromSavedSearch'],
      dtype='object')

In [57]:
# Derive tidy name/city/state columns from the API fields.
# itemName looks like "Orlando Intl Airport (MCO) , FL"; the part before the
# comma ends with a space, so it is stripped after splitting.
airport_data['name'] = airport_data['itemName'].str.split(',').str[0].str.strip()
airport_data['city'] = airport_data['cityName']
airport_data['state'] = airport_data['displayLine2']

# Columns kept in the exported airport table
column_to_select = ['name',
                    'id',
                    'city',
                    'state',
                    'lat',
                    'lon',
                    'score',
                    'rank',
                    'rank2',
                    'globalScore',
                    'country'
                    ]

airport_data_select = airport_data[column_to_select]

airport_data_select.head()

Unnamed: 0,name,id,city,state,lat,lon,score,rank,rank2,globalScore,country
0,Orlando Intl Airport (MCO),MCO,Orlando,Florida,28.43114,-81.309243,388071.66,56.13621,86.164177,78.25274,US
1,Miami Intl Airport (MIA),MIA,Miami,Florida,25.795076,-80.282961,387476.03,45.91389,85.568512,76.66467,US
2,Hollywood Intl Airport (FLL),FLL,Fort Lauderdale,Florida,26.076856,-80.151238,385823.97,57.068672,83.937508,75.521675,US
3,Hartsfield-Jackson Atlanta Intl Airport (ATL),ATL,Atlanta,Georgia,33.640644,-84.422466,385106.66,42.474194,83.238976,76.96165,US
4,Phoenix Sky Harbor Intl Airport (PHX),PHX,Phoenix,Arizona,33.435468,-112.003795,384922.3,47.539833,83.060394,74.87271,US


In [64]:
# Show the dtype of each column in the selected airport table
airport_data_select.dtypes

name            object
id              object
city            object
state           object
lat            float64
lon            float64
score          float64
rank           float64
rank2          float64
globalScore    float64
country         object
dtype: object

In [66]:
# Tally the missing values in each column of the airport table and print the totals
null_counts = airport_data_select.isna().sum()

print(null_counts)

name           0
id             0
city           0
state          0
lat            0
lon            0
score          0
rank           0
rank2          0
globalScore    0
country        0
dtype: int64


In [67]:
# Create the "data" folder if needed; exist_ok avoids the separate
# exists-then-mkdir check and the race between the two calls
os.makedirs("data", exist_ok=True)

# Save DataFrame to a CSV file inside the "data" folder
file_path = os.path.join("data", "airport.csv")
airport_data_select.to_csv(file_path, index=False)

print("DataFrame saved to 'data/airport.csv' file.")

DataFrame saved to 'data/airport.csv' file.


In [141]:
# Independent copy of the hotel table with an empty-string "price" column appended
test = hotel_data_select.assign(price="")

In [142]:

# Request settings shared by every hotel lookup, built once outside the loop
url = "https://priceline-com-provider.p.rapidapi.com/v1/hotels/booking-details"
headers = {
    "X-RapidAPI-Key": priceline_api_key,
    "X-RapidAPI-Host": "priceline-com-provider.p.rapidapi.com"
}
date_checkin = "2023-10-18"
date_checkout = "2023-10-19"
rooms_number = "1"
REQUEST_TIMEOUT = 30  # seconds before a stalled HTTP request is abandoned

# Look up the first displayed rate of the first room for each hotel in `test`
for index, row in test.iterrows():

    hotel_id = row["id"]

    querystring = {"date_checkin": date_checkin,"hotel_id": hotel_id,"date_checkout": date_checkout,"rooms_number": rooms_number}

    # Sentinel kept unless a price is found; a later cell filters on this exact text.
    price = "No price found"

    try:
        response = requests.get(url, headers=headers, params=querystring, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        booking_details = response.json()
        price = booking_details["rooms"][0]["displayableRates"][0]["displayPrice"]
    except (requests.RequestException, ValueError) as error:
        # Network error, timeout, HTTP error status or a body that is not JSON:
        # report it and keep going so one bad hotel does not abort the whole run.
        print(f"Request for hotel {hotel_id} failed: {error}")
    except (KeyError, IndexError, TypeError):
        # The payload has no rooms/rates (missing key, empty list or null value);
        # leave the price as the sentinel.
        pass

    test.loc[index, "price"] = price

    print(f"{test.loc[index, 'id']} - price: {test.loc[index, 'price']}")
    
        

125194804 - price: No price found
21328006 - price: 153.33
1812805 - price: 145.35
39120 - price: 154.70
43582 - price: 90.99
4325605 - price: 531.00
43966504 - price: 152.00
86220506 - price: No price found
86227506 - price: No price found
86205706 - price: No price found
86199806 - price: No price found
86209706 - price: No price found
165741304 - price: No price found
86203606 - price: No price found
86200706 - price: No price found
86219306 - price: No price found
86211506 - price: No price found
4536805 - price: No price found
122035703 - price: No price found
46299306 - price: 470.35
147643504 - price: 719.98
122040303 - price: No price found
43633 - price: 77.85
39373 - price: 339.00
40664 - price: 300.05
10114304 - price: 152.15
177202703 - price: No price found
39053 - price: 409.00
43607 - price: 209.20
9350505 - price: 649.00
17175204 - price: 539.10
43244 - price: 305.00
44061 - price: 179.10
39652 - price: 494.73
69378405 - price: 332.22
31380504 - price: 386.10
35908006 -

Unnamed: 0,name,id,city,state,lat,lon,score,rank,rank2,globalScore,country,price
0,Roseview House,125194804,San Jose,California,37.330200,-121.897280,400809.66,1.000000,97.817245,1.0,US,No price found
22,"Hampton Inn BY Hilton Santa Cruz West, CA",21328006,Santa Cruz,California,36.961063,-122.048920,175204.38,11.810301,49.023651,30.0,US,153.33
23,Super 8 BY Wyndham Santa Cruz/Beach Boardwalk ...,1812805,Santa Cruz,California,36.966590,-122.018330,175204.36,11.810301,6.002491,30.0,US,145.35
24,La Quinta Inn & Suites BY Wyndham Santa Cruz,39120,Santa Cruz,California,36.964360,-122.024280,175204.36,11.810301,5.927703,30.0,US,154.70
25,Comfort Inn Santa Cruz,43582,Santa Cruz,California,36.984950,-122.023220,175204.36,11.810304,66.405663,30.0,US,90.99
...,...,...,...,...,...,...,...,...,...,...,...,...
160,Civic Center Motor Inn,43023,San Francisco,California,37.772861,-122.410534,400658.12,11.862965,97.820938,30.0,US,No price found
161,Aldrich Hotel,41391104,San Francisco,California,37.785436,-122.412940,400658.12,11.840854,97.820930,30.0,US,No price found
162,Hotel Amari,42559,San Francisco,California,37.789009,-122.410939,400658.12,11.866960,97.820938,1.0,US,No price found
163,Layne Hotel,2578205,San Francisco,California,37.786397,-122.413120,400658.06,11.810301,97.820923,30.0,US,No price found


In [146]:
# Remove the rows whose price column holds the placeholder text
values_to_drop = 'No price found'  # placeholder value marking rows to discard
column_to_check = 'price'          # column the placeholder is looked up in

has_price = test[column_to_check].ne(values_to_drop)
filtered_test = test.loc[has_price]

# Number of hotels that remain
len(filtered_test)


55

In [147]:
# Create the "data" folder if needed; exist_ok avoids the separate
# exists-then-mkdir check and the race between the two calls
os.makedirs("data", exist_ok=True)

# Save DataFrame to a CSV file inside the "data" folder
file_path = os.path.join("data", "hotel_price.csv")
filtered_test.to_csv(file_path, index=False)

print("DataFrame saved to 'data/hotel_price.csv' file.")

DataFrame saved to 'data/hotel_price.csv' file.
