In [2]:
# Dependencies and Setup
import json
import os
import time

import pandas as pd
import requests

## Import the Priceline API key
from api_keys import priceline_api_key

In [14]:
# List of cities to look up in the Priceline location search
city_list = [
    'San Jose, CA',
    'Santa Cruz, CA',
    'Newport Beach, CA',
    'Napa, CA',
    'Santa Monica, CA',
    'Long Beach, CA',
    'Fresno, CA',
    'Monterey, CA',
    'Santa Barbara, CA',
    'Palm Springs, CA',
    'Sacramento, CA',
    'Anaheim, CA',
    'San Diego, CA',
    'Los Angeles, CA',
    'San Francisco, CA',
    'San Luis Obispo, CA',
    'Carmel-by-the-Sea, CA',
    'Laguna Beach, CA',
    'Morro Bay, CA',
    'San Clemente, CA',
    'Catalina Island, CA',
    'Pismo Beach, CA',
    'Big Bear Lake, CA',
    'Pasadena, CA',
    'Carlsbad, CA',
    'Yosemite, CA',
    'Huntington Beach, CA',
    'Lake Tahoe, CA',
    'Eureka, CA',
    'Bakersfield, CA',
    'Oceanside, CA'
]

# One DataFrame per successfully retrieved city is collected here
dfs = []

# Request settings that do not change between cities
url = "https://priceline-com-provider.p.rapidapi.com/v1/hotels/locations"
headers = {
    "X-RapidAPI-Key": priceline_api_key,
    "X-RapidAPI-Host": "priceline-com-provider.p.rapidapi.com"
}

# The endpoint sometimes answers with a non-HOTEL first hit, so the same query
# is retried. The retry count is bounded so one city cannot hang the notebook.
max_attempts = 10
retry_delay_seconds = 1
request_timeout_seconds = 30

# Loop through each city in the list
for city in city_list:
    querystring = {"name": city, "search_type": "HOTEL"}

    for attempt in range(1, max_attempts + 1):
        response = requests.get(
            url,
            headers=headers,
            params=querystring,
            timeout=request_timeout_seconds,
        )

        if response.status_code != 200:
            print(f"Failed to get data for {city}. Status code:", response.status_code)
            break  # Do not retry on an HTTP error

        # Parse the JSON body once and reuse it for both the check and the frame
        json_data = response.json()

        # Accept the payload only if it is a non-empty list whose first hit is a hotel
        if (
            isinstance(json_data, list)
            and json_data
            and json_data[0].get('type') == 'HOTEL'
        ):
            hotel_data = pd.DataFrame(json_data)
            dfs.append(hotel_data)
            break  # Data retrieved; move on to the next city

        print(f"Response type for {city} is not 'HOTEL'. Retrying...")
        if attempt < max_attempts:
            time.sleep(retry_delay_seconds)
    else:
        # Reached only when every attempt returned a non-HOTEL first hit
        print(f"Giving up on {city} after {max_attempts} attempts.")



Response type for Santa Cruz, CA is not 'HOTEL'. Retrying...
Response type for Napa, CA is not 'HOTEL'. Retrying...
Response type for Monterey, CA is not 'HOTEL'. Retrying...
Response type for Monterey, CA is not 'HOTEL'. Retrying...
Response type for Sacramento, CA is not 'HOTEL'. Retrying...
Response type for San Diego, CA is not 'HOTEL'. Retrying...
Response type for San Diego, CA is not 'HOTEL'. Retrying...
Response type for San Diego, CA is not 'HOTEL'. Retrying...
Response type for San Diego, CA is not 'HOTEL'. Retrying...
Response type for San Francisco, CA is not 'HOTEL'. Retrying...
Response type for Laguna Beach, CA is not 'HOTEL'. Retrying...
Response type for San Clemente, CA is not 'HOTEL'. Retrying...
Response type for Big Bear Lake, CA is not 'HOTEL'. Retrying...
Response type for Big Bear Lake, CA is not 'HOTEL'. Retrying...
Response type for Big Bear Lake, CA is not 'HOTEL'. Retrying...
Response type for Pasadena, CA is not 'HOTEL'. Retrying...
Response type for Yosemi

In [15]:
# Concatenate all DataFrames in the list into a single DataFrame.
# ignore_index=True discards the per-city row labels and renumbers the
# combined rows from 0.
# NOTE(review): if no city lookup succeeded `dfs` is empty and pd.concat
# will raise — confirm that is the desired failure mode.
final_df = pd.concat(dfs, ignore_index=True)

# Display the combined DataFrame as the cell output
final_df

Unnamed: 0,itemName,id,cityID,type,lat,lon,proximity,savedTravelStartDate,savedTravelEndDate,cityName,...,poiCategoryTypeId,poiCategoryName,poiID,seType,gmtOffset,entered,highlightedName,displayLine1,displayLine2,fromSavedSearch
0,"Roseview House - San Jose, CA",125194804,3000002250,HOTEL,37.330200,-121.897280,0.0,,,San Jose,...,0,,,,-7.0,san jose ca,,Roseview House,"San Jose, California",False
1,"Boutique Hotel Calle 20 - San Jose, Costa Rica",13453403,3000070002,HOTEL,9.937550,-84.087102,0.0,,,San Jose,...,0,,,,-6.0,san jose ca,,Boutique Hotel Calle 20,"San Jose, Costa Rica",False
2,"Casa Lima B&B - San Jose, Costa Rica",42958406,3000070002,HOTEL,9.945148,-84.125250,0.0,,,San Jose,...,0,,,,-6.0,san jose ca,,Casa Lima B&B,"San Jose, Costa Rica",False
3,"El Mirador Del Valle - San Jose Del Valle, Spain",23075205,5000371869,HOTEL,36.612899,-5.804481,0.0,,,San Jose Del Valle,...,0,,,,2.0,san jose ca,,El Mirador Del Valle,"San Jose Del Valle, Spain",False
4,"Hostal San Carlos - Lloret De Mar, Spain",55974405,5000300440,HOTEL,41.701133,2.846687,0.0,,,Lloret De Mar,...,0,,,,2.0,san jose ca,,Hostal San Carlos,"Lloret De Mar, Spain",False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
292,Updated Oceanside Townhome With Pool Walk TO B...,106244505,3000061781,HOTEL,21.092760,-86.769580,0.0,,,Cancun,...,0,,,,-5.0,oceanside ca,,Updated Oceanside Townhome With Pool Walk TO B...,"Cancun, Quintana Roo, Mexico",False
293,Ocean Dunes 5212 Sleeps Eight Pool Oceanside W...,171484503,3000019836,HOTEL,33.743270,-78.815910,0.0,,,Myrtle Beach,...,0,,,,-4.0,oceanside ca,,Ocean Dunes 5212 Sleeps Eight Pool Oceanside W...,"Myrtle Beach, South Carolina",False
294,On Oceanside And Clear Ocean Views Great Prici...,171484103,3000019836,HOTEL,33.743270,-78.815910,0.0,,,Myrtle Beach,...,0,,,,-4.0,oceanside ca,,On Oceanside And Clear Ocean Views Great Prici...,"Myrtle Beach, South Carolina",False
295,"Rodeway Inn Oceanside Marina - Oceanside, CA",5240705,3000002089,HOTEL,33.206600,-117.385200,0.0,,,Oceanside,...,0,,,,-7.0,oceanside ca,,Rodeway Inn Oceanside Marina,"Oceanside, California",False


In [16]:
# Derive tidy name / city / state columns from the API's display fields.
# `displayLine2` has the form "San Jose, California", so the part after the
# comma carries a leading space that must be stripped.
display_line2_parts = final_df['displayLine2'].str.split(',')
final_df['name'] = final_df['displayLine1']
final_df['city'] = display_line2_parts.str[0].str.strip()
final_df['state'] = display_line2_parts.str[1].str.strip()

column_to_select = ['name',
                    'id',
                    'city',
                    'state',
                    'lat',
                    'lon',
                    'score',
                    'rank',
                    'rank2',
                    'globalScore',
                    'country'
                    ]

# Keep only rows located in the United States
values_to_keep = 'US'         # Value that a row must have to be kept
column_to_check = 'country'   # Column the value is compared against

filtered_df_country = final_df[final_df[column_to_check] == values_to_keep]

# Keep only rows whose result type is a hotel
values_to_keep = 'HOTEL'      # Value that a row must have to be kept
column_to_check = 'type'      # Column the value is compared against

filtered_df_type = filtered_df_country[filtered_df_country[column_to_check] == values_to_keep]

# NOTE(review): the name search is fuzzy, so US hotels outside California
# (e.g. Myrtle Beach, South Carolina) pass both filters. Confirm whether an
# additional state filter is wanted.
hotel_data_select = filtered_df_type[column_to_select]

# Drop duplicates based on the 'id' column, keeping the first occurrence
hotel_data_select = hotel_data_select.drop_duplicates(subset=['id'], keep='first')

hotel_data_select

Unnamed: 0,name,id,city,state,lat,lon,score,rank,rank2,globalScore,country
0,Roseview House,125194804,San Jose,California,37.330200,-121.897280,400808.88,1.000000,97.817245,1.0,US
10,Surf City Inn & Suites,42212,Santa Cruz,California,36.969860,-122.019880,400925.56,11.810301,97.820923,30.0,US
11,Villa Vista,71834504,Santa Cruz,California,36.955865,-121.977519,400922.38,1.000000,97.817245,1.0,US
12,Magical Oasis,77022303,Santa Cruz,California,36.956323,-122.025119,400921.44,0.500000,97.816200,1.0,US
20,Newport Beach Simplicity I & Ii,86227506,Newport Beach,California,33.615310,-117.932690,400936.80,0.500000,97.816200,30.0,US
...,...,...,...,...,...,...,...,...,...,...,...
288,Ocean View Compound,86210306,Oceanside,California,33.200380,-117.388660,400808.75,0.500000,97.816200,30.0,US
293,Ocean Dunes 5212 Sleeps Eight Pool Oceanside W...,171484503,Myrtle Beach,South Carolina,33.743270,-78.815910,398967.34,0.500000,97.816200,1.0,US
294,On Oceanside And Clear Ocean Views Great Prici...,171484103,Myrtle Beach,South Carolina,33.743270,-78.815910,398967.34,0.500000,97.816200,1.0,US
295,Rodeway Inn Oceanside Marina,5240705,Oceanside,California,33.206600,-117.385200,372648.22,12.239823,70.453796,30.0,US


In [7]:
# Inspect the dtype of each selected hotel column
hotel_data_select.dtypes

name            object
id              object
city            object
state           object
lat            float64
lon            float64
score          float64
rank           float64
rank2          float64
globalScore    float64
country         object
dtype: object

In [138]:
# Count the missing values in every column of the hotel selection
null_counts = hotel_data_select.isna().sum(axis=0)
print(null_counts)

name           0
id             0
city           0
state          0
lat            0
lon            0
score          0
rank           0
rank2          0
globalScore    0
country        0
dtype: int64


In [139]:
# Create a folder named "data" if it doesn't exist
if not os.path.exists("data"):
    os.mkdir("data")

# Save DataFrame to a CSV file inside the "data" folder
file_path = os.path.join("data", "hotel.csv")
hotel_data_select.to_csv(file_path, index=False)

print("DataFrame saved to 'data/hotel.csv' file.")

DataFrame saved to 'data/hotel.csv' file.


In [50]:
# Query the Priceline location search for airports matching "UNITED STATES"
url = "https://priceline-com-provider.p.rapidapi.com/v1/hotels/locations"

querystring = {"name": "UNITED STATES", "search_type": "AIRPORT"}

headers = {
    "X-RapidAPI-Key": priceline_api_key,
    "X-RapidAPI-Host": "priceline-com-provider.p.rapidapi.com"
}

# requests never times out by default, so an explicit timeout keeps an
# unresponsive server from hanging the cell indefinitely.
response = requests.get(url, headers=headers, params=querystring, timeout=30)

if response.status_code == 200:
    # Use json.dumps with 'indent' parameter to pretty-print the JSON data
    json_data = json.dumps(response.json(), indent=4)
    print(json_data)
else:
    print("Failed to get data. Status code:", response.status_code)

[
    {
        "itemName": "Orlando Intl Airport (MCO) , FL",
        "id": "MCO",
        "cityID": "3000003349",
        "type": "AIRPORT",
        "lat": 28.43113982264317,
        "lon": -81.3092428479283,
        "proximity": 0.0,
        "savedTravelStartDate": null,
        "savedTravelEndDate": null,
        "cityName": "Orlando",
        "stateCode": "FL",
        "provinceName": "Florida",
        "score": 388071.66,
        "radius": 33.7,
        "rank": 56.13621,
        "rank2": 86.16417694091797,
        "globalScore": 78.25274,
        "globalScoreReducedCityBoost": 62.42079,
        "globalScoreWOHotelCountNormalize": 48.595905,
        "country": "US",
        "address": null,
        "zip": null,
        "countryName": "UNITED STATES",
        "countryCode": "US",
        "poiCategoryTypeId": 0,
        "poiCategoryName": null,
        "poiID": null,
        "seType": null,
        "gmtOffset": -4.0,
        "entered": "united states",
        "highlightedName": nul

In [51]:
# Fail loudly on an HTTP error instead of building a frame from an error body
response.raise_for_status()

# Despite its name, `dictionary_data` holds the parsed JSON payload, which the
# pretty-printed response shows to be a list of airport records.
dictionary_data = response.json()

# One row per airport record
airport_data = pd.DataFrame(dictionary_data)

airport_data

Unnamed: 0,itemName,id,cityID,type,lat,lon,proximity,savedTravelStartDate,savedTravelEndDate,cityName,...,poiCategoryTypeId,poiCategoryName,poiID,seType,gmtOffset,entered,highlightedName,displayLine1,displayLine2,fromSavedSearch
0,"Orlando Intl Airport (MCO) , FL",MCO,3000003349,AIRPORT,28.43114,-81.309243,0.0,,,Orlando,...,0,,,,-4.0,united states,,Orlando - Orlando Intl Airport (MCO),Florida,False
1,"Miami Intl Airport (MIA) , FL",MIA,3000003311,AIRPORT,25.795076,-80.282961,0.0,,,Miami,...,0,,,,-4.0,united states,,Miami - Miami Intl Airport (MIA),Florida,False
2,"Hollywood Intl Airport (FLL) , FL",FLL,3000003192,AIRPORT,26.076856,-80.151238,0.0,,,Fort Lauderdale,...,0,,,,-4.0,united states,,Fort Lauderdale - Hollywood Intl Airport (FLL),Florida,False
3,Hartsfield-Jackson Atlanta Intl Airport (ATL) ...,ATL,3000003496,AIRPORT,33.640644,-84.422466,0.0,,,Atlanta,...,0,,,,-4.0,united states,,Atlanta - Hartsfield-Jackson Atlanta Intl Airp...,Georgia,False
4,"Phoenix Sky Harbor Intl Airport (PHX) , AZ",PHX,3000001349,AIRPORT,33.435468,-112.003795,0.0,,,Phoenix,...,0,,,,-7.0,united states,,Phoenix - Phoenix Sky Harbor Intl Airport (PHX),Arizona,False


In [52]:
# List every column returned by the airport search
airport_data.columns

Index(['itemName', 'id', 'cityID', 'type', 'lat', 'lon', 'proximity',
       'savedTravelStartDate', 'savedTravelEndDate', 'cityName', 'stateCode',
       'provinceName', 'score', 'radius', 'rank', 'rank2', 'globalScore',
       'globalScoreReducedCityBoost', 'globalScoreWOHotelCountNormalize',
       'country', 'address', 'zip', 'countryName', 'countryCode',
       'poiCategoryTypeId', 'poiCategoryName', 'poiID', 'seType', 'gmtOffset',
       'entered', 'highlightedName', 'displayLine1', 'displayLine2',
       'fromSavedSearch'],
      dtype='object')

In [57]:
# `itemName` has the form "Orlando Intl Airport (MCO) , FL". Take the part
# before the comma and strip the trailing space it would otherwise keep.
airport_data['name'] = airport_data['itemName'].str.split(',').str[0].str.strip()
airport_data['city'] = airport_data['cityName']
# For airport results `displayLine2` holds the state name (e.g. "Florida")
airport_data['state'] = airport_data['displayLine2']

column_to_select = ['name',
                    'id',
                    'city',
                    'state',
                    'lat',
                    'lon',
                    'score',
                    'rank',
                    'rank2',
                    'globalScore',
                    'country'
                    ]

# Take an independent copy so later edits to the selection cannot alias
# `airport_data` or trigger SettingWithCopyWarning.
airport_data_select = airport_data[column_to_select].copy()

airport_data_select.head()

Unnamed: 0,name,id,city,state,lat,lon,score,rank,rank2,globalScore,country
0,Orlando Intl Airport (MCO),MCO,Orlando,Florida,28.43114,-81.309243,388071.66,56.13621,86.164177,78.25274,US
1,Miami Intl Airport (MIA),MIA,Miami,Florida,25.795076,-80.282961,387476.03,45.91389,85.568512,76.66467,US
2,Hollywood Intl Airport (FLL),FLL,Fort Lauderdale,Florida,26.076856,-80.151238,385823.97,57.068672,83.937508,75.521675,US
3,Hartsfield-Jackson Atlanta Intl Airport (ATL),ATL,Atlanta,Georgia,33.640644,-84.422466,385106.66,42.474194,83.238976,76.96165,US
4,Phoenix Sky Harbor Intl Airport (PHX),PHX,Phoenix,Arizona,33.435468,-112.003795,384922.3,47.539833,83.060394,74.87271,US


In [64]:
# Inspect the dtype of each selected airport column
airport_data_select.dtypes

name            object
id              object
city            object
state           object
lat            float64
lon            float64
score          float64
rank           float64
rank2          float64
globalScore    float64
country         object
dtype: object

In [66]:
# Count the missing values in every column of the airport selection
null_counts = airport_data_select.isna().sum(axis=0)
print(null_counts)

name           0
id             0
city           0
state          0
lat            0
lon            0
score          0
rank           0
rank2          0
globalScore    0
country        0
dtype: int64


In [67]:
# Create a folder named "data" if it doesn't exist
if not os.path.exists("data"):
    os.mkdir("data")

# Save DataFrame to a CSV file inside the "data" folder
file_path = os.path.join("data", "airport.csv")
airport_data_select.to_csv(file_path, index=False)

print("DataFrame saved to 'data/airport.csv' file.")

DataFrame saved to 'data/airport.csv' file.


In [17]:
# Work on a copy of the hotel selection with an empty "price" column appended
test = hotel_data_select.copy().assign(price="")

In [18]:

# Look up a display price for every hotel via the booking-details endpoint.
# Rows without a usable price get the sentinel "No price found", which the
# filtering cell relies on.

# Request settings that are identical for every hotel
url = "https://priceline-com-provider.p.rapidapi.com/v1/hotels/booking-details"
headers = {
    "X-RapidAPI-Key": priceline_api_key,
    "X-RapidAPI-Host": "priceline-com-provider.p.rapidapi.com"
}
date_checkin = "2023-10-18"   # Fixed one-night stay used for every lookup
date_checkout = "2023-10-19"
rooms_number = "1"

for index, row in test.iterrows():
    hotel_id = row["id"]

    querystring = {
        "date_checkin": date_checkin,
        "hotel_id": hotel_id,
        "date_checkout": date_checkout,
        "rooms_number": rooms_number,
    }

    try:
        # The timeout keeps one unresponsive request from stalling the loop
        response = requests.get(url, headers=headers, params=querystring, timeout=30)
        booking_details = response.json()
    except (requests.RequestException, ValueError) as error:
        # Network failure or a body that is not valid JSON
        print(f"{hotel_id} - request failed: {error}")
        test.loc[index, "price"] = "No price found"
        continue

    try:
        test.loc[index, "price"] = booking_details["rooms"][0]["displayableRates"][0]["displayPrice"]
    except (KeyError, IndexError, TypeError):
        # No rooms or rates in the payload. TypeError covers fields that are
        # present but null.
        test.loc[index, "price"] = "No price found"

    
        

125194804 - price: No price found
42212 - price: 279.00
71834504 - price: No price found
77022303 - price: No price found
86227506 - price: No price found
86220506 - price: No price found
86205706 - price: No price found
86199806 - price: No price found
86209706 - price: No price found
86211506 - price: No price found
86200706 - price: No price found
86203606 - price: No price found
86219306 - price: No price found
165741304 - price: No price found
4536805 - price: No price found
147643504 - price: 719.98
122035703 - price: No price found
122040303 - price: No price found
46299306 - price: 470.35
43633 - price: 77.85
39373 - price: 339.00
40664 - price: 300.05
10114304 - price: 152.15
177202703 - price: No price found
39053 - price: 409.00
43607 - price: 209.20
9350505 - price: 649.00
17175204 - price: 539.10
43244 - price: 305.00
44061 - price: 179.10
39652 - price: 494.73
69378405 - price: 332.22
31380504 - price: 386.10
35908006 - price: No price found
29945705 - price: 137.61
45322

In [19]:
# Drop rows where a certain value is present in a specific column
values_to_drop = 'No price found'  # Replace with the value you want to drop
column_to_check = 'price'   # Replace with the column name

filtered_test = test[test[column_to_check] != values_to_drop]

len(filtered_test)


173

In [23]:
# Display the rows of `test` whose price is not the 'No price found' sentinel
filtered_test

Unnamed: 0,name,id,city,state,lat,lon,score,rank,rank2,globalScore,country,price
10,Surf City Inn & Suites,42212,Santa Cruz,California,36.969860,-122.019880,400925.56,11.810301,97.820923,30.0,US,279.00
31,Silverado Golf Course,147643504,Napa,California,38.359940,-122.264030,400987.97,0.500000,97.816200,30.0,US,719.98
34,Inn On Randolph,46299306,Napa,California,38.292615,-122.287015,400987.97,0.500000,97.816200,30.0,US,470.35
36,Quality Inn Near Six Flags Discovery Kingdom-N...,43633,Vallejo,California,38.135570,-122.213600,375053.16,11.810301,74.033867,30.0,US,77.85
37,Embassy Suites BY Hilton Hotel Napa Valley,39373,Napa,California,38.297720,-122.300320,368351.38,13.473780,66.864861,30.0,US,339.00
...,...,...,...,...,...,...,...,...,...,...,...,...
284,"Motel 6-Bakersfield, CA - Airport",18718704,Bakersfield,California,35.415570,-119.060431,363192.56,11.810460,62.982529,30.0,US,53.99
285,Courtyard BY Marriott Bakersfield,43831,Bakersfield,California,35.380810,-119.048030,363191.62,11.823450,62.982460,30.0,US,139.00
286,Hampton Inn BY Hilton Bakersfield Central,40334,Bakersfield,California,35.367607,-119.039201,361070.03,11.836448,61.438564,30.0,US,93.24
295,Rodeway Inn Oceanside Marina,5240705,Oceanside,California,33.206600,-117.385200,372648.22,12.239823,70.453796,30.0,US,68.08


In [21]:
# Create a folder named "data" if it doesn't exist
if not os.path.exists("data"):
    os.mkdir("data")

# Save DataFrame to a CSV file inside the "data" folder
file_path = os.path.join("data", "hotel_price.csv")
filtered_test.to_csv(file_path, index=False)

print("DataFrame saved to 'data/hotel_price.csv' file.")

DataFrame saved to 'data/hotel_price.csv' file.


In [5]:
# Look up the overall guest rating for every priced hotel. Rows without a
# rating get the sentinel "No rating found", which the filtering cell relies on.
# NOTE(review): this calls the same booking-details endpoint as the price
# lookup, so both fields could be read from a single request per hotel.
test = filtered_test.copy()

# Create the column up front as object dtype so it can hold both numeric
# ratings and the string sentinel without dtype-upcast warnings.
test["guestrating"] = None

# Request settings that are identical for every hotel
url = "https://priceline-com-provider.p.rapidapi.com/v1/hotels/booking-details"
headers = {
    "X-RapidAPI-Key": priceline_api_key,
    "X-RapidAPI-Host": "priceline-com-provider.p.rapidapi.com"
}
date_checkin = "2023-10-18"   # Fixed one-night stay used for every lookup
date_checkout = "2023-10-19"
rooms_number = "1"

for index, row in test.iterrows():
    hotel_id = row["id"]

    querystring = {
        "date_checkin": date_checkin,
        "hotel_id": hotel_id,
        "date_checkout": date_checkout,
        "rooms_number": rooms_number,
    }

    try:
        # The timeout keeps one unresponsive request from stalling the loop
        response = requests.get(url, headers=headers, params=querystring, timeout=30)
        booking_details = response.json()
        test.loc[index, "guestrating"] = booking_details["overallGuestRating"]
    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError):
        # Request failed, body was not JSON, or the payload carries no rating
        test.loc[index, "guestrating"] = "No rating found"
        print(f"{hotel_id} - No rating found")
        
    


In [7]:
# Number of hotels for which no guest rating was found
int(test["guestrating"].eq("No rating found").sum())

5

In [9]:
# Keep only the hotels for which a guest rating was found
values_to_drop = 'No rating found'  # Sentinel marking rows without a rating
column_to_check = 'guestrating'     # Column holding the looked-up rating

filtered_test = test[test[column_to_check] != values_to_drop]

# A bare len(...) in the middle of a cell displays nothing, so print the count
print(f"{len(filtered_test)} hotels kept after dropping rows without a guest rating.")

# Make sure the "data" folder exists. exist_ok=True avoids the
# check-then-create race of os.path.exists + os.mkdir.
os.makedirs("data", exist_ok=True)

# Save DataFrame to a JSON file inside the "data" folder, one object per row
json_data = filtered_test.to_json(orient='records')
file_path = os.path.join("data", "hotel_final.json")

with open(file_path, 'w', encoding='utf-8') as json_file:
    json_file.write(json_data)

print("DataFrame saved to 'data/hotel_final.json' file.")

DataFrame saved to 'data/hotel_final.json' file.
