# Prepare Tourist Data per Location

<b>Disclaimers: </b>

- None of the extracted data are my own; they are the property of www.booking.com, www.zomato.com, and www.tripadvisor.com.ph.
- The scraped data are used purely for personal educational purposes and will not be used for any commercial purpose.

In [1]:
# import needed libraries
import pandas as pd

In [2]:
# load the three scraped datasets, one DataFrame per CSV file
hotel, resto, tourist = (
    pd.read_csv(csv_path)
    for csv_path in ('Hotel.csv', 'Restaurant.csv', 'Tourist_site.csv')
)

# show column names, non-null counts and dtypes of each frame
for loaded_frame in (hotel, resto, tourist):
    loaded_frame.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25 entries, 0 to 24
Data columns (total 7 columns):
 #   Column                     Non-Null Count  Dtype 
---  ------                     --------------  ----- 
 0   hotel_name                 25 non-null     object
 1   hotel_location             25 non-null     object
 2   hotel_amenities            25 non-null     object
 3   hotel_min_price_range_php  25 non-null     object
 4   hotel_avail_rooms          25 non-null     object
 5   hotel_other_info           25 non-null     object
 6   hotel_reviews              25 non-null     object
dtypes: object(7)
memory usage: 1.5+ KB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 27 entries, 0 to 26
Data columns (total 7 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   restaurant_name         27 non-null     object
 1   restaurant_location     27 non-null     object
 2   resto_specialty         27 non-null     object
 

In [3]:
# standardize location column names so all three frames share a 'location' key
hotel = hotel.rename(columns={'hotel_location': 'location'})
resto = resto.rename(columns={'restaurant_location': 'location'})
tourist = tourist.rename(columns={'tourist_site_location': 'location'})

# create new column category for indexing later
hotel['category'] = 'hotels'
resto['category'] = 'restaurants'
tourist['category'] = 'tourist_sites'

# remove surrounding whitespace from location names
# (.str.strip() leaves missing values as NaN instead of raising like x.strip() would)
dfs = [hotel, resto, tourist]
for df in dfs:
    df['location'] = df['location'].str.strip()
    print('=====')
    print(df[['location', 'category']].head(3))

# clean location names
# NOTE: the column label is required here; `hotel.loc[mask] = value` would
# overwrite EVERY column of the matching rows with the city name.
hotel.loc[hotel['location'] == 'Paranaque City', 'location'] = 'Parañaque City'

=====
          location category
0       Pasay City   hotels
1  Muntinlupa City   hotels
2       Pasig City   hotels
=====
         location     category
0     Quezon City  restaurants
1     Makati City  restaurants
2  Parañaque City  restaurants
=====
      location       category
0  Manila City  tourist_sites
1  Quezon City  tourist_sites
2  Manila City  tourist_sites


In [4]:
# show how many rows each location has in every dataframe, plus the overall total
sections = (
    ('=======Hotels=======', hotel),
    ('=======Restaurants=======', resto),
    ('=======Tourist Sites=======', tourist),
)
for header, frame in sections:
    location_counts = frame.location.value_counts()
    print(header)
    print(location_counts)
    print('Total:', location_counts.sum())

Pasay City         7
Makati City        6
Quezon City        5
Pasig City         3
Muntinlupa City    2
Parañaque City     1
Manila City        1
Name: location, dtype: int64
Total: 25
Quezon City        10
Taguig City         6
Makati City         3
Muntinlupa City     2
Parañaque City      1
Las Piñas City      1
Tagaytay City       1
Pasig City          1
San Juan City       1
Pasay City          1
Name: location, dtype: int64
Total: 27
Manila City         12
Pasay City           6
Quezon City          3
Taguig City          3
Makati City          3
Mandaluyong City     1
San Juan City        1
Muntinlupa City      1
Name: location, dtype: int64
Total: 30


In [5]:
# index location and category and merge the 3 dataframes
index = ['location', 'category']
merged_df = hotel.merge(resto, how = 'outer', on = index).set_index(index)

# drop_duplicates() returns a new frame (the result must be kept) and it
# compares columns only, never the index. It therefore runs while
# location/category are still regular columns, so rows that differ only by
# location or category are not mistaken for duplicates.
overall_df = (
    merged_df
    .merge(tourist, how = 'outer', on = index)
    .drop_duplicates()
    .set_index(index)
)
overall_df.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,hotel_name,hotel_amenities,hotel_min_price_range_php,hotel_avail_rooms,hotel_other_info,hotel_reviews,restaurant_name,resto_specialty,restaurant_price_range,seat_capacity,restaurant_other_info,restaurant_reviews,tourist_site_name,tourist_site_classification,tourist_site_other_info_overall_rating,tourist_site_reviews,approx_visitors_per_year_in_thousands,tourist_site_entance_fee_in_pesos
location,category,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
Pasay City,hotels,Sofitel Philippine Plaza Manila,"1 swimming pool, Airport shuttle, Non-smoking ...",10k and up,{'Luxury King Room with Bay View': 'Only 7 roo...,"Located in Pasay City, Sofitel Philippine Plaz...",Reviewer Name: \nStefan\n\nReview: There was n...,,,,,,,,,,,,
Pasay City,hotels,Conrad Manila,"1 swimming pool, Airport shuttle, Non-smoking ...",10k and up,{'Deluxe King Room with Panorama Bay View': 'O...,"Featuring free WiFi in public areas, Conrad Ma...",Reviewer Name: \nJoe\n\nReview: excellent cust...,,,,,,,,,,,,
Pasay City,hotels,Selah Pods Hotel Manila,"Non-smoking rooms, Family rooms, Free WiFi, Ro...",5k to 10k,{'Standard Double Room without View': 'Only 2 ...,You're eligible for a Genius discount at Selah...,Reviewer Name: \nMaribel\n\nReview: bathroom ...,,,,,,,,,,,,
Pasay City,hotels,Asiatel,"Non-smoking rooms, Free WiFi, Restaurant, Room...",5k to 10k,{'Suite Family': 'Only 6 left on our site'},You're eligible for a Genius discount at OYO 1...,Reviewer Name: \nJohn\n\nReview: Information f...,,,,,,,,,,,,
Pasay City,hotels,Hilton Manila,"1 swimming pool, Airport shuttle, Non-smoking ...",5k to 10k,"{'King Room': 'Only 6 rooms left on our site',...","Set within Resorts World Manila, 2-minute walk...",Reviewer Name: \nShaina\n\nReview: Pool was so...,,,,,,,,,,,,
Pasay City,hotels,Tryp By Wyndham Mall Of Asia Manila,"1 swimming pool, Airport shuttle, Non-smoking ...",5k to 10k,{'Twin Room with Bay View': 'More than 7 avail...,"Overlooking the Manila Bay, TRYP by Wyndham Ma...",Reviewer Name: \nYoyoshin\n\nReview: the place...,,,,,,,,,,,,
Pasay City,hotels,Midas Hotel And Casino,"Airport shuttle, Non-smoking rooms, Family roo...",5k to 10k,{'Executive Double or Twin Room': 'More than 7...,Situated along Roxas Boulevard offering views ...,Reviewer Name: \nGerald\n\nReview: Upgrade to ...,,,,,,,,,,,,
Muntinlupa City,hotels,Azumi Boutique,"1 swimming pool, Airport shuttle, Non-smoking ...",5k to 10k,"{'King Room': 'Only 5 rooms left on our site',...",You're eligible for a Genius discount at Azumi...,Reviewer Name: \nMaria\n\nReview: None Servic...,,,,,,,,,,,,
Muntinlupa City,hotels,Crimson Filinvest City Manila,"1 swimming pool, Airport shuttle, Non-smoking ...",10k and up,{'One Bedroom Suite Double Room': 'Only 3 left...,You're eligible for a Genius discount at Crims...,Reviewer Name: \nCaroline\n\nReview: the beddi...,,,,,,,,,,,,
Pasig City,hotels,Marco Polo Ortigas Manila,"1 swimming pool, Airport shuttle, Non-smoking ...",10k and up,{'Continental Superior King Room': 'Only 4 roo...,"Located in the heart of Manila’s city centre, ...",Reviewer Name: \nJanet\n\nReview: No buffet ye...,,,,,,,,,,,,


In [6]:
# count rows per location (all categories combined), largest first
count_loc = overall_df.groupby(level='location').size().sort_values(ascending = False)

# I chose Quezon City location since it has the most data
# Select by exact match on the 'location' index level. filter(like=...) on a
# MultiIndex does a substring search over the stringified (location, category)
# tuple, which could also match other labels containing the same text.
is_quezon_city = overall_df.index.get_level_values('location') == 'Quezon City'
qc = overall_df.loc[is_quezon_city]

print(count_loc)
print('Total:', count_loc.sum())
print('Check Count of Quezon City:', len(qc))

location
Quezon City         18
Pasay City          14
Manila City         13
Makati City         12
Taguig City          9
Muntinlupa City      5
Pasig City           4
Parañaque City       2
San Juan City        2
Las Piñas City       1
Mandaluyong City     1
Tagaytay City        1
dtype: int64
Total: 82
Check Count of Quezon City: 18


In [7]:
# write the Quezon City subset to disk (the index is written as well, since to_csv keeps it by default)
output_path = 'Location.csv'
qc.to_csv(output_path)