# Loading and interpreting DataFrames

## Lisbon Hotels:

In [1]:
import requests
import pandas as pd
import os
from dotenv import load_dotenv, find_dotenv
import time

In [2]:
df_hotels = pd.read_csv("lisbon_hotels.csv")
df_hotels.head(10)

Unnamed: 0,location_id,name,rating,num_reviews,ranking,price_level,street1,city,country,postalcode,address_string
0,2292067,Czar Lisbon Hotel,3.7,947,,$$,Avenida Almirante Reis 103,Lisbon,Portugal,1150-0020,"Avenida Almirante Reis 103, Lisbon 1150-0020 P..."
1,206890,Avani Avenida Liberdade Lisbon Hotel,4.3,796,,$$$,Rua Julio Cesar Machado 7 9,Lisbon,Portugal,1250-135,"Rua Julio Cesar Machado 7 9, Lisbon 1250-135 P..."
2,11600027,Empire Lisbon Hotel,4.3,398,,$$,Avenida Almirante Reis 130,Lisbon,Portugal,1150-023,"Avenida Almirante Reis 130, Lisbon 1150-023 Po..."
3,8842153,Palacio Do Governador - Lisbon Hotel & Spa,4.4,526,,$$$,"Rua Bartolomeu Dias, 117",Lisbon,Portugal,1400-030,"Rua Bartolomeu Dias, 117, Lisbon 1400-030 Port..."
4,19780664,Aroeira Lisbon Hotel - Sea & Golf Resort,4.1,93,,$$,Avenida Pinhal da Aroeira 1,Aroeira,Portugal,2820-112,Avenida Pinhal da Aroeira 1 Herdade da Aroeira...
5,553361,Dinya Lisbon Hotel & Lounge Bar,4.3,34,,$$,Rua Ilha do Pico 3,Lisbon,Portugal,1000-169,"Rua Ilha do Pico 3, Lisbon 1000-169 Portugal"
6,781013,New Style Lisbon Hotel,3.6,97,,$$,Avenida Almirante Reis 53,Lisbon,Portugal,1150-011,"Avenida Almirante Reis 53, Lisbon 1150-011 Por..."
7,12659702,Corpo Santo Lisbon Historical Hotel,4.9,4398,,$$$$,"Largo do Corpo Santo, 25",Lisbon,Portugal,1200-129,"Largo do Corpo Santo, 25, Lisbon 1200-129 Port..."
8,13402042,Hotel Da Baixa,4.9,2868,,$$$,Rua da Prata 231,Lisbon,Portugal,1100-417,"Rua da Prata 231, Lisbon 1100-417 Portugal"
9,195643,Hotel Avenida Palace,4.8,5028,,$$$$,Rua 1 Dezembro 123,Lisbon,Portugal,1200-359,"Rua 1 Dezembro 123, Lisbon 1200-359 Portugal"


---------------------------------------------------
## Lisbon Restaurants
------------------------------------------------

In [20]:
df_r = pd.read_csv("lisbon_restaurants.csv")
df_r.head()

Unnamed: 0,location_id,name,rating,num_reviews,ranking,price_level,cuisine,street1,city,country,postalcode,address_string
0,12516582,Love Lisbon Restaurant & Bar,4.1,44,,$$ - $$$,"bar, asian, nepali",R. Jose Antonio Serrano Floor 4,Lisbon,Portugal,1150-033,"R. Jose Antonio Serrano Floor 4, Lisbon 1150-0..."
1,4974326,Madrid Lisbon Restaurant,4.8,4,,$$ - $$$,"european, spanish, portuguese",325 Lafayette St,Newark,United States,07105-2724,"325 Lafayette St, Newark, NJ 07105-2724"
2,1520116,Floresta Das Escadinhas,4.8,7099,,$,"mediterranean, barbecue, european, healthy, po...",Rua de Santa Justa N_3,Lisbon,Portugal,1100-483,"Rua de Santa Justa N_3, Lisbon 1100-483 Portugal"
3,2308921,Frade dos Mares,4.8,5546,,$$ - $$$,"seafood, mediterranean, european, portuguese",Av. Dom Carlos I 55A,Lisbon,Portugal,1200-647,"Av. Dom Carlos I 55A, Lisbon 1200-647 Portugal"
4,1886758,Restaurant Ze da Mouraria,4.3,651,,$$ - $$$,"mediterranean, european, portuguese",Rua Joao do Outeiro 24,Lisbon,Portugal,1100-292,"Rua Joao do Outeiro 24, Lisbon 1100-292 Portugal"


In [22]:
df_r['city'].value_counts() 

city
Lisbon         56
Newark          1
Jericho         1
Carle Place     1
Mineola         1
La Oroya        1
Addis Ababa     1
Name: count, dtype: int64

 - API fetched restaurants outside of Lisbon because their name or description contains “Lisbon”. 
 - So I must clean this to only have restaurants situated in Lisbon.

In [23]:
df_r = df_r[df_r["address_string"].str.contains("Lisbon", case=False, na=False)]


In [24]:
df_r['city'].value_counts()

city
Lisbon    56
Name: count, dtype: int64

In [26]:
df_r.drop(columns=["ranking"], inplace=True) # Removing the ranking column because it is empty.

### Luxury Restaurants

In [28]:
# Step 1: filter the most expensive
df_expensive = df_r[df_r["price_level"] == "$$$$"]

# Step 2: sort expensive ones by rating & reviews
df_expensive_sorted = df_expensive.sort_values(
    by=["rating", "num_reviews"],
    ascending=False
)

# I only have 7 restaurants with $$$$ price, we need extra restaurants:
num_missing = 10 - len(df_expensive_sorted)

if num_missing > 0:
    # Taking additional high-rated restaurants from the next tier $$ - $$$
    df_medium = df_r[df_r["price_level"] == "$$ - $$$"]
    
    df_medium_sorted = df_medium.sort_values(
        by=["rating", "num_reviews"],
        ascending=False
    ).head(num_missing)
    
    # Combining into final Luxury Restaurants
    df_r_luxury = pd.concat([df_expensive_sorted, df_medium_sorted])

else:
    df_r_luxury = df_expensive_sorted.head(10)

df_r_luxury


Unnamed: 0,location_id,name,rating,num_reviews,price_level,cuisine,street1,city,country,postalcode,address_string
42,17443651,Grenache,4.7,271,$$$$,"french, mediterranean, european, international...",12 patio de dom Fradique,Lisbon,Portugal,1120-624,"12 patio de dom Fradique, Lisbon 1120-624 Port..."
46,15227438,Sala De Joao Sa,4.7,265,$$$$,"mediterranean, european, healthy, portuguese",Rua dos Bacalhoeiros 103,Lisbon,Portugal,1100-074,"Rua dos Bacalhoeiros 103, Lisbon 1100-074 Port..."
40,2628518,Belcanto,4.6,2248,$$$$,"portuguese, contemporary",Rua Serpa Pinto 10A,Lisbon,Portugal,1200-410,"Rua Serpa Pinto 10A, Lisbon 1200-410 Portugal"
41,9977670,Alma Henrique Sa Pessoa,4.6,1155,$$$$,"mediterranean, european, portuguese, contemporary",Rua Anchieta 15,Lisbon,Portugal,1200-023,"Rua Anchieta 15 Chiado, Lisbon 1200-023 Portugal"
11,1058428,Solar dos Presuntos,4.3,6553,$$$$,"seafood, mediterranean, european, portuguese, ...","Rua das Portas de Sto Antao, 150",Lisbon,Portugal,1150-269,"Rua das Portas de Sto Antao, 150, Lisbon 1150-..."
23,12274149,JNcQUOI Avenida,4.3,1285,$$$$,"european, portuguese",Avenida Da Liberdade 182-184,Lisbon,Portugal,1250-146,"Avenida Da Liberdade 182-184 Tivoli Forum, Lis..."
44,878558,Eleven Restaurant,4.0,1227,$$$$,"international, mediterranean, european, portug...",Rua Marques de Fronteira,Lisbon,Portugal,1070,Rua Marques de Fronteira Jardim Amália Rodrigu...
34,23808782,Seventh Brunch Chiado,4.9,1153,$$ - $$$,"european, british, portuguese, cafe, internati...",Calcada Do Combro 147,Lisbon,Portugal,1200-452,"Calcada Do Combro 147, Lisbon 1200-452 Portugal"
53,23691818,Sto Restaurante & Mercearia,4.9,1006,$$ - $$$,portuguese,"Rua dos Fanqueiros, 85",Lisbon,Portugal,1100-227,"Rua dos Fanqueiros, 85, Lisbon 1100-227 Portugal"
20,18942434,Antikuario Cafe,4.9,746,$$ - $$$,"mediterranean, european, healthy, portuguese",Rua de Sao Jose 168,Lisbon,Portugal,1150-326,"Rua de Sao Jose 168, Lisbon 1150-326 Portugal"


- Made Luxury Package for restaurants called "df_r_luxury", will use it to merge in to the other luxury DataFrames for final Luxury Package.

------------------------------------------
## Lisbon Attractions
--------------------------------------------

In [35]:
df_a = pd.read_csv("lisbon_attractions_with_ratings.csv")
df_a.head(10)

Unnamed: 0,location_id,name,rating,num_reviews,ranking,category,subcategory,street1,city,country,postalcode,address_string
0,195107,Castelo De Sao Jorge,4.2,30905,,attraction,"landmarks, attractions",Rua de Santa Cruz do Castelo,Lisbon,Portugal,1100-129,"Rua de Santa Cruz do Castelo, Lisbon 1100-129 ..."
1,546590,Parque Das Nacoes,4.3,4582,,attraction,"landmarks, attractions, other",Avenida Dom Joao II 13B,Lisbon,Portugal,1990-998,"Avenida Dom Joao II 13B, Lisbon 1990-998 Portugal"
2,23804944,Lisbon Cathedral,4.0,2167,,attraction,"landmarks, attractions",Largo da Se 1,Lisbon,Portugal,1100-585,"Largo da Se 1, Lisbon 1100-585 Portugal"
3,199878,Praca do Comercio (Terreiro do Paco),4.4,17271,,attraction,"landmarks, attractions",Avenida Infante Dom Henrique 1C,Lisbon,Portugal,1100-053,"Avenida Infante Dom Henrique 1C, Lisbon 1100-0..."
4,23957745,Quake - Museu do Terramoto de Lisboa,4.6,939,,attraction,"landmarks, attractions, museums","Rua Cais da Alfandega Velha, 39",Lisbon,Portugal,1300-598,"Rua Cais da Alfandega Velha, 39, Lisbon 1300-5..."
5,2639519,Lisbon Falls,4.2,323,,attraction,"nature_parks, attractions",,Graskop,South Africa,1270,Graskop 1270 South Africa
6,195318,Mosteiro dos Jeronimos,4.5,33014,,attraction,"landmarks, attractions",Praca do Imperio,Lisbon,Portugal,1400-206,"Praca do Imperio, Lisbon 1400-206 Portugal"
7,3928986,Lisboa Story Centre,4.2,932,,attraction,"museums, attractions","Terreiro do Paco, 78- 81",Lisbon,Portugal,1100-148,"Terreiro do Paco, 78- 81, Lisbon 1100-148 Port..."
8,195144,Oceanario de Lisboa,4.5,40983,,attraction,"nature_parks, zoos_aquariums, attractions",Esplanada D. Carlos I,Lisbon,Portugal,1990-005,"Esplanada D. Carlos I Doca dos Olivais, Lisbon..."
9,2397769,Arco do Triunfo,4.4,6202,,attraction,"landmarks, attractions",Rua Augusta 2,Lisbon,Portugal,1100-053,"Rua Augusta 2 Praça do Comércio, Lisbon 1100-0..."


In [None]:
df_a['ranking'].nunique

<bound method IndexOpsMixin.nunique of 0    NaN
1    NaN
2    NaN
3    NaN
4    NaN
      ..
65   NaN
66   NaN
67   NaN
68   NaN
69   NaN
Name: ranking, Length: 70, dtype: float64>

In [None]:
df_a.drop(columns=['ranking'], inplace=True) # Droping ranking columns

In [None]:
df_a.head()

Unnamed: 0,location_id,name,rating,num_reviews,category,subcategory,street1,city,country,postalcode,address_string
0,195107,Castelo De Sao Jorge,4.2,30905,attraction,"landmarks, attractions",Rua de Santa Cruz do Castelo,Lisbon,Portugal,1100-129,"Rua de Santa Cruz do Castelo, Lisbon 1100-129 ..."
1,546590,Parque Das Nacoes,4.3,4582,attraction,"landmarks, attractions, other",Avenida Dom Joao II 13B,Lisbon,Portugal,1990-998,"Avenida Dom Joao II 13B, Lisbon 1990-998 Portugal"
2,23804944,Lisbon Cathedral,4.0,2167,attraction,"landmarks, attractions",Largo da Se 1,Lisbon,Portugal,1100-585,"Largo da Se 1, Lisbon 1100-585 Portugal"
3,199878,Praca do Comercio (Terreiro do Paco),4.4,17271,attraction,"landmarks, attractions",Avenida Infante Dom Henrique 1C,Lisbon,Portugal,1100-053,"Avenida Infante Dom Henrique 1C, Lisbon 1100-0..."
4,23957745,Quake - Museu do Terramoto de Lisboa,4.6,939,attraction,"landmarks, attractions, museums","Rua Cais da Alfandega Velha, 39",Lisbon,Portugal,1300-598,"Rua Cais da Alfandega Velha, 39, Lisbon 1300-5..."


- Attractions has no 'price' column like hotels or restaurants, 
so I will make the luxury attractions data set based on premium experiences such as:
        - Boat Tours
        - Private sightseeing Tours
        - Gourmet food & wine tasting
        - Spa & wellness
        - Exclusive nightlife
        - Art
        - Shopping
- Water activities seems a good subcategory for this list but it is an unstable subcategory for luxury due to weather dependent so we will focus only on the above.
- Boat Tours are also a bit weather dependent but still doable.

### Luxury Attractions

In [None]:
df_a['subcategory'].unique() # For my goal, subcategory column is to vague and icludes to much on the same row.


array(['landmarks, attractions', 'landmarks, attractions, other',
       'landmarks, attractions, museums', 'nature_parks, attractions',
       'museums, attractions',
       'nature_parks, zoos_aquariums, attractions',
       'transportation, attractions',
       'sightseeing_tours, activities, outdoor_activities',
       'shopping, museums, attractions',
       'food_drink, sightseeing_tours, activities',
       'boat_tours_water_sports, outdoor_activities, sightseeing_tours, activities',
       'nightlife, sightseeing_tours, activities',
       'sightseeing_tours, activities', 'nightlife',
       'food_drink, landmarks, attractions, classes, activities',
       'wellness_spas, activities',
       'landmarks, attractions, sightseeing_tours, activities',
       'shopping', 'fun_games, nature_parks, attractions, activities'],
      dtype=object)

In [None]:
df_a['subcategory'].value_counts() 


subcategory
landmarks, attractions                                                        22
museums, attractions                                                           7
sightseeing_tours, activities                                                  6
food_drink, sightseeing_tours, activities                                      5
sightseeing_tours, activities, outdoor_activities                              5
boat_tours_water_sports, outdoor_activities, sightseeing_tours, activities     4
transportation, attractions                                                    4
landmarks, attractions, museums                                                3
shopping                                                                       2
nightlife                                                                      2
nature_parks, attractions                                                      2
landmarks, attractions, other                                                  1
nature_parks, zo

In [None]:
df_a['tags'] = df_a['subcategory'].str.split(', ') # getting tags column for more specific data.

In [None]:
df_a_tags = df_a.explode('tags') # Getting seperate tags in each row.

In [None]:
df_a_tags['tags'].value_counts() # This is better.

tags
attractions                44
landmarks                  28
activities                 25
sightseeing_tours          22
museums                    11
outdoor_activities          9
food_drink                  6
transportation              4
nature_parks                4
boat_tours_water_sports     4
shopping                    3
nightlife                   3
other                       1
zoos_aquariums              1
classes                     1
wellness_spas               1
fun_games                   1
Name: count, dtype: int64

In [None]:
df_a_tags[df_a_tags['tags'] == 'transportation'] # Useless for luxury, how will the customer get to places?


Unnamed: 0,location_id,name,rating,num_reviews,ranking,category,subcategory,street1,city,country,postalcode,address_string,tags
13,262792,Tram 28,3.9,25138,,attraction,"transportation, attractions",,Lisbon,Portugal,,Lisbon Portugal,transportation
33,546613,Carris - Eletricos de Lisboa,4.3,2654,,attraction,"transportation, attractions",,Lisbon,Portugal,2795-221,Lisbon 2795-221 Portugal,transportation
36,10634885,Sintra Tourist Bus 434,2.2,625,,attraction,"transportation, attractions",,Sintra,Portugal,2645-019,Sintra 2645-019 Portugal,transportation
42,9792343,Lisbon Airport travel,2.1,405,,attraction,"transportation, attractions",,Lisbon,Portugal,,Lisbon Portugal,transportation


In [None]:
# Summaryzing tags data

summary = df_a_tags.groupby('tags').agg({
    'name': 'count',
    'rating': 'mean',
    'num_reviews': 'sum'
}).sort_values(by='name', ascending=False)

summary


Unnamed: 0_level_0,name,rating,num_reviews
tags,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
attractions,44,4.25,308097
landmarks,28,4.367857,215390
activities,25,4.616,73268
sightseeing_tours,22,4.636364,44603
museums,11,4.436364,21483
outdoor_activities,9,4.911111,14101
food_drink,6,4.783333,42814
nature_parks,4,4.325,43742
boat_tours_water_sports,4,4.85,6594
transportation,4,3.125,28822


The strongest categories for a luxury product are:

* Wellness Spas:

    Ultra-premium category

    Very high rating (4.90)

    Ideal for luxury packages:

* Outdoor Activities:

    Rating almost 5.0

    Usually small-group, curated, premium experiences

* Boat Tours & Water Sports:

    Premium boat tours, private yachts, sunset cruises

* Food & Drink (wine tastings, gourmet tours):

    Often expensive

    High quality (4.78)

* High-end Sightseeing Tours:

    Private guides

    Small-group premium tours

* Shopping: (still need data for this, must have.)

    Expensive shops

    Premium designer shops

* Nightlife: (Also need data for this, must have.)

    Expensive bars

    High-end people interactions.


We can ignore the other tags since they are either family activities like nature_parks, zoos_aquariums or not luxury type at all like classes, fun_games etc.

In [None]:
# Step 1: filter the most expensive
df_a = df_a[df_a["price_level"] == "$$$$"]

# Step 2: sort expensive ones by rating & reviews
df_expensive_sorted = df_expensive.sort_values(
    by=["rating", "num_reviews"],
    ascending=False
)

# I only have 7 restaurants with $$$$ price, we need extra restaurants:
num_missing = 10 - len(df_expensive_sorted)

if num_missing > 0:
    # Taking additional high-rated restaurants from the next tier $$ - $$$
    df_medium = df_r[df_r["price_level"] == "$$ - $$$"]
    
    df_medium_sorted = df_medium.sort_values(
        by=["rating", "num_reviews"],
        ascending=False
    ).head(num_missing)
    
    # Combining into final Luxury Restaurants
    df_r_luxury = pd.concat([df_expensive_sorted, df_medium_sorted])

else:
    df_r_luxury = df_expensive_sorted.head(10)

df_r_luxury
