------------------------------------------------------------------------------------------------
# DataFrames - Luxury Package
------------------------------------------------------------------------------------------------

In [57]:
import requests
import pandas as pd
import os
from dotenv import load_dotenv, find_dotenv
import time

## Lisbon Hotels

In [58]:
df_hotels = pd.read_csv("lisbon_hotels.csv")
df_hotels.head(10)

Unnamed: 0,location_id,name,rating,num_reviews,ranking,price_level,street1,city,country,postalcode,address_string
0,2292067,Czar Lisbon Hotel,3.7,947,,$$,Avenida Almirante Reis 103,Lisbon,Portugal,1150-0020,"Avenida Almirante Reis 103, Lisbon 1150-0020 P..."
1,206890,Avani Avenida Liberdade Lisbon Hotel,4.3,796,,$$$,Rua Julio Cesar Machado 7 9,Lisbon,Portugal,1250-135,"Rua Julio Cesar Machado 7 9, Lisbon 1250-135 P..."
2,11600027,Empire Lisbon Hotel,4.3,398,,$$,Avenida Almirante Reis 130,Lisbon,Portugal,1150-023,"Avenida Almirante Reis 130, Lisbon 1150-023 Po..."
3,8842153,Palacio Do Governador - Lisbon Hotel & Spa,4.4,526,,$$$,"Rua Bartolomeu Dias, 117",Lisbon,Portugal,1400-030,"Rua Bartolomeu Dias, 117, Lisbon 1400-030 Port..."
4,19780664,Aroeira Lisbon Hotel - Sea & Golf Resort,4.1,93,,$$,Avenida Pinhal da Aroeira 1,Aroeira,Portugal,2820-112,Avenida Pinhal da Aroeira 1 Herdade da Aroeira...
5,553361,Dinya Lisbon Hotel & Lounge Bar,4.3,34,,$$,Rua Ilha do Pico 3,Lisbon,Portugal,1000-169,"Rua Ilha do Pico 3, Lisbon 1000-169 Portugal"
6,781013,New Style Lisbon Hotel,3.6,97,,$$,Avenida Almirante Reis 53,Lisbon,Portugal,1150-011,"Avenida Almirante Reis 53, Lisbon 1150-011 Por..."
7,12659702,Corpo Santo Lisbon Historical Hotel,4.9,4398,,$$$$,"Largo do Corpo Santo, 25",Lisbon,Portugal,1200-129,"Largo do Corpo Santo, 25, Lisbon 1200-129 Port..."
8,13402042,Hotel Da Baixa,4.9,2868,,$$$,Rua da Prata 231,Lisbon,Portugal,1100-417,"Rua da Prata 231, Lisbon 1100-417 Portugal"
9,195643,Hotel Avenida Palace,4.8,5028,,$$$$,Rua 1 Dezembro 123,Lisbon,Portugal,1200-359,"Rua 1 Dezembro 123, Lisbon 1200-359 Portugal"


Cleaning hotels by city and address to make sure we only get Lisbon hotels

In [59]:
df_hotels['city'].value_counts()

city
Lisbon      46
Aroeira      1
Brooklyn     1
Name: count, dtype: int64

In [60]:
df_hotels = df_hotels[df_hotels["address_string"].str.contains("Lisbon", case=False, na=False)]

In [61]:
df_hotels.drop(columns=["ranking"], inplace=True) # Removing the ranking column because it is empty.

### Luxury Hotels:

In [62]:
# Step 1: filter the most expensive
df_lux_h = df_hotels[df_hotels["price_level"] == "$$$$"]

# Step 2: sort expensive ones by rating & reviews
df_lux_h_sorted = df_lux_h.sort_values(
    by=["rating", "num_reviews"],
    ascending=False
)

df_lux_h


Unnamed: 0,location_id,name,rating,num_reviews,price_level,street1,city,country,postalcode,address_string
7,12659702,Corpo Santo Lisbon Historical Hotel,4.9,4398,$$$$,"Largo do Corpo Santo, 25",Lisbon,Portugal,1200-129,"Largo do Corpo Santo, 25, Lisbon 1200-129 Port..."
9,195643,Hotel Avenida Palace,4.8,5028,$$$$,Rua 1 Dezembro 123,Lisbon,Portugal,1200-359,"Rua 1 Dezembro 123, Lisbon 1200-359 Portugal"
12,10621974,Martinhal Lisbon Chiado Family Suites,4.9,662,$$$$,Rua das Flores 44,Lisbon,Portugal,1200-195,"Rua das Flores 44, Lisbon 1200-195 Portugal"
13,231476,InterContinental Lisbon by IHG,4.4,341,$$$$,Rua Castilho 149,Lisbon,Portugal,1099-034,"Rua Castilho 149 Next to Parque Eduardo VII, L..."
15,6351445,Torel Palace Lisbon,4.7,1093,$$$$,"Rua Camara Pestana, 45",Lisbon,Portugal,1150-082,"Rua Camara Pestana, 45, Lisbon 1150-082 Portugal"
16,23799173,Hyatt Regency Lisbon,4.4,515,$$$$,Rua da Junqueira 65,Lisbon,Portugal,1300-343,"Rua da Junqueira 65, Lisbon 1300-343 Portugal"
17,7376234,Santiago De Alfama - Boutique Hotel,4.9,1639,$$$$,Rua Santiago 10 14,Lisbon,Portugal,1100-494,"Rua Santiago 10 14, Lisbon 1100-494 Portugal"
21,548451,Bairro Alto Hotel,4.7,1715,$$$$,Praca Luis de Camoes 2,Lisbon,Portugal,1200-243,"Praca Luis de Camoes 2, Lisbon 1200-243 Portugal"
24,195742,"Hotel Britania Art Deco, a Lisbon Heritage Col...",4.8,3427,$$$$,Rua Rodrigues Sampaio 17,Lisbon,Portugal,1150-278,"Rua Rodrigues Sampaio 17, Lisbon 1150-278 Port..."
26,229311,Four Seasons Hotel Ritz Lisbon,4.6,2249,$$$$,Rua Rodrigo da Fonseca 88,Lisbon,Portugal,1099-039,"Rua Rodrigo da Fonseca 88, Lisbon 1099-039 Por..."


---------------------------------------------------
## Lisbon Restaurants
------------------------------------------------

In [63]:
df_r = pd.read_csv("lisbon_restaurants.csv")
df_r.head(10)

Unnamed: 0,location_id,name,rating,num_reviews,ranking,price_level,cuisine,street1,city,country,postalcode,address_string
0,12516582,Love Lisbon Restaurant & Bar,4.1,44,,$$ - $$$,"bar, asian, nepali",R. Jose Antonio Serrano Floor 4,Lisbon,Portugal,1150-033,"R. Jose Antonio Serrano Floor 4, Lisbon 1150-0..."
1,4974326,Madrid Lisbon Restaurant,4.8,4,,$$ - $$$,"european, spanish, portuguese",325 Lafayette St,Newark,United States,07105-2724,"325 Lafayette St, Newark, NJ 07105-2724"
2,1520116,Floresta Das Escadinhas,4.8,7099,,$,"mediterranean, barbecue, european, healthy, po...",Rua de Santa Justa N_3,Lisbon,Portugal,1100-483,"Rua de Santa Justa N_3, Lisbon 1100-483 Portugal"
3,2308921,Frade dos Mares,4.8,5546,,$$ - $$$,"seafood, mediterranean, european, portuguese",Av. Dom Carlos I 55A,Lisbon,Portugal,1200-647,"Av. Dom Carlos I 55A, Lisbon 1200-647 Portugal"
4,1886758,Restaurant Ze da Mouraria,4.3,651,,$$ - $$$,"mediterranean, european, portuguese",Rua Joao do Outeiro 24,Lisbon,Portugal,1100-292,"Rua Joao do Outeiro 24, Lisbon 1100-292 Portugal"
5,23976748,Restaurant Odaan,4.9,742,,$,"indian, bar, asian, nepali",Rua dos Cavaleiros 07 LJ 11,Lisbon,Portugal,1100-295,"Rua dos Cavaleiros 07 LJ 11, Lisbon 1100-295 P..."
6,13071752,Jam Club,4.9,1757,,$,"portuguese, gastropub, dining_bars",Travessa dos Inglesinhos 49,Lisbon,Portugal,1200-223,"Travessa dos Inglesinhos 49, Lisbon 1200-223 P..."
7,15084159,#Treestory Restaurant,4.4,234,,$$ - $$$,"european, grill, georgian, healthy",Rua Luciano Cordeiro 46A,Lisbon,Portugal,1150-216,"Rua Luciano Cordeiro 46A, Lisbon 1150-216 Port..."
8,1721696,Come Prima Restaurante Italiano,4.8,4357,,$$ - $$$,"italian, pizza, mediterranean, healthy, neapol...",Rua do Olival 256,Lisbon,Portugal,1200-744,"Rua do Olival 256, Lisbon 1200-744 Portugal"
9,15087513,A Nossa Casa,4.8,1489,,$$ - $$$,"brazilian, portuguese",Rua da Atalaia No 31,Lisbon,Portugal,1200-037,"Rua da Atalaia No 31, Lisbon 1200-037 Portugal"


In [64]:
df_r['city'].value_counts() 

city
Lisbon         56
Newark          1
Jericho         1
Carle Place     1
Mineola         1
La Oroya        1
Addis Ababa     1
Name: count, dtype: int64

 - API fetched restaurants outside of Lisbon because their name or description contains “Lisbon”. 
 - So I must clean this to only have restaurants situated in Lisbon.

In [65]:
df_r = df_r[df_r["address_string"].str.contains("Lisbon", case=False, na=False)]


In [66]:
df_r['city'].value_counts()

city
Lisbon    56
Name: count, dtype: int64

In [67]:
df_r.drop(columns=["ranking"], inplace=True) # Removing the ranking column because it is empty.

### Luxury Restaurants

In [68]:
# Step 1: filter the most expensive
df_expensive = df_r[df_r["price_level"] == "$$$$"]

# Step 2: sort expensive ones by rating & reviews
df_expensive_sorted = df_expensive.sort_values(
    by=["rating", "num_reviews"],
    ascending=False
)

# I only have 7 restaurants with $$$$ price, we need extra restaurants:
num_missing = 10 - len(df_expensive_sorted)

if num_missing > 0:
    # Taking additional high-rated restaurants from the next tier $$ - $$$
    df_medium = df_r[df_r["price_level"] == "$$ - $$$"]
    
    df_medium_sorted = df_medium.sort_values(
        by=["rating", "num_reviews"],
        ascending=False
    ).head(num_missing)
    
    # Combining into final Luxury Restaurants
    df_r_luxury = pd.concat([df_expensive_sorted, df_medium_sorted])

else:
    df_r_luxury = df_expensive_sorted.head(10)

df_r_luxury


Unnamed: 0,location_id,name,rating,num_reviews,price_level,cuisine,street1,city,country,postalcode,address_string
42,17443651,Grenache,4.7,271,$$$$,"french, mediterranean, european, international...",12 patio de dom Fradique,Lisbon,Portugal,1120-624,"12 patio de dom Fradique, Lisbon 1120-624 Port..."
46,15227438,Sala De Joao Sa,4.7,265,$$$$,"mediterranean, european, healthy, portuguese",Rua dos Bacalhoeiros 103,Lisbon,Portugal,1100-074,"Rua dos Bacalhoeiros 103, Lisbon 1100-074 Port..."
40,2628518,Belcanto,4.6,2248,$$$$,"portuguese, contemporary",Rua Serpa Pinto 10A,Lisbon,Portugal,1200-410,"Rua Serpa Pinto 10A, Lisbon 1200-410 Portugal"
41,9977670,Alma Henrique Sa Pessoa,4.6,1155,$$$$,"mediterranean, european, portuguese, contemporary",Rua Anchieta 15,Lisbon,Portugal,1200-023,"Rua Anchieta 15 Chiado, Lisbon 1200-023 Portugal"
11,1058428,Solar dos Presuntos,4.3,6553,$$$$,"seafood, mediterranean, european, portuguese, ...","Rua das Portas de Sto Antao, 150",Lisbon,Portugal,1150-269,"Rua das Portas de Sto Antao, 150, Lisbon 1150-..."
23,12274149,JNcQUOI Avenida,4.3,1285,$$$$,"european, portuguese",Avenida Da Liberdade 182-184,Lisbon,Portugal,1250-146,"Avenida Da Liberdade 182-184 Tivoli Forum, Lis..."
44,878558,Eleven Restaurant,4.0,1227,$$$$,"international, mediterranean, european, portug...",Rua Marques de Fronteira,Lisbon,Portugal,1070,Rua Marques de Fronteira Jardim Amália Rodrigu...
34,23808782,Seventh Brunch Chiado,4.9,1153,$$ - $$$,"european, british, portuguese, cafe, internati...",Calcada Do Combro 147,Lisbon,Portugal,1200-452,"Calcada Do Combro 147, Lisbon 1200-452 Portugal"
53,23691818,Sto Restaurante & Mercearia,4.9,1006,$$ - $$$,portuguese,"Rua dos Fanqueiros, 85",Lisbon,Portugal,1100-227,"Rua dos Fanqueiros, 85, Lisbon 1100-227 Portugal"
20,18942434,Antikuario Cafe,4.9,746,$$ - $$$,"mediterranean, european, healthy, portuguese",Rua de Sao Jose 168,Lisbon,Portugal,1150-326,"Rua de Sao Jose 168, Lisbon 1150-326 Portugal"


- Made Luxury Package for restaurants called "df_r_luxury", will use it to merge in to the other luxury DataFrames for final Luxury Package.

------------------------------------------
## Lisbon Attractions
--------------------------------------------

In [69]:
df_a = pd.read_csv("lisbon_attractions.csv")
df_a.head()

Unnamed: 0,location_id,name,rating,num_reviews,ranking,category,subcategory,street1,city,country,postalcode,address_string
0,195107,Castelo De Sao Jorge,4.2,30905,,attraction,"landmarks, attractions",Rua de Santa Cruz do Castelo,Lisbon,Portugal,1100-129,"Rua de Santa Cruz do Castelo, Lisbon 1100-129 ..."
1,546590,Parque Das Nacoes,4.3,4582,,attraction,"landmarks, attractions, other",Avenida Dom Joao II 13B,Lisbon,Portugal,1990-998,"Avenida Dom Joao II 13B, Lisbon 1990-998 Portugal"
2,23804944,Lisbon Cathedral,4.0,2167,,attraction,"landmarks, attractions",Largo da Se 1,Lisbon,Portugal,1100-585,"Largo da Se 1, Lisbon 1100-585 Portugal"
3,199878,Praca do Comercio (Terreiro do Paco),4.4,17271,,attraction,"landmarks, attractions",Avenida Infante Dom Henrique 1C,Lisbon,Portugal,1100-053,"Avenida Infante Dom Henrique 1C, Lisbon 1100-0..."
4,23957745,Quake - Museu do Terramoto de Lisboa,4.6,939,,attraction,"landmarks, attractions, museums","Rua Cais da Alfandega Velha, 39",Lisbon,Portugal,1300-598,"Rua Cais da Alfandega Velha, 39, Lisbon 1300-5..."


In [70]:
df_a['ranking'].nunique

<bound method IndexOpsMixin.nunique of 0    NaN
1    NaN
2    NaN
3    NaN
4    NaN
      ..
65   NaN
66   NaN
67   NaN
68   NaN
69   NaN
Name: ranking, Length: 70, dtype: float64>

In [71]:
df_a.drop(columns=['ranking'], inplace=True) # Droping ranking columns

- Attractions has no 'price' column like hotels or restaurants, 
so I will make the luxury attractions data set based on premium experiences such as:
        - Boat Tours
        - Private sightseeing Tours
        - Gourmet food & wine tasting
        - Spa & wellness
        - Exclusive nightlife
        - Art
        - Shopping
- Water activities seems a good subcategory for this list but it is an unstable subcategory for luxury due to weather dependent so we will focus only on the above.
- Boat Tours are also a bit weather dependent but still doable.

SABINA COMMENT: Really like these comments :)

--------------------------------------------------------------------------------
### Luxury Attractions

In [72]:
df_a['subcategory'].unique() # For my goal, subcategory column is to vague and icludes to much on the same row.


array(['landmarks, attractions', 'landmarks, attractions, other',
       'landmarks, attractions, museums', 'nature_parks, attractions',
       'museums, attractions',
       'nature_parks, zoos_aquariums, attractions',
       'transportation, attractions',
       'sightseeing_tours, activities, outdoor_activities',
       'shopping, museums, attractions',
       'food_drink, sightseeing_tours, activities',
       'boat_tours_water_sports, outdoor_activities, sightseeing_tours, activities',
       'nightlife, sightseeing_tours, activities',
       'sightseeing_tours, activities', 'nightlife',
       'food_drink, landmarks, attractions, classes, activities',
       'wellness_spas, activities',
       'landmarks, attractions, sightseeing_tours, activities',
       'shopping', 'fun_games, nature_parks, attractions, activities'],
      dtype=object)

In [73]:
df_a['subcategory'].value_counts() 


subcategory
landmarks, attractions                                                        22
museums, attractions                                                           7
sightseeing_tours, activities                                                  6
food_drink, sightseeing_tours, activities                                      5
sightseeing_tours, activities, outdoor_activities                              5
boat_tours_water_sports, outdoor_activities, sightseeing_tours, activities     4
transportation, attractions                                                    4
landmarks, attractions, museums                                                3
shopping                                                                       2
nightlife                                                                      2
nature_parks, attractions                                                      2
landmarks, attractions, other                                                  1
nature_parks, zo

In [74]:
df_a['tags'] = df_a['subcategory'].str.split(', ') # getting tags column for more specific data.

In [75]:
df_a_tags = df_a.explode('tags') # Getting seperate tags in each row.

In [76]:
df_a_tags['tags'].value_counts() 

tags
attractions                44
landmarks                  28
activities                 25
sightseeing_tours          22
museums                    11
outdoor_activities          9
food_drink                  6
transportation              4
nature_parks                4
boat_tours_water_sports     4
shopping                    3
nightlife                   3
other                       1
zoos_aquariums              1
classes                     1
wellness_spas               1
fun_games                   1
Name: count, dtype: int64

In [77]:
df_a_tags[df_a_tags['tags'] == 'transportation'] # Transportation data is useless for luxury, weakness of the project.


Unnamed: 0,location_id,name,rating,num_reviews,category,subcategory,street1,city,country,postalcode,address_string,tags
13,262792,Tram 28,3.9,25138,attraction,"transportation, attractions",,Lisbon,Portugal,,Lisbon Portugal,transportation
33,546613,Carris - Eletricos de Lisboa,4.3,2654,attraction,"transportation, attractions",,Lisbon,Portugal,2795-221,Lisbon 2795-221 Portugal,transportation
36,10634885,Sintra Tourist Bus 434,2.2,625,attraction,"transportation, attractions",,Sintra,Portugal,2645-019,Sintra 2645-019 Portugal,transportation
42,9792343,Lisbon Airport travel,2.1,405,attraction,"transportation, attractions",,Lisbon,Portugal,,Lisbon Portugal,transportation


I consider transportation a weakness of the project due to lack of data, we only have city public transports such has bus, tram and cables cars.
TripAdvisor API endpoint doesnt give data of private high-end companies that offer a more luxurious service.

The strongest categories for a luxury product are:

* Wellness Spas:

    Ultra-premium category

    Very high rating (4.90)

    Ideal for luxury packages:

* Outdoor Activities:

    Rating almost 5.0

    Usually small-group, curated, premium experiences

* Boat Tours & Water Sports:

    Premium boat tours, private yachts, sunset cruises

* Food & Drink (wine tastings, gourmet tours):

    Often expensive

    High quality (4.78)

* High-end Sightseeing Tours:

    Private guides

    Small-group premium tours

We can ignore the other tags since they are either family activities like nature_parks, zoos_aquariums or not luxury type at all like classes, fun_games etc.

In [78]:
# Creating a tag list for luxury attractions intended for luxury package
luxury_tags = [
    "wellness_spas",
    "outdoor_activities",
    "boat_tours_water_sports",
    "food_drink",
    "sightseeing_tours",
    "museums",
]

df_lux_a = df_a[
    df_a["tags"].apply(lambda tag_list: any(t in luxury_tags for t in tag_list))
]

df_lux_a.head(35)

Unnamed: 0,location_id,name,rating,num_reviews,category,subcategory,street1,city,country,postalcode,address_string,tags
4,23957745,Quake - Museu do Terramoto de Lisboa,4.6,939,attraction,"landmarks, attractions, museums","Rua Cais da Alfandega Velha, 39",Lisbon,Portugal,1300-598,"Rua Cais da Alfandega Velha, 39, Lisbon 1300-5...","[landmarks, attractions, museums]"
7,3928986,Lisboa Story Centre,4.2,932,attraction,"museums, attractions","Terreiro do Paco, 78- 81",Lisbon,Portugal,1100-148,"Terreiro do Paco, 78- 81, Lisbon 1100-148 Port...","[museums, attractions]"
12,2229331,Pavilhao do Conhecimento,4.5,958,attraction,"museums, attractions",Largo Jose Mariano Gago N0 1,Lisbon,Portugal,1990-073,"Largo Jose Mariano Gago N0 1, Lisbon 1990-073 ...","[museums, attractions]"
14,6628842,Bikes & Company,4.9,1422,attraction,"sightseeing_tours, activities, outdoor_activities",Rua dos Douradores 16,Lisbon,Portugal,1100-206,"Rua dos Douradores 16, Lisbon 1100-206 Portugal","[sightseeing_tours, activities, outdoor_activi..."
17,195776,National Tile Museum,4.5,4597,attraction,"museums, attractions",Rua Madre de Deus 4,Lisbon,Portugal,1900-312,"Rua Madre de Deus 4, Lisbon 1900-312 Portugal","[museums, attractions]"
18,195106,Calouste Gulbenkian Museum,4.6,8709,attraction,"shopping, museums, attractions",Avenida de Berna 45A,Lisbon,Portugal,1067-001,"Avenida de Berna 45A, Lisbon 1067-001 Portugal","[shopping, museums, attractions]"
19,195736,Museu Nacional De Arte Antiga,4.5,1329,attraction,"museums, attractions",Rua das Janelas Verdes,Lisbon,Portugal,1249-017,"Rua das Janelas Verdes, Lisbon 1249-017 Portugal","[museums, attractions]"
20,1996418,Museu Arqueologico do Carmo,4.4,2968,attraction,"museums, attractions",Largo do Carmo,Lisbon,Portugal,1200-092,"Largo do Carmo, Lisbon 1200-092 Portugal","[museums, attractions]"
21,24153846,Royal Treasure Museum,4.7,251,attraction,"landmarks, attractions, museums",Calcada da Ajuda,Lisbon,Portugal,1300-012,"Calcada da Ajuda Ajuda National Palace, Lisbon...","[landmarks, attractions, museums]"
22,12875410,National Museum of Natural History and Science,3.5,398,attraction,"museums, attractions",Rua da Escola Politecnica 56 58,Lisbon,Portugal,1250-102,"Rua da Escola Politecnica 56 58, Lisbon 1250-1...","[museums, attractions]"


Since I have no prices on attractions, I will use a weightmap on the tags to add to review points and counts to decide which attractions fit into luxury package.

In [None]:
luxury_weights = {
    "wellness_spas": 10,
    "boat_tours_water_sports": 9,
    "food_drink": 8,
    "outdoor_activities": 7,
    "sightseeing_tours": 6,
    "museums": 4,
    "nightlife": 3,
    "shopping": 2,
    "landmarks": 1
}

def luxury_score(row):
    tags = row["tags"]
    
    # Tag luxury score = highest luxury weight among tags
    tag_score = max([luxury_weights.get(t, 0) for t in tags])
    
    rating = row.get("rating", 0)
    reviews = row.get("num_reviews", 0)
    
    score = (rating * 3) + (reviews * 0.001) + tag_score # Weighted luxury score formula
    
    return score

df_lux_a["luxury_score"] = df_lux_a.apply(luxury_score, axis=1)

df_luxury_attractions = df_lux_a.sort_values("luxury_score", ascending=False) # Sorting values



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_lux_a["luxury_score"] = df_lux_a.apply(luxury_score, axis=1)


In [80]:
df_luxury_attractions.head(20)

Unnamed: 0,location_id,name,rating,num_reviews,category,subcategory,street1,city,country,postalcode,address_string,tags,luxury_score
40,3214902,Time Out Market Lisboa,4.3,25050,attraction,"food_drink, landmarks, attractions, classes, a...",Avenida 24 de Julho,Lisbon,Portugal,1200-479,"Avenida 24 de Julho, Lisbon 1200-479 Portugal","[food_drink, landmarks, attractions, classes, ...",45.95
26,1746966,Inside Lisbon Tours,4.8,11966,attraction,"food_drink, sightseeing_tours, activities",Rua Ivone Silva 6 70ESQ,Lisbon,Portugal,1050-124,"Rua Ivone Silva 6 70ESQ, Lisbon 1050-124 Portugal","[food_drink, sightseeing_tours, activities]",34.366
18,195106,Calouste Gulbenkian Museum,4.6,8709,attraction,"shopping, museums, attractions",Avenida de Berna 45A,Lisbon,Portugal,1067-001,"Avenida de Berna 45A, Lisbon 1067-001 Portugal","[shopping, museums, attractions]",26.509
52,15518965,EcoMassage Lisbon,4.9,1553,attraction,"wellness_spas, activities",Rua aurea 260,Lisbon,Portugal,1100-062,"Rua aurea 260 Praça Rossio, Lisbon 1100-062 Po...","[wellness_spas, activities]",26.253
47,10807704,Nosso Tejo,4.8,2734,attraction,"boat_tours_water_sports, outdoor_activities, s...",,Lisbon,Portugal,1100-278,Lisbon 1100-278 Portugal,"[boat_tours_water_sports, outdoor_activities, ...",26.134
29,10113758,Discover Lisbon,4.8,5445,attraction,"nightlife, sightseeing_tours, activities",,Lisbon,Portugal,1100,Lisbon 1100 Portugal,"[nightlife, sightseeing_tours, activities]",25.845
28,4289715,Lisbon by Boat,4.9,2033,attraction,"boat_tours_water_sports, outdoor_activities, s...","Padrao dos Descobrimentos , Doca de Belem",Lisbon,Portugal,1300-000,"Padrao dos Descobrimentos , Doca de Belem, Lis...","[boat_tours_water_sports, outdoor_activities, ...",25.733
46,8460163,ColourTrip Lisbon,5.0,2983,attraction,"sightseeing_tours, activities, outdoor_activities",,Lisbon,Portugal,,Lisbon Portugal,"[sightseeing_tours, activities, outdoor_activi...",24.983
53,3203667,Around Lisbon,4.8,1546,attraction,"boat_tours_water_sports, outdoor_activities, s...",,Lisbon,Portugal,,Lisbon Portugal,"[boat_tours_water_sports, outdoor_activities, ...",24.946
56,7374624,Culinary Backstreets Lisbon,5.0,1727,attraction,"food_drink, sightseeing_tours, activities",,Lisbon,Portugal,,Lisbon Portugal,"[food_drink, sightseeing_tours, activities]",24.727


These are the attractions that are most fitted to luxury package from our analysis

In [81]:
# Saving luxury attractions df to csv file to analyze with visualizations:

df_luxury_attractions.to_csv("luxury_attractions.csv", index=False)

In [82]:
df_lux_a = df_lux_a.copy()
df_lux_a["luxury_score"] = df_lux_a.apply(luxury_score, axis=1)


choosing best 5 attractions for luxury:

In [83]:
top5_luxury_attractions = (
    df_lux_a.sort_values("luxury_score", ascending=False)
            .head(5)
            .reset_index(drop=True)
)



In [84]:
top5_luxury_attractions


Unnamed: 0,location_id,name,rating,num_reviews,category,subcategory,street1,city,country,postalcode,address_string,tags,luxury_score
0,3214902,Time Out Market Lisboa,4.3,25050,attraction,"food_drink, landmarks, attractions, classes, a...",Avenida 24 de Julho,Lisbon,Portugal,1200-479,"Avenida 24 de Julho, Lisbon 1200-479 Portugal","[food_drink, landmarks, attractions, classes, ...",45.95
1,1746966,Inside Lisbon Tours,4.8,11966,attraction,"food_drink, sightseeing_tours, activities",Rua Ivone Silva 6 70ESQ,Lisbon,Portugal,1050-124,"Rua Ivone Silva 6 70ESQ, Lisbon 1050-124 Portugal","[food_drink, sightseeing_tours, activities]",34.366
2,195106,Calouste Gulbenkian Museum,4.6,8709,attraction,"shopping, museums, attractions",Avenida de Berna 45A,Lisbon,Portugal,1067-001,"Avenida de Berna 45A, Lisbon 1067-001 Portugal","[shopping, museums, attractions]",26.509
3,15518965,EcoMassage Lisbon,4.9,1553,attraction,"wellness_spas, activities",Rua aurea 260,Lisbon,Portugal,1100-062,"Rua aurea 260 Praça Rossio, Lisbon 1100-062 Po...","[wellness_spas, activities]",26.253
4,10807704,Nosso Tejo,4.8,2734,attraction,"boat_tours_water_sports, outdoor_activities, s...",,Lisbon,Portugal,1100-278,Lisbon 1100-278 Portugal,"[boat_tours_water_sports, outdoor_activities, ...",26.134


## Final Luxury Package

In [96]:
# Creating copys and adding category columns for final dataframe

df_lux_h = df_lux_h.copy()
df_lux_h["category"] = "Hotel"

df_r_luxury = df_r_luxury.copy()
df_r_luxury["category"] = "Restaurant"

top5_luxury_attractions= top5_luxury_attractions.copy()
top5_luxury_attractions["category"] = "Attraction"

In [None]:
# Concat and cleaning
luxury_package = pd.concat([
    df_lux_h.assign(category="Hotel"),
    df_r_luxury.assign(category="Restaurant"),
    top5_luxury_attractions.assign(category="Attraction")
], ignore_index=True)

luxury_package["price_level"] = luxury_package["price_level"].fillna("—")

In [105]:
# Getting 5 hotels only
lux_hotels = luxury_package[luxury_package["category"] == "Hotel"] \
    .sort_values(["rating", "num_reviews"], ascending=False) \
    .head(5)

lux_restaurants = luxury_package[luxury_package["category"] == "Restaurant"] \
    .sort_values(["rating", "num_reviews"], ascending=False) \
    .head(10) 

lux_attractions = luxury_package[luxury_package["category"] == "Attraction"] 


In [106]:
# Concatenating in to final luxury package
final_luxury_package = pd.concat([lux_hotels, lux_restaurants, lux_attractions], ignore_index=True)
final_luxury_package # 20 items luxury package :)

Unnamed: 0,location_id,name,rating,num_reviews,price_level,street1,city,country,postalcode,address_string,category,cuisine,subcategory,tags,luxury_score
0,12659702,Corpo Santo Lisbon Historical Hotel,4.9,4398,$$$$,"Largo do Corpo Santo, 25",Lisbon,Portugal,1200-129,"Largo do Corpo Santo, 25, Lisbon 1200-129 Port...",Hotel,,,,
1,7376234,Santiago De Alfama - Boutique Hotel,4.9,1639,$$$$,Rua Santiago 10 14,Lisbon,Portugal,1100-494,"Rua Santiago 10 14, Lisbon 1100-494 Portugal",Hotel,,,,
2,10621974,Martinhal Lisbon Chiado Family Suites,4.9,662,$$$$,Rua das Flores 44,Lisbon,Portugal,1200-195,"Rua das Flores 44, Lisbon 1200-195 Portugal",Hotel,,,,
3,195643,Hotel Avenida Palace,4.8,5028,$$$$,Rua 1 Dezembro 123,Lisbon,Portugal,1200-359,"Rua 1 Dezembro 123, Lisbon 1200-359 Portugal",Hotel,,,,
4,195742,"Hotel Britania Art Deco, a Lisbon Heritage Col...",4.8,3427,$$$$,Rua Rodrigues Sampaio 17,Lisbon,Portugal,1150-278,"Rua Rodrigues Sampaio 17, Lisbon 1150-278 Port...",Hotel,,,,
5,23808782,Seventh Brunch Chiado,4.9,1153,$$ - $$$,Calcada Do Combro 147,Lisbon,Portugal,1200-452,"Calcada Do Combro 147, Lisbon 1200-452 Portugal",Restaurant,"european, british, portuguese, cafe, internati...",,,
6,23691818,Sto Restaurante & Mercearia,4.9,1006,$$ - $$$,"Rua dos Fanqueiros, 85",Lisbon,Portugal,1100-227,"Rua dos Fanqueiros, 85, Lisbon 1100-227 Portugal",Restaurant,portuguese,,,
7,18942434,Antikuario Cafe,4.9,746,$$ - $$$,Rua de Sao Jose 168,Lisbon,Portugal,1150-326,"Rua de Sao Jose 168, Lisbon 1150-326 Portugal",Restaurant,"mediterranean, european, healthy, portuguese",,,
8,17443651,Grenache,4.7,271,$$$$,12 patio de dom Fradique,Lisbon,Portugal,1120-624,"12 patio de dom Fradique, Lisbon 1120-624 Port...",Restaurant,"french, mediterranean, european, international...",,,
9,15227438,Sala De Joao Sa,4.7,265,$$$$,Rua dos Bacalhoeiros 103,Lisbon,Portugal,1100-074,"Rua dos Bacalhoeiros 103, Lisbon 1100-074 Port...",Restaurant,"mediterranean, european, healthy, portuguese",,,


In [107]:
final_luxury_package.to_csv("luxury_package.csv", index=False) # Saving package as a csv file.


Achieved end goal for the luxury package, it contains 5 hotels for 5 nights, 10 restaurants for at least 10 meals (lunch & dinner) and 5 attractions for 5 days.