In [19]:
# Loading data from Google Sheets
import pandas as pd

# Source workbook: a Google Sheets document exported as .xlsx, one worksheet per group.
url = "https://docs.google.com/spreadsheets/d/1ecopK6oyyb4d_7-QLrCr8YlgFrCetHU7-VQfnYej7JY/export?format=xlsx"

# Open the workbook as a context manager so its handle is closed as soon as
# every worksheet has been parsed (the original left it open).
with pd.ExcelFile(url, engine='openpyxl') as dataset:
    # One DataFrame per worksheet; the extra "group" column records which
    # worksheet each row came from.
    sheets = [
        dataset.parse(sheet).assign(group=sheet)
        for sheet in dataset.sheet_names
    ]

# Stack all worksheets into a single frame with a fresh 0..n-1 index.
df_airbnb = pd.concat(sheets, ignore_index=True)

In [20]:
# Check the shape (rows, columns) of the combined DataFrame
# as a quick sanity check on the concatenation of all sheets.
df_airbnb.shape

(51707, 21)

In [21]:
# Exploratory Data Analysis (EDA)
# Derive the city and the weekday/weekend label from the `group` column:
# the value is split on '_' and the first token becomes the city, the second
# the weekday/weekend label.
# NOTE(review): presumably group values look like "<city>_<weekdays|weekends>" - confirm.
group_parts = df_airbnb['group'].str.split('_')
df_airbnb['city'] = group_parts.str.get(0)
df_airbnb['weekday/weekend'] = group_parts.str.get(1)


In [22]:
# List the distinct values present in the city column.
df_airbnb['city'].unique()

array(['amsterdam', 'athens', 'berlin', 'barcelona', 'budapest', 'lisbon',
       'london', 'paris', 'rome', 'vienna'], dtype=object)

In [23]:
# Create a column for country by looking each city up in a city -> country table.
country_map = {
    'amsterdam': 'netherlands',
    'athens': 'greece',
    'berlin': 'germany',
    'barcelona': 'spain',
    'budapest': 'hungary',
    'lisbon': 'portugal',
    'london': 'uk',
    'paris': 'france',
    'rome': 'italy',
    'vienna': 'austria',
}

# Series.map performs a dictionary lookup per value. Unlike Series.replace,
# a city missing from country_map becomes NaN instead of silently keeping the
# city name in the country column, so gaps in the table are easy to spot.
df_airbnb['country'] = df_airbnb['city'].map(country_map)

In [24]:
# Change the name of realSum to price.
# The renamed frame is rebound to df_airbnb rather than mutated with
# inplace=True; if the realSum column is absent (e.g. the cell is re-run),
# rename leaves the frame unchanged.
df_airbnb = df_airbnb.rename(columns={'realSum': 'price'})

In [26]:
# Instead of having two columns of room_shared and room_private, create one
# column with the respective categories.
# Built from vectorised boolean masks rather than a row-wise apply(axis=1),
# which would call a Python lambda once per row.
is_shared = df_airbnb['room_shared'].eq(True)    # .eq(True) mirrors the original `== True` test
is_private = df_airbnb['room_private'].eq(True)

# Start with the fallback category, then overwrite in reverse priority order:
# 'shared room' is applied last so it wins if both flags are set, matching the
# original shared -> private -> entire home/apt precedence.
room_type = pd.Series('entire home/apt', index=df_airbnb.index)
room_type = room_type.mask(is_private, 'private room').mask(is_shared, 'shared room')
df_airbnb['room_type_clean'] = room_type