# London Airbnb Analysis
### This notebook analyses Airbnb listings in London to help tourists make informed decisions about accommodation. Python is used here for data cleaning; the accompanying Tableau visualisations are available in the same repository.

In [105]:
#importing libraries
import pandas as pd
import numpy as np


# Dataset Overview
### - Source: [Inside Airbnb](https://insideairbnb.com/get-the-data/) (London)
### - Contains info on price, number of reviews, location and room types.
## We will begin by importing the dataset

In [108]:
# read the raw Inside Airbnb listings export into a DataFrame
raw_listings_csv = 'airbnb_listings.csv'
df = pd.read_csv(raw_listings_csv)

In [110]:
# preview the first rows of the raw listings to see the available columns
df.head()

Unnamed: 0,id,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365,number_of_reviews_ltm,license
0,264776,Huge Four Bedroom Apartment,1389063,Sue,,Lewisham,51.44306,-0.01948,Entire home/apt,297.0,3,68,2025-05-28,0.51,11,293,12,
1,264777,One Bedroom Apartment,1389063,Sue,,Lewisham,51.44284,-0.01997,Entire home/apt,98.0,3,24,2024-12-11,0.22,11,318,4,
2,264778,Two Bedroom Newly Refurbished Apartment,1389063,Sue,,Lewisham,51.44359,-0.02275,Entire home/apt,148.0,3,58,2025-05-01,0.43,11,302,6,
3,264779,Refurbished Two Bedroom Apartment,1389063,Sue,,Lewisham,51.44355,-0.02309,Entire home/apt,144.0,3,36,2025-04-10,0.3,11,328,7,
4,264780,Spacious refurbished 2 bedroom apt with balcony,1389063,Sue,,Lewisham,51.44333,-0.02307,Entire home/apt,157.0,3,54,2024-12-29,0.35,11,255,4,


In [112]:
# (rows, columns) of the raw data
df.shape

(96651, 18)

# Data Cleaning
### Steps performed

### 1. Removed Duplicates
### 2. Handled missing values
### 3. Standardised formats for consistency


In [115]:
# count missing values in every column of the raw data
df.isna().sum()

id                                    0
name                                  0
host_id                               0
host_name                            40
neighbourhood_group               96651
neighbourhood                         0
latitude                              0
longitude                             0
room_type                             0
price                             33967
minimum_nights                        0
number_of_reviews                     0
last_review                       25164
reviews_per_month                 25164
calculated_host_listings_count        0
availability_365                      0
number_of_reviews_ltm                 0
license                           96651
dtype: int64

In [117]:
# share of missing values per column, printed as a fraction between 0 and 1
for column_name, missing_fraction in df.isna().mean().items():
    print(f"{column_name} - {missing_fraction}")

id - 0.0
name - 0.0
host_id - 0.0
host_name - 0.000413860177339086
neighbourhood_group - 1.0
neighbourhood - 0.0
latitude - 0.0
longitude - 0.0
room_type - 0.0
price - 0.3514397160919183
minimum_nights - 0.0
number_of_reviews - 0.0
last_review - 0.26035943756401897
reviews_per_month - 0.26035943756401897
calculated_host_listings_count - 0.0
availability_365 - 0.0
number_of_reviews_ltm - 0.0
license - 1.0


In [119]:
# neighbourhood_group and license are missing for every row; last_review
# (about 26% missing) is removed along with them
columns_to_drop = ["neighbourhood_group", "license", "last_review"]
df = df.drop(columns=columns_to_drop)


In [121]:
# confirm the three dropped columns no longer appear
df.head()

Unnamed: 0,id,name,host_id,host_name,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,reviews_per_month,calculated_host_listings_count,availability_365,number_of_reviews_ltm
0,264776,Huge Four Bedroom Apartment,1389063,Sue,Lewisham,51.44306,-0.01948,Entire home/apt,297.0,3,68,0.51,11,293,12
1,264777,One Bedroom Apartment,1389063,Sue,Lewisham,51.44284,-0.01997,Entire home/apt,98.0,3,24,0.22,11,318,4
2,264778,Two Bedroom Newly Refurbished Apartment,1389063,Sue,Lewisham,51.44359,-0.02275,Entire home/apt,148.0,3,58,0.43,11,302,6
3,264779,Refurbished Two Bedroom Apartment,1389063,Sue,Lewisham,51.44355,-0.02309,Entire home/apt,144.0,3,36,0.3,11,328,7
4,264780,Spacious refurbished 2 bedroom apt with balcony,1389063,Sue,Lewisham,51.44333,-0.02307,Entire home/apt,157.0,3,54,0.35,11,255,4


In [123]:
# row count is unchanged; only the column count shrinks after the drop
df.shape

(96651, 15)

In [125]:
# number of listings that have no price, before any imputation
df['price'].isna().sum()

33967

In [127]:
# discard the few listings that have no host_name; the label is wrapped in a
# list because a bare string for `subset` is only accepted by pandas >= 1.5
df = df.dropna(subset=['host_name'])

In [129]:
# re-count missing values now that rows without a host_name are gone
df.isna().sum()

id                                    0
name                                  0
host_id                               0
host_name                             0
neighbourhood                         0
latitude                              0
longitude                             0
room_type                             0
price                             33955
minimum_nights                        0
number_of_reviews                     0
reviews_per_month                 25152
calculated_host_listings_count        0
availability_365                      0
number_of_reviews_ltm                 0
dtype: int64

In [131]:
# fill missing prices with the median price of the listing's room type;
# the built-in 'median' transform avoids running a Python lambda per group,
# and fillna only touches rows whose price is actually missing

room_type_median = df.groupby('room_type')['price'].transform('median')
df['price'] = df['price'].fillna(room_type_median)

In [132]:
# verify that price has no missing values left after the median fill
df.isna().sum()

id                                    0
name                                  0
host_id                               0
host_name                             0
neighbourhood                         0
latitude                              0
longitude                             0
room_type                             0
price                                 0
minimum_nights                        0
number_of_reviews                     0
reviews_per_month                 25152
calculated_host_listings_count        0
availability_365                      0
number_of_reviews_ltm                 0
dtype: int64

In [135]:
# listings without a reviews_per_month value get 0
# (presumably listings that have never been reviewed - the missing count
# matches that of last_review; confirm against number_of_reviews)
df = df.assign(reviews_per_month=df['reviews_per_month'].fillna(0))

In [137]:
# final check: every column should now report zero missing values
df.isna().sum()

id                                0
name                              0
host_id                           0
host_name                         0
neighbourhood                     0
latitude                          0
longitude                         0
room_type                         0
price                             0
minimum_nights                    0
number_of_reviews                 0
reviews_per_month                 0
calculated_host_listings_count    0
availability_365                  0
number_of_reviews_ltm             0
dtype: int64

In [139]:
# inspect the column dtypes before standardising formats
df.dtypes

id                                  int64
name                               object
host_id                             int64
host_name                          object
neighbourhood                      object
latitude                          float64
longitude                         float64
room_type                          object
price                             float64
minimum_nights                      int64
number_of_reviews                   int64
reviews_per_month                 float64
calculated_host_listings_count      int64
availability_365                    int64
number_of_reviews_ltm               int64
dtype: object

In [141]:
# store price as whole numbers; round first because astype(int) on its own
# truncates toward zero (e.g. 99.9 would become 99), which can bias
# fractional values such as imputed medians downwards
df['price'] = df['price'].round().astype(int)

In [143]:
# preview the cleaned data; price should now display as whole numbers
df.head()

Unnamed: 0,id,name,host_id,host_name,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,reviews_per_month,calculated_host_listings_count,availability_365,number_of_reviews_ltm
0,264776,Huge Four Bedroom Apartment,1389063,Sue,Lewisham,51.44306,-0.01948,Entire home/apt,297,3,68,0.51,11,293,12
1,264777,One Bedroom Apartment,1389063,Sue,Lewisham,51.44284,-0.01997,Entire home/apt,98,3,24,0.22,11,318,4
2,264778,Two Bedroom Newly Refurbished Apartment,1389063,Sue,Lewisham,51.44359,-0.02275,Entire home/apt,148,3,58,0.43,11,302,6
3,264779,Refurbished Two Bedroom Apartment,1389063,Sue,Lewisham,51.44355,-0.02309,Entire home/apt,144,3,36,0.3,11,328,7
4,264780,Spacious refurbished 2 bedroom apt with balcony,1389063,Sue,Lewisham,51.44333,-0.02307,Entire home/apt,157,3,54,0.35,11,255,4


In [145]:
# count fully duplicated rows, then drop them so the exported data is
# duplicate-free even if a refreshed source file contains repeated rows
duplicate_count = df.duplicated().sum()
df = df.drop_duplicates()
duplicate_count

0

In [147]:
# write the cleaned listings to disk without the pandas row index
cleaned_csv = 'airbnb_cleaned.csv'
df.to_csv(cleaned_csv, index=False)