# Airbnb Analysis

# IMPORTING MODULES

In [1]:
import pandas as pd
import numpy as np
import pymongo
import seaborn as sns
import plotly.express as px
import matplotlib.pyplot as plt
from wordcloud import STOPWORDS, WordCloud
import warnings
warnings.filterwarnings('ignore')

# MongoDB connection

In [2]:
client = pymongo.MongoClient("mongodb://localhost:27017")
db = client['Airbnb_Analysis']
col = db['listingsAndReviews']

# Total Documents

In [3]:
Documents = [i for i in col.find()]
len(Documents)

20030

# Retrieve the Airbnb dataset for MongoDB connection

In [4]:
rel_data = []
for i in col.find():
    data = dict(Id = i['_id'],
                Listing_url = i['listing_url'],
                Name = i.get('name'),
                Description = i.get('description'),
                House_rules = i.get('house_rules'),
                Property_type = i['property_type'],
                Room_type = i['room_type'],
                Bed_type = i['bed_type'],
                Min_nights = int(i['minimum_nights']),
                Max_nights = int(i['maximum_nights']),
                Cancellation_policy = i['cancellation_policy'],
                Accomodates = i['accommodates'],
                Total_bedrooms = i.get('bedrooms'),
                Total_beds = i.get('beds'),
                Availability_365 = i['availability_365'],
                Price = i['price'],
                Security_deposit = i.get('security_deposit'),
                Cleaning_fee = i.get('cleaning_fee'),
                Extra_people = i['extra_people'],
                Guests_included= i['guests_included'],
                No_of_reviews = i['number_of_reviews'],
                Review_scores = i.get('review_scores_rating'),
                Amenities = ', '.join(i['amenities']),
                Host_id = i['host_id'],
                Host_name = i.get('host_name'),
                Street = i['street'],
                City = i.get('city'),
                State = i.get('state'),
                Zipcode = i.get('zipcode'),
                Bedrooms = i.get('bedrooms'),
                Country = i['country'],
                Country_code = i['country_code'],
                Longitude = i['longitude'],
                Latitude = i['latitude'],
                Is_location_exact = i['is_location_exact']
    )
    rel_data.append(data)

In [5]:
df = pd.DataFrame(rel_data)
df.head(5)

Unnamed: 0,Id,Listing_url,Name,Description,House_rules,Property_type,Room_type,Bed_type,Min_nights,Max_nights,...,Street,City,State,Zipcode,Bedrooms,Country,Country_code,Longitude,Latitude,Is_location_exact
0,659aa9cfc2501afac03f459f,https://www.airbnb.com/rooms/2818,Quiet Garden View Room & Super Fast WiFi,Quiet Garden View Room & Super Fast WiFi I'm r...,Please: - Leave your shoes in the entrance - ...,Apartment,Private room,Real Bed,3,15,...,"Amsterdam, North Holland, Netherlands",Amsterdam,North Holland,,1.0,Netherlands,NL,4.941419,52.365755,False
1,659aa9cfc2501afac03f45a0,https://www.airbnb.com/rooms/3209,"Quiet apt near center, great view",You will love our spacious (90 m2) bright apar...,"Our house comes with our very sweet, but old (...",Apartment,Entire home/apt,Real Bed,4,20,...,"Amsterdam, Noord-Holland, Netherlands",Amsterdam,Noord-Holland,1013 XE,2.0,Netherlands,NL,4.873924,52.390225,True
2,659aa9cfc2501afac03f45a1,https://www.airbnb.com/rooms/20168,100%Centre-Studio 1 Private Floor/Bathroom,"Cozy studio on your own private floor, 100% in...",This studio/room takes entire floor and has it...,Townhouse,Entire home/apt,Real Bed,1,1000,...,"Amsterdam, North Holland, Netherlands",Amsterdam,North Holland,1017,1.0,Netherlands,NL,4.893541,52.365087,True
3,659aa9cfc2501afac03f45a2,https://www.airbnb.com/rooms/25428,Lovely apt in City Centre (Jordaan),"This nicely furnished, newly renovated apt is...","The building is a quiet building, so please do...",Apartment,Entire home/apt,Real Bed,14,60,...,"Amsterdam, North Holland, Netherlands",Amsterdam,North Holland,1016,1.0,Netherlands,NL,4.883668,52.373114,False
4,659aa9cfc2501afac03f45a3,https://www.airbnb.com/rooms/27886,"Romantic, stylish B&B houseboat in canal district",Stylish and romantic houseboat on fantastic hi...,"All the facilities are included ( cleaning , ...",Houseboat,Private room,Real Bed,2,730,...,"Amsterdam, North Holland, Netherlands",Amsterdam,North Holland,1013,1.0,Netherlands,NL,4.892078,52.386727,True


# Data Collection and Preprocessing

# Data type Correction

In [6]:
# checking Data types
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20030 entries, 0 to 20029
Data columns (total 35 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Id                   20030 non-null  object 
 1   Listing_url          20030 non-null  object 
 2   Name                 19992 non-null  object 
 3   Description          19906 non-null  object 
 4   House_rules          12571 non-null  object 
 5   Property_type        20030 non-null  object 
 6   Room_type            20030 non-null  object 
 7   Bed_type             20030 non-null  object 
 8   Min_nights           20030 non-null  int64  
 9   Max_nights           20030 non-null  int64  
 10  Cancellation_policy  20030 non-null  object 
 11  Accomodates          20030 non-null  int64  
 12  Total_bedrooms       20022 non-null  float64
 13  Total_beds           20023 non-null  float64
 14  Availability_365     20030 non-null  int64  
 15  Price                20030 non-null 

In [7]:
# The below features are in Decimal128 type hence changing it to relevant data types
df['Name'] = df['Name'].isna().astype(str)
df['Description'] = df['Description'].fillna(0)
df['Host_name']  = df['Host_name'].isna().astype(str)
df['Security_deposit']  = df['Security_deposit'].isna().astype(str)
df['Cleaning_fee']  = df['Cleaning_fee'].isna().astype(str)
df['City']  = df['City'].isna().astype(str)
df['State']  = df['State'].isna().astype(str)
df['Zipcode']  = df['Zipcode'].isna().astype(str)
df['Total_beds']  = df['Total_beds'].isna().astype(float)
df['Bedrooms']  = df['Bedrooms'].isna().astype(float)
df['Review_scores']  = df['Review_scores'].isna().astype(float)
df['Total_bedrooms']  = df['Total_bedrooms'].isna().astype(float)
df['House_rules']  = df['House_rules'].isna().astype(float)

# Filling Missing values

In [8]:
df.isna().sum()

Id                     0
Listing_url            0
Name                   0
Description            0
House_rules            0
Property_type          0
Room_type              0
Bed_type               0
Min_nights             0
Max_nights             0
Cancellation_policy    0
Accomodates            0
Total_bedrooms         0
Total_beds             0
Availability_365       0
Price                  0
Security_deposit       0
Cleaning_fee           0
Extra_people           0
Guests_included        0
No_of_reviews          0
Review_scores          0
Amenities              0
Host_id                0
Host_name              0
Street                 0
City                   0
State                  0
Zipcode                0
Bedrooms               0
Country                0
Country_code           0
Longitude              0
Latitude               0
Is_location_exact      0
dtype: int64

In [9]:
df.dtypes

Id                      object
Listing_url             object
Name                    object
Description             object
House_rules            float64
Property_type           object
Room_type               object
Bed_type                object
Min_nights               int64
Max_nights               int64
Cancellation_policy     object
Accomodates              int64
Total_bedrooms         float64
Total_beds             float64
Availability_365         int64
Price                   object
Security_deposit        object
Cleaning_fee            object
Extra_people            object
Guests_included          int64
No_of_reviews            int64
Review_scores          float64
Amenities               object
Host_id                  int64
Host_name               object
Street                  object
City                    object
State                   object
Zipcode                 object
Bedrooms               float64
Country                 object
Country_code            object
Longitud

In [10]:
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
House_rules,20030.0,0.3723914,0.4834539,0.0,0.0,0.0,1.0,1.0
Min_nights,20030.0,3.328707,12.53742,1.0,2.0,2.0,3.0,1001.0
Max_nights,20030.0,631.2257,541.34,1.0,21.0,1125.0,1125.0,9999.0
Accomodates,20030.0,2.891912,1.31876,1.0,2.0,2.0,4.0,17.0
Total_bedrooms,20030.0,0.0003994009,0.01998152,0.0,0.0,0.0,0.0,1.0
Total_beds,20030.0,0.0003494758,0.01869147,0.0,0.0,0.0,0.0,1.0
Availability_365,20030.0,59.91368,104.0277,0.0,0.0,3.0,67.0,365.0
Guests_included,20030.0,1.468997,0.9378896,1.0,1.0,1.0,2.0,16.0
No_of_reviews,20030.0,21.56046,43.24029,0.0,3.0,8.0,22.0,695.0
Review_scores,20030.0,0.1317524,0.3382298,0.0,0.0,0.0,0.0,1.0


In [11]:
rel_data[0]

{'Id': ObjectId('659aa9cfc2501afac03f459f'),
 'Listing_url': 'https://www.airbnb.com/rooms/2818',
 'Name': 'Quiet Garden View Room & Super Fast WiFi',
 'Description': 'Quiet Garden View Room & Super Fast WiFi I\'m renting a bedroom (room overlooking the garden) in my apartment in Amsterdam,  The room is located to the east of the city centre in a quiet, typical Amsterdam neighbourhood the "Indische Buurt". Amsterdam’s historic centre is less than 15 minutes away by bike or tram. The features of the room are: - Twin beds (80 x 200 cm, down quilts and pillows)  - 2 pure cotton towels for each guest  - reading lamps - bedside table - wardrobe - table with chairs - tea and coffee making facilities - mini bar - alarm clock - Hi-Fi system with cd player, connection for mp3 player / phone - map of Amsterdam and public transport - Wi-Fi Internet connection  Extra services: - Bike rental Indische Buurt ("Indies Neighborhood") is a neighbourhood in the eastern portion of the city of Amsterdam, i

In [12]:
df.isnull().sum()

Id                     0
Listing_url            0
Name                   0
Description            0
House_rules            0
Property_type          0
Room_type              0
Bed_type               0
Min_nights             0
Max_nights             0
Cancellation_policy    0
Accomodates            0
Total_bedrooms         0
Total_beds             0
Availability_365       0
Price                  0
Security_deposit       0
Cleaning_fee           0
Extra_people           0
Guests_included        0
No_of_reviews          0
Review_scores          0
Amenities              0
Host_id                0
Host_name              0
Street                 0
City                   0
State                  0
Zipcode                0
Bedrooms               0
Country                0
Country_code           0
Longitude              0
Latitude               0
Is_location_exact      0
dtype: int64

In [13]:
df.columns

Index(['Id', 'Listing_url', 'Name', 'Description', 'House_rules',
       'Property_type', 'Room_type', 'Bed_type', 'Min_nights', 'Max_nights',
       'Cancellation_policy', 'Accomodates', 'Total_bedrooms', 'Total_beds',
       'Availability_365', 'Price', 'Security_deposit', 'Cleaning_fee',
       'Extra_people', 'Guests_included', 'No_of_reviews', 'Review_scores',
       'Amenities', 'Host_id', 'Host_name', 'Street', 'City', 'State',
       'Zipcode', 'Bedrooms', 'Country', 'Country_code', 'Longitude',
       'Latitude', 'Is_location_exact'],
      dtype='object')

In [14]:
# Checking Duplicate records
df[df.duplicated()]

Unnamed: 0,Id,Listing_url,Name,Description,House_rules,Property_type,Room_type,Bed_type,Min_nights,Max_nights,...,Street,City,State,Zipcode,Bedrooms,Country,Country_code,Longitude,Latitude,Is_location_exact


In [15]:
# Name Column has empty values and some duplicates hence dropping them
df.drop(labels=list(df[df.Name.duplicated(keep=False)].index),inplace=True)

In [16]:
df.reset_index(drop=True,inplace=True)

# Save "Airbnb" Data Csv File

In [17]:
# Converting dataframe to csv file and saving it
df.to_csv('Airbnb_data.csv',index=False)