In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Load the listings dataset once, after all imports; every later cell works on `df`.
df = pd.read_csv("AB_NYC_2019.csv")

In [None]:
# Getting to know the data: preview the first five rows of the loaded frame.
df.head()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
df.describe()

In [None]:
# List the column labels available in the dataset.
df.columns

In [None]:
# Distinct values of neighbourhood_group; these are the keys used for the groupby further down.
df['neighbourhood_group'].unique()

In [None]:
# Drop the name, id, host_name and last_review columns.
# NOTE: host_name is therefore not available to any later cell.
# Reassigning with errors='ignore' (instead of an in-place drop) keeps this cell
# re-runnable: a second execution no longer raises KeyError for already-removed columns.
df = df.drop(columns=['name', 'id', 'host_name', 'last_review'], errors='ignore')

In [None]:
# Show the first ten rows to check the frame after the column drop.
df.head(10)

In [None]:
# Fill missing reviews_per_month entries with 0 so later analysis sees no NaN in that column.
# Only the column named in the mapping is filled; other columns are left as they are.
df = df.fillna(value={'reviews_per_month': 0})

In [None]:
# Sort by neighbourhood_group first, then by neighbourhood within each group.
# This must be one multi-key sort: two consecutive single-key sorts leave the
# frame ordered by the last key only, because the default sort is not stable.
df = df.sort_values(by=['neighbourhood_group', 'neighbourhood'])

In [None]:
# Preview the first five rows after sorting.
df.head()

In [None]:
# Count plot of listings per room_type, bars ordered from most to least frequent.
room_type_order = df["room_type"].value_counts().index
types_of_rooms = sns.countplot(data=df, x="room_type", order=room_type_order)
types_of_rooms.set(title='Number of Rooms by Type', xlabel='Room Types', ylabel='')

In [None]:
# Group the listings by neighbourhood_group (e.g. Bronx, Manhattan).
# `nb` is the GroupBy object reused by the per-group cells that follow.
nb = df.groupby("neighbourhood_group")

In [None]:
# Separating the data by area: one DataFrame per neighbourhood_group key.
bronx, brooklyn, manhattan, queens, staten = (
    nb.get_group(group_name)
    for group_name in ('Bronx', 'Brooklyn', 'Manhattan', 'Queens', 'Staten Island')
)

In [None]:
# Display the Bronx subset extracted from the groupby.
bronx

In [None]:
# Plot the average listing price per neighbourhood group.
# `nb` is the groupby over "neighbourhood_group"; aggregating its "price" column
# once replaces the manual loop that re-fetched every group with get_group().
mean_price_by_group = nb['price'].mean()
nbh = mean_price_by_group.index.tolist()   # group labels, in groupby (sorted-key) order
avg = mean_price_by_group.tolist()         # mean price for each label

fig, ax = plt.subplots()
ax.bar(nbh, avg, color='blue')
# Label the axes so the figure can be read without the surrounding code.
ax.set(xlabel='Neighbourhood group', ylabel='Average price',
       title='Average Price by Neighbourhood Group')
plt.show()

In [None]:
# Median availability_365 per neighbourhood group.
# One groupby aggregation replaces the manual get_group() loop.
# `area` and `avail` stay plain lists because the plotting cells below consume them.
median_availability = nb['availability_365'].median()
area = median_availability.index.tolist()   # group labels, in groupby (sorted-key) order
avail = median_availability.tolist()        # median availability_365 for each label

In [None]:
# Bar chart (matplotlib) of median availability_365 per neighbourhood group.
fig, ax = plt.subplots()
ax.bar(area, avail)
# Label the axes so the figure can be read without the surrounding code.
ax.set(xlabel='Neighbourhood group', ylabel='Median availability_365',
       title='Median Availability by Neighbourhood Group')
plt.show()

In [None]:
# Same median-availability data drawn with seaborn.
availability_plot = sns.barplot(x=area, y=avail)
# Label the axes so the figure can be read without the surrounding code.
availability_plot.set(xlabel='Neighbourhood group', ylabel='Median availability_365',
                      title='Median Availability by Neighbourhood Group')
plt.show()

In [None]:
# Top 10 most expensive areas, ranked by mean listing price per neighbourhood.
# The result column is named 'Mean_Price', so the statistic is the mean; the
# previous code computed the median under that label. Switch to .median() AND
# rename the column if a median ranking is what is actually wanted.
nbr = df.groupby("neighbourhood")
mean_price_by_neighbourhood = nbr['price'].mean()
grp = mean_price_by_neighbourhood.index.tolist()   # neighbourhood labels
prc = mean_price_by_neighbourhood.tolist()         # mean price for each label

prices = pd.DataFrame({"Area": grp, 'Mean_Price': prc})
prices.sort_values(by=['Mean_Price'], ascending=False).head(10)

In [None]:
# Top 10 hosts: first list the columns still present in df before aggregating per host.
df.columns

In [None]:
# Top 10 hosts by total number of reviews across their listings.
# - host_name was dropped from df earlier in the notebook, so grouping on it
#   raises KeyError; group on host_id instead.
#   NOTE(review): host_id is assumed to be a column of AB_NYC_2019.csv - confirm.
# - numeric_only=True keeps sum() from trying to add up text columns.
# - The sorted frame is assigned back (sort_values returns a new frame) and the
#   order is descending, so head(10) really is the top ten.
top_host = (
    df.groupby('host_id')
    .sum(numeric_only=True)
    .sort_values(by='number_of_reviews', ascending=False)
)
top_host.head(10)

In [None]:
# Scatter of listing coordinates, coloured by neighbourhood_group.
plt.figure(figsize=(10,6))
# x/y are passed by keyword together with data=df: recent seaborn releases no
# longer accept the coordinate vectors as positional arguments.
sns.scatterplot(data=df, x='longitude', y='latitude', hue='neighbourhood_group')

In [None]:
!pip install folium

In [None]:
import folium
import folium.plugins

# Heat map of listing locations drawn on an interactive folium map.
# The map object is named `nyc_map` rather than `map` so the Python builtin
# map() is not shadowed for the rest of the session.
# The start location (40.7128, -74.0060) is presumably central New York City - confirm.
nyc_map = folium.Map(location=[40.7128, -74.0060], zoom_start=11)
folium.plugins.HeatMap(
    df[['latitude', 'longitude']].dropna(),  # rows without coordinates cannot be plotted
    radius=8,
    gradient={0.2: 'blue', 0.4: 'purple', 0.6: 'orange', 1.0: 'red'},
).add_to(nyc_map)
display(nyc_map)