In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import seaborn as sns
import geopy
from geopy.geocoders import Nominatim
import folium
from folium import Choropleth, Circle, Marker
from folium.plugins import HeatMap, MarkerCluster
import os
# Print the full path of every file found beneath the Kaggle input directory.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

In [None]:
# Load the house-sales table from the Kaggle input directory.
df = pd.read_csv('/kaggle/input/housesalesprediction/kc_house_data.csv')
# The first four characters of the 'date' string are kept as the 'year' column.
df['year'] = df['date'].str.slice(0, 4)
# `data` refers to the same frame; it is what the correlation section reads.
data = df

# Get the address of each house from its latitude and longitude using Nominatim

In [None]:
# Work on an independent copy of the first 500 rows: columns are added to
# `maps` further down, and assigning into a slice of `df` triggers pandas'
# SettingWithCopyWarning. The sample also bounds the number of geocoder calls.
maps = df.head(500).copy()
def get_address(df, geolocator, lat_field, lon_field):
    """Reverse-geocode one row's coordinates into an address string.

    Parameters
    ----------
    df : mapping-like row
        A single record (e.g. the Series handed over by
        ``DataFrame.apply(axis=1)``) holding the latitude under ``lat_field``
        and the longitude under ``lon_field``.
    geolocator : object
        Anything exposing ``reverse((lat, lon))`` that returns an object with
        an ``address`` attribute, or ``None`` when nothing is found.
    lat_field, lon_field : str
        Keys of the latitude and longitude values inside ``df``.

    Returns
    -------
    str or None
        The resolved address, or ``None`` when the geocoder has no match.
    """
    location = geolocator.reverse((df[lat_field], df[lon_field]))
    # A failed lookup comes back as None; pass that on instead of raising
    # AttributeError on ``None.address`` and aborting the whole apply().
    if location is None:
        return None
    return location.address

geolocator = geopy.Nominatim(user_agent='http')

# Coordinates-only frame handed to the reverse geocoder, one lookup per row.
# NOTE(review): every row is a separate network request with no throttling;
# check the geocoding service's usage policy before raising the sample size.
df = pd.DataFrame({
    'Lat': maps['lat'].astype(float),
    'Lon': maps['long'].astype(float)
})
maps['address'] = df.apply(get_address, axis=1, geolocator=geolocator, lat_field='Lat', lon_field='Lon')

# Pick the 2nd and 4th comma-separated components of the address. The .str
# accessors yield NaN for a missing address or one with too few components,
# where indexing the result of x.split(',') directly would raise.
address_parts = maps['address'].str.split(',')
maps['street'] = address_parts.str[1]
maps['location'] = address_parts.str[3]


In [None]:
# Mean sale price per location, most expensive first. Computed once and reused
# for both axes so the bar labels and bar heights always come from the same
# ordering.
mean_price_by_location = (
    maps.groupby('location')['price']
    .mean()
    .sort_values(ascending=False)
)
x = mean_price_by_location.index
y = mean_price_by_location

fig = go.Figure([go.Bar(x=x, y=y, name='Price')])

# Title spelling: the area is King County, not "King Country".
fig.update_layout(title_text='House Pricing in King County')
fig.update_xaxes(title='Location')
fig.update_yaxes(title='Price')
fig.show()

Get each location's latitude and longitude from its name using geolocator.geocode

In [None]:
# Mean price per location; each distinct location is geocoded below.
city = maps.groupby('location')['price'].mean().reset_index()


def get_address(df, geolocator, city):
    """Forward-geocode the place name stored in ``df[city]``.

    Note that this rebinds the ``get_address`` name defined earlier in the
    notebook for reverse geocoding.

    Parameters
    ----------
    df : mapping-like row
        A single record holding the place name under the key ``city``.
    geolocator : object
        Anything exposing ``geocode(query)`` that returns an object with
        ``latitude`` / ``longitude`` attributes, or ``None`` when no match.
    city : str
        Key of the place name inside ``df``.

    Returns
    -------
    tuple
        ``(latitude, longitude)``; ``(nan, nan)`` when the lookup finds nothing.
    """
    location = geolocator.geocode(df[city])
    if location is None:
        # No match: keep the row but mark its coordinates as missing instead
        # of raising AttributeError on ``None.latitude``.
        return (np.nan, np.nan)
    return (location.latitude, location.longitude)


geolocator = geopy.Nominatim(user_agent='http')

df = pd.DataFrame({
    'city': city['location']
})
# A single geocode call per location supplies both coordinates, so the number
# of network requests equals the number of locations.
coords = pd.DataFrame(
    [get_address(row, geolocator, city='city') for _, row in df.iterrows()],
    columns=['lat', 'long'],
    index=city.index,
)
city['lat'] = coords['lat']
city['long'] = coords['long']


Price Distribution by Location

In [None]:
# Per-location table for the map. Copy it so the cast below modifies an
# independent frame rather than a slice of `city`.
maps1 = city[['location', 'lat', 'long', 'price']].copy()
# Cast the per-location mean price itself. Assigning from maps['price'] would
# align individual house prices by row index and overwrite the means.
maps1['price'] = maps1['price'].astype(int)

In [None]:
m = folium.Map(location=[47.5, -122.25], tiles='cartodbpositron', zoom_start=10)

# Add one marker per location, labelled with the location name and its price.
# No `radius` is passed: that is a Circle setting and has no effect on a Marker.
for row in maps1.itertuples(index=False):
    # A marker cannot be placed without coordinates; skip rows lacking them.
    if pd.isna(row.lat) or pd.isna(row.long):
        continue
    folium.Marker(
        location=[row.lat, row.long],
        popup=row.location + ": $" + str(row.price)
    ).add_to(m)

# Display the map
m

Price Distribution by Street

In [None]:
# Average price for each street, with the mean truncated to an integer.
street = (
    maps.groupby('street')['price']
    .mean()
    .astype(int)
    .reset_index()
)

In [None]:
def get_address(df, geolocator, street):
    """Forward-geocode the street name stored in ``df[street]``.

    Parameters
    ----------
    df : mapping-like row
        A single record holding the street name under the key ``street``.
    geolocator : object
        Anything exposing ``geocode(query)`` that returns an object with
        ``latitude`` / ``longitude`` attributes, or ``None`` when no match.
    street : str
        Key of the street name inside ``df``.

    Returns
    -------
    tuple
        ``(latitude, longitude)``; ``(nan, nan)`` when the lookup finds nothing.
    """
    # NOTE(review): the query is the bare street name with no city or state,
    # so the geocoder may resolve it to a same-named street elsewhere — confirm.
    location = geolocator.geocode(df[street])
    if location is None:
        # No match: keep the row but mark its coordinates as missing instead
        # of raising AttributeError on ``None.latitude``.
        return (np.nan, np.nan)
    return (location.latitude, location.longitude)


geolocator = geopy.Nominatim(user_agent='http')

df = pd.DataFrame({
    'street': street['street']
})
# A single geocode call per street supplies both coordinates, so the number
# of network requests equals the number of streets.
coords = pd.DataFrame(
    [get_address(row, geolocator, street='street') for _, row in df.iterrows()],
    columns=['lat', 'long'],
    index=street.index,
)
street['lat'] = coords['lat']
street['long'] = coords['long']


In [None]:
m = folium.Map(location=[47.5, -122.25], tiles='cartodbpositron', zoom_start=10)

# Add one green marker per street, labelled with the street name and its price.
# No `radius` is passed: that is a Circle setting and has no effect on a Marker.
for row in street.itertuples(index=False):
    # A marker cannot be placed without coordinates; skip rows lacking them.
    if pd.isna(row.lat) or pd.isna(row.long):
        continue
    folium.Marker(
        location=[row.lat, row.long],
        popup=row.street + ": " + str(row.price),
        icon=folium.Icon(color='green')
    ).add_to(m)

# Display the map
m

Detailed Price Distribution by Address

In [None]:
# Per-house table for the detailed map. Copy it so the cast below modifies an
# independent frame rather than a slice of `maps`, which would trigger pandas'
# SettingWithCopyWarning.
maps2 = maps[['address', 'lat', 'long', 'price']].copy()
maps2['price'] = maps2['price'].astype(int)

In [None]:
m = folium.Map(location=[47.5, -122.25], tiles='cartodbpositron', zoom_start=10)

# Draw one circle per house, with a popup showing its address and price.
house_rows = zip(maps2['address'], maps2['lat'], maps2['long'], maps2['price'])
for address, lat, lon, price in house_rows:
    circle = folium.Circle(
        location=[lat, lon],
        radius=20,
        popup=address + ": " + str(price)
    )
    circle.add_to(m)

# Display the map
m

## Correlation

In [None]:
# Columns fed into the correlation matrix. Copy them so the casts below act on
# an independent frame instead of a slice of `data`.
train = data[[
    'price', 'bedrooms', 'sqft_lot', 'floors', 'waterfront', 'view',
    'condition', 'grade', 'sqft_above', 'sqft_basement', 'sqft_living15',
    'sqft_lot15', 'yr_built', 'yr_renovated',
]].copy()
train['price'] = train['price'].astype(int)
# Cast floors from its own values. Sourcing it from train['price'] would turn
# 'floors' into a duplicate of 'price' and force their correlation to 1.
# NOTE(review): astype(int) truncates any fractional floor counts — confirm
# that is intended.
train['floors'] = train['floors'].astype(int)

In [None]:
# Compute the correlation matrix once and reuse it for the mask and the plot.
corr = train.corr()
# Use the builtin bool: the np.bool alias was deprecated in NumPy 1.20 and
# removed in 1.24, where it raises AttributeError.
mask = np.zeros_like(corr, dtype=bool)
# Mask the upper triangle (diagonal included) so each pair is drawn once.
mask[np.triu_indices_from(mask)] = True
plt.subplots(figsize=(16, 12))
sns.heatmap(corr, linewidths=0.25, vmax=0.7, square=True, cmap="BuGn",
            linecolor='w', annot=True, annot_kws={"size": 8}, mask=mask)