**Introduction**

This notebook uses a dataset of house sale prices for King County, USA, to predict house prices using regression.

1.**Import Libraries**

In [None]:
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
import plotly.offline as py
from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objs as go
from plotly import tools
init_notebook_mode(connected=True)  
from plotly.tools import FigureFactory as ff
import pycountry
import random
import squarify
from collections import Counter
import warnings
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt # Matplotlib is a python 2D plotting library
%matplotlib inline 
# A magic command that tells matplotlib to render figures as static images in the Notebook.

import seaborn as sns # Seaborn is a visualization library based on matplotlib (attractive statistical graphics).
sns.set_style('whitegrid') # One of the five seaborn themes
import warnings
warnings.filterwarnings('ignore') # To ignore some of seaborn warning msg

from scipy import stats, linalg

import folium # for map visualization
from folium import plugins
from mpl_toolkits.mplot3d import Axes3D
import folium

2.**Reading Data**

In [None]:
# Load the house-sales CSV into `house`, the DataFrame every later cell reads from.
# The path is relative to the notebook's working directory.
house = pd.read_csv("../input/kc_house_data.csv")


3.**Data Exploration**

In [None]:
# (number of rows, number of columns) of the loaded data.
house.shape

In [None]:
# Preview the first rows to see what each column looks like.
house.head()

In [None]:
# Array of all column names available in the data.
house.columns.values

4.**Data analysis**

In [None]:
def random_colors(number_of_colors):
    """Return a list of `number_of_colors` random hex colour strings.

    Each entry has the form '#RRGGBB' with upper-case hexadecimal digits.
    Digits are drawn with `random.choice`, so the result depends on the state
    of the global `random` generator.
    """
    hex_digits = '0123456789ABCDEF'
    palette = []
    for _ in range(number_of_colors):
        digits = [random.choice(hex_digits) for _ in range(6)]
        palette.append("#" + ''.join(digits))
    return palette

In [None]:
def simple_graph(dataframe, type_of_graph, top = 0):
    """Draw a plotly chart of how often each value of a `house` column occurs.

    Parameters
    ----------
    dataframe : str
        Name of the column of the module-level `house` DataFrame to plot
        (despite its name, this is a column label, not a DataFrame).
    type_of_graph : str
        One of 'barh' (horizontal bars), 'barv' (vertical bars), 'pie', or
        'pie_' (pie with a horizontal legend on a fixed 700x700 canvas).
    top : int or None, optional
        For bar charts only: keep the `top` most frequent values. ``0`` (the
        default) or ``None`` keeps every value. Pie charts always show every
        value and ignore this argument.

    Raises
    ------
    ValueError
        If `type_of_graph` is not one of the supported chart types.
    """
    layout = go.Layout()

    if type_of_graph in ('barh', 'barv'):
        top_category = get_list(house[dataframe].dropna())
        # Only truncate for a positive `top`; the default of 0 used to be
        # passed to .head() and produced an empty chart.
        if top is not None and top > 0:
            top_category = top_category.head(top)
        labels, counts = top_category[0], top_category[1]

        bar_style = dict(
            # One random colour per bar actually drawn.
            marker=dict(color=random_colors(len(top_category)),
                        line=dict(color='rgb(8,48,107)', width=1.5)),
            opacity=0.6,
        )
        if type_of_graph == 'barh':
            data = [go.Bar(x=counts, y=labels, orientation='h', **bar_style)]
        else:
            data = [go.Bar(x=labels, y=counts, **bar_style)]

    elif type_of_graph in ('pie', 'pie_'):
        data_frame = house[dataframe].value_counts()
        data = [go.Pie(
            labels = data_frame.index,
            values = data_frame.values,
            # One random colour per slice.
            marker = dict(colors = random_colors(len(data_frame))),
            textfont = dict(size = 20)
        )]
        if type_of_graph == 'pie_':
            layout = go.Layout(legend=dict(orientation="h"), autosize=False,width=700,height=700)

    else:
        # Previously an unknown type fell through to a NameError on `data`.
        raise ValueError(
            "type_of_graph must be one of 'barh', 'barv', 'pie' or 'pie_', got %r"
            % (type_of_graph,)
        )

    fig = go.Figure(data = data, layout = layout)
    py.iplot(fig)
    
def get_list(col_name):
    """Count how often each value occurs in a column.

    Parameters
    ----------
    col_name : iterable
        The column *values* (e.g. a pandas Series), not a column label.
        Values are converted to strings; a value containing ';' is split
        into several entries that are counted separately.

    Returns
    -------
    pandas.DataFrame
        Column ``0`` holds each distinct entry (as a string) and column ``1``
        its count, ordered from most to least frequent. Empty input gives an
        empty frame that still has columns ``0`` and ``1``.
    """
    # Join the actual values; the previous code joined the characters of the
    # literal string 'col_name' and therefore never looked at the data.
    values = [str(value) for value in col_name]
    if not values:
        return pd.DataFrame(columns=[0, 1])
    each_word = ";".join(values).split(";")
    counts = Counter(each_word).most_common()
    return pd.DataFrame(counts, columns=[0, 1])

4.1. **House with waterfront**

In [None]:
# Pie chart of how many houses fall under each `waterfront` value.
simple_graph('waterfront', type_of_graph='pie', top=5)

4.2.**House grade**

In [None]:
# Pie chart of how many houses fall under each `grade` value.
simple_graph('grade', type_of_graph='pie', top=5)

4.3.**Number of Bedrooms per House**

In [None]:
# Pie chart of how many houses have each `bedrooms` count.
simple_graph('bedrooms', type_of_graph='pie', top=5)

4.4.**House Prices**

In [None]:
# Horizontal box plot of `price`; showmeans=True marks the mean as well.
fig, ax = plt.subplots(figsize=(12, 4))
box_options = dict(orient='h', width=0.8, fliersize=3, showmeans=True)
sns.boxplot(data=house, x='price', ax=ax, **box_options)
plt.show()

4.5.**Number of bathrooms per house**

In [None]:
# Horizontal box plot of `bathrooms`; showmeans=True marks the mean as well.
fig, ax = plt.subplots(figsize=(12, 4))
box_options = dict(orient='h', width=0.8, fliersize=3, showmeans=True)
sns.boxplot(data=house, x='bathrooms', ax=ax, **box_options)
plt.show()

4.6.**Number of floors per house**

In [None]:
# Horizontal box plot of `floors`; showmeans=True marks the mean as well.
fig, ax = plt.subplots(figsize=(12, 4))
box_options = dict(orient='h', width=0.8, fliersize=3, showmeans=True)
sns.boxplot(data=house, x='floors', ax=ax, **box_options)
plt.show()

**5.Hedonic Regression**

**5.1.Relationship between the square footage of a house (sqft_living) and its selling price**

In [None]:
# Scatter plot with a fitted regression line plus the marginal distributions.
# `height` is the current name of the figure-size keyword; seaborn renamed the
# old `size` keyword in 0.9 and recent releases reject it.
sns.jointplot(x="sqft_living", y="price", data=house, kind='reg', height=7)
plt.show()

**5.2.Relationship between the number of bedrooms and house price**

In [None]:
# Scatter plot with a fitted regression line plus the marginal distributions.
# `height` is the current name of the figure-size keyword; seaborn renamed the
# old `size` keyword in 0.9 and recent releases reject it.
sns.jointplot(x="bedrooms", y="price", data=house, kind='reg', height=7)
plt.show()

**5.3.Relationship between the number of bathrooms and house price**

In [None]:
# Scatter plot with a fitted regression line plus the marginal distributions.
# `height` is the current name of the figure-size keyword; seaborn renamed the
# old `size` keyword in 0.9 and recent releases reject it.
sns.jointplot(x="bathrooms", y="price", data=house, kind='reg', height=7)
plt.show()

**5.4.Relationship between the grade of a house and its selling price**

In [None]:
# Distribution of `price` for each value of `grade`.
f, axes = plt.subplots(nrows=1, ncols=1, figsize=(15, 5))
sns.boxplot(x=house['grade'], y=house['price'], ax=axes)
sns.despine(left=True, bottom=True)
axes.set_xlabel('grade')
axes.set_ylabel('Price')
axes.yaxis.tick_left()

**5.5.Relationship between the number of floors of the house and its selling price**

In [None]:
# Distribution of `price` for each value of `floors`.
f, axes = plt.subplots(nrows=1, ncols=1, figsize=(15, 5))
sns.boxplot(x=house['floors'], y=house['price'], ax=axes)
sns.despine(left=True, bottom=True)
axes.set_xlabel('floors')
axes.set_ylabel('Price')
axes.yaxis.tick_left()

**5.6.Relationship between waterfront and house selling price**

In [None]:
# Distribution of `price` for each value of `waterfront`.
f, axes = plt.subplots(nrows=1, ncols=1, figsize=(15, 5))
sns.boxplot(x=house['waterfront'], y=house['price'], ax=axes)
sns.despine(left=True, bottom=True)
axes.set_xlabel('waterfront')
axes.set_ylabel('Price')
axes.yaxis.tick_left()

**5.7.Relationship between the condition of the house and its selling price**

In [None]:
# Distribution of `price` for each value of `condition`.
f, axes = plt.subplots(nrows=1, ncols=1, figsize=(15, 5))
sns.boxplot(x=house['condition'], y=house['price'], ax=axes)
sns.despine(left=True, bottom=True)
axes.set_xlabel('condition')
axes.set_ylabel('Price')
axes.yaxis.tick_left()

6.**Fuzzy Hedonic Regression**

6.1.Relationship between different house features (floors, bedrooms, bathrooms)

In [None]:
# 3D scatter: floors (x) against bedrooms (y) against bathrooms (z).
fig = plt.figure(figsize=(19, 12.5))
ax = fig.add_subplot(2, 2, 1, projection="3d")
ax.scatter(house['floors'], house['bedrooms'], house['bathrooms'],
           c="darkgreen", alpha=.5)
# Leading newlines push the labels away from the tick labels.
ax.set_xlabel('\nFloors')
ax.set_ylabel('\nBedrooms')
ax.set_zlabel('\nBathrooms')
# Restrict the bedrooms axis to the 0-12 range.
ax.set(ylim=[0,12])



6.2.Relationship between different house features (waterfront, bedrooms, bathrooms)

In [None]:
# 3D scatter: waterfront (x) against bedrooms (y) against bathrooms (z).
fig = plt.figure(figsize=(19, 12.5))
ax = fig.add_subplot(2, 2, 1, projection="3d")
ax.scatter(house['waterfront'], house['bedrooms'], house['bathrooms'],
           c="darkgreen", alpha=.5)
# Leading newlines push the labels away from the tick labels.
ax.set_xlabel('\nwaterfront')
ax.set_ylabel('\nBedrooms')
ax.set_zlabel('\nBathrooms')
# Restrict the bedrooms axis to the 0-12 range.
ax.set(ylim=[0,12])


6.3.Relationship between different house features (sqft_living, bedrooms, waterfront)

In [None]:
# 3D scatter: waterfront (x) against bedrooms (y) against sqft_living (z).
fig = plt.figure(figsize=(19, 12.5))
ax = fig.add_subplot(2, 2, 1, projection="3d")
ax.scatter(house['waterfront'], house['bedrooms'], house['sqft_living'],
           c="darkgreen", alpha=.5)
# Leading newlines push the labels away from the tick labels.
ax.set_xlabel('\nWaterfront')
ax.set_ylabel('\nBedrooms')
ax.set_zlabel('\nsqft_living')
# Restrict the bedrooms axis to the 0-12 range.
ax.set(ylim=[0,12])

6.4.Correlation Matrix (Relationship between all house features)

In [None]:
features = ['price','bedrooms','bathrooms','sqft_living','sqft_lot','floors','waterfront',
            'view','condition','grade','sqft_above','sqft_basement','yr_built','yr_renovated',
            'zipcode','lat','long','sqft_living15','sqft_lot15']

# Compute the pairwise correlations once and reuse them for the mask and the plot.
corr_matrix = house[features].corr()

# Mask the upper triangle (diagonal included) so each pair is shown only once.
# The builtin `bool` is used because the `np.bool` alias was removed from NumPy.
mask = np.zeros_like(corr_matrix, dtype=bool)
mask[np.triu_indices_from(mask)] = True

f, ax = plt.subplots(figsize=(16, 12))
plt.title('Pearson Correlation Matrix',fontsize=25)

# The trailing semicolon suppresses the Axes repr in the cell output.
sns.heatmap(corr_matrix,linewidths=0.25,vmax=0.7,square=True,cmap="BuGn", #"BuGn_r" to reverse
            linecolor='w',annot=True,annot_kws={"size":8},mask=mask,cbar_kws={"shrink": .9});