# House Price Prediction in California, USA

**Predicting house prices can help determine the selling price of a house in a particular region and can help people find the right time to buy a home. In this article, I will introduce you to a machine learning project on house price prediction with Python.**

In [None]:
# Importing the Libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import GridSearchCV
from datetime import datetime

In [None]:
# Load the California housing CSV into a DataFrame and preview its first five rows
dataset = pd.read_csv(
    filepath_or_buffer='../input/california-housing/California housing.csv'
)
dataset.head(5)

# Linear Regression

In [None]:
# Print a concise summary (columns, dtypes, non-null counts) of the whole dataset
dataset.info()

In [None]:
# Visualise the dataset: one 50-bin histogram per plotted column
dataset.hist(figsize=(15, 20), bins=50)
plt.show()

In [None]:
# Number of missing values in each column
dataset.isna().sum()

In [None]:
# Rows whose "total_bedrooms" value is missing
dataset.loc[dataset['total_bedrooms'].isna()]

In [None]:
# Summary statistics for the dataset's columns
dataset.describe()

In [None]:
# Data preprocessing: fill the missing "total_bedrooms" values with the column median.
# The result is assigned back to the column instead of calling fillna(inplace=True)
# on the selected Series: that chained in-place form is deprecated in pandas and
# does not modify the DataFrame when copy-on-write is enabled.
bedrooms_median = dataset['total_bedrooms'].median()
dataset['total_bedrooms'] = dataset['total_bedrooms'].fillna(bedrooms_median)

In [None]:
# Missing values per column after the imputation step
dataset.isna().sum()

In [None]:
# Rows still missing "total_bedrooms" after the imputation step
dataset.loc[dataset['total_bedrooms'].isna()]

In [None]:
# Dataset summary again, after the missing-value imputation
dataset.info()

In [None]:
# Count how many rows fall into each "ocean_proximity" category
counts = dataset['ocean_proximity'].value_counts()
counts

In [None]:
# Encode the categorical "ocean_proximity" column as integer codes
# NOTE(review): LabelEncoder assigns arbitrary integer codes to a nominal category;
# linear and distance-based models will treat those codes as ordered. Consider
# one-hot encoding instead — confirm whether this is intended.
enc = preprocessing.LabelEncoder()
dataset['ocean_proximity'] = enc.fit_transform(dataset['ocean_proximity'])
# value_counts must be called (a bare attribute reference does nothing) and printed,
# since a notebook cell only displays its last expression automatically
print(dataset['ocean_proximity'].value_counts())
dataset.head()

In [None]:
# Data selection: the target is "median_house_value"; every other column is a feature
y = dataset['median_house_value'].to_numpy()
X = dataset.drop(columns='median_house_value').to_numpy()

In [None]:
# Standardise the feature matrix X, then display its overall standard deviation
# NOTE(review): the scaler is fitted on all rows before the train/test split below,
# so test-set statistics influence the scaling — consider fitting on the training split only.
scaler = preprocessing.StandardScaler()
X = scaler.fit(X).transform(X)
X.std()

In [None]:
# Hold out 20% of the rows as the test set (fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [None]:
# Fit an ordinary linear regression on the training split
regressor = LinearRegression()
regressor.fit(X=X_train, y=y_train)

In [None]:
# R^2 score of the linear regression on the TRAINING data (this is not a held-out
# test score and not a classification accuracy)
regressor.score(X_train, y_train)

In [None]:
# Actual target values of the test set
y_test

In [None]:
# Predict the test-set targets with the linear regression model
y_pred = regressor.predict(X_test)
y_pred

# K-Nearest Neighbors Regressor

In [None]:
# Hyper-parameter grid for the K-NN regressor (a single candidate per parameter)
params = dict(
    n_neighbors=[9],
    weights=['distance'],
    p=[1],
)
params

In [None]:
# Grid-search the K-NN regressor with 10-fold cross-validation,
# scored by negative mean squared error, using all available cores
knn = KNeighborsRegressor()
rs = GridSearchCV(
    knn,
    params,
    scoring='neg_mean_squared_error',
    cv=10,
    n_jobs=-1,
)
rs.fit(X_train, y_train)

In [None]:
# Show the estimator selected by the grid search
rs.best_estimator_

In [None]:
# Take the grid search's best K-NN estimator and record the wall-clock start time
# used to time the fit and predict cells that follow
knn = rs.best_estimator_
start = datetime.now()
start

In [None]:
# Train the K-NN model on the training set
knn.fit(X_train, y_train)

In [None]:
# Predict the test-set targets with the K-NN model
y_pred = knn.predict(X_test)
y_pred

In [None]:
# Record the stop time and compute the elapsed time since `start` as a timedelta
stop = datetime.now()
delta = stop - start
delta

In [None]:
# R^2 score of the K-NN predictions on the test set
accuracy = r2_score(y_test, y_pred)
accuracy

In [None]:
# Root mean squared error (RMSE) of the K-NN predictions on the test set
error = np.sqrt(mean_squared_error(y_test, y_pred))
error

In [None]:
# Elapsed time of the K-NN run in seconds.
# total_seconds() covers the whole timedelta, including its `days` component,
# which `delta.seconds + delta.microseconds/1E6` silently drops.
seconds = delta.total_seconds()
seconds

# Support Vector Regressor (SVR)

In [None]:
# Create the support vector regressor with a linear kernel and C = 100
# NOTE(review): per the scikit-learn docs, gamma is the kernel coefficient for the
# 'rbf', 'poly' and 'sigmoid' kernels, so gamma = 1 should have no effect here — confirm
# whether a different kernel was intended.
svr = SVR(C = 100, gamma = 1, kernel = 'linear')
svr

In [None]:
# Record the wall-clock start time used to time the SVR fit and predict cells that follow
start = datetime.now()
start

In [None]:
# Train the SVR model on the training set
svr.fit(X_train, y_train)

In [None]:
# Predict the test-set targets with the SVR model
y_pred = svr.predict(X_test)
y_pred

In [None]:
# Record the stop time and compute the elapsed time since `start` as a timedelta
stop = datetime.now()
delta = stop - start
delta

In [None]:
# R^2 score of the SVR predictions on the test set
accuracy = r2_score(y_test, y_pred)
accuracy

In [None]:
# Root mean squared error (RMSE) of the SVR predictions on the test set
error = np.sqrt(mean_squared_error(y_test, y_pred))
error

In [None]:
# Elapsed time of the SVR run in seconds.
# total_seconds() covers the whole timedelta, including its `days` component,
# which `delta.seconds + delta.microseconds/1E6` silently drops.
seconds = delta.total_seconds()
seconds

# Decision Tree Regressor

In [None]:
# Split X and y into training and test sets again (20% test, seed 42)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [None]:
# Hyper-parameter grid for the Decision Tree regressor.
# - max_features: None (consider all features) replaces 'auto', which meant the same
#   for tree regressors and has been removed from scikit-learn.
# - criterion: 'squared_error' is the current name of the removed 'mse' option.
# - min_samples_split is a float, i.e. a fraction of the training samples.
params = {
    'max_depth': [7],
    'max_features': [None, 'sqrt'],
    'min_samples_leaf': [7],
    'min_samples_split': [0.1],
    'criterion': ['squared_error'],
}
params

In [None]:
# Grid-search the Decision Tree regressor with 5-fold cross-validation,
# scored by negative mean squared error, using all available cores
tree = DecisionTreeRegressor()
rs = GridSearchCV(
    tree,
    params,
    scoring='neg_mean_squared_error',
    cv=5,
    n_jobs=-1,
)
rs.fit(X_train, y_train)

In [None]:
# Show the Decision Tree estimator selected by the grid search
rs.best_estimator_

In [None]:
# Record the wall-clock start time used to time the Decision Tree fit and predict cells that follow
start = datetime.now()
start

In [None]:
# Train the tuned Decision Tree regressor on the training set.
# Use the grid search's best estimator (as the K-NN section does) rather than the
# default-parameter `tree` instance, which would ignore the searched hyper-parameters.
tree = rs.best_estimator_
tree.fit(X_train, y_train)

In [None]:
# Predict the test-set targets with the Decision Tree regressor
y_pred = tree.predict(X_test)
y_pred

In [None]:
# Record the stop time and compute the elapsed time since `start` as a timedelta
stop = datetime.now()
delta = stop - start
delta

In [None]:
# R^2 score of the Decision Tree predictions on the test set
accuracy = r2_score(y_test, y_pred)
accuracy

In [None]:
# Root mean squared error (RMSE) of the Decision Tree predictions on the test set
error = np.sqrt(mean_squared_error(y_test, y_pred))
error

In [None]:
# Elapsed time of the Decision Tree run in seconds.
# total_seconds() covers the whole timedelta, including its `days` component,
# which `delta.seconds + delta.microseconds/1E6` silently drops.
seconds = delta.total_seconds()
seconds

**I hope you enjoyed this project. Thank you for reading to the end.**