# 1- Import all Necessary Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# 2- Import the Data

In [None]:
# Load the housing CSV into a DataFrame.
# NOTE(review): the relative '../input/...' path looks like a Kaggle-style
# layout — confirm it exists in the environment where this notebook runs.
df= pd.read_csv('../input/usa-housing/USA_Housing.csv')

# 3- Data Overview

In [None]:
# Preview the first rows to confirm the columns were parsed as expected.
df.head()

In [None]:
# (number of rows, number of columns) of the loaded frame.
df.shape

In [None]:
# Column dtypes and non-null counts: a quick check for missing values
# and for columns that are not numeric.
df.info()

# 4- Exploratory Data Analysis

**What is the relationship between the features and the target variable?**

In [None]:
# One panel per feature, each plotting that feature against Price so the
# individual relationships can be compared side by side.
feature_names = [
    'Avg. Area Income',
    'Avg. Area House Age',
    'Avg. Area Number of Rooms',
    'Avg. Area Number of Bedrooms',
    'Area Population',
]

fig, axes = plt.subplots(nrows=1, ncols=5, figsize=(16, 6))

for panel, feature in zip(axes, feature_names):
    # 'o' draws markers only (no connecting line), i.e. a scatter plot.
    panel.plot(df[feature], df['Price'], 'o')
    panel.set_title(feature)
    panel.set_ylabel("Price")

# Trailing semicolon keeps the cell from echoing a return value.
plt.tight_layout();

In [None]:
# Pairwise scatter plots of the numeric columns, with each column's own
# distribution on the diagonal.
sns.pairplot(df)

In [None]:
# Distribution of the target variable.
# `sns.distplot` is deprecated; `histplot` with a KDE overlay and density
# scaling is the documented replacement and draws the same kind of figure
# (requires seaborn >= 0.11).
sns.histplot(df['Price'], kde=True, stat='density')

In [None]:
# Correlation matrix of the numeric columns, annotated with the coefficients.
# numeric_only=True is passed explicitly: on pandas 2.x, corr() raises if the
# frame contains a non-numeric column, whereas older versions silently dropped
# such columns. The keyword is available for corr() from pandas 1.5 onward.
# NOTE(review): assumes df may hold non-numeric columns (only a subset of
# columns is selected as features later) — confirm against df.info().
sns.heatmap(df.corr(numeric_only=True), annot=True, cmap='Greens')

# 5- Determine the Features & Target Variable

In [None]:
# List the exact column names so the feature/target selection can copy them verbatim.
df.columns

In [None]:
# Feature matrix: the five area-level columns used as predictors.
X = df.loc[:, [
    'Avg. Area Income',
    'Avg. Area House Age',
    'Avg. Area Number of Rooms',
    'Avg. Area Number of Bedrooms',
    'Area Population',
]]

In [None]:
# Display the feature matrix (pandas truncates long frames in the rendered output).
X


In [None]:
# Target variable: the 'Price' column.
y=df['Price']

In [None]:
# Display the target values (pandas truncates long Series in the rendered output).
y

# 6- Split the Dataset into Train & Test Sets

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 30% of the rows for testing; the fixed random_state makes the
# shuffle, and therefore every downstream number, reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=101,
)

# 7- Train the Model

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
# Linear regression estimator, constructed with scikit-learn's default settings.
model= LinearRegression()

In [None]:
# Fit on the training split only; the test split is kept unseen for evaluation.
model.fit(X_train, y_train)

In [None]:
# Tabulate the fitted coefficients, one row per feature. Each value is the
# change in predicted Price for a one-unit increase in that feature with the
# other features held fixed.
pd.DataFrame(
    data=model.coef_,
    index=X.columns,
    columns=['coef'],
)

# 8- Predicting Test Data

In [None]:
# Predict prices for the held-out test features.
predictions=model.predict(X_test)

In [None]:
# Actual vs. predicted prices on the test set; a good fit clusters the points
# around the diagonal. The axes are labeled explicitly so the figure can be
# read on its own (x = actual, y = predicted).
plt.scatter(y_test, predictions)
plt.xlabel('Actual Price')
plt.ylabel('Predicted Price')
plt.title('Actual vs. Predicted Price');

# 9- Evaluating the Model



In [None]:
from sklearn import metrics


In [None]:
# Error metrics on the held-out test set.
# MSE is computed once and reused for the root-mean-squared error; the last
# label was previously misspelled as 'RSME'.
mse = metrics.mean_squared_error(y_test, predictions)
print('MAE:', metrics.mean_absolute_error(y_test, predictions))
print('MSE:', mse)
print('RMSE:', np.sqrt(mse))

In [None]:
# Mean price over the full dataset, as a scale reference for the error magnitudes.
df['Price'].mean()


# 10- Residuals

In [None]:
# Distribution of the residuals (actual - predicted). For a well-specified
# linear model this should look roughly bell-shaped and centered on zero.
# `sns.distplot` is deprecated; `histplot` with a KDE overlay and density
# scaling is the documented replacement (requires seaborn >= 0.11).
sns.histplot(y_test - predictions, kde=True, stat='density')

In [None]:
# Residuals plotted against the actual price, with a dashed zero line for
# reference. Points should scatter evenly around the line with no visible trend.
residuals = y_test - predictions
resid_ax = sns.scatterplot(x=y_test, y=residuals)
resid_ax.axhline(y=0, color='r', ls='--')
# seaborn takes axis labels from the Series name, which would label both axes
# with the target column's name; set them explicitly so the residual axis is
# not mislabeled.
resid_ax.set_xlabel('Actual Price')
resid_ax.set_ylabel('Residual (actual - predicted)');

In [None]:
import scipy as sp
# Import the submodule explicitly: on SciPy versions without lazy submodule
# loading, a bare `import scipy` does not make `sp.stats` available.
import scipy.stats

# Normal probability (Q-Q) plot of the residuals: points that follow the
# reference line indicate approximately normally distributed residuals.
fig, ax = plt.subplots(figsize=(6,6))
# probplot also returns the raw quantile/fit values; only the plot is wanted
# here, so the trailing semicolon suppresses the echoed return value.
sp.stats.probplot(y_test-predictions,plot=ax);