**Import Libraries**

In [None]:
import pandas as pd
import numpy as np
from sklearn import linear_model
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings("ignore")

**Load Dataset**

In [None]:
# Load the Boston Housing dataset bundled with sklearn.datasets.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this cell presumably needs scikit-learn < 1.2 -- confirm the pinned version.
from sklearn.datasets import load_boston
boston = load_boston()

**Check the keys available in the dataset**

In [None]:
# List the top-level keys exposed by the loaded dataset object.
print(boston.keys())

In [None]:
# Print the feature values.
# NOTE(review): this prints the whole object; a slice would keep the output short.
print(boston.data)

In [None]:
# Shape of the feature values.
boston.data.shape

In [None]:
# Target values; these are attached to the frame as the PRICE column further down.
boston.target

In [None]:
# Shape of the target values.
boston.target.shape

In [None]:
# Feature names; used below as the DataFrame column labels.
boston.feature_names

In [None]:
# Print the DESCR text shipped with the dataset object.
print(boston.DESCR)

**Convert dataset into Data Frame**

In [None]:
import pandas as pd

# Build the feature frame in one step, labelling the columns with the
# dataset's feature names.
data = pd.DataFrame(boston.data, columns=boston.feature_names)

In [None]:
# Preview the first rows of the frame.
data.head()

In [None]:
# Append the target as a PRICE column. It must remain the LAST column because
# X and y are later separated by position (iloc), not by name.
data['PRICE'] = boston.target

**Get some Statistical Values**

In [None]:
# Summary statistics for the columns of the frame.
data.describe()

**Find Correlation**

In [None]:
# Pairwise correlation matrix of the frame's columns.
data.corr()

**Correlation Plot for Visualization**

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Global plot configuration: large figures first, then the ggplot look.
plt.rc('figure', figsize=(15, 10))
plt.style.use('ggplot')

# Annotated heatmap of the pairwise correlations between all columns.
correlations = data.corr()
sns.heatmap(correlations, annot=True)
plt.title('Correlation Plot', fontsize=20)
plt.show()

**EDA with Pandas Profiling**

In [None]:
# Build an automated profiling report for the frame and display it as the cell output.
# NOTE(review): the pandas_profiling package appears to have been renamed to
# ydata-profiling upstream -- confirm which one the target environment provides.
import pandas_profiling as pp 
profile = pp.ProfileReport(data) 
profile

**Checking for Null Values**

In [None]:
# Count missing values per column.
data.isnull().sum()

**Using Seaborn for better Visualization**

In [None]:
# seaborn was already imported in the correlation-plot cell; the re-import is harmless.
import seaborn as sns
# Grid of pairwise plots over the columns of the frame.
sns.pairplot(data)

**Defining dependent and independent Variables**

In [None]:
# Features are every column except the last; the target is the last column.
X = data.iloc[:, :-1]
y = data.iloc[:, -1]

**Splitting data into Training and Testing data**

In [None]:
# Hold out 33% of the rows for testing; random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=1
)

# Report the split sizes once for the features and once for the targets.
split_message = '\n \n There are {} samples in the training set and {} samples in the test set'
print(split_message.format(X_train.shape[0], X_test.shape[0]))
print(split_message.format(y_train.shape[0], y_test.shape[0]))

**Applying Linear Regression to Training Data**

In [None]:
from sklearn.linear_model import LinearRegression

# fit() returns the estimator itself, so construction and fitting can be chained.
model = LinearRegression().fit(X_train, y_train)

In [None]:
# Print the coefficients (weights) the fitted model holds for the feature columns.
print(model.coef_)

In [None]:
# Print the intercept term of the fitted model.
print(model.intercept_)

In [None]:
# Predict prices for the held-out test rows and print them.
y_pred = model.predict(X_test)
print(y_pred)

**Plotting a graph between Testing and Prediction Data**

In [None]:
# Overlay the actual test prices (blue points) and the predictions (red line),
# both indexed by position within the test split.
x_ax = range(X_test.shape[0])
axes = plt.gca()
axes.scatter(x_ax, y_test, s=5, color="blue", label="original")
axes.plot(x_ax, y_pred, lw=0.8, color="red", label="predicted")
axes.legend()
plt.show()

**Getting Score for Model**

In [None]:
from sklearn.metrics import r2_score

# R^2 of the predictions against the held-out targets.
r2 = r2_score(y_true=y_test, y_pred=y_pred)
print(r2)

**Calculating Errors**

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

# The MSE feeds both the first and the second line, so keep it in a variable.
mse = mean_squared_error(y_test, y_pred)
print('Mean Squared Error:', mse)
print('Root Mean Squared Error:', np.sqrt(mse))
print('Mean Absolute Error:', mean_absolute_error(y_test, y_pred))

**So far I have only tried Linear Regression. To improve accuracy, I will now write a function that compares several models and reports the one with the best score.**

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import ShuffleSplit
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR

In [None]:
def find_best_model(X,Y):
    """Grid-search several regressors and report the best configuration of each.

    Every candidate estimator is tuned with ``GridSearchCV`` over its own
    parameter grid. All candidates share one seeded ``ShuffleSplit`` (5 splits,
    33% held out per split), so they are scored on identical splits.

    Parameters
    ----------
    X :
        Feature data, passed unchanged to ``GridSearchCV.fit``.
    Y :
        Target data, passed unchanged to ``GridSearchCV.fit``.

    Returns
    -------
    pandas.DataFrame
        One row per candidate with the columns ``model`` (candidate label),
        ``best_parameters`` (``GridSearchCV.best_params_``) and ``score``
        (``GridSearchCV.best_score_``).
    """
    models = {
        'linear_regression': {
            'model': LinearRegression(),
            # Nothing to tune: the empty grid evaluates the default estimator once.
            'parameters': {}
        },

        'decison_tree_regressor': {
            # Seeded so that repeated runs report the same score.
            'model': DecisionTreeRegressor(splitter='best', random_state=0),
            'parameters': {
                'max_depth': [5, 10]
            }
        },

        'random_forest': {
            # Seeded so that repeated runs report the same score.
            'model': RandomForestRegressor(random_state=0),
            'parameters': {
                'n_estimators': [1, 5, 10, 15, 20, 30, 40, 50, 60, 70, 80, 90, 100]
            }
        },

        # Label kept as 'svc' for backward compatibility, although the estimator is an SVR.
        'svc': {
            'model': SVR(gamma='auto'),
            'parameters': {
                'kernel': ['rbf', 'linear'],
                'C': [1, 10, 20]
            }
        }
    }

    scores = []
    cv_shuffle = ShuffleSplit(n_splits=5, test_size=0.33, random_state=0)

    for model_name, model_params in models.items():
        gc = GridSearchCV(
            model_params['model'],
            model_params['parameters'],
            cv=cv_shuffle,
            return_train_score=False,
        )
        gc.fit(X, Y)
        scores.append({
            'model': model_name,
            # The key must match the column list below; a mismatched key
            # leaves the whole column filled with NaN.
            'best_parameters': gc.best_params_,
            'score': gc.best_score_
        })

    return pd.DataFrame(scores, columns=['model','best_parameters','score'])

# Compare the candidate models using the training split only; the test split is not passed in.
find_best_model(X_train, y_train)

**The Random Forest model achieves the highest score**

In [None]:
# Fit a random forest on the training split. This rebinds `model`, which
# previously held the fitted LinearRegression.
# NOTE(review): n_estimators=1 is hard-coded rather than taken from the
# grid-search result, and no random_state is set -- confirm this is the
# intended configuration.
model = RandomForestRegressor(n_estimators= 1)
model.fit(X_train,y_train)

In [None]:
# Predict prices for the held-out test rows with the current `model`.
predictions = model.predict(X_test)

In [None]:
# Display the predictions as the cell output.
predictions

In [None]:
# Overlay the actual test prices (blue points) and the latest predictions
# (red line), both indexed by position within the test split.
x_ax = range(X_test.shape[0])
axes = plt.gca()
axes.scatter(x_ax, y_test, s=5, color="blue", label="original")
axes.plot(x_ax, predictions, lw=0.8, color="red", label="predicted")
axes.legend()
plt.show()

**Thanks a lot, Any suggestions are welcome.**
**If you find this notebook helpful, Please upvote and comment!**