## 1. Importing Essential Libraries

In [156]:
import warnings

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score,mean_absolute_error,mean_squared_error

## 2. Data Cleaning

In [157]:
# Load the raw housing table from a path relative to the notebook.
dataset_path = "BostonHousing.csv"
df = pd.read_csv(dataset_path)

# Silence library warnings for the remainder of the session (global filter).
warnings.filterwarnings("ignore")

# dropna() returns a new frame, so its result must be assigned; calling it
# bare leaves df untouched. Once incomplete rows are removed no NaN is left,
# so a second column-wise dropna(axis=1) would have nothing to drop.
df = df.dropna()
# Alternative to dropping rows: impute instead, e.g.
#   df["crim"] = df["crim"].fillna(df["crim"].mean())

# Sanity check: every column should now report zero missing values.
missing_values = df.isnull().sum()
print("Missing values in each column:\n", missing_values)

Missing values in each column:
 crim       0
zn         0
indus      0
chas       0
nox        0
rm         0
age        0
dis        0
rad        0
tax        0
ptratio    0
b          0
lstat      0
medv       0
dtype: int64


## 3. Variable Encoding

In [158]:
# One-hot encode categorical columns; the first level of each is dropped.
df = pd.get_dummies(df, drop_first=True)

# Re-append the target so that "medv" ends up as the last column:
# pop() removes it, and assigning to a missing key adds it at the end.
df["medv"] = df.pop("medv")
df

Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,b,lstat,medv
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.0900,1,296,15.3,396.90,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.90,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.90,5.33,36.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,0.06263,0.0,11.93,0,0.573,6.593,69.1,2.4786,1,273,21.0,391.99,9.67,22.4
502,0.04527,0.0,11.93,0,0.573,6.120,76.7,2.2875,1,273,21.0,396.90,9.08,20.6
503,0.06076,0.0,11.93,0,0.573,6.976,91.0,2.1675,1,273,21.0,396.90,5.64,23.9
504,0.10959,0.0,11.93,0,0.573,6.794,89.3,2.3889,1,273,21.0,393.45,6.48,22.0


## 4. Feature Selection

In [159]:

# Pairwise correlations between all columns of the encoded frame.
correalation_matrix = df.corr()

# Correlation of every column with the target "medv", strongest positive first.
# (The "_with_charge" suffix is a leftover name; the values relate to "medv".)
correalation_with_charge = correalation_matrix.loc[:, "medv"].sort_values(ascending=False)
correalation_with_charge

medv       1.000000
rm         0.695360
zn         0.360445
b          0.333461
dis        0.249929
chas       0.175260
age       -0.376955
rad       -0.381626
crim      -0.388305
nox       -0.427321
tax       -0.468536
indus     -0.483725
ptratio   -0.507787
lstat     -0.737663
Name: medv, dtype: float64

## 5. Splitting the Dataset

In [160]:
# Select predictor and target by name rather than by position: a positional
# index (5 / -1) silently picks a different column if the column order ever
# changes, e.g. when encoding adds or removes columns.
# "rm" is the column at position 5 of the frame displayed above; the double
# brackets keep X two-dimensional (n_samples, 1) as the estimators expect.
X = df[["rm"]].to_numpy()
y = df["medv"].to_numpy()

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
test_size = 0.2
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=0)

## 6. Feature Scaling

In [161]:
#from sklearn.preprocessing import MinMaxScaler

#min_max_scaler = MinMaxScaler()

#X_train = min_max_scaler.fit_transform(X_train)
#X_test = min_max_scaler.transform(X_test)


## 7.1 Training the Linear Regression

In [162]:
# Ordinary least-squares fit on the training split; fit() returns the
# estimator itself, so construction and fitting can be chained.
linear_regressor = LinearRegression().fit(X_train, y_train)
linear_regressor

## 7.2 Training the Polynomial Regression

In [163]:
# Expand the training features into polynomial terms up to degree 4.
pr = PolynomialFeatures(degree=4).fit(X_train)
x_train_poly = pr.transform(X_train)

# A plain linear model on the expanded features gives the polynomial regression.
poly_regressor = LinearRegression().fit(x_train_poly, y_train)
poly_regressor

## 7.3 Training the Support Vector Regression

In [164]:
# Support vector regressor with library-default hyperparameters,
# trained on the unscaled training split.
svr_regressor = SVR().fit(X_train, y_train)
svr_regressor

## 7.4 Training the Decision Tree Regression


In [165]:
# Seed the tree so a Restart & Run All reproduces the same model; the value
# matches the random_state used for the train/test split.
decisson_regressor = DecisionTreeRegressor(random_state=0)
decisson_regressor.fit(X_train, y_train)

## 7.5 Training the Random Forest Regression

In [166]:
# 50 bootstrapped trees. Without a seed each run draws different bootstrap
# samples and the reported metrics drift between runs, so the forest is
# seeded with the same value used for the train/test split.
randomforest_regressor = RandomForestRegressor(n_estimators=50, random_state=0)
randomforest_regressor.fit(X_train, y_train)


## 8. Making Predictions on the Test Data


In [167]:
# Predict the held-out targets with every fitted model.
y_pred_multiple = linear_regressor.predict(X_test)
# Only transform here: the polynomial expansion was already fitted on the
# training data and must not be re-fitted on the test split.
y_pred_polynomial = poly_regressor.predict(pr.transform(X_test))
y_pred_svr = svr_regressor.predict(X_test)
y_pred_decisiontree = decisson_regressor.predict(X_test)
y_pred_randomforest = randomforest_regressor.predict(X_test)

## 9.1 Evaluating Linear Regression Model Performance


In [168]:
# Score the linear model's test-set predictions: squared-error metrics first,
# then absolute error and the coefficient of determination.
mse_multipleLinearRegressin = mean_squared_error(y_test, y_pred_multiple)
rmse_multipleLinearRegressin = np.sqrt(mse_multipleLinearRegressin)  # root of the MSE
mae_multipleLinearRegressin = mean_absolute_error(y_test, y_pred_multiple)
r2_multipleLinearRegressin = r2_score(y_test, y_pred_multiple)

# One line per metric, four decimals each.
print(
    f"R-squared Value in multipleLinearRegressin: {r2_multipleLinearRegressin:.4f}",
    f"MSE(Mean Squred Error) Value in multipleLinearRegressin: {mse_multipleLinearRegressin:.4f}",
    f"RMSE (Route Mean Squred Error)Value in multipleLinearRegressin: {rmse_multipleLinearRegressin:.4f}",
    f"MAE(Mean Abseloute Error) Value in multipleLinearRegressin: {mae_multipleLinearRegressin:.4f}",
    sep="\n",
)

R-squared Value in multipleLinearRegressin: 0.4239
MSE(Mean Squred Error) Value in multipleLinearRegressin: 46.9074
RMSE (Route Mean Squred Error)Value in multipleLinearRegressin: 6.8489
MAE(Mean Abseloute Error) Value in multipleLinearRegressin: 4.3247


## 9.2 Evaluating Polynomial Regression Model Performance

In [169]:
# Score the degree-4 polynomial model's test-set predictions.
mse_polynomialeLinearRegressin = mean_squared_error(y_test, y_pred_polynomial)
rmse_polynomialeLinearRegressin = np.sqrt(mse_polynomialeLinearRegressin)  # root of the MSE
mae_polynomialeLinearRegressin = mean_absolute_error(y_test, y_pred_polynomial)
r2_polynomialeLinearRegressin = r2_score(y_test, y_pred_polynomial)

# One line per metric, four decimals each.
print(
    f"R-squared Value in polynomialeLinearRegressin: {r2_polynomialeLinearRegressin:.4f}",
    f"MSE(Mean Squred Error) Value in polynomialeLinearRegressin: {mse_polynomialeLinearRegressin:.4f}",
    f"RMSE (Route Mean Squred Error)Value in polynomialeLinearRegressin: {rmse_polynomialeLinearRegressin:.4f}",
    f"MAE(Mean Abseloute Error) Value in polynomialeLinearRegressin: {mae_polynomialeLinearRegressin:.4f}",
    sep="\n",
)


R-squared Value in polynomialeLinearRegressin: 0.4897
MSE(Mean Squred Error) Value in polynomialeLinearRegressin: 41.5539
RMSE (Route Mean Squred Error)Value in polynomialeLinearRegressin: 6.4462
MAE(Mean Abseloute Error) Value in polynomialeLinearRegressin: 4.3372


## 9.3 Evaluating Support Vector Regression Model Performance

In [170]:

# Score the support vector regressor's test-set predictions.
mse_SVRRegressin = mean_squared_error(y_test, y_pred_svr)
rmse_SVRRegressin = np.sqrt(mse_SVRRegressin)  # root of the MSE
mae_SVRRegressin = mean_absolute_error(y_test, y_pred_svr)
r2_SVRRegressin = r2_score(y_test, y_pred_svr)

# One line per metric, four decimals each.
print(
    f"R-squared Value in SVRRegressin: {r2_SVRRegressin:.4f}",
    f"MSE(Mean Squred Error) Value in SVRRegressin: {mse_SVRRegressin:.4f}",
    f"RMSE (Route Mean Squred Error)Value in SVRRegressin: {rmse_SVRRegressin:.4f}",
    f"MAE(Mean Abseloute Error) Value in SVRRegressin: {mae_SVRRegressin:.4f}",
    sep="\n",
)

R-squared Value in SVRRegressin: 0.4540
MSE(Mean Squred Error) Value in SVRRegressin: 44.4584
RMSE (Route Mean Squred Error)Value in SVRRegressin: 6.6677
MAE(Mean Abseloute Error) Value in SVRRegressin: 4.2905


## 9.4 Evaluating Decision Tree Regression Model Performance

In [171]:

# Score the decision tree's test-set predictions.
mse_DecisionTreeRegressin = mean_squared_error(y_test, y_pred_decisiontree)
rmse_DecisionTreeRegressin = np.sqrt(mse_DecisionTreeRegressin)  # root of the MSE
mae_DecisionTreeRegressin = mean_absolute_error(y_test, y_pred_decisiontree)
r2_DecisionTreeRegressin = r2_score(y_test, y_pred_decisiontree)

# One line per metric, four decimals each.
print(
    f"R-squared Value in DecisionTreeRegressin: {r2_DecisionTreeRegressin:.4f}",
    f"MSE(Mean Squred Error) Value in DecisionTreeRegressin: {mse_DecisionTreeRegressin:.4f}",
    f"RMSE (Route Mean Squred Error)Value in DecisionTreeRegressin: {rmse_DecisionTreeRegressin:.4f}",
    f"MAE(Mean Abseloute Error) Value in DecisionTreeRegressin: {mae_DecisionTreeRegressin:.4f}",
    sep="\n",
)

R-squared Value in DecisionTreeRegressin: 0.1408
MSE(Mean Squred Error) Value in DecisionTreeRegressin: 69.9609
RMSE (Route Mean Squred Error)Value in DecisionTreeRegressin: 8.3643
MAE(Mean Abseloute Error) Value in DecisionTreeRegressin: 5.6475


## 9.5 Evaluating Random Forest Regression Model Performance

In [173]:

# Score the random forest's test-set predictions.
mse_RandomForestRegressin = mean_squared_error(y_test, y_pred_randomforest)
rmse_RandomForestRegressin = np.sqrt(mse_RandomForestRegressin)  # root of the MSE
mae_RandomForestRegressin = mean_absolute_error(y_test, y_pred_randomforest)
r2_RandomForestRegressin = r2_score(y_test, y_pred_randomforest)

# One line per metric, four decimals each.
print(
    f"R-squared Value in RandomForestRegressin: {r2_RandomForestRegressin:.4f}",
    f"MSE(Mean Squred Error) Value in RandomForestRegressin: {mse_RandomForestRegressin:.4f}",
    f"RMSE (Route Mean Squred Error)Value in RandomForestRegressin: {rmse_RandomForestRegressin:.4f}",
    f"MAE(Mean Abseloute Error) Value in RandomForestRegressin: {mae_RandomForestRegressin:.4f}",
    sep="\n",
)

R-squared Value in RandomForestRegressin: 0.3519
MSE(Mean Squred Error) Value in RandomForestRegressin: 52.7730
RMSE (Route Mean Squred Error)Value in RandomForestRegressin: 7.2645
MAE(Mean Abseloute Error) Value in RandomForestRegressin: 4.8871
