In [62]:
import numpy as np
import pandas as pd

In [63]:
#Reading Data
train_set = pd.read_csv('../../Datasets/Housingdata/Housing_Train.csv')
test_set = pd.read_csv('../../Datasets/Housingdata/Housing_Test.csv')
train_set

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
0,0.25199,0.0,10.59,0.0,0.489,5.783,72.7,4.3549,4,277,18.6,389.43,,22.5
1,0.62976,0.0,8.14,0.0,0.538,5.949,61.8,4.7075,4,307,21.0,396.90,8.26,20.4
2,1.23247,0.0,8.14,0.0,0.538,6.142,91.7,3.9769,4,307,21.0,396.90,18.72,15.2
3,,0.0,18.10,1.0,0.631,6.683,96.8,1.3567,24,666,20.2,375.33,3.73,50.0
4,12.80230,0.0,18.10,0.0,0.740,5.854,96.6,1.8956,24,666,20.2,240.52,23.79,10.8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
400,1.00245,0.0,8.14,0.0,0.538,6.674,87.3,4.2390,4,307,21.0,380.23,11.98,21.0
401,0.03041,0.0,5.19,0.0,0.515,5.895,59.6,5.6150,5,224,20.2,394.81,10.56,18.5
402,2.81838,0.0,18.10,0.0,0.532,5.762,40.3,4.0983,24,666,20.2,392.92,10.42,21.8
403,0.44791,0.0,6.20,1.0,0.507,6.726,66.5,3.6519,8,307,17.4,360.20,8.05,29.0


In [64]:
#splliting datasets
y_train = train_set['MEDV']
x_train = train_set.drop(columns='MEDV')

x_test = test_set

In [65]:
#check dataset
x_train.isnull().sum()
x_test.isnull().sum()

CRIM       3
ZN         5
INDUS      6
CHAS       4
NOX        0
RM         0
AGE        5
DIS        0
RAD        0
TAX        0
PTRATIO    0
B          0
LSTAT      2
dtype: int64

In [66]:
#fill Null values
def replace_null_values(cols: list, dataset: pd.DataFrame):
    for col in cols:
        temp = dataset[col].dropna()
        mode = temp.mean()
        dataset[col].fillna(mode, inplace=True)

na_cols = list(x_train.columns[x_train.isnull().any()])
na_cols_test = list(x_test.columns[x_test.isnull().any()])

replace_null_values(na_cols, x_train)
replace_null_values(na_cols_test, x_test)

In [67]:
#feature Scaling
from sklearn.preprocessing import MinMaxScaler, StandardScaler
# scaler = MinMaxScaler()
scaler = StandardScaler()

x_train = pd.DataFrame(scaler.fit_transform(x_train), columns=x_train.columns)
x_test = pd.DataFrame(scaler.fit_transform(x_test), columns=x_test.columns)

In [68]:
# Linear Regression
class LinearRegression:
    def __init__(self):
        self.weight = None

    def fit(self, x:pd.DataFrame, y:pd.DataFrame, epoch:int, learning_rate:float):
        m = len(x)
        if x.iloc[0, 0] != 1:
            x.insert(0, 'bias', 1)

        self.weight = np.ones(len(x.columns))

        for i in range(epoch):
            gradient = 2/m * x.T.dot(self.predict(x) - y)
            self.weight = self.weight - learning_rate * gradient

    def predict(self, df:pd.DataFrame):
        if df.iloc[0, 0] != 1:
            df.insert(0, 'bias', 1)
        return np.dot(df, self.weight)

In [69]:
reg = LinearRegression()
reg.fit(x_train, y_train, 2000, 0.1)

In [70]:
# Evaluating Model
from sklearn.metrics import mean_squared_error, r2_score
y_pred_train = reg.predict(x_train)

mse_error = mean_squared_error(y_train, y_pred_train)
rmse = np.sqrt(mse_error)
r2_error = r2_score(y_train, y_pred_train)

errors = pd.DataFrame({'Value': [mse_error, rmse, r2_error]}, index=['MSE', 'RMSE', 'R2'])
errors

Unnamed: 0,Value
MSE,22.387781
RMSE,4.731573
R2,0.739538


In [71]:
#running model on test set

y_test_pred = pd.DataFrame(reg.predict(x_test), columns=['MEDV'])
y_test_pred.to_csv('house_pred.csv', index=False)