In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error

# Read the housing data and discard four columns that the rest of the
# notebook does not use.
unused_cols = ["mainroad", "prefarea", "guestroom", "hotwaterheating"]
df = pd.read_csv('/content/Housing.csv').drop(columns=unused_cols)

# Swap each remaining text column for integer codes. One encoder object is
# refit for every column in turn, so once this cell has run it only holds the
# mapping learned from the last column in the list.
categorical_cols = ["basement", "airconditioning", "furnishingstatus"]
encoder = LabelEncoder()
df = df.assign(**{col: encoder.fit_transform(df[col]) for col in categorical_cols})



In [None]:
# Spot-check five randomly chosen rows of the encoded frame.
df.sample(5)

Unnamed: 0,price,area,bedrooms,bathrooms,stories,basement,airconditioning,parking,furnishingstatus
61,7070000,8880,2,1,1,0,1,1,1
248,4543000,4100,2,2,1,1,0,0,1
489,2835000,3300,3,1,2,0,0,1,1
304,4193000,8250,3,1,1,1,0,3,1
300,4200000,4079,3,1,3,0,0,0,1


In [None]:
# Separate predictors from the target: every column except "price" is a
# feature, and "price" is the value to predict. `.values` yields NumPy arrays.
X = df.drop(columns=["price"]).values
y = df["price"].values

# Hold out 10% of the rows for testing; the fixed seed keeps the split
# reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

# Sanity check: the split must neither lose nor duplicate rows.
assert len(X_train) + len(X_test) == len(X)

# Preview only the first few held-out rows instead of dumping the whole array.
X_test[:5]

array([[ 5900,     4,     2,     2,     1,     0,     1,     2],
       [ 6500,     3,     2,     3,     0,     1,     0,     0],
       [ 4040,     2,     1,     1,     0,     0,     0,     1],
       [ 5000,     3,     1,     2,     0,     1,     0,     1],
       [ 3960,     3,     1,     1,     0,     0,     0,     0],
       [ 6720,     3,     1,     1,     0,     0,     0,     2],
       [ 8520,     3,     1,     1,     0,     1,     2,     0],
       [ 4990,     4,     2,     2,     1,     0,     0,     0],
       [ 3240,     2,     1,     1,     0,     0,     1,     2],
       [ 2700,     3,     1,     1,     0,     0,     0,     0],
       [ 8580,     4,     3,     4,     0,     1,     2,     1],
       [ 3934,     2,     1,     1,     0,     0,     0,     2],
       [ 3720,     2,     1,     1,     0,     1,     0,     2],
       [ 3100,     3,     1,     2,     1,     0,     0,     1],
       [ 3970,     3,     1,     2,     1,     0,     0,     2],
       [ 3630,     3,    

In [None]:
# Standardise the features. The scaler is fitted on the training split only
# and then applied unchanged to both splits.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# Fit the linear model on the standardised training data.
model = LinearRegression().fit(X_train, y_train)

# Predict the held-out rows and report the mean absolute error.
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
mae


932528.935213145

In [None]:
# Fitted weights of the linear model, one per feature column.
model.coef_

array([ 592326.26266596,   77021.85268177,  521572.7698521 ,
        429000.10182916,  292315.26610096,  368152.80812609,
        250841.66679207, -198168.23462145])

In [None]:
# Fitted intercept (bias) term of the linear model.
model.intercept_

4791534.571428572

In [None]:
# Reproduce the model's prediction for the first test row by hand:
#     y_hat = intercept + sum(weight_i * feature_i)
# The weights and feature values are read from the fitted objects rather than
# pasted as rounded literals, so the check is exact and stays valid whenever
# the notebook is re-run on different data or with a different split.
y_hat = model.intercept_ + sum(w * x for w, x in zip(model.coef_, X_test[0]))
y_hat

5929835.826015247

In [None]:
# Library predictions, for comparison with the hand-computed value for the
# first row; only the first few are shown rather than the whole array.
model.predict(X_test)[:5]

array([5929835.82408789, 6902857.3080749 , 3038326.01214491,
       4692651.74533701, 3382115.42344511, 3610358.38672515,
       6001560.12542992, 5916585.12951394, 2847422.12719213,
       3038352.68605947, 9429254.25487869, 2747025.72572536,
       3484514.14241924, 3990044.45924423, 3965023.88051387,
       5351185.78940434, 2975060.46795996, 4992837.48522085,
       4767678.44549702, 3573068.04807812, 5422333.12724913,
       5308897.9743257 , 2669269.86845956, 4208381.92832197,
       5771019.94791381, 7486400.00154968, 3128397.86940564,
       5270423.21371967, 8383334.45951648, 3510268.6013817 ,
       6076743.73554086, 3411220.56894668, 6766443.52339805,
       4337394.75330763, 3676246.51440923, 5918540.39953651,
       4991313.35066147, 4648598.67004047, 3522270.57855479,
       4703907.38201299, 4795979.1573753 , 3459959.66117741,
       6881031.1025266 , 4230635.47198478, 3777688.72509338,
       4364924.56466818, 6997365.65662655, 4191534.09121278,
       3869984.0011851 ,

In [None]:
# Standardised test features; row 0 is the one used in the manual prediction
# check. Only the leading rows are displayed to keep the output short.
X_test[:5]

array([[ 0.3461539 ,  1.37042373,  1.43149584,  0.22293775,  1.3536506 ,
        -0.67059984,  0.33709075,  1.25107266],
       [ 0.62251602,  0.03008914,  1.43149584,  1.37282722, -0.73874307,
         1.49120227, -0.81797546, -1.3969861 ],
       [-0.51056868, -1.31024544, -0.57259833, -0.92695171, -0.73874307,
        -0.67059984, -0.81797546, -0.07295672],
       [-0.06838928,  0.03008914, -0.57259833,  0.22293775, -0.73874307,
         1.49120227, -0.81797546, -0.07295672],
       [-0.54741696,  0.03008914, -0.57259833, -0.92695171, -0.73874307,
        -0.67059984, -0.81797546, -1.3969861 ],
       [ 0.72384879,  0.03008914, -0.57259833, -0.92695171, -0.73874307,
        -0.67059984, -0.81797546,  1.25107266],
       [ 1.55293515,  0.03008914, -0.57259833, -0.92695171, -0.73874307,
         1.49120227,  1.49215697, -1.3969861 ],
       [-0.07299532,  1.37042373,  1.43149584,  0.22293775,  1.3536506 ,
        -0.67059984, -0.81797546, -1.3969861 ],
       [-0.8790515 , -1.31024544

In [None]:
# Vectorised form of the single-row prediction: intercept plus the dot product
# of the weights with the first test row.
result = model.intercept_ + np.dot(model.coef_, X_test[0])

In [None]:
# Display the dot-product prediction for the first test row.
result

5929835.824087889

In [None]:
class GUBRegression:
    """Least-squares linear regression with an intercept term.

    After ``fit`` has been called the learned parameters are exposed as:

    * ``beta`` -- the full parameter vector, intercept first;
    * ``intercept_`` -- ``beta[0]``;
    * ``coef_`` -- ``beta[1:]``, one weight per feature column.
    """

    def __init__(self):
        # All three attributes remain None until fit() is called.
        self.beta = None
        self.coef_ = None
        self.intercept_ = None

    def fit(self, X_train, y_train):
        """Estimate the intercept and feature weights from training data.

        Parameters
        ----------
        X_train : array-like, 2-D (rows are samples, columns are features)
            Training features. A column of ones is prepended internally, so
            the caller must not add an intercept column.
        y_train : array-like
            Target values, one entry (or row) per training sample.

        Notes
        -----
        The parameters are obtained with ``np.linalg.lstsq`` instead of
        explicitly inverting ``X^T X``. For a well-conditioned problem both
        give the same answer up to rounding, but the explicit inverse raises
        ``LinAlgError`` (or returns garbage) when features are collinear,
        whereas ``lstsq`` returns the minimum-norm least-squares solution.
        """
        # Prepend a column of ones so the first parameter acts as the intercept.
        design = np.insert(X_train, 0, 1, axis=1)
        self.beta = np.linalg.lstsq(design, y_train, rcond=None)[0]
        self.intercept_ = self.beta[0]
        self.coef_ = self.beta[1:]

    def predict(self, X_test):
        """Return predictions ``X_test . coef_ + intercept_``.

        Parameters
        ----------
        X_test : array-like
            Feature rows with the same column layout that was passed to
            ``fit`` (without an intercept column).

        Returns
        -------
        The predicted target value for each row of ``X_test``.
        """
        y_pred = np.dot(X_test, self.coef_) + self.intercept_
        return y_pred

In [None]:
# Instantiate the hand-written regressor.
model1 = GUBRegression()

In [None]:
# Train it on the same standardised training split used for the scikit-learn model.
model1.fit(X_train, y_train)

In [None]:
# Predict the held-out rows with the hand-written model.
y_pred1 = model1.predict(X_test)

In [None]:
# First prediction from the hand-written model.
y_pred1[0]

5929835.824087888

In [None]:
# First prediction from the scikit-learn model, shown for a side-by-side
# comparison with the hand-written model's first prediction.
y_pred[0]

5929835.824087889