In [2]:
# linear regression using the square trick
# most popular

def square_trick(base_price, price_per_room, num_roooms, price, learning_rate):
    """Nudge a line `price = base_price + price_per_room * rooms` toward one point.

    Args:
        base_price: current intercept of the line.
        price_per_room: current slope of the line.
        num_roooms: feature value of the sample (spelling kept because callers
            pass it by keyword).
        price: observed label of the sample.
        learning_rate: step size applied to both updates.

    Returns:
        Tuple `(price_per_room, base_price)` -- note the slope comes first,
        which is the reverse of the parameter order.
    """
    # Signed residual of the current line at this sample (observed - predicted).
    error = price - (base_price + price_per_room * num_roooms)
    # Shift the intercept in the direction of the residual (translates the line).
    base_price += learning_rate * error
    # Scale the slope update by the feature value as well (rotates the line).
    price_per_room += learning_rate * num_roooms * error
    return price_per_room, base_price

In [3]:
# absolute trick

def absolute_trick(base_price, price_per_room, num_rooms, price, learning_rate):
    """Nudge a line `price = base_price + price_per_room * rooms` toward one point
    using a fixed-size step whose direction depends only on the sign of the error.

    Args:
        base_price: current intercept of the line.
        price_per_room: current slope of the line.
        num_rooms: feature value of the sample.
        price: observed label of the sample.
        learning_rate: step size applied to both updates.

    Returns:
        Tuple `(price_per_room, base_price)` -- slope first, then intercept.
    """
    # Prediction is intercept + slope * feature. The previous version wrote
    # `base_price = price_per_room*num_rooms`, which overwrote the intercept
    # and dropped it from the prediction.
    predicted_price = base_price + price_per_room * num_rooms
    if price > predicted_price:
        # Point lies above the line: raise slope and intercept.
        price_per_room += learning_rate * num_rooms
        base_price += learning_rate
    else:
        # Point lies on or below the line: lower slope and intercept.
        price_per_room -= learning_rate * num_rooms
        base_price -= learning_rate
    return price_per_room, base_price

In [4]:
import random
def linear_regression(features, labels, learning_rate=0.01, epochs=1000):
    """Fit a one-feature line by repeatedly applying `square_trick` to random samples.

    Args:
        features: indexable sequence of feature values (supports `len` and `[i]`).
        labels: indexable sequence of targets, read at the same indices as `features`.
        learning_rate: step size forwarded to `square_trick`.
        epochs: number of single-sample updates to perform.

    Returns:
        Tuple `(price_per_room, base_price)` -- slope first, then intercept.
    """
    # Both parameters start at a random value in [0, 1); slope is drawn first.
    price_per_room = random.random()
    base_price = random.random()

    for _ in range(epochs):
        # Pick one sample uniformly at random (randint is inclusive on both ends).
        i = random.randint(0, len(features) - 1)
        price_per_room, base_price = square_trick(
            base_price=base_price,
            price_per_room=price_per_room,
            num_roooms=features[i],
            price=labels[i],
            learning_rate=learning_rate,
        )

    return price_per_room, base_price

In [5]:
import numpy as np
# Toy dataset consumed by linear_regression: features[i] is used as the room
# count and labels[i] as the price of the same house.
features = np.array([1, 2, 3, 5, 6, 7])
labels = np.array([155, 197, 244, 356, 407, 448])

In [7]:
# run the model; the displayed result is the tuple (price_per_room, base_price)
# NOTE: linear_regression draws from `random` and no seed is set in the visible
# cells, so the exact numbers differ from run to run
linear_regression(features, labels, learning_rate=0.01, epochs=10000)

(50.22129475023709, 99.28765731077557)

In [8]:
# so we now have the fitted line p^ = 50.22r + 99.287 for a dataset with 6 homes
# plugging in, for a home with 4 rooms the predicted price is p^ = 50.22(4) + 99.287 = 300.167
# p^ = mr + b
# weights = m
# bias = b (in this case, it is the base price of a home)

In [10]:
# root mean squared error
# remember that rmse is just a tool that uses squares to evaluate the average error between the points and our fitted line
# we then use gradient descent to adjust the line against this rmse, which helps us get closer to the lowest amount of error
# this also helps us know when to stop the algorithm, once we hit our lowest point of error
# root mean squared error is also handy because it is expressed in the same units as the labels (think money, for example)
def rmse(labels, predictions):
    """Root mean squared error between `labels` and `predictions`.

    Args:
        labels: sequence of observed values; its length is the averaging count.
        predictions: values subtracted element-wise from `labels`.

    Returns:
        Square root of the mean of the squared differences.
    """
    count = len(labels)
    residuals = np.subtract(labels, predictions)
    # The dot product of the residual vector with itself is the sum of its squared entries.
    sum_of_squares = np.dot(residuals, residuals)
    return np.sqrt(1.0 / count * sum_of_squares)

In [11]:
import pandas as pd
from sklearn.linear_model import LinearRegression

# Read the dataset from the local assets folder
data = pd.read_csv('assets/Hyderabad.csv')

# Target is the 'Price' column; every remaining column is used as a feature
y = data['Price']
X = data.drop(columns=['Price'])

# Expand categorical columns into indicator (dummy) columns so the model
# receives purely numeric input
X_encoded = pd.get_dummies(X)

# Fit a linear model on the encoded feature matrix
model = LinearRegression()
model.fit(X_encoded, y)


In [12]:
# Describe the house to price as a one-row frame
house = pd.DataFrame({
    'Area': [1000],
    'No. of Bedrooms': [3],
    'Location': ['Nizampet'],
})

# Encode exactly like the training data, then line the columns up with the
# training matrix: columns absent from this single row are filled with 0,
# and columns the training matrix does not have are dropped by the reindex
house_encoded = pd.get_dummies(house).reindex(columns=X_encoded.columns, fill_value=0)

# Predict the price for the single row
predicted_price = model.predict(house_encoded)
print(predicted_price)

[1769648.5]


In [14]:
import pandas as pd
from sklearn.linear_model import LinearRegression

# Read the dataset again (this rebinds `data`, `X` and `y` from the earlier cell)
data = pd.read_csv('assets/Hyderabad.csv')

# Single-feature setup: price explained by 'Area' alone
y = data['Price']
X = data[['Area']]  # list selector keeps X two-dimensional (a DataFrame)

# Fit the one-feature linear model
simple_model = LinearRegression()
simple_model.fit(X, y)

# Report the fitted line: intercept first, then the coefficient of 'Area'
print("Y-Intercept:", simple_model.intercept_)
print("Slope:", simple_model.coef_[0])

Y-Intercept: -6222669.083283626
Slope: 9753.940608183995
