# EVALUATION ASSIGNMENT 1

In [1]:
import numpy as np
import pandas as pd
import random

## 1. Create Dataset

In [2]:
def generate_random_data(num_samples, seed=None):
    """Build a synthetic housing dataset of independently drawn random columns.

    Every column is sampled on its own, so 'Price($)' has no built-in
    relationship to the feature columns.

    Parameters
    ----------
    num_samples : int
        Number of rows to generate.
    seed : optional
        When given, rows are drawn from a private ``random.Random(seed)`` so
        the same seed always yields the same frame and the global ``random``
        state is left untouched. When ``None`` (the default), the
        module-level ``random`` generator is used, exactly as before.

    Returns
    -------
    pandas.DataFrame
        Columns 'Size(sq ft)' (int, 1000-3000), 'Bedrooms' (int, 1-5),
        'Age(yrs)' (int, 1-30), 'Distance(miles)' (float, 1-10) and
        'Price($)' (int, 100000-500000); integer bounds are inclusive.
    """
    # The module and a Random instance expose the same randint/uniform API,
    # so either can serve as the source of draws.
    rng = random if seed is None else random.Random(seed)
    data = {
        'Size(sq ft)': [rng.randint(1000, 3000) for _ in range(num_samples)],
        'Bedrooms': [rng.randint(1, 5) for _ in range(num_samples)],
        'Age(yrs)': [rng.randint(1, 30) for _ in range(num_samples)],
        'Distance(miles)': [rng.uniform(1, 10) for _ in range(num_samples)],
        'Price($)': [rng.randint(100000, 500000) for _ in range(num_samples)]
    }
    return pd.DataFrame(data)

## Gradient Descent

In [3]:
def gradient_descent(X, y, beta, learning_rate, iterations):
    """Fit linear-model coefficients with batch gradient descent.

    Each iteration predicts with the current coefficients, takes the gradient
    of the half mean squared error, and steps against it.

    Parameters
    ----------
    X : design matrix; must support ``.dot`` and ``.T`` (e.g. a numpy array).
    y : target values; ``len(y)`` is used as the sample count.
    beta : starting coefficients. The caller's array is not modified in
        place; every step rebinds ``beta`` to a new array.
    learning_rate : step size applied to the gradient.
    iterations : number of update steps to run.

    Returns
    -------
    The coefficients after the final update (``beta`` itself when
    ``iterations`` is 0).

    Side effects
    ------------
    Prints the cost on every 100th iteration, starting with iteration 0.
    """
    m = len(y)
    for step in range(iterations):
        # Residuals of the current coefficients, shared by the log line and the gradient
        residual = X.dot(beta) - y

        # The logged cost describes the coefficients *before* this step's update
        if step % 100 == 0:
            cost = (1/(2*m)) * np.sum(residual ** 2)
            print(f"Iteration {step}, Cost: {cost}")

        beta = beta - learning_rate * ((1/m) * X.T.dot(residual))

    return beta

## (a) Training Data

In [4]:
# Seed the generator before sampling so Restart & Run All reproduces the same
# dataset, and therefore the same fitted coefficients and metrics.
random.seed(42)
df = generate_random_data(10)
print(df)

# Feature columns stay a DataFrame slice; the target is pulled out as a plain array
X = df[['Size(sq ft)', 'Bedrooms', 'Age(yrs)', 'Distance(miles)']]
y = df['Price($)'].values

   Size(sq ft)  Bedrooms  Age(yrs)  Distance(miles)  Price($)
0         2355         5         7         7.075868    385932
1         1951         4        11         4.620330    437458
2         2764         3        20         1.158274    116904
3         2666         2        25         4.037904    286311
4         2003         1         8         7.725186    478913
5         2416         5        23         8.302594    471048
6         1584         3        25         2.040558    434969
7         2098         3         5         4.610081    434611
8         1304         1        25         9.429360    401743
9         2104         1         3         5.022046    192009


## Setting Hyperparameters and Training the Model

In [5]:
# Build the design matrix from `df` rather than from `X` itself, so re-running
# this cell does not stack another column of ones onto an already-augmented X.
# The leading column of ones lets beta[0] act as the intercept.
X = np.c_[
    np.ones(len(df)),
    df[['Size(sq ft)', 'Bedrooms', 'Age(yrs)', 'Distance(miles)']].to_numpy(),
]

# Initialize beta (coefficients) with zeros
beta = np.zeros(X.shape[1])

# Gradient Descent settings
learning_rate = 0.00000001  # Tuning this is important to ensure convergence
iterations = 10000  # Number of iterations for gradient descent

# Run gradient descent
beta = gradient_descent(X, y, beta, learning_rate, iterations)
print("Model coefficients (beta):", beta)

# Predict prices using the final model
y_pred = X.dot(beta)

Iteration 0, Cost: 73177353530.5
Iteration 100, Cost: 13725114128.432312
Iteration 200, Cost: 13720859639.183434
Iteration 300, Cost: 13720528963.554289
Iteration 400, Cost: 13720198586.23404
Iteration 500, Cost: 13719868248.250343
Iteration 600, Cost: 13719537949.579483
Iteration 700, Cost: 13719207690.214811
Iteration 800, Cost: 13718877470.149681
Iteration 900, Cost: 13718547289.377462
Iteration 1000, Cost: 13718217147.891527
Iteration 1100, Cost: 13717887045.685234
Iteration 1200, Cost: 13717556982.751942
Iteration 1300, Cost: 13717226959.085032
Iteration 1400, Cost: 13716896974.677866
Iteration 1500, Cost: 13716567029.523819
Iteration 1600, Cost: 13716237123.616257
Iteration 1700, Cost: 13715907256.948553
Iteration 1800, Cost: 13715577429.514084
Iteration 1900, Cost: 13715247641.306223
Iteration 2000, Cost: 13714917892.318346
Iteration 2100, Cost: 13714588182.54383
Iteration 2200, Cost: 13714258511.976053
Iteration 2300, Cost: 13713928880.608393
Iteration 2400, Cost: 13713599288.4

## Evaluating the Model

In [6]:
# Mean squared error between the actual prices and the model's predictions
mse = np.square(y - y_pred).mean()
print("Mean Squared Error:", mse)

Mean Squared Error: 27377328923.133102


## Interpreting the Coefficients

In [7]:
# Feature names, listed in the same order as the columns selected for X
features = ['Size(sq ft)', 'Bedrooms', 'Age(yrs)', 'Distance(miles)']

# beta[0] pairs with the column of ones (the intercept); the remaining
# entries pair with the feature columns.
intercept, coefficients = beta[0], beta[1:]
print("Intercept:", intercept)

for feature, coef in zip(features, coefficients):
    print(f"The coefficient for {feature} is {coef:.2f}, indicating the price change per unit increase in {feature}, holding other variables constant.")

Intercept: 2.6712763174717757
The coefficient for Size(sq ft) is 158.68, indicating the price change per unit increase in Size(sq ft), holding other variables constant.
The coefficient for Bedrooms is 8.38, indicating the price change per unit increase in Bedrooms, holding other variables constant.
The coefficient for Age(yrs) is 44.48, indicating the price change per unit increase in Age(yrs), holding other variables constant.
The coefficient for Distance(miles) is 36.83, indicating the price change per unit increase in Distance(miles), holding other variables constant.


## Predicting the Price of an Example House

In [8]:
# One hand-written row in design-matrix order: a leading 1 for the intercept,
# then size, bedrooms, age and distance.
example_house = [1, 2500, 4, 10, 5]
predicted_price = np.asarray(example_house).dot(beta)
print("Predicted Price for example house:", predicted_price)

Predicted Price for example house: 397367.44583772233


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=c8428051-d173-4322-a2c3-cd34cde998df' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>