In [1]:
# Importing required libraries
import numpy as np
import pandas as pd
from sklearn.linear_model import Ridge, LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

In [2]:
# Toy housing dataset, written as one record per house and then pivoted
# into the column-oriented dict that pd.DataFrame consumes.
house_columns = ('size_sqft', 'bedrooms', 'age_years', 'price')
house_rows = [
    (1200, 2, 5, 250000),
    (1500, 3, 10, 350000),
    (1000, 1, 2, 200000),
    (1800, 4, 15, 400000),
    (2000, 3, 8, 450000),
]

# zip(*house_rows) transposes the records so each column name maps to the
# list of its values, in the same row order as above.
data = {
    column: list(values)
    for column, values in zip(house_columns, zip(*house_rows))
}

In [3]:
# Load the raw dictionary into a DataFrame (one row per house).
df = pd.DataFrame(data)

# Predictors (X) are the three descriptive columns; the sale price is the target (y).
feature_columns = ['size_sqft', 'bedrooms', 'age_years']
X = df[feature_columns]
y = df['price']

# Hold out 20% of the rows for testing; random_state pins the shuffle so the
# split is reproducible across runs.
# NOTE(review): if df holds only five rows, a 0.2 test fraction leaves a single
# test row, which is too few for R² to be defined - confirm this is intended.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [5]:
# Train a Ridge Regression model (alpha=0.5) and evaluate it on the held-out split.
ridge = Ridge(alpha=0.5)

# Fit on the training rows only, then predict the unseen test rows.
ridge.fit(X_train, y_train)
y_pred_ridge = ridge.predict(X_test)

# RMSE is well defined for any non-empty test set.
rmse_ridge = np.sqrt(mean_squared_error(y_test, y_pred_ridge))

# R² compares the squared residuals with the variance of y_test around its
# mean, so it is undefined when the test set has fewer than two samples
# (scikit-learn warns and returns nan in that case). Report nan explicitly
# instead of triggering the warning.
if len(y_test) >= 2:
    r2_ridge = r2_score(y_test, y_pred_ridge)
else:
    r2_ridge = float("nan")

print("Ridge Regression Results:")
print(f"RMSE: {rmse_ridge}")
print(f"R²: {r2_ridge}")  # nan means R² is undefined for a single-row test set; it is not evidence of overfitting

Ridge Regression Results:
RMSE: 24999.917731821944
R²: nan




In [6]:
# Train an ordinary least-squares Linear Regression model and evaluate it on
# the held-out split.
linear = LinearRegression()

# Fit on the training rows only, then predict the unseen test rows.
linear.fit(X_train, y_train)
y_pred_linear = linear.predict(X_test)

# RMSE is well defined for any non-empty test set.
rmse_linear = np.sqrt(mean_squared_error(y_test, y_pred_linear))

# R² is undefined when the test set has fewer than two samples (scikit-learn
# warns and returns nan in that case), so report nan explicitly instead of
# triggering the warning. With a single test row the two models cannot be
# ranked reliably from this split.
if len(y_test) >= 2:
    r2_linear = r2_score(y_test, y_pred_linear)
else:
    r2_linear = float("nan")

print("\nLinear Regression Results:")
print(f"RMSE: {rmse_linear}")
print(f"R²: {r2_linear}")  # nan means R² is undefined for a single-row test set


Linear Regression Results:
RMSE: 25000.0
R²: nan




In [7]:
# Side-by-side summary of the metrics computed in the two evaluation cells.
# The lines are collected first and emitted with a single print call.
summary_lines = [
    "\nComparison of Models:",
    f"Ridge Regression - RMSE: {rmse_ridge}, R²: {r2_ridge}",
    f"Linear Regression - RMSE: {rmse_linear}, R²: {r2_linear}",
]
print("\n".join(summary_lines))


Comparison of Models:
Ridge Regression - RMSE: 24999.917731821944, R²: nan
Linear Regression - RMSE: 25000.0, R²: nan
