# In [1]:
import pandas as pd
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import r2_score

# ---------------------------------------------------------------------------
# Data: read the house-sales table and pick predictors / target.
# ---------------------------------------------------------------------------
df = pd.read_csv('kc_house_data.csv')

# Predictor columns, in the order they are handed to the polynomial expansion.
features = [
    "floors", "waterfront", "lat", "bedrooms", "sqft_basement", "view",
    "bathrooms", "sqft_living15", "sqft_above", "grade", "sqft_living",
]
X, y = df[features], df['price']

# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# ---------------------------------------------------------------------------
# Model: degree-2 polynomial expansion followed by Ridge regression.
# ---------------------------------------------------------------------------
# The expansion is fitted on the training rows only and then re-applied,
# unchanged, to the held-out rows.
poly = PolynomialFeatures(degree=2)
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)

# Ridge with regularization strength alpha=0.1; `fit` returns the estimator
# itself, so construction and fitting are chained.
ridge_model = Ridge(alpha=0.1).fit(X_train_poly, y_train)

# ---------------------------------------------------------------------------
# Evaluation: R² of the predictions on the held-out rows.
# ---------------------------------------------------------------------------
y_pred = ridge_model.predict(X_test_poly)
r2 = r2_score(y_test, y_pred)
print(f"R² value: {r2:.4f}")


# Output: R² value: 0.7003
