In [None]:
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
# Mount Google Drive (Colab runtime) and read the housing CSV stored there
from google.colab import drive
drive.mount('/content/drive')
data = pd.read_csv('/content/drive/MyDrive/USA_Housing.csv')
# 'Price' is the regression target; every remaining column is an input feature
y = data['Price']
X = data.drop(columns='Price')
# Standardise the input features. The scaler is fitted on the full dataset,
# i.e. before any train/test partitioning takes place further down.
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

# Divide input and output features into five folds
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Run five iterations and remember the parameters of the best-scoring fold.
# A fitted linear model is described by BOTH its coefficient vector and its
# intercept, so the two are tracked together; reusing the coefficients with
# an intercept from a different fit would shift every prediction.
best_beta = None
best_intercept = None
best_r2_score = -float('inf')
for train_index, test_index in kf.split(X_scaled):
    X_train, X_test = X_scaled[train_index], X_scaled[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]
    # Find the beta matrix, predicted values, and R2_score
    regressor = LinearRegression()
    regressor.fit(X_train, y_train)
    y_pred = regressor.predict(X_test)
    r2 = r2_score(y_test, y_pred)
    # Update the best beta matrix (and matching intercept) and R2_score
    if r2 > best_r2_score:
        best_r2_score = r2
        best_beta = regressor.coef_
        best_intercept = regressor.intercept_

# Train the regressor for 70% of data (first 70% of rows, in file order)
split_index = int(0.7 * len(X_scaled))
X_train, X_test = X_scaled[:split_index], X_scaled[split_index:]
y_train, y_test = y.iloc[:split_index], y.iloc[split_index:]
regressor = LinearRegression()
regressor.fit(X_train, y_train)
# Replace the freshly fitted parameters with those of the best fold. Both
# attributes are swapped so the model stays internally consistent. If no fold
# was ever selected (e.g. every fold score was NaN) the 70% fit is kept as-is.
if best_beta is not None:
    regressor.coef_ = best_beta
    regressor.intercept_ = best_intercept

# Test the performance for remaining 30% data
# NOTE: the folds above were drawn from the whole dataset, so the best fold's
# training rows can include rows from this 30% slice; the score below is
# therefore not a strictly held-out estimate.
y_pred = regressor.predict(X_test)
r2 = r2_score(y_test, y_pred)
print('R2 score for 30% test data:', r2)

Mounted at /content/drive
R2 score for 30% test data: 0.9176755577794673
