## Part 3: Implementation using Scikit-learn

In [1]:
import numpy as np
import pandas as pd
import time
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error, r2_score

In [2]:
def model(csv_path='california_prepared.csv', target='median_house_value',
          test_size=0.2, random_state=42):
    """Train and evaluate a scikit-learn linear regression on a prepared CSV.

    The data is split into train and test subsets, the features are min-max
    scaled (the scaler is fit on the training subset only, so no information
    from the test subset leaks into preprocessing), a ``LinearRegression``
    is fit, and MAE / RMSE / R2 are printed for both subsets together with
    the time spent in ``fit``.

    Parameters
    ----------
    csv_path : str
        Path of the CSV file to load.
    target : str
        Name of the column to predict; every other column is used as a feature.
    test_size : float
        Fraction of rows held out for testing.
    random_state : int
        Seed passed to ``train_test_split`` so the split is reproducible.

    Returns
    -------
    None
        All results are reported through ``print``.
    """
    data = pd.read_csv(csv_path)

    X = data.drop(target, axis=1)
    y = data[target]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # Fit the scaler on the training features only and reuse it for the test
    # features. The scaled arrays are what the model actually sees; they are
    # no longer overwritten with the raw frames afterwards.
    # NOTE(review): ordinary least squares with an intercept should give the
    # same predictions with or without min-max scaling (up to floating-point
    # error, assuming a full-rank design) - confirm the reported metrics match.
    scaler = MinMaxScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Named `regressor` so the local does not shadow this function's own name.
    regressor = LinearRegression()

    # perf_counter is monotonic and high-resolution, which suits an interval
    # this short better than the wall-clock time.time().
    start_time = time.perf_counter()
    regressor.fit(X_train_scaled, y_train)
    end_time = time.perf_counter()

    print("Training Time:", (end_time - start_time))

    # Predict once per subset and reuse the result for every metric.
    y_train_pred = regressor.predict(X_train_scaled)
    y_pred = regressor.predict(X_test_scaled)

    test_mae = mean_absolute_error(y_test, y_pred)
    test_rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    test_r2 = r2_score(y_test, y_pred)

    train_mae = mean_absolute_error(y_train, y_train_pred)
    train_rmse = np.sqrt(mean_squared_error(y_train, y_train_pred))
    train_r2 = r2_score(y_train, y_train_pred)

    print(f"Training MAE: {train_mae}")
    print(f"Training RMSE: {train_rmse}")
    print(f"Training R2 Score: {train_r2}")


    print(f"Testing MAE: {test_mae}")
    print(f"Testing RMSE: {test_rmse}")
    print(f"Testing R2 Score: {test_r2}")
    
# Run the full load / split / fit / evaluate pipeline; the timing and the
# train/test metrics are printed as this cell's output.
model()  

Training Time: 0.017530441284179688
Training MAE: 50826.23366720035
Training RMSE: 69766.31335504746
Training R2 Score: 0.6358903491822181
Testing MAE: 52351.18556675165
Testing RMSE: 73292.02001566645
Testing R2 Score: 0.5900730010879597
