In [46]:
import itertools
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

<a href="https://colab.research.google.com/github/leandro-driguez/Machine-Learning-Techniques/blob/main/Lab_1/Sesi%C3%B3n%201/PolynomialRegression.ipynb" target="_parent">
    <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab" />
</a>

## Dataset

In [47]:
# Raw CSV hosted in the course's GitHub repository; read it straight over HTTP.
url = 'https://raw.githubusercontent.com/leandro-driguez/Machine-Learning-Techniques/main/Lab_1/Sesi%C3%B3n%201/kc_house_data.csv'
data = pd.read_csv(filepath_or_buffer=url)

# Preview the first five rows as the cell output.
data.head(n=5)

Unnamed: 0,id,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,...,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15
0,7129300520,20141013T000000,221900.0,3,1.0,1180,5650,1.0,0,0,...,7,1180,0,1955,0,98178,47.5112,-122.257,1340,5650
1,6414100192,20141209T000000,538000.0,3,2.25,2570,7242,2.0,0,0,...,7,2170,400,1951,1991,98125,47.721,-122.319,1690,7639
2,5631500400,20150225T000000,180000.0,2,1.0,770,10000,1.0,0,0,...,6,770,0,1933,0,98028,47.7379,-122.233,2720,8062
3,2487200875,20141209T000000,604000.0,4,3.0,1960,5000,1.0,0,0,...,7,1050,910,1965,0,98136,47.5208,-122.393,1360,5000
4,1954400510,20150218T000000,510000.0,3,2.0,1680,8080,1.0,0,0,...,8,1680,0,1987,0,98074,47.6168,-122.045,1800,7503


In [48]:
# Split the raw frame into target (y) and features (X).
# Target: the sale price column.
y = data['price']
# Features: every remaining column except the ones considered unnecessary
# (id, date, zipcode, sqft_above); the target itself is removed as well.
X = data.drop(columns=['id', 'date', 'zipcode', 'sqft_above', 'price'])

In [49]:
# normalize data: rescale every feature column to the [0, 1] range.
# NOTE(review): the scaler is fit on the full dataset, before the train/test
# split performed later in the notebook, so test rows influence the min/max
# used for scaling. Fit on the training rows only to avoid that leakage.
columns = X.columns
scaler = MinMaxScaler(feature_range=(0, 1))
# fit_transform returns a bare NumPy array; restore both the column labels and
# the original row index so X stays label-aligned with y.
X = pd.DataFrame(scaler.fit_transform(X), columns=columns, index=X.index)

In [50]:
def generate_polynomial_features(df: pd.DataFrame, degree: int) -> pd.DataFrame:
    """
    Expand a DataFrame with polynomial and interaction features up to ``degree``.

    For every d in 2..degree, one column is appended for each combination (with
    replacement) of d source columns. The new column holds the row-wise product
    of those columns and is named by joining their labels with "*"
    (e.g. "x1*x1", "x1*x2"). The original columns come first, unchanged.

    Parameters:
    - df (pd.DataFrame): Input DataFrame with the original features.
    - degree (int): Highest total degree of the generated features (>= 1).
                    With degree == 1 a copy of ``df`` is returned.

    Returns:
    - pd.DataFrame: New DataFrame containing the original features followed by
                    their polynomial and interaction combinations. ``df`` itself
                    is not modified.

    Raises:
    - ValueError: If ``df`` is not a DataFrame or ``degree`` is not a positive int.

    Notes:
    - Missing values propagate: a product involving NaN is NaN (it is not
      silently treated as 1).
    - The number of generated columns grows combinatorially with both the number
      of input columns and ``degree``; high degrees on wide frames can exhaust
      memory.
    """
    # Validate inputs
    if not isinstance(df, pd.DataFrame):
        raise ValueError("Input must be a pandas DataFrame.")
    if not isinstance(degree, int) or degree < 1:
        raise ValueError("Degree must be a positive integer.")

    # Work with column positions rather than labels: this is unambiguous for
    # tuple keys and duplicate labels, and lets non-string labels be named.
    labels = list(df.columns)
    positions = range(len(labels))

    # The original features always come first.
    poly_features = [df.copy()]

    for d in range(2, degree + 1):
        for combo in itertools.combinations_with_replacement(positions, d):
            # str() so integer (or other non-string) column labels can be joined.
            feature_name = "*".join(str(labels[i]) for i in combo)

            # skipna=False: pandas' default would skip NaN and treat it as 1,
            # producing a finite product from missing inputs.
            product = df.iloc[:, list(combo)].prod(axis=1, skipna=False)

            poly_features.append(product.to_frame(feature_name))

    # Single concatenation at the end instead of growing a frame in the loop.
    return pd.concat(poly_features, axis=1)

## Hyperparameter setting

In [None]:
# Per-degree results collected by the training loop: the fitted models and
# their test-set metrics, one entry per polynomial degree.
Models = []
MAE = []
MSE = []
R2_score = []

In [None]:
# Empty the accumulators first so that re-running this cell does not append a
# second set of results (which would no longer match the plotted degrees).
for history in (Models, MAE, MSE, R2_score):
    history.clear()

# NOTE(review): the number of generated features grows combinatorially with
# the degree, so the upper degrees of this range may be too expensive to run.
for k_degree in range(1, 10):
    # Prepare dataset
    X_pol = generate_polynomial_features(X, degree=k_degree)

    # Split features and target in a single call so the rows stay paired by
    # construction rather than through two calls sharing a random_state.
    X_train, X_test, y_train, y_test = train_test_split(
        X_pol, y, test_size=0.2, random_state=33
    )

    # Training
    PolynomialModel = LinearRegression().fit(X_train, y_train)
    Models.append(PolynomialModel)

    # Prediction
    y_pred = PolynomialModel.predict(X_test)

    # Validation: scikit-learn metrics take (y_true, y_pred). The order matters
    # for r2_score, which is not symmetric in its arguments.
    MAE.append(np.mean(np.absolute(y_test - y_pred)))
    MSE.append(mean_squared_error(y_test, y_pred))
    R2_score.append(r2_score(y_test, y_pred))

## Visualization

In [None]:
# One panel per metric: MAE, MSE and R2 live on very different scales, so
# sharing a single linear y-axis would flatten the smaller ones.
fig, axes = plt.subplots(1, 3, figsize=(20, 10), sharex=True)

# Derive the x-axis from the results actually collected (degrees start at 1),
# so the plot stays consistent with however many degrees were evaluated.
degrees = list(range(1, len(MAE) + 1))

panels = [
    (MAE, 'bo-', 'Mean absolute error'),
    (MSE, 'go-', 'Mean square error'),
    (R2_score, 'ro-', 'R2-score'),
]

for ax, (values, line_format, label) in zip(axes, panels):
    ax.plot(degrees, values, line_format, label=label)
    ax.set(xlabel="degree", ylabel=label)
    ax.legend()

fig.suptitle('Predicción con Regresión Polinomial')

plt.tight_layout()
plt.show()