<a href="https://colab.research.google.com/github/leandro-driguez/Machine-Learning-Techniques/blob/dev/Lab_1/Sesi%C3%B3n%201/RidgeRegularization.ipynb" target="_parent">
    <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab" />
</a>

In [None]:
import itertools
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import Ridge
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

## Dataset

In [None]:
# Raw-file URL of the house data CSV (kc_house_data.csv) hosted on GitHub
url = 'https://raw.githubusercontent.com/leandro-driguez/Machine-Learning-Techniques/main/Lab_1/Sesi%C3%B3n%201/kc_house_data.csv'
# Download and parse the CSV into a DataFrame
data = pd.read_csv(url)
# Preview the first rows (shown as the notebook cell's output)
data.head()

In [None]:
# Target variable: the sale price
y = data['price']
# Feature matrix: every column except the target and the columns that are
# deliberately left out of the model (id, date, zipcode, sqft_above)
X = data.drop(columns=['id','date','zipcode','sqft_above','price'])

In [None]:
# Rescale every feature to the [0, 1] interval.
# fit_transform returns a plain array, so the column labels are saved first
# and re-attached when the result is wrapped back into a DataFrame.
# NOTE(review): the scaler is fit on the full dataset, before the train/test
# split done later in the notebook, so test-set min/max values influence the
# scaling -- confirm this is acceptable for the experiment.
columns = X.columns
scaler = MinMaxScaler(feature_range=(0, 1))
X = pd.DataFrame(scaler.fit_transform(X), columns=columns)

In [None]:
def generate_polynomial_features(df: pd.DataFrame, degree: int):
    """
    Expand a DataFrame with polynomial and interaction (product) features.

    For every order d from 2 up to ``degree``, each combination with
    replacement of d columns contributes one new column holding the row-wise
    product of the selected columns. A new column is named by joining the
    names of the columns involved with "*" (e.g. "a*a", "a*b").

    Parameters:
    - df (pd.DataFrame): Input DataFrame with the original features.
    - degree (int): Highest order of the products to generate. With degree 1
                    no product column is added.

    Returns:
    - pd.DataFrame: A copy of the original columns followed by the generated
                    product columns, ordered by increasing order.

    Raises:
    - ValueError: If ``df`` is not a DataFrame, or ``degree`` is not an
                  integer greater than or equal to 1.
    """
    # Reject unusable arguments up front
    if not isinstance(df, pd.DataFrame):
        raise ValueError("Input must be a pandas DataFrame.")
    if not isinstance(degree, int) or degree < 1:
        raise ValueError("Degree must be a positive integer.")

    # The first block is the untouched original features (copied so the
    # caller's DataFrame is never shared with the result)
    blocks = [df.copy()]

    for order in range(2, degree + 1):
        column_groups = itertools.combinations_with_replacement(df.columns, order)
        # A repeated name in a group selects that column more than once, so the
        # row-wise product yields the corresponding power (a*a -> a squared)
        blocks.extend(
            df.loc[:, group].prod(axis=1).to_frame(name="*".join(group))
            for group in column_groups
        )

    # Glue all blocks together side by side
    return pd.concat(blocks, axis=1)

## Hyperparameter setting

In [None]:
# Highest polynomial degree evaluated
MAX_DEGREE = 5

# Result accumulators, one independent list each
Models = []
MAE = []
MSE = []
R2_score = []

In [None]:
# Fit one Ridge model per polynomial degree and record its hold-out metrics.
for k_degree in range(1, MAX_DEGREE+1):
    # Prepare dataset: expand the features up to the current degree
    X_pol = generate_polynomial_features(X, degree=k_degree)

    # Split features and target in ONE call so the rows of X and y are
    # guaranteed to stay paired (instead of relying on two separate calls
    # happening to share the same seed)
    X_train, X_test, y_train, y_test = train_test_split(
        X_pol, y, test_size=0.2, random_state=33
    )

    # Training
    LinearModel = Ridge(alpha=0.1).fit(X_train, y_train)
    # Keep the fitted model so it can be inspected after the sweep
    Models.append(LinearModel)

    # Prediction
    y_pred = LinearModel.predict(X_test)

    # Validation. scikit-learn metrics take (y_true, y_pred) in that order;
    # R2 is not symmetric, so swapping the arguments reports a wrong score.
    MAE.append(np.mean(np.absolute(y_test - y_pred)))
    MSE.append(mean_squared_error(y_test, y_pred))
    R2_score.append(r2_score(y_test, y_pred))

## Visualization

In [None]:
# x-axis values: the polynomial degrees 1..MAX_DEGREE
degrees = list(range(1, MAX_DEGREE+1))

# One panel per metric, laid out side by side
fig, axs = plt.subplots(nrows=1, ncols=3, figsize=(20, 5))

# (values, line format, panel title, name used for the legend and the y-axis)
panel_specs = [
    (MAE, 'bo-', 'MAE vs. Degree', 'Mean Absolute Error'),
    (MSE, 'go-', 'MSE vs. Degree', 'Mean Square Error'),
    (R2_score, 'ro-', 'R2-score vs. Degree', 'R2-score'),
]

for ax, (values, line_format, panel_title, metric_name) in zip(axs, panel_specs):
    ax.plot(degrees, values, line_format, label=metric_name)
    ax.set(title=panel_title, xlabel='Degree', ylabel=metric_name)
    ax.legend()

plt.tight_layout()
plt.show()