# In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import mean_absolute_percentage_error
from xgboost import XGBRegressor
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import ElasticNet
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor
from sklearn.linear_model import BayesianRidge
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.pipeline import make_pipeline
from sklearn.neighbors import KNeighborsRegressor
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C
from sklearn.linear_model import TheilSenRegressor
from sklearn.linear_model import RANSACRegressor, LinearRegression
from sklearn.ensemble import StackingRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import VotingRegressor
from pytorch_tabnet.tab_model import TabNetRegressor
import torch
# Deployment
from pytorch_tabnet.tab_model import TabNetRegressor
import torch

def GPR_predict(train_df, test_df, property_index):
    """
    Fit a Gaussian-process regressor for one blend property and predict
    that property for the test set.

    The inputs are the five component fractions plus the five component
    values of the property selected by ``property_index``; the target is
    the ``BlendProperty{property_index}`` column of ``train_df``.

    Args:
        train_df: DataFrame holding the feature columns and the target column.
        test_df: DataFrame holding the same feature columns.
        property_index: Number used to pick the property/target columns.

    Returns:
        The predictions produced by the fitted pipeline for ``test_df``.
    """
    component_ids = range(1, 6)
    feature_cols = [f'Component{c}_fraction' for c in component_ids]
    feature_cols += [f'Component{c}_Property{property_index}' for c in component_ids]
    target_col = f'BlendProperty{property_index}'

    train_inputs = train_df[feature_cols]
    train_target = train_df[target_col]

    # Constant (amplitude) kernel multiplied by an RBF kernel.
    amplitude = C(1.0, (1e-3, 1e3))
    gp_kernel = amplitude * RBF(length_scale=1.0)
    gp = GaussianProcessRegressor(kernel=gp_kernel, n_restarts_optimizer=5, random_state=42)

    # Features are standardized before they reach the Gaussian process.
    pipeline = make_pipeline(StandardScaler(), gp)
    pipeline.fit(train_inputs, train_target)

    return pipeline.predict(test_df[feature_cols])

def NN_predict(train_df, test_df, property_index):
    """
    Train a small feed-forward neural network for a single blend property
    and make predictions on the test set.

    The inputs are the five component fractions plus the five component
    values of the property selected by ``property_index``; the target is
    the ``BlendProperty{property_index}`` column of ``train_df``.

    The random seed is fixed before the model is built so that repeated
    runs give the same predictions, in line with the other predictors in
    this file, which all pin their seed to 42.

    Args:
        train_df: DataFrame holding the feature columns and the target column.
        test_df: DataFrame holding the same feature columns.
        property_index: Number used to pick the property/target columns.

    Returns:
        A flattened array of predictions for ``test_df``.
    """
    component_ids = range(1, 6)
    features = (
        [f'Component{c}_fraction' for c in component_ids]
        + [f'Component{c}_Property{property_index}' for c in component_ids]
    )

    X_train = train_df[features].values
    y_train = train_df[f'BlendProperty{property_index}'].values

    # Seed the Python, NumPy and TensorFlow RNGs: weight initialisation,
    # dropout masks and batch shuffling are otherwise different on each run.
    tf.keras.utils.set_random_seed(42)

    # Build model. An explicit Input layer replaces the `input_shape`
    # argument on the first Dense layer, which newer Keras versions warn about.
    model = Sequential([
        tf.keras.Input(shape=(X_train.shape[1],)),
        Dense(64, activation='relu'),
        Dropout(0.2),
        Dense(64, activation='relu'),
        Dense(1)
    ])

    model.compile(optimizer='adam', loss='mae')

    model.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)

    X_test = test_df[features].values
    # predict() output is flattened to a 1-D array.
    test_predictions = model.predict(X_test, verbose=0).flatten()

    return test_predictions


def ElasticNet_predict(train_df, test_df, property_index):
    """
    Fit an ElasticNet linear model for one blend property and predict
    that property for the test set.

    The inputs are the five component fractions plus the five component
    values of the property selected by ``property_index``; the target is
    the ``BlendProperty{property_index}`` column of ``train_df``. The
    features are passed to the model as-is (no scaling step).

    Args:
        train_df: DataFrame holding the feature columns and the target column.
        test_df: DataFrame holding the same feature columns.
        property_index: Number used to pick the property/target columns.

    Returns:
        The predictions produced by the fitted model for ``test_df``.
    """
    component_ids = range(1, 6)
    feature_cols = [f'Component{c}_fraction' for c in component_ids]
    feature_cols += [f'Component{c}_Property{property_index}' for c in component_ids]
    target_col = f'BlendProperty{property_index}'

    train_inputs = train_df[feature_cols]
    train_target = train_df[target_col]

    # alpha: regularization strength; l1_ratio: mix between the L1 (lasso)
    # and L2 (ridge) penalties.
    regressor = ElasticNet(alpha=1.0, l1_ratio=0.5, random_state=42)
    regressor.fit(train_inputs, train_target)

    return regressor.predict(test_df[feature_cols])

def SVR_RBF_predict(train_df, test_df, property_index):
    """
    Fit a support-vector regressor with an RBF kernel for one blend
    property and predict that property for the test set.

    The inputs are the five component fractions plus the five component
    values of the property selected by ``property_index``; the target is
    the ``BlendProperty{property_index}`` column of ``train_df``.

    Args:
        train_df: DataFrame holding the feature columns and the target column.
        test_df: DataFrame holding the same feature columns.
        property_index: Number used to pick the property/target columns.

    Returns:
        The predictions produced by the fitted pipeline for ``test_df``.
    """
    component_ids = range(1, 6)
    feature_cols = [f'Component{c}_fraction' for c in component_ids]
    feature_cols += [f'Component{c}_Property{property_index}' for c in component_ids]
    target_col = f'BlendProperty{property_index}'

    train_inputs = train_df[feature_cols]
    train_target = train_df[target_col]

    # Features are standardized before they reach the SVR.
    svr = SVR(kernel='rbf', C=1.0, epsilon=0.1)
    pipeline = make_pipeline(StandardScaler(), svr)
    pipeline.fit(train_inputs, train_target)

    return pipeline.predict(test_df[feature_cols])


def RandomForest_predict(train_df, test_df, property_index):
    """
    Fit a random-forest regressor for one blend property and predict
    that property for the test set.

    The inputs are the five component fractions plus the five component
    values of the property selected by ``property_index``; the target is
    the ``BlendProperty{property_index}`` column of ``train_df``.

    Args:
        train_df: DataFrame holding the feature columns and the target column.
        test_df: DataFrame holding the same feature columns.
        property_index: Number used to pick the property/target columns.

    Returns:
        The predictions produced by the fitted forest for ``test_df``.
    """
    component_ids = range(1, 6)
    feature_cols = [f'Component{c}_fraction' for c in component_ids]
    feature_cols += [f'Component{c}_Property{property_index}' for c in component_ids]
    target_col = f'BlendProperty{property_index}'

    train_inputs = train_df[feature_cols]
    train_target = train_df[target_col]

    forest_params = {
        'n_estimators': 100,
        'max_depth': 10,
        'random_state': 42,
        'n_jobs': -1,
    }
    forest = RandomForestRegressor(**forest_params)
    forest.fit(train_inputs, train_target)

    return forest.predict(test_df[feature_cols])

def TabNet_predict(train_df, test_df, property_index):
    """
    Train a TabNet regressor for a single blend property and make
    predictions on the test set.

    The inputs are the five component fractions plus the five component
    values of the property selected by ``property_index``; the target is
    the ``BlendProperty{property_index}`` column of ``train_df``.

    A validation split is held out of ``train_df`` and handed to ``fit``
    as ``eval_set``. Without an evaluation set pytorch-tabnet has no metric
    to monitor, so the ``patience`` argument is ignored and training always
    runs for ``max_epochs``. The trade-off is that the network is fitted on
    80% of the training rows.

    Args:
        train_df: DataFrame holding the feature columns and the target column.
        test_df: DataFrame holding the same feature columns.
        property_index: Number used to pick the property/target columns.

    Returns:
        A flattened array of predictions for ``test_df``.
    """
    component_ids = range(1, 6)
    features = (
        [f'Component{c}_fraction' for c in component_ids]
        + [f'Component{c}_Property{property_index}' for c in component_ids]
    )

    X_all = train_df[features].values
    # The target is reshaped to a single-column 2-D array before fitting.
    y_all = train_df[f'BlendProperty{property_index}'].values.reshape(-1, 1)

    # Hold out a validation set so early stopping has something to monitor.
    X_fit, X_val, y_fit, y_val = train_test_split(
        X_all, y_all, test_size=0.2, random_state=42
    )

    # Fit the scaler on the fitting split only, then reuse it for the
    # validation and test features.
    scaler = StandardScaler()
    X_fit_scaled = scaler.fit_transform(X_fit)
    X_val_scaled = scaler.transform(X_val)

    np.random.seed(42)
    torch.manual_seed(42)

    model = TabNetRegressor(
        optimizer_fn=torch.optim.Adam,
        optimizer_params=dict(lr=2e-2),
        verbose=0,
        seed=42
    )

    model.fit(
        X_train=X_fit_scaled, y_train=y_fit,
        eval_set=[(X_val_scaled, y_val)],
        max_epochs=200,
        patience=20,
        batch_size=256,
        virtual_batch_size=128
    )

    X_test = test_df[features].values
    X_test_scaled = scaler.transform(X_test)
    test_predictions = model.predict(X_test_scaled).flatten()

    return test_predictions


def SVR_Poly_predict(train_df, test_df, property_index):
    """
    Fit a support-vector regressor with a polynomial kernel for one blend
    property and predict that property for the test set.

    The inputs are the five component fractions plus the five component
    values of the property selected by ``property_index``; the target is
    the ``BlendProperty{property_index}`` column of ``train_df``.

    Args:
        train_df: DataFrame holding the feature columns and the target column.
        test_df: DataFrame holding the same feature columns.
        property_index: Number used to pick the property/target columns.

    Returns:
        The predictions produced by the fitted pipeline for ``test_df``.
    """
    component_ids = range(1, 6)
    feature_cols = [f'Component{c}_fraction' for c in component_ids]
    feature_cols += [f'Component{c}_Property{property_index}' for c in component_ids]
    target_col = f'BlendProperty{property_index}'

    train_inputs = train_df[feature_cols]
    train_target = train_df[target_col]

    # Features are standardized before they reach the SVR.
    svr = SVR(kernel='poly', C=1.0, epsilon=0.1)
    pipeline = make_pipeline(StandardScaler(), svr)
    pipeline.fit(train_inputs, train_target)

    return pipeline.predict(test_df[feature_cols])

def Lasso_predict(train_df, test_df, property_index):
    """
    Fit a Lasso linear model for one blend property and predict that
    property for the test set.

    The inputs are the five component fractions plus the five component
    values of the property selected by ``property_index``; the target is
    the ``BlendProperty{property_index}`` column of ``train_df``. The
    features are passed to the model as-is (no scaling step).

    Args:
        train_df: DataFrame holding the feature columns and the target column.
        test_df: DataFrame holding the same feature columns.
        property_index: Number used to pick the property/target columns.

    Returns:
        The predictions produced by the fitted model for ``test_df``.
    """
    # Imported locally, as in the original definition of this function.
    from sklearn.linear_model import Lasso

    component_ids = range(1, 6)
    feature_cols = [f'Component{c}_fraction' for c in component_ids]
    feature_cols += [f'Component{c}_Property{property_index}' for c in component_ids]
    target_col = f'BlendProperty{property_index}'

    train_inputs = train_df[feature_cols]
    train_target = train_df[target_col]

    regressor = Lasso(alpha=1.0, random_state=42)
    regressor.fit(train_inputs, train_target)

    return regressor.predict(test_df[feature_cols])


# Read the training data, the test data and the submission template.
train_df = pd.read_csv("/content/train.csv")
test_df = pd.read_csv("/content/test.csv")
submission_df = pd.read_csv("/content/sample_solution.csv")

# Predictor function assigned to each blend property index.
# ElasticNet_predict and NN_predict are each used for two properties.
models = {
    1: GPR_predict,
    2: NN_predict,
    3: ElasticNet_predict,
    4: SVR_RBF_predict,
    5: RandomForest_predict,
    6: TabNet_predict,
    7: SVR_Poly_predict,
    8: ElasticNet_predict,
    9: Lasso_predict,
    10: NN_predict,
}

# Run each property's predictor in turn (the dict preserves the 1..10
# insertion order) and store its output in the matching submission column.
for i, model_func in models.items():
    print(f"Generating predictions for BlendProperty{i} using {model_func.__name__}...")
    test_predictions = model_func(train_df, test_df, i)
    submission_df[f'BlendProperty{i}'] = test_predictions

# Write the completed submission without the DataFrame index column.
submission_df.to_csv('submission.csv', index=False)

print("Submission file 'submission.csv' created successfully.")