# Template Repository for Research Papers with Python Code

Main Code to reproduce the results in the paper

In [None]:
import logging

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import RandomizedSearchCV
from sklearn.utils.fixes import loguniform
from scipy.stats import uniform
from file_handling import (
    load_data, export_results, serialize_model, deserialize_model)
from preprocessing import select_features

from pyrcn.extreme_learning_machine import ELMRegressor

Set any of the following options to `False` to disable the corresponding functionality (plotting, exporting the results, or serializing the model).

In [None]:
# Run-time switches read by the cells below.
plot = False       # draw the diagnostic scatter plots
export = True      # write the predictions via export_results
serialize = True   # store the model via serialize_model

First, we load the training dataset.

In [None]:
# Load the training set with the project's load_data helper.
training_data = load_data("../data/train.csv")

if plot:
    # First look at the data: living area against sale price.
    fig, axs = plt.subplots()
    sns.scatterplot(
        x="GrLivArea", y="SalePrice", data=training_data, ax=axs)
    axs.set_title("Training data")
    fig.tight_layout()

Next, we fit a scaler that standardizes the input data to zero mean and unit variance, and use it to transform our training data.

In [None]:
# Extract the single input feature and the target; feature_trf is kept so
# that the same selection can be applied to the test data later.
X, y, feature_trf = select_features(
    df=training_data,
    input_features=["GrLivArea"],
    target="SalePrice",
)

# Fit the scaler on the training inputs and standardize them in one step.
scaler = StandardScaler()
X_train = scaler.fit_transform(X)
y_train = y

In case a pre-trained model is available, we can load this.

If not, we train our model. If desired, we serialize the model.

In [None]:
try:
    # Reuse a previously stored model if one is available.
    model = deserialize_model("../results/model.joblib")
except FileNotFoundError:
    # No stored model: run a randomized hyper-parameter search for the ELM.
    # The search is fitted on the *scaled* training inputs (X_train), because
    # the test inputs are passed through the same scaler before prediction.
    model = RandomizedSearchCV(
        estimator=ELMRegressor(input_activation="relu", random_state=42,
                               hidden_layer_size=50),
        param_distributions={"input_scaling": uniform(loc=0, scale=2),
                             "bias_scaling": uniform(loc=0, scale=2),
                             "alpha": loguniform(1e-5, 1e1)},
        random_state=42, n_iter=200, refit=True).fit(X_train, y_train)

if serialize:
    serialize_model(model, "../results/model.joblib")

We can visualize how badly this model performs on the training data.

In [None]:
if plot:
    # Predict on the scaled training inputs, i.e. the same representation
    # that is fed to the model for the test data. Using X_train also keeps
    # this cell independent of later re-bindings of X.
    y_pred = model.predict(X_train)
    fig, axs = plt.subplots()
    # Ground truth first, then the predictions on the same axes.
    sns.scatterplot(
        data=training_data, x="GrLivArea", y="SalePrice", ax=axs)
    plt.title("Training data")
    sns.scatterplot(x=training_data["GrLivArea"], y=y_pred, ax=axs)
    plt.tight_layout()

Next, we load the test dataset, scale it using the fitted scaler and predict the house prices.

In [None]:
# Load the test set with the project's load_data helper.
test_data = load_data("../data/test.csv")
# Apply the feature transformer returned by select_features to the test set.
# NOTE(review): this rebinds X, which previously held the training features.
X = feature_trf.transform(test_data)
# Scale with the scaler fitted on the training inputs (no re-fitting here).
X_test = scaler.transform(X)
y_pred = model.predict(X_test)

We can visualize how badly this model performs on the test data.

In [None]:
if plot:
    fig, axs = plt.subplots()
    # Plot against the raw GrLivArea column, as the training plot and the
    # exported results do, and flatten the predictions to one dimension.
    sns.scatterplot(
        x=test_data["GrLivArea"], y=y_pred.ravel(), ax=axs)
    axs.set_xlabel("GrLivArea")
    axs.set_ylabel("Predicted SalePrice")
    axs.set_title("Test data")
    fig.tight_layout()

Finally, we export our results if desired.

In [None]:
# Pair each test sample's living area with its flattened predicted price.
results = {
    "GrLivArea": test_data["GrLivArea"],
    "PredictedSalePrice": y_pred.ravel(),
}

# Only write the results when exporting is enabled.
if export:
    export_results(results, "../results/results.csv")