In [1]:
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,Normalizer
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.kernel_ridge import KernelRidge
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics.pairwise import chi2_kernel, rbf_kernel, linear_kernel, polynomial_kernel, sigmoid_kernel

In [2]:
df_train = pd.read_csv("data/train.csv")
df_test = pd.read_csv("data/test.csv")

In [3]:
X_columns_date = ['Date', 'Open', 'High', 'Low', 'Volume']
X_columns = ['Open', 'High', 'Low', 'Volume']
X_values = df_train[X_columns].values
X_values_date = df_train[X_columns_date].values
X_test_values=df_test[X_columns].values

In [4]:
scaler = StandardScaler()
normalizer = Normalizer()

In [5]:
preprocessing_pipe = Pipeline([("scaler", scaler), ("normalizer", normalizer)])
preprocessing_pipe.fit(X_values)
X_transformed = preprocessing_pipe.transform(X_values)
X_test_tranformed=preprocessing_pipe.transform(X_test_values)
y = df_train["Close"].values

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X_values, y, test_size=0.15, random_state=0)
X_train_transformed, X_test_transformed, y_train, y_test = train_test_split(X_transformed, y, test_size=0.15, random_state=0)

In [7]:
X_train.shape, X_test.shape

((1275, 4), (225, 4))

In [8]:
def getRidgeRegressorConfigTrained():
    # https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Ridge.html#sklearn.linear_model.Ridge
    model = Ridge(random_state=0, max_iter=None, alpha=1.8000000000000003, positive= False)
    return model

def getLassoRegressorConfigTrained():
    model = Lasso(random_state=0, max_iter=10000,alpha=0.1, positive=False, precompute=False, selection="cyclic",tol=0)
    return model

def getKernelRidgeRegressorConfigTrained():
    model = KernelRidge(alpha=0.7000000000000001,degree=3, gamma=0.1, kernel="linear")
    return model

def getKNeighboursRegressorConfigTrained():
    model = KNeighborsRegressor(n_jobs=-1, p=2,algorithm="ball_tree",n_neighbors=8, weights="uniform")
    return model


In [9]:
functions = [name for (name, thing) in locals().items() if callable(thing)]
model_functions = [eval(function) for function in functions if function.endswith("RegressorConfigTrained")]

In [10]:
model_functions

[<function __main__.getRidgeRegressorConfigTrained()>,
 <function __main__.getLassoRegressorConfigTrained()>,
 <function __main__.getKernelRidgeRegressorConfigTrained()>,
 <function __main__.getKNeighboursRegressorConfigTrained()>]

In [11]:
X_transformed = preprocessing_pipe.transform(X_values)
X_test_tranformed=preprocessing_pipe.transform(X_test_values)
X_train2=X_transformed[:1400]
X_test2=X_transformed[1400:]
y = df_train["Close"].values
y_train2=y[:1400]
y_test2=y[1400:]
for model in model_functions:
    model=model()
    model.fit(X_train2,y_train2)
    y_pred= model.predict(X_test2)
    print(model, mean_squared_error(y_pred,y_test2)**0.5)

Ridge(alpha=1.8000000000000003, random_state=0) 63.9885577265469
Lasso(alpha=0.1, max_iter=10000, random_state=0, tol=0) 64.28071661565224
KernelRidge(alpha=0.7000000000000001, gamma=0.1) 258.9722055084847


  model = cd_fast.enet_coordinate_descent(


KNeighborsRegressor(algorithm='ball_tree', n_jobs=-1, n_neighbors=8) 38.32127632940132


In [12]:
for model in model_functions:
    model=model()
    model.fit(X_transformed,y)
    y_pred= model.predict(X_test_transformed)
    df_submission=pd.read_csv("sample_submission.csv")
    df_submission["Close"]=y_pred
    df_submission.to_csv(f"submission_{model}.csv",index=False)

ValueError: Length of values (225) does not match length of index (30)

In [13]:
linear=LinearRegression()
linear.fit(X_transformed,y)
X_test_values=df_test[X_columns].values
X_test_transformed=preprocessing_pipe.transform(X_test_values)
y_pred_test=linear.predict(X_test_transformed)
df_submission=pd.read_csv("sample_submission.csv")
df_submission["Close"]=y_pred_test
df_submission.to_csv("submission.csv",index=False)

In [14]:
for model in model_functions:
    model=model()
    model.fit(X_transformed,y)
    y_pred= model.predict(X_test_transformed)
    df_submission=pd.read_csv("sample_submission.csv")
    df_submission["Close"]=y_pred
    df_submission.to_csv(f"submission_{model}.csv",index=False)

  model = cd_fast.enet_coordinate_descent(


Grid search code for models

In [15]:
# https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.SGDRegressor.html#sklearn.linear_model.SGDRegressor
# https://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeRegressor.html#sklearn.tree.DecisionTreeRegressor
# https://scikit-learn.org/stable/modules/gaussian_process.html#gaussian-process-regression-gpr

def getLinearRegressorConfig():
    model = LinearRegression(n_jobs=-1)
    parameters = {
        "positive": [True, False]
    }
    return model, parameters

def getRidgeRegressorConfig():
    # https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Ridge.html#sklearn.linear_model.Ridge
    model = Ridge(random_state=0, max_iter=None)
    parameters = {
        "alpha": np.arange(0.1, 3.1, 0.1),
        "positive": [True, False]
        # "solver": ["svd", "cholesky", ]
    }
    return model, parameters

def getLassoRegressorConfig():
    model = Lasso(random_state=0, max_iter=10000)
    parameters = {
        "alpha": np.arange(0.1, 3.1, 0.1),
        "precompute": ["auto", True, False],
        "tol": [1e-4, 1e-3, 1e-2, 1e-1, 0, 1e1, 1e2, 1e3],
        "positive": [True, False],
        "selection": ["cyclic", "random"]
    }
    return model, parameters

def getKernelRidgeRegressorConfig():
    model = KernelRidge()
    parameters = {
        "alpha": np.arange(0.1, 3.1, 0.1),
        "kernel": ["linear", rbf_kernel, polynomial_kernel, sigmoid_kernel, chi2_kernel],
        "gamma": np.arange(0.1, 3.1, 0.1),
        "degree": [3, 4, 5]
    }
    return model, parameters

def getSVMRegressorConfig():
    model = SVR(max_iter=10000)
    parameters = {
        "epsilon": np.arange(0.1, 3.1, 0.1),
        "tol": [1e-4, 1e-3, 1e-2, 1e-1, 0, 1e1, 1e2, 1e3],
        "C": np.arange(0.1, 3.1, 0.1),
        "kernel":["rbf","poly","linear","sigmoid","precomputed"],
	"degree":[3,4,5]
    }
    return model, parameters

def getKNeighboursRegressorConfig():
    model = KNeighborsRegressor(n_jobs=-1, p=2)
    parameters = {
        "n_neighbors": [3, 4, 5, 6, 7, 8],
        "weights": ["uniform", "distance"],
        "algorithm": ["ball_tree", "kd_tree", "brute"]
        # "leaf_size": [25, 30, 35, 40, 45, 50]
    }
    return model, parameters

In [18]:
functions = [name for (name, thing) in locals().items() if callable(thing)]
functions
model_functions = [eval(function) for function in functions if function.endswith("RegressorConfig")]
model_functions

[<function __main__.getLinearRegressorConfig()>,
 <function __main__.getRidgeRegressorConfig()>,
 <function __main__.getLassoRegressorConfig()>,
 <function __main__.getKernelRidgeRegressorConfig()>,
 <function __main__.getSVMRegressorConfig()>,
 <function __main__.getKNeighboursRegressorConfig()>]

In [11]:
# for model_function in model_functions:
#     try:
#         model, parameters = model_function()
#         search = GridSearchCV(model, parameters, n_jobs=-1, refit=True)
#         search.fit(X_train, y_train)
#         print(search.best_params_, search.best_score_)
#     except Exception as error_message:
#         print(f"Error in model {model_function}: {error_message}")