In [1]:
import json
import pickle

import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

In [2]:
def get_dataframe(cfg: dict) -> pd.DataFrame:
    """Load the tide CSV named in ``cfg`` and attach an integer ``label`` column.

    The label is derived from the ``"tide level"`` column with fixed,
    strictly-greater-than thresholds:

    * ``1`` -- tide level > 200 (Mojiko Retro Cruise)
    * ``2`` -- tide level > 150 (Kanmon Strait cruising)
    * ``3`` -- tide level > 100 (Ganryujima landing)
    * ``4`` -- everything else, including missing values (service suspended)

    Args:
        cfg: Configuration mapping; ``cfg["data_path"]`` is the CSV path.
            The first CSV column is used as the frame's index.

    Returns:
        The loaded frame with the additional ``label`` column.

    Raises:
        KeyError: If ``cfg`` has no ``"data_path"`` entry or the CSV has no
            ``"tide level"`` column.
    """
    df = pd.read_csv(cfg["data_path"], index_col=0)

    # Work positionally (fresh 0..n-1 index) so the labelling does not depend
    # on what the CSV's first column contains: timestamps, non-contiguous
    # integers, or duplicate keys all behave the same.
    tide = df["tide level"].reset_index(drop=True)

    # Start from the fallback label and overwrite from the lowest threshold
    # upwards; the highest threshold a value exceeds therefore wins.
    # NaN compares False against every threshold and keeps the fallback.
    labels = pd.Series(4, index=tide.index, dtype="int64")  # service suspended
    for threshold, label in (
        (100.00, 3),  # Ganryujima landing
        (150.00, 2),  # Kanmon Strait cruising
        (200.00, 1),  # Mojiko Retro Cruise
    ):
        labels = labels.mask(tide > threshold, label)

    # Assign the raw values so no index alignment happens on the way back.
    df["label"] = labels.to_numpy()

    return df


def make_datasets(df: pd.DataFrame, cfg: dict) -> dict:
    """Build lagged tide-level features and a random train/test split.

    Feature columns (in this order) are the tide level shifted by 1..12 rows
    (named ``"tide level shift {i}h"``), the tide level shifted by 8570 rows
    (named ``"tide level shift 1y"``) and the existing ``"moon phase"`` column.
    Targets are the ``"tide level"`` and ``"label"`` columns. Rows containing
    any NaN after the shifts are discarded and the frame is re-indexed from 0.

    Args:
        df: Input frame. It must contain the columns dropped below as well as
            ``"tide level"``, ``"label"`` and ``"moon phase"``. It is not
            modified.
        cfg: Configuration mapping; ``cfg["split_rate"]`` is forwarded as
            ``test_size`` and ``cfg["seed"]`` as ``random_state`` to
            ``train_test_split``.

    Returns:
        A dict with the keys ``"datasets"`` (the cleaned frame), ``"X"``,
        ``"X_cols"``, ``"y"``, ``"y_cols"``, ``"X_train"``, ``"y_train"``,
        ``"X_test"`` and ``"y_test"``.

    Raises:
        KeyError: If a required column or config entry is missing.
    """
    # ``drop`` already returns a new frame, so the caller's ``df`` is left
    # untouched without an extra ``copy()``.
    pre_df = df.drop(
        [
            "longitude", "calendar", "JMA", "MIRC",
            "rainfall(mm)", "temperature(℃)",
        ],
        axis=1,
    )

    X_cols = []
    y_cols = ["tide level", "label"]
    for i in range(1, 13):
        title = f"tide level shift {i}h"
        X_cols.append(title)
        pre_df[title] = pre_df["tide level"].shift(i)

    # NOTE(review): the column is labelled "1y" but the shift is 8570 rows; if
    # rows are hourly, a 365-day year would be 8760 rows -- confirm the
    # intended offset. The value is kept unchanged here.
    X_cols.append("tide level shift 1y")
    pre_df["tide level shift 1y"] = pre_df["tide level"].shift(8570)
    X_cols.append("moon phase")

    # ``drop=True`` discards the old index directly. The previous
    # ``reset_index().drop("index", axis=1)`` raised KeyError whenever the
    # index carried a name, because the restored column is then not "index".
    pre_df = pre_df.dropna().reset_index(drop=True)
    X = pre_df[X_cols]
    y = pre_df[y_cols]
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=cfg["split_rate"], random_state=cfg["seed"]
    )

    out = {
        "datasets": pre_df,
        "X": X,
        "X_cols": X_cols,
        "y": y,
        "y_cols": y_cols,
        "X_train": X_train,
        "y_train": y_train,
        "X_test": X_test,
        "y_test": y_test,
    }

    return out


def LRModel(cfg: dict, load_model=False, filename=None, X=None, y=None):
    """Return a linear-regression model, either unpickled from disk or freshly fitted.

    Args:
        cfg: Configuration mapping. When fitting, ``cfg["params"]`` must hold
            ``fit_intercept``, ``copy_X``, ``n_jobs`` and ``positive``.
            It is not read when ``load_model`` is true.
        load_model: If true, unpickle and return the object stored in
            ``filename`` instead of fitting.
        filename: Path of the pickle file; only used when ``load_model`` is
            true.
        X: Training features passed to ``fit``; only used when fitting.
        y: Training target passed to ``fit``; only used when fitting.

    Returns:
        The unpickled object when ``load_model`` is true, otherwise the fitted
        ``LinearRegression`` instance.
    """
    if load_model:
        # SECURITY: unpickling can execute arbitrary code; only load model
        # files that come from a trusted source.
        # The context manager closes the handle, which the previous
        # ``pickle.load(open(...))`` never did.
        with open(filename, "rb") as model_file:
            return pickle.load(model_file)

    params = cfg["params"]
    # NOTE: ``bool()`` is truthy for any non-empty string, so these config
    # values should be real JSON booleans (or 0/1), not "true"/"false" text.
    model = LinearRegression(
        fit_intercept=bool(params["fit_intercept"]),
        copy_X=bool(params["copy_X"]),
        n_jobs=params["n_jobs"],
        positive=bool(params["positive"]),
    )
    model.fit(X, y)
    return model

In [3]:
# Load the default LinearRegression configuration.
# JSON is UTF-8 by specification, so decode it explicitly instead of relying
# on the platform's locale encoding.
filename = "../config/default/LinearRegression.json"
with open(filename, encoding="utf-8") as f:
    cfg = json.load(f)

In [4]:
# Point the config at the preprocessed CSV, then build the labelled frame and
# the feature/target bundle (including the train/test split) from it.
cfg.update(data_path="../data/Full_2011-2021/preprocessed_same.csv")
df = get_dataframe(cfg)
out = make_datasets(df=df, cfg=cfg)

In [5]:
# Fit the regressor on the training split. Only the numeric "tide level"
# target is used; the "label" column of y_train is left out.
model = LRModel(
    cfg,
    load_model=False,
    X=out["X_train"],
    y=out["y_train"]["tide level"],
)