In [None]:
import cudf

# Read the training parquet file into a cuDF DataFrame.
train_path = "../input/ubiquant-parquet/train_low_mem.parquet"
df = cudf.read_parquet(train_path)

# Report the frame's dimensions, then preview its first rows as the cell output.
print(df.shape)
df.head()

In [None]:
# Largest time_id present in the data, shown next to the frame's shape.
latest_time_id = df["time_id"].max()
latest_time_id, df.shape

In [None]:
# Rows with time_id at or below this cutoff are discarded.
DROP_BEFORE = 950

# Keep only rows strictly after the cutoff and renumber the index from zero.
after_cutoff = df["time_id"] > DROP_BEFORE
df = df[after_cutoff].reset_index(drop=True)
df.shape

In [None]:
import cupy
import cuml

print("cuML version:", cuml.__version__)

# Expanding-window split over time_id:
#   fold i trains on every row with time_id <= START + i*WINDOW and
#   validates on the next WINDOW time_ids (START + i*WINDOW, START + (i+1)*WINDOW].
WINDOW = 20
START = 1100
N_SPLITS = 6

# The time_id column does not change between folds, so pull its values out
# once instead of re-extracting them for every comparison inside the loop.
time_ids = df["time_id"].values

# List of (train_positions, val_positions) pairs, one per fold, stored as
# host (NumPy) arrays via cupy.asnumpy.
cv = []

for i in range(N_SPLITS):
    train_end = START + i * WINDOW  # last time_id used for training in this fold
    val_end = train_end + WINDOW    # last time_id used for validation in this fold

    train_ind = cupy.where(time_ids <= train_end)[0]
    val_ind = cupy.where((time_ids > train_end) & (time_ids <= val_end))[0]

    cv.append((cupy.asnumpy(train_ind), cupy.asnumpy(val_ind)))
    print(len(train_ind), len(val_ind))

In [None]:
# Identifier and label columns that must not be fed to the model.
excluded_cols = {"row_id", "target", "investment_id", "time_id"}

# Everything else in the frame is treated as a model input.
features = [name for name in df.columns if name not in excluded_cols]
len(features)

In [None]:
from tqdm import tqdm


def evaluate(val_df):
    """Average the per-time_id correlation between targets and predictions.

    Args:
        val_df: frame with "time_id", "target" and "pred" columns.

    Returns:
        The cupy mean of ``Series.corr`` between "target" and "pred",
        computed separately for each distinct time_id in ``val_df``.
    """

    def _corr_for(time_id):
        # Restrict to the rows of a single time_id before correlating.
        rows = val_df[val_df["time_id"] == time_id]
        return rows["target"].corr(rows["pred"])

    distinct_ids = val_df["time_id"].unique().values_host
    per_time_scores = [_corr_for(time_id) for time_id in distinct_ids]

    return cupy.mean(cupy.array(per_time_scores))


# One score per fold, appended in fold order.
val_scores = []

# Hyperparameters for the per-fold regressor.
svr_params = dict(C=0.1, cache_size=3000.0, epsilon=0.4)

for f, (train_ind, val_ind) in tqdm(enumerate(cv), total=len(cv)):
    # Select this fold's rows by position.
    train_df = df.iloc[train_ind]
    val_df = df.iloc[val_ind]

    # Fit a fresh model on the fold's training rows only.
    model = cuml.SVR(**svr_params)
    model.fit(train_df[features], train_df["target"])

    # Store the raw prediction values (not the returned object itself) as
    # the "pred" column that evaluate() reads.
    y_pred = model.predict(val_df[features])
    val_df["pred"] = y_pred.values

    fold_score = evaluate(val_df).item()
    val_scores.append(fold_score)

# Convert the list of Python floats into a cupy array for later statistics.
val_scores = cupy.array(val_scores)

In [None]:
# Summarise the cross-validation results: every fold's score, then the
# mean and standard deviation across folds.
score_mean = cupy.mean(val_scores)
score_std = cupy.std(val_scores)

print("Validation scores:", val_scores)
print("Mean:", score_mean)
print("STD:", score_std)

In [None]:
# Refit on all rows of df (no hold-out) to obtain the model used for inference.
final_params = dict(C=0.1, cache_size=3000.0, epsilon=0.4)
model = cuml.SVR(**final_params)
model.fit(df[features], df["target"])

In [None]:
import ubiquant
# Create the competition environment and its test-batch iterator.
env = ubiquant.make_env()
iter_test = env.iter_test()

# Each iteration yields a batch of test rows together with a prediction
# frame to fill in; the filled frame is handed back through env.predict.
for test_df, sample_prediction_df in iter_test:
    batch_pred = model.predict(test_df[features])
    sample_prediction_df['target'] = batch_pred
    env.predict(sample_prediction_df)