In [1]:
from lenskit.algorithms import als
from lenskit import batch, crossfold as xf
from lenskit.metrics.predict import rmse
import pandas as pd
import joblib

from mlcomp.data.load import load_regression_train

In [2]:
df = load_regression_train()
# Optional filters for quick experiments (disabled by default).
# NOTE(review): the cutoff below is a 10-digit (seconds) value while the window
# constants further down are scaled by 1000 -- check the timestamp unit before
# re-enabling it.
# remove everything before 01-01-2016
#df = df.loc[df['timestamp'] > 1451602800]
#df = df[:10000]

# Window geometry, expressed in the unit of df['timestamp'] (seconds * 1000,
# i.e. presumably epoch milliseconds -- TODO confirm).
MS_PER_YEAR = 31536000 * 1000  # 365 days
window_size = 4 * MS_PER_YEAR  # 4 years
window_offset = MS_PER_YEAR // 2  # 1/2 year

# The bounds do not change while the windows are generated, so compute them once.
first_timestamp = df['timestamp'].min()
last_timestamp = df['timestamp'].max()

# Collect the (start, stop) pairs in a plain list and build the frame once,
# rather than enlarging a DataFrame one row at a time.
window_rows = []
current_start = first_timestamp
while current_start < last_timestamp:
    window_rows.append((current_start, current_start + window_size))
    current_start += window_offset

# One row per sliding window; the default 0..n-1 index is the window number.
windows = pd.DataFrame(window_rows, columns=['start', 'stop'])

In [None]:
# Take the first train/test pair of a 2-way user partition that samples a
# 0.15 fraction of rows (xf.SampleFrac) as test data.
# NOTE(review): no random seed is passed, so the split differs between runs.
ttpair_gen = xf.partition_users(df, 2, xf.SampleFrac(0.15))
ttpair = next(ttpair_gen)

train = ttpair.train
test = ttpair.test

# The validation pairs (everything but the rating) and the true ratings.
X_val = test.drop(columns=["rating"])
y_val = test["rating"]

# One (start, stop, fitted model, RMSE) tuple per window that could be scored.
results = []

for index, win in windows.iterrows():
    start_t = win["start"]
    stop_t = win["stop"]

    # Half-open interval [start_t, stop_t): the lower bound is inclusive so that
    # rows stamped exactly at a window start (in particular the very first
    # timestamp of the data set, where window 0 begins) are not dropped.
    in_window = (train["timestamp"] >= start_t) & (train["timestamp"] < stop_t)
    data_in_window = train.loc[in_window]

    # Nothing to learn from: skip instead of fitting a model on an empty frame.
    if data_in_window.empty:
        print(f"Skipping window {index}: no training data in window")
        continue

    bmf = als.BiasedMF(50)
    bmf.fit(data_in_window)

    # batch.predict sometimes raises for reasons not yet understood; the window
    # is skipped, but the error is reported so the cause can be tracked down.
    try:
        pred = batch.predict(bmf, X_val)
    except Exception as exc:
        print(f"Skipping window {index}: {type(exc).__name__}: {exc}")
        continue

    # The predictions come back with a different index than X_val. They are
    # re-labelled by position below, which is only valid if the (user, item)
    # pairs are in the same order as in X_val -- verify that first.
    same_order = (
        len(pred) == len(X_val)
        and (pred["user"].to_numpy() == X_val["user"].to_numpy()).all()
        and (pred["item"].to_numpy() == X_val["item"].to_numpy()).all()
    )
    if not same_order:
        print(f"Skipping window {index}: predictions are not aligned with the test pairs")
        continue

    pred.index = X_val.index
    error = rmse(pred["prediction"], y_val)

    results.append((start_t, stop_t, bmf, error))

    print(f"Done with window {index}")

# Persist the per-window results (including the fitted models) for later analysis.
joblib.dump(results, "time_window_result.joblib")

In [None]:
# Display the collected (start, stop, fitted model, RMSE) tuples, one per scored window.
results

[(1182960009677,
  1309104009677,
  <lenskit.algorithms.als.BiasedMF at 0x2bfa728b400>,
  0.895996211572217),
 (1198728009677,
  1324872009677,
  <lenskit.algorithms.als.BiasedMF at 0x2bfcbfeb970>,
  0.842229946727006),
 (1214496009677,
  1340640009677,
  <lenskit.algorithms.als.BiasedMF at 0x2bfc9ce2980>,
  0.9479959222129786),
 (1451016009677,
  1577160009677,
  <lenskit.algorithms.als.BiasedMF at 0x2bfa6da8f70>,
  0.8956210391318301),
 (1466784009677,
  1592928009677,
  <lenskit.algorithms.als.BiasedMF at 0x2bfa6e7c760>,
  0.8930837773198265),
 (1482552009677,
  1608696009677,
  <lenskit.algorithms.als.BiasedMF at 0x2bfc9ccff70>,
  0.8927663527496429),
 (1498320009677,
  1624464009677,
  <lenskit.algorithms.als.BiasedMF at 0x2bfa20e3a60>,
  0.8927172014899835),
 (1514088009677,
  1640232009677,
  <lenskit.algorithms.als.BiasedMF at 0x2bfc67f9660>,
  0.8971136833153208),
 (1624464009677,
  1750608009677,
  <lenskit.algorithms.als.BiasedMF at 0x2bfcbfe9270>,
  0.6234340810847926)]

## Results:

Run aborted due to an unresolved error; partial results up to that point:
```Python
[(1182960009677,
  1246032009677,
  <lenskit.algorithms.als.BiasedMF at 0x29cfbfff190>,
  nan),
 (1230264009677,
  1293336009677,
  <lenskit.algorithms.als.BiasedMF at 0x29c8f458c40>,
  0.7321834671571964),
 (1277568009677,
  1340640009677,
  <lenskit.algorithms.als.BiasedMF at 0x29ca50313f0>,
  0.8816875338682053)]
```


### R1:
- File: time_window_res/R1.joblib
- Error in `batch.predict` that I have not yet solved
- Only windows 0 to 4 were processed
- Window size: 2 years
- Offset: 1.5 years

### R2
- File: time_window_res/R2.joblib
- The error still persists, so results exist only for windows 0 - 2, 17 - 21 and 28
- Window size: 4 years
- Offset: 0.5 years (consecutive window starts in the output above are 15768000000 ms apart)
