In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor

import os
# Print the full path of every file found anywhere below /kaggle/input,
# so the available data files are visible before loading anything.
for folder, _, names in os.walk('/kaggle/input'):
    paths = [os.path.join(folder, name) for name in names]
    for path in paths:
        print(path)

In [None]:
# Parser options shared by the two feature files: rows are keyed by `row_id`
# and the `date` column is parsed into datetimes at load time.
_feature_csv_options = {'index_col': 'row_id', 'parse_dates': ['date']}

train = pd.read_csv(
    '/kaggle/input/tabular-playground-series-jan-2022/train.csv',
    **_feature_csv_options)
test = pd.read_csv(
    '/kaggle/input/tabular-playground-series-jan-2022/test.csv',
    **_feature_csv_options)

# The sample submission is read without date parsing; it is only indexed
# by `row_id`.
submission = pd.read_csv(
    '/kaggle/input/tabular-playground-series-jan-2022/sample_submission.csv',
    index_col='row_id')

In [None]:
# Preview the first five rows of the training frame.
train.head(n=5)

## Let's see what the data looks like

In [None]:
# Re-index by date so that the pivot below uses the date as its row index.
plot_df = train.set_index('date')

# One column per (country, store, product) combination, holding `num_sold`.
series_keys = ['country', 'store', 'product']
sales_wide = plot_df[series_keys + ['num_sold']].pivot(
    columns=series_keys, values='num_sold')

# Trailing semicolon suppresses the Axes repr in the cell output.
sales_wide.plot(figsize=(18, 6));

The series clearly show both short-term and long-term seasonality, as well as some holiday effects (and probably other patterns too). As a simple baseline, we can derive a few date features and fit a RandomForest model on them.

### Create simple time features

In [None]:
def _with_date_parts(frame):
    """Return a new frame in which the ``date`` column is replaced by its parts.

    The ``year``, ``month``, ``day`` and ``dayofweek`` values reported by the
    pandas ``.dt`` accessor are appended as new columns (in that order), and
    the original ``date`` column is dropped.
    """
    stamps = frame['date'].dt
    expanded = frame.assign(
        year=stamps.year,
        month=stamps.month,
        day=stamps.day,
        dayofweek=stamps.dayofweek,
    )
    return expanded.drop(columns='date')


# Split the target off the training frame; `pop` removes the column from
# `train`, so only feature columns remain in it.
y = train.pop('num_sold')

# Apply the same feature engineering to both frames so their columns match.
train = _with_date_parts(train)
test = _with_date_parts(test)

### Encode the categoricals

In [None]:
# Integer code assigned to each known category value.
country_map = {'Finland': 0, 'Sweden': 1, 'Norway': 2}
store_map = {'KaggleMart': 0, 'KaggleRama': 1}
product_map = {'Kaggle Mug': 0, 'Kaggle Hat': 1, 'Kaggle Sticker': 2}

# Which mapping applies to which column.
category_maps = {
    'country': country_map,
    'store': store_map,
    'product': product_map,
}

# `Series.map` silently turns every value that is missing from the dict into
# NaN. That would hide an unexpected category, and it would also wipe the
# columns if this cell were executed a second time (the integers produced by
# the first run are not keys of the mappings). So: compute and validate every
# encoded column first, and only write anything back once all of them are
# known to be complete.
pending = []
for frame_name, frame in (('train', train), ('test', test)):
    for column, codes in category_maps.items():
        encoded = frame[column].map(codes)
        # Values that were present in the input but have no code.
        lost = encoded.isna() & frame[column].notna()
        if lost.any():
            unmapped = list(frame.loc[lost, column].unique())
            raise ValueError(
                f"{frame_name}[{column!r}] contains values with no integer "
                f"code: {unmapped}. Has this cell already been run on this "
                f"frame?")
        pending.append((frame, column, encoded))

for frame, column, encoded in pending:
    frame[column] = encoded

In [None]:
# A random forest makes random choices while it is being built; without a
# fixed seed every run yields different predictions and a different
# submission file. Pin the seed so the notebook is reproducible.
RANDOM_STATE = 42

model = RandomForestRegressor(
    n_estimators=500, n_jobs=-1, random_state=RANDOM_STATE)
model.fit(train, y)

# Start from a copy of the sample submission and overwrite its `num_sold`
# column with the model's predictions for the test rows.
benchmark = submission.copy()
benchmark['num_sold'] = model.predict(test)

benchmark.to_csv('rf_submission.csv')