In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from xgboost import XGBRegressor
from catboost import CatBoostRegressor
from lightgbm import LGBMRegressor
import lightgbm as lgb
from sklearn.ensemble import VotingRegressor

In [2]:
traindf = pd.read_csv('/kaggle/input/rohlik-orders-forecasting-challenge/train.csv')

testdf = pd.read_csv('/kaggle/input/rohlik-orders-forecasting-challenge/test.csv')

In [3]:
import datetime

def date_to_week(date_str):
    date = datetime.datetime.strptime(date_str, "%Y-%m-%d")
    week_number = date.isocalendar()[1]
    return f"{week_number:02d}"


traindf['week_number'] = traindf['date'].apply(date_to_week)
testdf['week_number'] = testdf['date'].apply(date_to_week)

In [4]:
traindf['date'] = pd.to_datetime(traindf['date'])
traindf['date'] = traindf['date'].dt.day_name()

testdf['date'] = pd.to_datetime(testdf['date'])
testdf['date'] = testdf['date'].dt.day_name()

In [5]:
warehouse = {'Prague_1': 1,
'Brno_1':2,
'Prague_2':3,
'Prague_3':4,
'Budapest_1':5,
'Munich_1':6,
'Frankfurt_1':7}

traindf['warehouse'] = traindf['warehouse'].map(warehouse)
testdf['warehouse'] = testdf['warehouse'].map(warehouse)

date = {'Wednesday': 1,
'Thursday':2,
'Saturday':3,
'Tuesday':4,
'Friday':5,
'Monday':6,
'Sunday':7}

traindf['date'] = traindf['date'].map(date)
testdf['date'] = testdf['date'].map(date)

holiday_name = {
"International womens day": 1,
"Christmas Eve": 2,
"2nd Christmas Day": 3,
"Good Friday": 4,
"New Years Day": 5,
"Den osvobozeni": 6,
"Easter Monday": 7,
"Den ceske statnosti": 8,
"Labour Day": 9,
"Cyrila a Metodej": 10,
"Jan Hus": 11,
"Den vzniku samostatneho ceskoslovenskeho statu": 12,
"Den boje za svobodu a demokracii": 13,
"Memorial Day of the Republic": 14,
"Independent Hungary Day": 15,
"Day of National Unity": 16,
"Reformation Day": 17,
"National Defense Day": 18,
"Memorial Day for the Victims of the Holocaust": 19,
"Memorial Day for the Martyrs of Arad": 20,
"Memorial Day for the Victims of the Communist Dictatorships": 21,
"All Saints' Day Holiday": 22,
"1848 Revolution Memorial Day (Extra holiday)": 23,
"Peace Festival in Augsburg": 24}

traindf['holiday_name'] = traindf['holiday_name'].map(holiday_name)
traindf.fillna(0, inplace=True)

testdf['holiday_name'] = testdf['holiday_name'].map(holiday_name)
testdf.fillna(0, inplace=True)

In [6]:
traindf = traindf.drop(['id', 'shutdown', 'mini_shutdown', 'blackout', 'mov_change', 
                        'frankfurt_shutdown', 'precipitation', 'snow', 'user_activity_1',
                       'user_activity_2'], axis=1)

In [7]:
train_X = traindf.drop('orders', axis=1)
train_y = traindf['orders']

In [8]:
test_id = testdf['id']
testdf = testdf.drop('id', axis=1)

In [9]:
print(train_X.head(10))
print('############################################################')
print(testdf.head(10))

   warehouse  date  holiday_name  holiday  shops_closed  \
0          1     3           0.0        0             0   
1          1     7           0.0        0             0   
2          1     6           0.0        0             0   
3          1     4           0.0        0             0   
4          1     1           0.0        0             0   
5          1     2           0.0        0             0   
6          1     5           0.0        0             0   
7          1     3           0.0        0             0   
8          1     7           0.0        0             0   
9          1     6           0.0        0             0   

   winter_school_holidays  school_holidays week_number  
0                       0                0          49  
1                       0                0          49  
2                       0                0          50  
3                       0                0          50  
4                       0                0          50  
5       

In [10]:
testdf['week_number'] = np.float32(testdf['week_number'])
train_X['week_number'] = np.float32(train_X['week_number'])

In [11]:
scaler = StandardScaler()

train_X = scaler.fit_transform(train_X)

testdf = scaler.fit_transform(testdf)

In [12]:
catb_reg = CatBoostRegressor(learning_rate=0.05)


In [13]:
xgb_reg = XGBRegressor(learning_rate=0.05)


In [14]:
lgbm_reg = LGBMRegressor(learning_rate=0.05)


In [15]:
voting_reg = VotingRegressor(estimators=[('catb', catb_reg),
                                         ('lgbm', lgbm_reg), 
                                         ('xgb', xgb_reg)])
 
    
voting_reg.fit(train_X, train_y)

pred = voting_reg.predict(testdf)
 
#mape = mean_absolute_percentage_error(y_test, pred)

0:	learn: 2093.1918373	total: 54.3ms	remaining: 54.3s
1:	learn: 2009.7058587	total: 55.9ms	remaining: 27.9s
2:	learn: 1930.9791242	total: 57.2ms	remaining: 19s
3:	learn: 1855.7280687	total: 58.7ms	remaining: 14.6s
4:	learn: 1789.1652739	total: 60ms	remaining: 11.9s
5:	learn: 1723.0157075	total: 61.4ms	remaining: 10.2s
6:	learn: 1660.5807497	total: 62.8ms	remaining: 8.91s
7:	learn: 1601.5855990	total: 64.2ms	remaining: 7.96s
8:	learn: 1544.6450435	total: 65.6ms	remaining: 7.22s
9:	learn: 1495.7192186	total: 67.1ms	remaining: 6.64s
10:	learn: 1449.8967872	total: 68.6ms	remaining: 6.17s
11:	learn: 1408.6441419	total: 70ms	remaining: 5.77s
12:	learn: 1365.1310134	total: 71.4ms	remaining: 5.42s
13:	learn: 1326.1721065	total: 72.8ms	remaining: 5.13s
14:	learn: 1287.7895608	total: 74.3ms	remaining: 4.88s
15:	learn: 1253.2475262	total: 75.9ms	remaining: 4.67s
16:	learn: 1222.0585260	total: 77.3ms	remaining: 4.47s
17:	learn: 1193.2017553	total: 78.8ms	remaining: 4.3s
18:	learn: 1165.5780832	tot

In [16]:
submission = pd.DataFrame({
    'id': test_id,
    'orders': pred
})

# Save
submission.to_csv('submission.csv', index=False)