In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily enumerate the full path of every file beneath the Kaggle input
# directory, then echo each path on its own line.
input_paths = (
    os.path.join(dirname, filename)
    for dirname, _, filenames in os.walk('/kaggle/input')
    for filename in filenames
)
for input_path in input_paths:
    print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Locations of the Rossmann competition CSVs: train, test, store metadata and
# the sample submission, bound in that order.
trainfile, testfile, storefile, subfile = (
    '/kaggle/input/rossmann-store-sales/train.csv',
    '/kaggle/input/rossmann-store-sales/test.csv',
    '/kaggle/input/rossmann-store-sales/store.csv',
    '/kaggle/input/rossmann-store-sales/sample_submission.csv',
)

In [None]:
# Read the train, test and store tables (in that order) and report their shapes.
train, test, store = (
    pd.read_csv(csv_path) for csv_path in (trainfile, testfile, storefile)
)
print(train.shape, test.shape, store.shape)

In [None]:
# Preview the first two rows of the raw training frame.
train.head(2)

In [None]:
# Remove the 'Id' column from the test frame (rebinding `test` to the reduced
# frame), then preview the first two rows.
test = test.drop(columns='Id')
test.head(2)

In [None]:
# Inner-join the training rows with the store table on 'Store' and inspect
# the size and first rows of the result.
traindf = pd.merge(train, store, how="inner", on="Store")
print(traindf.shape)
traindf.head()

In [None]:
# Inner-join the test rows with the store table on 'Store' and inspect
# the size and first rows of the result.
testdf = pd.merge(test, store, how="inner", on="Store")
print(testdf.shape)
testdf.head()

In [None]:
# Preview the first rows of the store metadata table.
store.head()

In [None]:
# Build one first-of-month timestamp per store from the
# CompetitionOpenSinceYear / CompetitionOpenSinceMonth columns.
#
# The previous row-by-row loop formatted "YYYY-M-01" but parsed it with
# format='%Y-%m'; the trailing "-01" does not match that format, and the bare
# `except:` converted every resulting error into NaN without any signal.
# Assembling from year/month/day columns avoids string formats entirely.
# errors='coerce' keeps the best-effort behaviour: rows with a missing or
# invalid year/month become NaT, and the result is always datetime-typed, so
# the later `.dt.strftime` call works even when many values are missing.
competition_parts = pd.DataFrame({
    'year': store['CompetitionOpenSinceYear'],
    'month': store['CompetitionOpenSinceMonth'],
    'day': 1,
})
competition_open = pd.to_datetime(competition_parts, errors='coerce')
competition_open.shape

In [None]:
# Attach the competition-opening dates to the store table, rendered as
# 'YYYYMMDD' text.
store['CompetitionOpen'] = competition_open.dt.strftime('%Y%m%d')

In [None]:
#### Create a new variable called promo: the Promo2 start date per store ###
# Two defects are fixed here:
#   * the result used to be overwritten with `competition_open`, so the promo
#     feature was just a copy of the competition-opening date;
#   * the loop parsed "YYYY-W-01" with format '%Y%W', which never matches, and
#     the bare `except:` hid every failure as NaN.
# %W (week of year, Monday-based) is only honoured by strptime when a weekday
# is supplied as well, so each label ends in "-1" (%w == 1, Monday).
# NOTE(review): this keeps the original %W week convention; confirm whether
# Promo2SinceWeek is meant to be an ISO week number instead.
promo_year = store['Promo2SinceYear']
promo_week = store['Promo2SinceWeek']
has_promo_start = promo_year.notna() & promo_week.notna()

# Only rows with both parts present are formatted; the rest stay missing.
promo_labels = (
    promo_year[has_promo_start].astype(int).astype(str)
    + '-'
    + promo_week[has_promo_start].astype(int).astype(str).str.zfill(2)
    + '-1'
)

# Reindexing back onto the store index fills stores without Promo2 with NaT.
promo = pd.to_datetime(
    promo_labels, format='%Y-%W-%w', errors='coerce'
).reindex(store.index)
print(promo.shape)

In [None]:
# Attach the `promo` dates to the store table, rendered as 'YYYYMMDD' text.
store['PromoSince'] = promo.dt.strftime('%Y%m%d')

In [None]:
# Store-level columns to carry into the modelling frames.
store_features = [
    'Store',
    'StoreType',
    'Assortment',
    'CompetitionDistance',
    'CompetitionOpen',
    'PromoSince',
]
# Left join keeps every training row, whether or not the store lookup matches.
traindf = train.merge(store[store_features], on='Store', how='left')
traindf.head()

In [None]:
# Left join keeps every test row while adding the selected store columns.
testdf = test.merge(store[store_features], on='Store', how='left')
testdf.head()

In [None]:
# Per-store totals from the merged training frame. `store_data_open` is the
# number of non-null 'Open' entries per store (a record count, not a sum of
# the flag), so the "per day" figures below are averages over all records.
grouped_by_store = traindf.groupby('Store')
store_data_sales = grouped_by_store['Sales'].sum()
store_data_customers = grouped_by_store['Customers'].sum()
store_data_open = grouped_by_store['Open'].count()

# Average sales and customers per record, and the ratio between the two.
store_data_sales_per_day = store_data_sales.div(store_data_open)
store_data_customers_per_day = store_data_customers.div(store_data_open)
store_data_sales_per_customer_per_day = store_data_sales_per_day.div(store_data_customers_per_day)

# Attach each per-store series to the store table as a named column.
df_store = store
for feature_name, per_store_values in (
    ('SalesPerDay', store_data_sales_per_day),
    ('CustomersPerDay', store_data_customers_per_day),
    ('SalesPerCustomersPerDay', store_data_sales_per_customer_per_day),
):
    df_store = df_store.merge(
        per_store_values.reset_index(name=feature_name), how='left', on='Store'
    )

In [None]:
# Per-store aggregate columns to append to both modelling frames.
store_features = ['Store', 'SalesPerDay', 'CustomersPerDay', 'SalesPerCustomersPerDay']

# Combined feature list: the test columns followed by the aggregate columns.
# dict.fromkeys removes the duplicate 'Store' while keeping first-seen order;
# the previous list(set(...)) produced an order that depends on string
# hashing and so could differ from one kernel session to the next.
features_x = list(dict.fromkeys(test.columns.tolist() + store_features))

# Left joins keep every row of the train/test frames.
traindf = pd.merge(traindf, df_store[store_features], how='left', on=['Store'])
testdf = pd.merge(testdf, df_store[store_features], how='left', on=['Store'])

In [None]:
# Remove the 'Customers' column from the training frame (rebinding `traindf`),
# then compare frame shapes and preview the result.
traindf = traindf.drop(columns='Customers')
print(traindf.shape, testdf.shape)
traindf.head()

In [None]:
# Preview the first rows of the merged test frame.
testdf.head()

# Let's use featurewiz to create new date-time columns

In [None]:
# Install featurewiz through a pip shell command.
# NOTE(review): no version is pinned, so the installed release may change
# between runs.
!pip install featurewiz

In [None]:
# Name of the column passed to featurewiz as the time-series column.
ts_column = 'Date'

In [None]:
import featurewiz as FW

In [None]:
# Run featurewiz's FE_create_time_series_features on the training frame,
# starting from an empty `ts_adds_in` list. Both returned values are kept:
# the transformed frame and the `ts_adds_in` value reused for the test frame.
train_ts_output = FW.FE_create_time_series_features(traindf, ts_column, ts_adds_in=[])
traindf2, ts_adds_in = train_ts_output
print(traindf2.shape)
traindf2.head()

In [None]:
# Apply the same featurewiz call to the test frame, passing the `ts_adds_in`
# value obtained from the training run; the second return value is discarded.
test_ts_output = FW.FE_create_time_series_features(testdf, ts_column, ts_adds_in)
testdf2, _ = test_ts_output
print(testdf2.shape)
testdf2.head()

In [None]:
# Remove the minute and hour columns from the test frame (rebinding `testdf2`).
testdf2 = testdf2.drop(columns=['Date_minute', 'Date_hour'])

# Now install Deep AutoViML

In [None]:
# Install deep_autoviml through a pip shell command.
# NOTE(review): no version is pinned, so the installed release may change
# between runs.
!pip install deep_autoviml

In [None]:
from deep_autoviml import deep_autoviml as deepauto

In [None]:
#traindf2.to_csv('rossmann_train.csv', index=False)
#testdf2.to_csv('rossmann_test.csv', index=False)

In [None]:
# Settings shared by the deep_autoviml training and prediction cells.
target = "Sales"
project_name = "Rossmann"
# Guidance carried over from the original remark: always try "fast" first,
# then "fast1", "fast2", etc.
# NOTE(review): the value actually used is "auto" -- confirm which is intended.
keras_model_type = "auto"
keras_options = dict(early_stopping=True)
model_options = dict(nlp_char_limit=50, max_trials=10)

In [None]:
# Train on the engineered training frame with `target` as the label column.
# `project_name` now comes from the shared configuration variable rather than
# the hard-coded "deep_autoviml" literal, so this call and the later
# deepauto.predict call refer to the same project.
# NOTE(review): with save_model_flag=True the model is presumably written
# under the project name -- confirm against the deep_autoviml documentation.
model, cat_vocab_dict = deepauto.fit(
    traindf2,
    target,
    keras_model_type=keras_model_type,
    project_name=project_name,
    keras_options=keras_options,
    model_options=model_options,
    save_model_flag=True,
    use_my_model='',
    model_use_case='',
    verbose=0,
)

In [None]:
# Run the fitted model on the engineered test frame, passing the same model
# type and the `cat_vocab_dict` returned by the fit call.
predict_kwargs = dict(
    test_dataset=testdf2,
    keras_model_type=keras_model_type,
    cat_vocab_dict=cat_vocab_dict,
)
predictions = deepauto.predict(model, project_name, **predict_kwargs)