In [1]:
#| default_exp app_v1

#### Competition

[Kaggle Playground Series S3E11 – competition overview](https://www.kaggle.com/competitions/playground-series-s3e11/overview)

#### Imports

In [2]:
#| export
from fastai.tabular.all import *

#### Downloading Datasets

In [3]:
#| export
# Install fastkaggle on demand, then pull its helpers into the namespace.
try: import fastkaggle
except ModuleNotFoundError:
    # Run pip through the interpreter that is executing this code so the package
    # lands in the active environment; unlike `!pip`, this is plain Python and
    # does not rely on IPython shell syntax.
    import importlib, subprocess, sys
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', '-Uq', 'fastkaggle'])
    importlib.invalidate_caches()  # let the import system see the fresh install

from fastkaggle import *

In [4]:
#| export
# Kaggle competition slug; also reused below as the data-folder entry in .gitignore.
comp = 'playground-series-s3e11'
# `path` is the folder the train/test CSVs are read from later in the notebook.
# presumably setup_comp downloads the competition data when it is not already
# present locally — TODO confirm against the fastkaggle docs.
path = setup_comp(comp, install='fastai')

In [5]:
# Keep the downloaded competition data out of git: seed .gitignore from the
# home-directory template when the project has none, then append the data folder.
import shutil
from pathlib import Path

gitignore_path = Path('.gitignore')
if not gitignore_path.exists():
    template_path = Path.home()/'.gitignore'
    # Fall back to an empty file so the read below cannot fail when there is no template.
    if template_path.exists(): shutil.copy(template_path, gitignore_path)
    else: gitignore_path.touch()

ignored = gitignore_path.read_text()  # read_text closes the file handle for us
if comp not in ignored:
    # Start on a fresh line so the entry is not fused onto the previous pattern,
    # and end with a newline so later appends stay on their own lines.
    separator = '' if not ignored or ignored.endswith('\n') else '\n'
    with gitignore_path.open('a') as f: f.write(f'{separator}{comp}\n')

#### Create Dataframes

In [6]:
#| export
# Load the competition's train and test CSVs from the data folder in one pass.
df_train, df_test = (
    pd.read_csv(path/csv_name, low_memory=False)
    for csv_name in ('train.csv', 'test.csv')
)

In [7]:
# Preview the first rows of the raw training data (features plus the `cost` target).
df_train.head()

Unnamed: 0,id,store_sales(in millions),unit_sales(in millions),total_children,num_children_at_home,avg_cars_at home(approx).1,gross_weight,recyclable_package,low_fat,units_per_case,store_sqft,coffee_bar,video_store,salad_bar,prepared_food,florist,cost
0,0,8.61,3.0,2.0,2.0,2.0,10.3,1.0,0.0,32.0,36509.0,0.0,0.0,0.0,0.0,0.0,62.09
1,1,5.0,2.0,4.0,0.0,3.0,6.66,1.0,0.0,1.0,28206.0,1.0,0.0,0.0,0.0,0.0,121.8
2,2,14.08,4.0,0.0,0.0,3.0,21.3,1.0,0.0,26.0,21215.0,1.0,0.0,0.0,0.0,0.0,83.51
3,3,4.02,3.0,5.0,0.0,0.0,14.8,0.0,1.0,36.0,21215.0,1.0,0.0,0.0,0.0,0.0,66.78
4,4,2.13,3.0,5.0,0.0,3.0,17.0,1.0,1.0,20.0,27694.0,1.0,1.0,1.0,1.0,1.0,111.51


In [8]:
#| export
# Stack train on top of test with a fresh 0..n-1 index, so train rows occupy the
# first len(df_train) positions and test rows the remainder.
df_comb = pd.concat([df_train, df_test], ignore_index=True)

In [9]:
# Sanity check: concatenation must neither drop nor duplicate rows. The assert
# stops the run on a mismatch instead of merely displaying False.
rows_match = len(df_train) + len(df_test) == len(df_comb)
assert rows_match, 'df_comb row count differs from df_train + df_test'
rows_match

True

In [10]:
#| export
# Row positions of the training rows inside df_comb (train was concatenated first).
train_idxs = np.arange(len(df_train))

In [11]:
#| export
# Row positions of the test rows inside df_comb: everything after the training rows.
test_idxs = np.arange(len(df_train), len(df_comb))

In [12]:
#| export
# Name of the target column; passed to cont_cat_split and TabularPandas below.
dep_var = 'cost'

In [13]:
#| export
# Preprocessing steps handed to TabularPandas, listed in the order given here.
procs = [Categorify, FillMissing, Normalize]

In [14]:
#| export
# Partition the feature columns of the combined frame into continuous and
# categorical name lists, leaving the target (`dep_var`) out of both.
# NOTE(review): the `to.train.xs.head()` output further down shows a normalized
# `id` column, so the row identifier is being used as a model feature — confirm
# that this is intended.
cont, cat = cont_cat_split(df_comb, max_card=1, dep_var=dep_var)

In [18]:
#| export
# Hold out 20% of the training rows for validation. The fixed seed pins the
# shuffle so the split, and every metric computed on it, is identical on each
# run. df_train is passed to range_of only to enumerate its row positions.
splits = RandomSplitter(valid_pct=0.2, seed=42)(range_of(df_train))

In [15]:
#| export
# Re-derive the train and test frames from the combined frame by row position.
df_train, df_test = (df_comb.iloc[row_idxs] for row_idxs in (train_idxs, test_idxs))

In [16]:
# Sanity check: the re-sliced frames must still account for every combined row.
# The assert stops the run on a mismatch instead of merely displaying False.
rows_match = len(df_train) + len(df_test) == len(df_comb)
assert rows_match, 'df_train + df_test no longer cover df_comb'
rows_match

True

In [19]:
#| export
# Build the preprocessed tabular dataset from the training frame: `procs` are the
# transforms, `cat`/`cont` name the feature columns, `dep_var` is the target, and
# `splits` holds the train/validation row indices.
to = TabularPandas(df_train, procs, cat, cont, y_names=dep_var, splits=splits)

In [20]:
# Inspect the processed training features to confirm the transforms were applied.
to.train.xs.head()

Unnamed: 0,id,store_sales(in millions),unit_sales(in millions),total_children,num_children_at_home,avg_cars_at home(approx).1,gross_weight,recyclable_package,low_fat,units_per_case,store_sqft,coffee_bar,video_store,salad_bar,prepared_food,florist
7100,-1.664502,0.424008,-1.330779,1.708532,-0.567639,-0.187767,1.706371,0.872274,-0.698752,-0.389928,-1.168067,0.877464,-0.620148,-1.010584,-1.01064,-1.006658
10505,-1.631776,-1.180566,-0.056532,-0.306484,-0.567639,-0.187767,-0.351752,-1.146428,-0.698752,-0.194073,-0.082972,0.877464,1.612517,0.989526,0.989471,0.993386
342672,1.560756,0.043262,-0.056532,-0.306484,-0.567639,-0.187767,-1.136006,0.872274,-0.698752,0.099709,-0.85036,0.877464,1.612517,0.989526,0.989471,0.993386
93177,-0.837196,-1.576421,-1.330779,-1.649827,-0.567639,-0.187767,0.623148,-1.146428,-0.698752,1.372765,0.436715,0.877464,1.612517,0.989526,0.989471,0.993386
170469,-0.094326,-1.47368,-1.330779,-0.306484,1.080581,-0.187767,-0.200101,0.872274,-0.698752,-1.271275,1.393354,-1.139647,-0.620148,-1.010584,-1.01064,-1.006658


In [21]:
# Export this notebook's `#| export` cells through nbdev, passing 'app_v1' as the
# library path, then report completion.
from nbdev.export import nb_export

nb_export('media_campaign_cost.ipynb', 'app_v1')
print("export successful")

export successful
