In [None]:
# Mount Google Drive into the Colab runtime at /content/drive; the dataset
# archive is read from there and the trained model is copied back to it.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!unzip /content/drive/MyDrive/machine-learning-data/sales-forecasting/sales-data.zip -d data

In [None]:
!pip install PyGithub

In [4]:
import os
from getpass import getpass

from github import Github, Auth

# The DataGenerator module is fetched from the sales-project repo at a pinned
# ref so the notebook keeps running against the same version of the code.
GENERATOR_PATH = 'sales-model/src/data_generator.py'
GENERATOR_REF = 'a07dd52'
generator_file = os.path.basename(GENERATOR_PATH)

# Only hit the network when the module has not been downloaded yet; an
# existing local copy is left untouched.
if not os.path.exists(generator_file):
    # Never hardcode the token (an empty placeholder cannot authenticate
    # gh.get_user() anyway): read it from the environment, or prompt for it
    # without echoing so it is never stored in the notebook.
    token = (os.environ.get('GITHUB_TOKEN') or getpass('GitHub token: ')).strip()
    if not token:
        raise ValueError(
            'A GitHub token is required to download ' + generator_file
        )
    auth = Auth.Token(token)

    # Public web Github
    with Github(auth=auth) as gh:
        # Get sales-project repo of the authenticated user
        repo = gh.get_user().get_repo('sales-project')

        # Get data generator source from repo
        content = repo.get_contents(path=GENERATOR_PATH, ref=GENERATOR_REF)
        rawdata = content.decoded_content

    # Save data generator next to the notebook so it can be imported
    with open(generator_file, 'wb') as f:
        f.write(rawdata)

In [None]:
from data_generator import DataGenerator

# Input tables extracted from the dataset archive
SALES_CSV = 'data/sales_train.csv'
ITEMS_CSV = 'data/items.csv'

# Fractions passed to split_generator at each stage
# NOTE(review): how split_generator partitions its parent is defined in
# data_generator.py, which is not shown here — confirm the resulting sizes.
VAL_FRACTION = 0.3
TEST_FRACTION = 0.33

# Train data generator
train_gen = DataGenerator(SALES_CSV, ITEMS_CSV)

# Validation data generator, derived from the train generator
val_gen = train_gen.split_generator(VAL_FRACTION, shuffle=True)

# Test data generator, derived from the validation generator
test_gen = val_gen.split_generator(TEST_FRACTION, shuffle=True)

In [6]:
import numpy as np
import pandas as pd

# Load the item_price column of the training table; the values are used
# later to fit the bin boundaries of the price discretization layer.
# The relative path matches the directory the archive was extracted to and
# the path given to DataGenerator, so the cell does not depend on the
# runtime's working directory being /content.
prices = pd.read_csv('data/sales_train.csv', usecols=['item_price'])

# Select the column explicitly instead of squeezing the frame: the result is
# always a 1-D array, even when the table holds a single row.
prices = prices['item_price'].to_numpy()

In [None]:
from keras import Model, Input
from keras.layers import Concatenate, Dense, Discretization, GRU, Hashing

# --- Sales branch: (seq_len, 12) input summarized by a 16-unit GRU ---
sales_input = Input((train_gen.seq_len, 12), name='sales')
sales_features = GRU(16, name='gru_layer')(sales_input)

# --- Items branch: single value hashed into one of 16 bins ---
items_input = Input((1,), name='items')
item_features = Hashing(16, name='hashing_layer')(items_input)

# --- Prices branch: single value bucketed into 16 bins ---
prices_input = Input((1,), name='prices')
discrete_layer = Discretization(num_bins=16, name='discrete_layer')
# Bin boundaries are fitted on the `prices` array before the layer is used
discrete_layer.adapt(prices)
price_features = discrete_layer(prices_input)

# NOTE(review): the hashing and discretization branches look like they feed
# raw bin indices into the dense layer, and the output uses ReLU — confirm
# both choices are intended for this regression target.
merged = Concatenate(name='concatenate')(
    [sales_features, item_features, price_features]
)
hidden = Dense(256, activation='relu', name='hidden_layer')(merged)
pred = Dense(1, activation='relu', name='prediction')(hidden)

# Assemble the three-input, single-output model and print its layer table
model = Model(inputs=[sales_input, items_input, prices_input], outputs=pred)
model.summary()

In [8]:
# Configure training: Adam optimizer minimizing mean squared error
model.compile(
    optimizer='adam',
    loss='mse',
)

In [None]:
from keras.callbacks import EarlyStopping

# Stop when the monitored validation loss has not improved for 3 epochs and
# roll the model back to the best epoch, so the weights kept after training
# (and saved afterwards) are the best ones seen rather than the last ones.
early_stopping = EarlyStopping(patience=3, restore_best_weights=True)

# Fit model
# batch_size / validation_batch_size are deliberately not passed: the data is
# supplied by generator objects that produce their own batches, and Keras
# documents that these arguments must not be specified for such inputs.
history = model.fit(
    x=train_gen,
    epochs=100,
    callbacks=[early_stopping],
    validation_data=val_gen,
)

In [None]:
import matplotlib.pyplot as plt

def plot_history(history, *args):
    """Plot the per-epoch curves of the requested training metrics.

    Args:
        history: Object whose ``history`` attribute maps metric names to
            sequences of values (e.g. the object returned by ``model.fit``).
        *args: Names of the metrics to draw, one line per name; each name is
            also used as the legend label of its line.

    Returns:
        None. The figure is shown via ``plt.show()``.
    """
    recorded = history.history
    for metric in args:
        plt.plot(recorded[metric], label=str(metric))
    plt.xlabel('epoch')
    plt.legend()
    plt.show()

# Plot the training and validation loss recorded during fitting
plot_history(history, 'loss', 'val_loss')

In [None]:
# Write the trained model to gru_model.keras in the working directory
model.save('gru_model.keras')

In [None]:
!zip -r gru_model.zip gru_model.keras
!cp gru_model.zip /content/drive/MyDrive/machine-learning-models/