# Data Modelling
This is a notebook to experiment with the data modelling of the sales quantity data.
This was done on a cloud instance so the file paths will be different if you are running this locally.
Note that the dataset is also propiertary so it will not be included in this repository.

# 1.Imports and Constants
We will be using the following libraries:

In [169]:
import numpy as np
import pandas as pd
import requests
import json
from google.cloud import aiplatform
import sys

from flask import Flask, jsonify

In [78]:
WINDOW = 30

In [107]:
api_url = "http://34.128.116.172:8080/api/send-supplier-data"

body = {
    "supplierName": sys.argv[1]
}
response = requests.post(api_url, json=body)

json_data = response.json()
print(json_data)

{'success': True, 'supplierCode': 'GMP', 'supplierName': 'Global Mitra Prima', 'items': [{'kode': '6901668053121'}, {'kode': '7622201402686'}, {'kode': '7622201402709'}, {'kode': '7622210551733'}, {'kode': '7622210580276'}, {'kode': '7622300136055'}, {'kode': '7622300442507'}, {'kode': '8886020033431'}, {'kode': '8887549472992'}, {'kode': '8991115000103'}, {'kode': '8991115010102'}, {'kode': '8992716108816'}, {'kode': '8992716108878'}, {'kode': '8992727000048'}, {'kode': '8992727003087'}, {'kode': '8992753004010'}, {'kode': '8992753031900'}, {'kode': '8992753033645'}, {'kode': '8992753033720'}, {'kode': '8992753033737'}, {'kode': '8992753033744'}, {'kode': '8992753101207'}, {'kode': '8992753102303'}, {'kode': '8992753182008'}, {'kode': '8992753184002'}, {'kode': '8992753184200'}, {'kode': '8992760221028'}, {'kode': '8992760223015'}, {'kode': '8992946511790'}, {'kode': '8992946512285'}, {'kode': '8992946521416'}, {'kode': '8993560025496'}, {'kode': '8993560156619'}, {'kode': '8997035563

In [111]:
# Extract details
details = json_data['details']

# Create DataFrame
data = pd.DataFrame(details)
data = data.rename(columns={'kode_barang':'item_code', 'tanggal':'date','total_qty':'quantity'})
data['date'] = pd.to_datetime(data['date']).dt.strftime('%Y-%m-%d')
data['quantity'] = data['quantity'].astype(np.float64)

# Add missing items with current date and 0 total_qty
items = json_data['items']
item_codes = [item['kode'] for item in items]
missing_items = set(item_codes) - set(data['item_code'].unique())
#get missing dates from the 'tanggal' column going back 30 days
date_str = '2023-04-01'
missing_dates = pd.date_range(end=pd.to_datetime(date_str), periods=WINDOW).strftime('%Y-%m-%d')
#get dates for a single item
reference_date = pd.to_datetime(data[data.item_code == data['item_code'].unique()[0]]['date'])
reference_date = reference_date.dt.strftime('%Y-%m-%d')

#remove dates that are already in the data
missing_dates = set(missing_dates) - set(reference_date)
print(len(missing_dates))
#add missing dates for a single item
missing_data = pd.DataFrame({'date':list(missing_dates), 'item_code':data['item_code'].unique()[0], 'quantity':0})
#add missing items
data = pd.concat([data, missing_data], ignore_index=True)
#sort by date

# Rearrange columns

# Display DataFrame
print(data)


16
           date      item_code  quantity
0    2023-03-12  6901668053121       4.0
1    2023-03-13  6901668053121       2.0
2    2023-03-14  6901668053121       3.0
3    2023-03-15  6901668053121       1.0
4    2023-03-16  6901668053121       1.0
..          ...            ...       ...
458  2023-03-06  6901668053121       0.0
459  2023-03-08  6901668053121       0.0
460  2023-03-23  6901668053121       0.0
461  2023-03-05  6901668053121       0.0
462  2023-03-11  6901668053121       0.0

[463 rows x 3 columns]


In [112]:
#extract date features from date column
data['date'] = pd.to_datetime(data['date'])
data['year'] = data['date'].dt.year
data['month'] = data['date'].dt.month
data['day'] = data['date'].dt.day
data['day_of_week'] = data['date'].dt.dayofweek
data['day_of_year'] = data['date'].dt.dayofyear



We need to create a wide dataframe with each item code as a column and the quantity sold for each day as the values.

In [113]:
#stack dataframe based on item_code
data = data.groupby(['item_code','date','year','month','day','day_of_week','day_of_year'])['quantity'].sum().unstack(level=0)
#turn each NaN value to 0
data = data.sort_values('date')
data.fillna(0, inplace=True)
data.reset_index(inplace=True)
print(data)

item_code       date  year  month  day  day_of_week  day_of_year  \
0         2023-03-03  2023      3    3            4           62   
1         2023-03-04  2023      3    4            5           63   
2         2023-03-05  2023      3    5            6           64   
3         2023-03-06  2023      3    6            0           65   
4         2023-03-07  2023      3    7            1           66   
5         2023-03-08  2023      3    8            2           67   
6         2023-03-09  2023      3    9            3           68   
7         2023-03-10  2023      3   10            4           69   
8         2023-03-11  2023      3   11            5           70   
9         2023-03-12  2023      3   12            6           71   
10        2023-03-13  2023      3   13            0           72   
11        2023-03-14  2023      3   14            1           73   
12        2023-03-15  2023      3   15            2           74   
13        2023-03-16  2023      3   16          

## Prepare item code and dates
Since the dataset will use date and item code feature as input, to create an array of item code mapped to every date value

In [114]:
#prepare the list of item codes

items = np.array(data.columns[6:])
total_items = items.shape[0]
print(items.shape)

(36,)


In [115]:
# prepare the array of date_related features, since we will be windowing these features
# we ignore the first few ones

dates = np.array(data[['year','month','day','day_of_week','day_of_year']][WINDOW-1:])

#normalize for cyclic feature

dates = np.sin(dates) + np.cos(dates)
total_dates = dates.shape[0]
dates_feature = dates.shape[1]
print(dates.shape)

(1, 5)


Create numpy arrays for each repeated item and dates for later joining.

In [116]:
repeated_items = items.repeat(total_dates)
repeated_dates = dates.reshape(1,dates.shape[0],dates.shape[1]).repeat(total_items,axis=0).reshape(-1,dates_feature)

print(repeated_items)
print(repeated_dates)

['6901668053121' '7622201402686' '7622201402709' '7622210551733'
 '7622210580276' '7622300136055' '7622300442507' '8886020033431'
 '8887549472992' '8991115010102' '8992716108816' '8992716108878'
 '8992727000048' '8992727003087' '8992753004010' '8992753031900'
 '8992753033645' '8992753033720' '8992753033737' '8992753033744'
 '8992753101207' '8992753102303' '8992753182008' '8992753184002'
 '8992753184200' '8992760221028' '8992760223015' '8992946511790'
 '8992946512285' '8992946521416' '8993560025496' '8993560156619'
 '8997035563414' '8997035563544' '8999809700032' '8999809700056']
[[ 0.798209   -1.41044612  1.38177329 -0.67526209 -0.88837995]
 [ 0.798209   -1.41044612  1.38177329 -0.67526209 -0.88837995]
 [ 0.798209   -1.41044612  1.38177329 -0.67526209 -0.88837995]
 [ 0.798209   -1.41044612  1.38177329 -0.67526209 -0.88837995]
 [ 0.798209   -1.41044612  1.38177329 -0.67526209 -0.88837995]
 [ 0.798209   -1.41044612  1.38177329 -0.67526209 -0.88837995]
 [ 0.798209   -1.41044612  1.3817732

## Prepare the sales data to be windowed
We need to create windows of the sales data corresponding to the dates. This will be used as input and output for the data later on.

In [117]:
#transpose the sales quantity so dates are columns
sales = np.array(data[items].fillna(0)).T


#create the windows
windowed = np.lib.stride_tricks.sliding_window_view(sales, WINDOW, axis=-1).reshape(-1,WINDOW)
print(f'Shape of windowed data {windowed.shape}')

Shape of windowed data (36, 30)


In [138]:
#convert the data to json format using the following structure
#{instances:[
#    {"sales_window":[windowed[0]], "item_code":[repeated_items[0]], "date_features":[repeated_dates[0]]},
#    {"sales_window":[windowed[1]], "item_code":[repeated_items[1]], "date_features":[repeated_dates[1]]},...]}

instances = []
for i in range(windowed.shape[0]):
    instances.append({"sales_window":windowed[i].tolist(), "item_code":repeated_items[i], "date_features":repeated_dates[i].tolist()})

data_json = {"instances":instances}
print(data_json)



{'instances': [{'sales_window': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 2.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 6.0, 3.0, 5.0, 2.0, 0.0, 0.0, 0.0, 2.0], 'item_code': '6901668053121', 'date_features': [0.7982089967143482, -1.4104461161715403, 1.3817732906760363, -0.6752620891999122, -0.8883799491770447]}, {'sales_window': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 0.0, 0.0, 1.0, 2.0, 5.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 5.0, 2.0], 'item_code': '7622201402686', 'date_features': [0.7982089967143482, -1.4104461161715403, 1.3817732906760363, -0.6752620891999122, -0.8883799491770447]}, {'sales_window': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0, 1.0, 3.0, 2.0, 4.0, 3.0, 1.0, 0.0, 0.0, 3.0, 3.0, 0.0, 4.0, 1.0, 1.0], 'item_code': '7622201402709', 'date_features': [0.7982089967143482, -1.4104461161715403, 1.3817732906760363, -0.6752620891999122, -0.8883799491770447]}, {'sales_window': [0

In [139]:
#save the data to a json variable
json_object = json.dumps(data_json, indent = 4)

print(json_object)

{
    "instances": [
        {
            "sales_window": [
                0.0,
                0.0,
                0.0,
                0.0,
                0.0,
                0.0,
                0.0,
                0.0,
                0.0,
                4.0,
                2.0,
                3.0,
                1.0,
                1.0,
                1.0,
                0.0,
                1.0,
                0.0,
                0.0,
                2.0,
                0.0,
                2.0,
                6.0,
                3.0,
                5.0,
                2.0,
                0.0,
                0.0,
                0.0,
                2.0
            ],
            "item_code": "6901668053121",
            "date_features": [
                0.7982089967143482,
                -1.4104461161715403,
                1.3817732906760363,
                -0.6752620891999122,
                -0.8883799491770447
            ]
        },
        {
     

In [157]:
def endpoint_predict_sample(
        project: str, location: str, instances: list, endpoint: str
):
    aiplatform.init(project=project, location=location)

    endpoint = aiplatform.Endpoint(endpoint)

    prediction = endpoint.predict(instances=instances)
    print(prediction)
    return prediction


predictions = endpoint_predict_sample(
    "1058401447829",
    "asia-southeast2",
    instances,
    "3449440655217000448"
)

Prediction(predictions=[[-0.145476714], [0.00562978815], [-0.124448821], [-0.360635668], [-0.317194], [-0.436661571], [-0.308738321], [-0.38273859], [-0.140872478], [-0.46947819], [-0.239746436], [-0.148404256], [0.0469430536], [-0.0360197648], [0.527458787], [-0.376199633], [0.283085734], [-0.221400917], [-0.27070421], [-0.131049126], [-0.460462779], [-0.232224703], [0.667532682], [-0.133249119], [-0.209337637], [-0.268408], [-0.214045569], [-0.181671515], [0.064549908], [-0.46637705], [-0.0110621033], [-0.405589044], [-0.102462143], [0.178158835], [0.0856325775], [-0.259854585]], deployed_model_id='8984417373813473280', model_version_id='2', model_resource_name='projects/1058401447829/locations/asia-southeast2/models/2939196891184758784', explanations=None)


In [165]:
denormalized_predictions = np.int32((np.array(predictions[0])*3.997635572233167)+ 2.1712620248965555).flatten()

In [166]:
final = zip(items,denormalized_predictions)
return jsonify(list(final))


In [170]:
app = Flask(__name__)

@app.route('/predict', methods=['GET'])
def main():
    predict()

RuntimeError: Working outside of application context.

This typically means that you attempted to use functionality that needed
the current application. To solve this, set up an application context
with app.app_context(). See the documentation for more information.