In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from tqdm import tqdm


In [13]:
from sklearn.preprocessing import OneHotEncoder

In [42]:
class GaussianDNN(nn.Module):
    def __init__(self, num_feats: int):
        super(GaussianDNN, self).__init__()
        self.fc1 = nn.Linear(num_feats, 256)
        self.fc2 = nn.Linear(256, 256)
        self.fc3 = nn.Linear(256, 256)
        self.mean_output = nn.Linear(256, 1)
        self.logvar_output = nn.Linear(256, 1)
        self.activation = nn.ReLU()

    def forward(self, x):
        x = self.activation(self.fc1(x))
        x = self.activation(self.fc2(x))
        x = self.activation(self.fc3(x))
        mean = self.mean_output(x)
        logvar = self.logvar_output(x)
        return mean, logvar

In [43]:
def train_gaussian_dnn(train_loader, model, optimizer):
    model.train()
    running_loss = 0.0
    for inputs, targets in train_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        mean, logvar = model(inputs)
        
        loss = 0.5 * (torch.exp(-logvar) * (targets - mean)**2 + logvar).mean()
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * inputs.size(0)
    return running_loss / len(train_loader.dataset)

def evaluate_gaussian_dnn(val_loader, model):
    model.eval()
    running_loss = 0.0
    with torch.no_grad():
        for inputs, targets in val_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            mean, logvar = model(inputs)
            
            loss = 0.5 * (torch.exp(-logvar) * (targets - mean)**2 + logvar).mean()
            running_loss += loss.item() * inputs.size(0)
    return running_loss / len(val_loader.dataset)

In [90]:
tol = 95.0

In [91]:
dta = pd.read_csv("../data/fall-msd-treatment.csv")

In [92]:
dta = dta[dta['payoff'] > 0.0]

In [111]:
dta

Unnamed: 0.1,Unnamed: 0,store_name,store_id,display_extra_info,display_id,last_scanned_datetime,name,previous_post_scan_num_facings,max_slots,payoff,product_id
15,58,WALMART SC #2511,7d4a839d-d9e2-46d3-bce4-6c8e630780eb,Self Checkout Market Entrance,25cf3fa3-bb5b-4945-a94d-590bdce753f6,2022-11-18 17:47:02+00:00,"POWERADE Mountain Berry Blast Bottle, 20 fl oz",2.0,36.0,5.705230,ba7ba9c4-89d1-4f1e-958d-f6aaf1135a54
17,60,WALMART SC #2511,7d4a839d-d9e2-46d3-bce4-6c8e630780eb,Self Checkout Market Entrance,25cf3fa3-bb5b-4945-a94d-590bdce753f6,2022-11-18 17:47:02+00:00,"Sprite Bottle, 20 fl oz",4.0,36.0,5.705230,fac3c507-24ce-4dbb-872c-9dcaddc40dd6
20,63,WALMART SC #2511,7d4a839d-d9e2-46d3-bce4-6c8e630780eb,Self Checkout Market Entrance,25cf3fa3-bb5b-4945-a94d-590bdce753f6,2022-11-18 17:47:02+00:00,"Coca-Cola Cherry Bottle, 20 fl oz",2.0,36.0,4.564184,68767549-3cb3-4165-b0e3-1a3a75289e42
21,64,WALMART SC #2511,7d4a839d-d9e2-46d3-bce4-6c8e630780eb,Self Checkout Market Entrance,25cf3fa3-bb5b-4945-a94d-590bdce753f6,2022-11-18 17:47:02+00:00,Monster Energy Zero Ultra 16oz Can,3.0,36.0,2.282092,97de171d-9ebe-4e3a-a948-8e74ec9b45e0
22,65,WALMART SC #2511,7d4a839d-d9e2-46d3-bce4-6c8e630780eb,Self Checkout Market Entrance,25cf3fa3-bb5b-4945-a94d-590bdce753f6,2022-11-18 17:47:02+00:00,"Diet Coke Bottle, 20 fl oz",4.0,36.0,2.282092,a701c623-579d-42bd-a2fe-a34578c54b24
...,...,...,...,...,...,...,...,...,...,...,...
14976,36441,WALMART NM #4100,405e1d90-4782-4013-87b6-efcc73af808a,Self checkout Swire cooler,f3f231b8-9933-4b25-94e1-67ef63e2db3f,2022-09-21 20:32:22+00:00,"Sprite Bottle, 20 fl oz",3.0,50.0,2.333874,fac3c507-24ce-4dbb-872c-9dcaddc40dd6
14977,36442,WALMART NM #4100,405e1d90-4782-4013-87b6-efcc73af808a,Self checkout Swire cooler,f3f231b8-9933-4b25-94e1-67ef63e2db3f,2022-09-21 20:32:22+00:00,Coca-Cola Dreamworld 20oz,3.0,50.0,1.909533,6e0c30a8-554f-4960-9124-1eb1e4cfe72d
14978,36443,WALMART NM #4100,405e1d90-4782-4013-87b6-efcc73af808a,Self checkout Swire cooler,f3f231b8-9933-4b25-94e1-67ef63e2db3f,2022-09-21 20:32:22+00:00,"Coca-Cola Zero Sugar Bottle, 20 fl oz",3.0,50.0,2.546044,a2673d8c-c3a0-47d3-b72d-605503f1a21a
14979,36444,WALMART NM #4100,405e1d90-4782-4013-87b6-efcc73af808a,Self checkout Swire cooler,f3f231b8-9933-4b25-94e1-67ef63e2db3f,2022-09-21 20:32:22+00:00,Monster Energy Zero Ultra 16oz Can,4.0,50.0,2.333874,97de171d-9ebe-4e3a-a948-8e74ec9b45e0


In [93]:
np.percentile(dta['payoff'], q=[tol])

array([8.68000192])

In [94]:
dta = dta[dta['payoff'] < np.percentile(dta['payoff'], q=[tol])[0]]

In [95]:
day_feats = pd.to_datetime(dta['last_scanned_datetime']).dt.day_of_week.values.reshape(-1, 1)

In [96]:
one_hot_prod = OneHotEncoder(sparse=False)

In [97]:
one_hot_store = OneHotEncoder(sparse=False)

In [98]:
prod_feats = one_hot_prod.fit_transform(dta['product_id'].values.reshape(-1, 1))

In [99]:
one_hot_prod.fit_transform(dta['store_id'].values.reshape(-1, 1))

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [100]:
day_feats

array([[4],
       [4],
       [4],
       ...,
       [2],
       [2],
       [2]])

In [101]:
prod_feats.shape

(6937, 132)

In [102]:
x_values = np.concatenate([prod_feats, day_feats], axis=-1)

In [103]:
y_values = dta['payoff'].values

In [104]:
# Convert data to PyTorch tensors
x_tensor = torch.tensor(x_values, dtype=torch.float32).unsqueeze(1)
y_tensor = torch.tensor(y_values, dtype=torch.float32).unsqueeze(1)

# Split data into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(x_tensor, y_tensor, test_size=0.1, random_state=42)

# Create a DataLoader for training data
train_dataset = TensorDataset(x_train, y_train)
test_dataset = TensorDataset(x_test, y_test)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(test_dataset, batch_size=32, shuffle=True)

In [105]:
device = torch.device("cpu")

In [106]:
# Instantiate and train the Gaussian DNN model
gaussian_dnn_model = GaussianDNN(num_feats =  x_values.shape[1]).to(device)
optimizer = optim.Adam(gaussian_dnn_model.parameters(), lr=0.01)

In [110]:
num_epochs = 20
progress_bar = tqdm(range(num_epochs), desc="Training Gaussian DNN", unit="epoch")
for epoch in progress_bar:
    train_loss = train_gaussian_dnn(train_loader, gaussian_dnn_model, optimizer)
    val_loss = evaluate_gaussian_dnn(val_loader, gaussian_dnn_model)
    
    progress_bar.set_postfix({"Train Loss": f"{train_loss:.2f}", "Val Loss": f"{val_loss:.2f}"})

Training Gaussian DNN: 100%|█| 20/20 [00:11<00:00,  1.69epoch/s, Train Loss=1.14, Val Loss


In [108]:
i = 0

In [109]:
torch.save(gaussian_dnn_model.state_dict(), f"gauss-dnn-{i}.pt")