In [1]:
import wandb
import pandas as pd
import numpy as np
import os
import shutil

import torch
import torch.nn as nn
from torch.nn import functional as F
import torch.optim as optim
import torchmetrics


In [2]:
# Import Metrics for use with evaluation

from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix, classification_report

# Split dataset in train and test with a ratio of 70-30

from sklearn.model_selection import train_test_split


In [3]:
# set device to cuda if available else pass to cpu
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [4]:
# %env "WANDB_NOTEBOOK_NAME" "demo_wine_wandb_test"
wandb.login()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
wandb: Currently logged in as: markgich (use `wandb login --relogin` to force relogin)


True

In [5]:
# SET SEED
torch.manual_seed(32)
np.random.seed(32)
torch.use_deterministic_algorithms(True)

In [6]:

df = pd.read_csv("./data/wine_data.csv")
df.head()

Unnamed: 0,Class,Alcohol,Malic acid,Ash,Alcalinity of ash,Magnesium,Total phenols,Flavanoids,Nonflavanoid phenols,Proanthocyanins,Color intensity,Hue,OD280/OD315 of diluted wines,Proline
0,0,14.23,1.71,2.43,15.6,127,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065
1,0,13.2,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050
2,0,13.16,2.36,2.67,18.6,101,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185
3,0,14.37,1.95,2.5,16.8,113,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480
4,0,13.24,2.59,2.87,21.0,118,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735


In [7]:

le = LabelEncoder()
df['Class'] = le.fit_transform(df['Class'])

In [8]:

# set the feature variables

df_features = df.drop('Class', axis=1)
df_features.head()

Unnamed: 0,Alcohol,Malic acid,Ash,Alcalinity of ash,Magnesium,Total phenols,Flavanoids,Nonflavanoid phenols,Proanthocyanins,Color intensity,Hue,OD280/OD315 of diluted wines,Proline
0,14.23,1.71,2.43,15.6,127,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065
1,13.2,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050
2,13.16,2.36,2.67,18.6,101,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185
3,14.37,1.95,2.5,16.8,113,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480
4,13.24,2.59,2.87,21.0,118,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735


In [9]:

# Set the target variable

df_target = df[['Class']]
df_target.head()

Unnamed: 0,Class
0,0
1,0
2,0
3,0
4,0


In [10]:
# Split the dataset
X_train, x_test, Y_train, y_test = train_test_split(df_features,
                                                    df_target,
                                                    test_size=0.3,
                                                    random_state=42)


X_train.shape, x_test.shape,  Y_train.shape, y_test.shape,

Xtrain = torch.from_numpy(X_train.values).float()
Xtest = torch.from_numpy(x_test.values).float()
print(Xtrain.shape, Xtest.shape)

print(Xtrain.dtype, Xtest.dtype)


torch.Size([124, 13]) torch.Size([54, 13])
torch.float32 torch.float32


In [11]:
# Reshape tensor to 1D

Ytrain = torch.from_numpy(Y_train.values).view(1, -1)[0]
Ytest = torch.from_numpy(y_test.values).view(1, -1)[0]
print(Ytrain.shape, Ytest.shape)


torch.Size([124]) torch.Size([54])


In [12]:
input_size = 13
output_size = 3
hidden_size = 100

config = dict(
    input_size=13,
    output_size=3,
    hidden_size=100,
    dataset="wine dataset",
    architecture='Linear',
    onnx_model_path="/models/wine_model.onnx",
    learning_rate=0.01,
    # CHANGE THE LOSS
    # loss=nn.NLLLoss(),
    loss=nn.CrossEntropyLoss(),
    # CHANGE THE OPTIMIZER
    # optimizer="adam",
    optimizer="SGD",
    # optimizer="adagrad"
)
for k, v in config.items():
    print(f"wandb config{k}:{v}")


wandb configinput_size:13
wandb configoutput_size:3
wandb confighidden_size:100
wandb configdataset:wine dataset
wandb configarchitecture:Linear
wandb configonnx_model_path:/models/wine_model.onnx
wandb configlearning_rate:0.01
wandb configloss:CrossEntropyLoss()
wandb configoptimizer:SGD


In [13]:
# Define Class Net
class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, output_size)

        # ADD DROPOUT
        # self.dropout = nn.Dropout(p=0.25)  # DROPOUT

    def forward(self, X):
        X = torch.sigmoid((self.fc1(X)))
        # X = self.dropout(X)  # DROPOUT
        X = torch.sigmoid(self.fc2(X))
        # X = self.dropout(X)  # DROPOUT
        X = self.fc3(X)

        return F.log_softmax(X, dim=-1)



In [14]:
# Instantiate the model
model = Net()
# preview out model
print(model)


Net(
  (fc1): Linear(in_features=13, out_features=100, bias=True)
  (fc2): Linear(in_features=100, out_features=100, bias=True)
  (fc3): Linear(in_features=100, out_features=3, bias=True)
)


In [15]:

optimizer = optim.Adam(model.parameters(), lr=config.get("learning_rate"))
loss_fn = config.get("loss")

In [16]:
# TRAINING LOOP
# TRAIN THE MODEL
epochs = 1000
with wandb.init(project="demo_wandb_sklearn", config=config):
    wandb.watch(model, criterion=loss_fn, log="all", log_freq=10)

    for epoch in range(epochs):

        optimizer.zero_grad()
        Ypred = model(Xtrain)

        loss = loss_fn(Ypred, Ytrain)
        acc = torchmetrics.functional.accuracy(Ypred, Ytrain)
        loss.backward()

        optimizer.step()

        wandb.log(
            {"Train": {'Epoch': epoch, "Loss": loss.item(), "Accuracy": acc}})

    # SAVE MODEL STATE DICT TO DISK

    wandb.save(torch.save(model.state_dict(), "./models/wine_train.pt"))

    # LOAD MODEL FROM DISK and EVALUATE

    new_model = Net()
    new_model.load_state_dict(torch.load("./models/wine_train.pt"))
    new_model.eval()

    # SET THE PREDICTIONS

    predict = new_model(Xtest)
    _, predict_y = torch.max(predict, 1)

    # VISUALIZE CONFUSION MATRIX

    wandb.sklearn.plot_confusion_matrix(Ytest, predict_y, labels=[0, 1, 2])

    # Print Metrics

    wandb.log({"Test": {"accuracy_score": accuracy_score(Ytest, predict_y),
               "precision_score": precision_score(Ytest, predict_y, average='weighted'),
                        "recall_score": recall_score(Ytest, predict_y, average="weighted")}})

    table = wandb.Table(data=df, columns=[df_features, df_target])
    wandb.log({"Data Table": table})

    torch.onnx.export(model=model, args=(Xtrain), f="./models/wine_test.onnx", input_names=['input'], output_names=['output'],
                      verbose=True, do_constant_folding=True, opset_version=11)
    # COPY ONNX TO WANDB RUN DIR FOR LOGGING
    shutil.copy("./models/wine_test.onnx",
                os.path.join(wandb.run.dir,
                             "wine_test.onnx"))
    # COPY PT TO WANDB RUN DIR FOR LOGGING
    shutil.copy("./models/wine_train.pt",
                os.path.join(wandb.run.dir, "wine_train.pt"))

wandb.finish()
torch.cuda.empty_cache()

# END OF FILE



graph(%input : Float(124, 13, strides=[1, 124], requires_grad=0, device=cpu),
      %fc1.weight : Float(100, 13, strides=[13, 1], requires_grad=1, device=cpu),
      %fc1.bias : Float(100, strides=[1], requires_grad=1, device=cpu),
      %fc2.weight : Float(100, 100, strides=[100, 1], requires_grad=1, device=cpu),
      %fc2.bias : Float(100, strides=[1], requires_grad=1, device=cpu),
      %fc3.weight : Float(3, 100, strides=[100, 1], requires_grad=1, device=cpu),
      %fc3.bias : Float(3, strides=[1], requires_grad=1, device=cpu)):
  %7 : Float(124, 100, strides=[100, 1], requires_grad=1, device=cpu) = onnx::Gemm[alpha=1., beta=1., transB=1](%input, %fc1.weight, %fc1.bias) # c:\Users\markg\venv\torch-venv\lib\site-packages\torch\nn\functional.py:1753:0
  %8 : Float(124, 100, strides=[100, 1], requires_grad=1, device=cpu) = onnx::Sigmoid(%7) # <ipython-input-13-c039220e49cb>:14:0
  %9 : Float(124, 100, strides=[100, 1], requires_grad=1, device=cpu) = onnx::Gemm[alpha=1., beta=1., tra

VBox(children=(Label(value=' 0.03MB of 0.03MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
_runtime,12
_timestamp,1626273586
_step,1002


0,1
_runtime,▁▁▁▁▁▁▁▃▃▃▃▃▃▃▃▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆▆▆████████
_timestamp,▁▁▁▁▁▁▁▃▃▃▃▃▃▃▃▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆▆▆████████
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
