## Notebook for pretraining locally

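It requires a correctly configured `.env` file (defining `GCP_SA_KEY`, the path to your key file, and `BUCKET_NAME`, the target bucket) as well as a `gcp_sa_key.json` file containing a key for your GCP service account (SA).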

In [1]:
import os
from google.cloud import storage
from dotenv import load_dotenv

# Load the variables defined in the local .env file into the process environment.
load_dotenv()

# Point the Google Cloud client libraries at your service-account key file.
# Assigning None to os.environ raises an opaque "str expected, not NoneType"
# TypeError, so check for a missing/empty GCP_SA_KEY and fail with a clear message.
gcp_sa_key_path = os.getenv("GCP_SA_KEY")
if not gcp_sa_key_path:
    raise RuntimeError(
        "GCP_SA_KEY is not set. Define it in your .env file as the path to "
        "your service-account key file (e.g. gcp_sa_key.json)."
    )
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = gcp_sa_key_path

In [2]:
# File name of the saved weights: used as the local path for torch.save and,
# under the "weights/" prefix, as the object name in the storage bucket.
MODEL_FILE = "model-64.pth"

In [3]:
import torch

  cpu = _conversion_method_template(device=torch.device("cpu"))


In [4]:
# Tunable settings for this toy pretraining run.
SEED = 0              # fixed seed -> same weights, data and shuffle order on every run
N_FEATURES = 64       # width of each synthetic sample and of the MLP input
N_SAMPLES = 100       # number of synthetic samples
BATCH_SIZE = 16
LEARNING_RATE = 0.01

# Seed the global RNG before anything random happens so that
# Restart Kernel -> Run All reproduces the same model and dataset.
torch.manual_seed(SEED)

# create a simple mlp for binary classification
# (the final Sigmoid makes the output a probability, as BCELoss expects)
mlp = torch.nn.Sequential(
    torch.nn.Linear(N_FEATURES, 64),
    torch.nn.ReLU(),
    torch.nn.Linear(64, 1),
    torch.nn.Sigmoid(),
)

# create a simple dataset: random features with random 0/1 integer labels
dataset = torch.utils.data.TensorDataset(
    torch.randn(N_SAMPLES, N_FEATURES),
    torch.randint(0, 2, (N_SAMPLES,)),
)

# create a simple dataloader
dataloader = torch.utils.data.DataLoader(
    dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
)

# create a simple optimizer
optimizer = torch.optim.Adam(mlp.parameters(), lr=LEARNING_RATE)

In [5]:
# train the model
N_EPOCHS = 10

# Build the loss module once: it is the same for every batch, so creating a
# new instance on every step was loop-invariant work.
criterion = torch.nn.BCELoss()

for epoch in range(N_EPOCHS):
    for features, labels in dataloader:
        optimizer.zero_grad()
        # BCELoss needs float targets with the same shape as the model output,
        # so cast the integer labels and reshape them into a column.
        targets = labels.float().view(-1, 1)
        loss = criterion(mlp(features), targets)
        loss.backward()
        optimizer.step()

# save the learned weights (the state_dict only, not the whole module)
torch.save(mlp.state_dict(), MODEL_FILE)

In [6]:
# push to Google Cloud Storage bucket
# (`os` and `storage` are already imported in the notebook's first cell)

# Fail early with a clear message if the target bucket is not configured,
# instead of passing None on to the storage client.
bucket_name = os.getenv("BUCKET_NAME")
if not bucket_name:
    raise RuntimeError(
        "BUCKET_NAME is not set. Define it in your .env file before uploading."
    )

# Upload the saved weights file to the bucket as weights/<MODEL_FILE>
client = storage.Client()
bucket = client.bucket(bucket_name)
blob = bucket.blob(f"weights/{MODEL_FILE}")
blob.upload_from_filename(MODEL_FILE)

print("Uploaded!")

Uploaded!
