<a href="https://colab.research.google.com/github/lamini-H/DeepLearningProjects/blob/main/Weights_and_Biases.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report


import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

import joblib

def load_raw_titanc():
  #Load the Titanic data
  df = pd.read_csv('https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv')
  return df

In [6]:
load_raw_titanc().head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [12]:
def clean_titanic(titanic_df, fill_missing=True):
  df = titanic_df.drop(['PassengerId','Name','Ticket','Cabin'],axis=1)
  #Fill missing values
  if fill_missing:
    df['Age'] = df['Age'].fillna(df['Age'].median())
    df['Embarked'] = df['Embarked'].fillna(df['Embarked'].mode()[0])
    df['Fare'] = df['Fare'].fillna(df['Fare'].median())
  else:
    df.dropna(inplace=True)
  #Convert categorical variables to numerical
  df['Sex'] = df['Sex'].map({'male':0,'female':1})
  df['Embarked'] = df['Embarked'].map({'S':0,'C':1,'Q':2})
  return df

def load_titanic(cleaned=True,Xy=True):

  df = load_raw_titanc()
  if cleaned:
    df = clean_titanic(df)
  if Xy:
    return df.drop('Survived',axis=1),df['Survived']
  else:
    return df
def get_data_loaders(X_train, X_test,y_train,y_test,batch_size=32):
  #Convert to Tensors
  X_train_tensor = torch.tensor(X_train.to_numpy(), dtype=torch.float32)
  y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)
  X_test_tensor = torch.tensor(X_test.to_numpy(), dtype=torch.float32)
  y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).view(-1, 1)

  # Create TensorDatasets
  train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
  test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

  #Create DataLoaders
  train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
  test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

  return train_loader,test_loader

def train_loop(train_loader,model,optimizer,criterion,device):
  model.train()
  epoch_train_loss = 0.0
  for batch, labels in train_loader:
    batch,labels = batch.to(device),labels.to(device)
    optimizer.zero_grad()
    #Forward pass
    outputs = model(batch)
    loss = criterion(outputs,labels)
    #Update Parameters
    loss.backward()
    optimizer.step()
    #Save loss
    epoch_train_loss += loss.item() * batch.size(0)
  return epoch_train_loss/len(train_loader)

def test_loop(test_loader,model,criterion,device):
  model.eval()
  epoch_test_loss = 0.0
  with torch.no_grad():
    for batch,labels in test_loader:
      batch,labels = batch.to(device),labels.to(device)
      #Calculate predictions
      outputs = model(batch)
      #Save loss
      loss = criterion(outputs,labels)
      epoch_test_loss += loss.item()*batch.size(0)
    return epoch_test_loss/len(test_loader.dataset)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


#Setup Weights and Biases

In [1]:
!pip install wandb



In [2]:
import wandb
wandb.login()

  | |_| | '_ \/ _` / _` |  _/ -_)


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mlaminiharunah[0m ([33mlaminiharunah-vrije-universiteit-brussel[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

#Simple Scikit-Learn Run

In [10]:
#Define hyperparamenters

n_estimators = 30
max_depth = 2

#Create a new run in a project
run = wandb.init(
    project="Simple Scikit-Learn Run",
    notes="Commit message for the run",
    config={
        "n_estimators":n_estimators,
        "max_depth":max_depth
    }
)

#Get Data
X, y = load_titanic()
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42)

#Define Random Forest Classifier
crf = RandomForestClassifier(n_estimators=n_estimators,max_depth=max_depth)

#Train the model
crf.fit(X_train,y_train)

#Log metrics
wandb.log({"train_accuracy":crf.score(X_train,y_train)})
wandb.log({"test_accuracy":crf.score(X_test,y_test)})

#Finish the run
run.finish()

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Age'] = df['Age'].fillna(df['Age'].median(),inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Embarked'] = df['Embarked'].fillna(df['Embarked'].mode()[0],inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the inte

0,1
test_accuracy,▁
train_accuracy,▁

0,1
test_accuracy,0.7933
train_accuracy,0.80899


#Pytorch Run

In [15]:
#Define hyperparameters
num_epochs = 10
learning_rate = 0.01
run_count = 3

#Start a run
run = wandb.init(
    project="Pytorch Run",
    name = f"MyRun {run_count}",
    notes="Commit message for the run",
    config={
        "num_epochs":num_epochs,
        "learning_rate":learning_rate,
    }
)

#Get Data
X,y = load_titanic()
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42)

train_loader,test_loader = get_data_loaders(X_train,X_test,y_train,y_test)

#Define PyTorch Model
model = nn.Sequential(
    nn.Linear(7,32),
    nn.ReLU(),
    nn.Linear(32,16),
    nn.ReLU(),
    nn.Linear(16,1),
    nn.Sigmoid()
).to(device)

criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(),lr=learning_rate)

for epoch in range(num_epochs):
  train_loss = train_loop(train_loader,model,optimizer,criterion,device)
  test_loss = test_loop(test_loader,model,criterion,device)

  print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}")
  wandb.log({"train_loss":train_loss,"test_loss":test_loss})

run.finish()


Epoch 1/10, Train Loss: 21.3907, Test Loss: 0.5671
Epoch 2/10, Train Loss: 18.6641, Test Loss: 0.5518
Epoch 3/10, Train Loss: 19.4279, Test Loss: 0.5710
Epoch 4/10, Train Loss: 18.5255, Test Loss: 0.5414
Epoch 5/10, Train Loss: 17.4626, Test Loss: 0.5059
Epoch 6/10, Train Loss: 16.4381, Test Loss: 0.4832
Epoch 7/10, Train Loss: 15.4344, Test Loss: 0.7359
Epoch 8/10, Train Loss: 16.5115, Test Loss: 0.4782
Epoch 9/10, Train Loss: 14.7752, Test Loss: 0.4851
Epoch 10/10, Train Loss: 14.6152, Test Loss: 0.4486


0,1
test_loss,▄▄▄▃▂▂█▂▂▁
train_loss,█▅▆▅▄▃▂▃▁▁

0,1
test_loss,0.44863
train_loss,14.61518


#First Dataset Artifact - Add Raw Titanic Data


In [16]:
run = wandb.init(project="Artifacts Registry", job_type = "data-loading")

#Save data locally
raw_titanic_df = load_titanic(cleaned=False,Xy=False)
raw_titanic_df.to_csv("raw_titanic.csv",index=False)

#Create Artifact object
raw_dataset_artifact = wandb.Artifact(name='raw_titanic', type='dataset')

#Add files to the artifact (multiple)
raw_dataset_artifact.add_file("raw_titanic.csv")

#Log the artifact
wandb.log_artifact(raw_dataset_artifact, aliases=["raw"])

run.finish()

In [19]:
run = wandb.init(project="Artifacts Registry", job_type = "data-loading")

#Get Clean data
clean_titanic_df_dropped = clean_titanic(raw_titanic_df,fill_missing=False)
clean_titanic_df_dropped.to_csv("clean_titanic_dropped.csv",index=False)

#Create and log New Artifact
clean_dataset_artifact = wandb.Artifact(name='clean_titanic', type='dataset')
clean_dataset_artifact.add_file("clean_titanic_dropped.csv")

run.log_artifact(clean_dataset_artifact, aliases=["clean_dropped"])
wandb.finish()