In [1]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor
from torchinfo import summary

In [3]:
import dagshub

# Initialise the DagsHub client for this repository with its MLflow
# integration switched on, so later mlflow.* calls are recorded there.
dagshub.init(
    repo_owner='quangbhdang',
    repo_name='COSC2984-SMNA-Assignment',
    mlflow=True,
)

In [3]:
# # Upload using the DagsHub client, to a DVC tracked folder also called "data".
# # Follow the instructions that appear to authorize the request.
# from dagshub import upload_files

# dagshub.upload_files('quangbhdang/COSC2984-SMNA-Assignment', 'Dataset/So-Spam/')

In [2]:
def train(epoch, model):
    """Run one full-batch optimisation step and print train/validation metrics.

    Relies on notebook-level globals: the model inputs (``des_tensor``,
    ``tweets_tensor``, ``num_prop``, ``category_prop``, ``edge_index``,
    ``edge_type``), ``labels``, the ``train_idx``/``val_idx`` index sets and
    the ``loss``, ``accuracy`` and ``optimizer`` objects.

    Args:
        epoch: Epoch index; it is printed as ``epoch + 1``.
        model: Module that is called with the six inputs listed above.

    Returns:
        Tuple ``(acc_train, loss_train)`` computed on the training split.

    NOTE(review): both accuracies are taken from the same train-mode forward
    pass, so any dropout inside the model is presumably active for the
    validation figure as well -- confirm this is intended.
    """
    model.train()
    # Clear stale gradients up front; the forward pass does not read them.
    optimizer.zero_grad()
    logits = model(des_tensor, tweets_tensor, num_prop, category_prop, edge_index, edge_type)

    train_logits, train_labels = logits[train_idx], labels[train_idx]
    loss_train = loss(train_logits, train_labels)
    acc_train = accuracy(train_logits, train_labels)
    acc_val = accuracy(logits[val_idx], labels[val_idx])

    loss_train.backward()
    optimizer.step()

    report = (
        f'Epoch: {epoch + 1:04d}',
        f'loss_train: {loss_train.item():.4f}',
        f'acc_train: {acc_train.item():.4f}',
        f'acc_val: {acc_val.item():.4f}',
    )
    print(*report)
    return acc_train, loss_train

def test(model):
    """Evaluate ``model`` on the test split and print the resulting metrics.

    Relies on notebook-level globals: the model inputs (``des_tensor``,
    ``tweets_tensor``, ``num_prop``, ``category_prop``, ``edge_index``,
    ``edge_type``), ``labels``, ``test_idx`` and the ``loss``, ``accuracy``,
    ``f1_score`` and ``matthews_corrcoef`` callables.

    Args:
        model: Module that is called with the six inputs listed above.

    Returns:
        Tuple ``(acc_test, loss_test, f1, mcc)`` for the test split.
    """
    model.eval()
    # Inference only: disable autograd so no computation graph is built or
    # kept alive for a forward pass that is never back-propagated.
    with torch.no_grad():
        output = model(des_tensor, tweets_tensor, num_prop, category_prop, edge_index, edge_type)
        loss_test = loss(output[test_idx], labels[test_idx])
        acc_test = accuracy(output[test_idx], labels[test_idx])

    # Hard predictions: index of the largest score along dim 1, moved to
    # NumPy on the CPU because the scikit-learn metrics are fed arrays.
    predictions = output.max(1)[1].to('cpu').detach().numpy()
    targets = labels.to('cpu').detach().numpy()
    f1 = f1_score(targets[test_idx], predictions[test_idx])
    mcc = matthews_corrcoef(targets[test_idx], predictions[test_idx])

    print("Test set results:",
            "test_loss= {:.4f}".format(loss_test.item()),
            "test_accuracy= {:.4f}".format(acc_test.item()),
            "f1_score= {:.4f}".format(f1),
            "mcc= {:.4f}".format(mcc),
            )
    return acc_test, loss_test, f1, mcc

In [4]:
from Src.Dataset import Twibot20
from Src.model import BotRGCN
import torch
from torch import nn
from Src.utils import accuracy, init_weights

from sklearn.metrics import f1_score
from sklearn.metrics import matthews_corrcoef

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyperparameters for this run.
# NOTE(review): `dropout` is not passed to the model constructor below; in
# this notebook it is only recorded by the MLflow logging cell -- confirm the
# model's own default matches.
embedding_size = 128
dropout = 0.3
lr = 1e-3
weight_decay = 5e-3

dataset = Twibot20(device=device, process=False)
(
    des_tensor,
    tweets_tensor,
    num_prop,
    category_prop,
    edge_index,
    edge_type,
    labels,
    train_idx,
    val_idx,
    test_idx,
) = dataset.dataloader(build_feature_graph=True)

botRGCN = BotRGCN(
    num_prop_size=5,
    cat_prop_size=3,
    embedding_dimension=embedding_size,
).to(device)

# `loss` and `optimizer` are read as globals by train() and test().
loss = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(
    botRGCN.parameters(),
    lr=lr,
    weight_decay=weight_decay,
)

botRGCN.apply(init_weights)

epochs = 100

# After the loop, acc_train/loss_train hold the values from the last epoch.
for epoch in range(epochs):
    acc_train, loss_train = train(epoch, botRGCN)

acc_test, loss_test, f1, mcc = test(botRGCN)

Loading labels...   Finished
Running feature1 embedding
Finished
Running feature2 embedding
Finished
Processing feature3...   Finished
Processing feature4...   Finished
Building feature-based graph from all features (chunked)...   Finished
Epoch: 0001 loss_train: 0.9123 acc_train: 0.5145 acc_val: 0.5209
Epoch: 0002 loss_train: 1.1311 acc_train: 0.5935 acc_val: 0.5835
Epoch: 0003 loss_train: 0.7247 acc_train: 0.6707 acc_val: 0.6567
Epoch: 0004 loss_train: 0.6245 acc_train: 0.6753 acc_val: 0.6490
Epoch: 0005 loss_train: 0.7013 acc_train: 0.6069 acc_val: 0.5865
Epoch: 0006 loss_train: 0.6792 acc_train: 0.6149 acc_val: 0.5932
Epoch: 0007 loss_train: 0.5917 acc_train: 0.6851 acc_val: 0.6727
Epoch: 0008 loss_train: 0.5267 acc_train: 0.7603 acc_val: 0.7400
Epoch: 0009 loss_train: 0.5187 acc_train: 0.7870 acc_val: 0.7649
Epoch: 0010 loss_train: 0.5307 acc_train: 0.7867 acc_val: 0.7700
Epoch: 0011 loss_train: 0.5431 acc_train: 0.7844 acc_val: 0.7755
Epoch: 0012 loss_train: 0.5164 acc_train: 0.7

In [5]:
import mlflow
import mlflow.pytorch
import uuid

# Build a unique experiment name from a fixed prefix plus the first six hex
# characters of a random UUID, then make it the active MLflow experiment.
experiment_suffix = uuid.uuid4().hex[:6]
experiment_name = "Dang_TwiBot20_" + experiment_suffix
mlflow.set_experiment(experiment_name)

2025/05/22 16:41:03 INFO mlflow.tracking.fluent: Experiment with name 'Dang_TwiBot20_624157' does not exist. Creating a new experiment.


<Experiment: artifact_location='mlflow-artifacts:/767f283567aa4f70b42d159094f7c847', creation_time=1747906863301, experiment_id='7', last_update_time=1747906863301, lifecycle_stage='active', name='Dang_TwiBot20_624157', tags={}>

In [6]:
# Logging experiment results

run_description = "Bot RGCN with custom feature based graph encoding instead of relationship"

with mlflow.start_run(description=run_description):
    # Hyperparameters of the BotRGCN run; "optimiser" is recorded so this run
    # carries the same parameter set as the BotGCN and BotGAT runs.
    mlflow.log_params({
        "embedding_size": embedding_size,
        "dropout": dropout,
        "learning_rate": lr,
        "weight_decay": weight_decay,
        "epochs": epochs,
        "optimiser": "AdamW",
    })

    # Last-epoch training metrics plus the test-set metrics.
    final_metrics = {
        "train_accuracy": acc_train,
        "train_loss": loss_train,
        "test_accuracy": acc_test,
        "test_loss": loss_test,
        "f1_score": f1,
        "matthew_coeff": mcc,
    }
    # Each value is checked against its own type before calling .item(), so a
    # tensor is always converted and a plain number is passed through as-is.
    mlflow.log_metrics({
        name: value.item() if isinstance(value, torch.Tensor) else value
        for name, value in final_metrics.items()
    })

    # Log model summary. UTF-8 is set explicitly because the summary text may
    # contain non-ASCII tree glyphs that a platform default codec cannot encode.
    with open("model_summary.txt", "w", encoding="utf-8") as f:
        f.write(str(summary(botRGCN)))
    mlflow.log_artifact("model_summary.txt")

    # Store the trained model itself with the run.
    mlflow.pytorch.log_model(botRGCN, "BotRGCN")



🏃 View run youthful-shrimp-498 at: https://dagshub.com/quangbhdang/COSC2984-SMNA-Assignment.mlflow/#/experiments/7/runs/4b6c298b01d14c3ea3ff0096580e0ddf
🧪 View experiment at: https://dagshub.com/quangbhdang/COSC2984-SMNA-Assignment.mlflow/#/experiments/7


In [8]:
from Src.model import BotGCN

# Hyperparameters for the BotGCN run (same values as the BotRGCN run).
embedding_size, dropout, lr, weight_decay = 128, 0.3, 1e-3, 5e-3

botGCN = BotGCN(num_prop_size=5, cat_prop_size=3, embedding_dimension=embedding_size).to(device)

# `loss` and `optimizer` are read as globals by train() and test(), so they
# are rebound here to target the new model's parameters.
loss = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(botGCN.parameters(),
                    lr=lr, weight_decay=weight_decay)

# Apply the custom weight initialisation to the model trained in this cell.
botGCN.apply(init_weights)

epochs = 100

# After the loop, acc_train/loss_train hold the values from the last epoch.
for epoch in range(epochs):
    acc_train, loss_train = train(epoch, botGCN)

acc_test, loss_test, f1, mcc = test(botGCN)

Epoch: 0001 loss_train: 0.6836 acc_train: 0.5754 acc_val: 0.5700
Epoch: 0002 loss_train: 0.6685 acc_train: 0.5935 acc_val: 0.5839
Epoch: 0003 loss_train: 0.6557 acc_train: 0.6046 acc_val: 0.5979
Epoch: 0004 loss_train: 0.6422 acc_train: 0.6148 acc_val: 0.6089
Epoch: 0005 loss_train: 0.6269 acc_train: 0.6540 acc_val: 0.6461
Epoch: 0006 loss_train: 0.6086 acc_train: 0.7315 acc_val: 0.7235
Epoch: 0007 loss_train: 0.5908 acc_train: 0.7675 acc_val: 0.7480
Epoch: 0008 loss_train: 0.5710 acc_train: 0.7747 acc_val: 0.7543
Epoch: 0009 loss_train: 0.5506 acc_train: 0.7803 acc_val: 0.7658
Epoch: 0010 loss_train: 0.5310 acc_train: 0.7879 acc_val: 0.7712
Epoch: 0011 loss_train: 0.5096 acc_train: 0.7906 acc_val: 0.7780
Epoch: 0012 loss_train: 0.4855 acc_train: 0.8031 acc_val: 0.7831
Epoch: 0013 loss_train: 0.4615 acc_train: 0.8108 acc_val: 0.7928
Epoch: 0014 loss_train: 0.4400 acc_train: 0.8210 acc_val: 0.8123
Epoch: 0015 loss_train: 0.4235 acc_train: 0.8321 acc_val: 0.8275
Epoch: 0016 loss_train: 0

In [9]:
# Logging experiment results
run_description = "BotGCN with custom feature graph build instead of traditional graph"
with mlflow.start_run(description=run_description):
    # Hyperparameters of the BotGCN run.
    mlflow.log_params({
        "embedding_size": embedding_size,
        "dropout": dropout,
        "learning_rate": lr,
        "weight_decay": weight_decay,
        "epochs": epochs,
        "optimiser": "AdamW"
    })

    # Last-epoch training metrics plus the test-set metrics.
    final_metrics = {
        "train_accuracy": acc_train,
        "train_loss": loss_train,
        "test_accuracy": acc_test,
        "test_loss": loss_test,
        "f1_score": f1,
        "matthew_coeff": mcc,
    }
    # Each value is checked against its own type before calling .item(), so a
    # tensor is always converted and a plain number is passed through as-is.
    mlflow.log_metrics({
        name: value.item() if isinstance(value, torch.Tensor) else value
        for name, value in final_metrics.items()
    })

    # Log model summary. UTF-8 is set explicitly because the summary text may
    # contain non-ASCII tree glyphs that a platform default codec cannot encode.
    with open("model_summary.txt", "w", encoding="utf-8") as f:
        f.write(str(summary(botGCN)))
    mlflow.log_artifact("model_summary.txt")

    # Store the trained model itself with the run.
    mlflow.pytorch.log_model(botGCN, "model")



🏃 View run sincere-ray-269 at: https://dagshub.com/quangbhdang/COSC2984-SMNA-Assignment.mlflow/#/experiments/7/runs/2d3287e0e90748149a4cd63b7419b2bf
🧪 View experiment at: https://dagshub.com/quangbhdang/COSC2984-SMNA-Assignment.mlflow/#/experiments/7


In [13]:
from Src.model import BotGAT

# Hyperparameters for the BotGAT run.
embedding_size = 128
dropout = 0.3
lr = 1e-3
weight_decay = 5e-3

botGAT = BotGAT(
    num_prop_size=5,
    cat_prop_size=3,
    embedding_dimension=embedding_size,
).to(device)

# `loss` and `optimizer` are read as globals by train() and test(), so they
# are rebound here to target the new model's parameters.
loss = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(
    botGAT.parameters(),
    lr=lr,
    weight_decay=weight_decay,
)

botGAT.apply(init_weights)

epochs = 100

# After the loop, acc_train/loss_train hold the values from the last epoch.
for epoch in range(epochs):
    acc_train, loss_train = train(epoch, botGAT)

acc_test, loss_test, f1, mcc = test(botGAT)


Epoch: 0001 loss_train: 1.0021 acc_train: 0.4391 acc_val: 0.4537
Epoch: 0002 loss_train: 0.7826 acc_train: 0.5860 acc_val: 0.5890
Epoch: 0003 loss_train: 0.7350 acc_train: 0.6103 acc_val: 0.6034
Epoch: 0004 loss_train: 0.6059 acc_train: 0.7166 acc_val: 0.7171
Epoch: 0005 loss_train: 0.5489 acc_train: 0.7466 acc_val: 0.7404
Epoch: 0006 loss_train: 0.5789 acc_train: 0.6808 acc_val: 0.6617
Epoch: 0007 loss_train: 0.5578 acc_train: 0.6812 acc_val: 0.6778
Epoch: 0008 loss_train: 0.5247 acc_train: 0.7433 acc_val: 0.7345
Epoch: 0009 loss_train: 0.4968 acc_train: 0.7861 acc_val: 0.7704
Epoch: 0010 loss_train: 0.4993 acc_train: 0.7952 acc_val: 0.7886
Epoch: 0011 loss_train: 0.4929 acc_train: 0.7945 acc_val: 0.7835
Epoch: 0012 loss_train: 0.4776 acc_train: 0.7973 acc_val: 0.7856
Epoch: 0013 loss_train: 0.4722 acc_train: 0.7978 acc_val: 0.7839
Epoch: 0014 loss_train: 0.4641 acc_train: 0.7966 acc_val: 0.7877
Epoch: 0015 loss_train: 0.4557 acc_train: 0.7961 acc_val: 0.7848
Epoch: 0016 loss_train: 0

In [15]:
# Logging experiment results
run_description = "BotGAT runs with custom features graph instead of traditional graph building"

# Pass the description so it is stored with the run instead of being unused.
with mlflow.start_run(description=run_description):
    # Hyperparameters of the BotGAT run.
    mlflow.log_params({
        "embedding_size": embedding_size,
        "dropout": dropout,
        "learning_rate": lr,
        "weight_decay": weight_decay,
        "epochs": epochs,
        "optimiser": "AdamW"
    })

    # Last-epoch training metrics plus the test-set metrics.
    final_metrics = {
        "train_accuracy": acc_train,
        "train_loss": loss_train,
        "test_accuracy": acc_test,
        "test_loss": loss_test,
        "f1_score": f1,
        "matthew_coeff": mcc,
    }
    # Each value is checked against its own type before calling .item(), so a
    # tensor is always converted and a plain number is passed through as-is.
    mlflow.log_metrics({
        name: value.item() if isinstance(value, torch.Tensor) else value
        for name, value in final_metrics.items()
    })

    # Log model summary. UTF-8 is set explicitly because the summary text may
    # contain non-ASCII tree glyphs that a platform default codec cannot encode.
    with open("model_summary.txt", "w", encoding="utf-8") as f:
        f.write(str(summary(botGAT)))
    mlflow.log_artifact("model_summary.txt")

    # Store the trained model itself with the run.
    mlflow.pytorch.log_model(botGAT, "model")



🏃 View run nosy-sloth-269 at: https://dagshub.com/quangbhdang/COSC2984-SMNA-Assignment.mlflow/#/experiments/7/runs/2b93c57275c3481c874ad697bae83be0
🧪 View experiment at: https://dagshub.com/quangbhdang/COSC2984-SMNA-Assignment.mlflow/#/experiments/7
