PyTorch Version

In [7]:
import warnings; warnings.filterwarnings('ignore')
import gc, os,time,numpy as np, pandas as pd, datetime
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix,classification_report
from sklearn.model_selection import StratifiedKFold, train_test_split
import torch, torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoConfig, AutoModel, logging

# ------------ GPU Setup ------------
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Device: {device}")
if device.type == 'cuda':torch.cuda.empty_cache()

# ------------- Hyperparameters ------------
MAX_LEN = 128
BERT_TRAINABLE = True
DRPT = 0.4
FC_ACT = 'elu'
LR_RATE = 1e-5
BATCH = 32
NEPOCHS = 20
PATIENCE = 4
DECAY = True
DECAY_RATE = 0.3
DECAY_AFTER = 1

modelname = 'hfFineTuneBert'
modelpath = os.path.join('.', 'Saved Models', modelname)
modelresults = os.path.join('.', 'Model Results')
modelsummaries = os.path.join('.', 'Model - Summaries-Figures')
for d in [modelpath, modelresults, modelsummaries]:
    os.makedirs(d, exist_ok=True)

# ---------------- Utils ---------------------
def hms_string(sec):
    h = int(sec // 3600)
    m = int((sec % 3600) // 60)
    s = sec % 60
    return f"{h} hrs {m:02d} mins {s:05.2f} secs"
########################################################
def WriteResutls(reports):
  unt0 = {'precision':[], 'recall':[], 'f1-score':[] }
  tin1 = {'precision':[], 'recall':[], 'f1-score':[] }
  Oth2 = {'precision':[], 'recall':[], 'f1-score':[] }
  macroavg = {'precision':[], 'recall':[], 'f1-score':[] }
  weightedavg = {'precision':[], 'recall':[], 'f1-score':[] }
  accu = []
  for report in reports:
    for k,v in report.items():
      if 'IND' in k:
        unt0['precision'].append(v['precision'])
        unt0['recall'].append(v['recall'])
        unt0['f1-score'].append(v['f1-score'])

      elif 'GRP' in k:
        tin1['precision'].append(v['precision'])
        tin1['recall'].append(v['recall'])
        tin1['f1-score'].append(v['f1-score'])

      elif 'OTH' in k:
        Oth2['precision'].append(v['precision'])
        Oth2['recall'].append(v['recall'])
        Oth2['f1-score'].append(v['f1-score'])

      elif 'macro avg' in k:
        macroavg['precision'].append(v['precision'])
        macroavg['recall'].append(v['recall'])
        macroavg['f1-score'].append(v['f1-score'])

      elif 'weighted avg' in k:
        weightedavg['precision'].append(v['precision'])
        weightedavg['recall'].append(v['recall'])
        weightedavg['f1-score'].append(v['f1-score'])
      elif 'accuracy' in k:
        accu.append(v)

  print('Accuracy:',np.mean(accu))
  print("")
  print('IND 1 Precision:',np.mean(unt0['precision']))
  print('IND 1 Recall:',np.mean(unt0['recall']))
  print('IND 1 F1-Score:',np.mean(unt0['f1-score']))
  print("")
  print('GRP 2 Precision:',np.mean(tin1['precision']))
  print('GRP 2 Recall:',np.mean(tin1['recall']))
  print('GRP 2 F1-Score:',np.mean(tin1['f1-score']))

  print("")
  print('OTH 3 Precision:',np.mean(Oth2['precision']))
  print('OTH 3 Recall:',np.mean(Oth2['recall']))
  print('OTH 3 F1-Score:',np.mean(Oth2['f1-score']))

  print("")
  print('Weighted Avg Precision:',np.mean(weightedavg['precision']))
  print('Weighted Avg Recall:',np.mean(weightedavg['recall']))
  print('Weighted Avg F1-Score:',np.mean(weightedavg['f1-score']))

  print("")
  print('Macro  Precision:',np.mean(macroavg['precision']))
  print('Macro  Recall:',np.mean(macroavg['recall']))
  print('Macro  F1-Score:',np.mean(macroavg['f1-score']))

  file = open( result_path, mode='a' )
  file.write( modelname+ ' ( '+ datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") +') \n' )
  file.write( 'Accuracy:'+str(np.mean(accu))+'\n' )
  file.write('IND 1 Precision:'+str(np.mean(unt0['precision']))+'\n' )
  file.write('IND 1 Recall:'+str(np.mean(unt0['recall']))+'\n' )
  file.write('IND 1 F1-Score:'+str(np.mean(unt0['f1-score']))+'\n' )

  file.write('GRP 2 Precision:'+str(np.mean(tin1['precision']))+'\n' )
  file.write('GRP 2 Recall:'+str(np.mean(tin1['recall']))+'\n' )
  file.write('GRP 2 F1-Score:'+str(np.mean(tin1['f1-score']))+'\n' )

  file.write('OTH 3 Precision:'+str(np.mean(Oth2['precision']))+'\n' )
  file.write('OTH 3 Recall:'+str(np.mean(Oth2['recall']))+'\n' )
  file.write('OTH 3 F1-Score:'+str(np.mean(Oth2['f1-score']))+'\n' )

  file.write('Weighted Avg Precision:'+str(np.mean(weightedavg['precision']))+'\n' )
  file.write('Weighted Avg Recall:'+str(np.mean(weightedavg['recall']))+'\n' )
  file.write('Weighted Avg F1-Score:'+str(np.mean(weightedavg['f1-score']))+'\n' )

  file.write('Macro  Precision:'+str(np.mean(macroavg['precision']))+'\n' )
  file.write('Macro  Recall:'+str(np.mean(macroavg['recall']))+'\n' )
  file.write('Macro  F1-Score:'+str(np.mean(macroavg['f1-score']))+'\n' )
  file.close()
  print("Done")
######################################################################################


logging.set_verbosity_error()
os.environ['TOKENIZERS_PARALLELISM'] = 'false'

# ----------- Dataset Class ----------------
class TextDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels
    def __len__(self):
        return len(self.labels)
    def __getitem__(self, idx):
        item = {k: torch.tensor(v[idx]) for k, v in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx], dtype=torch.long)   # changed to long for CrossEntropy
        return item

# -------- Model Definition ----------------
class BertClassifier(nn.Module):
    def __init__(self, model_name, dropout_rate=DRPT, num_classes=3):  # num_classes=3
        super().__init__()
        self.bert = AutoModel.from_pretrained(model_name, config=config)
        if not BERT_TRAINABLE:
            for p in self.bert.parameters(): p.requires_grad = False
        hidden = self.bert.config.hidden_size
        self.dropout = nn.Dropout(dropout_rate)
        self.fc1 = nn.Linear(hidden, hidden)
        self.act = nn.ELU()
        self.out = nn.Linear(hidden, num_classes)   # output size = num_classes
        # he_uniform init
        nn.init.kaiming_uniform_(self.fc1.weight, nonlinearity='linear')
        nn.init.zeros_(self.fc1.bias)
        nn.init.kaiming_uniform_(self.out.weight, nonlinearity='linear')
        nn.init.zeros_(self.out.bias)
    def forward(self, input_ids, attention_mask):
        o = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        pooled = o.pooler_output if hasattr(o, 'pooler_output') else o.last_hidden_state[:,0]
        x = self.dropout(pooled)
        x = self.act(self.fc1(x))
        return self.out(x)   # raw logits for CrossEntropyLoss

# -------------- Data Loading ---------------
result_path =  r'C:\Users\mojua\Desktop\DL-Code\T3-LLM-Classification-Result.csv'
file_path = r'C:\Users\mojua\Desktop\DL-Code\Dataset\Offensive-24K-T3.xlsx'

df = pd.read_excel(file_path, engine='openpyxl')
df['Tweet'] = df['Tweet'].astype(str)
df.dropna(inplace=True)
df.reset_index(drop=True, inplace=True)

print(df.info())
print(df.columns, df.shape)
print(df.Tag.unique())
df['Tag'] = df['Tag'].replace({1: 0, 2: 1, 3: 2})
print(df.Tag.unique())
xcolumn = 'Tweet'
ycolumn = 'Tag'

# ----------- Tokenizer & Config ------------
bertmodelname = 'bert-base-multilingual-cased'
tokenizer = AutoTokenizer.from_pretrained(bertmodelname, use_fast=True)
config = AutoConfig.from_pretrained(bertmodelname)



Device: cuda
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6594 entries, 0 to 6593
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Unnamed: 0  6594 non-null   int64 
 1   Tweet       6594 non-null   object
 2   Tag         6594 non-null   int64 
dtypes: int64(2), object(1)
memory usage: 154.7+ KB
None
Index(['Unnamed: 0', 'Tweet', 'Tag'], dtype='object') (6594, 3)
[1 2 3]
[0 1 2]


In [9]:
# --------- K-Fold Training ----------------
skf = StratifiedKFold(n_splits=5, random_state=0, shuffle=True)
# Metrics storage (macro-averaged for multi-class)
valf1 = []
reports = []

start = time.time()
print("Local System Time:", time.strftime("%I:%M %p", time.localtime()))

for fold, (tr, te) in enumerate(skf.split(df[xcolumn], df[ycolumn]), 1):
    xtr = df.loc[tr, xcolumn].tolist(); ytr = df.loc[tr, ycolumn].values
    xte = df.loc[te, xcolumn].tolist(); yte = df.loc[te, ycolumn].values
    xtr, xv, ytr, yv = train_test_split(xtr, ytr, test_size=0.15, random_state=0)

    # tokenize
    enc_tr = tokenizer(xtr, padding='max_length', truncation=True, max_length=MAX_LEN)
    enc_v  = tokenizer(xv,  padding='max_length', truncation=True, max_length=MAX_LEN)
    enc_te = tokenizer(xte, padding='max_length', truncation=True, max_length=MAX_LEN)

    # datasets & loaders
    dt_tr = TextDataset(enc_tr, ytr); lt = DataLoader(dt_tr, batch_size=BATCH, shuffle=True)
    dt_v  = TextDataset(enc_v,  yv); lv = DataLoader(dt_v, batch_size=BATCH)
    dt_te = TextDataset(enc_te, yte); le = DataLoader(dt_te, batch_size=BATCH)

    # model, loss, opt
    model = BertClassifier(bertmodelname).to(device)
    crit = nn.CrossEntropyLoss()   # changed to CrossEntropy
    opt = torch.optim.Adam(model.parameters(), lr=LR_RATE)

    best_f, pt = -np.inf, 0

    # train
    for e in range(NEPOCHS):
        model.train()
        for b in lt:
            opt.zero_grad()
            ids = b['input_ids'].to(device); m = b['attention_mask'].to(device)
            lbls = b['labels'].to(device)
            logits = model(ids, m)
            loss = crit(logits, lbls)
            loss.backward(); opt.step()

        # val
        model.eval(); vp, vt = [], []
        with torch.no_grad():
            for b in lv:
                ids = b['input_ids'].to(device); m = b['attention_mask'].to(device)
                logits = model(ids, m)
                preds = torch.argmax(logits, dim=1).cpu().numpy()
                vp.extend(preds.tolist()); vt.extend(b['labels'].cpu().numpy().tolist())

       
        valf1.append(f1_score(vt, vp, average='macro'))       
        vm = valf1[-1]
        if vm > best_f:
            best_f, pt = vm, 0
            torch.save(model.state_dict(), os.path.join(modelpath, f"{modelname}_fold{fold}.bin"))
        else:
            pt += 1
        if DECAY and pt % DECAY_AFTER == 0 and pt != 0:
            for g in opt.param_groups: g['lr'] *= DECAY_RATE
        print(f"Fold{fold} Ep{e+1}/{NEPOCHS} - ValMacroF1={vm:.4f} Pat={pt}")
        if pt >= PATIENCE:
            print(f"Stopping early at epoch {e+1}")
            break

    # test eval
    model.load_state_dict(torch.load(os.path.join(modelpath, f"{modelname}_fold{fold}.bin")))
    model.eval(); tp, tt = [], []
    with torch.no_grad():
        for b in le:
            ids = b['input_ids'].to(device); m = b['attention_mask'].to(device)
            logits = model(ids, m)
            preds = torch.argmax(logits, dim=1).cpu().numpy()
            tp.extend(preds.tolist()); tt.extend(b['labels'].cpu().numpy().tolist())

    #testcm.append(confusion_matrix(tt, tp))
    reports.append(classification_report( tt, tp, output_dict=True, zero_division=0,labels=[0, 1, 2], target_names=['IND 0', 'GRP 1', 'OTH 2']))
    
    print(f"Completed fold {fold}/5")
    del model; torch.cuda.empty_cache(); gc.collect()
    
WriteResutls(reports)

Local System Time: 12:42 PM
Fold1 Ep1/20 - ValMacroF1=0.5350 Pat=0
Fold1 Ep2/20 - ValMacroF1=0.6852 Pat=0
Fold1 Ep3/20 - ValMacroF1=0.7021 Pat=0
Fold1 Ep4/20 - ValMacroF1=0.7368 Pat=0
Fold1 Ep5/20 - ValMacroF1=0.7050 Pat=1
Fold1 Ep6/20 - ValMacroF1=0.7090 Pat=2
Fold1 Ep7/20 - ValMacroF1=0.7176 Pat=3
Fold1 Ep8/20 - ValMacroF1=0.7231 Pat=4
Stopping early at epoch 8
Completed fold 1/5
Fold2 Ep1/20 - ValMacroF1=0.5043 Pat=0
Fold2 Ep2/20 - ValMacroF1=0.6575 Pat=0
Fold2 Ep3/20 - ValMacroF1=0.7249 Pat=0
Fold2 Ep4/20 - ValMacroF1=0.7295 Pat=0
Fold2 Ep5/20 - ValMacroF1=0.7412 Pat=0
Fold2 Ep6/20 - ValMacroF1=0.7155 Pat=1
Fold2 Ep7/20 - ValMacroF1=0.7155 Pat=2
Fold2 Ep8/20 - ValMacroF1=0.7287 Pat=3
Fold2 Ep9/20 - ValMacroF1=0.7356 Pat=4
Stopping early at epoch 9
Completed fold 2/5
Fold3 Ep1/20 - ValMacroF1=0.4871 Pat=0
Fold3 Ep2/20 - ValMacroF1=0.6681 Pat=0
Fold3 Ep3/20 - ValMacroF1=0.7053 Pat=0
Fold3 Ep4/20 - ValMacroF1=0.6938 Pat=1
Fold3 Ep5/20 - ValMacroF1=0.6715 Pat=2
Fold3 Ep6/20 - ValMacroF

In [11]:
import warnings; warnings.filterwarnings('ignore')
import gc, os,time,numpy as np, pandas as pd, datetime
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix,classification_report
from sklearn.model_selection import StratifiedKFold, train_test_split
import torch, torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, MT5EncoderModel, logging, AutoConfig, AutoModel
# ------------ GPU Setup ------------
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Device: {device}")
if device.type == 'cuda':torch.cuda.empty_cache()

# ---------- Hyperparameters ---------
MAX_LEN = 128
EMBED_SIZE = 768
BERT_TRAINABLE = True
DRPT = 0.4
FC_ACT = 'elu'
LR_RATE = 6e-5
BATCH = 30  # reduced to lower GPU memory usage
NEPOCHS = 20
PATIENCE = 4
DECAY = True
DECAY_RATE = 0.3
DECAY_AFTER = 1
NUM_CLASSES = 3 

modelname = 'hfFineTuneMT5'
modelpath = os.path.join('.', 'Saved Models', modelname)
modelresults = os.path.join('.', 'Model Results')
modelsummaries = os.path.join('.', 'Model - Summaries-Figures')
for d in [modelpath, modelresults, modelsummaries]:
    os.makedirs(d, exist_ok=True)

# ------------ Utils ----------------
def hms_string(sec):
    h = int(sec // 3600)
    m = int((sec % 3600) // 60)
    s = sec % 60
    return f"{h} hrs {m:02d} mins {s:05.2f} secs"

logging.set_verbosity_error()
# -------- Dataset Class -----------
class TextDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels
    def __len__(self):
        return len(self.labels)
    def __getitem__(self, idx):
        item = {k: torch.tensor(v[idx]) for k, v in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx], dtype=torch.long)
        return item

# -------- Model Definition --------      
class MT5Classifier(nn.Module):
    def __init__(self, model_name, dropout_rate=DRPT, num_classes=NUM_CLASSES):
        super().__init__()
        self.encoder = MT5EncoderModel.from_pretrained(model_name)
        if not BERT_TRAINABLE:
            for p in self.encoder.parameters():
                p.requires_grad = False
        hidden = self.encoder.config.hidden_size
        self.dropout = nn.Dropout(dropout_rate)
        self.fc1 = nn.Linear(hidden, hidden)
        self.act = nn.ELU()
        self.out = nn.Linear(hidden, num_classes)    # output layer for multi-class
        # init
        nn.init.kaiming_uniform_(self.fc1.weight, nonlinearity='linear')
        nn.init.zeros_(self.fc1.bias)
        nn.init.kaiming_uniform_(self.out.weight, nonlinearity='linear')
        nn.init.zeros_(self.out.bias)
    def forward(self, input_ids, attention_mask):
        out = self.encoder(input_ids=input_ids, attention_mask=attention_mask)
        pool = out.last_hidden_state[:, 0]
        x = self.dropout(pool)
        x = self.act(self.fc1(x))
        return self.out(x)   # raw logits

# --------- Data Loading ------------
########################################################
def WriteResutls(reports):
  unt0 = {'precision':[], 'recall':[], 'f1-score':[] }
  tin1 = {'precision':[], 'recall':[], 'f1-score':[] }
  Oth2 = {'precision':[], 'recall':[], 'f1-score':[] }
  macroavg = {'precision':[], 'recall':[], 'f1-score':[] }
  weightedavg = {'precision':[], 'recall':[], 'f1-score':[] }
  accu = []
  for report in reports:
    for k,v in report.items():
      if 'IND' in k:
        unt0['precision'].append(v['precision'])
        unt0['recall'].append(v['recall'])
        unt0['f1-score'].append(v['f1-score'])

      elif 'GRP' in k:
        tin1['precision'].append(v['precision'])
        tin1['recall'].append(v['recall'])
        tin1['f1-score'].append(v['f1-score'])

      elif 'OTH' in k:
        Oth2['precision'].append(v['precision'])
        Oth2['recall'].append(v['recall'])
        Oth2['f1-score'].append(v['f1-score'])

      elif 'macro avg' in k:
        macroavg['precision'].append(v['precision'])
        macroavg['recall'].append(v['recall'])
        macroavg['f1-score'].append(v['f1-score'])

      elif 'weighted avg' in k:
        weightedavg['precision'].append(v['precision'])
        weightedavg['recall'].append(v['recall'])
        weightedavg['f1-score'].append(v['f1-score'])
      elif 'accuracy' in k:
        accu.append(v)

  print('Accuracy:',np.mean(accu))
  print("")
  print('IND 1 Precision:',np.mean(unt0['precision']))
  print('IND 1 Recall:',np.mean(unt0['recall']))
  print('IND 1 F1-Score:',np.mean(unt0['f1-score']))
  print("")
  print('GRP 2 Precision:',np.mean(tin1['precision']))
  print('GRP 2 Recall:',np.mean(tin1['recall']))
  print('GRP 2 F1-Score:',np.mean(tin1['f1-score']))

  print("")
  print('OTH 3 Precision:',np.mean(Oth2['precision']))
  print('OTH 3 Recall:',np.mean(Oth2['recall']))
  print('OTH 3 F1-Score:',np.mean(Oth2['f1-score']))

  print("")
  print('Weighted Avg Precision:',np.mean(weightedavg['precision']))
  print('Weighted Avg Recall:',np.mean(weightedavg['recall']))
  print('Weighted Avg F1-Score:',np.mean(weightedavg['f1-score']))

  print("")
  print('Macro  Precision:',np.mean(macroavg['precision']))
  print('Macro  Recall:',np.mean(macroavg['recall']))
  print('Macro  F1-Score:',np.mean(macroavg['f1-score']))

  file = open( result_path, mode='a' )
  file.write( modelname+ ' ( '+ datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") +') \n' )
  file.write( 'Accuracy:'+str(np.mean(accu))+'\n' )
  file.write('IND 1 Precision:'+str(np.mean(unt0['precision']))+'\n' )
  file.write('IND 1 Recall:'+str(np.mean(unt0['recall']))+'\n' )
  file.write('IND 1 F1-Score:'+str(np.mean(unt0['f1-score']))+'\n' )

  file.write('GRP 2 Precision:'+str(np.mean(tin1['precision']))+'\n' )
  file.write('GRP 2 Recall:'+str(np.mean(tin1['recall']))+'\n' )
  file.write('GRP 2 F1-Score:'+str(np.mean(tin1['f1-score']))+'\n' )

  file.write('OTH 3 Precision:'+str(np.mean(Oth2['precision']))+'\n' )
  file.write('OTH 3 Recall:'+str(np.mean(Oth2['recall']))+'\n' )
  file.write('OTH 3 F1-Score:'+str(np.mean(Oth2['f1-score']))+'\n' )

  file.write('Weighted Avg Precision:'+str(np.mean(weightedavg['precision']))+'\n' )
  file.write('Weighted Avg Recall:'+str(np.mean(weightedavg['recall']))+'\n' )
  file.write('Weighted Avg F1-Score:'+str(np.mean(weightedavg['f1-score']))+'\n' )

  file.write('Macro  Precision:'+str(np.mean(macroavg['precision']))+'\n' )
  file.write('Macro  Recall:'+str(np.mean(macroavg['recall']))+'\n' )
  file.write('Macro  F1-Score:'+str(np.mean(macroavg['f1-score']))+'\n' )
  file.close()
  print("Done")
######################################################################################
# -------------- Data Loading ---------------
result_path =  r'C:\Users\mojua\Desktop\DL-Code\T3-LLM-Classification-Result.csv'
file_path = r'C:\Users\mojua\Desktop\DL-Code\Dataset\Offensive-24K-T3.xlsx'

df = pd.read_excel(file_path, engine='openpyxl')
df['Tweet'] = df['Tweet'].astype(str)
df.dropna(inplace=True)
df.reset_index(drop=True, inplace=True)
print(df.columns, df.shape)
print(df.Tag.unique())
df['Tag'] = df['Tag'].replace({1: 0, 2: 1, 3: 2})
print(df.Tag.unique())
xcolumn = 'Tweet'
ycolumn = 'Tag'
# --------- Tokenizer Setup --------
bertmodelname = 'google/mt5-base'
tokenizer = AutoTokenizer.from_pretrained(bertmodelname)


Device: cuda
Index(['Unnamed: 0', 'Tweet', 'Tag'], dtype='object') (6594, 3)
[1 2 3]
[0 1 2]


In [13]:
# --- K-Fold Training -------------
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
val_f1_macro, test_f1_macro = [], []
start = time.time()
print('Start:', time.strftime("%I:%M %p"))
reports = []
for fold, (tr, te) in enumerate(skf.split(df[xcolumn], df[ycolumn]), 1):
    # split
    X_tr = df.loc[tr, xcolumn].tolist()
    y_tr = df.loc[tr, ycolumn].values
    X_te = df.loc[te, xcolumn].tolist()
    y_te = df.loc[te, ycolumn].values
    # train/val split
    X_tr, X_val, y_tr, y_val = train_test_split(X_tr, y_tr, test_size=0.15, random_state=0
    )
    # encodings
    enc_tr = tokenizer(X_tr, padding='max_length', truncation=True, max_length=MAX_LEN)
    enc_val = tokenizer(X_val, padding='max_length', truncation=True, max_length=MAX_LEN)
    enc_te = tokenizer(X_te, padding='max_length', truncation=True, max_length=MAX_LEN)
    # datasets
    ds_tr = TextDataset(enc_tr, y_tr)
    ds_val = TextDataset(enc_val, y_val)
    ds_te = TextDataset(enc_te, y_te)

    # dataloaders
    ld_tr = DataLoader(ds_tr, batch_size=BATCH, shuffle=True, pin_memory=True)
    ld_val = DataLoader(ds_val, batch_size=BATCH, pin_memory=True)
    ld_te = DataLoader(ds_te, batch_size=BATCH, pin_memory=True)

    # model, loss, optimizer
    model = MT5Classifier(bertmodelname).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=LR_RATE)
    best_val_f1, patience = 0.0, 0
    for epoch in range(NEPOCHS):
        model.train()
        for b in ld_tr:
            optimizer.zero_grad()
            ids = b['input_ids'].to(device)
            masks = b['attention_mask'].to(device)
            labels = b['labels'].to(device)

            logits = model(ids, masks)
            loss = criterion(logits, labels)
            loss.backward()
            optimizer.step()

        # validation
        model.eval()
        vt, vp = [], []
        with torch.no_grad():
            for b in ld_val:
                logits = model(b['input_ids'].to(device), b['attention_mask'].to(device))
                preds = torch.argmax(logits, dim=1).cpu().numpy()
                vp.extend(preds.tolist())
                vt.extend(b['labels'].cpu().numpy().tolist())       
        f1m = f1_score(vt, vp, average='macro')
        # save best
        if f1m > best_val_f1:
            best_val_f1, patience = f1m, 0
            torch.save(model.state_dict(), os.path.join(modelpath, f"{modelname}_fold{fold}.pt"))
        else:
            patience += 1
            if DECAY and patience % DECAY_AFTER == 0:
                for g in optimizer.param_groups:
                    g['lr'] *= DECAY_RATE

        print(f"Fold{fold} Ep{epoch+1}: Val MacroF1={f1m:.4f} Pat={patience}")
        if patience >= PATIENCE:
            print(f"Stopping early at epoch {epoch+1}")
            break

    # test
    model.load_state_dict(torch.load(os.path.join(modelpath, f"{modelname}_fold{fold}.pt")))
    model.eval()
    tt, tp = [], []
    with torch.no_grad():
        for b in ld_te:
            logits = model(b['input_ids'].to(device), b['attention_mask'].to(device))
            preds = torch.argmax(logits, dim=1).cpu().numpy()
            tp.extend(preds.tolist())
            tt.extend(b['labels'].cpu().numpy().tolist())
    
   # testcm.append(confusion_matrix(tt, tp))
    reports.append(classification_report( tt, tp, output_dict=True, zero_division=0,labels=[0, 1, 2], target_names=['IND 0', 'GRP 1', 'OTH 2']))
    print(f"Fold{fold} done at {time.strftime('%I:%M %p')}")

    # cleanup
    del model, optimizer, criterion
    torch.cuda.empty_cache()
    gc.collect()

print(f"Total runtime: {hms_string(time.time()-start)}")
WriteResutls(reports)

Start: 01:08 PM
Fold1 Ep1: Val MacroF1=0.2955 Pat=0
Fold1 Ep2: Val MacroF1=0.3455 Pat=0
Fold1 Ep3: Val MacroF1=0.3784 Pat=0
Fold1 Ep4: Val MacroF1=0.4874 Pat=0
Fold1 Ep5: Val MacroF1=0.4095 Pat=1
Fold1 Ep6: Val MacroF1=0.5169 Pat=0
Fold1 Ep7: Val MacroF1=0.5014 Pat=1
Fold1 Ep8: Val MacroF1=0.5157 Pat=2
Fold1 Ep9: Val MacroF1=0.4949 Pat=3
Fold1 Ep10: Val MacroF1=0.4962 Pat=4
Stopping early at epoch 10
Fold1 done at 01:17 PM
Fold2 Ep1: Val MacroF1=0.2843 Pat=0
Fold2 Ep2: Val MacroF1=0.2843 Pat=1
Fold2 Ep3: Val MacroF1=0.2843 Pat=2
Fold2 Ep4: Val MacroF1=0.2843 Pat=3
Fold2 Ep5: Val MacroF1=0.2843 Pat=4
Stopping early at epoch 5
Fold2 done at 01:27 PM
Fold3 Ep1: Val MacroF1=0.2953 Pat=0
Fold3 Ep2: Val MacroF1=0.3122 Pat=0
Fold3 Ep3: Val MacroF1=0.4014 Pat=0
Fold3 Ep4: Val MacroF1=0.4532 Pat=0
Fold3 Ep5: Val MacroF1=0.4529 Pat=1
Fold3 Ep6: Val MacroF1=0.4725 Pat=0
Fold3 Ep7: Val MacroF1=0.4825 Pat=0
Fold3 Ep8: Val MacroF1=0.4782 Pat=1
Fold3 Ep9: Val MacroF1=0.4740 Pat=2
Fold3 Ep10: Val Macr

In [None]:
#Start from here in case of failure

In [20]:
import warnings
warnings.filterwarnings('ignore')
import gc, os,time, numpy as np,pandas as pd, datetime
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix,classification_report
from sklearn.model_selection import StratifiedKFold, train_test_split
import torch, torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoConfig, AutoModel, logging
# ------------- GPU Setup ------------
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Device: {device}")
if device.type == 'cuda': torch.cuda.empty_cache()
# ---------- Hyperparameters ---------
MAX_LEN = 128
EMBED_SIZE = 768
BERT_TRAINABLE = True
DRPT = 0.4
FC_ACT = 'elu'
LR_RATE = 9e-6
BATCH = 32
NEPOCHS = 20
PATIENCE = 5
DECAY = True
DECAY_RATE = 0.3
DECAY_AFTER = 1
num_classes = 3

modelname = 'hfFineTuneMuril'
modelpath = os.path.join('.', 'Saved Models', modelname)
modelresults = os.path.join('.', 'Model Results')
modelsummaries = os.path.join('.', 'Model - Summaries-Figures')
for d in [modelpath, modelresults, modelsummaries]:
    os.makedirs(d, exist_ok=True)

# ---------- Utils ------------------
def hms_string(sec):
    h = int(sec // 3600)
    m = int((sec % 3600) // 60)
    s = sec % 60
    return f"{h} hrs {m:02d} mins {s:05.2f} secs"

logging.set_verbosity_error()

# -------- Dataset Class -----------
class TextDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels
    def __len__(self): return len(self.labels)
    def __getitem__(self, idx):
        item = {k: torch.tensor(v[idx]) for k, v in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx], dtype=torch.long)
        return item

# ------- Model Definition ----------
class MurilClassifier(nn.Module):
    def __init__(self, model_name, num_classes, dropout_rate=DRPT):
        super().__init__()
        self.bert = AutoModel.from_pretrained(model_name)
        if not BERT_TRAINABLE:
            for p in self.bert.parameters(): p.requires_grad = False
        hidden = self.bert.config.hidden_size
        self.dropout = nn.Dropout(dropout_rate)
        self.fc1 = nn.Linear(hidden, hidden)
        self.act = nn.ELU()
        self.out = nn.Linear(hidden, num_classes)
        # he_uniform init
        nn.init.kaiming_uniform_(self.fc1.weight, nonlinearity='linear')
        nn.init.zeros_(self.fc1.bias)
        nn.init.kaiming_uniform_(self.out.weight, nonlinearity='linear')
        nn.init.zeros_(self.out.bias)

    def forward(self, input_ids, attention_mask):
        o = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        pooled = o.pooler_output if hasattr(o, 'pooler_output') and o.pooler_output is not None else o.last_hidden_state[:,0]
        x = self.dropout(pooled)
        x = self.act(self.fc1(x))
        return self.out(x)  # logits
        
# ---------- Data Loading -----------
########################################################
def WriteResutls(reports):

  unt0 = {'precision':[], 'recall':[], 'f1-score':[] }
  tin1 = {'precision':[], 'recall':[], 'f1-score':[] }
  Oth2 = {'precision':[], 'recall':[], 'f1-score':[] }
  macroavg = {'precision':[], 'recall':[], 'f1-score':[] }
  weightedavg = {'precision':[], 'recall':[], 'f1-score':[] }
  accu = []
  for report in reports:
    for k,v in report.items():
      if 'IND' in k:
        unt0['precision'].append(v['precision'])
        unt0['recall'].append(v['recall'])
        unt0['f1-score'].append(v['f1-score'])

      elif 'GRP' in k:
        tin1['precision'].append(v['precision'])
        tin1['recall'].append(v['recall'])
        tin1['f1-score'].append(v['f1-score'])

      elif 'OTH' in k:
        Oth2['precision'].append(v['precision'])
        Oth2['recall'].append(v['recall'])
        Oth2['f1-score'].append(v['f1-score'])

      elif 'macro avg' in k:
        macroavg['precision'].append(v['precision'])
        macroavg['recall'].append(v['recall'])
        macroavg['f1-score'].append(v['f1-score'])

      elif 'weighted avg' in k:
        weightedavg['precision'].append(v['precision'])
        weightedavg['recall'].append(v['recall'])
        weightedavg['f1-score'].append(v['f1-score'])
      elif 'accuracy' in k:
        accu.append(v)

  print('Accuracy:',np.mean(accu))
  print("")
  print('IND 1 Precision:',np.mean(unt0['precision']))
  print('IND 1 Recall:',np.mean(unt0['recall']))
  print('IND 1 F1-Score:',np.mean(unt0['f1-score']))
  print("")
  print('GRP 2 Precision:',np.mean(tin1['precision']))
  print('GRP 2 Recall:',np.mean(tin1['recall']))
  print('GRP 2 F1-Score:',np.mean(tin1['f1-score']))

  print("")
  print('OTH 3 Precision:',np.mean(Oth2['precision']))
  print('OTH 3 Recall:',np.mean(Oth2['recall']))
  print('OTH 3 F1-Score:',np.mean(Oth2['f1-score']))

  print("")
  print('Weighted Avg Precision:',np.mean(weightedavg['precision']))
  print('Weighted Avg Recall:',np.mean(weightedavg['recall']))
  print('Weighted Avg F1-Score:',np.mean(weightedavg['f1-score']))

  print("")
  print('Macro  Precision:',np.mean(macroavg['precision']))
  print('Macro  Recall:',np.mean(macroavg['recall']))
  print('Macro  F1-Score:',np.mean(macroavg['f1-score']))

  file = open( result_path, mode='a' )
  file.write( modelname+ ' ( '+ datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") +') \n' )
  file.write( 'Accuracy:'+str(np.mean(accu))+'\n' )
  file.write('IND 1 Precision:'+str(np.mean(unt0['precision']))+'\n' )
  file.write('IND 1 Recall:'+str(np.mean(unt0['recall']))+'\n' )
  file.write('IND 1 F1-Score:'+str(np.mean(unt0['f1-score']))+'\n' )

  file.write('GRP 2 Precision:'+str(np.mean(tin1['precision']))+'\n' )
  file.write('GRP 2 Recall:'+str(np.mean(tin1['recall']))+'\n' )
  file.write('GRP 2 F1-Score:'+str(np.mean(tin1['f1-score']))+'\n' )

  file.write('OTH 3 Precision:'+str(np.mean(Oth2['precision']))+'\n' )
  file.write('OTH 3 Recall:'+str(np.mean(Oth2['recall']))+'\n' )
  file.write('OTH 3 F1-Score:'+str(np.mean(Oth2['f1-score']))+'\n' )

  file.write('Weighted Avg Precision:'+str(np.mean(weightedavg['precision']))+'\n' )
  file.write('Weighted Avg Recall:'+str(np.mean(weightedavg['recall']))+'\n' )
  file.write('Weighted Avg F1-Score:'+str(np.mean(weightedavg['f1-score']))+'\n' )

  file.write('Macro  Precision:'+str(np.mean(macroavg['precision']))+'\n' )
  file.write('Macro  Recall:'+str(np.mean(macroavg['recall']))+'\n' )
  file.write('Macro  F1-Score:'+str(np.mean(macroavg['f1-score']))+'\n' )
  file.close()
  print("Done")
######################################################################################
# -------------- Data Loading ---------------
result_path =  r'C:\Users\mojua\Desktop\DL-Code\T3-LLM-Classification-Result.csv'
file_path = r'C:\Users\mojua\Desktop\DL-Code\Dataset\Offensive-24K-T3.xlsx'

df = pd.read_excel(file_path, engine='openpyxl')
df['Tweet'] = df['Tweet'].astype(str)
df.dropna(inplace=True)
df.reset_index(drop=True, inplace=True)
print(df.head())
print(df.Tag.unique())
df['Tag'] = df['Tag'].replace({1: 0, 2: 1, 3: 2})
print(df.Tag.unique())
xcolumn = 'Tweet'
ycolumn = 'Tag'

Device: cuda
   Unnamed: 0                                              Tweet  Tag
0           3                   USER گھٹیا انسان دنیا ہی چھوڑ دو    1
1          11  USER PMLN میں آپ کے بارے میں میری بہتر راۓ تھی...    1
2          20  ہمیں تو آج تک سمجھ نہیں آئی کہ کم عقل عیسائی ح...    2
3          21  کیا پتہ اس گدھے کو بھی ہینڈلرز کی طرف سے گرین ...    1
4          25  USER ون آن ون والی پہلے اپنی اوقات تو کر لو۔ ی...    1
[1 2 3]
[0 1 2]


In [22]:
# ----- K-Fold Training -------------
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
# ---------- Tokenizer -------------
bertmodelname = 'google/muril-base-cased'
tokenizer = AutoTokenizer.from_pretrained(bertmodelname)
# metrics storage
reports = []
val_f1 =[]
start = time.time()
print('Start:', time.strftime('%I:%M %p'))
for fold, (tr, te) in enumerate(skf.split(df[xcolumn], df[ycolumn]), 1):
    X_tr = df.loc[tr, xcolumn].tolist()
    y_tr = df.loc[tr, ycolumn].values
    X_te = df.loc[te, xcolumn].tolist()
    y_te = df.loc[te, ycolumn].values

    X_tr, X_val, y_tr, y_val = train_test_split(X_tr, y_tr, test_size=0.15, random_state=0)

    enc_tr = tokenizer(X_tr,   padding='max_length', truncation=True, max_length=MAX_LEN)
    enc_val= tokenizer(X_val,  padding='max_length', truncation=True, max_length=MAX_LEN)
    enc_te = tokenizer(X_te,   padding='max_length', truncation=True, max_length=MAX_LEN)

    ld_tr  = DataLoader(TextDataset(enc_tr, y_tr), batch_size=BATCH, shuffle=True)
    ld_val = DataLoader(TextDataset(enc_val, y_val), batch_size=BATCH)
    ld_te  = DataLoader(TextDataset(enc_te, y_te), batch_size=BATCH)

    model = MurilClassifier(bertmodelname, num_classes).to(device)
    crit  = nn.CrossEntropyLoss()
    opt   = torch.optim.Adam(model.parameters(), lr=LR_RATE)
    best_val_f1, patience = -np.inf, 0
    for ep in range(NEPOCHS):
        model.train()
        for b in ld_tr:
            opt.zero_grad()
            ids    = b['input_ids'].to(device)
            mask   = b['attention_mask'].to(device)
            labels = b['labels'].to(device)
            logits = model(ids, mask)
            loss   = crit(logits, labels)
            loss.backward()
            opt.step()

        # validation
        model.eval()
        preds, trues = [], []
        with torch.no_grad():
            for b in ld_val:
                ids    = b['input_ids'].to(device)
                mask   = b['attention_mask'].to(device)
                logits = model(ids, mask).cpu().numpy()
                preds.extend(np.argmax(logits, axis=1).tolist())
                trues.extend(b['labels'].cpu().numpy().tolist()) 
        val_f1.append(       f1_score(trues, preds, average='macro'))
        vf1 = val_f1[-1]
        if vf1 > best_val_f1:
            best_val_f1, patience = vf1, 0
            torch.save(model.state_dict(), os.path.join(modelpath, f"{modelname}_fold{fold}.pt"))
        else:
            patience += 1

        if DECAY and patience % DECAY_AFTER == 0 and patience != 0:
            for g in opt.param_groups:
                g['lr'] *= DECAY_RATE

        print(f"Fold{fold} Ep{ep+1}/{NEPOCHS} - ValMacroF1={vf1:.4f} Pat={patience}")
        if patience >= PATIENCE:
            break

    # test evaluation
    model.load_state_dict(torch.load(os.path.join(modelpath, f"{modelname}_fold{fold}.pt")))
    model.eval()
    preds, trues = [], []
    with torch.no_grad():
        for b in ld_te:
            ids    = b['input_ids'].to(device)
            mask   = b['attention_mask'].to(device)
            logits = model(ids, mask).cpu().numpy()
            preds.extend(np.argmax(logits, axis=1).tolist())
            trues.extend(b['labels'].cpu().numpy().tolist())
    reports.append(classification_report( trues, preds, output_dict=True, zero_division=0,labels=[0, 1, 2], target_names=['IND 0', 'GRP 1', 'OTH 2']))
    print(f"Completed fold {fold}/5")
    del model
    torch.cuda.empty_cache()
    gc.collect()

print(f"Total runtime: {hms_string(time.time()-start)}")
WriteResutls(reports)

Start: 02:58 PM
Fold1 Ep1/20 - ValMacroF1=0.2821 Pat=0
Fold1 Ep2/20 - ValMacroF1=0.2821 Pat=1
Fold1 Ep3/20 - ValMacroF1=0.2821 Pat=2
Fold1 Ep4/20 - ValMacroF1=0.2821 Pat=3
Fold1 Ep5/20 - ValMacroF1=0.2821 Pat=4
Fold1 Ep6/20 - ValMacroF1=0.2821 Pat=5
Completed fold 1/5
Fold2 Ep1/20 - ValMacroF1=0.2843 Pat=0
Fold2 Ep2/20 - ValMacroF1=0.2843 Pat=1
Fold2 Ep3/20 - ValMacroF1=0.2843 Pat=2
Fold2 Ep4/20 - ValMacroF1=0.2843 Pat=3
Fold2 Ep5/20 - ValMacroF1=0.4752 Pat=0
Fold2 Ep6/20 - ValMacroF1=0.4880 Pat=0
Fold2 Ep7/20 - ValMacroF1=0.4807 Pat=1
Fold2 Ep8/20 - ValMacroF1=0.4815 Pat=2
Fold2 Ep9/20 - ValMacroF1=0.4797 Pat=3
Fold2 Ep10/20 - ValMacroF1=0.4797 Pat=4
Fold2 Ep11/20 - ValMacroF1=0.4797 Pat=5
Completed fold 2/5
Fold3 Ep1/20 - ValMacroF1=0.2860 Pat=0
Fold3 Ep2/20 - ValMacroF1=0.2860 Pat=1
Fold3 Ep3/20 - ValMacroF1=0.2860 Pat=2
Fold3 Ep4/20 - ValMacroF1=0.4751 Pat=0
Fold3 Ep5/20 - ValMacroF1=0.4759 Pat=0
Fold3 Ep6/20 - ValMacroF1=0.4782 Pat=0
Fold3 Ep7/20 - ValMacroF1=0.4812 Pat=0
Fold3 Ep

In [23]:
########################################################
def WriteResutls(reports):
  unt0 = {'precision':[], 'recall':[], 'f1-score':[] }
  tin1 = {'precision':[], 'recall':[], 'f1-score':[] }
  Oth2 = {'precision':[], 'recall':[], 'f1-score':[] }
  macroavg = {'precision':[], 'recall':[], 'f1-score':[] }
  weightedavg = {'precision':[], 'recall':[], 'f1-score':[] }
  accu = []
  for report in reports:
    for k,v in report.items():
      if 'IND' in k:
        unt0['precision'].append(v['precision'])
        unt0['recall'].append(v['recall'])
        unt0['f1-score'].append(v['f1-score'])

      elif 'GRP' in k:
        tin1['precision'].append(v['precision'])
        tin1['recall'].append(v['recall'])
        tin1['f1-score'].append(v['f1-score'])

      elif 'OTH' in k:
        Oth2['precision'].append(v['precision'])
        Oth2['recall'].append(v['recall'])
        Oth2['f1-score'].append(v['f1-score'])

      elif 'macro avg' in k:
        macroavg['precision'].append(v['precision'])
        macroavg['recall'].append(v['recall'])
        macroavg['f1-score'].append(v['f1-score'])

      elif 'weighted avg' in k:
        weightedavg['precision'].append(v['precision'])
        weightedavg['recall'].append(v['recall'])
        weightedavg['f1-score'].append(v['f1-score'])
      elif 'accuracy' in k:
        accu.append(v)

  print('Accuracy:',np.mean(accu))
  print("")
  print('IND 1 Precision:',np.mean(unt0['precision']))
  print('IND 1 Recall:',np.mean(unt0['recall']))
  print('IND 1 F1-Score:',np.mean(unt0['f1-score']))
  print("")
  print('GRP 2 Precision:',np.mean(tin1['precision']))
  print('GRP 2 Recall:',np.mean(tin1['recall']))
  print('GRP 2 F1-Score:',np.mean(tin1['f1-score']))

  print("")
  print('OTH 3 Precision:',np.mean(Oth2['precision']))
  print('OTH 3 Recall:',np.mean(Oth2['recall']))
  print('OTH 3 F1-Score:',np.mean(Oth2['f1-score']))

  print("")
  print('Weighted Avg Precision:',np.mean(weightedavg['precision']))
  print('Weighted Avg Recall:',np.mean(weightedavg['recall']))
  print('Weighted Avg F1-Score:',np.mean(weightedavg['f1-score']))

  print("")
  print('Macro  Precision:',np.mean(macroavg['precision']))
  print('Macro  Recall:',np.mean(macroavg['recall']))
  print('Macro  F1-Score:',np.mean(macroavg['f1-score']))

  file = open( result_path, mode='a' )
  file.write( modelname+ ' ( '+ datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") +') \n' )
  file.write( 'Accuracy:'+str(np.mean(accu))+'\n' )
  file.write('IND 1 Precision:'+str(np.mean(unt0['precision']))+'\n' )
  file.write('IND 1 Recall:'+str(np.mean(unt0['recall']))+'\n' )
  file.write('IND 1 F1-Score:'+str(np.mean(unt0['f1-score']))+'\n' )

  file.write('GRP 2 Precision:'+str(np.mean(tin1['precision']))+'\n' )
  file.write('GRP 2 Recall:'+str(np.mean(tin1['recall']))+'\n' )
  file.write('GRP 2 F1-Score:'+str(np.mean(tin1['f1-score']))+'\n' )

  file.write('OTH 3 Precision:'+str(np.mean(Oth2['precision']))+'\n' )
  file.write('OTH 3 Recall:'+str(np.mean(Oth2['recall']))+'\n' )
  file.write('OTH 3 F1-Score:'+str(np.mean(Oth2['f1-score']))+'\n' )

  file.write('Weighted Avg Precision:'+str(np.mean(weightedavg['precision']))+'\n' )
  file.write('Weighted Avg Recall:'+str(np.mean(weightedavg['recall']))+'\n' )
  file.write('Weighted Avg F1-Score:'+str(np.mean(weightedavg['f1-score']))+'\n' )

  file.write('Macro  Precision:'+str(np.mean(macroavg['precision']))+'\n' )
  file.write('Macro  Recall:'+str(np.mean(macroavg['recall']))+'\n' )
  file.write('Macro  F1-Score:'+str(np.mean(macroavg['f1-score']))+'\n' )
  file.close()
  print("Done")
######################################################################################

In [24]:
import warnings; warnings.filterwarnings('ignore')
import gc, os, time, numpy as np, pandas as pd, datetime
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix,classification_report
from sklearn.model_selection import StratifiedKFold, train_test_split
import torch, torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from transformers import XLMRobertaTokenizer, XLMRobertaConfig, XLMRobertaModel, logging
# ---------------- GPU Setup ----------------
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Device: {device}")
if device.type == 'cuda': torch.cuda.empty_cache()
# ------------- Hyperparameters ------------
MAX_LEN = 128
EMBED_SIZE = 768  # same as hidden_size
BERT_TRAINABLE = True
DRPT = 0.4
FC_WEIGHTS_INIT = 'he_uniform'
FC_ACT = 'elu'
LR_RATE = 9e-6
BATCH = 32
NEPOCHS = 20
PATIENCE = 5
DECAY = True
DECAY_RATE = 0.3
DECAY_AFTER = 1
num_classes = 3

result_path =  r'C:\Users\mojua\Desktop\DL-Code\T3-LLM-Classification-Result.csv'
file_path = r'C:\Users\mojua\Desktop\DL-Code\Dataset\Offensive-24K-T3.xlsx'
modelname = 'hfFineTuneRoberta'

modelpath = os.path.join('.', 'Saved Models', modelname)
modelresults = os.path.join('.', 'Model Results')
modelsummaries = os.path.join('.', 'Model - Summaries-Figures')
for d in [modelpath, modelresults, modelsummaries]: os.makedirs(d, exist_ok=True)

# --------------- Utils ---------------------
def hms_string(sec_elapsed):
    h = int(sec_elapsed / 3600)
    m = int((sec_elapsed % 3600) / 60)
    s = sec_elapsed % 60
    return f"{h} hrs {m:02d} mins {s:05.2f} secs"

logging.set_verbosity_error()

# ----------- Dataset Class ----------------
class TextDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        item = {k: torch.tensor(v[idx]) for k, v in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx], dtype=torch.long)
        return item

class RobertaClassifier(nn.Module):
    def __init__(self, model_name, num_classes, dropout_rate=DRPT):
        super().__init__()
        self.bert = XLMRobertaModel.from_pretrained(model_name)
        if not BERT_TRAINABLE:
            for param in self.bert.parameters():
                param.requires_grad = False
        hidden = self.bert.config.hidden_size
        self.dropout = nn.Dropout(dropout_rate)
        self.fc1 = nn.Linear(hidden, hidden)
        self.act = nn.ELU()
        self.out = nn.Linear(hidden, num_classes)
        # he_uniform init
        nn.init.kaiming_uniform_(self.fc1.weight, nonlinearity='linear')
        nn.init.zeros_(self.fc1.bias)
        nn.init.kaiming_uniform_(self.out.weight, nonlinearity='linear')
        nn.init.zeros_(self.out.bias)

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        pooled = outputs.last_hidden_state[:, 0]
        x = self.dropout(pooled)
        x = self.act(self.fc1(x))
        return self.out(x)  # logits
# -------------- Data Loading ---------------
result_path =  r'C:\Users\mojua\Desktop\DL-Code\T3-LLM-Classification-Result.csv'
file_path = r'C:\Users\mojua\Desktop\DL-Code\Dataset\Offensive-24K-T3.xlsx'
df = pd.read_excel(file_path, engine='openpyxl')
df['Tweet'] = df['Tweet'].astype(str)
df.dropna(inplace=True)
df.reset_index(drop=True, inplace=True)
print(df.Tag.unique())
df['Tag'] = df['Tag'].replace({1: 0, 2: 1, 3: 2})
print(df.Tag.unique())
xcolumn = 'Tweet'
ycolumn = 'Tag'
# ----------- Tokenizer Setup --------------
bertmodelname = 'xlm-roberta-base'
tokenizer = XLMRobertaTokenizer.from_pretrained(bertmodelname)

# -------- K-Fold Training -----------------

Device: cuda
[1 2 3]
[0 1 2]


In [28]:
skf = StratifiedKFold(n_splits=5, random_state=0, shuffle=True)
reports = []
valf1 = []
start_time = time.time()
print('Local System Time:', time.strftime('%I:%M %p', time.localtime()))
for fold, (train_idx, test_idx) in enumerate(skf.split(df[xcolumn], df[ycolumn]), 1):
    x_train = df.loc[train_idx, xcolumn].tolist()
    y_train = df.loc[train_idx, ycolumn].values
    x_test = df.loc[test_idx, xcolumn].tolist()
    y_test = df.loc[test_idx, ycolumn].values

    x_train, x_val, y_train, y_val = train_test_split( x_train, y_train, test_size=0.15, random_state=0)

    train_enc = tokenizer(x_train, padding='max_length', truncation=True, max_length=MAX_LEN)
    val_enc   = tokenizer(x_val,   padding='max_length', truncation=True, max_length=MAX_LEN)
    test_enc  = tokenizer(x_test,  padding='max_length', truncation=True, max_length=MAX_LEN)

    train_loader = DataLoader(TextDataset(train_enc, y_train), batch_size=BATCH, shuffle=True)
    val_loader   = DataLoader(TextDataset(val_enc,   y_val),   batch_size=BATCH)
    test_loader  = DataLoader(TextDataset(test_enc,  y_test),  batch_size=BATCH)

    model = RobertaClassifier(bertmodelname, num_classes=num_classes).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=LR_RATE)

    best_val_f1 = -np.inf
    patience_counter = 0

    for epoch in range(NEPOCHS):
        model.train()
        for batch in train_loader:
            optimizer.zero_grad()
            ids   = batch['input_ids'].to(device)
            mask  = batch['attention_mask'].to(device)
            labels= batch['labels'].to(device)
            logits= model(ids, mask)
            loss  = criterion(logits, labels)
            loss.backward()
            optimizer.step()

        # Validation
        model.eval()
        val_preds, val_trues = [], []
        with torch.no_grad():
            for batch in val_loader:
                ids    = batch['input_ids'].to(device)
                mask   = batch['attention_mask'].to(device)
                trues  = batch['labels'].cpu().numpy()
                logits = model(ids, mask).cpu().numpy()
                preds  = np.argmax(logits, axis=1)
                val_preds.extend(preds.tolist())
                val_trues.extend(trues.tolist())

        valf1.append(        f1_score(val_trues, val_preds, average='macro'))
        vf1m = valf1[-1]
        if vf1m > best_val_f1:
            best_val_f1 = vf1m
            patience_counter = 0
            torch.save(model.state_dict(), os.path.join(modelpath, f"{modelname}_fold{fold}.bin"))
        else:
            patience_counter += 1

        if DECAY and patience_counter % DECAY_AFTER == 0 and patience_counter != 0:
            for g in optimizer.param_groups:
                g['lr'] *= DECAY_RATE

        print(f"Fold {fold} Epoch {epoch+1}/{NEPOCHS} - Val Macro F1: {vf1m:.4f} - Pat: {patience_counter}")
        if patience_counter >= PATIENCE:
            print(f"Stopping early at epoch {epoch+1}")
            break

    # Test evaluation
    model.load_state_dict(torch.load(os.path.join(modelpath, f"{modelname}_fold{fold}.bin")))
    model.eval()
    test_preds, test_trues = [], []
    with torch.no_grad():
        for batch in test_loader:
            ids    = batch['input_ids'].to(device)
            mask   = batch['attention_mask'].to(device)
            trues  = batch['labels'].cpu().numpy()
            logits = model(ids, mask).cpu().numpy()
            preds  = np.argmax(logits, axis=1)
            test_preds.extend(preds.tolist())
            test_trues.extend(trues.tolist())
            
    reports.append(classification_report( test_trues, test_preds, output_dict=True, zero_division=0,labels=[0, 1, 2], target_names=['IND 0', 'GRP 1', 'OTH 2']))
    print(f"Completed fold {fold}/5")
    del model
    torch.cuda.empty_cache()
    gc.collect()
print(f"Total runtime: {hms_string(time.time() - start_time)}")
WriteResutls(reports)

Local System Time: 04:02 PM
Fold 1 Epoch 1/20 - Val Macro F1: 0.2821 - Pat: 0
Fold 1 Epoch 2/20 - Val Macro F1: 0.4351 - Pat: 0
Fold 1 Epoch 3/20 - Val Macro F1: 0.7352 - Pat: 0
Fold 1 Epoch 4/20 - Val Macro F1: 0.7357 - Pat: 0
Fold 1 Epoch 5/20 - Val Macro F1: 0.7498 - Pat: 0
Fold 1 Epoch 6/20 - Val Macro F1: 0.7485 - Pat: 1
Fold 1 Epoch 7/20 - Val Macro F1: 0.7652 - Pat: 0
Fold 1 Epoch 8/20 - Val Macro F1: 0.7539 - Pat: 1
Fold 1 Epoch 9/20 - Val Macro F1: 0.7708 - Pat: 0
Fold 1 Epoch 10/20 - Val Macro F1: 0.7760 - Pat: 0
Fold 1 Epoch 11/20 - Val Macro F1: 0.7653 - Pat: 1
Fold 1 Epoch 12/20 - Val Macro F1: 0.7642 - Pat: 2
Fold 1 Epoch 13/20 - Val Macro F1: 0.7642 - Pat: 3
Fold 1 Epoch 14/20 - Val Macro F1: 0.7642 - Pat: 4
Fold 1 Epoch 15/20 - Val Macro F1: 0.7642 - Pat: 5
Stopping early at epoch 15
Completed fold 1/5
Fold 2 Epoch 1/20 - Val Macro F1: 0.2843 - Pat: 0
Fold 2 Epoch 2/20 - Val Macro F1: 0.6873 - Pat: 0
Fold 2 Epoch 3/20 - Val Macro F1: 0.7472 - Pat: 0
Fold 2 Epoch 4/20 - 

In [None]:
texts = df['Tweet'].tolist()
# Get token lengths
lengths = [len(tokenizer.encode(text, truncation=False)) for text in texts]
# Percentiles
p90 = int(np.percentile(lengths, 90))
p95 = int(np.percentile(lengths, 95))
p99 = int(np.percentile(lengths, 99))
max_len = p95  # or use p90 for stricter cutoff

print(f"90th percentile length: {p90}")
print(f"95th percentile length: {p95}")
print(f"95th percentile length: {p99}")


In [None]:
import warnings
warnings.filterwarnings('ignore')

import gc, os,time, numpy as np,pandas as pd, datetime
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
from sklearn.model_selection import StratifiedKFold, train_test_split
import torch,torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoConfig, AutoModel, logging

result_path =  r'C:\Users\mojua\Desktop\DL-Code\T3-LLM-Classification-Result.csv'
file_path = r'C:\Users\mojua\Desktop\DL-Code\Dataset\Offensive-24K-T3.xlsx'

# ---------------- GPU Setup ----------------
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Device: {device}")
if device.type == 'cuda':torch.cuda.empty_cache()

# --------- Hyperparameters ---------
MAX_LEN = 128
BERT_TRAINABLE = True
DRPT = 0.4
FC_ACT = 'elu'
LR_RATE = 9e-6
BATCH = 32
NEPOCHS = 20
PATIENCE = 4
DECAY = True
DECAY_RATE = 0.3
DECAY_AFTER = 1

modelname = 'hfFineTuneDistilBert'
modelpath = os.path.join('.', 'Saved Models', modelname)
for d in [modelpath, './Model Results', './Model - Summaries-Figures']:
    os.makedirs(d, exist_ok=True)
######################################################################################
# -------- Utils --------
def hms_string(sec):
    h = int(sec // 3600)
    m = int((sec % 3600) // 60)
    s = sec % 60
    return f"{h} hrs {m:02d} mins {s:05.2f} secs"

logging.set_verbosity_error()

# -------- Dataset --------
class TextDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings, self.labels = encodings, labels
    def __len__(self):
        return len(self.labels)
    def __getitem__(self, idx):
        item = {k: torch.tensor(v[idx]) for k,v in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx], dtype=torch.float)
        return item

# -------- Model --------
class DistilBertClassifier(nn.Module):
    def __init__(self, model_name):
        super().__init__()
        self.bert = AutoModel.from_pretrained(model_name, config=config)
        if not BERT_TRAINABLE:
            for p in self.bert.parameters(): p.requires_grad=False
        hidden = self.bert.config.hidden_size
        self.dropout = nn.Dropout(DRPT)
        self.fc1 = nn.Linear(hidden, hidden)
        self.act = nn.ELU()
        self.out = nn.Linear(hidden, 1)
        self.sig = nn.Sigmoid()
        # he_uniform initialization
        nn.init.kaiming_uniform_(self.fc1.weight, nonlinearity='linear')
        nn.init.zeros_(self.fc1.bias)
        nn.init.kaiming_uniform_(self.out.weight, nonlinearity='linear')
        nn.init.zeros_(self.out.bias)
    def forward(self, input_ids, attention_mask):
        o = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        pooled = o.last_hidden_state[:,0]
        x = self.dropout(pooled)
        x = self.act(self.fc1(x))
        return self.sig(self.out(x).squeeze(-1))

# -------- Threshold --------
def optimize_threshold(y_true, y_probs):
    best_t, best_f = 0.5, 0
    for t in np.arange(0.1,0.9,0.001):
        p = (y_probs>=t).astype(int)
        f = f1_score(y_true,p)
        if f>best_f:
            best_f, best_t = f, t
    return best_t

In [None]:
# -------- Data Loading --------
df = pd.read_excel(file_path, engine='openpyxl')
df['Tweet'] = df['Tweet'].astype(str)
df.dropna(inplace=True)
df.reset_index(drop=True, inplace=True)
print(df.head())
print(df.info(), df.shape)
gc.collect()
xcol, ycol = 'Tweet', 'Tag'



In [None]:
########################################################
def WriteResutls(reports):

  unt0 = {'precision':[], 'recall':[], 'f1-score':[] }
  tin1 = {'precision':[], 'recall':[], 'f1-score':[] }
  Oth2 = {'precision':[], 'recall':[], 'f1-score':[] }
  macroavg = {'precision':[], 'recall':[], 'f1-score':[] }
  weightedavg = {'precision':[], 'recall':[], 'f1-score':[] }
  accu = []
  for report in reports:
    for k,v in report.items():
      if 'IND' in k:
        unt0['precision'].append(v['precision'])
        unt0['recall'].append(v['recall'])
        unt0['f1-score'].append(v['f1-score'])

      elif 'GRP' in k:
        tin1['precision'].append(v['precision'])
        tin1['recall'].append(v['recall'])
        tin1['f1-score'].append(v['f1-score'])

      elif 'OTH' in k:
        Oth2['precision'].append(v['precision'])
        Oth2['recall'].append(v['recall'])
        Oth2['f1-score'].append(v['f1-score'])

      elif 'macro avg' in k:
        macroavg['precision'].append(v['precision'])
        macroavg['recall'].append(v['recall'])
        macroavg['f1-score'].append(v['f1-score'])

      elif 'weighted avg' in k:
        weightedavg['precision'].append(v['precision'])
        weightedavg['recall'].append(v['recall'])
        weightedavg['f1-score'].append(v['f1-score'])
      elif 'accuracy' in k:
        accu.append(v)

  print('Accuracy:',np.mean(accu))
  print("")
  print('IND 1 Precision:',np.mean(unt0['precision']))
  print('IND 1 Recall:',np.mean(unt0['recall']))
  print('IND 1 F1-Score:',np.mean(unt0['f1-score']))
  print("")
  print('GRP 2 Precision:',np.mean(tin1['precision']))
  print('GRP 2 Recall:',np.mean(tin1['recall']))
  print('GRP 2 F1-Score:',np.mean(tin1['f1-score']))

  print("")
  print('OTH 3 Precision:',np.mean(Oth2['precision']))
  print('OTH 3 Recall:',np.mean(Oth2['recall']))
  print('OTH 3 F1-Score:',np.mean(Oth2['f1-score']))

  print("")
  print('Weighted Avg Precision:',np.mean(weightedavg['precision']))
  print('Weighted Avg Recall:',np.mean(weightedavg['recall']))
  print('Weighted Avg F1-Score:',np.mean(weightedavg['f1-score']))

  print("")
  print('Macro  Precision:',np.mean(macroavg['precision']))
  print('Macro  Recall:',np.mean(macroavg['recall']))
  print('Macro  F1-Score:',np.mean(macroavg['f1-score']))

  file = open( result_path, mode='a' )
  file.write( modelname+ ' ( '+ datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") +') \n' )
  file.write( 'Accuracy:'+str(np.mean(accu))+'\n' )
  file.write('IND 1 Precision:'+str(np.mean(unt0['precision']))+'\n' )
  file.write('IND 1 Recall:'+str(np.mean(unt0['recall']))+'\n' )
  file.write('IND 1 F1-Score:'+str(np.mean(unt0['f1-score']))+'\n' )

  file.write('GRP 2 Precision:'+str(np.mean(tin1['precision']))+'\n' )
  file.write('GRP 2 Recall:'+str(np.mean(tin1['recall']))+'\n' )
  file.write('GRP 2 F1-Score:'+str(np.mean(tin1['f1-score']))+'\n' )

  file.write('OTH 3 Precision:'+str(np.mean(Oth2['precision']))+'\n' )
  file.write('OTH 3 Recall:'+str(np.mean(Oth2['recall']))+'\n' )
  file.write('OTH 3 F1-Score:'+str(np.mean(Oth2['f1-score']))+'\n' )

  file.write('Weighted Avg Precision:'+str(np.mean(weightedavg['precision']))+'\n' )
  file.write('Weighted Avg Recall:'+str(np.mean(weightedavg['recall']))+'\n' )
  file.write('Weighted Avg F1-Score:'+str(np.mean(weightedavg['f1-score']))+'\n' )

  file.write('Macro  Precision:'+str(np.mean(macroavg['precision']))+'\n' )
  file.write('Macro  Recall:'+str(np.mean(macroavg['recall']))+'\n' )
  file.write('Macro  F1-Score:'+str(np.mean(macroavg['f1-score']))+'\n' )
  file.close()
  print("Done")
######################################################################################
# -------- Training Loop --------
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
# -------- Tokenizer & Config --------
bertmodelname = 'distilbert-base-multilingual-cased'
tokenizer = AutoTokenizer.from_pretrained(bertmodelname)
config = AutoConfig.from_pretrained(bertmodelname)

In [None]:
# storage
valacc, valprec, valrec, valf1, valcm = [], [], [], [], []
val_prec_pc, val_rec_pc, val_f1_pc, val_f1_macro = [], [], [], []
testacc, testprec, testrec, testf1, testcm = [], [], [], [], []
test_prec_pc, test_rec_pc, test_f1_pc, test_f1_macro = [], [], [], []
reports =  []
start = time.time()
for fold, (tr, te) in enumerate(skf.split(df[xcol], df[ycol]), 1):
    Xtr = df.loc[tr, xcol].tolist()
    ytr = df.loc[tr, ycol].values
    Xte = df.loc[te, xcol].tolist()
    yte = df.loc[te, ycol].values

    Xtr, Xv, ytr, yv = train_test_split(Xtr, ytr, test_size=0.15, random_state=0)
    enc_tr = tokenizer(Xtr, padding='max_length', truncation=True, max_length=MAX_LEN)
    enc_v  = tokenizer(Xv,  padding='max_length', truncation=True, max_length=MAX_LEN)
    enc_te = tokenizer(Xte, padding='max_length', truncation=True, max_length=MAX_LEN)

    dt_tr = DataLoader(TextDataset(enc_tr, ytr), batch_size=BATCH, shuffle=True)
    dt_v  = DataLoader(TextDataset(enc_v,  yv), batch_size=BATCH)
    dt_te = DataLoader(TextDataset(enc_te, yte), batch_size=BATCH)

    model = DistilBertClassifier(bertmodelname).to(device)
    crit  = nn.BCELoss()
    opt   = torch.optim.Adam(model.parameters(), lr=LR_RATE)
    best_f, pat = -np.inf, 0

    for ep in range(NEPOCHS):
        model.train()
        for b in dt_tr:
            opt.zero_grad()
            ids   = b['input_ids'].to(device)
            mask  = b['attention_mask'].to(device)
            labels= b['labels'].to(device)
            probs = model(ids, mask)
            loss  = crit(probs, labels)
            loss.backward()
            opt.step()
        # validation
        model.eval()
        vp, vt = [], []
        with torch.no_grad():
            for b in dt_v:
                ids  = b['input_ids'].to(device)
                mask = b['attention_mask'].to(device)
                vp.extend(model(ids, mask).cpu().tolist())
                vt.extend(b['labels'].cpu().tolist())
        th    = optimize_threshold(np.array(vt), np.array(vp))
        vpred = (np.array(vp) >= th).astype(int)
        # record metrics
        valacc.append(       accuracy_score(vt, vpred))
        valprec.append(      precision_score(vt, vpred))
        valrec.append(       recall_score(vt, vpred))
        valf1.append(        f1_score(vt, vpred))
        valcm.append(        confusion_matrix(vt, vpred))
       
        vf1m = f1_score(vt, vpred, average='macro')
        val_f1_macro.append(vf1m)
        # early stopping
        if vf1m > best_f:
            best_f, pat = vf1m, 0
            torch.save(model.state_dict(),os.path.join(modelpath, f"{modelname}_fold{fold}.bin"))
        else:
            pat += 1
        if DECAY and pat % DECAY_AFTER == 0 and pat != 0:
            for g in opt.param_groups:
                g['lr'] *= DECAY_RATE
                
        print(f"Fold{fold} Ep{ep+1}/{NEPOCHS} - ValMacroF1={vf1m:.4f} Pat={pat}")
        if pat >= PATIENCE:
            break

    # test evaluation
    model.load_state_dict(torch.load(os.path.join(modelpath, f"{modelname}_fold{fold}.bin")))
    model.eval()
    tp, tt = [], []
    with torch.no_grad():
        for b in dt_te:
            ids  = b['input_ids'].to(device)
            mask = b['attention_mask'].to(device)
            tp.extend(model(ids, mask).cpu().tolist())
            tt.extend(b['labels'].cpu().tolist())
    tpred = (np.array(tp) >= th).astype(int)

    testacc.append(       accuracy_score(tt, tpred))
    testprec.append(      precision_score(tt, tpred))
    testrec.append(       recall_score(tt, tpred))
    testf1.append(        f1_score(tt, tpred))
    testcm.append(        confusion_matrix(tt, tpred))
    #reports.append(classification_report( tt, tpred, output_dict=True, zero_division=0, target_names=['UNT 0', 'TIN 1']))
    reports.append(classification_report( tt, tpred, output_dict=True, zero_division=0,labels=[1, 2, 3], target_names=['IND 1', 'GRP 2', 'OTH 3']))
    print(f"Completed fold {fold}/5")
    del model
    torch.cuda.empty_cache()
    gc.collect()


print("Total runtime:", hms_string(time.time() - start))
WriteResutls(reports)

In [None]:
# start from here after kernel restart

In [None]:
import warnings
warnings.filterwarnings('ignore')

import gc, os,time, numpy as np,pandas as pd, datetime
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
from sklearn.model_selection import StratifiedKFold, train_test_split
import torch,torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoConfig, AutoModel, logging

result_path =  r'C:\Users\mojua\Desktop\DL-Code\T3-LLM-Classification-Result.csv'
file_path = r'C:\Users\mojua\Desktop\DL-Code\Dataset\Offensive-24K-T3.xlsx'

# ---------------- GPU Setup ----------------
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Device: {device}")
if device.type == 'cuda':torch.cuda.empty_cache()

# --------- Hyperparameters ---------
MAX_LEN = 128
BERT_TRAINABLE = True
DRPT = 0.4
FC_ACT = 'elu'
LR_RATE = 1e-5
BATCH = 32
NEPOCHS = 20
PATIENCE = 4
DECAY = True
DECAY_RATE = 0.3
DECAY_AFTER = 1
modelname = 'hfFineTuneBert'
bertmodelname = 'bert-base-multilingual-cased'

modelpath = os.path.join('.', 'Saved Models', modelname)
for d in [modelpath, './Model Results', './Model - Summaries-Figures']:
    os.makedirs(d, exist_ok=True)
#################################################################################
# -------- Utils --------
def hms_string(sec):
    h = int(sec // 3600)
    m = int((sec % 3600) // 60)
    s = sec % 60
    return f"{h} hrs {m:02d} mins {s:05.2f} secs"

logging.set_verbosity_error()
# ----------- Dataset Class ----------------
class TextDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels
    def __len__(self):
        return len(self.labels)
    def __getitem__(self, idx):
        item = {k: torch.tensor(v[idx]) for k, v in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx], dtype=torch.float)
        return item
# -------- Model Definition ----------------
class BertClassifier(nn.Module):
    def __init__(self, model_name, dropout_rate=DRPT):
        super().__init__()
        self.bert = AutoModel.from_pretrained(model_name, config=config)
        if not BERT_TRAINABLE:
            for p in self.bert.parameters(): p.requires_grad = False
        hidden = self.bert.config.hidden_size
        self.dropout = nn.Dropout(dropout_rate)
        self.fc1 = nn.Linear(hidden, hidden)
        self.act = nn.ELU()
        self.out = nn.Linear(hidden, 1)
        self.sig = nn.Sigmoid()
        # he_uniform init
        nn.init.kaiming_uniform_(self.fc1.weight, nonlinearity='linear')
        nn.init.zeros_(self.fc1.bias)
        nn.init.kaiming_uniform_(self.out.weight, nonlinearity='linear')
        nn.init.zeros_(self.out.bias)
    def forward(self, input_ids, attention_mask):
        o = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        pooled = o.pooler_output if hasattr(o, 'pooler_output') else o.last_hidden_state[:,0]
        x = self.dropout(pooled)
        x = self.act(self.fc1(x))
        return self.sig(self.out(x).squeeze(-1))

# -------- Threshold Optimization -----------
def optimize_threshold(y_true, y_probs):
    best_t, best_f = 0.5, 0
    for t in np.arange(0.1,0.9,0.001):
        p = (y_probs>=t).astype(int)
        f = f1_score(y_true, p, average='weighted')
        if f>best_f: best_f, best_t = f, t
    return best_t



In [None]:
-------------------------------------------------------------------------------------------------------------

In [None]:
-------------------------------------------------------------------------------------------------------------

In [None]:
#Do multi-class classification(specifically 3 classes) using the same model.

In [13]:
import warnings
warnings.filterwarnings('ignore')

import gc, os,time, numpy as np,pandas as pd, datetime
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
from sklearn.model_selection import StratifiedKFold, train_test_split
import torch,torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoConfig, AutoModel, logging

result_path =  r'C:\Users\mojua\Desktop\DL-Code\T3-LLM-Classification-Result.csv'
file_path =  r'C:\Users\mojua\Desktop\DL-Code\Dataset\Offensive-24K-T3.xlsx'

# ---------------- GPU Setup ----------------
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Device: {device}")
if device.type == 'cuda':torch.cuda.empty_cache()

# --------- Hyperparameters ---------
MAX_LEN = 128
BERT_TRAINABLE = True
DRPT = 0.4
FC_ACT = 'elu'
LR_RATE = 1e-5
BATCH = 32
NEPOCHS = 20
PATIENCE = 4
DECAY = True
DECAY_RATE = 0.3
DECAY_AFTER = 1
modelname = 'hfFineTuneBert'
bertmodelname = 'bert-base-multilingual-cased'

modelpath = os.path.join('.', 'Saved Models', modelname)
for d in [modelpath, './Model Results', './Model - Summaries-Figures']:
    os.makedirs(d, exist_ok=True)
#################################################################################
# -------- Utils --------
def hms_string(sec):
    h = int(sec // 3600)
    m = int((sec % 3600) // 60)
    s = sec % 60
    return f"{h} hrs {m:02d} mins {s:05.2f} secs"

logging.set_verbosity_error()
# ----------- Dataset Class ----------------
class TextDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels
    def __len__(self):
        return len(self.labels)
    def __getitem__(self, idx):
        item = {k: torch.tensor(v[idx]) for k, v in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx], dtype=torch.long)
        return item

config = AutoConfig.from_pretrained(bertmodelname)
# -------- Model Definition ----------------
class BertClassifier(nn.Module):
    def __init__(self, model_name, dropout_rate=DRPT):
        super().__init__()
        self.bert = AutoModel.from_pretrained(model_name, config=config)
        if not BERT_TRAINABLE:
            for p in self.bert.parameters(): p.requires_grad = False
        hidden = self.bert.config.hidden_size
        self.dropout = nn.Dropout(dropout_rate)
        self.fc1 = nn.Linear(hidden, hidden)
        self.act = nn.ELU()
        self.out = nn.Linear(hidden, 3)  # 3 output units for 3 classes
        self.softmax = nn.Softmax(dim=1)  # Added softmax
        # he_uniform init
        nn.init.kaiming_uniform_(self.fc1.weight, nonlinearity='linear')
        nn.init.zeros_(self.fc1.bias)
        nn.init.kaiming_uniform_(self.out.weight, nonlinearity='linear')
        nn.init.zeros_(self.out.bias)
    def forward(self, input_ids, attention_mask):
        o = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        pooled = o.pooler_output if hasattr(o, 'pooler_output') else o.last_hidden_state[:,0]
        x = self.dropout(pooled)
        x = self.act(self.fc1(x))
        x = self.out(x)
        return self.softmax(x)  # Applied softmax to output

Device: cuda


In [15]:
from sklearn.metrics import f1_score
import numpy as np

def optimize_thresholds_multiclass(y_true, y_probs, average='weighted'):
    """
    Optimize thresholds for multi-class classification
    
    Parameters:
    y_true (np.array): True class labels (0, 1, 2)
    y_probs (np.array): Predicted probabilities from softmax (n_samples × 3)
    average (str): F1 averaging method ('weighted', 'macro', 'micro')
    
    Returns:
    np.array: Optimized thresholds for each class [threshold_class0, threshold_class1, threshold_class2]
    """
    num_classes = y_probs.shape[1]
    thresholds = np.full(num_classes, 0.5)  # Default thresholds
    best_f1 = 0
    
    # Optimize each class threshold separately
    for c in range(num_classes):
        best_t = 0.5
        best_class_f1 = 0
        
        # Test thresholds from 0.1 to 0.9
        for t in np.arange(0.1, 0.9, 0.01):
            # Create temporary predictions where we only consider current class
            temp_preds = np.zeros_like(y_probs)
            temp_preds[:, c] = (y_probs[:, c] >= t).astype(int)
            
            # Get final predictions (class with highest probability)
            pred_labels = np.argmax(temp_preds, axis=1)
            
            # Calculate F1 score
            current_f1 = f1_score(y_true, pred_labels, average=average)
            
            # Update best threshold for this class
            if current_f1 > best_class_f1:
                best_class_f1 = current_f1
                best_t = t
        
        # Update global best F1 and thresholds
        if best_class_f1 > best_f1:
            best_f1 = best_class_f1
        thresholds[c] = best_t
    
    return thresholds

In [17]:
# -------------- Data Loading ---------------
df = pd.read_excel(file_path, engine='openpyxl')
df['Tweet'] = df['Tweet'].astype(str)
df.dropna(inplace=True)
df.reset_index(drop=True, inplace=True)

print(df.head())
print(df.info())
print(df.columns, df.shape)

# Class information (now expects 3 classes: 1, 2, 3)
xcolumn = 'Tweet'
ycolumn = 'Tag'
print("Unique Classes:", df[ycolumn].unique())  # Should show [1, 2, 3]
print("Class Distribution:\n", df[ycolumn].value_counts())  # Counts for each class

gc.collect()

   Unnamed: 0                                              Tweet  Tag
0           3                   USER گھٹیا انسان دنیا ہی چھوڑ دو    1
1          11  USER PMLN میں آپ کے بارے میں میری بہتر راۓ تھی...    1
2          20  ہمیں تو آج تک سمجھ نہیں آئی کہ کم عقل عیسائی ح...    2
3          21  کیا پتہ اس گدھے کو بھی ہینڈلرز کی طرف سے گرین ...    1
4          25  USER ون آن ون والی پہلے اپنی اوقات تو کر لو۔ ی...    1
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6594 entries, 0 to 6593
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Unnamed: 0  6594 non-null   int64 
 1   Tweet       6594 non-null   object
 2   Tag         6594 non-null   int64 
dtypes: int64(2), object(1)
memory usage: 154.7+ KB
None
Index(['Unnamed: 0', 'Tweet', 'Tag'], dtype='object') (6594, 3)
Unique Classes: [1 2 3]
Class Distribution:
 Tag
1    4850
2    1244
3     500
Name: count, dtype: int64


8802

In [19]:
def WriteResults(reports):
    from collections import defaultdict
    import datetime

    class_metrics = defaultdict(lambda: {'precision': [], 'recall': [], 'f1-score': []})
    macroavg = {'precision': [], 'recall': [], 'f1-score': []}
    weightedavg = {'precision': [], 'recall': [], 'f1-score': []}
    accu = []

    for report in reports:
        for k, v in report.items():
            if k == 'accuracy':
                accu.append(v)
            elif k == 'macro avg':
                macroavg['precision'].append(v['precision'])
                macroavg['recall'].append(v['recall'])
                macroavg['f1-score'].append(v['f1-score'])
            elif k == 'weighted avg':
                weightedavg['precision'].append(v['precision'])
                weightedavg['recall'].append(v['recall'])
                weightedavg['f1-score'].append(v['f1-score'])
            elif k in ['0', '1', '2']:  # Modified to expect class labels 0,1,2
                class_metrics[k]['precision'].append(v['precision'])
                class_metrics[k]['recall'].append(v['recall'])
                class_metrics[k]['f1-score'].append(v['f1-score'])

    # Rest of the function remains EXACTLY THE SAME
    print(f"Accuracy: {np.mean(accu):.4f}\n")

    for class_name, metrics in class_metrics.items():
        print(f"{class_name} Precision: {np.mean(metrics['precision']):.4f}")
        print(f"{class_name} Recall:    {np.mean(metrics['recall']):.4f}")
        print(f"{class_name} F1-Score:  {np.mean(metrics['f1-score']):.4f}\n")

    print(f"Weighted Avg Precision: {np.mean(weightedavg['precision']):.4f}")
    print(f"Weighted Avg Recall:    {np.mean(weightedavg['recall']):.4f}")
    print(f"Weighted Avg F1-Score:  {np.mean(weightedavg['f1-score']):.4f}\n")

    print(f"Macro Avg Precision:    {np.mean(macroavg['precision']):.4f}")
    print(f"Macro Avg Recall:       {np.mean(macroavg['recall']):.4f}")
    print(f"Macro Avg F1-Score:     {np.mean(macroavg['f1-score']):.4f}")

    # Write to file
    with open(result_path, mode='a') as file:
        file.write(modelname + ' ( ' + datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + ' )\n')
        file.write(f'Accuracy: {np.mean(accu):.4f}\n')
        for class_name, metrics in class_metrics.items():
            file.write(f'{class_name} Precision: {np.mean(metrics["precision"]):.4f}\n')
            file.write(f'{class_name} Recall:    {np.mean(metrics["recall"]):.4f}\n')
            file.write(f'{class_name} F1-Score:  {np.mean(metrics["f1-score"]):.4f}\n')
        file.write(f'Weighted Avg Precision: {np.mean(weightedavg["precision"]):.4f}\n')
        file.write(f'Weighted Avg Recall:    {np.mean(weightedavg["recall"]):.4f}\n')
        file.write(f'Weighted Avg F1-Score:  {np.mean(weightedavg["f1-score"]):.4f}\n')
        file.write(f'Macro Avg Precision:    {np.mean(macroavg["precision"]):.4f}\n')
        file.write(f'Macro Avg Recall:       {np.mean(macroavg["recall"]):.4f}\n')
        file.write(f'Macro Avg F1-Score:     {np.mean(macroavg["f1-score"]):.4f}\n')

    print("Done")

In [21]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
import torch.nn.functional as F

# K-Fold setup (unchanged)
skf = StratifiedKFold(n_splits=5, random_state=0, shuffle=True)

# Clear previous metrics
valaccuracy, valprecision, valrecall, valf1, valcm = [], [], [], [], []
val_f1_macro = []
testaccuracy, testprecision, testrecall, testf1, testcm = [], [], [], [], []
test_f1_macro = []
reports = []

tokenizer = AutoTokenizer.from_pretrained(bertmodelname)

start = time.time()
print("Local System Time:", time.strftime("%I:%M %p", time.localtime()))

for fold, (tr, te) in enumerate(skf.split(df[xcolumn], df[ycolumn]), 1):
    xtr = df.loc[tr, xcolumn].tolist()
    ytr = df.loc[tr, ycolumn].values - 1  # Convert labels to 0,1,2
    xte = df.loc[te, xcolumn].tolist()
    yte = df.loc[te, ycolumn].values - 1  # Convert labels to 0,1,2

    xtr, xv, ytr, yv = train_test_split(xtr, ytr, test_size=0.15, random_state=0)

    # Tokenization (unchanged)
    enc_tr = tokenizer(xtr, padding='max_length', truncation=True, max_length=MAX_LEN)
    enc_v = tokenizer(xv, padding='max_length', truncation=True, max_length=MAX_LEN)
    enc_te = tokenizer(xte, padding='max_length', truncation=True, max_length=MAX_LEN)

    # Dataset loaders (unchanged)
    dt_tr = TextDataset(enc_tr, ytr)
    lt = DataLoader(dt_tr, batch_size=BATCH, shuffle=True)
    dt_v = TextDataset(enc_v, yv)
    lv = DataLoader(dt_v, batch_size=BATCH)
    dt_te = TextDataset(enc_te, yte)
    le = DataLoader(dt_te, batch_size=BATCH)

    # Model & loss (modified for 3 classes)
    model = BertClassifier(bertmodelname).to(device)  # Your class now outputs 3 units
    crit = nn.CrossEntropyLoss()  # Changed from BCELoss to CrossEntropyLoss
    opt = torch.optim.Adam(model.parameters(), lr=LR_RATE)

    best_f, pt = -np.inf, 0

    # Training loop (unchanged structure)
    for e in range(NEPOCHS):
        model.train()
        for b in lt:
            opt.zero_grad()
            ids = b['input_ids'].to(device)
            m = b['attention_mask'].to(device)
            lbls = b['labels'].to(device)  # Now expects class indices 0,1,2
            pr = model(ids, m)            # Outputs [batch_size, 3]
            loss = crit(pr, lbls)
            loss.backward()
            opt.step()

        # Validation (modified metrics for 3 classes)
        model.eval(); vp, vt = [], []
        with torch.no_grad():
            for b in lv:
                ids = b['input_ids'].to(device)
                m = b['attention_mask'].to(device)
                lbls = b['labels'].cpu().numpy().tolist()
                logits = model(ids, m)
                preds = torch.argmax(logits, dim=1).cpu().numpy().tolist()  # Class indices
                vp.extend(preds)
                vt.extend(lbls)

        # Metrics (now uses 'macro' averaging by default)
        valaccuracy.append(accuracy_score(vt, vp))
        valprecision.append(precision_score(vt, vp, average='macro', zero_division=0))
        valrecall.append(recall_score(vt, vp, average='macro', zero_division=0))
        valf1.append(f1_score(vt, vp, average='macro', zero_division=0))
        valcm.append(confusion_matrix(vt, vp, labels=[0,1,2]))  # Added explicit labels
        vm = f1_score(vt, vp, average='macro', zero_division=0)
        val_f1_macro.append(vm)

        # Early stopping (unchanged)
        if vm > best_f:
            best_f, pt = vm, 0
            torch.save(model.state_dict(), os.path.join(modelpath, f"{modelname}_fold{fold}.bin"))
        else:
            pt += 1

        if DECAY and pt % DECAY_AFTER == 0 and pt != 0:
            for g in opt.param_groups:
                g['lr'] *= DECAY_RATE

        print(f"Fold{fold} Ep{e + 1}/{NEPOCHS} - ValMacroF1={vm:.4f} Pat={pt}")
        if pt >= PATIENCE:
            print(f"Stopping early at epoch {e + 1}")
            break

    # Test Evaluation (modified for 3 classes)
    model.load_state_dict(torch.load(os.path.join(modelpath, f"{modelname}_fold{fold}.bin")))
    model.eval(); tp, tt = [], []
    with torch.no_grad():
        for b in le:
            ids = b['input_ids'].to(device)
            m = b['attention_mask'].to(device)
            logits = model(ids, m)
            preds = torch.argmax(logits, dim=1).cpu().numpy().tolist()  # Class indices
            lbls = b['labels'].cpu().numpy().tolist()
            tp.extend(preds)
            tt.extend(lbls)

    testaccuracy.append(accuracy_score(tt, tp))
    testprecision.append(precision_score(tt, tp, average='macro', zero_division=0))
    testrecall.append(recall_score(tt, tp, average='macro', zero_division=0))
    testf1.append(f1_score(tt, tp, average='macro', zero_division=0))
    testcm.append(confusion_matrix(tt, tp, labels=[0,1,2]))  # Added explicit labels
    test_f1_macro.append(f1_score(tt, tp, average='macro', zero_division=0))

    # Classification report for 3 classes
    reports.append(classification_report(
        tt, tp,
        output_dict=True,
        zero_division=0,
        labels=[0, 1, 2],  # Explicit class indices
        target_names=['Class 0', 'Class 1', 'Class 2']  # Your class names
    ))

    print(f"Completed fold {fold}/5")
    del model; torch.cuda.empty_cache(); gc.collect()

print(f"Total runtime: {hms_string(time.time()-start)}")
WriteResults(reports)  # Fixed typo in your original (WriteResutls -> WriteResults)

Local System Time: 09:33 AM
Fold1 Ep1/20 - ValMacroF1=0.2821 Pat=0
Fold1 Ep2/20 - ValMacroF1=0.2821 Pat=1
Fold1 Ep3/20 - ValMacroF1=0.2821 Pat=2
Fold1 Ep4/20 - ValMacroF1=0.2821 Pat=3
Fold1 Ep5/20 - ValMacroF1=0.2821 Pat=4
Stopping early at epoch 5
Completed fold 1/5
Fold2 Ep1/20 - ValMacroF1=0.2843 Pat=0
Fold2 Ep2/20 - ValMacroF1=0.2843 Pat=1
Fold2 Ep3/20 - ValMacroF1=0.2843 Pat=2
Fold2 Ep4/20 - ValMacroF1=0.2843 Pat=3
Fold2 Ep5/20 - ValMacroF1=0.2843 Pat=4
Stopping early at epoch 5
Completed fold 2/5
Fold3 Ep1/20 - ValMacroF1=0.2860 Pat=0
Fold3 Ep2/20 - ValMacroF1=0.3183 Pat=0
Fold3 Ep3/20 - ValMacroF1=0.2860 Pat=1
Fold3 Ep4/20 - ValMacroF1=0.2860 Pat=2
Fold3 Ep5/20 - ValMacroF1=0.2860 Pat=3
Fold3 Ep6/20 - ValMacroF1=0.2860 Pat=4
Stopping early at epoch 6
Completed fold 3/5
Fold4 Ep1/20 - ValMacroF1=0.2790 Pat=0
Fold4 Ep2/20 - ValMacroF1=0.2790 Pat=1
Fold4 Ep3/20 - ValMacroF1=0.2790 Pat=2
Fold4 Ep4/20 - ValMacroF1=0.2790 Pat=3
Fold4 Ep5/20 - ValMacroF1=0.2790 Pat=4
Stopping early at 

In [None]:
---------------------------------------------------------------------

In [None]:
---------------------------------------------------------------------

In [None]:
---------------------------------------------------------------------

In [None]:
---------------------------------------------------------------------

In [9]:
# -------------- Data Loading ---------------
df = pd.read_excel(file_path, engine='openpyxl')
df['Tweet'] = df['Tweet'].astype(str)
df.dropna(inplace=True)
df.reset_index(drop=True, inplace=True)
print(df.head())
print(df.info())
print(df.columns, df.shape)
gc.collect()
xcolumn = 'Tweet'
ycolumn = 'Tag'
print(df.Tag.unique())

   Unnamed: 0                                              Tweet  Tag
0           3                   USER گھٹیا انسان دنیا ہی چھوڑ دو    1
1          11  USER PMLN میں آپ کے بارے میں میری بہتر راۓ تھی...    1
2          20  ہمیں تو آج تک سمجھ نہیں آئی کہ کم عقل عیسائی ح...    2
3          21  کیا پتہ اس گدھے کو بھی ہینڈلرز کی طرف سے گرین ...    1
4          25  USER ون آن ون والی پہلے اپنی اوقات تو کر لو۔ ی...    1
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6594 entries, 0 to 6593
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Unnamed: 0  6594 non-null   int64 
 1   Tweet       6594 non-null   object
 2   Tag         6594 non-null   int64 
dtypes: int64(2), object(1)
memory usage: 154.7+ KB
None
Index(['Unnamed: 0', 'Tweet', 'Tag'], dtype='object') (6594, 3)
[1 2 3]


In [None]:
########################################################
def WriteResutls(reports):

  unt0 = {'precision':[], 'recall':[], 'f1-score':[] }
  tin1 = {'precision':[], 'recall':[], 'f1-score':[] }
  Oth2 = {'precision':[], 'recall':[], 'f1-score':[] }
  macroavg = {'precision':[], 'recall':[], 'f1-score':[] }
  weightedavg = {'precision':[], 'recall':[], 'f1-score':[] }
  accu = []
  for report in reports:
    for k,v in report.items():
      if 'IND' in k:
        unt0['precision'].append(v['precision'])
        unt0['recall'].append(v['recall'])
        unt0['f1-score'].append(v['f1-score'])

      elif 'GRP' in k:
        tin1['precision'].append(v['precision'])
        tin1['recall'].append(v['recall'])
        tin1['f1-score'].append(v['f1-score'])

      elif 'OTH' in k:
        Oth2['precision'].append(v['precision'])
        Oth2['recall'].append(v['recall'])
        Oth2['f1-score'].append(v['f1-score'])

      elif 'macro avg' in k:
        macroavg['precision'].append(v['precision'])
        macroavg['recall'].append(v['recall'])
        macroavg['f1-score'].append(v['f1-score'])

      elif 'weighted avg' in k:
        weightedavg['precision'].append(v['precision'])
        weightedavg['recall'].append(v['recall'])
        weightedavg['f1-score'].append(v['f1-score'])
      elif 'accuracy' in k:
        accu.append(v)

  print('Accuracy:',np.mean(accu))
  print("")
  print('IND 1 Precision:',np.mean(unt0['precision']))
  print('IND 1 Recall:',np.mean(unt0['recall']))
  print('IND 1 F1-Score:',np.mean(unt0['f1-score']))
  print("")
  print('GRP 2 Precision:',np.mean(tin1['precision']))
  print('GRP 2 Recall:',np.mean(tin1['recall']))
  print('GRP 2 F1-Score:',np.mean(tin1['f1-score']))

  print("")
  print('OTH 3 Precision:',np.mean(Oth2['precision']))
  print('OTH 3 Recall:',np.mean(Oth2['recall']))
  print('OTH 3 F1-Score:',np.mean(Oth2['f1-score']))

  print("")
  print('Weighted Avg Precision:',np.mean(weightedavg['precision']))
  print('Weighted Avg Recall:',np.mean(weightedavg['recall']))
  print('Weighted Avg F1-Score:',np.mean(weightedavg['f1-score']))

  print("")
  print('Macro  Precision:',np.mean(macroavg['precision']))
  print('Macro  Recall:',np.mean(macroavg['recall']))
  print('Macro  F1-Score:',np.mean(macroavg['f1-score']))

  file = open( result_path, mode='a' )
  file.write( modelname+ ' ( '+ datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") +') \n' )
  file.write( 'Accuracy:'+str(np.mean(accu))+'\n' )
  file.write('IND 1 Precision:'+str(np.mean(unt0['precision']))+'\n' )
  file.write('IND 1 Recall:'+str(np.mean(unt0['recall']))+'\n' )
  file.write('IND 1 F1-Score:'+str(np.mean(unt0['f1-score']))+'\n' )

  file.write('GRP 2 Precision:'+str(np.mean(tin1['precision']))+'\n' )
  file.write('GRP 2 Recall:'+str(np.mean(tin1['recall']))+'\n' )
  file.write('GRP 2 F1-Score:'+str(np.mean(tin1['f1-score']))+'\n' )

  file.write('OTH 3 Precision:'+str(np.mean(Oth2['precision']))+'\n' )
  file.write('OTH 3 Recall:'+str(np.mean(Oth2['recall']))+'\n' )
  file.write('OTH 3 F1-Score:'+str(np.mean(Oth2['f1-score']))+'\n' )

  file.write('Weighted Avg Precision:'+str(np.mean(weightedavg['precision']))+'\n' )
  file.write('Weighted Avg Recall:'+str(np.mean(weightedavg['recall']))+'\n' )
  file.write('Weighted Avg F1-Score:'+str(np.mean(weightedavg['f1-score']))+'\n' )

  file.write('Macro  Precision:'+str(np.mean(macroavg['precision']))+'\n' )
  file.write('Macro  Recall:'+str(np.mean(macroavg['recall']))+'\n' )
  file.write('Macro  F1-Score:'+str(np.mean(macroavg['f1-score']))+'\n' )
  file.close()
  print("Done")
######################################################################################
from transformers import BertTokenizer
tokenizer = BertTokenizer.from_pretrained(bertmodelname)
config = AutoConfig.from_pretrained(bertmodelname)

In [None]:
# --------- K-Fold Training ----------------
skf = StratifiedKFold(n_splits=5, random_state=0, shuffle=True)

# Metrics storage
valaccuracy, valprecision, valrecall, valf1, valcm = [], [], [], [], []
val_prec_pc, val_rec_pc, val_f1_pc, val_f1_macro = [], [], [], []
testaccuracy, testprecision, testrecall, testf1, testcm = [], [], [], [], []
test_prec_pc, test_rec_pc, test_f1_pc, test_f1_macro = [], [], [], []
reports = []
start = time.time()
print("Local System Time:", time.strftime("%I:%M %p", time.localtime()))

for fold, (tr, te) in enumerate(skf.split(df[xcolumn], df[ycolumn]),1):
    xtr = df.loc[tr, xcolumn].tolist();
    ytr = df.loc[tr, ycolumn].values
    xte = df.loc[te, xcolumn].tolist(); 
    yte = df.loc[te, ycolumn].values
    xtr, xv, ytr, yv = train_test_split(xtr, ytr, test_size=0.15, random_state=0)
    # tokenize
    enc_tr = tokenizer(xtr, padding='max_length', truncation=True, max_length=MAX_LEN)
    enc_v  = tokenizer(xv,  padding='max_length', truncation=True, max_length=MAX_LEN)
    enc_te = tokenizer(xte, padding='max_length', truncation=True, max_length=MAX_LEN)
    # datasets & loaders
    dt_tr = TextDataset(enc_tr, ytr); 
    lt = DataLoader(dt_tr, batch_size=BATCH, shuffle=True)
    dt_v  = TextDataset(enc_v,  yv); 
    lv = DataLoader(dt_v, batch_size=BATCH)
    dt_te = TextDataset(enc_te, yte); 
    le = DataLoader(dt_te, batch_size=BATCH)
    # model, loss, opt
    model = BertClassifier(bertmodelname).to(device)
    crit = nn.BCELoss()
    opt = torch.optim.Adam(model.parameters(), lr=LR_RATE)
    best_f, pt = -np.inf, 0
    # train
    for e in range(NEPOCHS):
        model.train()
        for b in lt:
            opt.zero_grad()
            ids = b['input_ids'].to(device); 
            m = b['attention_mask'].to(device)
            lbls = b['labels'].to(device)
            pr = model(ids,m)
            loss = crit(pr, lbls)
            loss.backward(); opt.step()
        # val
        model.eval(); vp, vt = [], []
        with torch.no_grad():
            for b in lv:
                ids = b['input_ids'].to(device); 
                m = b['attention_mask'].to(device)
                vp.extend(model(ids,m).cpu().numpy().tolist()); 
                vt.extend(b['labels'].cpu().numpy().tolist())
        th = optimize_threshold(np.array(vt), np.array(vp))
        vpred = (np.array(vp)>=th).astype(int)
        # record
        valaccuracy.append(accuracy_score(vt, vpred)); 
        valprecision.append(precision_score(vt, vpred))
        valrecall.append(recall_score(vt, vpred)); 
        valf1.append(f1_score(vt, vpred))
        valcm.append(confusion_matrix(vt, vpred))
        # per-class
       
        vm = f1_score(vt, vpred, average='macro'); 
        val_f1_macro.append(vm)
        # early stop & save
        if vm>best_f:
            best_f, pt = vm, 0
            torch.save(model.state_dict(), os.path.join(modelpath,f"{modelname}_fold{fold}.bin"))
        else:
            pt+=1
        if DECAY and pt%DECAY_AFTER==0 and pt!=0:
            for g in opt.param_groups: g['lr']*=DECAY_RATE
        print(f"Fold{fold} Ep{e+1}/{NEPOCHS} - ValMacroF1={vm:.4f} Pat={pt}")
        if pt>=PATIENCE:
            print(f"Stopping early at epoch {e+1}"); break
    # test eval
    model.load_state_dict(torch.load(os.path.join(modelpath,f"{modelname}_fold{fold}.bin")))
    model.eval(); tp, tt = [], []
    with torch.no_grad():
        for b in le:
            ids = b['input_ids'].to(device); 
            m = b['attention_mask'].to(device)
            tp.extend(model(ids,m).cpu().numpy().tolist()); 
            tt.extend(b['labels'].cpu().numpy().tolist())
    tpred = (np.array(tp)>=th).astype(int)
    testaccuracy.append(accuracy_score(tt,tpred)); 
    testprecision.append(precision_score(tt,tpred))
    testrecall.append(recall_score(tt,tpred)); 
    testf1.append(f1_score(tt,tpred))
    testcm.append(confusion_matrix(tt,tpred))
    #reports.append(classification_report( tt, tpred, output_dict=True, zero_division=0, target_names=['UNT 0', 'TIN 1']))
    reports.append(classification_report( tt, tpred, output_dict=True, zero_division=0,labels=[1, 2, 3], target_names=['IND 1', 'GRP 2', 'OTH 3']))
    print(f"Completed fold {fold}/5")
    # cleanup
    del model; torch.cuda.empty_cache(); gc.collect()

print(f"Total runtime: {hms_string(time.time()-start)}")
WriteResutls(reports)