In [None]:
# Downloading modules 
!pip install simpletransformers  # force install simpletrransformers in colab-environment
!pip install wandb # force install wandb in colab-environment

# Importing modules
import pandas as pd
import simpletransformers
from simpletransformers.classification import ClassificationModel, ClassificationArgs

import sklearn
from sklearn.metrics import accuracy_score, classification_report, precision_recall_fscore_support
from sklearn.model_selection import train_test_split, KFold

import torch
from torch.nn.modules.activation import Threshold

import os
import numpy as np
import wandb
import logging

In [None]:
# Authenticate this session with Weights & Biases (prompts for an API key)
!wandb login  # using this requires a Weights & Biases account: https://wandb.ai/login

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit: 
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [2]:
# Make Google Drive reachable from the Colab runtime under /content/drive,
# so the dataset can be read from it and model output written to it.
from google.colab import drive

drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Read the dataset from Google Drive into a DataFrame.
# NOTE: the path below must be set to the exact location of the CSV file.
data = pd.read_csv(
    "/content/drive/MyDrive/ModelFolder/data_eigstig_text_label.csv"
)

In [None]:
# Hold out 10% of the data as the final test set.
# A fixed random_state makes the split reproducible across notebook runs
# (same seed value as the KFold inside 'training_model_weightdecay()').
train, test = train_test_split(data, test_size=0.1, random_state=43)

# The training portion is split further into train/validation folds later on;
# 'train_data' is the frame read by 'training_model_weightdecay()'.
train_data = train
test_data = test

# Write the held-out test set to csv, so the model can always be evaluated later.
# A user path can be inserted into the str argument if wanted.
test_data.to_csv('ModelAllDataWeightDecay.csv')

In [None]:
# Logging setup: the "transformers" logger is restricted to warnings and above ...
transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel(logging.WARNING)
# ... while the root logger is configured at INFO level.
logging.basicConfig(level=logging.INFO)

def training_model_weightdecay():
    """Train and cross-validate a BERT classifier that uses weight decay.

    Initializes a Weights & Biases run in the project "ModelAllDataWeightDecay",
    splits the module-level ``train_data`` DataFrame into 5 shuffled folds
    (fixed seed) and, for every fold, fine-tunes a fresh "bert-base-uncased"
    two-label classifier on the GPU and evaluates it on the held-out fold.

    The accuracy of every fold, the list of all fold accuracies and their mean
    are printed.

    Returns:
        list: The validation accuracy of each fold, in fold order.
    """
    # initialize new wandb project
    wandb.init(project="ModelAllDataWeightDecay")

    # set k-fold specifics
    n = 5
    seed = 43
    kf = KFold(n_splits=n, random_state=seed, shuffle=True)

    # model arguments
    model_args = ClassificationArgs()
    # change to desired output directory
    model_args.output_dir = '/content/drive/MyDrive/ASD_second_account/Model_weightDecay'
    model_args.num_train_epochs = 20
    model_args.learning_rate = 0.00001
    model_args.train_batch_size = 32
    # every fold writes to the same output_dir, so earlier folds are overwritten
    model_args.overwrite_output_dir = True
    model_args.evaluate_during_training = True
    model_args.use_multiprocessing = True
    model_args.save_best_model = True
    # The attribute name must be spelled 'weight_decay': a misspelled name is
    # silently stored as a new, unused attribute and no weight decay is applied.
    model_args.weight_decay = 0.1
    model_args.wandb_project = "ModelAllDataWeightDecay"

    # NOTE(review): presumably 'train_data' has the text/label column layout
    # that simpletransformers expects -- confirm against the loaded CSV.
    results = []
    for train_index, val_index in kf.split(train_data):
        # select the rows of this fold by position
        train_df = train_data.iloc[train_index]
        val_df = train_data.iloc[val_index]

        # a fresh model per fold, so no weights leak between folds
        model = ClassificationModel(
            'bert', 'bert-base-uncased', use_cuda=True, args=model_args, num_labels=2
        )

        # train the model, tracking accuracy on the validation fold
        model.train_model(train_df, eval_df=val_df, acc=sklearn.metrics.accuracy_score)

        # validate the model on the held-out fold
        result, model_outputs, wrong_predictions = model.eval_model(
            val_df, acc=sklearn.metrics.accuracy_score
        )
        print(result['acc'])

        # collect the fold's accuracy
        results.append(result['acc'])

    print("results", results)
    # the collected metric is accuracy, so the summary is labelled accordingly
    print(f"Mean-Accuracy: {sum(results) / len(results)}")

    return results

In [None]:
# Train the model with weight decay (fits one model per cross-validation fold, 20 epochs each)
training_model_weightdecay()