In [1]:
import pandas as pd
import torch
import os
import numpy as np
import datasets
import transformers
from GPUtil import showUtilization as gpu_usage
from numba import cuda
import torch.nn.functional as F

from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    Trainer,
    TrainingArguments,
)

from datasets import load_dataset, Dataset, DatasetDict

In [2]:
# !watch -n 0.5 nvidia-smi

In [3]:
print(f'PyTorch version: {torch.__version__}')  # 1.9.1+cu111
print(f'CUDA version: {torch.version.cuda}')  # 11.1
print(f'cuDNN version: {torch.backends.cudnn.version()}')  # 8005
print(f'Current device: {torch.cuda.current_device()}')  # 0
print(f'Is cuda available: {torch.cuda.is_available()}')  # TRUE

PyTorch version: 2.0.1
CUDA version: 11.8
cuDNN version: 8700
Current device: 0
Is cuda available: True


In [4]:
print(f'Transformers version: {transformers.__version__}')
print(f'Datasets version: {datasets.__version__}')

Transformers version: 4.37.2
Datasets version: 2.14.5


In [5]:
# Prevent a warning related to the tokenization process in the transformers library. 
os.environ["TOKENIZERS_PARALLELISM"] = "False"
# Makes CUDA operations synchronous
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

In [6]:
# Find the GPU with the least memory usage.
!nvidia-smi

Sat Mar 16 14:29:25 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 545.23.08              Driver Version: 545.23.08    CUDA Version: 12.3     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA GeForce RTX 3090        On  | 00000000:41:00.0 Off |                  N/A |
| 30%   28C    P8              27W / 350W |      3MiB / 24576MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
|   1  NVIDIA GeForce RTX 3090        On  | 00000000:61:00.0 Off |  

In [7]:
def free_gpu_cache():
    print("Initial GPU Usage")
    gpu_usage()                             

    # free unreferenced tensors from the GPU memory.
    torch.cuda.empty_cache()

    cuda.select_device(0)
    cuda.close()
    cuda.select_device(0)

    print("GPU Usage after emptying the cache")
    gpu_usage()

free_gpu_cache() 

Initial GPU Usage
| ID | GPU | MEM |
------------------
|  0 |  0% |  0% |
|  1 | 97% | 38% |
|  2 |  0% |  0% |
|  3 |  0% |  0% |
GPU Usage after emptying the cache
| ID | GPU  | MEM |
-------------------
|  0 |   0% |  0% |
|  1 | 100% | 38% |
|  2 |   0% |  0% |
|  3 |   4% |  1% |


In [8]:
data = pd.read_csv("clean_test_or_not_test.csv" , index_col = 0)
data

Unnamed: 0,text_clean,label
0,add tests for systemdrawingcommonthis issue tr...,1
1,acctests for packetfabricportresource please k...,1
2,text analytics abstractivesummarizebatchconven...,1
3,see entity recordssee the story in the release...,1
4,editor crash when click mesh instance 3d with ...,1
...,...,...
72325,implement timebased event generator descriptio...,0
72326,solis energy today does not reset correctlydes...,0
72327,how to enable new home structure and navigatio...,0
72328,package request waydroidrun android apps on wa...,0


In [9]:
data_test_set = pd.read_csv("testset_test_or_not_test_clean.csv" , index_col = 0)
data_test_set

Unnamed: 0,text_clean,label
0,extreme memory usage in gpuinteropdescribe the...,0
1,draft recruitment plan for 092022 supplemental...,0
2,create staging server problem multiple times n...,1
3,python 2 postcommit failures due to timeout in...,1
4,bug save search history does not work type oth...,0
...,...,...
8032,drawing resets after rotation in crop modesimi...,0
8033,psaas bug windows installer contents contact d...,1
8034,test workflowmanager task update statustest to...,1
8035,unable to delete temporary dll on windows due ...,0


In [10]:
base_model_id = "roberta-base"
epochs = 5 #Number of full cyles through the training set.
num_labels = 2 
learning_rate = 5e-5 # Rate the model updates based on the data its trained on.
train_batch_size = 16 # Number of training examples in one iteration.
eval_batch_size = 32 # Number evalutaion examples in on iteratoion.
save_strategy = "no" # Should the model be saved automatically during training.
save_steps = 500 # How often to save the model during training. No effect since no over.
logging_steps = 100
model_dir = "./model" #Where to save model

# Use early stopping to prevent overfitting
#load_best_model_at_end=True
#metric_for_best_model="eval_loss"
#greater_is_better=False

In [11]:
# Split dataframe into three parts: training, validation and testing.
def train_validate_test_split(df, train_percent=.8, validate_percent=.1, seed=42):
    np.random.seed(seed)
    # Shuffle index of dataframe
    perm = np.random.permutation(df.index)
    
    df_length = len(df.index)
    
    # Number of row in training set
    train_end = int(train_percent * df_length)
    # Number of rows in validate set
    validate_end = int(validate_percent * df_length) + train_end
    
    # From start to train end
    train = df.iloc[perm[:train_end]]
    # From train_end to validate_end
    validate = df.iloc[perm[train_end:validate_end]]
    # From validate to the last row in dataframe.
    test = df.iloc[perm[validate_end:]]
    return train, validate, test

In [12]:
# Drops rows with missing values
data.dropna(inplace=True)

In [13]:
# Resets the index after dropping rows
data.reset_index(inplace=True)
data

Unnamed: 0,index,text_clean,label
0,0,add tests for systemdrawingcommonthis issue tr...,1
1,1,acctests for packetfabricportresource please k...,1
2,2,text analytics abstractivesummarizebatchconven...,1
3,3,see entity recordssee the story in the release...,1
4,4,editor crash when click mesh instance 3d with ...,1
...,...,...,...
72325,72325,implement timebased event generator descriptio...,0
72326,72326,solis energy today does not reset correctlydes...,0
72327,72327,how to enable new home structure and navigatio...,0
72328,72328,package request waydroidrun android apps on wa...,0


In [14]:
#Drops the index col, better for managint the data.
data.drop(columns= ["index"], inplace = True)

In [15]:
data


Unnamed: 0,text_clean,label
0,add tests for systemdrawingcommonthis issue tr...,1
1,acctests for packetfabricportresource please k...,1
2,text analytics abstractivesummarizebatchconven...,1
3,see entity recordssee the story in the release...,1
4,editor crash when click mesh instance 3d with ...,1
...,...,...
72325,implement timebased event generator descriptio...,0
72326,solis energy today does not reset correctlydes...,0
72327,how to enable new home structure and navigatio...,0
72328,package request waydroidrun android apps on wa...,0


In [16]:
# 80% trainig, 10% validate, 10% test. Seed 42.
# Test 80-10-10 and 70-15-15
train , validate , test = train_validate_test_split(data)


In [17]:
train.set_index("label" , inplace = True)
validate.set_index("label" , inplace = True)
test.set_index("label" , inplace = True)

In [18]:
test

Unnamed: 0_level_0,text_clean
label,Unnamed: 1_level_1
0,tablevel snapshots description originally cove...
0,optimize checksamplechecksamplehttpsgithubcomd...
1,test groupbysize testing disabled due to panda...
1,parallelize tests using spark packages feature...
0,should close lobby if host leave the lobby whe...
...,...
0,ui create ui for viewing waypoints on mapcreat...
1,able to pvp through pvp toggled offwhile testi...
0,allow apps to pass options to jbangsee this di...
1,testing add pcap to facilitate testingadd pcap...


In [19]:
# Convert from Pandas DataFrame to Hugging Face datasets
tds = Dataset.from_pandas(train)
vds = Dataset.from_pandas(validate)
testds = Dataset.from_pandas(test)

separate_test_set = Dataset.from_pandas(data_test_set)
ds = DatasetDict()

ds["test"] = testds
ds["train"] = tds
ds["validate"] = vds
ds["separate_test_set"] = separate_test_set

ds

DatasetDict({
    test: Dataset({
        features: ['text_clean', 'label'],
        num_rows: 7233
    })
    train: Dataset({
        features: ['text_clean', 'label'],
        num_rows: 57864
    })
    validate: Dataset({
        features: ['text_clean', 'label'],
        num_rows: 7233
    })
    separate_test_set: Dataset({
        features: ['text_clean', 'label', '__index_level_0__'],
        num_rows: 8037
    })
})

In [20]:
train_dataset = ds["train"]
valid_dataset = ds["validate"]
test_ds = ds["test"]
separate_test_set_dataset = ds["separate_test_set"]

In [21]:
ds["train"][0]

{'text_clean': 'dynamicresolution does not workingdescribe the bug after running with f dynamicresolution does not work when using shortcut keys or float bar to cancel full screen to reproduce 1 run xfreerdp command xfreerdp logleveldebug f floatbarstickyoffdefaulthiddenshowalways dynamicresolution vxxxxxxxxx uxxxxx 2 remove the full screen display by manipulating the floatbar or press ctrl alt enter 3 resize the window 4 automatic resizing does not workthe window size can be adjusted but the resolution is not changed expected behavior when the window size is adjusted i would like the resolution to automatically change to match the window size screenshots screenshothttpsuserimagesgithubusercontentcom11292533718855688065eb577856474508929d572bbd9863bepng additional context the problem did not occur in v220 but has been occurring since v230 in addition to f the problem was also reproduced with multimon i am connected to windows 10 specifically the following commit was causing the problem 

In [22]:
def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds)
    acc = accuracy_score(labels, preds)
    return {"accuracy": acc, "f1": f1, "precision": precision, "recall": recall}

In [23]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = AutoModelForSequenceClassification.from_pretrained(base_model_id, num_labels=num_labels)


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


# Tokanization

In [24]:
model_ckpt = "roberta-base"
tokenizer = AutoTokenizer.from_pretrained(model_ckpt)

In [25]:
#Tokenize the dataset to the correct input for the transformer model.
def tokenize(batch):
    return tokenizer(batch["text_clean"], padding="max_length", truncation=True)

train_dataset = train_dataset.map(tokenize, batched=True, batch_size=len(train_dataset))
valid_dataset = valid_dataset.map(tokenize, batched=True, batch_size=len(valid_dataset))
test_dataset = test_ds.map(tokenize, batched=True, batch_size=len(test_ds))
separate_test_set_dataset = separate_test_set_dataset.map(tokenize, batched=True, batch_size=len(separate_test_set_dataset))

Map:   0%|          | 0/57864 [00:00<?, ? examples/s]

Map:   0%|          | 0/7233 [00:00<?, ? examples/s]

Map:   0%|          | 0/7233 [00:00<?, ? examples/s]

Map:   0%|          | 0/8037 [00:00<?, ? examples/s]

In [26]:
training_args = TrainingArguments(
    output_dir=model_dir,
    num_train_epochs=epochs,
    per_device_train_batch_size=train_batch_size,
    per_device_eval_batch_size=eval_batch_size,
    save_strategy=save_strategy,
    save_steps=save_steps,
    evaluation_strategy="epoch",
    learning_rate=learning_rate,
    logging_steps=logging_steps,
)

In [27]:
 trainer = Trainer(
    model=model,
    args=training_args,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    train_dataset=train_dataset,
    eval_dataset=valid_dataset,
)

In [28]:
trainer.train() 

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6549,0.518908,0.790682,[0.8003692 0.78000581],[0.76237126 0.82533825],[0.84235359 0.73939394]
2,0.4899,0.517299,0.793447,[0.81002035 0.77370494],[0.74747712 0.85935397],[0.88398557 0.70358127]
3,0.5093,0.511475,0.787087,[0.80231065 0.76932295],[0.74635777 0.84307288],[0.86733278 0.70743802]
4,0.6936,0.693433,0.498134,[0.66500554 0. ],[0.49813355 0. ],[1. 0.]
5,0.6937,0.693141,0.501866,[0. 0.66832367],[0. 0.50186645],[0. 1.]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


TrainOutput(global_step=18085, training_loss=0.5986318738545573, metrics={'train_runtime': 7110.9183, 'train_samples_per_second': 40.687, 'train_steps_per_second': 2.543, 'total_flos': 7.61232905367552e+16, 'train_loss': 0.5986318738545573, 'epoch': 5.0})

In [29]:
eval_result = trainer.evaluate(eval_dataset=valid_dataset)

  _warn_prf(average, modifier, msg_start, len(result))


In [30]:
for key, value in sorted(eval_result.items()):
    print(f"{key} = {value}\n")

epoch = 5.0

eval_accuracy = 0.501866445458316

eval_f1 = [0.         0.66832367]

eval_loss = 0.6931409239768982

eval_precision = [0.         0.50186645]

eval_recall = [0. 1.]

eval_runtime = 55.0006

eval_samples_per_second = 131.508

eval_steps_per_second = 4.127



## Training loss decreases, valdiation loss increases = Overfitting

In [31]:
# Evaluate test data set
test_results = trainer.evaluate(eval_dataset=test_dataset)

  _warn_prf(average, modifier, msg_start, len(result))


In [32]:
for key, value in sorted(test_results.items()):
    print(f"{key} = {value}\n")

epoch = 5.0

eval_accuracy = 0.5177657956587861

eval_f1 = [0.         0.68227364]

eval_loss = 0.6929818987846375

eval_precision = [0.        0.5177658]

eval_recall = [0. 1.]

eval_runtime = 54.8399

eval_samples_per_second = 131.893

eval_steps_per_second = 4.139



# Test set

In [33]:
separate_test_set_results = trainer.evaluate(eval_dataset=separate_test_set_dataset)

  _warn_prf(average, modifier, msg_start, len(result))


In [34]:
for key, value in sorted(separate_test_set_results.items()):
    print(f"{key} = {value}\n")

epoch = 5.0

eval_accuracy = 0.49906681597611047

eval_f1 = [0.         0.66583665]

eval_loss = 0.6931688785552979

eval_precision = [0.         0.49906682]

eval_recall = [0. 1.]

eval_runtime = 60.9683

eval_samples_per_second = 131.823

eval_steps_per_second = 4.133



In [35]:
trainer.save_model(model_dir + "_local") 

In [36]:
from transformers import pipeline
    
classifier = pipeline("text-classification", model="./model_local_Roberta", truncation=True)

OSError: Incorrect path_or_model_id: './model_local_Roberta'. Please provide either the path to a local folder or the repo_id of a model on the Hub.

In [None]:
classifier.model

In [None]:
classifier("this contain bugs regarding testing")

In [None]:
classifier("this contain bugs regarding automtion not testing")

In [None]:
classifier("this bug has super high impact on the project")

In [None]:
import pandas as pd
df = pd.read_csv('clean_test_or_not_test_debt.csv',index_col = 0)
df.head()

In [None]:
def classify_text(text):
    # Get prediction
    result = classifier(text)
    # Return the label of the highest scoring classification
    return result[0]['label']

# Apply the classification function to your text column (assuming it's named 'text_clean')
df['predicted_label'] = df['text_clean'].apply(classify_text)


In [None]:
df.head(100)

In [None]:
from sklearn.metrics import accuracy_score, f1_score

# Assuming your predicted labels are in a format like 'LABEL_0', 'LABEL_1', etc.
# Convert these to numeric by extracting the number part and converting it to an integer
df['predicted_label_numeric'] = df['predicted_label'].apply(lambda x: int(x.split('_')[1]))

# Now, your actual labels are assumed to be in the 'label' column and already numeric
actual_labels = df['label']

# Predicted labels are now in 'predicted_label_numeric'
predicted_labels = df['predicted_label_numeric']


df.head()

In [None]:
df.to_csv('TestNotTestRes_roberta.csv', index=False)

### Delete the dataset with large memory

In [None]:
# Calculate accuracy
accuracy = accuracy_score(actual_labels, predicted_labels)

# Calculate F1 score
# Assuming it's a binary classification, you could specify the average method if it's multi-class
f1 = f1_score(actual_labels, predicted_labels, average='binary' if df['label'].nunique() == 2 else 'weighted')

print(f"Accuracy: {accuracy}")
print(f"F1 Score: {f1}")

In [None]:
del valid_dataset

In [None]:
del model

In [None]:
# Free cache
torch.cuda.empty_cache()

In [None]:
!nvidia-smi