In [1]:
import torch
from datasets import load_dataset
from transformers import RobertaTokenizer, RobertaForCausalLM, Trainer, TrainingArguments
torch.cuda.empty_cache()

  from .autonotebook import tqdm as notebook_tqdm





In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [3]:
dataset = load_dataset("code_search_net", "python",cache_dir="./Datasets")

In [4]:
dataset.column_names

{'train': ['repository_name',
  'func_path_in_repository',
  'func_name',
  'whole_func_string',
  'language',
  'func_code_string',
  'func_code_tokens',
  'func_documentation_string',
  'func_documentation_tokens',
  'split_name',
  'func_code_url'],
 'test': ['repository_name',
  'func_path_in_repository',
  'func_name',
  'whole_func_string',
  'language',
  'func_code_string',
  'func_code_tokens',
  'func_documentation_string',
  'func_documentation_tokens',
  'split_name',
  'func_code_url'],
 'validation': ['repository_name',
  'func_path_in_repository',
  'func_name',
  'whole_func_string',
  'language',
  'func_code_string',
  'func_code_tokens',
  'func_documentation_string',
  'func_documentation_tokens',
  'split_name',
  'func_code_url']}

In [5]:
tokenizer = RobertaTokenizer.from_pretrained("microsoft/codebert-base", cache_dir="./Models")
model = RobertaForCausalLM.from_pretrained("microsoft/codebert-base", cache_dir="./Models",is_decoder = True).to(device)

Some weights of RobertaForCausalLM were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['lm_head.bias', 'lm_head.decoder.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [6]:
def tokenize_code(code_snippet, tokenizer):
    return tokenizer(
        code_snippet, max_length=512, truncation=True, padding='max_length', return_tensors="pt"
    )

In [7]:
def preprocess_data(batch, tokenizer):
    batch["input_ids"] = tokenize_code(batch["func_code_string"], tokenizer)["input_ids"]
    batch["labels"] = tokenize_code(batch["func_documentation_string"], tokenizer)["input_ids"]
    return batch

In [8]:
train_dataset = dataset["train"].shuffle(seed=42).select(range(1000)).map(lambda x: preprocess_data(x, tokenizer), batched=True)
eval_dataset = dataset["validation"].shuffle(seed=42).select(range(200)).map(lambda x: preprocess_data(x, tokenizer), batched=True)

In [9]:
model

RobertaForCausalLM(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNor

In [10]:
training_args = TrainingArguments(
    output_dir="./codebert-finetuned-roberta/results",    # output directory
    num_train_epochs=10,                   # number of epochs
    per_device_train_batch_size=16,       # batch size
    per_device_eval_batch_size=16,        # eval batch size
    warmup_steps=500,                     # warmup steps
    weight_decay=0.01,                    # weight decay
    logging_dir="./logs",                 # logging directory
    logging_steps=10,
    evaluation_strategy="steps",          # Evaluation after every logging step
    save_total_limit=2,                   # Keep only last two checkpoints
    save_steps=500,                       # Save model every 500 steps
    report_to="none",
    fp16=True                     # No reports (e.g., to wandb)
)



In [11]:
# Trainer setup
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
)

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


In [None]:
trainer.train()

  0%|          | 0/630 [00:00<?, ?it/s]We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.
  attn_output = torch.nn.functional.scaled_dot_product_attention(
  2%|▏         | 10/630 [04:19<4:29:50, 26.11s/it]

{'loss': 18.2138, 'grad_norm': 63.97089767456055, 'learning_rate': 7.000000000000001e-07, 'epoch': 0.16}


                                                  
  2%|▏         | 10/630 [05:57<4:29:50, 26.11s/it]

{'eval_loss': 18.06072998046875, 'eval_runtime': 98.6642, 'eval_samples_per_second': 2.027, 'eval_steps_per_second': 0.132, 'epoch': 0.16}


  3%|▎         | 20/630 [10:19<4:38:02, 27.35s/it]

{'loss': 17.815, 'grad_norm': 63.01264190673828, 'learning_rate': 1.7000000000000002e-06, 'epoch': 0.32}


                                                  
  3%|▎         | 20/630 [11:58<4:38:02, 27.35s/it]

{'eval_loss': 17.291728973388672, 'eval_runtime': 98.5539, 'eval_samples_per_second': 2.029, 'eval_steps_per_second': 0.132, 'epoch': 0.32}


  5%|▍         | 30/630 [16:19<4:33:49, 27.38s/it]

{'loss': 16.7654, 'grad_norm': 57.824493408203125, 'learning_rate': 2.7e-06, 'epoch': 0.48}


                                                  
  5%|▍         | 30/630 [17:58<4:33:49, 27.38s/it]

{'eval_loss': 15.921609878540039, 'eval_runtime': 98.5362, 'eval_samples_per_second': 2.03, 'eval_steps_per_second': 0.132, 'epoch': 0.48}


  6%|▋         | 40/630 [22:19<4:28:58, 27.35s/it]

{'loss': 15.2072, 'grad_norm': 55.265506744384766, 'learning_rate': 3.7e-06, 'epoch': 0.63}


                                                  
  6%|▋         | 40/630 [23:58<4:28:58, 27.35s/it]

{'eval_loss': 14.1182222366333, 'eval_runtime': 98.4494, 'eval_samples_per_second': 2.032, 'eval_steps_per_second': 0.132, 'epoch': 0.63}


  8%|▊         | 50/630 [28:19<4:24:25, 27.35s/it]

{'loss': 13.214, 'grad_norm': 49.358726501464844, 'learning_rate': 4.7e-06, 'epoch': 0.79}


                                                  
  8%|▊         | 50/630 [29:57<4:24:25, 27.35s/it]

{'eval_loss': 11.902570724487305, 'eval_runtime': 98.4252, 'eval_samples_per_second': 2.032, 'eval_steps_per_second': 0.132, 'epoch': 0.79}


 10%|▉         | 60/630 [34:19<4:19:51, 27.35s/it]

{'loss': 10.735, 'grad_norm': 42.13877868652344, 'learning_rate': 5.7000000000000005e-06, 'epoch': 0.95}


                                                  
 10%|▉         | 60/630 [35:57<4:19:51, 27.35s/it]

{'eval_loss': 9.393935203552246, 'eval_runtime': 98.4339, 'eval_samples_per_second': 2.032, 'eval_steps_per_second': 0.132, 'epoch': 0.95}


 11%|█         | 70/630 [40:06<4:12:22, 27.04s/it]

{'loss': 7.9519, 'grad_norm': 37.08514404296875, 'learning_rate': 6.700000000000001e-06, 'epoch': 1.11}


                                                  
 11%|█         | 70/630 [41:44<4:12:22, 27.04s/it]

{'eval_loss': 6.563320159912109, 'eval_runtime': 98.2585, 'eval_samples_per_second': 2.035, 'eval_steps_per_second': 0.132, 'epoch': 1.11}


 13%|█▎        | 80/630 [46:05<4:10:15, 27.30s/it]

{'loss': 5.3564, 'grad_norm': 38.86039733886719, 'learning_rate': 7.7e-06, 'epoch': 1.27}


                                                  
 13%|█▎        | 80/630 [47:43<4:10:15, 27.30s/it]

{'eval_loss': 3.8364667892456055, 'eval_runtime': 98.2346, 'eval_samples_per_second': 2.036, 'eval_steps_per_second': 0.132, 'epoch': 1.27}


 14%|█▍        | 90/630 [52:04<4:05:45, 27.31s/it]

{'loss': 3.0083, 'grad_norm': 15.443470001220703, 'learning_rate': 8.7e-06, 'epoch': 1.43}


                                                  
 14%|█▍        | 90/630 [53:42<4:05:45, 27.31s/it]

{'eval_loss': 2.4120659828186035, 'eval_runtime': 98.2651, 'eval_samples_per_second': 2.035, 'eval_steps_per_second': 0.132, 'epoch': 1.43}


 16%|█▌        | 100/630 [58:03<4:01:12, 27.31s/it]

{'loss': 1.7858, 'grad_norm': 5.62119197845459, 'learning_rate': 9.7e-06, 'epoch': 1.59}


                                                   
 16%|█▌        | 100/630 [59:41<4:01:12, 27.31s/it]

{'eval_loss': 2.1433205604553223, 'eval_runtime': 98.2443, 'eval_samples_per_second': 2.036, 'eval_steps_per_second': 0.132, 'epoch': 1.59}


 17%|█▋        | 110/630 [1:04:02<3:56:43, 27.31s/it]

{'loss': 1.9412, 'grad_norm': 25.111492156982422, 'learning_rate': 1.0700000000000001e-05, 'epoch': 1.75}


                                                     
 17%|█▋        | 110/630 [1:05:40<3:56:43, 27.31s/it]

{'eval_loss': 2.046808958053589, 'eval_runtime': 98.2786, 'eval_samples_per_second': 2.035, 'eval_steps_per_second': 0.132, 'epoch': 1.75}


 19%|█▉        | 120/630 [1:10:01<3:52:08, 27.31s/it]

{'loss': 1.6942, 'grad_norm': 4.7146711349487305, 'learning_rate': 1.1700000000000001e-05, 'epoch': 1.9}


                                                     
 19%|█▉        | 120/630 [1:11:39<3:52:08, 27.31s/it]

{'eval_loss': 1.9239897727966309, 'eval_runtime': 98.2528, 'eval_samples_per_second': 2.036, 'eval_steps_per_second': 0.132, 'epoch': 1.9}


 21%|██        | 130/630 [1:15:47<3:39:34, 26.35s/it]

{'loss': 1.6171, 'grad_norm': 35.03866195678711, 'learning_rate': 1.27e-05, 'epoch': 2.06}


                                                     
 21%|██        | 130/630 [1:17:26<3:39:34, 26.35s/it]

{'eval_loss': 1.890832543373108, 'eval_runtime': 98.3834, 'eval_samples_per_second': 2.033, 'eval_steps_per_second': 0.132, 'epoch': 2.06}


 22%|██▏       | 140/630 [1:21:47<3:42:57, 27.30s/it]

{'loss': 1.5519, 'grad_norm': 6.825279235839844, 'learning_rate': 1.3700000000000001e-05, 'epoch': 2.22}


                                                     
 22%|██▏       | 140/630 [1:23:25<3:42:57, 27.30s/it]

{'eval_loss': 1.804990530014038, 'eval_runtime': 98.3702, 'eval_samples_per_second': 2.033, 'eval_steps_per_second': 0.132, 'epoch': 2.22}


 24%|██▍       | 150/630 [1:27:46<3:38:38, 27.33s/it]

{'loss': 1.6409, 'grad_norm': 7.955662250518799, 'learning_rate': 1.47e-05, 'epoch': 2.38}


                                                     
 24%|██▍       | 150/630 [1:29:24<3:38:38, 27.33s/it]

{'eval_loss': 1.9959540367126465, 'eval_runtime': 98.3894, 'eval_samples_per_second': 2.033, 'eval_steps_per_second': 0.132, 'epoch': 2.38}


 25%|██▌       | 160/630 [1:33:46<3:34:05, 27.33s/it]

{'loss': 1.5351, 'grad_norm': 5.789573669433594, 'learning_rate': 1.5700000000000002e-05, 'epoch': 2.54}


                                                     
 25%|██▌       | 160/630 [1:35:24<3:34:05, 27.33s/it]

{'eval_loss': 1.8086962699890137, 'eval_runtime': 98.4175, 'eval_samples_per_second': 2.032, 'eval_steps_per_second': 0.132, 'epoch': 2.54}


 27%|██▋       | 170/630 [1:39:45<3:29:29, 27.32s/it]

{'loss': 1.3187, 'grad_norm': 5.638758659362793, 'learning_rate': 1.6700000000000003e-05, 'epoch': 2.7}


                                                     
 27%|██▋       | 170/630 [1:41:23<3:29:29, 27.32s/it]

{'eval_loss': 1.7570443153381348, 'eval_runtime': 98.3937, 'eval_samples_per_second': 2.033, 'eval_steps_per_second': 0.132, 'epoch': 2.7}


 29%|██▊       | 180/630 [1:45:50<3:32:10, 28.29s/it]

{'loss': 1.2994, 'grad_norm': 3.6513569355010986, 'learning_rate': 1.77e-05, 'epoch': 2.86}


                                                     
 29%|██▊       | 180/630 [1:47:36<3:32:10, 28.29s/it]

{'eval_loss': 1.6654132604599, 'eval_runtime': 105.7219, 'eval_samples_per_second': 1.892, 'eval_steps_per_second': 0.123, 'epoch': 2.86}


 30%|███       | 190/630 [1:51:53<3:07:29, 25.57s/it]

{'loss': 1.2441, 'grad_norm': 7.131467342376709, 'learning_rate': 1.87e-05, 'epoch': 3.02}


                                                     
 30%|███       | 190/630 [1:53:35<3:07:29, 25.57s/it]

{'eval_loss': 1.6175390481948853, 'eval_runtime': 102.1133, 'eval_samples_per_second': 1.959, 'eval_steps_per_second': 0.127, 'epoch': 3.02}


 32%|███▏      | 200/630 [1:58:06<3:22:33, 28.26s/it]

{'loss': 1.2121, 'grad_norm': 3.989121675491333, 'learning_rate': 1.97e-05, 'epoch': 3.17}


                                                     
 32%|███▏      | 200/630 [1:59:48<3:22:33, 28.26s/it]

{'eval_loss': 1.649303913116455, 'eval_runtime': 102.0437, 'eval_samples_per_second': 1.96, 'eval_steps_per_second': 0.127, 'epoch': 3.17}


 33%|███▎      | 210/630 [2:04:19<3:18:22, 28.34s/it]

{'loss': 1.188, 'grad_norm': 2.066319465637207, 'learning_rate': 2.07e-05, 'epoch': 3.33}


                                                     
 33%|███▎      | 210/630 [2:06:01<3:18:22, 28.34s/it]

{'eval_loss': 1.5563119649887085, 'eval_runtime': 102.0351, 'eval_samples_per_second': 1.96, 'eval_steps_per_second': 0.127, 'epoch': 3.33}


 35%|███▍      | 220/630 [2:10:32<3:13:39, 28.34s/it]

{'loss': 1.4524, 'grad_norm': 5.586645126342773, 'learning_rate': 2.1700000000000002e-05, 'epoch': 3.49}


                                                     
 35%|███▍      | 220/630 [2:12:14<3:13:39, 28.34s/it]

{'eval_loss': 1.5339303016662598, 'eval_runtime': 102.0875, 'eval_samples_per_second': 1.959, 'eval_steps_per_second': 0.127, 'epoch': 3.49}


 37%|███▋      | 230/630 [2:16:45<3:08:57, 28.34s/it]

{'loss': 1.0352, 'grad_norm': 2.45631742477417, 'learning_rate': 2.2700000000000003e-05, 'epoch': 3.65}


                                                     
 37%|███▋      | 230/630 [2:18:27<3:08:57, 28.34s/it]

{'eval_loss': 1.523171067237854, 'eval_runtime': 102.0739, 'eval_samples_per_second': 1.959, 'eval_steps_per_second': 0.127, 'epoch': 3.65}


 38%|███▊      | 240/630 [2:22:58<3:04:12, 28.34s/it]

{'loss': 0.9608, 'grad_norm': 4.037929534912109, 'learning_rate': 2.37e-05, 'epoch': 3.81}


                                                     
 38%|███▊      | 240/630 [2:24:40<3:04:12, 28.34s/it]

{'eval_loss': 1.5019946098327637, 'eval_runtime': 102.1641, 'eval_samples_per_second': 1.958, 'eval_steps_per_second': 0.127, 'epoch': 3.81}


 40%|███▉      | 250/630 [2:29:11<2:59:29, 28.34s/it]

{'loss': 1.3032, 'grad_norm': 1.6020177602767944, 'learning_rate': 2.47e-05, 'epoch': 3.97}


                                                     
 40%|███▉      | 250/630 [2:30:53<2:59:29, 28.34s/it]

{'eval_loss': 1.5026812553405762, 'eval_runtime': 102.1276, 'eval_samples_per_second': 1.958, 'eval_steps_per_second': 0.127, 'epoch': 3.97}


 41%|████▏     | 260/630 [2:35:10<2:53:19, 28.11s/it]

{'loss': 1.2494, 'grad_norm': 4.114760398864746, 'learning_rate': 2.57e-05, 'epoch': 4.13}


                                                     
 41%|████▏     | 260/630 [2:36:54<2:53:19, 28.11s/it]

{'eval_loss': 1.490681767463684, 'eval_runtime': 104.1022, 'eval_samples_per_second': 1.921, 'eval_steps_per_second': 0.125, 'epoch': 4.13}


 43%|████▎     | 270/630 [2:41:29<2:51:01, 28.50s/it]

{'loss': 1.0958, 'grad_norm': 1.419173002243042, 'learning_rate': 2.6700000000000002e-05, 'epoch': 4.29}


                                                     
 43%|████▎     | 270/630 [2:43:11<2:51:01, 28.50s/it]

{'eval_loss': 1.4775652885437012, 'eval_runtime': 102.5612, 'eval_samples_per_second': 1.95, 'eval_steps_per_second': 0.127, 'epoch': 4.29}


 44%|████▍     | 280/630 [2:47:42<2:45:29, 28.37s/it]

{'loss': 1.0101, 'grad_norm': 3.212052822113037, 'learning_rate': 2.7700000000000002e-05, 'epoch': 4.44}


                                                     
 44%|████▍     | 280/630 [2:49:24<2:45:29, 28.37s/it]

{'eval_loss': 1.4733206033706665, 'eval_runtime': 101.9181, 'eval_samples_per_second': 1.962, 'eval_steps_per_second': 0.128, 'epoch': 4.44}


 46%|████▌     | 290/630 [2:53:55<2:40:53, 28.39s/it]

{'loss': 1.2838, 'grad_norm': 1.561474323272705, 'learning_rate': 2.87e-05, 'epoch': 4.6}


                                                     
 46%|████▌     | 290/630 [2:55:38<2:40:53, 28.39s/it]

{'eval_loss': 1.4769577980041504, 'eval_runtime': 102.2934, 'eval_samples_per_second': 1.955, 'eval_steps_per_second': 0.127, 'epoch': 4.6}


 48%|████▊     | 300/630 [3:00:09<2:36:11, 28.40s/it]

{'loss': 1.1144, 'grad_norm': 3.1627540588378906, 'learning_rate': 2.97e-05, 'epoch': 4.76}


                                                     
 48%|████▊     | 300/630 [3:01:51<2:36:11, 28.40s/it]

{'eval_loss': 1.447873830795288, 'eval_runtime': 102.3191, 'eval_samples_per_second': 1.955, 'eval_steps_per_second': 0.127, 'epoch': 4.76}


 49%|████▉     | 310/630 [3:06:22<2:31:18, 28.37s/it]

{'loss': 1.0621, 'grad_norm': 4.4483866691589355, 'learning_rate': 3.07e-05, 'epoch': 4.92}


                                                     
 49%|████▉     | 310/630 [3:08:04<2:31:18, 28.37s/it]

{'eval_loss': 1.4556777477264404, 'eval_runtime': 102.147, 'eval_samples_per_second': 1.958, 'eval_steps_per_second': 0.127, 'epoch': 4.92}


 51%|█████     | 320/630 [3:12:22<2:23:15, 27.73s/it]

{'loss': 1.0423, 'grad_norm': 3.4632248878479004, 'learning_rate': 3.1700000000000005e-05, 'epoch': 5.08}


                                                     
 51%|█████     | 320/630 [3:14:04<2:23:15, 27.73s/it]

{'eval_loss': 1.4665087461471558, 'eval_runtime': 102.2118, 'eval_samples_per_second': 1.957, 'eval_steps_per_second': 0.127, 'epoch': 5.08}


 52%|█████▏    | 330/630 [3:18:35<2:21:34, 28.31s/it]

{'loss': 1.1901, 'grad_norm': 3.2542998790740967, 'learning_rate': 3.27e-05, 'epoch': 5.24}


                                                     
 52%|█████▏    | 330/630 [3:20:17<2:21:34, 28.31s/it]

{'eval_loss': 1.445196270942688, 'eval_runtime': 102.269, 'eval_samples_per_second': 1.956, 'eval_steps_per_second': 0.127, 'epoch': 5.24}


 54%|█████▍    | 340/630 [3:24:48<2:17:13, 28.39s/it]

{'loss': 0.9981, 'grad_norm': 2.506216049194336, 'learning_rate': 3.3700000000000006e-05, 'epoch': 5.4}


                                                     
 54%|█████▍    | 340/630 [3:26:31<2:17:13, 28.39s/it]

{'eval_loss': 1.4267113208770752, 'eval_runtime': 102.3111, 'eval_samples_per_second': 1.955, 'eval_steps_per_second': 0.127, 'epoch': 5.4}


 56%|█████▌    | 350/630 [3:31:02<2:12:31, 28.40s/it]

{'loss': 1.0915, 'grad_norm': 2.7236931324005127, 'learning_rate': 3.4699999999999996e-05, 'epoch': 5.56}


                                                     
 56%|█████▌    | 350/630 [3:32:44<2:12:31, 28.40s/it]

{'eval_loss': 1.471602201461792, 'eval_runtime': 102.2322, 'eval_samples_per_second': 1.956, 'eval_steps_per_second': 0.127, 'epoch': 5.56}


 57%|█████▋    | 360/630 [3:37:22<2:12:01, 29.34s/it]

{'loss': 1.1306, 'grad_norm': 2.3553919792175293, 'learning_rate': 3.57e-05, 'epoch': 5.71}


                                                     
 57%|█████▋    | 360/630 [3:39:07<2:12:01, 29.34s/it]

{'eval_loss': 1.440331220626831, 'eval_runtime': 105.1871, 'eval_samples_per_second': 1.901, 'eval_steps_per_second': 0.124, 'epoch': 5.71}


 59%|█████▊    | 370/630 [3:43:48<2:07:31, 29.43s/it]

{'loss': 1.0946, 'grad_norm': 2.5069327354431152, 'learning_rate': 3.6700000000000004e-05, 'epoch': 5.87}


                                                     
 59%|█████▊    | 370/630 [3:45:34<2:07:31, 29.43s/it]

{'eval_loss': 1.432854175567627, 'eval_runtime': 105.9194, 'eval_samples_per_second': 1.888, 'eval_steps_per_second': 0.123, 'epoch': 5.87}


 60%|██████    | 380/630 [3:49:59<1:53:22, 27.21s/it]

{'loss': 1.0298, 'grad_norm': 2.4703848361968994, 'learning_rate': 3.77e-05, 'epoch': 6.03}


                                                     
 60%|██████    | 380/630 [3:51:44<1:53:22, 27.21s/it]

{'eval_loss': 1.4182806015014648, 'eval_runtime': 104.8755, 'eval_samples_per_second': 1.907, 'eval_steps_per_second': 0.124, 'epoch': 6.03}


 62%|██████▏   | 390/630 [3:56:23<1:56:45, 29.19s/it]

{'loss': 1.0224, 'grad_norm': 2.3515713214874268, 'learning_rate': 3.8700000000000006e-05, 'epoch': 6.19}


                                                     
 62%|██████▏   | 390/630 [3:58:09<1:56:45, 29.19s/it]

{'eval_loss': 1.4271224737167358, 'eval_runtime': 105.7196, 'eval_samples_per_second': 1.892, 'eval_steps_per_second': 0.123, 'epoch': 6.19}


 62%|██████▏   | 391/630 [3:58:37<4:01:28, 60.62s/it]

In [None]:
model.save_pretrained("./codebert-finetuned-roberta")
tokenizer.save_pretrained("./codebert-finetuned-roberta")

In [13]:
def generate_documentation(code_snippet, model, tokenizer):
    # Tokenize input
    inputs = tokenizer(code_snippet, return_tensors="pt", padding=True, truncation=True).to(device)
    
    # Generate output
    outputs = model.generate(inputs["input_ids"], max_length=150, num_beams=5, early_stopping=True)
    
    # Decode generated text
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

In [16]:
code_example = """
def add_numbers(a, b):
    return a + c
"""

In [None]:
documentation = generate_documentation(code_example, model, tokenizer)
print("Generated Documentation:", documentation)