<a href="https://colab.research.google.com/github/maitreya-v/ruby_2_docstring_fine_tune_t5/blob/main/Code2Docstring_Ruby.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install -q transformers datasets

In [2]:
!pip install -q pytorch-lightning wandb

In [3]:
from datasets import load_dataset

In [4]:
dataset = load_dataset("code_x_glue_ct_code_to_text","ruby")

In [5]:
from pprint import pprint

print("Code: " + dataset["train"][0]["code"])
print("\n")
print("Docstring" + dataset["train"][0]["docstring"])

Code: def handle_parsed_websocket_message(json_data)
      data =  json_data.is_a?(Hash) ? json_data.stringify_keys : {}
      if CelluloidPubsub::Reactor::AVAILABLE_ACTIONS.include?(data['client_action'].to_s)
        log_debug "#{self.class} finds actions for  #{json_data}"
        delegate_action(data) if data['client_action'].present?
      else
        handle_unknown_action(data['channel'], json_data)
      end
    end


Docstringmethod that checks if the data is a Hash

 if the data is a hash then will stringify the keys and will call the method {#delegate_action}
 that will handle the message, otherwise will call the method {#handle_unknown_action}

 @see #delegate_action
 @see #handle_unknown_action

 @param [Hash] json_data

 @return [void]

 @api public


In [6]:
from transformers import RobertaTokenizer

tokenizer = RobertaTokenizer.from_pretrained('Salesforce/codet5-small')

In [7]:
# prefix = "Summarize Ruby: "

# max_input_length = 256
# max_target_length = 128

# def preprocess(examples):
#   codes = examples["code"]

#   docstrings = examples["docstring"]


#   inputs = [prefix + code for code in codes]

#   model_inputs = tokenizer(inputs,max_length=max_input_length,truncation=True,padding="max_length")

#   labels = tokenizer(docstrings,max_length=max_target_length,truncation=True,padding="max_length").input_ids

#   labels_with_ignore_index=[]

#   for label_one in labels:
#     label_one_processed = [label if label != 0 else -100 for label in label_one]

#     labels_with_ignore_index.append(label_one_processed)

#   model_inputs["labels"] = labels_with_ignore_index

#   return model_inputs

In [8]:
prefix = "Summarize Ruby: "
max_input_length = 256
max_target_length = 128

def preprocess(examples):
  # encode the code-docstring pairs
  codes = examples['code']
  docstrings = examples['docstring']

  inputs = [prefix + code for code in codes]
  model_inputs = tokenizer(inputs, max_length=max_input_length, padding="max_length", truncation=True,return_tensors="pt")

  # encode the summaries
  labels = tokenizer(docstrings, max_length=max_target_length, padding="max_length", truncation=True,return_tensors="pt").input_ids

  # important: we need to replace the index of the padding tokens by -100
  # such that they are not taken into account by the CrossEntropyLoss
  labels_with_ignore_index = []
  for labels_example in labels:
    labels_example = [label if label != 0 else -100 for label in labels_example]
    labels_with_ignore_index.append(labels_example)

  model_inputs["labels"] = labels_with_ignore_index

  return model_inputs

In [9]:
dataset_processed = dataset.map(preprocess,batched=True)

In [10]:
!pip install torch torchvision torchaudio



In [11]:
import torch

In [12]:
tokenizer.decode([label if label!=-100 else 0 for label in dataset_processed["train"][0]["labels"]])

'<s>method that checks if the data is a Hash\n\n if the data is a hash then will stringify the keys and will call the method {#delegate_action}\n that will handle the message, otherwise will call the method {#handle_unknown_action}\n\n @see #delegate_action\n @see #handle_unknown_action\n\n @param [Hash] json_data\n\n @return [void]\n\n @api public</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>'

In [13]:
from torch.utils.data import DataLoader

dataset_processed.set_format(type="torch", columns=['input_ids', 'attention_mask', 'labels'])
train_dataloader = DataLoader(dataset_processed['train'], shuffle=True, batch_size=8)
valid_dataloader = DataLoader(dataset_processed['validation'], batch_size=4)
test_dataloader = DataLoader(dataset_processed['test'], batch_size=4)

In [14]:
next(iter(train_dataloader))

{'input_ids': tensor([[    1,  3495, 21872,  ...,     0,     0,     0],
         [    1,  3495, 21872,  ...,     0,     0,     0],
         [    1,  3495, 21872,  ...,   565,   679,     2],
         ...,
         [    1,  3495, 21872,  ...,     0,     0,     0],
         [    1,  3495, 21872,  ...,     0,     0,     0],
         [    1,  3495, 21872,  ...,     0,     0,     0]]),
 'attention_mask': tensor([[1, 1, 1,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 1, 1, 1],
         ...,
         [1, 1, 1,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 0, 0, 0]]),
 'labels': tensor([[    1, 29198,   280,  ...,  -100,  -100,  -100],
         [    1,  9685,   279,  ...,   632,   891,     2],
         [    1,  3567,    67,  ...,    18,   971,     2],
         ...,
         [    1,  3116,   731,  ...,  -100,  -100,  -100],
         [    1,  3002,   326,  ...,  -100,  -100,  -100],
         [    1, 21488,  3315,  ...,  -100,  -100,  -1

In [15]:
!pip install transformers[torch]
!pip install accelerate -U



In [16]:
!pip install -q accelerate -U

In [17]:
from accelerate import Accelerator
from transformers import TrainingArguments, Trainer

In [18]:
from transformers import T5ForConditionalGeneration, AdamW, get_linear_schedule_with_warmup
import pytorch_lightning as pl

class CodeT5(pl.LightningModule):
    def __init__(self, lr=5e-5, num_train_epochs=15, warmup_steps=1000):
        super().__init__()
        self.model = T5ForConditionalGeneration.from_pretrained("Salesforce/codet5-small")
        self.save_hyperparameters()

    def forward(self, input_ids, attention_mask, labels=None):
        outputs = self.model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        return outputs

    def common_step(self, batch, batch_idx):
        outputs = self(**batch)
        loss = outputs.loss

        return loss

    def training_step(self, batch, batch_idx):
        loss = self.common_step(batch, batch_idx)
        # logs metrics for each training_step,
        # and the average across the epoch
        self.log("training_loss", loss)

        return loss

    def validation_step(self, batch, batch_idx):
        loss = self.common_step(batch, batch_idx)
        self.log("validation_loss", loss, on_epoch=True)

        return loss

    def test_step(self, batch, batch_idx):
        loss = self.common_step(batch, batch_idx)

        return loss

    def configure_optimizers(self):
        # create optimizer
        optimizer = AdamW(self.parameters(), lr=self.hparams.lr)
        # create learning rate scheduler
        num_train_optimization_steps = self.hparams.num_train_epochs * len(train_dataloader)
        lr_scheduler = {'scheduler': get_linear_schedule_with_warmup(optimizer,
                                                    num_warmup_steps=self.hparams.warmup_steps,
                                                    num_training_steps=num_train_optimization_steps),
                        'name': 'learning_rate',
                        'interval':'step',
                        'frequency': 1}

        return {"optimizer": optimizer, "lr_scheduler": lr_scheduler}

    def train_dataloader(self):
        return train_dataloader

    def val_dataloader(self):
        return valid_dataloader

    def test_dataloader(self):
        return test_dataloader

In [19]:
import wandb

wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mmaitreya-vaghulade[0m ([33mreadmegenerators[0m). Use [1m`wandb login --relogin`[0m to force relogin


True

In [20]:
model = CodeT5()

In [21]:
!pip install pytorch_lightning -U



In [33]:
# dataset_processed["train"]

dataset_processed_final = dataset_processed.remove_columns(['id', 'repo', 'path', 'func_name', 'original_string', 'language', 'code', 'code_tokens', 'docstring', 'docstring_tokens', 'sha', 'url'])

In [35]:
dataset_processed_final["train"]

Dataset({
    features: ['input_ids', 'attention_mask', 'labels'],
    num_rows: 24927
})

In [23]:
!huggingface-cli login --token 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

Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [24]:
device = 'cuda:0'
# tensor_on_gpu = tensor_on_cpu.to(device)

In [36]:
training_args = TrainingArguments('t5-code2docstring-small',
                                  evaluation_strategy='epoch',
                                  num_train_epochs=2,
                                  learning_rate=5e-5,
                                  weight_decay=0.005,
                                  per_device_train_batch_size=8,
                                  per_device_eval_batch_size=8,
                                  report_to = 'none'
                                 )

trainer = Trainer(
    model,
    training_args,
    train_dataset=dataset_processed_final["train"],
    eval_dataset=dataset_processed_final["validation"],
    # data_collator=data_collator,
    tokenizer=tokenizer,
    # compute_metrics=compute_metrics
)

trainer.train()



Epoch,Training Loss,Validation Loss
1,2.7395,2.777556
2,2.5709,2.732157


TrainOutput(global_step=6232, training_loss=2.7481790427525024, metrics={'train_runtime': 1385.5664, 'train_samples_per_second': 35.981, 'train_steps_per_second': 4.498, 'total_flos': 0.0, 'train_loss': 2.7481790427525024, 'epoch': 2.0})

In [37]:
model_directory = "./"
trainer.save_model(model_directory)

In [None]:
hfrom transformers import T5ForConditionalGeneration, TrainingArguments, Trainer
from datasets import load_metric
import numpy as np

model = T5ForConditionalGeneration.from_pretrained('Salesforce/codet5-small').to(device)

# Set id2label and label2id
# model.config.id2label = id2label
# model.config.label2id = label2id

# Metrics
# metric = load_metric("seqeval")
return_entity_level_metrics = True


# def compute_metrics(p):
#     predictions, labels = p
#     predictions = np.argmax(predictions, axis=2)

#     # Remove ignored index (special tokens)
#     true_predictions = [
#         [id2label[p] for (p, l) in zip(prediction, label) if l != -100]
#         for prediction, label in zip(predictions, labels)
#     ]
#     true_labels = [
#         [id2label[l] for (p, l) in zip(prediction, label) if l != -100]
#         for prediction, label in zip(predictions, labels)
#     ]

#     results = metric.compute(predictions=true_predictions, references=true_labels)
#     if return_entity_level_metrics:
#         # Unpack nested dictionaries
#         final_results = {}
#         for key, value in results.items():
#             if isinstance(value, dict):
#                 for n, v in value.items():
#                     final_results[f"{key}_{n}"] = v
#             else:
#                 final_results[key] = value
#         return final_results
#     else:
#         return {
#             "precision": results["overall_precision"],
#             "recall": results["overall_recall"],
#             "f1": results["overall_f1"],
#             "accuracy": results["overall_accuracy"],
#         }


class CordTrainer(Trainer):
    def get_train_dataloader(self):
        return train_dataloader

    def get_test_dataloader(self, test_dataset):
        return test_dataloader


args = TrainingArguments(
    output_dir="Code2Docstring_Ruby".lower(),  # name of directory to store the checkpoints
    max_steps=3000,  # we train for a maximum of 1,000 batches
    warmup_ratio=0.1,  # we warmup a bit
    fp16=True,  # we use mixed precision (less memory consumption), False when on CPU
    push_to_hub=True,  # after training, we'd like to push our model to the hub
    push_to_hub_model_id=f"Code2Docstring_Ruby".lower(),  # this is the name we'll use for our model on the hub
    device='cuda:0',
)

# Initialize our Trainer
trainer = CordTrainer(
    model=model,
    args=args,
    # compute_metrics=compute_metrics,
)

In [28]:
training_args = TrainingArguments(
    output_dir="/content/drive/MyDrive/CodeT5/Notebooks/Checkpoints",  # Checkpoint directory
    logging_dir="./logs",  # Directory for storing logs
    evaluation_strategy="steps",  # Evaluate every "eval_steps"
    eval_steps=100,  # Evaluate every 100 steps
    save_steps=500,  # Save checkpoint every 500 steps
    logging_steps=10,  # Log metrics every 10 steps
    save_total_limit=2,  # Limit the number of saved checkpoints
    load_best_model_at_end=True,  # Load the best model when finished
    metric_for_best_model="eval_loss",  # Metric to determine the best model
    greater_is_better=False,  # Smaller values are better for loss
    learning_rate=1e-4,  # Set your desired learning rate
)

# Create Trainer instance
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataloader,
    eval_dataset=valid_dataloader,
    # data_collator=data_collator,  # Define your data collator if needed
    # callbacks=[early_stopping_callback, lr_monitor],
    # logger=wandb_logger,
)

# Start training
trainer.train()



TypeError: ignored

In [None]:
trainer.train()

In [43]:
import pickle

In [None]:
pickle.dump(model, open('model.pkl', 'wb'))

In [38]:
from datasets import load_dataset

dataset = load_dataset("code_x_glue_ct_code_to_text", "ruby")
print(dataset['test'])

Dataset({
    features: ['id', 'repo', 'path', 'func_name', 'original_string', 'language', 'code', 'code_tokens', 'docstring', 'docstring_tokens', 'sha', 'url'],
    num_rows: 1261
})


In [39]:
test_example = dataset['test'][2]
print("Code:", test_example['code'])

Code: def confirm_ejson_keys_not_prunable
      secret = ejson_provisioner.ejson_keys_secret
      return unless secret.dig("metadata", "annotations", KubernetesResource::LAST_APPLIED_ANNOTATION)

      @logger.error("Deploy cannot proceed because protected resource " \
        "Secret/#{EjsonSecretProvisioner::EJSON_KEYS_SECRET} would be pruned.")
      raise EjsonPrunableError
    rescue Kubectl::ResourceNotFoundError => e
      @logger.debug("Secret/#{EjsonSecretProvisioner::EJSON_KEYS_SECRET} does not exist: #{e}")
    end


In [40]:
input_ids = tokenizer(test_example["code"],return_tensors='pt').input_ids

In [47]:
dataset_processed["train"]["input_ids"][0]

tensor([    1,  3495, 21872, 19817,    30,  1652,  1640,    67, 10817,    67,
        25164,    67,  2150,    12,  1977,    67,   892,    13,   203,  1377,
          501,   273,   225,  1163,    67,   892,    18,   291,    67,    69,
           35,    12,  2310,    13,   692,  1163,    67,   892,    18, 25650,
           67,  2452,   294,  2618,   203,  1377,   309,  8614,   332,   839,
         9581,  1717,  2866,   426,  3362,  2866, 23222,    67, 12249,    55,
           18,  6702,    35,    12,   892,  3292,  2625,    67,  1128, 29489,
          869,    67,    87,    13,   203,  3639,   613,    67,  4148,  7059,
         2890,    18,  1106,    97, 13094,  4209,   364,   225,  3735,  1977,
           67,   892,  1532,   203,  3639,  7152,    67,  1128,    12,   892,
           13,   309,   501,  3292,  2625,    67,  1128, 29489,  6706,    35,
          203,  1377,   469,   203,  3639,  1640,    67,  8172,    67,  1128,
           12,   892,  3292,  4327, 17337,  1163,    67,   892, 

In [44]:
pickled_model = pickle.load(open('model.pkl', 'rb'))

In [56]:
outputs = pickled_model.generate(dataset_processed["train"]["input_ids"][0].to(device))
print("Generated docstring:", tokenizer.decode(outputs[0], skip_special_tokens=True))

ValueError: ignored

In [51]:
# After training

# Save the model using .save_pretrained()
model_directory = "./t5-small"
trainer.save_model(model_directory)

In [63]:
!sudo apt-get install git-lfs
!git config --global user.email "maitreya.vaghulade@gmail.com"
!git config --global user.name "MaitreyaV"

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
git-lfs is already the newest version (3.0.2-1ubuntu0.2).
0 upgraded, 0 newly installed, 0 to remove and 16 not upgraded.


In [64]:
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|
    
    A token is already saved on your machine. Run `huggingface-cli whoami` to get more information or `huggingface-cli logout` if you want to log out.
    Setting a new token will erase the existing one.
    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Token: 
Add token as git credential? (Y/n) Y
Token is valid (permission: write).
[1m[31mCannot authenticate thro

In [65]:
repo_url = "https://huggingface.co/MaitreyaV/codet5-small-code-summarization-ruby"

In [66]:
from huggingface_hub import Repository

repo = Repository(local_dir="checkpoint", # note that this directory must not exist already
                  clone_from=repo_url,
                  git_user="MaitreyaV",
                  git_email="maitreya.vaghulade@gmail.com",
                  use_auth_token=True,
)

Cloning https://huggingface.co/MaitreyaV/codet5-small-code-summarization-ruby into local empty directory.


OSError: ignored

In [69]:
trainer.save_model("/content/MaitreyaV/Model")

In [70]:
tokenizer.save_pretrained("my-tokenizer")

('/content/MaitreyaV/Tokenizer/tokenizer_config.json',
 '/content/MaitreyaV/Tokenizer/special_tokens_map.json',
 '/content/MaitreyaV/Tokenizer/vocab.json',
 '/content/MaitreyaV/Tokenizer/merges.txt',
 '/content/MaitreyaV/Tokenizer/added_tokens.json')

In [67]:
# Push the model to the Hugging Face Model Hub
model_name_on_hub = "t5-code2docstring"  # Replace with your desired model name
trainer.push_to_hub(model_name_on_hub)

OSError: ignored

In [61]:
# Move the model and input tensors to the same device
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
model = model.to(device)
input_ids = dataset_processed["train"]["input_ids"][0].to(device)

# Generate text
# Generate text
outputs = pickled_model.generate(input_ids, num_beams=4, do_sample=True)

# Decode the generated sequence
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

# Print the generated text
print("Generated docstring:", generated_text)



ValueError: ignored

In [72]:
save_directory="./MaitreyaV/NewModel" # save in the current working directory, you can change this of course
model.model.save_pretrained(save_directory)

In [80]:
tokenizer_directory = "./MaitreyaV/Tokenizer"

In [89]:
from transformers import T5ForConditionalGeneration,AutoTokenizer
hf_repo = "MaitreyaV/t5-hf-ruby2text"
model = T5ForConditionalGeneration.from_pretrained(hf_repo)
tokenizer_pretrained = AutoTokenizer.from_pretrained(hf_repo)

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.62k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/242M [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/163 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/1.31k [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/639k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/294k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/15.5k [00:00<?, ?B/s]

In [90]:
# prepare for the model
input_ids = tokenizer_pretrained(test_example['code'], return_tensors='pt').input_ids
# generate
outputs = model.generate(input_ids)
print("Generated docstring:", tokenizer_pretrained.decode(outputs[0], skip_special_tokens=True))



Generated docstring: Check if the secret is not pruned


In [83]:
print("Ground truth:", test_example['docstring'])

Ground truth: make sure to never prune the ejson-keys secret


In [85]:
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|
    
    A token is already saved on your machine. Run `huggingface-cli whoami` to get more information or `huggingface-cli logout` if you want to log out.
    Setting a new token will erase the existing one.
    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Token: 
Add token as git credential? (Y/n) n
Token is valid (permission: write).
Your token has been saved to /roo

In [86]:
!huggingface-cli repo MaitreyaV/{}.format(MaitreyaV)
!huggingface-cli login
!huggingface-cli push

/bin/bash: -c: line 1: syntax error near unexpected token `('
/bin/bash: -c: line 1: `huggingface-cli repo MaitreyaV/{}.format(MaitreyaV)'

    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|
    
    A token is already saved on your machine. Run `huggingface-cli whoami` to get more information or `huggingface-cli logout` if you want to log out.
    Setting a new token will erase the existing one.
    To login, `huggingface_hub` requires a token generated from https://huggingf

In [87]:
model.push_to_hub("t5-hf-ruby2text")

pytorch_model.bin:   0%|          | 0.00/242M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/MaitreyaV/t5-hf-ruby2text/commit/cdcbcc3b5f86ee8a6faaa4cdc41ceb8a8ccb8604', commit_message='Upload T5ForConditionalGeneration', commit_description='', oid='cdcbcc3b5f86ee8a6faaa4cdc41ceb8a8ccb8604', pr_url=None, pr_revision=None, pr_num=None)

In [88]:
tokenizer.push_to_hub("t5-hf-ruby2text")

CommitInfo(commit_url='https://huggingface.co/MaitreyaV/t5-hf-ruby2text/commit/25958a6cc73dc2361f27ec6329c625ca22fb2a9a', commit_message='Upload tokenizer', commit_description='', oid='25958a6cc73dc2361f27ec6329c625ca22fb2a9a', pr_url=None, pr_revision=None, pr_num=None)