In [1]:
import evaluate
import torch
from datasets import DatasetDict
from rich import print, print_json
from rich.markup import escape
from tqdm import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer

from shinkai.constants import CACHE_DIR
from shinkai.core.expirement import Expirement, ExpirementMetrics
from shinkai.core.loader import load_ift

In [2]:
# Load the dataset through the project's `load_ift` helper and show its structure.
# NOTE(review): testSize=0.005 presumably is the fraction held out for the "test"
# split — confirm against load_ift.
ds = load_ift(testSize=0.005)
print(ds)

In [3]:
# Pick the best available accelerator: CUDA first, then Apple-silicon MPS, then CPU.
# MPS availability has to be queried via torch.backends.mps; torch.cuda.is_available()
# says nothing about it.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

# Checkpoint shared by the model and its tokenizer.
MODEL_NAME = "deepseek-ai/deepseek-coder-1.3b-base"
# Both downloads use the same cache directory, one level above the working directory.
MODEL_CACHE_DIR = "../" + CACHE_DIR

model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, cache_dir=MODEL_CACHE_DIR).to(device)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, cache_dir=MODEL_CACHE_DIR)

# Put the module into evaluation mode; kept as the last expression so the
# notebook displays the model's repr.
model.eval()

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32256, 2048)
    (layers): ModuleList(
      (0-23): 24 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (k_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (v_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (o_proj): Linear(in_features=2048, out_features=2048, bias=False)
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=2048, out_features=5504, bias=False)
          (up_proj): Linear(in_features=2048, out_features=5504, bias=False)
          (down_proj): Linear(in_features=5504, out_features=2048, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((2048,), eps=1e-06)
        (post_attention_layernorm): LlamaRMSNorm((2048,), eps=1e-06)
      )
    )
    (norm): LlamaRMSNorm((2048,), eps=1e-06)
    (rotary_emb)

In [4]:
# "test" split of the loaded dataset; its "input" and "output" columns are read
# by the generation loop and the metric cells.
ds_test = ds["test"]

In [None]:
# Single-example smoke test: a detailed instruction prompt followed by one Swift
# function, sent through sampled generation to inspect the raw output.
promt = """
You are a Swift documentation assistant.

Your task is to add precise and complete Swift-style documentation comments (///) to the function below. Follow Apple's Xcode documentation conventions.

The documentation must include:
- A one-sentence summary of the function’s purpose.
- A "- Parameters:" section explaining each parameter clearly, including its type and role.
- A "- Returns:" section that describes the return value (if the function returns something), with its type.

Guidelines:
- Use concise, technical English appropriate for developer-facing API docs.
- Include type hints in explanations (e.g., “an integer”, “a string array”).
- If the function is generic (<T>), explain the generic behavior clearly.
- Do not include any commentary or explanation outside the code block.
- The output must be a single Swift code block containing only the function and its documentation.

```swift
func median(of numbers: [Double]) -> Double? {
    guard !numbers.isEmpty else { return nil }
    let sorted = numbers.sorted()
    let mid = sorted.count / 2
    return sorted.count % 2 == 0
        ? (sorted[mid - 1] + sorted[mid]) / 2
        : sorted[mid]
}
```
"""

# Show the model's default generation settings before overriding some of them below.
print(model.generation_config)
inputs = tokenizer(promt, return_tensors="pt").to(device)
outputs = model.generate(
    **inputs,
    max_new_tokens = 200,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.eos_token_id,   # so that the decoder does not complain
    do_sample=True,
    temperature=0.5,
    top_p=0.95      # nucleus (top-p) sampling threshold. NOTE(review): the earlier comment here described a ban on repeated 4-grams; that would be no_repeat_ngram_size, which is not set in this call
)
# Raw token ids returned by generate() (the next cell strips the prompt part).
print(outputs)

In [None]:
# Decode only the continuation: the first `inputs.input_ids.shape[1]` positions of
# the generated sequence are the prompt tokens and are skipped.
result = tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
# `print` is rich.print in this notebook and interprets console markup. Escape the
# generated text so bracketed Swift code (e.g. `[index]`, `[weak self]`) is shown
# verbatim instead of being consumed as a style tag or raising MarkupError.
print(escape(result))

In [None]:
# Second run on the same prompt without any sampling arguments, so the decoding
# strategy comes from the model's own generation_config. The result is requested
# as a structured output object (`.sequences`) rather than a bare tensor.
outputs = model.generate(
    **inputs,
    max_new_tokens=128,
    # Passed explicitly, as in the other generate() calls, so padding does not
    # depend on an implicit fallback.
    pad_token_id=tokenizer.eos_token_id,
    return_dict_in_generate=True,
    output_scores=False,
)

# Keep only the generated part (drop the prompt tokens)
generated_tokens = outputs.sequences[0][inputs.input_ids.shape[1]:]
result = tokenizer.decode(generated_tokens, skip_special_tokens=True)
# rich.print parses console markup; escape the model output so bracketed code is
# printed verbatim.
print(escape(result))

### Metrics Test

In [44]:
# Earlier prompt variant, kept commented out for reference only (not executed):
# promt = """
# You are a senior Swift developer.
# Given a Swift function, generate a short, plain-language summary as a documentation comment using ///.
# Keep the comment concise — just 1–2 sentences describing what the function does.
# Return only the documentation and the original function.
# It is very important that you only provide the final output without any additional comments or remarks
# func median(of numbers: [Double]) -> Double? {
#     guard !numbers.isEmpty else { return nil }
#     let sorted = numbers.sorted()
#     let mid = sorted.count / 2
#     return sorted.count % 2 == 0
#         ? (sorted[mid - 1] + sorted[mid]) / 2
#         : sorted[mid]
# }
# """

# Best prompt version so far
promt = """You are documenting a Swift codebase.
Given a function and a short description of where and how it is used, write a documentation comment that helps future developers understand its purpose.
Use ///-style Swift comments.
Include parameter and return descriptions, and emphasize intent over implementation.
Return only the comment and the function.
func add(_ a: Int, _ b: Int) -> Int {
    return a + b
}
"""

# Show the model's default generation settings before overriding some of them below.
print(model.generation_config)
inputs = tokenizer(promt, return_tensors="pt").to(device)
# Sampled generation (temperature 0.6, top-p 0.95), capped at 200 new tokens.
outputs = model.generate(
    **inputs,
    max_new_tokens = 200,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.eos_token_id,   # so that the decoder does not complain
    do_sample=True,
    temperature=0.6,
    top_p=0.95)

# Raw token ids returned by generate(); the following cell slices off the prompt.
print(outputs)

In [45]:
# Length of the tokenized prompt (size of dim 1 of input_ids)
input_lengths = inputs['input_ids'].shape[1]
print(input_lengths)
# Drop the prompt tokens so only the continuation remains
generated_tokens = outputs[:, input_lengths:]
print(generated_tokens)
# Decode only the new tokens
predictions = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)

for predict in predictions:
    # rich.print parses console markup; escape the generated text so bracketed
    # Swift code is printed verbatim instead of being treated as a style tag.
    print("Predict: \n", escape(predict))

In [46]:
# Shape of the raw generate() tensor; dim 1 is the token axis (prompt included)
# that the previous cell slices at `input_lengths`.
print(outputs.shape)

In [47]:
# Generate one documentation comment per test example.
# Sampling is stochastic, so the RNG is seeded first: otherwise every re-run
# produces different completions and therefore different ROUGE/BLEU numbers.
torch.manual_seed(42)

predictions = []

# `source_code` instead of `input`, which would shadow the Python builtin.
for source_code in tqdm(ds_test["input"]):
    # Same instruction prompt as the single-example cell. The closing quotes sit at
    # column 0 on purpose: indenting them would append stray spaces to every prompt.
    promt = f"""You are documenting a Swift codebase.
Given a function and a short description of where and how it is used, write a documentation comment that helps future developers understand its purpose.
Use ///-style Swift comments.
Include parameter and return descriptions, and emphasize intent over implementation.
Return only the comment and the function.
{source_code}
"""
    inputs = tokenizer(promt, return_tensors="pt").to(device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=200,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id,   # so that the decoder does not complain
        do_sample=True,
        temperature=0.7,
        top_p=0.95,
    )
    # The returned sequence starts with the prompt; keep only what follows it.
    input_lengths = inputs.input_ids.shape[1]
    generated_tokens = outputs[0, input_lengths:]
    predictions.append(tokenizer.decode(generated_tokens, skip_special_tokens=True))

100%|██████████| 6/6 [02:37<00:00, 26.27s/it]


In [50]:
# Show every generated prediction under a highlighted header.
for predict in predictions:
    # The header intentionally uses rich console markup.
    print("[bold green] Predict [/ bold green]")
    # The prediction is model output, not markup: escape it so bracketed Swift
    # code (e.g. `[index]`) is printed verbatim instead of being swallowed as a
    # style tag or raising MarkupError on an unmatched closing tag.
    print(escape(predict))

In [53]:
# Reference texts for scoring: the "output" column of the test split.
# NOTE(review): presumably aligned row-by-row with ds_test["input"], from which
# `predictions` was generated — confirm.
references = ds_test["output"]

print(references)

In [54]:
# Load the ROUGE metric through the `evaluate` library.
rouge = evaluate.load("rouge")

In [55]:
# Compute the metrics: ROUGE of the generated predictions against the references
results = rouge.compute(predictions=predictions, references=references)

In [56]:
# Print the results: one "name: score" line per metric, four decimal places
for metric, score in results.items():
    print(f"{metric}: {score:.4f}")

In [57]:
# Raw ROUGE result mapping as returned by compute(), without formatting.
print(results)

In [59]:
# BLEU over the same predictions and references.
# Note: this rebinds `results`, so the ROUGE scores held under that name are
# replaced from here on.
bleu = evaluate.load("bleu")
results = bleu.compute(predictions=predictions, references=references)
print(results)