# Problema 1: Scopul unui algoritm

In [32]:
from numpy.random import choice
from math import sqrt

class KMeans():
    def __init__(self, numarCentroizi) -> None:
        self.numarCentroizi = numarCentroizi
        self.centroizi = []

    def alegereCentroizi(self, input):
        pozitii = [i for i in range(input.shape[0])]
        pozitii_centroizi = choice(pozitii, self.numarCentroizi)
        self.centroizi = [input[i] for i in pozitii_centroizi]

    def distantaDintreDouaPuncte(self, punct1, punct2):
        x = [(punct1[0, i] - punct2[0, i]) **2 for i in range(punct1.shape[1])]
        distanta = sqrt(sum(x))
        return distanta

    def closeCentroidForAPoint(self, punct):
        ind = 0
        distantaMinima = self.distantaDintreDouaPuncte(punct, self.centroizi[0])

        for i in range(len(self.centroizi)):
            distanta = self.distantaDintreDouaPuncte(punct, self.centroizi[i])
            if distanta < distantaMinima:
                distantaMinima = distanta
                ind = i
        return ind

    def _sumaPuncte(self, input, c, indiceCentroid):
        return sum([input[i] for i in range(input.shape[0]) if c[i] == indiceCentroid])

    def _numarPuncte(self, c, indiceCentroid):
        return c.count(indiceCentroid)
    
    def train(self, trainingInput):
        self.alegereCentroizi(trainingInput)
        convergent = False

        while not convergent:
            c = []
            for i in range(trainingInput.shape[0]):
                punct = trainingInput[i]
                ind = self.closeCentroidForAPoint(punct)
                c.append(ind)
            
            schimbarePozitieCentroidMaxima = -1
            for indiceCentroid in range(0, self.numarCentroizi):
                if self._numarPuncte(c, indiceCentroid) != 0:
                    centroidNou = self._sumaPuncte(trainingInput, c, indiceCentroid) / self._numarPuncte(c, indiceCentroid)
                else:
                    centroidNou = self.centroizi[indiceCentroid]
                distanta = self.distantaDintreDouaPuncte(self.centroizi[indiceCentroid], centroidNou)

                if distanta > schimbarePozitieCentroidMaxima:
                    schimbarePozitieCentroidMaxima = distanta
                self.centroizi[indiceCentroid] = centroidNou
            
            if schimbarePozitieCentroidMaxima < 0.05:
                convergent = True
    
    def predict(self, input):
        return [self.closeCentroidForAPoint(i) for i in input]

In [33]:
from transformers import RobertaTokenizer, RobertaModel
import torch
import pandas as pd
import numpy

def get_embedding_for_a_code_snippet(code_snippet) -> numpy.array:
    tokenizer = RobertaTokenizer.from_pretrained("microsoft/codebert-base")
    model = RobertaModel.from_pretrained("microsoft/codebert-base")

    # Tokenizează codul Python
    inputs = tokenizer(code_snippet, return_tensors="pt")

    # Obține embedding-uri folosind modelul CodeBERT
    with torch.no_grad():
        outputs = model(**inputs)

    # Extrage embedding-urile
    embedding = outputs.last_hidden_state.mean(dim=1).numpy()
    return embedding

def read_code_snippet(filePath):
    with open(filePath, "r") as file:
        code = file.read()
    return code

def citeste_din_fisier(filePath):
    dataframe = pd.read_csv(filePath)
    return dataframe

def scop(filePath, poz_scop):
    dataframe = citeste_din_fisier(filePath)
    code_file_paths = dataframe["code_snippet"]
    code_scopes = dataframe["scop"]
    set_code_scopes = set(code_scopes)
    set_code_scopes = [s for s in set_code_scopes]
    return set_code_scopes[poz_scop]

def genereazaClasificator(filePath):
    dataframe = citeste_din_fisier(filePath)
    code_file_paths = dataframe["code_snippet"]
    code_scopes = dataframe["scop"]
    set_code_scopes = set(code_scopes)
    numar_centroizi = len(set_code_scopes)

    codes = [read_code_snippet(file) for file in code_file_paths]
    embeddings = [get_embedding_for_a_code_snippet(code) for code in codes]

    clasificator = KMeans(numar_centroizi)
    clasificator.train(numpy.array(embeddings))
    return clasificator

def genereaza_scopul_unui_algoritm(cod, clasificator):
    cod = get_embedding_for_a_code_snippet(cod)
    return clasificator.predict(numpy.array([cod]))[0]

## Exemplu 1

In [37]:
clasificator = genereazaClasificator("scop.csv")
cod = "def sum_of_two_numbers(a, b): return a + b"
poz_scop = genereaza_scopul_unui_algoritm(cod, clasificator)
scop_cod = scop("scop.csv", poz_scop)
print(scop_cod)

Algoritmi matematici


## Exemplu 2

In [35]:
clasificator = genereazaClasificator("scop.csv")
cod = """def suma_cifrelor_impare(numar):
    numar_str = str(numar)
    suma_impare = 0
    for cifra in numar_str:
        cifra_int = int(cifra)
        if cifra_int % 2 != 0:
            suma_impare += cifra_int
    return suma_impare
"""
poz_scop = genereaza_scopul_unui_algoritm(cod, clasificator)
scop_cod = scop("scop.csv", poz_scop)
print(scop_cod)

Algoritmi pe cifrele unui numar


# Problema 2: Specificatii pentru un algoritm

In [28]:

from transformers import GPT2Tokenizer, GPT2LMHeadModel

# def generate_comments_with_model(code):
#     # Tokenizează codul pentru a-l transforma în input-ul modelului
#     tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
#     inputs = tokenizer.encode("generate comment for code:\n" + code, return_tensors="pt", max_length=512, truncation=True)

#     # Generează comentarii folosind modelul
#     model = GPT2LMHeadModel.from_pretrained("gpt2")
#     outputs = model.generate(inputs, max_length=150, num_return_sequences=1, temperature=0.8, do_sample=True, top_k=50)

#     # Decodează rezultatul pentru a obține comentariul generat
#     generated_comment = tokenizer.decode(outputs[0], skip_special_tokens=True)

#     return generated_comment


def generate_comments_with_model(code):
    # Load the tokenizer and model
    tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
    model = GPT2LMHeadModel.from_pretrained("gpt2")

    # Create the prompt for the model
    prompt = "Generate a detailed comment for the following Python function:\n\n" + code + "\n\nComment:\n"

    # Tokenize the input
    inputs = tokenizer.encode(prompt, return_tensors="pt", max_length=512, truncation=True)

    # Generate the comment
    outputs = model.generate(inputs, max_length=150, num_return_sequences=1, temperature=0.8, do_sample=True, top_k=50)

    # Decode the generated output
    generated_comment = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Extract the comment part
    comment_start = generated_comment.find("Comment:") + len("Comment:")
    if comment_start == -1:
        return "No comment generated"
    comment = generated_comment[comment_start:].strip()

    return comment


In [29]:
python_code_without_comments = """
def calculate_sum_of_two_numbers(a,b):
    return a + b
"""

# Generare de comentarii pentru codul Python dat
generated_comment = generate_comments_with_model(python_code_without_comments)
print(generated_comment)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


The following example shows how to calculate the sum of two numbers at once (the one with the right index and the other with the left index:


def sum_of_two_numbers(a,b):

[a,b] = sum(a + b)


So we got the same result for the two numbers (and we have this one in our program): sum([a,b] + sum([a,b] + sum([a,b]))).


(What is


# Problema 3: Generare de cod

In [7]:
from transformers import AutoTokenizer, AutoModelForCausalLM

def incarcareModel():
    # Load tokenizer and model
    tokenizer = AutoTokenizer.from_pretrained("Daoguang/PyCodeGPT")
    model = AutoModelForCausalLM.from_pretrained("Daoguang/PyCodeGPT")
    return tokenizer, model
 

def genereazaCod(descriere, tokenizer, model, max_length=100):
   # Encode the input with attention mask
    inputs = tokenizer(descriere, return_tensors="pt", padding=True)

    # Generate code
    outputs = model.generate(inputs['input_ids'], attention_mask=inputs['attention_mask'], max_length=max_length, num_return_sequences=1)

    # Decode the generated code
    generated_code = tokenizer.decode(outputs[0], skip_special_tokens=True) 
    return generated_code


## Exemplu 1

In [8]:
tokenizer, model = incarcareModel()
descriere = "Write a python function that has 2 parameters a and b and return their sum."
cod = genereazaCod(descriere, tokenizer, model, max_length=50)
print(cod)

Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


Write a python function that has 2 parameters a and b and return their sum.

def sum_of_two_numbers(a, b):
    return a + b

print(sum_of_two_numbers(1,


## Exemplu 2

In [9]:
tokenizer, model = incarcareModel()
descriere = "Write a python function that has 2 parameters a and b and returns their highest common factor."
cod = genereazaCod(descriere, tokenizer, model, max_length=50)
print(cod)

Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


Write a python function that has 2 parameters a and b and returns their highest common factor.

def gcd(a, b):
    if b == 0:
        return a
    return gcd(b, a % b)

def gcd


## Exemplu 3

In [13]:
tokenizer, model = incarcareModel()
descriere = "Write a python function that has one parameter, an array, and sorts it using merge sort."
cod = genereazaCod(descriere, tokenizer, model, max_length=700)
print(cod)

Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


Write a python function that has one parameter, an array, and sorts it using merge sort.

# In[ ]:


def mergeSort(arr):
    if len(arr) > 1:
        mid = len(arr) // 2
        left = arr[:mid]
        right = arr[mid:]

        mergeSort(left)
        mergeSort(right)

        i = 0
        j = 0
        k = 0

        while i < len(left) and j < len(right):
            if left[i] < right[j]:
                arr[k] = left[i]
                i += 1
            else:
                arr[k] = right[j]
                j += 1
            k += 1

        while i < len(left):
            arr[k] = left[i]
            i += 1
            k += 1

        while j < len(right):
            arr[k] = right[j]
            j += 1
            k += 1

# In[ ]:


mergeSort(arr)


# In[ ]:


# Sort the array using merge sort

# In[ ]:


def mergeSort(arr):
    if len(arr) > 1:
        mid = len(arr) // 2
        left = arr[:mid]
        right = arr[mid:]

        mergeSort(left)
        mergeSort(right)
