# Problema 1: Scopul unui algoritm

In [14]:
from numpy.random import choice
from math import sqrt

class KMeans():
    """Minimal k-means clustering over NumPy arrays.

    Points are read through ``ndarray.flat``, so each sample may be a 1-D row
    of an ``(n, d)`` array or a ``(1, d)`` row of an ``(n, 1, d)`` array /
    ``numpy.matrix``. The centroids found by :meth:`train` are stored in
    ``self.centroizi`` and used by :meth:`predict`.
    """

    def __init__(self, numarCentroizi) -> None:
        """Store the requested number of clusters; centroids start empty.

        Args:
            numarCentroizi: number of clusters (k).
        """
        self.numarCentroizi = numarCentroizi
        self.centroizi = []

    def alegereCentroizi(self, input):
        """Pick the initial centroids as distinct random samples of ``input``.

        Args:
            input: array whose first axis indexes the samples.

        Raises:
            ValueError: if the number of centroids is not between 1 and the
                number of samples.
        """
        numarPuncte = input.shape[0]
        if not 1 <= self.numarCentroizi <= numarPuncte:
            raise ValueError(
                f"numarCentroizi must be between 1 and the number of samples "
                f"({numarPuncte}), got {self.numarCentroizi}"
            )
        # replace=False: sampling with replacement could select the same sample
        # twice, producing duplicate centroids and therefore an empty cluster.
        pozitii_centroizi = choice(numarPuncte, self.numarCentroizi, replace=False)
        self.centroizi = [input[i] for i in pozitii_centroizi]

    def distantaDintreDouaPuncte(self, punct1, punct2):
        """Return the Euclidean distance between two points.

        Both points are traversed element by element via ``.flat``, so 1-D
        vectors and ``(1, d)`` rows are handled alike.
        """
        patrate = [(a - b) ** 2 for a, b in zip(punct1.flat, punct2.flat)]
        return sqrt(sum(patrate))

    def closeCentroidForAPoint(self, punct):
        """Return the index of the centroid closest to ``punct``.

        Ties are resolved in favour of the lowest index.
        """
        ind = 0
        distantaMinima = self.distantaDintreDouaPuncte(punct, self.centroizi[0])

        # Centroid 0 is already the running minimum, so start from index 1.
        for i in range(1, len(self.centroizi)):
            distanta = self.distantaDintreDouaPuncte(punct, self.centroizi[i])
            if distanta < distantaMinima:
                distantaMinima = distanta
                ind = i
        return ind

    def _sumaPuncte(self, input, c, indiceCentroid):
        """Sum the samples of ``input`` whose label in ``c`` is ``indiceCentroid``."""
        return sum([input[i] for i in range(input.shape[0]) if c[i] == indiceCentroid])

    def _numarPuncte(self, c, indiceCentroid):
        """Count how many labels in the list ``c`` equal ``indiceCentroid``."""
        return c.count(indiceCentroid)

    def train(self, trainingInput, toleranta=0.05, maxIteratii=None):
        """Fit the centroids to ``trainingInput``.

        Each iteration assigns every sample to its closest centroid and moves
        each centroid to the mean of its samples. Iteration stops once the
        largest centroid displacement is below ``toleranta``.

        Args:
            trainingInput: array whose first axis indexes the samples.
            toleranta: displacement threshold used as the stopping criterion.
            maxIteratii: optional cap on the number of iterations; ``None``
                means iterate until the tolerance criterion is met.

        Raises:
            ValueError: propagated from :meth:`alegereCentroizi` when the
                number of centroids does not fit the number of samples.
        """
        self.alegereCentroizi(trainingInput)
        iteratie = 0

        while maxIteratii is None or iteratie < maxIteratii:
            iteratie += 1
            c = [
                self.closeCentroidForAPoint(trainingInput[i])
                for i in range(trainingInput.shape[0])
            ]

            schimbarePozitieCentroidMaxima = -1
            for indiceCentroid in range(0, self.numarCentroizi):
                numarMembri = self._numarPuncte(c, indiceCentroid)
                if numarMembri == 0:
                    # An empty cluster has no mean (0 / 0); keep its centroid
                    # where it is instead of raising ZeroDivisionError.
                    continue
                centroidNou = self._sumaPuncte(trainingInput, c, indiceCentroid) / numarMembri
                distanta = self.distantaDintreDouaPuncte(self.centroizi[indiceCentroid], centroidNou)

                if distanta > schimbarePozitieCentroidMaxima:
                    schimbarePozitieCentroidMaxima = distanta
                self.centroizi[indiceCentroid] = centroidNou

            if schimbarePozitieCentroidMaxima < toleranta:
                break

    def predict(self, input):
        """Return, for each sample in ``input``, the index of its closest centroid."""
        return [self.closeCentroidForAPoint(i) for i in input]

In [None]:
_CODEBERT_CHECKPOINT = "microsoft/codebert-base"
# Holds the loaded (tokenizer, model) pair so repeated calls do not reload it.
_codebert_cache = {}


def _load_codebert():
    """Return the CodeBERT ``(tokenizer, model)`` pair, loading it only once."""
    if "pair" not in _codebert_cache:
        # Imported here so this cell does not depend on an import made elsewhere.
        from transformers import RobertaModel, RobertaTokenizer

        _codebert_cache["pair"] = (
            RobertaTokenizer.from_pretrained(_CODEBERT_CHECKPOINT),
            RobertaModel.from_pretrained(_CODEBERT_CHECKPOINT),
        )
    return _codebert_cache["pair"]


def get_embedding_for_a_code_snippet(code_snippet) -> "numpy.ndarray":
    """Embed a code snippet with CodeBERT.

    The snippet is tokenized, passed through the model without gradient
    tracking, and the last hidden state is averaged over the token axis
    (``dim=1``).

    Args:
        code_snippet: source code text to embed.

    Returns:
        The mean-pooled last hidden state converted to a NumPy array.
    """
    import torch

    tokenizer, model = _load_codebert()

    # Truncate to 512 tokens (the CodeBERT/RoBERTa input limit); without this,
    # long snippets overflow the position embeddings and the forward pass fails.
    inputs = tokenizer(code_snippet, return_tensors="pt", truncation=True, max_length=512)

    # Inference only, so skip gradient bookkeeping.
    with torch.no_grad():
        outputs = model(**inputs)

    # Average over the token dimension to get one vector per input.
    embedding = outputs.last_hidden_state.mean(dim=1).numpy()
    return embedding

# Problema 2: Specificații pentru un algoritm

# Problema 3: Generare de cod

In [7]:
from transformers import AutoTokenizer, AutoModelForCausalLM

def incarcareModel():
    """Load the PyCodeGPT tokenizer and causal language model.

    Returns:
        tuple: ``(tokenizer, model)``, both obtained with ``from_pretrained``
        for the ``"Daoguang/PyCodeGPT"`` checkpoint.
    """
    checkpoint = "Daoguang/PyCodeGPT"
    # Tokenizer first, then model — same order as the returned tuple.
    return (
        AutoTokenizer.from_pretrained(checkpoint),
        AutoModelForCausalLM.from_pretrained(checkpoint),
    )
 

def genereazaCod(descriere, tokenizer, model, max_length=100):
    """Generate text from a natural-language description.

    Args:
        descriere: prompt text handed to the tokenizer.
        tokenizer: callable producing ``input_ids`` and ``attention_mask``,
            and offering ``decode``.
        model: object offering ``generate``.
        max_length: forwarded unchanged to ``model.generate``.

    Returns:
        The first generated sequence, decoded with special tokens skipped.
    """
    encoded = tokenizer(descriere, return_tensors="pt", padding=True)

    # A single sequence is requested; the attention mask goes along with the ids.
    sequences = model.generate(
        encoded['input_ids'],
        attention_mask=encoded['attention_mask'],
        max_length=max_length,
        num_return_sequences=1,
    )

    first_sequence = sequences[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)


## Exemplu 1

In [8]:
# Example 1: generate code for a "sum of two numbers" prompt.
# NOTE(review): incarcareModel() calls from_pretrained each time this cell runs;
# the loaded pair could be reused across the example cells.
tokenizer, model = incarcareModel()
descriere = "Write a python function that has 2 parameters a and b and return their sum."
# max_length=50 is forwarded by genereazaCod to model.generate.
cod = genereazaCod(descriere, tokenizer, model, max_length=50)
print(cod)

Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


Write a python function that has 2 parameters a and b and return their sum.

def sum_of_two_numbers(a, b):
    return a + b

print(sum_of_two_numbers(1,


## Exemplu 2

In [9]:
# Example 2: generate code for a "highest common factor" prompt.
# NOTE(review): incarcareModel() calls from_pretrained each time this cell runs;
# the loaded pair could be reused across the example cells.
tokenizer, model = incarcareModel()
descriere = "Write a python function that has 2 parameters a and b and returns their highest common factor."
# max_length=50 is forwarded by genereazaCod to model.generate.
cod = genereazaCod(descriere, tokenizer, model, max_length=50)
print(cod)

Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


Write a python function that has 2 parameters a and b and returns their highest common factor.

def gcd(a, b):
    if b == 0:
        return a
    return gcd(b, a % b)

def gcd


## Exemplu 3

In [13]:
# Example 3: generate code for a "merge sort" prompt.
# NOTE(review): incarcareModel() calls from_pretrained each time this cell runs;
# the loaded pair could be reused across the example cells.
tokenizer, model = incarcareModel()
descriere = "Write a python function that has one parameter, an array, and sorts it using merge sort."
# max_length=700 is forwarded by genereazaCod to model.generate.
cod = genereazaCod(descriere, tokenizer, model, max_length=700)
print(cod)

Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


Write a python function that has one parameter, an array, and sorts it using merge sort.

# In[ ]:


def mergeSort(arr):
    if len(arr) > 1:
        mid = len(arr) // 2
        left = arr[:mid]
        right = arr[mid:]

        mergeSort(left)
        mergeSort(right)

        i = 0
        j = 0
        k = 0

        while i < len(left) and j < len(right):
            if left[i] < right[j]:
                arr[k] = left[i]
                i += 1
            else:
                arr[k] = right[j]
                j += 1
            k += 1

        while i < len(left):
            arr[k] = left[i]
            i += 1
            k += 1

        while j < len(right):
            arr[k] = right[j]
            j += 1
            k += 1

# In[ ]:


mergeSort(arr)


# In[ ]:


# Sort the array using merge sort

# In[ ]:


def mergeSort(arr):
    if len(arr) > 1:
        mid = len(arr) // 2
        left = arr[:mid]
        right = arr[mid:]

        mergeSort(left)
        mergeSort(right)
