In [1]:
import git
import os
repo_url = 'https://github.com/sahajsoft/Pravarthan'
expanded_path = os.path.expanduser('~/Pravarthan')

# Reuse the checkout under the home directory when the path is already
# there; otherwise clone a fresh copy from `repo_url` into it.
if os.path.exists(expanded_path):
    repo = git.Repo(expanded_path)
else:
    repo = git.Repo.clone_from(repo_url, expanded_path)


In [2]:
from llama_index.core import VectorStoreIndex, Document

# Materialise the history reachable from 'master', then keep only the
# fields used further down: commit message, per-file stats and SHA.
commits = list(repo.iter_commits('master'))
documents = [
    {
        'message': entry.message,
        'files': entry.stats.files,
        'sha': entry.hexsha,
    }
    for entry in commits
]


In [None]:
from sentence_transformers import SentenceTransformer
import weaviate
import weaviate.classes.config as wc
import weaviate.classes as wvc

# Sentence-embedding model shared by every call to `embeddings`.
model = SentenceTransformer('BAAI/bge-small-en-v1.5')


def embeddings(text):
    """Encode `text` with the module-level `model`.

    Returns whatever `model.encode` produces for the given input.
    """
    encoded = model.encode(text)
    return encoded


# Open a connection using the client's local defaults.
client = weaviate.connect_to_local()

# Create the 'commit_embeddings' collection only when it is missing, so the
# create call is skipped on later runs.
collection_exists = client.collections.exists(name='commit_embeddings')
if not collection_exists:
    client.collections.create(name='commit_embeddings')


In [None]:
from weaviate.classes.query import Filter
from weaviate.classes.config import Property, DataType

# Fetch a handle to the collection that the connection cell ensures exists.
# The name must match the one passed to `client.collections.create`
# ('commit_embeddings'); the previous 'code_embeddings' pointed inserts and
# queries at a different collection.
collection = client.collections.get('commit_embeddings')

# Accumulator for the objects that will be bulk-inserted later.
weaviate_docs = []
print('Preparing Weaviate docs...')


In [55]:
# Embed each commit message once and key the result by the commit's SHA so
# later cells can look the vector up without re-encoding.
map_embeddings = {
    doc['sha']: embeddings(doc['message'])
    for doc in documents
}


In [85]:
# One DataObject per commit: the stored properties are the SHA, the paths
# of the touched files and the commit message; the vector is the message
# embedding computed earlier, converted to a plain list.
weaviate_docs = [
    wvc.data.DataObject(
        properties={
            'sha': doc['sha'],
            'affected_files': list(doc['files'].keys()),
            'message': doc['message'],
        },
        vector=list(map_embeddings[doc['sha']]),
    )
    for doc in documents
]


In [None]:
# Bulk-insert the prepared objects. The call is the last expression in the
# cell, so its return value is what the notebook displays.
# NOTE(review): the objects are built without explicit UUIDs, so re-running
# this cell presumably inserts duplicates — confirm before re-executing.
collection.data.insert_many(weaviate_docs)

In [None]:
# Embed the free-text question with the same model used for the commits.
# NOTE(review): 'fine me' looks like a typo for 'find me'; it is left
# untouched because changing the text changes the query embedding.
query_vector = list(embeddings('fine me places where the work_status changed '))

# Ask for the five nearest commits and request the certainty metadata.
retrieved_docs = collection.query.near_vector(
    near_vector=query_vector,
    limit=5,
    return_metadata=wvc.query.MetadataQuery(certainty=True),
)

# Collect just the commit messages of the hits for a quick look.
retrieved_messages = []
for hit in retrieved_docs.objects:
    retrieved_messages.append(hit.properties['message'])

print(retrieved_messages)


In [104]:
def generate_commit_prompt(retrieved_doc, query):
    """Build a prompt from a user query and one retrieved commit.

    Args:
        retrieved_doc: Mapping with the keys 'message', 'affected_files'
            and 'sha'.
        query: The user's request; it is embedded verbatim at the top of
            the prompt.

    Returns:
        The prompt string: the query, the commit message, and for each
        affected file its path followed by the output of
        `repo.git.show('<sha>:<path>')` using the module-level `repo`.
    """
    prompt_template = "here is the user query {}:\n\n".format(query)
    prompt_template += "Here are the commit details help me by suggesting where i need to make change and what change :\n\n"

    prompt_template += f"Commit Message: {retrieved_doc['message']}\n"
    prompt_template += "Affected Files:\n"
    # Read the file list from the argument. The previous code iterated over
    # an undefined name `ob`, which raised NameError (or silently picked up
    # a stale notebook global).
    # NOTE(review): `git show` presumably fails for a path that does not
    # exist at that commit (e.g. a file the commit deleted) — confirm and
    # handle if such commits are expected.
    for file in retrieved_doc['affected_files']:
        prompt_template += f"  - {file}\n"
        prompt_template += "    Content:\n"
        prompt_template += f"{repo.git.show('{}:{}'.format(retrieved_doc['sha'], file))}\n\n"

    return prompt_template

In [None]:
# Build and print the prompt for the third hit of the similarity search.
# The query result exposes its hits through `.objects`, and each hit keeps
# the stored fields in `.properties` (the same access pattern used when
# collecting `retrieved_messages`); `generate_commit_prompt` needs that
# properties mapping. The previous `retrieved_docs[2].object` did not match
# this shape.
print(generate_commit_prompt(retrieved_docs.objects[2].properties, ''' When a user changes the work status, the data must be logged in the system. The data to be stored must include the following:

Who changed the status
When was the change made
What is the change made
'''))