#### Import libraries

In [None]:
from datasets import load_dataset
from qdrant_client import models,QdrantClient
from sentence_transformers import SentenceTransformer

#### Load Dataset

In [None]:
# Download (or load from the local cache) the CoNaLa dataset from the Hub.
# NOTE: trust_remote_code=True permits the dataset's loading script to run
# locally, so only use it with repositories you trust.
raw_datasets = load_dataset(
    path='neulab/conala',
    trust_remote_code=True,
)

In [None]:
# Inspect the object returned by load_dataset; as the last expression of a
# notebook cell its repr is displayed.
raw_datasets

In [None]:
# Select the 'train' split; the following cells work on this split only.
dataset = raw_datasets['train']

In [None]:
# Peek at the first five examples of the selected split.
dataset[:5]

##### Converting the Hugging Face dataset to pandas

In [None]:
# Convert the selected split into a pandas DataFrame for inspection and cleaning.
df_pandas = dataset.to_pandas()

In [None]:
# Preview the first 3 rows of the DataFrame.
df_pandas.head(3)

In [None]:
# Print a concise summary of the DataFrame (columns, dtypes, non-null counts).
df_pandas.info()

In [None]:
# Show descriptive statistics for the DataFrame's columns.
df_pandas.describe()

In [None]:
# Discard every row that has a missing value in any column, so later string
# concatenation of the text fields never meets a null.
df_pandas = df_pandas.dropna(axis=0, how='any')

# Per-column count of missing values that remain after the cleanup.
df_pandas.isna().sum()

In [None]:
# A list of per-row dictionaries is a convenient shape for building
# embeddings and payloads: each record maps column name -> value.
df_dict = df_pandas.to_dict('records')

#### Creating embeddings and an in-memory vector database

In [None]:
# Sentence-embedding model used for both the documents and the user query.
encoder = SentenceTransformer(model_name_or_path='all-MiniLM-L6-v2')

In [None]:
# Vector database client backed by an in-memory Qdrant instance:
# nothing is persisted once the process ends.
qdrant = QdrantClient(location=":memory:")

In [None]:
# (Re)create the 'conala' collection from scratch.
# `recreate_collection` is deprecated in qdrant-client; the documented
# replacement is an explicit existence check, delete, then create. Dropping
# any existing collection first keeps this cell safe to re-run.
if qdrant.collection_exists(collection_name='conala'):
    qdrant.delete_collection(collection_name='conala')

qdrant.create_collection(
    collection_name='conala',
    vectors_config=models.VectorParams(
        # The stored vector size must match the encoder's output dimension.
        size=encoder.get_sentence_embedding_dimension(),
        distance=models.Distance.COSINE,
    ),
)

In [None]:
# Vectorize the documents and upload them to the collection.
# All texts are encoded in a single batched `encode` call instead of one
# model invocation per row, which lets the encoder batch the work.
texts = [doc['intent'] + " " + doc['rewritten_intent'] for doc in df_dict]
embeddings = encoder.encode(texts)

qdrant.upload_points(
    collection_name='conala',
    points=[
        # The row position serves as the point id; the full record is kept
        # as payload so it can be returned with search hits.
        models.PointStruct(id=idx, vector=embedding.tolist(), payload=doc)
        for idx, (doc, embedding) in enumerate(zip(df_dict, embeddings))
    ],
)

### Take User Query

In [None]:
# Read the natural-language coding question from the user; the string is
# stored exactly as typed.
user_prompt = input('Enter python coding query')

### Embed the user query and perform retrieval on the vector database

In [None]:
# Embed the query with the same encoder used for the documents and fetch
# the 3 nearest points from the collection.
# `search` is deprecated in qdrant-client in favour of `query_points`, which
# returns a response object; its `.points` attribute is the list of scored
# points, so `hits` remains a list whose items expose `.payload`.
hits = qdrant.query_points(
    collection_name='conala',
    query=encoder.encode(user_prompt).tolist(),
    limit=3,
).points

In [None]:
# Keep only the stored payload (the original record) of each retrieved point.
search_results = [point.payload for point in hits]

In [None]:
# Report each retrieved record: its rewritten intent and its code snippet,
# with a blank line after every entry.
print("Relevant Code Snippets:\n")
for payload in search_results:
    intent_text = payload['rewritten_intent']
    code_text = payload['snippet']
    print(f"intent: {intent_text} \ncode: {code_text}", end="\n\n")