In [None]:
# pip install qdrant-client groq sentence-transformers dspy-ai fastembed gradio pyjwt --upgrade

In [None]:
import pandas as pd
# Load the healthcare dataset; the relative path is resolved against the current working directory.
df = pd.read_csv(filepath_or_buffer="healthcare_dataset.csv")

In [None]:
def format_row(row):
    """Render one dataset row as a single lowercase, comma-separated text record.

    Args:
        row: Mapping-like record (for example a DataFrame row) that can be
            indexed by each of the column names listed in ``columns``.

    Returns:
        str: ``"<column>: <value>"`` pairs joined by ``", "`` and terminated
        by a blank-line separator, with the entire string lowercased.
    """
    columns = (
        'Name', 'Age', 'Gender',
        'Blood Type', 'Medical Condition',
        'Date of Admission', 'Doctor',
        'Hospital', 'Insurance Provider',
        'Billing Amount', 'Room Number',
        'Admission Type', 'Discharge Date',
        'Medication', 'Test Results',
    )
    pairs = [f"{column}: {row[column]}" for column in columns]
    # Lowercasing covers the whole record: labels and values alike.
    return (", ".join(pairs) + "\n\n").lower()

# Format every row and keep the resulting text on the frame as a new column
df['formatted_text'] = df.apply(format_row, axis='columns')

# Plain Python list of the formatted strings, in row order
text_data = df['formatted_text'].to_list()

In [None]:
from random import Random, shuffle  # `shuffle` stays importable for any other cell that uses it

SAMPLE_SIZE = 128  # number of leading records to embed
SHUFFLE_SEED = 42  # fixed seed so a fresh Restart & Run All yields the same order (and ids)

# Slicing the list makes a copy, so the shuffle below leaves `text_data` untouched.
sampled_dataset = text_data[:SAMPLE_SIZE]
# A dedicated RNG keeps the shuffle reproducible without reseeding the global `random` state.
Random(SHUFFLE_SEED).shuffle(sampled_dataset)

In [None]:
# Encode the sampled records into dense embedding vectors
from sentence_transformers import SentenceTransformer

# No explicit device: SentenceTransformer then checks whether a GPU can be used,
# so the cell also runs on machines without CUDA instead of failing.
model = SentenceTransformer("BAAI/bge-large-en-v1.5")
vectors = model.encode(sampled_dataset)
# Show the dimensionality of a single embedding
vectors[0].shape


In [None]:
import os

# Keep an API key that is already exported in the environment; the literal is only a
# placeholder fallback and must be replaced with (or overridden by) a real key.
os.environ.setdefault('QDRANT__SERVICE__API_KEY', "qdrant_api_key")
# NOTE(review): presumably the Qdrant service setting that enables JWT-based RBAC — confirm
# it is also applied on the server side, since this only sets it for the notebook process.
os.environ['QDRANT__SERVICE__JWT_RBAC']='true'

In [None]:
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams

# Connect to the hosted Qdrant cluster, authenticating with the key read from the environment.
# For a local instance, point `url` at 'http://localhost:6333' instead.
client = QdrantClient(
    api_key=os.environ['QDRANT__SERVICE__API_KEY'],
    url="https://b3cc6dd2-4f57-4e59-8799-e66f63cccca3.us-east4-0.gcp.cloud.qdrant.io:6333",
)

In [None]:
# (Re)create the "phi_data" collection for 1024-dimensional vectors compared by cosine distance.
client.recreate_collection(
    vectors_config=VectorParams(distance=Distance.COSINE, size=1024),
    collection_name="phi_data",
)

# Point ids are the positions in `sampled_dataset`, so a search hit's id can be
# used to look the original text back up.
client.upload_collection(
    collection_name="phi_data",
    vectors=vectors,
    ids=list(range(len(sampled_dataset))),
    max_retries=3,
    parallel=4,
)

In [None]:
def get_context(text, limit=3):
    """Return the stored records most similar to ``text``, concatenated.

    Args:
        text: Query string; it is embedded with the module-level ``model``.
        limit: Maximum number of closest points to retrieve. Defaults to 3,
            the value that used to be hard-coded.

    Returns:
        str: The matching ``sampled_dataset`` entries joined in the order the
        search returned them; an empty string when there are no hits.
    """
    query_vector = model.encode(text)

    hits = client.search(
        collection_name="phi_data",
        query_vector=query_vector,
        limit=limit,  # number of closest points to return
    )
    # Point ids were assigned as positions in `sampled_dataset` when uploading.
    return "".join(sampled_dataset[hit.id] for hit in hits)

In [None]:
import os
import time

import jwt


# Secret used to sign the token. It is taken from the same environment variable the
# Qdrant client reads its API key from, so the token is signed with the key in use;
# the literal is only an example fallback for when the variable is not set.
api_key = os.environ.get('QDRANT__SERVICE__API_KEY', 'eXaMplE12345Key67890Api')


# Current time in seconds since the Unix epoch
current_time = int(time.time())


# JWT payload
payload = {
    'exp': current_time + 3600,  # Token expires in 1 hour
    # NOTE(review): presumably makes the token valid only while a matching point
    # (user == John) exists in demo_collection — confirm against Qdrant's JWT claims.
    'value_exists': {
        'collection': 'demo_collection',
        'matches': [
            {'key': 'user', 'value': 'John'}
        ]
    },
    # Collection-scoped access (not global): read-only on demo_collection,
    # with a payload restriction of user == John.
    "access": [
        {
            "collection": "demo_collection",
            "access": "r",
            "payload": {
                "user": "John"
            }
        }
    ]
}


# Encode the JWT token (HMAC-SHA256 signature)
encoded_jwt = jwt.encode(payload, api_key, algorithm='HS256')


# Print the JWT token
print(encoded_jwt)