In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree top-down and echo the full path of every file in it.
input_file_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
!pip install --upgrade transformers accelerate bitsandbytes

In [None]:
# Load the tokenizer and causal-LM weights for the checkpoint named below.
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Single source of truth for the checkpoint so tokenizer and model cannot drift apart.
MODEL_ID = "unsloth/DeepSeek-R1-Distill-Qwen-7B-unsloth-bnb-4bit"
device = torch.device('cuda:0')

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
# The checkpoint name indicates bitsandbytes 4-bit weights. Such models are placed
# on their device while loading, and calling `.to(device)` afterwards is rejected
# by some transformers/bitsandbytes versions, so the target GPU is passed through
# `device_map` instead ("" maps the whole model to one device).
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, device_map={"": device.index})

In [None]:
# Location of the extracted-text corpus inside the Kaggle input mount.
filepath = '/kaggle/input/extracted-text/extracted_text.txt'

# Read the whole file into memory as one UTF-8 string.
with open(filepath, mode='r', encoding='utf-8') as corpus_file:
    text = corpus_file.read()

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Break the corpus into overlapping pieces: chunk_size=500 with chunk_overlap=50
# (units are whatever the splitter's length function counts — presumably characters).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=50,
    chunk_size=500,
)
chunks = text_splitter.split_text(text)

# Report how many pieces the splitter produced.
print(f"Total Chunks: {len(chunks)}")

In [None]:
from sentence_transformers import SentenceTransformer

# Sentence-embedding model shared by indexing and, later, query encoding.
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# Encode every chunk in one call; the result is laid out as
# (num_chunks, embedding_size).
vectors = embedder.encode(chunks)

print(f"Embedding Shape: {vectors.shape}")


In [None]:
# Parse the JSON-Lines training file (one JSON record per line) into a DataFrame
# and preview its first rows.
jsonObj = pd.read_json("/kaggle/input/trainh/train.jsonl", lines=True)
jsonObj.head()

In [None]:
# Fold each record's `text` into its `labels` list, then remove the `text` column.
#
# The guard makes the cell safe to re-run. Previously every execution appended
# the text to the same list objects again, and the result of `drop` was discarded,
# so the column was never actually removed from `jsonObj`.
if 'text' in jsonObj.columns:
    jsonObj['labels'] = [
        # A plain string is appended whole; `list.extend(str)` would have added it
        # one character at a time. Any other iterable is still extended item-wise.
        # NOTE(review): assumes `labels` is always a list — confirm against the
        # schema of train.jsonl.
        list(labels) + ([text] if isinstance(text, str) else list(text))
        for labels, text in zip(jsonObj['labels'], jsonObj['text'])
    ]
    jsonObj = jsonObj.drop(columns=['text'])

print('Completed')

jsonObj

In [None]:
!pip install chromadb

In [None]:
import chromadb

# Upper bound on records per write; clamped below to what the client accepts.
BATCH_SIZE = 40000


def _add_in_batches(collection, ids, documents, embeddings, batch_size):
    """Write parallel id/document/embedding sequences to `collection` in slices.

    `upsert` is used instead of `add` so that re-running the cell against the
    persistent store refreshes existing ids instead of leaving stale records.

    Args:
        collection: Chroma collection to write to.
        ids: Record ids (strings), one per document.
        documents: Document strings, parallel to `ids`.
        embeddings: One embedding per document, parallel to `ids`.
        batch_size: Maximum number of records sent per call.
    """
    for start in range(0, len(ids), batch_size):
        stop = start + batch_size
        collection.upsert(
            ids=ids[start:stop],
            documents=documents[start:stop],
            embeddings=embeddings[start:stop],
        )


# Initialize ChromaDB (persisted under the Kaggle working directory).
chroma_client = chromadb.PersistentClient(path="/kaggle/working/chroma_db")

# Chroma rejects a write that exceeds the client's maximum batch size. Ask the
# client for that limit when it exposes one; otherwise fall back to BATCH_SIZE.
_max_batch_size_fn = getattr(chroma_client, "get_max_batch_size", None)
effective_batch_size = (
    min(BATCH_SIZE, _max_batch_size_fn()) if callable(_max_batch_size_fn) else BATCH_SIZE
)

# --- Text chunks -----------------------------------------------------------
text_collection = chroma_client.get_or_create_collection(name="text_collection")
_add_in_batches(
    text_collection,
    [str(i) for i in range(len(chunks))],
    chunks,
    vectors.tolist(),
    effective_batch_size,
)

print("Stored in ChromaDB successfully!")

# --- JSON records ----------------------------------------------------------
json_collection = chroma_client.get_or_create_collection(name="json_collection")

json_ids = [str(i) for i in jsonObj["id"].tolist()]
json_labels = jsonObj["labels"].tolist()
# Chroma documents must be strings, so each label entry is stringified (with a
# placeholder for missing entries).
json_documents = [str(doc) if doc is not None else "Empty Document" for doc in json_labels]
# Embed exactly the strings that are stored, so each vector describes its own
# document. Encoding the raw label lists would hand the encoder lists instead of
# text and would fail on missing entries.
json_embeddings = embedder.encode(json_documents)

_add_in_batches(
    json_collection,
    json_ids,
    json_documents,
    json_embeddings.tolist(),
    effective_batch_size,
)

print("Stored JSON data in ChromaDB successfully!")

In [None]:
def retrieve_similar_text(query, k=3):
    """Return the stored passages closest to `query` as one newline-joined string.

    The query is embedded with the module-level `embedder`, and the same vector
    is used to request `k` results from `text_collection` and then from
    `json_collection`. Text-collection hits come first, followed by
    JSON-collection hits; entries that are not strings are discarded.

    Args:
        query: Free-text question to search for.
        k: Number of results requested from each collection.

    Returns:
        The joined passages, or the literal "No relevant context found." when
        nothing usable came back.
    """
    query_vector = embedder.encode([query]).tolist()

    text_hits = text_collection.query(query_embeddings=query_vector, n_results=k)
    json_hits = json_collection.query(
        query_embeddings=query_vector, n_results=k, include=['documents']
    )

    def first_result_row(result):
        # Only the first entry of "documents" is used — presumably the row that
        # belongs to the single query vector sent above.
        rows = result.get("documents")
        return rows[0] if rows else []

    candidates = list(first_result_row(text_hits)) + list(first_result_row(json_hits))
    passages = [passage for passage in candidates if isinstance(passage, str)]

    return "\n".join(passages) or "No relevant context found."

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

def generate_response(query):
    """Answer `query` with the language model, grounded in retrieved context.

    The function looks up context with `retrieve_similar_text(query, k=3)`,
    inserts it into a fixed legal-advice prompt, and generates up to 1000 new
    tokens with the module-level `model` / `tokenizer`.

    Args:
        query: The user's question.

    Returns:
        "I couldn't find relevant information." when retrieval produced nothing,
        otherwise the generated answer wrapped as "[Retrieved Info: ...]\\n".
    """
    retrieved_text = retrieve_similar_text(query, k=3)  # Get relevant text

    # `retrieve_similar_text` signals "nothing found" with the first sentinel
    # below. The second is the wording this check used to compare against; it
    # never matched the retriever's output and is kept only for backward
    # compatibility.
    no_context_sentinels = ("No relevant context found.", "No relevant documents found.")
    if not retrieved_text or retrieved_text in no_context_sentinels:
        return "I couldn't find relevant information."

    # Generate a retrieval-augmented response
    retrieval_prompt = f'''You are an expert lawyer!!
    This is the given context, use to this to summarize the condtion and explain all charges which will be applicable on such scenario and summarize how the proceedings will take place and required proofs which is acceptable in indian courts.
    Context:\n{retrieved_text}\n\n
    Query: {query} Understand the query as a professional, respond as if you are directly talikng to victim or witness and help them how to fight agaisnt the crime that happened with them and summarise everything in 800 tokens\n Answer:'''

    # Send the inputs to whichever device the model lives on instead of a
    # hard-coded "cuda".
    retrieval_inputs = tokenizer(retrieval_prompt, return_tensors="pt", truncation=True).to(model.device)
    retrieval_output = model.generate(**retrieval_inputs, max_new_tokens=1000)

    # The generated sequence starts with the prompt tokens; decode only what
    # follows them, so the prompt is removed even if it was truncated or does
    # not survive the decode round-trip verbatim.
    prompt_length = len(retrieval_inputs["input_ids"][0])
    generated_tokens = retrieval_output[0][prompt_length:]
    retrieval_response = tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()

    # If the model itself wrote an "Answer:" marker, keep only what follows the
    # last one (same post-processing as before).
    if "Answer:" in retrieval_response:
        retrieval_response = retrieval_response.split("Answer:")[-1].strip()

    final_response = f"[Retrieved Info: {retrieval_response}]\n"
    return final_response

# Example usage: run one query through the full pipeline and show the answer.
# NOTE(review): `lambda_value` is assigned here but never passed to
# `generate_response`, so it has no effect on the result.
lambda_value = 1
query = "What if some lady molested me?"

response = generate_response(query)
print(f"Generated Response:\n", response)


In [None]:
!ngrok config add-authtoken 2uA5CfIpTqIRSQUxvW19JQOf1eK_759BzJoR9iWLuX66AbN4e

In [None]:
# !pip install fastapi uvicorn pyngrok torch
from fastapi import FastAPI
import torch
import uvicorn
from pyngrok import ngrok

# Load your model
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# model = torch.load("model.pth", map_location=device)  # Replace with your model
# model.eval()

# Create API
import threading

app = FastAPI()

# Generation is serialized: at most one request uses the model at a time, as was
# the case when the handler blocked the event loop.
_generation_lock = threading.Lock()


@app.post("/predict/")
def predict(query: str):
    """Return the model's answer for `query` as `{"prediction": <answer>}`.

    Declared with plain `def` rather than `async def`: `generate_response` is a
    long blocking call, and FastAPI runs plain-`def` handlers in a worker thread.
    As a coroutine it would stall the event loop (shared with the notebook
    kernel here) for the whole generation.

    Args:
        query: The user's question, taken from the request's query string.

    Returns:
        A dict with the single key "prediction".
    """
    with _generation_lock:
        response = generate_response(query)
    print(query)
    return {"prediction": response}



In [None]:
# Open an ngrok tunnel to local port 8000 (the port the API server listens on)
# and show the public address it was given.
ngrok_tunnel = ngrok.connect(8000)
public_api_url = ngrok_tunnel.public_url
print("Public API URL:", public_api_url)


In [None]:
import nest_asyncio  # Fix for event loop issue
import threading
import asyncio

# Apply the nest_asyncio patch (works around the notebook's event-loop issue),
# then install the default asyncio event-loop policy.
nest_asyncio.apply()
asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy())


# Serve `app` on every interface, port 8000 — the port the ngrok tunnel targets.
config = uvicorn.Config(app, host="0.0.0.0", port=8000)
server = uvicorn.Server(config)

loop = asyncio.get_event_loop()
# Schedule the server on the loop instead of blocking the cell. The task is bound
# to a name because the event loop keeps only weak references to tasks: an
# unreferenced task may be garbage-collected before it finishes.
server_task = loop.create_task(server.serve())