In [None]:
import json
import pandas as pd

In [None]:
# Read the raw knowledge base (a JSON document) from the working directory.
with open("women_dataset.json", mode="r", encoding="utf-8") as dataset_file:
    data = json.load(dataset_file)

In [None]:
# Flatten the two-level JSON structure into one {"section", "text"} record per
# leaf entry:
#   * top-level list  -> one record per element, labelled with the top-level key
#   * top-level dict  -> one record per sub-key value (or per element when that
#                        value is a list), labelled with the sub-key
# Any other top-level value type contributes no records.
records = []
for key, value in data.items():
    if isinstance(value, list):
        labelled = [(key, entry) for entry in value]
    elif isinstance(value, dict):
        labelled = [
            (label, entry)
            for label, payload in value.items()
            for entry in (payload if isinstance(payload, list) else [payload])
        ]
    else:
        labelled = []
    records.extend(
        {"section": label, "text": str(entry)} for label, entry in labelled
    )

In [None]:
# Pin the column names so that an empty `records` list still produces a frame
# with "section" and "text" columns. A bare pd.DataFrame([]) has no columns,
# which would make the later df["text"] lookup raise KeyError.
df = pd.DataFrame(records, columns=["section", "text"])
print("Knowledge base created with", len(df), "entries")

Knowledge base created with 681 entries


In [None]:
# Preview the first rows to sanity-check the flattened "section"/"text" records.
df.head()

Unnamed: 0,section,text
0,Establishment,"{'StatutoryBody': True, 'Date': 'January 1992'..."
1,Mandate,"{'Reference': 'Section 10 of NCW Act, 1990', '..."
2,Leadership,{'Chairperson': {'Name': 'Smt. Vijaya K. Rahat...
3,Mission,"{'LastUpdated': '7 April 2025', 'Text': 'To st..."
4,Vision,"{'LastUpdated': '30 December 2024', 'Text': 'T..."


In [None]:
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# Sentence-embedding model used to encode both the knowledge-base texts and
# the user queries, so the two can be compared by cosine similarity.
model = SentenceTransformer('all-MiniLM-L6-v2')

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [None]:
# Encode every knowledge-base text once up front; searches then only need to
# encode the query. Lookups use positional indexing (df.iloc), so this relies
# on embeddings[i] corresponding to the i-th row of df -- re-run this cell
# whenever df is rebuilt.
embeddings = model.encode(df["text"].tolist(), show_progress_bar=True)

Batches:   0%|          | 0/22 [00:00<?, ?it/s]

In [None]:
def search_knowledge_base(query, top_n=3):
    """Return the knowledge-base rows most similar to ``query``.

    The query is encoded with the module-level ``model`` and compared against
    the precomputed module-level ``embeddings`` using cosine similarity.

    Args:
        query: Free-text user question.
        top_n: Maximum number of rows to return. Values <= 0 yield an empty
            result.

    Returns:
        A DataFrame with the "section" and "text" columns of the best
        matches, ordered from most to least similar. It has at most
        ``top_n`` rows (fewer when the knowledge base is smaller).

    Note:
        Rows are selected positionally, so ``embeddings`` is assumed to be
        aligned with the current rows of ``df``.
    """
    if top_n <= 0:
        # The previous slice ``argsort()[-top_n:]`` became ``[-0:]`` for
        # top_n == 0, i.e. the WHOLE array, returning every row instead of
        # none. Short-circuit before paying for an encode call.
        return df.iloc[0:0][["section", "text"]]

    query_vec = model.encode([query])
    scores = cosine_similarity(query_vec, embeddings)[0]
    # Sort descending first, then truncate: identical ranking to the original
    # for positive top_n, but without the negative-zero slicing pitfall.
    top_idx = scores.argsort()[::-1][:top_n]
    return df.iloc[top_idx][["section", "text"]]

In [None]:
def juristo_chatbot(user_input):
    """Answer ``user_input`` with the two closest knowledge-base texts.

    The reply is a fixed header, one line per retrieved "text" value, and a
    fixed footer pointing to the helpline and website.
    """
    matches = search_knowledge_base(user_input, top_n=2)
    body_lines = [f"{text}\n" for text in matches["text"]]
    footer = "\nYou can contact 181 (Women Helpline) or visit ncw.nic.in for more details."
    return "".join(["Here’s what I found:\n", *body_lines, footer])


In [None]:
# Interactive console loop for the retrieval-only chatbot.
print("JURISTO.ai Type 'exit' to quit.\n")
while True:
    try:
        # Strip so that inputs like "exit " or " quit" are still recognised.
        query = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # End of input or a kernel interrupt: leave cleanly instead of
        # surfacing a traceback.
        print("\nJURISTO.ai: Stay safe!")
        break
    if not query:
        # Nothing to search for; don't embed an empty string.
        continue
    if query.lower() in ("exit", "quit"):
        print("JURISTO.ai: Stay safe!")
        break
    print("JURISTO.ai:", juristo_chatbot(query))


JURISTO.ai Type 'exit' to quit.

You: misson
JURISTO.ai: Here’s what I found:
Abduction
Stalking

You can contact 181 (Women Helpline) or visit ncw.nic.in for more details.
You: vission
JURISTO.ai: Here’s what I found:
Bihar
{'topic': 'Defect', 'description': 'Any fault, imperfection or shortcoming in the quality, quantity, potency, purity or standard required to be maintained by law or contract, or as claimed by the trader in relation to any goods.'}

You can contact 181 (Women Helpline) or visit ncw.nic.in for more details.
You: helpline number
JURISTO.ai: Here’s what I found:
7827170170 (24x7 NCW Women Helpline)
{'name': 'National Consumer Helpline', 'toll_free': '1800-11-4000', 'purpose': 'Provides advice, information, and guidance for complaints on defective products, deficiency in services, and unfair trade practices.'}

You can contact 181 (Women Helpline) or visit ncw.nic.in for more details.
You: exit
JURISTO.ai: Stay safe!


# 2. Retrieval + generation (FLAN-T5)

In [None]:
import json, pandas as pd

def _render_text(value):
    """Render a parsed JSON value as plain, human-readable text.

    Dicts become "key: value; key: value", lists become comma-separated
    items, ``None`` becomes an empty string, and every other value is
    stringified and stripped. Empty parts are omitted.
    """
    if value is None:
        return ""
    if isinstance(value, dict):
        parts = []
        for name, inner in value.items():
            rendered = _render_text(inner)
            if rendered:
                parts.append(f"{name}: {rendered}")
        return "; ".join(parts)
    if isinstance(value, list):
        return ", ".join(text for text in map(_render_text, value) if text)
    return str(value).strip()


def load_dataset(path="women_dataset.json"):
    """Load the JSON knowledge base and flatten it into a DataFrame.

    Flattening rules, per top-level ``section``:
      * dict  -> one record per sub-key value (one per element when that
                 value is a list), labelled with the sub-key;
      * list  -> one record per element, labelled with the section name;
      * other -> a single record labelled with the section name.

    Args:
        path: Location of the JSON file. Defaults to the original
            hard-coded "women_dataset.json".

    Returns:
        A DataFrame with string columns "topic" and "text". Entries whose
        text is empty (e.g. JSON ``null``) are skipped.
    """
    with open(path, "r", encoding="utf-8") as f:
        data = json.load(f)

    records = []

    def add_record(topic, value):
        # Store readable text rather than str(dict): the Python repr
        # ("{'name': ...}") is what ends up in embeddings and prompts.
        text = _render_text(value)
        if text:
            records.append({"topic": topic, "text": text})

    for section, content in data.items():
        if isinstance(content, dict):
            for sub, val in content.items():
                if isinstance(val, list):
                    for item in val:
                        add_record(sub, item)
                else:
                    add_record(sub, val)
        elif isinstance(content, list):
            for item in content:
                add_record(section, item)
        else:
            # Top-level scalars used to be silently dropped.
            add_record(section, content)

    # Empty values are filtered above (the old .dropna() never fired because
    # str(None) is the non-null string "None"). Pinning the columns keeps the
    # schema stable even when no records were produced.
    df = pd.DataFrame(records, columns=["topic", "text"])
    print(f"Loaded {len(df)} knowledge items")
    return df

# NOTE: rebinds the module-level `df`. The label column produced here is
# "topic", whereas the frame built in the first section used "section".
df = load_dataset()


Loaded 681 knowledge items


In [None]:
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# Rebind `model` and `embeddings` for this section. The embeddings are computed
# from the current df["text"], and retrieve_info indexes df positionally, so
# this must be re-run whenever df is reloaded.
model = SentenceTransformer("all-MiniLM-L6-v2")
embeddings = model.encode(df["text"].tolist(), show_progress_bar=True)

def retrieve_info(query, top_n=3):
    """Return the texts of the knowledge-base entries closest to ``query``.

    The query is encoded with the module-level ``model`` and ranked against
    the precomputed module-level ``embeddings`` by cosine similarity.

    Args:
        query: Free-text user question.
        top_n: Maximum number of texts to return. Values <= 0 yield ``[]``.

    Returns:
        A list of "text" values, most similar first, with at most ``top_n``
        items.

    Note:
        Rows are selected positionally, so ``embeddings`` is assumed to be
        aligned with the current rows of ``df``.
    """
    if top_n <= 0:
        # ``argsort()[-0:]`` is the whole array, so top_n == 0 used to return
        # every entry instead of none.
        return []

    q_vec = model.encode([query])
    scores = cosine_similarity(q_vec, embeddings)[0]
    # Descending order first, then truncate to the requested count.
    top_idx = scores.argsort()[::-1][:top_n]
    return df.iloc[top_idx]["text"].tolist()


Batches:   0%|          | 0/22 [00:00<?, ?it/s]

In [None]:
from transformers import pipeline

# Text-to-text generation pipeline backed by google/flan-t5-base; called by
# generate_answer_local to turn retrieved context into an answer.
generator = pipeline("text2text-generation", model="google/flan-t5-base")

def generate_answer_local(query, context):
    """Generate an answer to ``query`` grounded in ``context``.

    Builds a single instruction prompt (context first, question last), runs
    it through the module-level ``generator`` with sampling disabled, and
    returns the "generated_text" of the first result.
    """
    prompt = (
        "Answer this question about women's safety using this info:\n\n"
        f"{context}\n\n"
        f"Question: {query}"
    )
    first_result = generator(prompt, do_sample=False)[0]
    return first_result["generated_text"]


Device set to use cpu


In [None]:
def juristo_ai_chatbot(query):
    """Answer ``query`` by retrieving the three closest texts and generating from them.

    The retrieved texts are joined with newlines to form the context passed
    to ``generate_answer_local``; its return value is returned unchanged.
    """
    passages = retrieve_info(query, top_n=3)
    return generate_answer_local(query, "\n".join(passages))

In [None]:
# Interactive console loop for the retrieval + generation chatbot.
print("JURISTO.ai Type 'exit' to quit.\n")
while True:
    try:
        # Strip so that inputs like "exit " or " quit" are still recognised.
        query = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # End of input or a kernel interrupt: leave cleanly instead of
        # surfacing a traceback.
        print("\nJURISTO.ai: Stay safe!")
        break
    if not query:
        # Nothing to answer; skip retrieval and generation for blank input.
        continue
    if query.lower() in ("exit", "quit"):
        print("JURISTO.ai: Stay safe!")
        break
    print("JURISTO.ai:", juristo_ai_chatbot(query))

JURISTO.ai Type 'exit' to quit.

JURISTO.ai: The mission of the Ministry of Civil Aviation is to ensure the safety of the public.
JURISTO.ai: is a term used to describe a chemical reaction in which a chemical reacts with another chemical to form a chemical compound
JURISTO.ai: can be seen through a telescope
JURISTO.ai: 7827170170
JURISTO.ai: 'name': 'name': 'name': 'name': 'name': 'name': 'name': 'name': 'name': 'name': 'name': 'name': 'name': 'name': 'name': 'name': 'name': 'name': 'name': 'name': 'name': 'name': 'name': 'name': 'name': 'name': 'name': 'name': 'name': 'name': 'name': 'name': 'name': 'name': 'name': 'name': 'name': 'name': 'name': 'name': 'name': 'name': 'name
