In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

import re

# Load the dataset; the 'Articles' column supplies the texts that are indexed below.
constitution_df = pd.read_csv('constituition_of_india.csv')

law_texts = constitution_df['Articles'].tolist()

# Build one display ID per row, aligned by position with `law_texts`.
# Numbering rows 1..N mislabels articles: the sample output in this notebook
# shows the text "23. Prohibition of traffic ..." reported as "Article_25".
# The ID is therefore taken from the number that prefixes the text
# (e.g. "23." -> "Article_23"; a trailing capital-letter suffix is kept),
# and the 1-based row position is used only when no such prefix is present.
_ARTICLE_NUMBER_RE = re.compile(r'\s*(\d+[A-Z]*)\.')
law_ids = []
for position, text in enumerate(law_texts, start=1):
    found = _ARTICLE_NUMBER_RE.match(text) if isinstance(text, str) else None
    law_ids.append(f"Article_{found.group(1) if found else position}")

# Fit TF-IDF on the article texts; row i of `law_vectors` corresponds to law_texts[i].
vectorizer = TfidfVectorizer()
law_vectors = vectorizer.fit_transform(law_texts)

def get_most_similar_law(query, top_n=1):
    """Return the articles whose TF-IDF vectors are closest to `query`.

    Args:
        query: Free-text question; it is vectorized with the fitted
            module-level `vectorizer`.
        top_n: Maximum number of matches to return. Values <= 0 yield an
            empty list.

    Returns:
        A list of `(law_id, article_text)` tuples ordered from most to least
        similar by cosine similarity. At most `top_n` items are returned
        (fewer if there are fewer articles).
    """
    # Guard: with the slice `[-top_n:]`, top_n == 0 becomes `[-0:]`, which
    # selects *every* article instead of none.
    if top_n <= 0:
        return []
    query_vector = vectorizer.transform([query])
    # cosine_similarity returns a (1, n_articles) matrix; keep the single row.
    similarities = cosine_similarity(query_vector, law_vectors)[0]
    # argsort is ascending, so reverse it and keep the first `top_n` indices.
    top_indices = similarities.argsort()[::-1][:top_n]
    # `.loc[i, ...]` matches positional index i because the frame comes from
    # read_csv without an index column (default 0..N-1 labels).
    return [(law_ids[i], constitution_df.loc[i, 'Articles']) for i in top_indices]

# Demo: look up the single closest article for a sample question and print it.
user_query = "Can you provide information on the human rights in India?"
similar_laws = get_most_similar_law(user_query, top_n=1)
for law_id, law_text in similar_laws:
    # One print call with a newline separator emits the same two lines.
    print(f"Law ID: {law_id}", f"Law Text: {law_text}", sep="\n")


Law ID: Article_25
Law Text: 23. Prohibition of traffic in human beings and forced labour
(1) Traffic in human beings and begar and other similar forms of forced labour are prohibited and any contravention of this provision shall be an offence punishable in accordance with law
(2) Nothing in this article shall prevent the State from imposing compulsory service for public purpose, and in imposing such service the State shall not make any discrimination on grounds only of religion, race, caste or class or any of them


In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from transformers import pipeline

import re

# Load the dataset; the 'Articles' column supplies the texts that are indexed below.
constitution_df = pd.read_csv('constituition_of_india.csv')

law_texts = constitution_df['Articles'].tolist()

# Build one display ID per row, aligned by position with `law_texts`.
# Numbering rows 1..N mislabels articles: the sample output in this notebook
# reports the President article (text containing "52.") as "Article_60".
# The ID is therefore taken from the number that prefixes the text
# (e.g. "52." -> "Article_52"; a trailing capital-letter suffix is kept),
# and the 1-based row position is used only when no such prefix is present.
_ARTICLE_NUMBER_RE = re.compile(r'\s*(\d+[A-Z]*)\.')
law_ids = []
for position, text in enumerate(law_texts, start=1):
    found = _ARTICLE_NUMBER_RE.match(text) if isinstance(text, str) else None
    law_ids.append(f"Article_{found.group(1) if found else position}")

# Fit TF-IDF on the article texts; row i of `law_vectors` corresponds to law_texts[i].
vectorizer = TfidfVectorizer()
law_vectors = vectorizer.fit_transform(law_texts)

# Pin the checkpoint and revision explicitly. Without them the library picks a
# default and warns that this is not recommended; the values below are the ones
# the captured output of this cell reports as the default it fell back to, so
# results stay the same while becoming reproducible.
summarization_pipeline = pipeline(
    "summarization",
    model="sshleifer/distilbart-cnn-12-6",
    revision="a4f8f3e",
)

def get_most_similar_law_summary(query, top_n=1):
    """Return machine-generated summaries of the articles closest to `query`.

    Args:
        query: Free-text question; it is vectorized with the fitted
            module-level `vectorizer`.
        top_n: Maximum number of articles to summarize. Values <= 0 yield an
            empty list.

    Returns:
        A list of `(law_id, summary_text)` tuples ordered from most to least
        similar by cosine similarity.
    """
    # Guard: with the slice `[-top_n:]`, top_n == 0 becomes `[-0:]`, which
    # would select (and summarize) *every* article instead of none.
    if top_n <= 0:
        return []
    query_vector = vectorizer.transform([query])
    # cosine_similarity returns a (1, n_articles) matrix; keep the single row.
    similarities = cosine_similarity(query_vector, law_vectors)[0]
    # argsort is ascending, so reverse it and keep the first `top_n` indices.
    top_indices = similarities.argsort()[::-1][:top_n]
    summaries = []
    for i in top_indices:
        article_summary = summarization_pipeline(
            constitution_df.loc[i, 'Articles'],
            max_length=100,
            min_length=30,
            do_sample=False,
            # Clip articles longer than the model's input limit instead of
            # passing an over-long sequence to the model.
            truncation=True,
        )
        # The pipeline returns a list with one dict per input text.
        summaries.append((law_ids[i], article_summary[0]['summary_text']))
    # NOTE(review): for very short articles the summary can be longer than the
    # source (see the max_length warning in this cell's output) — consider
    # returning the raw text in that case; confirm desired behavior.
    return summaries

# Demo: summarize the single closest article for a sample question and print it.
user_query = "Explain the powers of the President of India."
similar_laws = get_most_similar_law_summary(user_query, top_n=1)
for law_id, law_summary in similar_laws:
    # One print call with a newline separator emits the same two lines.
    print(f"Law ID: {law_id}", f"Summary: {law_summary}", sep="\n")


No model was supplied, defaulted to sshleifer/distilbart-cnn-12-6 and revision a4f8f3e (https://huggingface.co/sshleifer/distilbart-cnn-12-6).
Using a pipeline without specifying a model name and revision in production is not recommended.
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/1.80k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Your max_length is set to 100, but your input_length is only 15. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=7)


Law ID: Article_60
Summary:  There shall be a President of India . 52. 52. The president of India shall be the president of the country . There shall also be a Vice President of the nation .


In [None]:
# Sample questions kept as a bare triple-quoted string: nothing in this cell
# passes them to a lookup function; the cell only evaluates the string itself.
'''"What are the fundamental rights in India?"
"Explain the procedure for amending the Constitution."
"Can you explain the concept of judicial review?"
"What are the qualifications required to become the President of India?"
"What is the role of the Supreme Court in protecting constitutional rights?"
"What is the procedure for conducting elections in India?"
"Can you provide information on the right to education in India?"
"What is the difference between a writ and a petition in Indian law?"'''