In [20]:
import nltk
import re
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.stem import PorterStemmer


In [21]:
def preprocess_text(text):
    text=text.lower()
    words = word_tokenize(text)
    stop_word = set(stopwords.words('english'))
    words = [word for word in words if word not in stop_word]
    stemmer = PorterStemmer()
    words = [stemmer.stem(word) for word in words]

    proprocessed_text= ' '.join(words)
    return proprocessed_text

In [22]:
# !pip install nltk

In [23]:
# import nltk

In [24]:
t='I am Sanjana from Amravati Maharstra'
preprocess_text(t)

'sanjana amravati maharstra'

In [25]:
import pandas as pd
try:
    ds = pd.read_csv("combined_data.csv")
    print(ds.head(2))
except UnicodeDecodeError as err:
    print(f"Error: {err}")    

                                                 URL  \
0  https://lawrato.com/indian-kanoon/ipc/section-140   
1  https://lawrato.com/indian-kanoon/ipc/section-127   

                                         Description  \
0  Description of IPC Section 140\nAccording to s...   
1  Description of IPC Section 127\nAccording to s...   

                                             Offense  \
0  Wearing the dress or carrying any token used b...   
1  Receiving property taken by war or depredation...   

                                Punishment  Cognizable      Bailable  \
0                 3 Months or Fine or Both  Cognizable      Bailable   
1  7 Years + Fine + forfeiture of property  Cognizable  Non-Bailable   

              Court  
0    Any Magistrate  
1  Court of Session  


In [26]:
ds.head(5)

Unnamed: 0,URL,Description,Offense,Punishment,Cognizable,Bailable,Court
0,https://lawrato.com/indian-kanoon/ipc/section-140,Description of IPC Section 140\nAccording to s...,Wearing the dress or carrying any token used b...,3 Months or Fine or Both,Cognizable,Bailable,Any Magistrate
1,https://lawrato.com/indian-kanoon/ipc/section-127,Description of IPC Section 127\nAccording to s...,Receiving property taken by war or depredation...,7 Years + Fine + forfeiture of property,Cognizable,Non-Bailable,Court of Session
2,https://lawrato.com/indian-kanoon/ipc/section-128,Description of IPC Section 128\nAccording to s...,Public servant voluntarily allowing prisoner o...,Imprisonment for Life or 10 Years + Fine,Cognizable,Non-Bailable,Court of Session
3,https://lawrato.com/indian-kanoon/ipc/section-129,Description of IPC Section 129\nAccording to s...,Public servant negligently suffering prisoner ...,Simple Imprisonment 3 Years + Fine,Cognizable,Bailable,Magistrate First Class
4,https://lawrato.com/indian-kanoon/ipc/section-130,Description of IPC Section 130\nAccording to s...,"Aiding escape of, rescuing or harbouring, such...",Imprisonment for Life or 10 Years + Fine,Cognizable,Non-Bailable,Court of Session


In [27]:
ds.isnull().sum()

URL             0
Description     1
Offense        63
Punishment     63
Cognizable     63
Bailable       63
Court          63
dtype: int64

In [28]:
ds.fillna('Not Mentioned',inplace=True)

In [29]:
ds.isnull().sum()

URL            0
Description    0
Offense        0
Punishment     0
Cognizable     0
Bailable       0
Court          0
dtype: int64

In [30]:
ds['Combo']=ds['Description']+ds['Offense']
ds.head(1)

Unnamed: 0,URL,Description,Offense,Punishment,Cognizable,Bailable,Court,Combo
0,https://lawrato.com/indian-kanoon/ipc/section-140,Description of IPC Section 140\nAccording to s...,Wearing the dress or carrying any token used b...,3 Months or Fine or Both,Cognizable,Bailable,Any Magistrate,Description of IPC Section 140\nAccording to s...


In [31]:
ds['Combo']=ds['Combo'].apply(preprocess_text)
ds.head(1)

Unnamed: 0,URL,Description,Offense,Punishment,Cognizable,Bailable,Court,Combo
0,https://lawrato.com/indian-kanoon/ipc/section-140,Description of IPC Section 140\nAccording to s...,Wearing the dress or carrying any token used b...,3 Months or Fine or Both,Cognizable,Bailable,Any Magistrate,descript ipc section 140 accord section 140 in...


In [32]:
new_ds=ds[['Description','Offense','Punishment','Cognizable','Bailable','Court','Combo']]
new_ds.head(1)

Unnamed: 0,Description,Offense,Punishment,Cognizable,Bailable,Court,Combo
0,Description of IPC Section 140\nAccording to s...,Wearing the dress or carrying any token used b...,3 Months or Fine or Both,Cognizable,Bailable,Any Magistrate,descript ipc section 140 accord section 140 in...


In [33]:
new_ds['Combo']=new_ds['Combo'].apply(lambda x:"".join(x))
new_ds.head(1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_ds['Combo']=new_ds['Combo'].apply(lambda x:"".join(x))


Unnamed: 0,Description,Offense,Punishment,Cognizable,Bailable,Court,Combo
0,Description of IPC Section 140\nAccording to s...,Wearing the dress or carrying any token used b...,3 Months or Fine or Both,Cognizable,Bailable,Any Magistrate,descript ipc section 140 accord section 140 in...


In [34]:
import pickle 
with open('preprocess_data.pkl','wb')as file:
    pickle.dump(new_ds,file)
new_ds=pickle.load(open('preprocess_data.pkl','rb'))


In [35]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer, util
# model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
model = SentenceTransformer('sentence-transformers/paraphrase-MiniLM-L6-v2')

    

In [36]:
def suggest_sections(complaint,dataset,min_suggestions=5):
    preprocessed_complaint=preprocess_text(complaint)
    complaint_embedding=model.encode(preprocessed_complaint)
    section_embedding=model.encode(dataset['Combo'].tolist())
    similarities=util.pytorch_cos_sim(complaint_embedding,section_embedding)[0]
    similarity_threshold=0.1
    relevant_indices = []
    while len(relevant_indices)<min_suggestions and similarity_threshold>0:
        relevant_indices=[i for i, sim in enumerate(similarities)if sim>similarity_threshold]
        similarity_threshold-=0.5
        sorted_indices=sorted(relevant_indices,key=lambda i: similarities[i],reverse=True)
        suggestions = dataset.iloc[sorted_indices][['Description','Offense','Punishment','Cognizable','Bailable','Court','Combo']].to_dict(orient='records')
        return suggestions

In [37]:
complaint = input("enter your complaint")
suggest_sections = suggest_sections(complaint,new_ds)
if suggest_sections:
    print("Suggested Section are:")
    for suggestion in suggest_sections:
        print(f"Description: {suggestion['Description']}")
        print(f"Offense: {suggestion['Offense']}")
        print(f"Punishment: {suggestion['Punishment']}")
        print("-------------------------------------------------------------------------------- ")
else:
    print("No record is found")        

Suggested Section are:
Description: Description of IPC Section 302
According to section 302 of Indian penal code, Whoever commits murder shall be punished with death, or imprisonment for life, and shall also be liable to fine.


IPC 302 in Simple Words
Section 302 of the Indian Penal Code states that anyone who commits murder can be punished with the death penalty or life imprisonment, and may also be fined.

Cited by
Ramdeo Rai Yadav vs State Of Bihar
Mohammad Irfan vs State Of Karnataka
Prasad Pradhan vs The State Of Chhattisgarh
Nandu Singh vs The State Of Madhya Pradesh
Kala Singh Gurnam Singh vs The State Of Punjab
Offense: Murder
Punishment: Death or Imprisonment for Life + Fine
-------------------------------------------------------------------------------- 
Description: Description of IPC Section 300
According to section 300 of Indian penal code,
Except in the cases hereinafter excepted, culpable homicide is murder, if the act by which the death is caused is done with the inten

In [38]:
# import torch
# print(torch.__version__)
# print(torch.cuda.is_available())  # This should return True if using a GPU
