In [6]:
from legal_NER.ner_extraction_functions import tag_docs, read_pdf, extract_statutes_provisions_precedents
import pandas as pd
import os
from spacy import displacy
from tqdm import tqdm
import time

In [7]:
# Load openai_extracted.csv, using the file's first column as the row index.
openai_extracted = pd.read_csv('openai_extracted.csv', index_col=0)
# Show the available columns; the processing loop further down iterates over `file_name`.
openai_extracted.columns

Index(['file_name', 'summary', 'court', 'petitioners', 'respondents', 'judges',
       'date', 'org', 'gpe', 'provisions', 'statutes', 'precedents',
       'key_facts', 'type_of_case', 'decision'],
      dtype='object')

In [3]:
# One-off sample: read a single PDF from Docs/food-safety and pass its text to tag_docs.
# Both helpers come from legal_NER.ner_extraction_functions; presumably `doc` is a
# spaCy-style Doc (the processing loop reads `doc.ents`) — TODO confirm.
# Note that the processing loop reassigns both `text` and `doc`.
text = read_pdf('Docs/food-safety/Amluya_Anand_vs_State_on_8_March_2022.PDF')
doc = tag_docs(text)

In [13]:
# Load ner_extracted.csv, using the file's first column as the row index.
# NOTE(review): done_df is only displayed in this notebook; the processing loop decides
# what to skip from out_df, not done_df — confirm whether done_df was meant to drive that check.
done_df = pd.read_csv('ner_extracted.csv', index_col=0)

# Empty results table. save_output_to_df writes rows positionally, so this column
# order must stay in step with that function's parameter order.
out_df = pd.DataFrame(columns=['filename', 'error', 'court', 'petitioners', 'respondents', 'judges', 'date', 'org', 'gpe', 'provisions', 'statutes', 'precedents'])

def save_output_to_df(df, filename, error, court, petitioners, respondents, judges, date, org, gpe, provisions, statutes, precedents):
    """Append one extraction result to ``df`` as a new row and return ``df``.

    The row is stored under the label ``len(df)`` and its values are written
    positionally, in the order of the parameters that follow ``df``. ``df`` is
    modified in place; the returned object is the same frame.
    """
    next_label = len(df)
    row_values = [
        filename,
        error,
        court,
        petitioners,
        respondents,
        judges,
        date,
        org,
        gpe,
        provisions,
        statutes,
        precedents,
    ]
    df.loc[next_label] = row_values
    return df

In [9]:
# Display the previously saved NER results loaded from ner_extracted.csv.
done_df

Unnamed: 0,file_name,court,petitioners,respondents,judges,date,org,gpe,provisions,statutes,precedents
0,Abhijeet_Suryakant_Maske_And_Anr_vs_The_State_...,"['High Court Of Judicature At Bombay', 'Suprem...","['Abhijeet Suryakant Maske', 'Abhijeet Suryaka...","['State Of Maharashtra', 'State of Maharashtra...",[],"['1 March, 2022', '6 November 2020', '21 Septe...","['Baramati Police Station, Dist', 'Baramati Po...","['Pune', 'Mumbai', 'Mumbai', 'Pune']",[],[],[]
1,Amluya_Anand_vs_State_on_8_March_2022.PDF,['High Court Of Judicature For Rajasthan At\n ...,"['Amluya Anand', 'Amluya Anand']","['State', 'State']",[],"['08/03/2022', '17/02/2022', '07.04.2015', '24...","['Swastik Milk Chilling Centre', 'Amul Dairy',...","['Nagaur', 'Thanwla District Nagaur', 'Jodhpur...",[],[],[]
2,Arulmurugan_Starch_Industries_vs_Food_Safety_A...,['High Court Of Judicature At Madras'],"['Arulmurugan Starch Industries', '2018Arulmur...","['Food Safety And Standards', 'Commissioner of...",[],"['11.7.2018', '19.01.2022', '19 January, 2022'...",['Food Safety and Standards Authority of India'],"['New Delhi', 'Chennai', 'Salem District', 'Sa...",[],[],[]
3,Arun_Raosaheb_Khot_vs_State_Of_Maharashtra_on_...,"['High Court Of Judicature At Bombay', 'Suprem...","['Arun Raosaheb Khot', 'Sneha\nNitin', 'Arun R...","['State Of Maharashtra', 'State of Maharashtra']",[],"['31 January, 2022', '6 November 2020', '21 Se...","['Police Station Hadapsar, District Pune']","['Mumbai', 'Mumbai']",[],[],[]
4,Asha_Singh_vs_The_State_Of_Bihar_on_24_March_2...,"['High Court Of Judicature At Patna', 'Patna H...","['Asha Singh', 'Asha Singh']","['State Of Bihar', 'State of Bihar', 'Union Of...",[],"['24-03-2022', 'dt.24-03-2022', '24 March, 202...","['C.B.I.', 'Tata Institute of Social Sciences'...","['Turkauliya', 'Raghunathpur', 'Motihari', 'Bi...",[],[],[]
...,...,...,...,...,...,...,...,...,...,...,...
60,Gyanaduttachouhan_vs_The_Additional_Chief_Secr...,"['High Court Of Orissa At Cuttack', 'Collector...","['Gyanaduttachouhan', 'GyanaduttaChouhan', 'Gy...","['Additional Chief Secretary To', 'Additional ...",[],"['7th July 2021', '23rd May, 2021', '23 March,...",['Veer SurendraSai Instituteof Medical Science...,"['Kendbahal', 'Bargarh District', 'Orissa', 'O...",[],[],[]
61,G_Sudheeshkumar_vs_State_Of_Kerala_on_15_Febru...,['High Court Of Kerala At Ernakulam'],"['G.Sudheeshkumar', 'G.Sudheeshkumar', '2G.Sud...","['State Of Kerala', 'P.V.Kunhikrishnan', 'Stat...",[],"['15th day of February, 2022', '2.4.12', '20.7...",['ADV GOVERNMENT PLEADER'],[],[],[],[]
62,Haldiram_Bhujiawala_Limited_vs_The_State_Of_Jh...,"['High Court Of Jharkhand At Ranchi', 'Collect...","['Haldiram Bhujiawala Limited', 'Kailash Prasa...","['State Of Jharkhand', 'State of Jharkhand']",[],"['27.07.2018', '2.2.2.2', '2.2.2.8', '2.2.2.7'...","['Haldiram Bhujiawala Limited', 'State Food an...","['Bokaro', 'Ranchi']",[],[],[]
63,High_Court_On_Its_Own_Motion_In_The_vs_Bhiwand...,"['High Court Of Judicature At Bombay', 'Suprem...",['LEGISLATION PROVISIONS PUBLIC PENAL PROVISIO...,"['Bhiwandi Nizampur', 'Bhiwandi Nizampur Munic...",[],"['26 February, 2022', 'September 24, 2020', '9...","['TMC', 'MMRDA', 'Bhiwandi Nizampur Municipal ...","['Bhiwandi', 'Mumbai', 'Mumbai', 'Mumbai Metro...",[],[],[]


In [20]:
def _texts_with_label(doc, label):
    """Return the text of every entity in ``doc.ents`` whose ``label_`` equals ``label``."""
    return [ent.text for ent in doc.ents if ent.label_ == label]


# Filenames whose processing raised outside the post-processing step.
failed = []
for filename in tqdm(openai_extracted.file_name):
    # Files already recorded in out_df are not processed again.
    already_recorded = filename in out_df.filename.values
    if already_recorded:
        continue
    try:
        # Defaults, overwritten below once extraction (or its fallback) has run.
        error = "NA"
        provisions = []
        statutes = []
        precedents = []

        text = read_pdf('Docs/food-safety/' + filename)
        doc = tag_docs(text)

        try:
            provisions, statutes, precedents = extract_statutes_provisions_precedents(doc)
        except Exception as postprocess_error:
            # Fall back to the entity texts as tagged and record the error
            # in the row instead of dropping the file.
            print(f"Error encountered while processing file {filename}: {postprocess_error}")
            error = postprocess_error
            provisions = _texts_with_label(doc, 'PROVISION')
            statutes = _texts_with_label(doc, 'STATUTE')
            precedents = _texts_with_label(doc, 'PRECEDENT')
            print(f"Provisions: {provisions}, Statutes: {statutes}, Precedents: {precedents}")

        # Remaining fields are taken directly from the tagged entities.
        orgs = _texts_with_label(doc, 'ORG')
        gpes = _texts_with_label(doc, 'GPE')
        judges = _texts_with_label(doc, 'JUDGE')
        petitioners = _texts_with_label(doc, 'PETITIONER')
        respondents = _texts_with_label(doc, 'RESPONDENT')
        court = _texts_with_label(doc, 'COURT')
        date = _texts_with_label(doc, 'DATE')

        out_df = save_output_to_df(
            out_df,
            filename=filename,
            error=error,
            court=court,
            petitioners=petitioners,
            respondents=respondents,
            judges=judges,
            date=date,
            org=orgs,
            gpe=gpes,
            provisions=provisions,
            statutes=statutes,
            precedents=precedents,
        )
        # Rewrite the CSV after every file so results survive an interrupted run.
        out_df.to_csv('ner_extracted_2.csv')
    except Exception as processing_error:
        # Any other failure (reading, tagging, saving): log it, remember the file, move on.
        print(f"Error encountered while processing file {filename}: {processing_error}")
        failed.append(filename)

 82%|████████▏ | 160/196 [00:10<00:02, 14.92it/s]

Error encountered while processing file Satish_Murari_Lal_Sharma_vs_The_State_Of_Maharashtra_on_22_March_2022.PDF: 'NoneType' object has no attribute 'vocab'


 87%|████████▋ | 171/196 [04:28<07:41, 18.46s/it]

⚠ There was some issue while performing postprocessing, skipping
postprocessing...
Error encountered while processing file Smt_C_Kavitha_vs_Sri_Sivanesan_P_on_19_March_2022.PDF: 'provision_statute_pairs'
Provisions: ['Sec.29', 'Sec.12', 'Sec.125', 'Sec.12', 'Sec.125', 'Sec.12', 'Sec.125', 'Sec.3 explanation 4', 'Sec.12', 'Sec.2(a)', 'Sec.2(f)', 'Sec.2(f)', 'Sec.3', 'Sec.3', 'clause (a) or clause (h); or (d)', 'Sec.3', 'Sec.3', 'Sec.2(f)', 'Sec.17 to Sec.22', 'Sec.20', 'Sec.31', 'Sec.31', 'Sec.31', 'Sec.31', 'Sec.12', 'section 468', 'Sec.12', 'Sec.20 and 21', 'Sec.2(a)', 'Sec.12', 'Sec.12', 'Sec.12', 'Sec.12', 'Sec.12', 'Sec.20', 'Sec.12(1)', 'Sec.125', 'Sec.20(2)', 'Sec.20', 'Sec.29', 'Sec.29'], Statutes: ['Protection of Women from Domestic Violence Act,\n2005', 'Protection of Women from Domestic Violence Act, 2005', 'Cr.', 'Protection of Women from Domestic Violence Act,\n2005', 'Cr.', 'Protection of Women from Domestic Violence Act, 2005', 'Cr', 'Protection of Women from Domestic Vio

 90%|████████▉ | 176/196 [06:58<07:35, 22.75s/it]

⚠ There was some issue while performing postprocessing, skipping
postprocessing...
Error encountered while processing file State_Of_Gujarat_On_Behalf_Of_Amratbhai_vs_Ramsinh_Durgsinh_Rajpurohit_on_9_March_2022.PDF: 'provision_statute_pairs'
Provisions: ['Sections 26(1), 26(2)(I),\n', '26(2)(5), 27 and 59', 'No.2.12.(1)(2)'], Statutes: ['Food Safety & Standard Act, 2006', 'Food Product Standard & Food R/CR.MA/21745/2021'], Precedents: ['State Of Gujarat On Behalf Of Amratbhai ... vs Ramsinh\nDurgsinh Rajpurohit']


 93%|█████████▎| 183/196 [08:13<02:17, 10.56s/it]

Error encountered while processing file S_P_Shrivastava_vs_The_State_Of_Madhya_Pradesh_on_21_January_2022.PDF: 'NoneType' object has no attribute 'vocab'


 96%|█████████▋| 189/196 [23:32<18:04, 154.94s/it]

⚠ There was some issue while performing postprocessing, skipping
postprocessing...
Error encountered while processing file The_Swedish_Club_vs_V8_Pool_Inc_And_3_Ors_on_23_March_2022.PDF: 'provision_statute_pairs'
Provisions: ['Rule 1084', 'Rule 1084', 'Regulation 2.5, paragraph 2', 'Regulation 5.1.3', 'Appendix A2-I.', 'Regulations 2.2 and 2.5', 'Regulation 12 of Standard A2.5.2', 'Rule 1084', 'Rule 1084', 'Regulation 12 of A2.5.2', 'Aquarius III', 'Rule 1084', 'Section 9(1)(a) r/w Section 10(1)(a)', 'Rule 1084', 'Rule 1084', 'Rule 1084', 's. 93(1)', 'clause 12 of Standard A2.5.2', 'Rule 1084', 'Rule 1084', 'Rule 1084', 'Rule 1084', 'clause 12 of A2.5.2'], Statutes: ['Maritime Labour Convention, 2006', 'Maritime Labour Convention', 'Bombay High Court (Original Side) Rules', 'Maritime Labour Convention', 'Maritime Labour Convention', 'MLC', 'Admiralty ( Jurisdiction and Settlement of Maritime Claims) Act, 2017', 'MLC', 'MLC', 'MLC'], Precedents: ['Swedish Club vs V8 Pool Inc.', 'Swedish

 98%|█████████▊| 192/196 [25:46<05:21, 80.35s/it] 

⚠ There was some issue while performing postprocessing, skipping
postprocessing...
Error encountered while processing file Vijaysing_Dnyaneshwar_Gotya_S_O_vs_State_Of_Mha_Thr_Officer_In_Charge_Of_Ps_on_13_January_2022.PDF: 'provision_statute_pairs'
Provisions: ['Sections 188, 272, 273, 328', 'Sections 26(2)(i), 26(2)', '(iv), 26(2)(v), 27(2)(e), 30(2)(a) and 59', 'Section 328', 'C.R. 492', 'Section 328', 'Sec 26(2)(i), 26(2)(v), 26(2)(iv), 27(2)(e),\n', 'Section 328', 'Section 328', 'Sections 26(2)(i), 26(2)(v),\n26(2)(iv), 27(2)(e), 30(2)(e)'], Statutes: ['IPC', 'Food Safety and Standard Act 2006', 'IPC', 'IPC', 'Food Safety and Standard Act, 2006', 'IPC', 'IPC', 'oral squamous cell carcinoma', 'Cigarettes and other Tobacco Products Act 2003', 'Food Safety & Standard Act, 2006'], Precedents: ['Vijaysing @ Dnyaneshwar @ Gotya S/O ... vs State Of Mha', 'Ganesh Pandurang Jadhao vrs', 'Union of India and others vrs', 'Unicorn Industries; (2019) 10 SCC 575', 'Unicorn Industries', 'beenVija

100%|██████████| 196/196 [30:47<00:00,  9.43s/it] 


In [31]:
# Display the accumulated extraction results.
# NOTE(review): the rendered rows below show the same filenames at rows 0/2 and 1/3,
# so out_df appears to contain duplicate entries — confirm and de-duplicate on `filename`.
out_df

Unnamed: 0,filename,error,court,petitioners,respondents,judges,date,org,gpe,provisions,statutes,precedents
0,Abhijeet_Suryakant_Maske_And_Anr_vs_The_State_...,,"[High Court Of Judicature At Bombay, Supreme C...","[Abhijeet Suryakant Maske, Abhijeet Suryakant ...","[State Of Maharashtra, State of Maharashtra, S...","[C.V. Bhadang, C.V. Bhadang, C.V. Bhadang, Sne...","[1 March, 2022, 6 November 2020, 21 September ...","[Baramati Police Station, Dist, Baramati Polic...","[Pune, Mumbai, Mumbai, Pune]","[Section 328 of Indian Penal Code, Section 272...","[Indian Penal Code, Food Safety and Standards ...",[Abhijeet Suryakant Maske And Anr vs The State...
1,Amluya_Anand_vs_State_on_8_March_2022.PDF,'provision_statute_pairs',[High Court Of Judicature For Rajasthan At\n ...,"[Amluya Anand, Amluya Anand]","[State, State]","[Sandeep Mehta, Sandeep Mehta, Merta, CJM Naga...","[08/03/2022, 17/02/2022, 07.04.2015, 24.01.202...","[Swastik Milk Chilling Centre, Amul Dairy, Amu...","[Nagaur, Thanwla District Nagaur, Jodhpur, Ajm...","[Section 468, Section 77, Sections 26(2)(i) an...","[Cr, Food Safety and Standards Act, 2006, Food...","[CRLMP-545/2020]Amluya Anand vs State, V.V.S.S..."
2,Abhijeet_Suryakant_Maske_And_Anr_vs_The_State_...,,"[High Court Of Judicature At Bombay, Supreme C...","[Abhijeet Suryakant Maske, Abhijeet Suryakant ...","[State Of Maharashtra, State of Maharashtra, S...","[C.V. Bhadang, C.V. Bhadang, C.V. Bhadang, Sne...","[1 March, 2022, 6 November 2020, 21 September ...","[Baramati Police Station, Dist, Baramati Polic...","[Pune, Mumbai, Mumbai, Pune]","[Section 328 of Indian Penal Code, Section 272...","[Indian Penal Code, Food Safety and Standards ...",[Abhijeet Suryakant Maske And Anr vs The State...
3,Amluya_Anand_vs_State_on_8_March_2022.PDF,'provision_statute_pairs',[High Court Of Judicature For Rajasthan At\n ...,"[Amluya Anand, Amluya Anand]","[State, State]","[Sandeep Mehta, Sandeep Mehta, Merta, CJM Naga...","[08/03/2022, 17/02/2022, 07.04.2015, 24.01.202...","[Swastik Milk Chilling Centre, Amul Dairy, Amu...","[Nagaur, Thanwla District Nagaur, Jodhpur, Ajm...","[Section 468, Section 77, Sections 26(2)(i) an...","[Cr, Food Safety and Standards Act, 2006, Food...","[CRLMP-545/2020]Amluya Anand vs State, V.V.S.S..."
4,Arulmurugan_Starch_Industries_vs_Food_Safety_A...,,[High Court Of Judicature At Madras],"[Arulmurugan Starch Industries, 2018Arulmuruga...","[Food Safety And Standards, Commissioner of Fo...","[Krishnan Ramasamy, Krishnan Ramasamy, KRISHNA...","[11.7.2018, 19.01.2022, 19 January, 2022, 19.0...",[Food Safety and Standards Authority of India],"[New Delhi, Chennai, Salem District, Salem-1]",[],[],[]
...,...,...,...,...,...,...,...,...,...,...,...,...
196,Vijaysing_Dnyaneshwar_Gotya_S_O_vs_State_Of_Mh...,'provision_statute_pairs',"[High Court Of Judicature At Bombay, Additiona...","[Vijaysing @ Dnyaneshwar @ Gotya, Vijaysingh @...","[State Of Mha, State of\n ...","[Avinash G. Gharote, AVINASH G. GHAROTE, Garew...","[25.11.2021, 24.11.21, 25.11.2021, 13 January,...","[German Cancer Research Centre (DKFZ), Departm...","[Malkapur-Kurha, Harsoda, State of Maharashtra...","[Sections 188, 272, 273, 328, Sections 26(2)(i...","[IPC, Food Safety and Standard Act 2006, IPC, ...",[Vijaysing @ Dnyaneshwar @ Gotya S/O ... vs St...
197,Viky_Vikash_Ku_Agrawal_vs_State_Of_Odisha_Opp_...,,[High Court Of Orissa At Cuttack],"[Viky @ Vikash Ku Agrawal, Viky @ Vikash Ku Ag...","[State Of Odisha, State of Odisha]","[S.K. Sahoo, S.K. Sahoo, S.K. Sahoo, J.M.F.C.,...","[11.01.2021, 9 February, 2022, 13.04.2021, 9 F...",[],[],[section 272/273/420/269/270 of Indian Penal C...,"[Indian Penal Code, Cr., Food Safety and Stand...",[]
198,Vishnu_Gupta_Lalla_vs_The_State_Of_Madhya_Prad...,,"[High Court Of Madhya Pradesh, Supreme Court]","[Vishnu Gupta @ Lalla, Vishnu Gupta @ Lalla]","[State Of Madhya Pradesh, State Of Madhya Prad...","[Anjuli Palo, ANJULI PALO]","[10.8.2021, 22.1.2021, 3 January, 2022, 3 Janu...","[P.S.\nRanjhi, Khadya Suraksha Aur Manak Adhin...",[Jabalpur],"[Section 420 of Indian Penal Code, Section 272...","[Indian Penal Code, Cr, Food Safety and Standa...",[Vishnu Gupta @ Lalla vs The State Of Madhya P...
199,V_Balakrishnan_vs_The_State_Represented_By_on_...,,"[High Court Of Judicature At Madras, Judicial ...","[V.Balakrishnan, V.Balakrishnan]","[State, 2.Tamil Nadu Food Safety, Public Prose...","[M.Nirmal Kumar, M.Nirmal Kumar, M.NIRMAL KUMAR]","[08.05.2017, 08.05.2017, 17.08.2017, 17.08.201...","[Kadavul Mixture Company, Tamil Nadu Food Safe...","[Telungupalayam, Coimbatore, Coimbatore, Coimb...","[Section 482 of Criminal Procedure Code, Secti...","[Criminal Procedure Code, Food Safety and Stan...",[]
