In [4]:
import os
import subprocess
import sys
import json
import spacy
from spacy.tokens import DocBin

def verify_json_content(input_path):
    try:
        with open(input_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
            # Print the first annotation for debugging
            if data and "annotations" in data:
                print(f"Sample annotation in {os.path.basename(input_path)}: {data['annotations'][0]}")
            else:
                print(f"No annotations found in {os.path.basename(input_path)}")
    except Exception as e:
        print(f"Error reading {input_path}: {e}")

def verify_spacy_file(file_path):
    nlp = spacy.blank("en")
    doc_bin = DocBin().from_disk(file_path)
    docs = list(doc_bin.get_docs(nlp.vocab))
    
    if docs:
        print(f"Number of docs in {file_path}: {len(docs)}")
        for doc in docs[:1]:  # Display only the first doc for brevity
            print(f"Text: {doc.text}")
            for ent in doc.ents:
                print(f"Entity: {ent.text}, Label: {ent.label_}")
    else:
        print(f"No documents found in {file_path}")

def convert_json_to_spacy(input_dir, output_dir):
    # Ensure the output directory exists
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    
    # Get the path to the current Python executable
    python_executable = sys.executable
    
    # Loop through all files in the input directory
    for filename in os.listdir(input_dir):
        if filename.endswith('.json'):
            input_path = os.path.join(input_dir, filename)
            
            # Verify JSON content before conversion
            verify_json_content(input_path)
            
            # Convert the JSON file to spaCy format
            subprocess.run([python_executable, '-m', 'spacy', 'convert', input_path, output_dir])
            print(f"Converted {filename} to spaCy format.")

# Define your input and output directories
input_dir = '/home/hp/Documents/Mini_Project/Labelled/json'
output_dir = '/home/hp/Documents/Mini_Project/Labelled/spacy'

convert_json_to_spacy(input_dir, output_dir)

# Verify the content of a sample .spacy file after conversion
sample_spacy_file = os.path.join(output_dir, os.listdir(output_dir)[0])
verify_spacy_file(sample_spacy_file)


Sample annotation in 64.json: ['Appeal No. 205 of 1953.\r\nAppeal from the Judgment and Order dated the 24th February, 1953, of the High Court of Judicature at Calcutta in Appeal from Original Order No. 19 of 1952, arising out of the Order dated the 23rd day of August, 1951, of the High Court of Calcutta in its Ordinary Original Civil Jurisdiction Matter No. 157 of 1951.\r\nK. P. Khaitan, (section N. Mukherjea and Rajinder Narain, with him) for the appellant.\r\nM. C. Setalvad, Attorney General for India, (A. N. Sen, V. section Sawhney and section P. Varma, with him) for the respondents.\r\nNovember 1.\r\nThe Judgment of the Court was delivered by MUKHERJEA J.\r\nThis appeal is directed against a judgment of an appellate bench of the Calcutta High Court, dated the 24th February, 1953, reversing, on appeal, the judgment and order of a single Judge sitting on the Original Side of that Court, passed on an application under section 34 of the .\r\nThe material facts are not in controversy a

Traceback (most recent call last):
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/home/hp/.local/lib/python3.10/site-packages/spacy/__main__.py", line 4, in <module>
    setup_cli()
  File "/home/hp/.local/lib/python3.10/site-packages/spacy/cli/_util.py", line 87, in setup_cli
    command(prog_name=COMMAND)
  File "/usr/lib/python3/dist-packages/click/core.py", line 1128, in __call__
    return self.main(*args, **kwargs)
  File "/home/hp/.local/lib/python3.10/site-packages/typer/core.py", line 778, in main
    return _main(
  File "/home/hp/.local/lib/python3.10/site-packages/typer/core.py", line 216, in _main
    rv = self.invoke(ctx)
  File "/usr/lib/python3/dist-packages/click/core.py", line 1659, in invoke
    return _process_result(sub_ctx.command.invoke(sub_ctx))
  File "/usr/lib/python3/dist-packages/click/core.

Converted 51.json to spaCy format.
Sample annotation in 70.json: ['No. 132 of 1951.\r\nPeti tion under article 32 of the Constitution for a writ in the nature of mandamus.\r\nThe material facts are set out in the judgment.\r\nNuruddin Abroad for the petitioner.\r\nK.N. Aggarwal for the respondents.\r\nFebruary 27.\r\nThe Judgment of the Court was delivered by DAs J.\r\nThis is an application under article 32 of the Constitution made by Mohammad Yasin for the protection of his fundamental right of carrying on his business which, according to him, is being infringed by the respondent.\r\nThe case sought to be made out in the petition may be shortly stated as follows: The petitioner is a wholesale dealer in fresh vegetables and fruits at Jalalabad in the district of Muzaffarnagar in the State of Uttar Pradesh and claims to have been carrying on such business for the last 7 years or so at his shop situated in the town of Jalalabad.\r\nThe vegetable and fruit growers used to bring their goo

Traceback (most recent call last):
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/home/hp/.local/lib/python3.10/site-packages/spacy/__main__.py", line 4, in <module>
    setup_cli()
  File "/home/hp/.local/lib/python3.10/site-packages/spacy/cli/_util.py", line 87, in setup_cli
    command(prog_name=COMMAND)
  File "/usr/lib/python3/dist-packages/click/core.py", line 1128, in __call__
    return self.main(*args, **kwargs)
  File "/home/hp/.local/lib/python3.10/site-packages/typer/core.py", line 778, in main
    return _main(
  File "/home/hp/.local/lib/python3.10/site-packages/typer/core.py", line 216, in _main
    rv = self.invoke(ctx)
  File "/usr/lib/python3/dist-packages/click/core.py", line 1659, in invoke
    return _process_result(sub_ctx.command.invoke(sub_ctx))
  File "/usr/lib/python3/dist-packages/click/core.

Converted 12.json to spaCy format.
Sample annotation in 18.json: ['eal No. XXXIV of 1950.\r\nAppeal by special leave from an Award of the All India Industrial Tribunal (Bank Disputes) Bombay, dated 1st Janu ary, 1950.\r\nThe facts of the case are set out in the judg ment.\r\nDr. Bakshi Tek Chand (Veda Vyas and S.K. Kapur, with him) for the appellant.\r\nB. Sen for the respondents.\r\nAlladi Krishnaswami Aiyar (Jindra Lal, with him) for the Union of India. 1950.\r\nMay 26.\r\nThe Court delivered judgment as follows : KANIA C.J I have read the judgments prepared by Messrs. Fazl Ali, Mahajan and Mukherjea JJ.\r\n461 in this case.\r\nAs the views in those judgments in respect of the nature of the duties and functions of the Industrial Tribunal do not show agreement I consider it necessary to add a few words of my own.\r\nIn my opinion, the functions and duties of the Indus trial Tribunal are very much like those of a body discharg ing judicial functions, although it is not a Court.\r\nThe 

Traceback (most recent call last):
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/home/hp/.local/lib/python3.10/site-packages/spacy/__main__.py", line 4, in <module>
    setup_cli()
  File "/home/hp/.local/lib/python3.10/site-packages/spacy/cli/_util.py", line 87, in setup_cli
    command(prog_name=COMMAND)
  File "/usr/lib/python3/dist-packages/click/core.py", line 1128, in __call__
    return self.main(*args, **kwargs)
  File "/home/hp/.local/lib/python3.10/site-packages/typer/core.py", line 778, in main
    return _main(
  File "/home/hp/.local/lib/python3.10/site-packages/typer/core.py", line 216, in _main
    rv = self.invoke(ctx)
  File "/usr/lib/python3/dist-packages/click/core.py", line 1659, in invoke
    return _process_result(sub_ctx.command.invoke(sub_ctx))
  File "/usr/lib/python3/dist-packages/click/core.

Converted 52.json to spaCy format.
Error reading /home/hp/Documents/Mini_Project/Labelled/json/84.json: list index out of range
[38;5;2m✔ Generated output file (0 documents):
/home/hp/Documents/Mini_Project/Labelled/spacy/84.spacy[0m
Converted 84.json to spaCy format.
Sample annotation in 71.json: ['Appeal No. 159 of 1951.\r\nAppeal by special leave from the judgment and order dated 13th of April, 1951, of the High Court of Judicature at Madras (Rajamannar C.J. and Somasundaram J.) in C.M.P. No. 122/15 of 1950.\r\nM.C. Setalvad (C. R. Pattabhi Raman, with him) for the appellant.\r\nC.K. Daphtary (M. Natesan, with him) for the respondent No.1 V.K.T. Chari, Advocate General of Madras (R. Ganapathi lyer, with him) for respondent No. 4. 1952.\r\nMarch 17.\r\nThe Judgment of the Court was delivered by CHANDRASEKHARA AIYAR J.\r\nThis appeal arises as the result of special leave to appeal granted by this Court on the 1st of May, 1951, against an order of the Madras High Court dated 13th Apr