In [1]:
import json

# Path to the JSONL file, assuming it's in the same directory as this notebook
file_path = "doccano_test copy.jsonl"
print(f"Attempting to read and parse: {file_path}")


def report_invalid_jsonl_lines(lines, stop_at_first_error=False):
    """Validate a JSONL stream line by line, printing a report for each bad line.

    Args:
        lines: Iterable of ``str``, one raw line per item (for example an
            open text-mode file object).
        stop_at_first_error: When true, stop scanning as soon as the first
            invalid line has been reported. Defaults to scanning everything.

    Returns:
        A list of the 1-based line numbers whose whitespace-stripped content
        could not be decoded by ``json.loads``. Blank or whitespace-only
        lines are announced and skipped; they never count as errors.
    """
    separator = "--------------------------------------------------"
    invalid_line_numbers = []

    for line_number, raw_line in enumerate(lines, start=1):
        stripped_line = raw_line.strip()
        if not stripped_line:  # Blank lines are not treated as JSON records
            print(f"Skipping empty line at line number {line_number}")
            continue

        try:
            json.loads(stripped_line)
        except json.JSONDecodeError as e:
            # Strip only the trailing newline so the original spacing stays
            # visible. This is computed outside the f-string because a
            # backslash inside an f-string expression is a SyntaxError on
            # Python versions before 3.12.
            original_line = raw_line.rstrip("\n")
            print(separator)
            print(f"Error decoding JSON on line {line_number} (after stripping whitespace):")
            print(f"Error message: {e}")
            print(f"Problematic line content (stripped): '{stripped_line}'")
            print(f"Original problematic line content: '{original_line}'")
            print(separator)
            invalid_line_numbers.append(line_number)
            if stop_at_first_error:
                break

    return invalid_line_numbers


try:
    with open(file_path, 'r', encoding='utf-8') as f:
        print(f"Reading file: {file_path}")
        # Pass stop_at_first_error=True here to stop after the first bad line.
        invalid_line_numbers = report_invalid_jsonl_lines(f)

    # Kept as a notebook-level name so later cells can still inspect it.
    error_found = bool(invalid_line_numbers)
    if not error_found:
        print("Successfully parsed all lines. The file seems to be a valid JSONL.")
    else:
        print("Found errors in the JSONL file. See details above.")

except FileNotFoundError:
    print(f"Error: File not found at '{file_path}'. Please ensure the file exists in the same directory as this notebook or provide the correct path.")
except Exception as e:
    print(f"An unexpected error occurred: {e}")

Attempting to read and parse: doccano_test copy.jsonl
Reading file: doccano_test copy.jsonl
Successfully parsed all lines. The file seems to be a valid JSONL.


In [2]:
import pandas as pd

In [3]:
# Read the JSON Lines file (one JSON document per line) into a DataFrame,
# then print a plain-text preview of its first five rows.
test_df = pd.read_json(path_or_buf=file_path, lines=True)
print(test_df.head(n=5))

     id                                               text  \
0  7519  in an explicit task, social impression assessm...   
1  5954  broadening the motivation to cooperate: revisi...   
2  6277  in addition, in studies that have not found em...   
3  9078  Results from the questionnaire were consistent...   
4  6756  "a more surprising result is that targeted inf...   

                                            entities  \
0  [{'id': 7811, 'label': 'cause', 'start_offset'...   
1  [{'id': 3753, 'label': 'non-causal', 'start_of...   
2  [{'id': 4598, 'label': 'non-causal', 'start_of...   
3  [{'id': 9404, 'label': 'non-causal', 'start_of...   
4  [{'id': 5907, 'label': 'cause', 'start_offset'...   

                                           relations Comments  
0  [{'id': 3515, 'from_id': 7811, 'to_id': 7812, ...       []  
1                                                 []       []  
2                                                 []       []  
3                                 

In [4]:
# Write the DataFrame to test.csv in the working directory; index=False
# leaves the row index out of the file.
test_df.to_csv(path_or_buf='test.csv', index=False)
print("DataFrame saved to 'test.csv'.")

DataFrame saved to 'test.csv'.
