In [1]:
import json

def jsonl_to_json(input_file, output_file):
    """
    Convert a JSONL (JSON Lines) file to a standard JSON array file.

    Every non-blank line of ``input_file`` is parsed as one JSON value.
    Lines that fail to parse are reported on stdout together with their
    1-based line number and are skipped; they do not abort the conversion.
    The successfully parsed values are written to ``output_file`` as one
    indented JSON array, with non-ASCII characters written verbatim rather
    than as ``\\uXXXX`` escapes.

    Parameters:
    - input_file: Path to the input JSONL file
    - output_file: Path to the output JSON file

    Returns:
    - List of the successfully parsed records, in input order. The whole
      list is held in memory before being written.
    """
    data = []
    skipped = 0

    # 'utf-8-sig' decodes ordinary UTF-8 unchanged but also drops a leading
    # byte-order mark. With plain 'utf-8' the BOM stays glued to the first
    # line, json.loads rejects it, and the first record is lost.
    with open(input_file, 'r', encoding='utf-8-sig') as f:
        for line_number, line in enumerate(f, start=1):
            # Skip empty / whitespace-only lines
            if not line.strip():
                continue
            try:
                # Parse each line as JSON and append to list
                data.append(json.loads(line))
            except json.JSONDecodeError as e:
                skipped += 1
                # Include the line number so the bad record can be located
                # in large inputs; strip the trailing newline for tidy output.
                print(f"Error parsing line {line_number}: {e}")
                print(f"Problematic line: {line.rstrip()}")

    # Write the list as a JSON array to the output file
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=2, ensure_ascii=False)

    print("Conversion complete!")
    print(f"Total records converted: {len(data)}")
    if skipped:
        # Surface dropped lines in the summary so they are not overlooked
        print(f"Lines skipped due to parse errors: {skipped}")
    print(f"Output saved to: {output_file}")

    return data


if __name__ == "__main__":
    # Example usage
    input_file = "data.jsonl"  # Replace with your JSONL file path
    output_file = "data.json"   # Replace with desired output path

    # Convert the file; the parsed records are returned for inspection
    data = jsonl_to_json(input_file, output_file)

    # Optional: preview the first record (skipped when nothing was parsed).
    # ensure_ascii=False keeps non-ASCII text readable in the preview.
    # NOTE(review): the preview prints a full record, which may include
    # personal data -- consider clearing cell output before sharing.
    if data:
        print("\nFirst record sample:")
        print(json.dumps(data[0], indent=2, ensure_ascii=False))

Conversion complete!
Total records converted: 250795
Output saved to: data.json

First record sample:
{
  "id": "4c241fd2-12c7-45cb-a88f-3e45000f7fc6",
  "student_id": 1433417,
  "student_name": "Nidhi Gujar",
  "student_type": "International",
  "university_name": "Texas A&M University, College Station",
  "university_name_stripped": "texas a&m university college station",
  "course_name": "Public Health",
  "credential": "Masters",
  "credential_standardized": "Masters (Technical)",
  "categorical_course_name": "Bio_Biomed_Health_LifeSci",
  "target_degree": "masters",
  "application_status": 6,
  "admission_result": 0,
  "application_term": "fall",
  "application_year": 2025,
  "gpa": "75",
  "gpa_scale": 100.0,
  "gpa_normalized": 7.5,
  "gpa_missing": 0,
  "undergrad_major": "Dental Science",
  "ug_major_bucket": "Bio_Biomed_Health_LifeSci",
  "undergrad_university": "Yashwantrao Chavan Dental College ",
  "undergrad_canonical": "Yashwantrao Chavan Dental College, Ahmednagar",
  "