In [None]:
# Mount Google Drive inside the Colab VM at /content/drive; the cells below
# read and write their data files under /content/drive/MyDrive/ISE-244.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!cd /content/drive/MyDrive/ISE-244/
!curl -k -o /content/drive/MyDrive/ISE-244/raw_review_Electronics.jsonl.gz https://datarepo.eng.ucsd.edu/mcauley_group/data/amazon_2023/raw/review_categories/Electronics.jsonl.gz
!curl -k -o /content/drive/MyDrive/ISE-244/raw_meta_Electronics.jsonl.gz https://datarepo.eng.ucsd.edu/mcauley_group/data/amazon_2023/raw/meta_categories/meta_Electronics.jsonl.gz

In [None]:
import json
import csv
import gzip

def jsonl_to_csv(input_path, output_path):
    """
    Convert a JSONL file (possibly compressed with gzip) to a CSV file.

    The CSV columns are the keys of the first JSON record, in that record's
    order. Blank lines in the input are skipped. If the input contains no
    records at all, the output file is created empty (no header).

    Parameters:
        input_path (str): The file path to the JSONL input file. A path ending
            in '.gz' is read through gzip.
        output_path (str): The file path where the CSV output should be written.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        ValueError: Raised by csv.DictWriter if a later record contains a key
            that the first record did not have.
    """
    # Determine if input is gzipped by checking the file extension.
    # gzip.open defaults to binary mode, so text mode must be requested explicitly.
    if input_path.endswith('.gz'):
        open_func = gzip.open
        open_mode = 'rt'
    else:
        open_func = open
        open_mode = 'r'

    # Open the input file and prepare the output CSV file
    with open_func(input_path, open_mode, encoding='utf-8') as file, \
         open(output_path, 'w', newline='', encoding='utf-8') as csvfile:
        # Lazily parse one record per non-blank line, so stray empty lines
        # (including a trailing one) do not crash json.loads.
        records = (json.loads(text) for text in map(str.strip, file) if text)

        # The first record defines the CSV schema; an empty input has none.
        first_record = next(records, None)
        if first_record is None:
            print("Input contains no records; wrote an empty CSV file.")
            return

        # Create a CSV writer with field names, specifying escape character and quoting behavior
        writer = csv.DictWriter(csvfile, fieldnames=list(first_record.keys()),
                                escapechar='\\', quoting=csv.QUOTE_MINIMAL)
        writer.writeheader()
        writer.writerow(first_record)

        # Stream the remaining records straight to the CSV
        writer.writerows(records)

    print("JSONL has been converted to CSV successfully.")

# Convert the downloaded Electronics review dump to a CSV stored next to it on Drive.
path = '/content/drive/MyDrive/ISE-244'
input_path = path + '/raw_review_Electronics.jsonl.gz'
output_path = path + '/raw_review_Electronics.csv'
jsonl_to_csv(input_path=input_path, output_path=output_path)

In [None]:
def jsonl_to_csv(input_path, output_path):
    """
    Convert a JSONL file (possibly compressed with gzip) to a CSV file, handling dynamic fieldnames.

    Records may have differing sets of keys. The input is read twice: the first
    pass collects the union of all keys (in first-seen order), the second pass
    writes every record under that complete header. Keys a record lacks are
    written as empty cells. Blank lines are skipped. If no record has any key,
    the output file is created empty (no header).

    Parameters:
        input_path (str): The file path to the JSONL input file. A path ending
            in '.gz' is read through gzip.
        output_path (str): The file path where the CSV output should be written.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Determine if input is gzipped by checking the file extension.
    # gzip.open defaults to binary mode, so text mode must be requested explicitly.
    if input_path.endswith('.gz'):
        open_func = gzip.open
        open_mode = 'rt'
    else:
        open_func = open
        open_mode = 'r'

    def iter_records():
        """Yield one parsed JSON object per non-blank line of the input file."""
        with open_func(input_path, open_mode, encoding='utf-8') as file:
            for line in file:
                line = line.strip()
                if line:
                    yield json.loads(line)

    # Pass 1: gather every key that appears anywhere in the file. A dict is
    # used as an ordered set so the column order is deterministic (first seen),
    # unlike iterating a plain set of strings.
    all_fieldnames = {}
    for data in iter_records():
        all_fieldnames.update(dict.fromkeys(data))

    # Pass 2: the header is now final, so rows can be streamed out without
    # ever rewinding or truncating the output (which would drop earlier rows).
    with open(output_path, 'w', newline='', encoding='utf-8') as csvfile:
        if all_fieldnames:
            writer = csv.DictWriter(csvfile, fieldnames=list(all_fieldnames),
                                    escapechar='\\', quoting=csv.QUOTE_MINIMAL)
            writer.writeheader()
            writer.writerows(iter_records())

    print("JSONL has been converted to CSV successfully.")

# Convert the downloaded Electronics product-metadata dump to a CSV stored next to it on Drive.
path = '/content/drive/MyDrive/ISE-244'
input_path = path + '/raw_meta_Electronics.jsonl.gz'
output_path = path + '/raw_meta_Electronics.csv'
jsonl_to_csv(input_path=input_path, output_path=output_path)