In [1]:
import json
import re
import os

def list_files(directory):
    """Return the names of the regular files found directly inside ``directory``.

    Entries for which ``os.path.isfile`` is false (e.g. sub-directories) are
    skipped. The returned names are bare entry names as produced by
    ``os.listdir`` -- they are not joined with ``directory``.
    """
    file_names = []
    for entry in os.listdir(directory):
        candidate = os.path.join(directory, entry)
        if os.path.isfile(candidate):
            file_names.append(entry)
    return file_names



In [2]:
def regex_fixer(sub_function):
    """Wrap a text-rewriting callable into a batch JSON repair function.

    Args:
        sub_function: Callable that receives the full text of a file (``str``)
            and returns the candidate repaired text (``str``).

    Returns:
        A function ``fixer(file_paths)`` that walks over ``file_paths`` and,
        for each path, prints exactly one status line:

        * ``"<path>: JSON is valid, no changes made."`` when the file already
          parses as JSON (the file is not modified and ``sub_function`` is
          not called);
        * ``"<path>: fixed."`` when the output of ``sub_function`` parses as
          JSON (the file is overwritten with that output);
        * ``"<path>: Could not fix the JSON structure."`` when the output
          still does not parse (the file is left as it was).

        ``fixer`` returns ``None``.
    """
    def fixer(file_paths):
        """Validate each file and overwrite it only with a verified repair."""
        for file_path in file_paths:
            # Read the file once so that the text we validate is exactly the
            # text we repair. JSON text is UTF-8, so the encoding is explicit
            # rather than inherited from the platform locale.
            with open(file_path, 'r', encoding='utf-8') as file:
                file_content = file.read()

            try:
                json.loads(file_content)
            except json.JSONDecodeError:
                pass  # Invalid as-is: fall through to the repair attempt.
            else:
                print(f"{file_path}: JSON is valid, no changes made.")
                continue

            fixed_content = sub_function(file_content)

            try:
                # Only accept the rewrite if it is itself valid JSON.
                json.loads(fixed_content)
            except json.JSONDecodeError:
                # Still invalid: report and leave the file untouched.
                print(f"{file_path}: Could not fix the JSON structure.")
                continue

            with open(file_path, 'w', encoding='utf-8') as file:
                file.write(fixed_content)
            print(f"{file_path}: fixed.")
    return fixer

def _join_adjacent_pairs(text):
    """Replace a quoted gap with ``","`` when a digit-keyed pair follows.

    The pattern matches a double quote, a run of characters containing
    neither ``"`` nor ``:``, and a closing double quote, but only when the
    text right after it looks like ``<digits>": ">`` (checked with a
    lookahead, so that following text is not consumed).
    NOTE(review): presumably this restores a missing comma between
    consecutive key/value pairs -- confirm against the input files.
    """
    return re.sub(r'"\s*[^":]*\s*"(?=\s*\d+\s*":\s*">)', '","', text)


fix_space_between_keyvalpairs = regex_fixer(_join_adjacent_pairs)

In [3]:
def replace_internal_quotes(match):
    """Substitution callback for ``re.sub`` that swaps ``"`` for ``'``.

    The matched text is returned unchanged when it begins with ``: ">`` or
    ends with ``",``. In every other case each double quote in the matched
    text is replaced by a single quote.

    NOTE(review): the replacement covers all double quotes in the match,
    including the first and last one -- confirm that is intended.
    """
    matched_text = match.group()
    keep_as_is = matched_text.startswith(': ">') or matched_text.endswith('",')
    return matched_text if keep_as_is else matched_text.replace('"', "'")

def _requote_matches(text):
    """Run ``replace_internal_quotes`` over every pattern match in ``text``.

    A match is either ``: "`` followed by the shortest run up to the next
    double quote, or a double-quoted run (shortest possible) that is
    followed by a comma or by the end of the text.
    """
    return re.sub(r': ".*?"|".*?"(,|$)',  replace_internal_quotes, text)


fix_doublequotes_in_values = regex_fixer(_requote_matches)

In [4]:
# Directory whose JSON files are to be checked and repaired.
directory_path = './thanks_output/'
# Bare file names (no directory prefix) of the entries ending in ".json".
files = [name for name in list_files(directory_path) if name.endswith(".json")]

In [5]:
# Prefix every selected file name with the directory, then run the quote fixer.
fix_doublequotes_in_values([os.path.join(directory_path, name) for name in files])

./thanks_output/hjbRO7pcc1Vy.json: Could not fix the JSON structure.
./thanks_output/tUCqxSEss72p.json: Could not fix the JSON structure.
./thanks_output/RADzk77t0cLN.json: Could not fix the JSON structure.
./thanks_output/jOlk10JMb6tK.json: Could not fix the JSON structure.
./thanks_output/02rAtvTP3veR.json: Could not fix the JSON structure.
./thanks_output/gmJDpygbX6v6.json: Could not fix the JSON structure.
./thanks_output/M7BDpgL9rV9w.json: Could not fix the JSON structure.
./thanks_output/3LVPLB42KfS2.json: Could not fix the JSON structure.
./thanks_output/w0DDjD26Wdh9.json: Could not fix the JSON structure.
./thanks_output/K3D7m1nNkMD6.json: Could not fix the JSON structure.
./thanks_output/9Q6mMVUbMALH.json: Could not fix the JSON structure.
./thanks_output/v6DzpS6jo3wu.json: Could not fix the JSON structure.
./thanks_output/KzsGTLFgnFJ3.json: Could not fix the JSON structure.
./thanks_output/jKkEQF9x2kwa.json: Could not fix the JSON structure.
./thanks_output/UfrS4OXs7cCA.json:

In [4]:
# Prefix every selected file name with the directory, then run the pair-separator fixer.
fix_space_between_keyvalpairs([os.path.join(directory_path, name) for name in files])

./simple_output/ZpVWXZ5Rm1d4.json: Could not fix the JSON structure.
./simple_output/8IL6BlAeEKzq.json: fixed.
./simple_output/5xMqOyU4olMk.json: fixed.
./simple_output/SguE61fHqAik.json: fixed.
./simple_output/489vzcyWYNEa.json: fixed.
./simple_output/XPfQCZrRtUJh.json: fixed.
./simple_output/lWpMnBWGcCWV.json: fixed.
./simple_output/7Qofwz69dNuW.json: fixed.
./simple_output/VLgNoHpQS7iV.json: Could not fix the JSON structure.
./simple_output/S7x9KsqiyNGX.json: Could not fix the JSON structure.
./simple_output/jwxsDk8H9VHN.json: fixed.
./simple_output/ChLU4F1TGtZS.json: fixed.
./simple_output/VdgnfWj9M3Or.json: Could not fix the JSON structure.
./simple_output/XHlKVNlSnHRD.json: Could not fix the JSON structure.
./simple_output/mdKHFSgniozP.json: Could not fix the JSON structure.
./simple_output/t08taoiH8cPF.json: Could not fix the JSON structure.
./simple_output/iw8R6QqiTe8I.json: fixed.
./simple_output/ikwgLT1G5fN3.json: fixed.
./simple_output/J0ptS4crljtU.json: fixed.
./simple_outp