In [32]:
import argparse
import collections
import csv
import json
import codecs

def get_superset_of_column_names_from_file(json_file_path):
    """Return the union of top-level keys over every record in the file.

    The file is read as UTF-8 text with one JSON document per line.
    Whitespace-only lines are ignored.

    Args:
        json_file_path: Path of the line-delimited JSON file to scan.

    Returns:
        A set containing every key that appears at the top level of at
        least one record. Empty when the file holds no records.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    column_names = set()
    # Built-in open() breaks lines only on \n, \r and \r\n. Iterating a
    # codecs.open() reader also breaks on characters such as U+2028 or
    # U+0085, which may appear unescaped inside a JSON string and would
    # cut a record in half before json.loads() sees it.
    with open(json_file_path, encoding='utf-8') as fin:
        for line in fin:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing empty line).
                continue
            column_names.update(json.loads(line))
    return column_names

def read_and_write_file(json_file_path, csv_file_path, column_names):
    """Convert a line-delimited JSON file into a CSV file.

    Writes one header row with the column names, then one row per JSON
    record, with cells produced by ``get_row``. Whitespace-only input
    lines are skipped.

    Args:
        json_file_path: Path of the UTF-8 input file, one JSON document
            per line.
        csv_file_path: Path of the CSV file to create or overwrite.
        column_names: Iterable of column names; its iteration order
            becomes the column order of the CSV.

    Raises:
        OSError: If either file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Freeze the order once so the header and every row agree, even when
    # column_names is a one-shot iterable.
    columns = list(column_names)
    # The input is opened first so that a missing/unreadable JSON file does
    # not leave behind a truncated CSV. Built-in open() is used instead of
    # codecs.open() because the latter also splits lines on characters such
    # as U+2028, which may appear unescaped inside a JSON string.
    with open(json_file_path, encoding='utf-8') as fin, \
            open(csv_file_path, 'w', newline='', encoding='utf-8') as fout:
        csv_file = csv.writer(fout)
        csv_file.writerow(columns)
        for line in fin:
            if not line.strip():
                continue
            csv_file.writerow(get_row(json.loads(line), columns))
                
def get_row(line_contents, column_names):
    """Return a csv compatible row given column names and a dict.

    Args:
        line_contents: One decoded record, a dict keyed by column name.
        column_names: Iterable of column names, in output order.

    Returns:
        A list with one string per column name. A column that is missing
        from the record, or whose value is None, yields an empty string.
        String values are rendered as ASCII-only text (see
        ``_format_cell``); any other value is rendered with ``format``.
    """
    return [
        _format_cell(_lookup_column(line_contents, column_name))
        for column_name in column_names
    ]


def _lookup_column(record, column_name):
    """Return the value for column_name in record, or None when absent."""
    if column_name in record:
        return record[column_name]
    # Not a literal key: interpret "a.b.c" as a path through nested dicts.
    # For a name without dots this reduces to a single failed lookup.
    value = record
    for part in column_name.split('.'):
        if not isinstance(value, dict) or part not in value:
            return None
        value = value[part]
    return value


def _format_cell(value):
    """Render one value as the text to place in a CSV cell."""
    if value is None:
        return ''
    if isinstance(value, str):
        # Keep the cell pure ASCII: take the repr of the UTF-8 bytes and
        # drop the leading b' (or b") and the closing quote. Non-ASCII
        # bytes, backslashes and control characters come out as escapes.
        return repr(value.encode('utf-8'))[2:-1]
    return '{0}'.format(value)

# Conversion jobs, run in order. Each entry is
# (progress message, input JSON path, output CSV path).
_conversion_jobs = (
    (
        "generating csv file for businesses: ",
        'C:\\Users\\manvi\\OneDrive\\Documents\\NLP\\business.json',
        'C:\\Users\\manvi\\OneDrive\\Documents\\NLP\\business.csv',
    ),
    (
        "generating csv file for reviews: ",
        'C:\\Users\\manvi\\OneDrive\\Documents\\NLP\\review.json',
        'C:\\Users\\manvi\\OneDrive\\Documents\\NLP\\review.csv',
    ),
)

# json_file, csv_file and column_names stay module-level names, so after
# the loop they hold the values of the last job, as before.
for progress_message, json_file, csv_file in _conversion_jobs:
    print(progress_message, csv_file)
    # First pass collects the column names, second pass writes the CSV.
    column_names = get_superset_of_column_names_from_file(json_file)
    read_and_write_file(json_file, csv_file, column_names)

generating csv file:  C:\Users\manvi\OneDrive\Documents\NLP\business.csv
