# XML > JSON > CSV conversion

In [1]:
# this is converting the xml to json and printing to a file

In [2]:
import json, xmltodict, xml

In [3]:
# Path of the XML file to convert (resolved against the current working directory).
XML = "xml-file.xml"

In [4]:
# Read the XML as raw bytes so that decoding is left to the XML parser
# instead of the platform's default text encoding.
with open(XML, 'rb') as fd:
    doc = xmltodict.parse(fd.read())

# Serialise the parsed document to a JSON string; a later cell writes it to disk.
j = json.dumps(doc)

In [5]:
# Write the JSON string inside a context manager so the file is flushed and
# closed as soon as the write finishes.
with open('test.json', 'w') as output_file:
    chars_written = output_file.write(j)

# Show the number of characters written as the cell's output.
chars_written

1647593

Here's a sample of the converted file: 

```json
{
  "Recordset": {
    "@setCount": "3209",
    "Record": [
      {
        "@setEntry": "0",
        "Access-Restrictions": null,
        "AccessionNumber": "2002-01",
        "Archives-Location": "JB-13",
        "Date-Received": "29 May 2002",
        "Description": "Co-Op News Bulletins",
        "Donor-Office": "Co-Coordinator Shani Pearson Seton Annex 3A",
        "Extent": null,
        "Inclusive-Date": "Summer 1999 to Winter 2000",
        "Notes": null,
        "Title": "Co-operative Education"
      },
```
      
For the conversion script below to work, I had to remove the following wrapper lines from `test.json`, so that `Record` becomes the top-level key:

```json
"Recordset": {
    "@setCount": "3209",
```
    
Don't forget to also remove the matching closing `}` at the very end of the file; otherwise the JSON is left with an unbalanced brace and will not parse.

In [8]:
# this code is converting the json to csv

In a text editor, paste the following script:

In [None]:
# source code found here: https://github.com/vinay20045/json-to-csv #

import sys
import json
import csv

##
# Convert to string keeping encoding in mind...
##
def to_string(s):
    """Return ``s`` converted to a string.

    ``str(s)`` is tried first. If it fails with a ``UnicodeEncodeError``,
    the UTF-8 encoded form of ``s`` is returned instead. Any other
    exception raised by ``str(s)`` propagates unchanged, so unrelated
    failures are not masked by the encoding fallback.
    """
    try:
        return str(s)
    except UnicodeEncodeError:
        #Change the encoding type if needed
        return s.encode('utf-8')


##
# This function converts an item like 
# {
#   "item_1":"value_11",
#   "item_2":"value_12",
#   "item_3":"value_13",
#   "item_4":["sub_value_14", "sub_value_15"],
#   "item_5":{
#       "sub_item_1":"sub_item_value_11",
#       "sub_item_2":["sub_item_value_12", "sub_item_value_13"]
#   }
# }
# To
# {
#   "node_item_1":"value_11",
#   "node_item_2":"value_12",
#   "node_item_3":"value_13",
#   "node_item_4_0":"sub_value_14", 
#   "node_item_4_1":"sub_value_15",
#   "node_item_5_sub_item_1":"sub_item_value_11",
#   "node_item_5_sub_item_2_0":"sub_item_value_12",
#   "node_item_5_sub_item_2_1":"sub_item_value_13"
# }
##
def reduce_item(key, value):
    """Flatten ``value`` into the module-level ``reduced_item`` dict.

    Lists and dicts are walked recursively; each level appends ``_<index>``
    (for list elements) or ``_<sub key>`` (for dict entries) to ``key``.
    Any other value is stored in ``reduced_item`` under ``key``, with both
    the key and the value passed through ``to_string``.

    Nothing is returned; the result is the mutation of ``reduced_item``.
    """
    global reduced_item

    # Lists: recurse into every element, suffixing the key with its position.
    if type(value) is list:
        for position, element in enumerate(value):
            reduce_item(key + '_' + to_string(position), element)
        return

    # Dicts: recurse into every entry, suffixing the key with the entry's key.
    if type(value) is dict:
        for child_key, child_value in value.items():
            reduce_item(key + '_' + to_string(child_key), child_value)
        return

    # Anything else is a leaf: record it under the accumulated key.
    reduced_item[to_string(key)] = to_string(value)


if __name__ == "__main__":
    # Command-line entry point: flatten every item found under <node> in the
    # input JSON file and write the result to a CSV file, one row per item.
    if len(sys.argv) != 4:
        print ("\nUsage: python json_to_csv.py <node> <json_in_file_path> <csv_out_file_path>\n")
    else:
        #Reading arguments
        node = sys.argv[1]
        json_file_path = sys.argv[2]
        csv_file_path = sys.argv[3]

        # The context manager closes the input file even if parsing fails.
        with open(json_file_path, 'r') as fp:
            raw_data = json.load(fp)

        # Use the requested node when the document has it; otherwise fall back
        # to treating the whole document as the collection of items.
        # KeyError: dict without that key. TypeError: top level is not a dict.
        try:
            data_to_be_processed = raw_data[node]
        except (KeyError, TypeError):
            data_to_be_processed = raw_data

        processed_data = []
        header = set()
        for item in data_to_be_processed:
            # reduce_item() writes into the module-level reduced_item dict,
            # so a fresh dict is bound for every item before the call.
            reduced_item = {}
            reduce_item(node, item)

            header.update(reduced_item)
            processed_data.append(reduced_item)

        # Sorted, de-duplicated column names across all rows.
        header = sorted(header)

        # newline='' lets the csv module control line endings itself; without
        # it, platforms that translate '\n' end up with blank lines between rows.
        with open(csv_file_path, 'w+', newline='') as f:
            writer = csv.DictWriter(f, header, quoting=csv.QUOTE_ALL)
            writer.writeheader()
            writer.writerows(processed_data)

        print ("Just completed writing csv file with %d columns" % len(header))

Save this file as `json_to_csv.py`, making sure it's in the same folder as the json file you want to convert.
Navigate to the folder in a terminal window, and then type the following command:

`python json_to_csv.py <node> <json_in_file_path> <csv_out_file_path>`

For this file, the `<node>` is `Record`; the `<json_in_file_path>` is `test.json`; and the `<csv_out_file_path>` is `output.csv`. The full command is therefore `python json_to_csv.py Record test.json output.csv`.