CSV Input Parser - Parses data from a .dat file and writes it to an .xml or JSON file

In [1]:
import csv, json
import os
from xml.sax.saxutils import escape

import numpy as np
import pandas as pd

Writing the .dat file into XML format

In [2]:
# We can load the data into a DataFrame and transpose it, so that each column becomes a row.

#filename
#dat_file = "radix2_dim1_double_n1_c2c_inplace.dat"

#data = pd.read_csv(dat_file, sep = '\s+', header = 1, usecols = np.arange(7), index_col = None)
# This loads the data into a DataFrame, but it cuts off the remaining columns after '...'.

In [3]:
# process each record from the dataframe

def xml_encode(row):
    """Render one DataFrame row as an XML ``<data>`` record.

    Every field of ``row`` becomes a ``<var var_name="...">value</var>``
    child element. Field names and values are XML-escaped so that characters
    such as ``&``, ``<``, ``>`` and ``"`` cannot produce malformed markup.

    Parameters
    ----------
    row : pandas.Series
        A single record; ``row.index`` supplies the field names.

    Returns
    -------
    str
        The record as XML lines joined with newlines (no trailing newline).
    """
    # opening tag of the record
    xml_lines = ['  <data>']

    # for each field in the row, emit a <var var_name=...>...</var> element
    for field in row.index:
        # the name sits inside a double-quoted attribute, so '"' must be
        # escaped in addition to the default '&', '<' and '>'
        name = escape(str(field), {'"': '&quot;'})
        value = escape(str(row[field]))
        xml_lines.append('    <var var_name="{0}">{1}</var>'.format(name, value))

    # closing tag of the record
    xml_lines.append('  </data>')

    # hand the record back to the caller as a single string
    return '\n'.join(xml_lines)


# write the data into xml format

def write_xml(xmlFileName, data):
    """Write every row of a DataFrame to an XML file.

    The output consists of an XML declaration, a root ``<data>`` element,
    and one record per row as produced by ``xml_encode``.

    Parameters
    ----------
    xmlFileName : str
        Path of the XML file to create (an existing file is overwritten).
    data : pandas.DataFrame
        Table whose rows are passed one by one to ``xml_encode``.
    """
    # The declaration below promises UTF-8, so open the file with that
    # encoding explicitly instead of relying on the platform default.
    with open(xmlFileName, 'w', encoding='utf-8') as xmlFile:

        # write the headers
        xmlFile.write('<?xml version="1.0" encoding="UTF-8"?>\n')
        xmlFile.write('<data>\n')

        # write the data: one encoded record per row, separated by newlines
        xmlFile.write('\n'.join(data.apply(xml_encode, axis=1)))

        # write the footer
        xmlFile.write('\n</data>')

 Writing the .dat file into JSON format

In [10]:
# writes our .dat file to JSON 
def write_to_json (dat_file):
    """Parse a .dat file with ``get_data`` and dump the result as JSON.

    The JSON file is written next to the input, using the same base name
    with the extension replaced by ``.json``.

    Parameters
    ----------
    dat_file : str
        Path of the .dat file to convert.
    """
    data = get_data(dat_file)  # parsed content as a (nested) list

    # splitext swaps the extension whatever its length; slicing off the last
    # three characters only works for three-letter extensions such as ".dat"
    output_file = os.path.splitext(dat_file)[0] + ".json"

    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=4)  # writes the list to the JSON file
            
            
# edit for forking         
# gets the data into a list of lists       
def get_data (dat_file):
    """Parse a whitespace-delimited .dat file into one flat list.

    The first line is split into the "info" tokens, the second line into the
    column titles, and every remaining non-blank line into one record. A
    leading ``#`` on a line is stripped before splitting. In each record the
    first four fields are kept as-is and all remaining fields are gathered
    into a single nested list.

    Parameters
    ----------
    dat_file : str
        Path of the .dat file to read.

    Returns
    -------
    list
        ``info + titles + records``: the info tokens, then the title tokens,
        then one list per record of the form ``[f0, f1, f2, f3, [samples...]]``.

    Raises
    ------
    IndexError
        If the file has fewer than two lines.
    """
    scalar_cols = 4  # fields before the run of samples in each record

    # the context manager guarantees the file handle is closed
    with open(dat_file) as dat:
        rows = [line.strip('#').split() for line in dat]

    info = rows[0]
    titles = rows[1]

    # drop the '...' placeholder that ends the title line, but only when it
    # is really there, so that a genuine column title is never discarded
    if titles and titles[-1] == '...':
        titles = titles[:-1]

    # fold the samples of every record into one nested list; blank lines are
    # skipped and a record without samples simply gets an empty list
    stats = [row[:scalar_cols] + [row[scalar_cols:]] for row in rows[2:] if row]

    # combine the three cleaned parts once, after all records are processed,
    # so the result has the same flat shape even when there are no records
    return info + titles + stats



#prints the data in the JSON file
def print_data (dat_file):
    """Print every top-level entry of the JSON file that belongs to a .dat file.

    The JSON path is derived from ``dat_file`` by replacing its extension
    with ``.json``.

    Parameters
    ----------
    dat_file : str
        Path of the .dat file whose JSON counterpart should be printed.
    """
    # same base name as the input, with the extension replaced by ".json"
    output_file = os.path.splitext(dat_file)[0] + ".json"

    # read the file as UTF-8 explicitly rather than with the platform
    # default encoding, which may not be UTF-8
    with open(output_file, encoding='utf-8') as json_file:
        data = json.load(json_file)

    for line in data:
        print(line)

In [5]:
#write_to_json (dat_file) #write out file to JSON format
#print_data(dat_file) #print our file 

Iterating over files in a directory 

In [6]:
# get directory from user

def get_directory():
    """Prompt the user for a directory and return exactly what was typed."""
    return input("Enter the directory you want to use: ")



# iterate over all the files in a directory
def iterate_through_directory(path=None):
    """Convert every .dat file in a directory to JSON and print the result.

    Parameters
    ----------
    path : str, optional
        Directory to scan. When omitted, the user is prompted for it through
        ``get_directory``; passing it explicitly allows a non-interactive run.
    """
    if path is None:
        path = get_directory()

    # sorted() makes the processing order deterministic; os.listdir returns
    # entries in arbitrary order
    for filename in sorted(os.listdir(path)):
        if filename.endswith('.dat'):
            # os.listdir yields bare names, so join them with the directory;
            # otherwise the files are only found when `path` is the cwd
            dat_path = os.path.join(path, filename)
            write_to_json(dat_path)
            print_data(dat_path)

In [154]:
# Convert and print every .dat file in a directory; the directory is requested interactively via input().
iterate_through_directory()

KeyboardInterrupt: Interrupted by user

In [151]:
# Convert the sample .dat file to JSON; the output file keeps the same base name with a "json" extension.
write_to_json('radix2_dim1_double_n1_c2c_inplace.dat')

In [152]:
# Show the parsed content of the sample file. The parser defined in this
# notebook is get_data; get_data_cleaned is not defined anywhere in it and
# raises NameError on a fresh kernel.
get_data('radix2_dim1_double_n1_c2c_inplace.dat')


['/home/AMD/feizheng/work/tags/rocm-3.5/rocFFT/scripts/perf/timing.py',
 '-b',
 '1',
 '-x',
 '2048',
 '-X',
 '536870912',
 '-r',
 '2',
 '-D',
 '-1',
 '-d',
 '1',
 '-f',
 'double',
 '-I',
 '-N',
 '10',
 '-w',
 '/home/AMD/feizheng/work/tags/rocm-3.5/rocFFT/build/clients/staging',
 '-o',
 '/home/AMD/feizheng/work/tags/rocm-3.5/rocFFT/scripts/perf/dir0/radix2_dim1_double_n1_c2c_inplace.dat',
 'dimension',
 'xlength',
 'nbatch',
 'nsample',
 'samples',
 ['1',
  '2048',
  '1',
  '10',
  ['0.08622020000000001',
   '4.7269e-05',
   '8.147300000000001e-05',
   '3.6358e-05',
   '3.9995000000000006e-05',
   '3.3623e-05',
   '3.8563e-05',
   '3.8242e-05',
   '3.1349e-05',
   '3.8343e-05']],
 ['1',
  '4096',
  '1',
  '10',
  ['0.0140441',
   '5.0996e-05',
   '5.1046e-05',
   '5.1447000000000004e-05',
   '5.0044e-05',
   '4.9664e-05',
   '5.1016e-05',
   '4.8372e-05',
   '3.8963e-05',
   '4.3472e-05']],
 ['1',
  '8192',
  '1',
  '10',
  ['0.0136258',
   '0.00010492700000000001',
   '6.2938e-05',
   

In [153]:
# Print each top-level entry of the JSON file that belongs to the sample .dat file, one entry per line.
print_data('radix2_dim1_double_n1_c2c_inplace.dat')

/home/AMD/feizheng/work/tags/rocm-3.5/rocFFT/scripts/perf/timing.py
-b
1
-x
2048
-X
536870912
-r
2
-D
-1
-d
1
-f
double
-I
-N
10
-w
/home/AMD/feizheng/work/tags/rocm-3.5/rocFFT/build/clients/staging
-o
/home/AMD/feizheng/work/tags/rocm-3.5/rocFFT/scripts/perf/dir0/radix2_dim1_double_n1_c2c_inplace.dat
dimension
xlength
nbatch
nsample
samples
['1', '2048', '1', '10', ['0.08622020000000001', '4.7269e-05', '8.147300000000001e-05', '3.6358e-05', '3.9995000000000006e-05', '3.3623e-05', '3.8563e-05', '3.8242e-05', '3.1349e-05', '3.8343e-05']]
['1', '4096', '1', '10', ['0.0140441', '5.0996e-05', '5.1046e-05', '5.1447000000000004e-05', '5.0044e-05', '4.9664e-05', '5.1016e-05', '4.8372e-05', '3.8963e-05', '4.3472e-05']]
['1', '8192', '1', '10', ['0.0136258', '0.00010492700000000001', '6.2938e-05', '5.7618e-05', '8.6022e-05', '4.8462e-05', '4.7639e-05', '4.753e-05', '4.757e-05', '4.756e-05']]
['1', '16384', '1', '10', ['0.0165262', '6.4562e-05', '6.0363e-05', '5.978300000000001e-05', '5.55150000