## Data Format: Text

In [1]:
from nose.tools import assert_equal
import csv

### Problem 1

In [66]:
def get_cols_by_index(fname, cols, delim=','):
    '''
    Gets columns of a delimited text file by index or column number.
    
    Params
    -------
    fname: the file name (and path)
    cols : a list of zero-based indices for the columns (a list of column
           numbers); fields appear in each tuple in the order given here
    delim: the delimiter character to pass to csv.reader
    
    Returns
    -------
    A list of tuples where each element corresponds to a row of data
    from the text file. The tuples have the same length as cols, and
    each element of the tuple is a field from the text file. Every row
    the reader yields is returned, including the first (header) row.
    
    Raises
    -------
    IndexError if a row has fewer fields than an index in cols requires.
    
    '''
    
    # newline='' hands line-ending handling to the csv module, as its
    # documentation recommends; otherwise a '\r\n' embedded in a quoted
    # field would be rewritten to '\n' before the reader sees it.
    with open(fname, 'r', newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=delim)
        return [tuple(row[i] for i in cols) for row in reader]

In [41]:
# Checks for get_cols_by_index, run against a local copy of airports.csv.
# NOTE: the `!sed ...` lines below are IPython shell escapes, so this cell
# only runs inside IPython/Jupyter on a system that provides sed and cut.

# did you return the correct datatype?
random_answer = get_cols_by_index('/Users/Constance/airports.csv', cols=[1,2])
assert_equal(type(random_answer[0]), tuple)

# is the header still there?
# Requesting all 7 columns must give back the header row as element 0.
all_cols = ['iata', 'airport', 'city', 'state', 'country', 'lat', 'long']
assert_equal(tuple(all_cols),
            get_cols_by_index('/Users/Constance/airports.csv', delim=',', cols=list(range(7)))[0])

# test with comma and 7 cols
# `sed -n 6p` prints line 6 of the file, which is index 5 in the returned
# list (the header is index 0).
fifth_row = !sed -n 6p '/Users/Constance/airports.csv'
# Build the expected tuple by hand: split on commas and drop the
# surrounding double quotes. This only matches csv parsing when no field
# on that line contains a comma.
fifth_row = tuple(x.strip('"') for x in fifth_row[0].split(','))
fifth_row_test = get_cols_by_index('/Users/Constance/airports.csv', delim=',', cols=list(range(7)))[5]
assert_equal(fifth_row, fifth_row_test)

# test with comma and 1 col
# `cut -d ',' -f2` keeps the 2nd comma-separated field of line 9, which
# corresponds to column index 1 of row index 8.
eigth_row_one_col = !sed -n 9p '/Users/Constance/airports.csv' | cut -d ',' -f2
eigth_row_one_col = tuple(x.strip('"') for x in eigth_row_one_col[0].split(','))
eigth_row_one_col_test = get_cols_by_index('/Users/Constance/airports.csv', delim=',', cols=[1])[8]
assert_equal(eigth_row_one_col, eigth_row_one_col_test)

### Problem 2

In [104]:
def get_cols_by_name(fname, cols, delim=','):
    '''
    Gets columns of a delimited text file by name. Assumes the first
    row of the text file contains the header or column names.
    
    Params
    -------
    fname: the file name (and path)
    cols : a list of names of the fields (columns) to be returned;
           fields appear in each tuple in the order given here
    delim: the delimiter character to pass to csv.reader (also used
           when parsing the header row)
    
    Returns
    -------
    A list of tuples where each element corresponds to a row of data
    from the text file. The tuples have the same length as cols, and
    each element of the tuple is a field from the text file. The header
    row itself is returned as the first element.
    
    Raises
    -------
    ValueError if a name in cols is not present in the header row
    (including when the file is empty and cols is not).
    IndexError if a data row has fewer fields than the header implies.
    
    '''
    
    # Read the file once with the caller's delimiter. newline='' hands
    # line-ending handling to the csv module, as its docs recommend.
    with open(fname, 'r', newline='') as csvfile:
        rows = list(csv.reader(csvfile, delimiter=delim))

    # The header is simply the first parsed row, so it is split with the
    # same delimiter as the data and no second file handle is needed.
    header = rows[0] if rows else []
    colnum = [header.index(name) for name in cols]

    # The header row stays in the output, projected like any other row.
    return [tuple(row[i] for i in colnum) for row in rows]

In [105]:
# Checks for get_cols_by_name, run against a local copy of airports.csv.
# NOTE: the `!sed ...` line below is an IPython shell escape, so this cell
# only runs inside IPython/Jupyter on a system that provides sed and cut.

# did you return the correct datatype?
random_answer = get_cols_by_name('/Users/Constance/airports.csv', cols=['iata', 'city'])
assert_equal(type(random_answer[0]), tuple)
# Every row, not just the first, must be a tuple.
[assert_equal(type(x), tuple) for x in random_answer]

# is the header still there?
# Requesting every column by name must give back the header row as element 0.
all_cols = ['iata', 'airport', 'city', 'state', 'country', 'lat', 'long']
assert_equal(tuple(all_cols),
            get_cols_by_name('/Users/Constance/airports.csv', delim=',', cols=all_cols)[0])

# test with comma and 1 col
# `sed -n 9p` prints line 9 (index 8 in the result) and `cut -d ',' -f6`
# keeps its 6th comma-separated field, which is the 'lat' column per all_cols.
eigth_row_one_col = !sed -n 9p '/Users/Constance/airports.csv' | cut -d ',' -f6
# Drop surrounding double quotes so the value matches what csv.reader yields.
eigth_row_one_col = tuple(x.strip('"') for x in eigth_row_one_col[0].split(','))
eigth_row_one_col_test = get_cols_by_name('/Users/Constance/airports.csv', delim=',', cols=['lat'])[8]
assert_equal(eigth_row_one_col, eigth_row_one_col_test)

### Problem 3

In [136]:
def my_read_csv(fname, cols, delim = ','):
    '''
    Gets columns of a delimited text file by name or index. Assumes 
    the first row of the text file contains the header or column names.
    
    Params
    -------
    fname: the file name (and path)
    cols : a list of names or indices of the fields (columns) to be 
           returned; must be all ints or all strings
    delim: the delimiter character to pass to csv.reader
    
    Returns
    -------
    A list of tuples where each element corresponds to a row of data
    from the text file. The tuples have the same length as cols, and
    each element of the tuple is a field from the text file.
    
    If cols mixes ints and strings (or holds any other type), an empty
    list is returned instead. An empty cols is treated as a list of
    indices, so one empty tuple is returned per row.
    
    '''
    
    # Forward the caller's delimiter; it must not be replaced by a
    # hard-coded ',' or non-comma files are parsed incorrectly.
    if all(isinstance(x, int) for x in cols):
        return get_cols_by_index(fname, cols, delim=delim)
    if all(isinstance(x, str) for x in cols):
        return get_cols_by_name(fname, cols, delim=delim)
    # Mixed or unsupported selector types: signal with an empty result.
    return []

In [139]:
# Checks for my_read_csv, run against a local copy of airports.csv.
# NOTE: the `!sed ...` lines below are IPython shell escapes, so this cell
# only runs inside IPython/Jupyter on a system that provides sed and cut.

# did you return the correct datatype?
random_answer = my_read_csv('/Users/Constance/airports.csv', cols=[1,2])
assert_equal(type(random_answer[0]), tuple)
# Every row, not just the first, must be a tuple.
[assert_equal(type(x), tuple) for x in random_answer]

# is the header still there?
# Selecting every column by name must give back the header row as element 0.
all_cols = ['iata', 'airport', 'city', 'state', 'country', 'lat', 'long']
assert_equal(tuple(all_cols),
            my_read_csv('/Users/Constance/airports.csv', delim=',', cols=all_cols)[0])

# test with comma and 1 col by name
# Line 21 of the file is index 20 in the result; field 6 is the 'lat' column.
myans1 = !sed -n 21p '/Users/Constance/airports.csv' | cut -d ',' -f6
# Drop surrounding double quotes so the value matches what csv.reader yields.
myans1 = tuple(x.strip('"') for x in myans1[0].split(','))
mytest1 = my_read_csv('/Users/Constance/airports.csv', delim=',', cols=['lat'])[20]
assert_equal(myans1, mytest1)

# test with comma and 7 cols by index
# Line 6 of the file is index 5 in the result. The hand-rolled split(',')
# only matches csv parsing when no field on that line contains a comma.
myans3 = !sed -n 6p '/Users/Constance/airports.csv'
myans3 = tuple(x.strip('"') for x in myans3[0].split(','))
mytest3 = my_read_csv('/Users/Constance/airports.csv', delim=',', cols=list(range(7)))[5]
assert_equal(myans3, mytest3)

# test with comma and 1 col by index
# Field 2 of line 9 corresponds to column index 1 of row index 8.
myans4 = !sed -n 9p '/Users/Constance/airports.csv' | cut -d ',' -f2
myans4 = tuple(x.strip('"') for x in myans4[0].split(','))
mytest4 = my_read_csv('/Users/Constance/airports.csv', delim=',', cols=[1])[8]
assert_equal(myans4, mytest4)

# does the error handling work?
# A cols list mixing an int and a str must yield an empty list.
assert_equal([], my_read_csv('/Users/Constance/airports.csv', delim=',', cols=[1, 'a']))
