In [None]:
import csv

## CSV

### Reading

In [None]:
# Print the first 10 lines of the csv file (the header line counts as one of them).
# Note that the file is tab delimited.
# newline='' leaves line-ending handling to the csv module, as its
# documentation asks for any file object passed to csv.reader.
with open('us_zipcodes.csv', 'r', newline='') as f:
    reader = csv.reader(f, delimiter='\t')
    for line_number, row in enumerate(reader, start=1):
        print(row)
        # Stop after the tenth row rather than reading the whole file.
        if line_number == 10:
            break

In [None]:
# Print the first 10 data lines of the csv file,
# skipping the header line in the dataset.
# newline='' leaves line-ending handling to the csv module.
with open('us_zipcodes.csv', 'r', newline='') as f:
    reader = csv.reader(f, delimiter='\t')
    # Consume the header line. The None default keeps an empty file from
    # raising StopIteration. Repeat this call to skip additional lines.
    next(reader, None)
    for line_number, row in enumerate(reader, start=1):
        print(row)
        if line_number == 10:
            break

In [None]:
# Print the first 10 data lines of the csv file.
# Skip the header line by turning the reader into a list and slicing from index 1.
# Note that list() loads the entire file into memory before anything is printed.
# newline='' leaves line-ending handling to the csv module.
with open('us_zipcodes.csv', 'r', newline='') as f:
    rows = list(csv.reader(f, delimiter='\t'))

# rows[1:11] is the ten rows after the header; a shorter file simply yields fewer.
for row in rows[1:11]:
    print(row)

In [None]:
# Create a dictionary per line instead: DictReader uses the first line of the
# file as the keys for every following row.
# newline='' leaves line-ending handling to the csv module.
with open('us_zipcodes.csv', 'r', newline='') as f:
    reader = csv.DictReader(f, delimiter='\t')
    # Pairing with range(10) stops the loop after ten records.
    for _, row in zip(range(10), reader):
        print(row)

### Filtering data

In [None]:
# Load every record into memory as a list of dicts keyed by the header names.
# newline='' leaves line-ending handling to the csv module.
with open('us_zipcodes.csv', 'r', newline='') as f:
    data = list(csv.DictReader(f, delimiter='\t'))

In [None]:
# Collect every prime in range(2, 99999) by trial division: a number is kept
# when no divisor from 2 up to its integer square root divides it evenly.
primes = [
    candidate
    for candidate in range(2, 99999)
    if all(candidate % divisor for divisor in range(2, int(candidate**0.5) + 1))
]

In [None]:
# Keep only the Arizona records whose postal code is a prime number.
# A set gives constant-time membership tests; scanning the list of primes
# for every row is needlessly slow.
prime_lookup = set(primes)
# Test the state first so int() is only applied to Arizona postal codes.
data = [
    row for row in data
    if row['state code'] == 'AZ' and int(row['postal code']) in prime_lookup
]
# A bare expression in the middle of a cell is not displayed, so print the count.
print(len(data))
# Slicing shows at most 10 records and cannot raise IndexError on a short list.
for row in data[:10]:
    print(row)


### Writing

In [None]:
# Write three columns of the filtered records to a comma-separated file.
# newline='' stops the file object from translating the line endings that
# the csv writer emits (the csv documentation asks for this).
with open('az_prime_zipcodes.csv', 'w', newline='') as f:
    columns = ['postal code', 'place name', 'county']
    writer = csv.writer(f)
    # Write the header through the writer so it gets the same quoting and
    # line terminator as the data rows.
    writer.writerow(columns)
    writer.writerows([row[column] for column in columns] for row in data)
        
        

In [None]:
!head az_prime_places.csv

### Homework help!

`csvfilter.py` takes the following command-line options:

* `-f`|`--file`: A *required* argument that is a readable file
* `-v`|`--val`: A *required* "value" to match against each record
* `-c`|`--col`: An optional "column" to search for the given value
* `-o`|`--outfile`: An optional output file name (default `'out.csv'`)
* `-d`|`--delimiter`: An optional delimiter to use to parse the file (default `','`)

In [None]:
# Shell out to grep to show the lines of az_prime_zipcodes.csv that contain "Tucson".
!grep Tucson az_prime_zipcodes.csv

In [None]:
# Run the csvchk.py script from the current directory on the same file.
# NOTE(review): csvchk.py is not part of this notebook; presumably -g selects
# records matching "Tucson" and --limit caps how many are shown. Confirm against the script.
!./csvchk.py -g Tucson --limit 10 az_prime_zipcodes.csv

In [None]:
import csv
import re
import sys

# Copy the records of az_prime_zipcodes.csv that match a search term into
# tucson_az_prime_zipcodes.csv, keeping the same header.
search_column = 'place name'
search_for = 'Tucson'

# The context managers close both files when the block ends (including on
# sys.exit), so the output is flushed to disk before a later cell inspects it.
# newline='' leaves line-ending handling to the csv module.
with open('az_prime_zipcodes.csv', 'rt', newline='') as infile, \
        open('tucson_az_prime_zipcodes.csv', 'wt', newline='') as outfile:
    reader = csv.DictReader(infile, delimiter=',')
    # fieldnames is None when the input file is empty; fall back to an empty list.
    fieldnames = reader.fieldnames or []

    # Check to see if the user is requesting a particular column
    if search_column and search_column not in fieldnames:
        print(f'Choose from {", ".join(fieldnames)}')
        sys.exit(1)

    # Create a "csv writer" that reuses the input's column names
    writer = csv.DictWriter(outfile, fieldnames=fieldnames)
    writer.writeheader()

    for rec in reader:
        # Search the requested column, or the whole record when no column is given.
        # Short rows have None for missing fields; treat those as empty text
        # so re.search does not raise TypeError.
        if search_column:
            text = rec.get(search_column) or ''
        else:
            text = ' '.join(value or '' for value in rec.values())

        # search_for is used as a regular expression, matched case-insensitively.
        if re.search(search_for, text, re.IGNORECASE):
            writer.writerow(rec)

In [None]:
# Preview the first lines of the filtered output file.
!head tucson_az_prime_zipcodes.csv