# Reading CSV Manually

Data file from https://www2.census.gov/programs-surveys/popest/datasets/2010-2018/state/detail/

In [None]:
%%bash
# Peek at the start of the raw data file (head prints the first 10 lines by default).
head est2018-pop-res.csv

Hard way

In [None]:
# Read just the first line of the file and split it on commas by hand.
# The handle is named csv_file rather than csv so it cannot shadow the csv
# module that this notebook imports for the later sections.
with open('est2018-pop-res.csv') as csv_file:
    line = csv_file.readline()
    # readline() keeps the trailing newline, so it ends up in the last value.
    values = line.split(",")
    print(values)

In [None]:
# Same as the previous cell, but strip() removes the trailing newline
# (and any surrounding whitespace) before the line is split.
# The handle is named csv_file rather than csv so it cannot shadow the csv
# module that this notebook imports for the later sections.
with open('est2018-pop-res.csv') as csv_file:
    line = csv_file.readline().strip()
    values = line.split(",")
    print(values)

In [None]:
# Print the first max_lines lines of the file, each split into its fields.
# The handle is named csv_file rather than csv so it cannot shadow the csv
# module that this notebook imports for the later sections.
with open('est2018-pop-res.csv') as csv_file:
    max_lines = 10
    for counter, line in enumerate(csv_file):
        # Check the limit before printing so exactly max_lines lines are shown
        # (checking after the print would let one extra line through).
        if counter >= max_lines:
            break
        values = line.strip().split(",")
        print(values)

# Using the CSV Module

In [None]:
import csv

In [None]:
# Preview the file through csv.reader, which yields each row as a list of strings.
# newline='' is how the csv module documentation says to open a file for
# csv.reader, so line breaks inside quoted fields are handled by the parser.
with open('est2018-pop-res.csv', newline='') as f:
    csv_reader = csv.reader(f)
    max_lines = 10
    for counter, row in enumerate(csv_reader):
        # Check the limit before printing so exactly max_lines rows are shown;
        # the header row counts as one of them.
        if counter >= max_lines:
            break
        print(row)

In [None]:
# Display the built-in documentation for csv.reader.
help(csv.reader)

In [None]:
%%bash
# Peek at the start of the .psv variant of the data file (head prints the first 10 lines by default).
head est2018-pop-res.psv

In [None]:
# Preview the pipe-separated file: csv.reader parses it once delimiter='|' is given.
# newline='' is how the csv module documentation says to open a file for
# csv.reader, so line breaks inside quoted fields are handled by the parser.
with open('est2018-pop-res.psv', newline='') as f:
    csv_reader = csv.reader(f, delimiter='|')
    max_lines = 10
    for counter, row in enumerate(csv_reader):
        # Check the limit before printing so exactly max_lines rows are shown.
        if counter >= max_lines:
            break
        print(row)

# Using Pandas

Note that we typically import pandas as pd, giving it a shorter alias name.

In [None]:
import pandas as pd

In [None]:
# Load the whole file into a DataFrame; by default read_csv takes the column
# names from the first row.
# NOTE(review): dtypes are inferred, so code-like columns such as SUMLEV
# ('010', '040') are presumably read as integers and lose their leading zeros.
# Pass dtype=str for those columns if the zeros matter; confirm with data.dtypes.
data = pd.read_csv('est2018-pop-res.csv')
# A bare last expression renders the first five rows as a table.
data.head()

# Something Practical:

### Sum up all the state populations to see if they match the total for the US

**Algorithm:**

We can see that the field `SUMLEV` tells us whether a row holds the total US population (`SUMLEV` = `010`) or the population of a single state (`SUMLEV` = `040`).

We'll read the file using the CSV reader.

Loop through the file. If a row has `SUMLEV` = `'010'`, remember its population as the US total. If a row has `SUMLEV` = `'040'`, add its population to a running total of state populations.

In [None]:
import csv

In [None]:
# Compare the national population row against the sum of the state-level rows.

# Positions of the fields we need within each CSV row.
SUMLEV_COL = 0       # summary level code: '010' = whole US, '040' = single state
POPESTIMATE_COL = 5  # population estimate (position taken from the original indexing; confirm against the header row)

total_us = 0
total_states = 0

# newline='' is how the csv module documentation says to open a file for csv.reader.
with open('est2018-pop-res.csv', newline='') as f:
    reader = csv.reader(f)
    next(reader, None)  # skip the header row (the None default avoids StopIteration on an empty file)
    for row in reader:
        if not row:
            # csv.reader yields [] for a blank line; indexing it would raise IndexError.
            continue
        sumlev = row[SUMLEV_COL]
        if sumlev == '010':
            # National row: remember it as the reference total.
            total_us = int(row[POPESTIMATE_COL])
        elif sumlev == '040':
            # State-level row: add it to the running sum.
            total_states += int(row[POPESTIMATE_COL])

print("Total US Population was      {}".format(total_us))
print("Sum of State Populations was {}".format(total_states))