### Parse CSV file

In [1]:
import os

# Directory containing the data file; the empty string leaves the file name
# relative to the current working directory.
DATADIR = ""
# Name of the CSV file parsed by the cells below.
DATAFILE = "beatles-diskography.csv"

In [15]:
def parse_file(datafile, max_rows=10):
    """
    Parse a comma-separated text file into a list of dictionaries, by hand.

    The first line is used as the header. Every following line becomes one
    dict that maps the stripped header names to the stripped field values.
    Lines are split on every comma, so quoted fields that contain commas are
    not supported here; use the csv module for such files.

    Args:
        datafile: path of the file to read.
        max_rows: maximum number of data rows to return. Defaults to 10,
            which is the limit this function always applied. Pass None to
            read every row.

    Returns:
        A list of dicts, one per data row, in file order.

    Raises:
        IndexError: if a row has more comma-separated fields than the header.
    """
    data = []
    with open(datafile, "r") as f:
        # Strip the header names once, not once per field of every row.
        header = [name.strip() for name in f.readline().split(",")]

        for row_number, line in enumerate(f):
            if max_rows is not None and row_number >= max_rows:
                break

            fields = line.split(",")
            # header[i] deliberately raises IndexError on an over-long row:
            # failing loudly is safer than silently misaligning columns.
            data.append({header[i]: value.strip()
                         for i, value in enumerate(fields)})

    return data

In [16]:
# Build the path from DATADIR as well, so changing the data directory in the
# configuration cell actually affects this call.
parse_file(os.path.join(DATADIR, DATAFILE))

[{'BPI Certification': 'Gold',
  'Label': 'Parlophone(UK)',
  'RIAA Certification': 'Platinum',
  'Released': '22 March 1963',
  'Title': 'Please Please Me',
  'UK Chart Position': '1',
  'US Chart Position': '-'},
 {'BPI Certification': 'Platinum',
  'Label': 'Parlophone(UK)',
  'RIAA Certification': 'Gold',
  'Released': '22 November 1963',
  'Title': 'With the Beatles',
  'UK Chart Position': '1',
  'US Chart Position': '-'},
 {'BPI Certification': '',
  'Label': 'Capitol(CAN)',
  'RIAA Certification': '',
  'Released': '25 November 1963',
  'Title': 'Beatlemania! With the Beatles',
  'UK Chart Position': '-',
  'US Chart Position': '-'},
 {'BPI Certification': '',
  'Label': 'Vee-Jay(US)',
  'RIAA Certification': '',
  'Released': '10 January 1964',
  'Title': 'Introducing... The Beatles',
  'UK Chart Position': '-',
  'US Chart Position': '2'},
 {'BPI Certification': '',
  'Label': 'Capitol(US)',
  'RIAA Certification': '5xPlatinum',
  'Released': '20 January 1964',
  'Title': 'Me

In [22]:
import csv
import os
import pprint

# Same directory / file-name pair as declared in the first cell, repeated here
# so this cell group can be run on its own.
DATADIR = ""
DATAFILE = "beatles-diskography.csv"

In [27]:
def parse_csv(datafile):
    """
    Read a CSV file with the csv module and return all of its rows.

    The first row of the file supplies the column names.

    Args:
        datafile: path of the CSV file to read.

    Returns:
        A list with one mapping per data row (column name -> string value),
        in file order, exactly as produced by csv.DictReader.
    """
    # newline='' hands newline handling to the csv module, as its
    # documentation requires; otherwise line breaks embedded in quoted
    # fields are rewritten by the text layer before the parser sees them.
    with open(datafile, 'r', newline='') as sd:
        return list(csv.DictReader(sd))

In [29]:
if __name__ == '__main__':
    # Combine the configured directory and file name into one path.
    datafile = os.path.join(DATADIR, DATAFILE)
    # Parse the file once (the earlier duplicate call discarded its result)
    # and pretty-print the list of row mappings.
    d = parse_csv(datafile)
    pprint.pprint(d)

[OrderedDict([('Title', 'Please Please Me'),
              ('Released', '22 March 1963'),
              ('Label', 'Parlophone(UK)'),
              ('UK Chart Position', '1'),
              ('US Chart Position', '-'),
              ('BPI Certification', 'Gold'),
              ('RIAA Certification', 'Platinum')]),
 OrderedDict([('Title', 'With the Beatles'),
              ('Released', '22 November 1963'),
              ('Label', 'Parlophone(UK)'),
              ('UK Chart Position', '1'),
              ('US Chart Position', '-'),
              ('BPI Certification', 'Platinum'),
              ('RIAA Certification', 'Gold')]),
 OrderedDict([('Title', 'Beatlemania! With the Beatles'),
              ('Released', '25 November 1963'),
              ('Label', 'Capitol(CAN)'),
              ('UK Chart Position', '-'),
              ('US Chart Position', '-'),
              ('BPI Certification', ''),
              ('RIAA Certification', '')]),
 OrderedDict([('Title', 'Introducing... The Beatles')

### Parse Excel File

In [33]:
import xlrd

# Excel workbook passed to the parsing functions in this section.
# Note: this rebinds the `datafile` name used by the CSV cells above.
datafile = "2013_ERCOT_Hourly_Load_Data.xls"

In [39]:
def parse_file(datafile):
    """
    Demonstrate common xlrd operations on the first sheet of a workbook.

    Reads every cell of sheet 0 into a list of rows, prints a series of
    illustrative look-ups (single cells, one full row, a column slice and a
    date conversion) and finally returns the list of rows.

    Args:
        datafile: path of the Excel workbook to open.

    Returns:
        A list of lists holding the value of every cell of the first sheet,
        indexed as data[row][col].
    """
    book = xlrd.open_workbook(datafile)
    sheet = book.sheet_by_index(0)
    row_count, col_count = sheet.nrows, sheet.ncols

    # Whole sheet as nested lists: outer index is the row, inner the column.
    data = []
    for row_idx in range(row_count):
        data.append([sheet.cell_value(row_idx, col_idx)
                     for col_idx in range(col_count)])

    print("\nList Comprehension")
    print("data[3][2]:")
    print(data[3][2])

    print("\nCells in a nested loop:")
    for row_idx in range(row_count):
        # Only row 50 is displayed; every other row is skipped outright.
        if row_idx != 50:
            continue
        for col_idx in range(col_count):
            print(sheet.cell_value(row_idx, col_idx))

    # Other useful sheet methods.
    print("\nROWS, COLUMNS, and CELLS:")
    print("Number of rows in the sheet:")
    print(row_count)
    print("Type of data in cell (row 3, col 2):")
    print(sheet.cell_type(3, 2))
    print("Value in cell (row 3, col 2):")
    print(sheet.cell_value(3, 2))
    print("Get a slice of values in column 3, from rows 1-3:")
    # end_rowx is exclusive, hence 4 to include row 3.
    print(sheet.col_values(3, start_rowx=1, end_rowx=4))

    print("\nDATES:")
    print("Type of data in cell (row 1, col 0):")
    print(sheet.cell_type(1, 0))
    excel_time = sheet.cell_value(1, 0)
    print("Time in Excel format:")
    print(excel_time)
    print("Convert time to a Python datetime tuple, from the Excel float:")
    print(xlrd.xldate_as_tuple(excel_time, 0))

    return data

In [41]:
def parse_file_calculation(datafile):
    """
    Summarise column 1 of the first sheet of an Excel workbook.

    Column 1 is read from row 1 downwards (row 0 is skipped, presumably
    because it holds the column headers; confirm against the workbook). The
    timestamps of the extreme values are taken from column 0 of the same row
    and converted from Excel serial dates to tuples.

    Args:
        datafile: path of the Excel workbook to open.

    Returns:
        A dict with the keys
        'maxtime'  - date tuple of the row holding the maximum value,
        'maxvalue' - the maximum value,
        'mintime'  - date tuple of the row holding the minimum value,
        'minvalue' - the minimum value,
        'avgcoast' - arithmetic mean of the column.

    Raises:
        ValueError: if the column contains no data rows.
    """
    workbook = xlrd.open_workbook(datafile)
    sheet = workbook.sheet_by_index(0)

    # Only column 1 is needed; the previous whole-sheet read was discarded.
    cv = sheet.col_values(1, start_rowx=1, end_rowx=None)
    if not cv:
        raise ValueError("no data rows found in column 1 of %r" % (datafile,))

    maxval = max(cv)
    minval = min(cv)

    # cv starts at sheet row 1, so list index + 1 is the sheet row number.
    maxpos = cv.index(maxval) + 1
    minpos = cv.index(minval) + 1

    # Datemode 0 (1900-based) is assumed, as in the original code.
    realtime = xlrd.xldate_as_tuple(sheet.cell_value(maxpos, 0), 0)
    realmintime = xlrd.xldate_as_tuple(sheet.cell_value(minpos, 0), 0)

    return {
        'maxtime': realtime,
        'maxvalue': maxval,
        'mintime': realmintime,
        'minvalue': minval,
        'avgcoast': sum(cv) / float(len(cv)),
    }


In [42]:
# Compute the summary values for the workbook named by `datafile`.
data = parse_file_calculation(datafile)

# Pretty-print the returned dict.
import pprint
pprint.pprint(data)

{'avgcoast': 10976.933460679751,
 'maxtime': (2013, 8, 13, 17, 0, 0),
 'maxvalue': 18779.025510000003,
 'mintime': (2013, 2, 3, 4, 0, 0),
 'minvalue': 6602.113898999982}


### Loading and Parsing JSON files

Data Modeling in JSON

- Items may have different fields

- May have nested objects

- May have nested arrays

In [46]:
import json
import requests

# Root of the musicbrainz API and the artist endpoint beneath it.
BASE_URL = "http://musicbrainz.org/ws/2/"
ARTIST_URL = BASE_URL + "artist/"


# Starter query parameters, keyed by the kind of lookup. The chosen dict is
# handed to requests.get as the `params` argument.
query_type = {
    "simple": {},
    "atr": {"inc": "aliases+tags+ratings"},
    "aliases": {"inc": "aliases"},
    "releases": {"inc": "releases"},
}


def query_site(url, params, uid="", fmt="json"):
    """
    This is the main function for making queries to the musicbrainz API. The
    query should return a json document.

    Args:
        url: endpoint URL; `uid` is appended to it verbatim.
        params: dict of query-string parameters. It is copied, never
            modified, so shared dicts such as the `query_type` entries stay
            clean between calls.
        uid: optional entity id appended to `url`.
        fmt: value sent as the "fmt" query parameter (default "json").

    Returns:
        The decoded JSON body when the response status is 200 OK, or None
        for any other non-error status.

    Raises:
        requests.HTTPError: for 4xx/5xx responses (via raise_for_status).
    """
    # Work on a copy: callers pass module-level dicts that must not keep
    # "fmt" (or anything else) from a previous request.
    query = dict(params)
    query["fmt"] = fmt
    r = requests.get(url + uid, params=query)
    print ("requesting", r.url)

    if r.status_code == requests.codes.ok:
        return r.json()

    # Raises for error statuses; is a no-op for other 2xx/3xx responses.
    r.raise_for_status()
    return None


def query_by_name(url, params, name):
    """
    This adds an artist name to the query parameters before making an API call
    to the function above.

    Args:
        url: endpoint URL to query.
        params: dict of base query parameters. It is copied, never modified,
            so the artist search term does not leak into later calls that
            reuse the same dict.
        name: artist name to search for.

    Returns:
        Whatever query_site returns for the request.
    """
    # Copy first: `params` is typically one of the shared `query_type` dicts.
    search_params = dict(params)
    search_params["query"] = "artist:" + name
    return query_site(url, search_params)


def pretty_print(data, indent=4):
    """
    After we get our output, we can use this function to format it to be more
    readable.

    Args:
        data: the value to display. Any dict (including subclasses such as
            OrderedDict) is printed as indented JSON with sorted keys; every
            other value is printed unchanged.
        indent: number of spaces per JSON nesting level.
    """
    # isinstance rather than an exact type comparison, so dict subclasses
    # are formatted as well.
    if isinstance(data, dict):
        print (json.dumps(data, indent=indent, sort_keys=True))
    else:
        print (data)


def main():
    """
    Sample exploration of the musicbrainz artist API.

    Searches for artists called "Nirvana", shows the full response, picks the
    fourth match, fetches that artist's releases and prints the first release
    followed by every release title. Adjust the calls and indexes to explore
    further.

    HINT: the service answers with a multi-level JSON document. Print one
    level at a time, or save the output to a file, to learn its structure.
    """

    # Search the database for bands named Nirvana and show the raw response.
    search = query_by_name(ARTIST_URL, query_type["simple"], "Nirvana")
    pretty_print(search)

    # Focus on the 4th band in the result list (index 3).
    print("\nARTIST:")
    chosen = search["artists"][3]
    pretty_print(chosen)

    # Look up that band's releases through its artist id.
    artist_data = query_site(ARTIST_URL, query_type["releases"], chosen["id"])
    releases = artist_data["releases"]

    # Show one release in full, then only the titles of all of them.
    print("\nONE RELEASE:")
    pretty_print(releases[0], indent=2)

    titles = [release["title"] for release in releases]
    print("\nALL TITLES:")
    for title in titles:
        print(title)

# Run the example exploration when this code is executed directly
# (in a notebook cell __name__ is '__main__', so this runs on execution).
if __name__ == '__main__':
    main()

requesting http://musicbrainz.org/ws/2/artist/?query=artist%3ANirvana&fmt=json
{
    "artists": [
        {
            "area": {
                "id": "6a264f94-6ff1-30b1-9a81-41f7bfabd616",
                "name": "Finland",
                "sort-name": "Finland"
            },
            "country": "FI",
            "disambiguation": "Early 1980's Finnish punk band",
            "id": "85af0709-95db-4fbc-801a-120e9f4766d0",
            "life-span": {
                "ended": null
            },
            "name": "Nirvana",
            "score": "100",
            "sort-name": "Nirvana",
            "tags": [
                {
                    "count": 1,
                    "name": "punk"
                },
                {
                    "count": 1,
                    "name": "finland"
                }
            ],
            "type": "Group"
        },
        {
            "disambiguation": "French band from Martigues, activ during the 70s.",
            "id": "c49

requesting http://musicbrainz.org/ws/2/artist/9282c8b4-ca0b-4c6b-b7e3-4f7762dfc4d6?inc=releases&fmt=json

ONE RELEASE:
{
  "barcode": null,
  "country": "GB",
  "date": "1969",
  "disambiguation": "",
  "id": "0b44cb36-550a-491d-bfd9-8751271f9de7",
  "packaging": null,
  "packaging-id": null,
  "quality": "normal",
  "release-events": [
    {
      "area": {
        "disambiguation": "",
        "id": "8a754a16-0027-3a29-b6d7-2b40ea0481ed",
        "iso-3166-1-codes": [
          "GB"
        ],
        "name": "United Kingdom",
        "sort-name": "United Kingdom"
      },
      "date": "1969"
    }
  ],
  "status": "Official",
  "status-id": "4e304316-386d-3409-af2e-78857eec5cfe",
  "text-representation": {
    "language": "eng",
    "script": "Latn"
  },
  "title": "To Markos III"
}

ALL TITLES:
To Markos III
Travelling on a Cloud
Songs Of Love And Praise
Songs of Love and Praise
Songs of Love and Praise
All of Us
Secret Theatre
The Story of Simon Simopath
Me And My Friend
All of U