# read csv
There are many ways to read CSV files:
1. fileObject.readline()
2. pandas.read_csv(file)
3. csv.DictReader(fileObject)
4. csv.reader(fileObject)

The `csv` module (or `unicodecsv`) is more robust when fields contain commas or quotation marks.

In [1]:
file = "data/beatles.csv"
data = []
with open(file, "r") as f:
    # The first line of the file carries the column names.
    header = f.readline().split(",")
    column_names = [name.strip() for name in header]
    counter = 0
    for line in f:
        if counter == 10:  # keep only the first 10 data rows
            break
        fields = line.split(",")
        # Pair each value with the column name at the same position.
        entry = {column_names[i]: value.strip() for i, value in enumerate(fields)}
        data.append(entry)
        counter += 1

In [3]:
# Inspect the first parsed row (a dict keyed by column name).
data[0]

{'BPI Certification': 'Gold',
 'Label': 'Parlophone(UK)',
 'RIAA Certification': 'Platinum',
 'Released': '22 March 1963',
 'Title': 'Please Please Me',
 'UK Chart Position': '1',
 'US Chart Position': '-'}

In [8]:
import pandas as pd
# Let pandas parse the CSV file, then preview the first rows.
df = pd.read_csv(file)
df.head()

Unnamed: 0,Title,Released,Label,UK Chart Position,US Chart Position,BPI Certification,RIAA Certification
0,Please Please Me,22 March 1963,Parlophone(UK),1,-,Gold,Platinum
1,With the Beatles,22 November 1963,Parlophone(UK),1,-,Platinum,Gold
2,Beatlemania! With the Beatles,25 November 1963,Capitol(CAN),-,-,,
3,Introducing... The Beatles,10 January 1964,Vee-Jay(US),-,2,,
4,Meet the Beatles!,20 January 1964,Capitol(US),-,1,,5xPlatinum


In [13]:
import csv
data = []
# newline="" lets the csv module handle line endings itself, as its docs recommend.
with open(file, newline="") as f:
    reader = csv.DictReader(f)  # the header row is read automatically and used as keys
    # Each row is a mapping from column name to the value in that row.
    for row in reader:
        data.append(row)

In [16]:
# Inspect the first row collected from the DictReader.
data[0]

OrderedDict([('Title', 'Please Please Me'),
             ('Released', '22 March 1963'),
             ('Label', 'Parlophone(UK)'),
             ('UK Chart Position', '1'),
             ('US Chart Position', '-'),
             ('BPI Certification', 'Gold'),
             ('RIAA Certification', 'Platinum')])

In [24]:
import csv
data = []
with open(file) as f:
    reader = csv.reader(f)
    # The first row is the header; pull it off before collecting the rest.
    title = next(reader)
    # Every remaining row is a list of strings.
    data.extend(reader)

In [25]:
# Header row captured with next(reader), followed by the first data row.
print(title)
print(data[0])

['Title', 'Released', 'Label', 'UK Chart Position', 'US Chart Position', 'BPI Certification', 'RIAA Certification']
['Please Please Me', '22 March 1963', 'Parlophone(UK)', '1', '-', 'Gold', 'Platinum']


# read Excel

In [38]:
import xlrd
file = 'data/2013_ERCOT_Hourly_Load_Data.xls'
workbook = xlrd.open_workbook(file)
sheet = workbook.sheet_by_index(0)  # all data is in the first sheet
# Copy every cell into a nested list: one inner list per spreadsheet row.
data = []
for r in range(sheet.nrows):
    row_values = []
    for col in range(sheet.ncols):
        row_values.append(sheet.cell_value(r, col))
    data.append(row_values)
print(data[0])
print(sheet.cell_value(0,1))  # a single cell: row 0, column 1
print(sheet.col_values(1,start_rowx=1,end_rowx=5))  # column 1, rows 1 through 4

['Hour_End', 'COAST', 'EAST', 'FAR_WEST', 'NORTH', 'NORTH_C', 'SOUTHERN', 'SOUTH_C', 'WEST', 'ERCOT']
COAST
[7606.263544000012, 7388.082713999988, 7178.867877999996, 7038.822581000004]


In [39]:
import pandas as pd
# Load the same workbook with pandas and preview the first rows.
df = pd.read_excel(file)
df.head()

Unnamed: 0,Hour_End,COAST,EAST,FAR_WEST,NORTH,NORTH_C,SOUTHERN,SOUTH_C,WEST,ERCOT
0,2013-01-01 01:00:00,7606.263544,1073.892438,1411.750567,784.978166,10369.09439,2206.675077,4368.490945,882.931901,28704.077028
1,2013-01-01 02:00:00,7388.082714,1035.021938,1403.472287,776.307387,10152.358518,2159.733208,4233.587967,872.40475,28020.968769
2,2013-01-01 03:00:00,7178.867878,1036.088697,1395.05315,768.125748,9988.051418,2065.114706,4082.86286,868.853938,27383.018395
3,2013-01-01 04:00:00,7038.822581,1032.648841,1395.50882,770.937969,9946.658655,1990.903699,4010.489608,865.701201,27051.671374
4,2013-01-01 05:00:00,6990.85794,1042.823044,1401.216842,779.089313,10096.66419,1954.807585,4038.655997,879.924249,27184.03916


# JSON
1. Use `requests.get(url, params).json()` to fetch a remote JSON document and parse it into a dict.
2. Use `json.load()` to read a local JSON file into a dict.

In [44]:
import json
import requests

# Search the MusicBrainz artist endpoint for "Nirvana", asking for a JSON response.
search_url = "http://musicbrainz.org/ws/2/artist/"
search_params = {'fmt': 'json', 'query': 'artist:Nirvana'}
r = requests.get(search_url, params=search_params)
print(type(r))
print("requesting", r.url)

<class 'requests.models.Response'>
requesting http://musicbrainz.org/ws/2/artist/?fmt=json&query=artist%3ANirvana


In [70]:
# Fail loudly on an HTTP error instead of falling through to a NameError
# (or a stale `a` left over from an earlier run) at the print below.
r.raise_for_status()
data = r.json()  # response body parsed into a dict
a = json.dumps(data, indent=4, sort_keys=True)  # similar to JSON.stringify in JavaScript
d = json.loads(a)  # parse the string back into a dict
print(a[0:200])

{
    "artists": [
        {
            "area": {
                "id": "6a264f94-6ff1-30b1-9a81-41f7bfabd616",
                "name": "Finland",
                "sort-name": "Finland"
            }


In [62]:
import pprint # useful for nested dicts
pp = pprint.PrettyPrinter(indent=4)
# Pretty-print the first artist in the search result.
pp.pprint(d["artists"][0])

{   'area': {   'id': '6a264f94-6ff1-30b1-9a81-41f7bfabd616',
                'name': 'Finland',
                'sort-name': 'Finland'},
    'country': 'FI',
    'disambiguation': "Early 1980's Finnish punk band",
    'id': '85af0709-95db-4fbc-801a-120e9f4766d0',
    'life-span': {'ended': None},
    'name': 'Nirvana',
    'score': '100',
    'sort-name': 'Nirvana',
    'tags': [{'count': 1, 'name': 'punk'}, {'count': 1, 'name': 'finland'}],
    'type': 'Group'}


In [65]:
# Build a DataFrame from the artist records and preview the first rows.
pd.DataFrame.from_dict(d["artists"], orient='columns').head()

Unnamed: 0,aliases,area,begin-area,country,disambiguation,gender,id,life-span,name,score,sort-name,tags,type
0,,"{'id': '6a264f94-6ff1-30b1-9a81-41f7bfabd616',...",,FI,Early 1980's Finnish punk band,,85af0709-95db-4fbc-801a-120e9f4766d0,{'ended': None},Nirvana,100,Nirvana,"[{'count': 1, 'name': 'punk'}, {'count': 1, 'n...",Group
1,,,,,"French band from Martigues, activ during the 70s.",,c49d69dc-e008-47cf-b5ff-160fafb1fe1f,{'ended': None},Nirvana,100,Nirvana,,
2,,,,,founded in 1987 by a Michael Jackson double/im...,,3aa878c0-224b-41e5-abd1-63be359d2bca,"{'begin': '1987', 'ended': None}",Nirvana,100,Nirvana,,
3,,"{'id': '8a754a16-0027-3a29-b6d7-2b40ea0481ed',...","{'id': 'f03d09b3-39dc-4083-afd6-159e3f0d462f',...",GB,60s band from the UK,,9282c8b4-ca0b-4c6b-b7e3-4f7762dfc4d6,"{'begin': '1967', 'ended': None}",Nirvana,100,Nirvana,"[{'count': 1, 'name': 'rock'}, {'count': 1, 'n...",Group
4,"[{'begin-date': None, 'end-date': None, 'local...","{'id': '489ce91b-6658-3307-9877-795b68554c98',...","{'id': 'a640b45c-c173-49b1-8030-973603e895b5',...",US,90s US grunge band,,5b11f4ce-a62d-471e-81fc-a69a8278c7da,"{'begin': '1988-01', 'end': '1994-04-05', 'end...",Nirvana,100,Nirvana,"[{'count': 9, 'name': 'rock'}, {'count': 4, 'n...",Group


In [85]:
file ="data/artist.json"
# json.load parses the open file object directly.
with open(file) as f:
    d2 = json.load(f)

In [73]:
# Pretty-print the record loaded from the local JSON file.
pp.pprint(d2)

{   'area': {   'id': '8a754a16-0027-3a29-b6d7-2b40ea0481ed',
                'name': 'United Kingdom',
                'sort-name': 'United Kingdom'},
    'country': 'GB',
    'disambiguation': '60s band from the UK',
    'id': '9282c8b4-ca0b-4c6b-b7e3-4f7762dfc4d6',
    'life-span': {'begin': '1967', 'ended': None},
    'name': 'Nirvana',
    'score': '100',
    'sort-name': 'Nirvana',
    'tags': [   {'count': 1, 'name': 'rock'},
                {'count': 1, 'name': 'pop'},
                {'count': 1, 'name': 'progressive rock'},
                {'count': 1, 'name': 'orchestral'},
                {'count': 1, 'name': 'british'},
                {'count': 1, 'name': 'power pop'},
                {'count': 1, 'name': 'psychedelic rock'},
                {'count': 1, 'name': 'soft rock'},
                {'count': 1, 'name': 'symphonic rock'},
                {'count': 1, 'name': 'english'}],
    'type': 'Group'}


In [84]:
# Look up the first search hit by its id, asking for its releases as well.
ID = d["artists"][0]["id"]
r = requests.get("http://musicbrainz.org/ws/2/artist/"+ID, params={'fmt': 'json', "inc": "releases"})
print("requesting", r.url)
pp.pprint(r.json())

requesting http://musicbrainz.org/ws/2/artist/85af0709-95db-4fbc-801a-120e9f4766d0?fmt=json&inc=releases
{   'area': {   'disambiguation': '',
                'id': '6a264f94-6ff1-30b1-9a81-41f7bfabd616',
                'iso-3166-1-codes': ['FI'],
                'name': 'Finland',
                'sort-name': 'Finland'},
    'begin_area': None,
    'country': 'FI',
    'disambiguation': "Early 1980's Finnish punk band",
    'end_area': None,
    'gender': None,
    'gender-id': None,
    'id': '85af0709-95db-4fbc-801a-120e9f4766d0',
    'ipis': [],
    'isnis': [],
    'life-span': {'begin': None, 'end': None, 'ended': False},
    'name': 'Nirvana',
    'releases': [   {   'barcode': '',
                        'country': 'FI',
                        'date': '1980',
                        'disambiguation': '',
                        'id': '3e25396c-5c66-4609-8e47-37f250d323c7',
                        'packaging': 'Cardboard/Paper Sleeve',
                        'packaging-id': '