## Data Structures  

In [2]:
#!/usr/bin/python
# -*- coding: utf-8 -*-
# NOTE(review): the shebang and coding lines above are script-file conventions;
# presumably they were carried over from a template -- confirm they are needed here.

# Initial setup

# IPython magic: display floating point results with 3 digits after the decimal point.
%precision 3
# IPython magic: render matplotlib figures inline, directly below the producing cell.
%matplotlib inline

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


# Show up to 50 columns when displaying wide DataFrames.
# Use the fully qualified option key: abbreviated names are matched as patterns,
# and 'max_columns' is ambiguous on newer pandas (it also matches
# 'styler.render.max_columns'), which makes set_option raise OptionError.
pd.set_option('display.max_columns', 50)

### Series

In [20]:
# Build a Series from a plain list; with no index given, the labels default to 0..n-1
s = pd.Series(data=[7, 'Heisenberg', 3.14, -1789710578, 'Happy Eating!'])
s

0                7
1       Heisenberg
2             3.14
3      -1789710578
4    Happy Eating!
dtype: object

In [21]:
# Same values as before, but with explicit index labels supplied at construction
s = pd.Series(
    data=[7, 'Heisenberg', 3.14, -1789710578, 'Happy Eating!'],
    index=['A', 'Z', 'C', 'Y', 'E']
)
s

A                7
Z       Heisenberg
C             3.14
Y      -1789710578
E    Happy Eating!
dtype: object

In [5]:
# Build a Series from a dict: the keys become index labels, the values the data.
# Boston has no value (None); it shows up as NaN in the resulting Series.
d = {
    'Chicago': 1000,
    'New York': 1300,
    'Portland': 900,
    'San Francisco': 1100,
    'Austin': 450,
    'Boston': None
}
cities = pd.Series(data=d)
cities

Austin            450
Boston            NaN
Chicago          1000
New York         1300
Portland          900
San Francisco    1100
dtype: float64

In [24]:
# Select several entries at once by passing a list of index labels
cities.loc[['Chicago', 'Portland', 'San Francisco']]

Chicago          1000
Portland          900
San Francisco    1100
dtype: float64

In [28]:
# Use Boolean masking and indexing: comparing a Series to a scalar yields a
# boolean Series, which can then be used to keep only the matching entries.
# print is called as a function so the cell runs on Python 3 as well as Python 2;
# print('') emits the blank separator line on both.
mask = cities < 1000
print(mask)
print('')
print(cities[mask])

Austin            True
Boston           False
Chicago          False
New York         False
Portland          True
San Francisco    False
dtype: bool

Austin      450
Portland    900
dtype: float64


In [34]:
# Change the values in a Series using indexing
# NOTE: this cell mutates `cities` in place, so re-running it reports the
# already-updated value as the "old" one.
# print is called as a function so the cell runs on Python 3 as well as Python 2.
print('Old value: {}'.format(cities['Chicago']))
cities['Chicago'] = 1400
print('New value: {}'.format(cities['Chicago']))

# Change the values using Boolean masking: every entry below 1000 becomes 750
cities[cities < 1000] = 750
cities

Old value: 1000.0
New value: 1400.0


Austin            750
Boston            NaN
Chicago          1400
New York         1300
Portland          750
San Francisco    1100
dtype: float64

In [37]:
# Use idiomatic Python: `in` tests whether a label is present in the Series' index
# print is called as a function so the cell runs on Python 3 as well as Python 2.
print('Seattle' in cities)
print('San Francisco' in cities)

False
True


In [41]:
# Mathematical operations using scalars and functions: both are applied
# element-wise, and the missing Boston entry stays NaN.
# print is called as a function so the cell runs on Python 3 as well as Python 2;
# print('') emits the blank separator line on both.
print(cities / 3)
print('')
print(np.square(cities))

Austin           250.000000
Boston                  NaN
Chicago          466.666667
New York         433.333333
Portland         250.000000
San Francisco    366.666667
dtype: float64

Austin            562500
Boston               NaN
Chicago          1960000
New York         1690000
Portland          562500
San Francisco    1210000
dtype: float64


In [42]:
# Add two Series returning a union: the result is indexed by the union of both
# sets of labels, and any label missing from either operand yields NaN.
# print is called as a function so the cell runs on Python 3 as well as Python 2;
# print('') emits the blank separator line on both.
print(cities[['Chicago', 'New York', 'Portland']])
print('')
print(cities[['Austin', 'New York']])
print('')
print(cities[['Chicago', 'New York', 'Portland']] + cities[['Austin', 'New York']])

Chicago     1400
New York    1300
Portland     750
dtype: float64

Austin       750
New York    1300
dtype: float64

Austin       NaN
Chicago      NaN
New York    2600
Portland     NaN
dtype: float64


In [7]:
# Check NULL with isnull and notnull: each returns a boolean Series flagging
# the missing (respectively present) entries.
# print is called as a function so the cell runs on Python 3 as well as Python 2;
# print('') emits the blank separator line on both.
print(cities.isnull())
print('')
print(cities.notnull())

Austin           False
Boston            True
Chicago          False
New York         False
Portland         False
San Francisco    False
dtype: bool

Austin            True
Boston           False
Chicago           True
New York          True
Portland          True
San Francisco     True
dtype: bool


### DataFrame

In [3]:
# Build a DataFrame from a dict of equal-length lists (one list per column)
data = {
    'year': [2010, 2011, 2012, 2011, 2012, 2010, 2011, 2012],
    'team': ['Bears'] * 3 + ['Packers'] * 2 + ['Lions'] * 3,
    'wins': [11, 8, 10, 15, 11, 6, 10, 4],
    'losses': [5, 8, 6, 1, 5, 10, 6, 12]
}
# `columns` pins the left-to-right column order of the resulting frame
football = pd.DataFrame(data=data, columns=['year', 'team', 'wins', 'losses'])
football

Unnamed: 0,year,team,wins,losses
0,2010,Bears,11,5
1,2011,Bears,8,8
2,2012,Bears,10,6
3,2011,Packers,15,1
4,2012,Packers,11,5
5,2010,Lions,6,10
6,2011,Lions,10,6
7,2012,Lions,4,12


In [8]:
# Load the batting statistics from a CSV file (path is relative to the working directory)
batting = pd.read_csv('data/batting.csv')
# Preview the first five rows
batting.head(n=5)

Unnamed: 0,Name,MarApr_AB,MarApr_PA,MarApr_H,MarApr_AVG,FullSeason_AVG
0,DJ LeMahieu,79,88,23,0.291,0.348
1,Daniel Murphy,81,90,30,0.37,0.347
2,Jose Altuve,95,110,29,0.305,0.338
3,Joey Votto,83,98,19,0.229,0.326
4,Charlie Blackmon,0,0,0,,0.324


In [9]:
# Serialize the football DataFrame to a JSON file in the working directory
football.to_json(path_or_buf='football.json')

### Database

In [None]:
# Read/write DataFrames directly from/to a database
# The example below is wrapped in a triple-quoted string, so none of it executes;
# the cell merely evaluates to that string.
# NOTE(review): the sqlite path inside is an absolute path on one machine --
# presumably why the example is disabled; point it at a configurable location
# before enabling it.
'''
from pandas.io import sql
import sqlite3

conn = sqlite3.connect('/Users/gjreda/Dropbox/gregreda.com/_code/towed')
query = "SELECT * FROM towed WHERE make = 'FORD';"

results = sql.read_sql(query, con=conn)
results.head()
'''

### Clipboard

In [12]:
# Parse whatever tabular text is currently on the system clipboard into a DataFrame.
# No separator or column names are passed here, so pandas' defaults apply.
# NOTE(review): the result depends on the clipboard contents at run time --
# presumably a table copied from the page below; confirm.
# http://www.baseball-reference.com/players/r/riverma01.shtml
rivera = pd.read_clipboard()
rivera.head(n=5)

### URL

In [10]:
# Fetch a tab-separated file straight from a URL into a DataFrame
url = 'https://raw.github.com/gjreda/best-sandwiches/master/data/best-sandwiches-geocode.tsv'
# With the tab separator given explicitly, read_csv parses the file the same way read_table does
from_url = pd.read_csv(url, sep='\t')
from_url.head(n=5)

Unnamed: 0,rank,sandwich,restaurant,description,price,address,city,phone,website,full_address,formatted_address,lat,lng
0,1,BLT,Old Oak Tap,The B is applewood smoked&mdash;nice and snapp...,$10,2109 W. Chicago Ave.,Chicago,773-772-0406,theoldoaktap.com,"2109 W. Chicago Ave., Chicago","2109 West Chicago Avenue, Chicago, IL 60622, USA",41.895734,-87.67996
1,2,Fried Bologna,Au Cheval,Thought your bologna-eating days had retired w...,$9,800 W. Randolph St.,Chicago,312-929-4580,aucheval.tumblr.com,"800 W. Randolph St., Chicago","800 West Randolph Street, Chicago, IL 60607, USA",41.884672,-87.647754
2,3,Woodland Mushroom,Xoco,Leave it to Rick Bayless and crew to come up w...,$9.50.,445 N. Clark St.,Chicago,312-334-3688,rickbayless.com,"445 N. Clark St., Chicago","445 North Clark Street, Chicago, IL 60654, USA",41.890602,-87.630925
3,4,Roast Beef,Al&rsquo;s Deli,"The Francophile brothers behind this deli, whi...",$9.40.,914 Noyes St.,Evanston,,alsdeli.net,"914 Noyes St., Evanston","914 Noyes Street, Evanston, IL 60201, USA",42.058442,-87.684425
4,5,PB&amp;L,Publican Qualty Meats,"When this place opened in February, it quickly...",$10,825 W. Fulton Mkt.,Chicago,312-445-8977,publicanqualitymeats.com,"825 W. Fulton Mkt., Chicago","825 West Fulton Market, Chicago, IL 60607, USA",41.886637,-87.648553
