# Demo - More! Pandas

In [None]:
import pandas as pd
# A Series built from a plain list; with no index given, pandas numbers
# the values 0..n-1.
data = pd.Series(data=[0.25, 0.5, 0.75, 1.0])
data  # last expression in the cell, so the notebook displays it

In [None]:
# view the raw values as a NumPy array
# (pandas recommends to_numpy() over the older .values attribute)
data.to_numpy()

In [None]:
# view the index; none was passed to pd.Series, so this is the default RangeIndex
data.index

In [None]:
# [] looks the key up in the index; because the labels are 0..3 here,
# this reads just like indexing a standard Python list
data[1]

In [None]:
# Because the index is a RangeIndex, we can do normal slicing:
# from offset 1 up to (but not including) offset 3, i.e. offsets 1 and 2
data[1:3]

In [None]:
# Labels don't have to be integers: give each value a one-letter string label.
data = pd.Series([0.25, 0.5, 0.75, 1.0], index=list('abcd'))

In [None]:
# the string labels now appear where the default 0..3 index used to be
data

In [None]:
# the index now holds the labels 'a'..'d' instead of a RangeIndex
data.index

In [None]:
# look a value up by its label, similar to dict indexing
data['c']

In [None]:
# Integer labels are allowed too, and they need not be sorted or
# contiguous -- which makes label vs. position easy to confuse.
data = pd.Series(
    data=[0.25, 0.5, 0.75, 1.0],
    index=[2, 5, 3, 7],
)
data

In [None]:
# with integer labels, [] matches 3 against the labels, not the offsets:
# this is the value labelled 3 (0.75), not the fourth element (1.0)
data[3]

# Implicit and Explicit Indexing

In [None]:
# String values with string labels, used by the indexing examples that follow.
data = pd.Series(
    data=['a', 'b', 'c'],
    index=['Python', 'C++', 'Ruby'],
)
data

In [None]:
# the index is no longer a range:
# the labels act like keys in a dictionary
data.index

In [None]:
# With string labels, a bare integer in [] cannot be a label; pandas only
# treats it as a position through a deprecated fallback (FutureWarning
# since pandas 2.1). Ask for the position explicitly instead.
data.iloc[1]  # offset 1 -> 'b'

In [None]:
# slicing with integers is positional, exactly as with the default index
data[1:3] # offset 1...offset 2

# __`loc`__ and __`iloc`__
* `.loc` is a __*label*__-based indexing method
* `.iloc` is an __*integer position*__-based indexing method

In [None]:
# the same Series again: values 'a', 'b', 'c' labelled 'Python', 'C++', 'Ruby'
data

In [None]:
data.loc['Python'] # 'Python' is a label, not an offset

In [None]:
# 'Python' and 'C++' are labels, not integer offsets;
# unlike an offset slice, a label slice includes its end label
data.loc['Python':'C++']

In [None]:
data.iloc[1] # 1 is an offset (the second element), not a label

In [None]:
data.iloc[1:3] # 1:3 is a Python-style slice of offsets: 1 and 2, stop excluded

# Sales Data

In [None]:
# load the sales win/loss data; the relative path is resolved against
# the current working directory
dat = pd.read_csv("data/Sales-Win-Loss.csv")

In [None]:
# list the column names
dat.columns

In [None]:
# select one column by name
dat['Opportunity Result']

# Counting Values

In [None]:
# number of rows for each distinct value in the column
dat['Opportunity Result'].value_counts()

In [None]:
# the same tally for the 'Supplies Group' column
dat['Supplies Group'].value_counts()

In [None]:
# value_counts tallies whatever distinct values the column holds
dat['Elapsed Days In Sales Stage'].value_counts()

# Top Five Values

In [None]:
# value_counts() sorts by count, largest first, so the first five rows are
# the five most frequent subgroups; head(5) takes them by position
# explicitly instead of relying on an integer slice of a label-indexed Series
dat['Supplies Subgroup'].value_counts().head(5)

# Extracting Columns

In [None]:
# peek at the first rows (head() shows five by default)
dat.head()

In [None]:
# keep just the two columns of interest, for every row
region_results = dat.loc[:, ["Region", "Opportunity Result"]]

In [None]:
# (number of rows, number of columns)
region_results.shape

In [None]:
# first rows of the two-column subset
region_results.head()

# Creating a DataFrame from dicts

In [None]:
# One dict per row; the dict keys become the column names.
presidents = pd.DataFrame([
    dict(name='Barack Obama', elect=2008, born=1961),
    dict(name='George W. Bush', elect=2000, born=1946),
    dict(name='Bill Clinton', elect=1992, born=1946),
    dict(name='George H.W. Bush', elect=1988, born=1924),
])
presidents

# Setting the Index of a DataFrame

In [None]:
# the columns as created from the dict keys: 'name', 'elect', 'born'
presidents.columns

In [None]:
# use the names as row labels; assigning the result (rather than passing
# inplace=True) leaves an ordinary expression that can be chained
presidents = presidents.set_index('name')

In [None]:
# 'name' now labels the rows instead of being a column
presidents

In [None]:
# who is the youngest president? the largest birth year is the latest-born;
# idxmax returns that row's label (the name) rather than the year itself
presidents['born'].idxmax()

In [None]:
# column first, then row label
presidents['born']['Bill Clinton']

In [None]:
# .loc with a row label returns that president's whole row
presidents.loc['Bill Clinton']

In [None]:
# row first, then column; presidents.loc['Bill Clinton', 'born'] does the
# same lookup in a single step
presidents.loc['Bill Clinton']['born']

In [None]:
# note that we are selecting from the presidents DataFrame here: a single
# column comes back as a Series that keeps the name index
# (pd.DataFrame(presidents['born']) would turn it back into a DataFrame)
presidents['born']

In [None]:
# presidents['born'] is indexed by name, so 2 can only be a position.
# Plain [] with an integer position on a non-integer index is deprecated
# (FutureWarning since pandas 2.1); .iloc states the intent explicitly.
presidents['born'].iloc[2]  # third row -> Bill Clinton's birth year

In [None]:
# .iloc with a row offset: the third row
presidents.iloc[2]

In [None]:
# third row by offset, then its 'born' entry by label
presidents.iloc[2]['born']

In [None]:
# the same value by label: column, then row
presidents['born']['Bill Clinton']

In [None]:
# the same value again: row label first, then column
presidents.loc['Bill Clinton']['born']

In [None]:
# the other column of the same row: the election year
presidents.loc['Bill Clinton']['elect']

# Merging Two DataFrames

In [None]:
# Second table: each listed president ('son') paired with his father.
# Bill Clinton is deliberately absent, which matters for the joins below.
presidents_dads = pd.DataFrame({
    'son': ['Barack Obama', 'George W. Bush', 'George H.W. Bush'],
    'father': ['Barack Obama, Sr.', 'George H.W. Bush', 'Prescott Bush'],
})

presidents_dads

In [None]:
# reminder: presidents is indexed by name
presidents

In [None]:
# 'name' is currently the index, not a column. The merges below join on it
# by name, but the names are rebuilt from 'name1' further down -- presumably
# because the merged result comes back without 'name' (NOTE(review): confirm
# against the merge output). So keep a copy of the names in an ordinary
# column, called 'name1' so it doesn't clash with the index name.
presidents['name1'] = presidents.index

In [None]:
# 'name1' now repeats the index as an ordinary column
presidents

In [None]:
# inner join (merge's default): only presidents whose name matches a 'son'
# in presidents_dads survive
presidents.merge(presidents_dads, left_on='name', right_on='son')

In [None]:
# same join, minus the 'son' column, which only repeats the president's name
(
    presidents
    .merge(presidents_dads, left_on='name', right_on='son')
    .drop(columns='son')
)

In [None]:
# a left join keeps every president; where presidents_dads has no matching
# 'son', the 'father' cell comes back as NaN
(
    presidents
    .merge(presidents_dads, how='left', left_on='name', right_on='son')
    .drop(columns='son')
)

In [None]:
# left join as above, then replace the NaN left by unmatched rows with
# the placeholder text 'MISSING'
final = (
    presidents
    .merge(presidents_dads, how='left', left_on='name', right_on='son')
    .drop(columns='son')
    .fillna('MISSING')
)

In [None]:
# the left-merge result, with unmatched fathers shown as 'MISSING'
final

In [None]:
# make the saved names the index; assigning the result (instead of passing
# inplace=True) keeps the step an ordinary expression that can be chained
final = final.set_index('name1')
final

In [None]:
# copy the index values (the names) into a column called 'name'
final['name'] = final.index

In [None]:
# the 'name' column now mirrors the 'name1' index
final

In [None]:
# turn the 'name1' index back into a column, restoring the default integer index
final = final.reset_index()

In [None]:
# back on a default integer index; 'name1' and 'name' are both columns
final

In [None]:
# 'name' carries the same values, so the helper column can go;
# columns= names the axis directly and assignment replaces inplace=True
final = final.drop(columns=['name1'])

In [None]:
# only 'name' is left to identify the rows
final

In [None]:
# finish with the names as the index, under their original label 'name'
# (assignment rather than inplace=True, so the step can be chained)
final = final.set_index('name')
final