In [1]:
# A pandas Series can be queried in two ways: by position or by index label.
# When a Series is created without an explicit index, position and label are
# effectively the same values. Use the iloc attribute to query by numeric
# position (counting from zero) and the loc attribute to query by label.

# Let's start with an example: students and the class each one is enrolled in,
# coming from a dictionary.

import pandas as pd
students_classes = dict(Alice='Physics',
                        Jack='Chemistry',
                        Molly='English',
                        Sam='History')
# The dictionary keys become the index labels; the values become the data.
s = pd.Series(data=students_classes)
s

Alice      Physics
Jack     Chemistry
Molly      English
Sam        History
dtype: object

In [2]:
# So, for this series, if you wanted to see the fourth entry you would use the iloc 
# attribute with the parameter 3 (positions are counted from zero).
s.iloc[3]

'History'

In [3]:
# If you wanted to see what class Molly has, you would use the loc attribute with a parameter 
# of 'Molly', which is her label in the index.
s.loc['Molly']

'English'

In [4]:
# Keep in mind that iloc and loc are not methods, they are attributes. So you don't use 
# parentheses to query them, but square brackets instead, which is called the indexing operator. 
# In Python this invokes __getitem__ or __setitem__ on the object, depending on whether you are
# reading a value or assigning one.


In [9]:
# Index labels do not have to share a type: here two labels are integers and
# two are strings. loc always matches on the label (even an integer one such
# as 101), while iloc always counts positions from zero.
students_classes = {
    100: 'Physics',
    101: 'Chemistry',
    'Molly': 'English',
    'Sam': 'History',
}
s = pd.Series(students_classes)
print(s.loc['Molly'], s.iloc[0], s.loc[101], sep='\n')


English
Physics
Chemistry


In [11]:
# Older pandas versions let the bare indexing operator guess: given an integer
# on a Series whose index is not made of integers, s[3] fell back to a
# positional lookup, as if iloc had been used. That fallback is deprecated in
# pandas 2.x and removed in pandas 3.0, where the integer is treated as a label
# and s[3] raises a KeyError for this Series.
# So be explicit: use iloc whenever you mean "the entry at this position".
students_classes = {'Alice': 'Physics',
                   'Jack': 'Chemistry',
                   'Molly': 'English',
                   'Sam': 'History'}
s = pd.Series(students_classes)
print(s.iloc[3])

History


In [12]:
# If you pass in a label such as a string, the indexing operator queries by label, as if you
# had used the label based loc attribute.
s['Molly']

'English'

In [13]:
# Average the grades the naive way: walk through the Series one value at a
# time, keep a running total, then divide by the number of grades.
grades = pd.Series([90, 80, 70, 60])

total = 0
for grade in grades:
    total = total + grade
print(total / grades.size)

75.0


In [14]:
# This works, but it's slow. Modern computers can do many tasks simultaneously, especially, 
# but not only, tasks involving mathematics.

# Pandas and the underlying numpy libraries support a method of computation called vectorization. 
# Vectorization works with most of the functions in the numpy library, including the sum function.

In [15]:
# Here's how we would really write the code, using the numpy sum function. First we need to
# import the numpy module.

import numpy as np

# Then we just call np.sum and pass in an iterable item. In this case, our pandas Series
# (grades, created in the previous cell).

total = np.sum(grades)
print(total/len(grades))   # same average as the loop version, computed with a single numpy call

75.0


In [17]:
# Now both of these methods create the same value, but is one actually faster? The Jupyter 
# Notebook has a magic function which can help. 

# First, let's create a big series of random numbers. This is used a lot when demonstrating 
# techniques with Pandas
numbers = pd.Series(np.random.randint(0,1000,10000))

# Now lets look at the top five items in that series to make sure they actually seem random. We
# can do this with the head() function
numbers.head()
%%timeit -n 100
total = 0
for number in numbers:
    total+=number

total/len(numbers)


UsageError: Line magic function `%%timeit` not found.


In [18]:
# Start from a Series that only has the default integer index 0, 1, 2.
s = pd.Series(data=[1, 2, 3])

# Assigning through loc to a label that is not in the index yet adds a new
# entry, so the index now mixes integers with the string 'History'
# (maybe a university course number).
s.loc['History'] = 102

s

0            1
1            2
2            3
History    102
dtype: int64

In [19]:
# Index labels do not have to be unique: each of Kelly's three courses is
# stored under the same 'Kelly' label.
kelly_classes = pd.Series(data=['Philosophy', 'Arts', 'Math'],
                          index=['Kelly'] * 3)
kelly_classes

Kelly    Philosophy
Kelly          Arts
Kelly          Math
dtype: object

In [21]:
students_classes = pd.Series({'Alice': 'Physics',
                   'Jack': 'Chemistry',
                   'Molly': 'English',
                   'Sam': 'History'})

# Series.append was deprecated in pandas 1.4 and removed in pandas 2.0, so it raises an
# AttributeError on current versions. pd.concat is the supported way to stack Series end to
# end: it returns a new Series and leaves both inputs unchanged.
all_students_classes = pd.concat([students_classes, kelly_classes])

# This creates a series which has our original people in it as well as all of Kelly's courses
all_students_classes

Alice       Physics
Jack      Chemistry
Molly       English
Sam         History
Kelly    Philosophy
Kelly          Arts
Kelly          Math
dtype: object

In [24]:
# 'Sam' appears only once in the index, so loc returns the single value stored under that label.
all_students_classes.loc['Sam']

'History'

In [26]:
# 'Kelly' labels three entries, so loc returns a Series containing all of them rather than
# a single value.
all_students_classes.loc['Kelly']

Kelly    Philosophy
Kelly          Arts
Kelly          Math
dtype: object