In [65]:
import pandas as pd
# Map each student label to a subject, then turn the mapping into a Series:
# the dictionary keys become the index labels and the values become the data.
student = dict(A='Phy', B='Chem', C='Maths', D='Bio')
s = pd.Series(data=student)
s

A      Phy
B     Chem
C    Maths
D      Bio
dtype: object

In [66]:
# Positional lookup: .iloc takes an integer position, so 3 selects the fourth entry of s
s.iloc[3]

'Bio'

In [67]:
# Label lookup: .loc takes an index label, so 'B' selects the entry whose label is B
s.loc['B']

'Chem'

In [68]:
# The indexing operator can also be applied directly to the Series itself. With a
# label index such as 'A'..'D', older pandas versions treated an integer key as a
# position, as if .iloc had been used. That fallback is deprecated: recent pandas
# versions emit a FutureWarning for s[3] and will treat integer keys as labels.
# To query by position, say so explicitly with .iloc.
s.iloc[3]

  s[3]


'Bio'

In [69]:
# If you pass a non-integer key such as a string label, the indexing operator
# queries by label, as if you had used the label-based .loc attribute.
s['C']

'Maths'

In [70]:
# What happens when the index itself is made of integers? Pandas cannot tell
# automatically whether an integer key is meant as an index position or as an
# index label, so the bare indexing operator on a Series needs care. The safer
# option is to be explicit and use the iloc or loc attributes directly.

# Example: classes indexed by their class codes, where the codes are integers
class_code = {
    99: 'Physics',
    100: 'Chemistry',
    101: 'English',
    102: 'History',
}
s = pd.Series(data=class_code)

In [71]:
# With an integer index, s[0] is looked up as the label 0 rather than as the first
# position. No such label exists here, so pandas raises KeyError. The error is the
# point of this cell, so catch and display it; the notebook can then still be run
# from top to bottom.
try:
    s[0]
except KeyError as err:
    print(f"KeyError: {err}")

KeyError: 0

In [None]:
# Average grade, computed the iterative way
grades = pd.Series([90, 80, 70, 60])

# Keep a running sum while visiting the grades one at a time,
# then divide by the number of grades
total = 0
for grade in grades:
    total = total + grade
print(total / len(grades))

In [None]:
# This works, but it's slow.
# Pandas and the underlying numpy libraries support a method of computation called vectorization. 
# Vectorization works with most of the functions in the numpy library, including the sum function.

In [None]:
import numpy as np
# Vectorized version: let numpy add up all the grades in one call,
# then divide by the number of grades as before
total = np.sum(grades)
print(total / len(grades))

In [None]:
# To test which approach is faster, build a large Series of random integers
numbers = pd.Series(np.random.randint(low=0, high=1000, size=10000))

# Peek at the first 5 elements
numbers.head(5)

In [None]:
# The IPython interpreter has something called magic functions (they begin with %).
# If we type this sign and then hit the Tab key, we
# can see a list of the available magic functions.

In [None]:
# We're going to use what's called a cell magic function. Cell magics start with two 
# percentage signs and wrap the code in the current Jupyter cell. The function we're going to use 
# is called timeit. This function will run our code a few times to determine, on average, how long 
# it takes.

# Let's run timeit with our original iterative code. You can give timeit the number of loops that 
# you would like to run with the -n option. If you don't, timeit chooses the number of loops 
# automatically so that the measurement is accurate enough. I'll ask timeit here to use 100 loops 
# so that it finishes quickly. Note that in order to use a cell magic function, it has to be the 
# first line in the cell.

In [None]:
%%timeit -n 100
# Iterative baseline: add up every element of `numbers` in a Python for loop,
# then divide by the element count to get the mean.
total = 0
for number in numbers:
    total+=number

total/len(numbers)

In [None]:
%%timeit -n 100
# Vectorized counterpart: np.sum adds up the whole Series in a single call,
# then the same division by the element count gives the mean.
total = np.sum(numbers)
total/len(numbers)

In [None]:
# Vectorization is the ability for a computer to execute multiple instructions
# at once, and with high performance chips, especially graphics cards, you can get dramatic
# speedups. Modern graphics cards can run thousands of instructions in parallel.

In [None]:
# A related feature in pandas and numpy is called broadcasting:
# an operation is applied to every value in the Series at once, changing the Series.
# For instance, to increase every random value by 2, we can do so quickly using
# the += operator directly on the Series object.

# First, look at the head of the Series before changing it
numbers.head(5)

In [None]:
# Broadcast the addition: every element of the Series is increased by 2 in one statement
numbers += 2
numbers.head(5)

In [None]:
# The procedural alternative: walk over every (label, value) pair and write
# the incremented value back into that label's slot, one element at a time
for idx, current in numbers.items():
    numbers.at[idx] = current + 2
numbers.head(5)

In [None]:
# Let's compare their times

In [None]:
%%timeit -n 10
# Build a new Series of 1000 random integers to work on. This line is part of the
# timed cell body, so creating the Series is included in the measurement.
s = pd.Series(np.random.randint(0,1000,1000))
# Element-by-element update: visit each (label, value) pair and write value + 2
# back through .loc
for label, value in s.items():
    s.loc[label]= value+2

In [None]:
%%timeit -n 10
# Recreate a Series of 1000 random integers, just as the loop-based timing cell does,
# so both measurements include the same setup work
s = pd.Series(np.random.randint(0,1000,1000))
# Broadcast the addition: += 2 is applied to the whole Series in one statement
s+=2

In [None]:
# The .loc attribute lets you not only modify data in place but also add new data
# as well. If the value you pass in as the index doesn't exist, then
# a new entry is added. And keep in mind, indices can have mixed types. 
# While it's important to be aware of the typing going on underneath, Pandas will automatically 
# change the underlying NumPy types as appropriate.

In [73]:
# Start from a small integer Series with the default 0, 1, 2 index
s = pd.Series(data=[1, 2, 3])

# Assigning through .loc to a label that does not exist yet appends a new entry;
# here the new label is a string, maybe the name of a university course
s.loc["History"] = 102

s

0            1
1            2
2            3
History    102
dtype: int64

In [72]:
# Start of an example in which index values are not unique:
# first, a Series holding one class per student, indexed by student name
students_classes = pd.Series(
    ['Physics', 'Chemistry', 'English', 'History'],
    index=['Alice', 'Jack', 'Molly', 'Sam'],
)
students_classes

Alice      Physics
Jack     Chemistry
Molly      English
Sam        History
dtype: object

In [74]:
# Now let's create a Series just for a new student, Kelly, listing all of the courses
# she has taken. Every entry gets the same index label, 'Kelly', and the data are
# the course names.
kelly_classes = pd.Series(
    data=['Philosophy', 'Arts', 'Math'],
    index=['Kelly'] * 3,
)
kelly_classes

Kelly    Philosophy
Kelly          Arts
Kelly          Math
dtype: object

In [77]:
# Stack the two Series end to end; the repeated 'Kelly' labels are kept as they are
all_students_classes = pd.concat(
    objs=[students_classes, kelly_classes],
)
all_students_classes

Alice       Physics
Jack      Chemistry
Molly       English
Sam         History
Kelly    Philosophy
Kelly          Arts
Kelly          Math
dtype: object