In [1]:
import numpy as np
from pandas import Series, DataFrame
import pandas as pd
from numpy.random import randn

In [2]:
# Build a small labelled Series to experiment with
series1 = pd.Series(data=[1, 2, 3, 4], index=list('ABCD'))
series1

A    1
B    2
C    3
D    4
dtype: int64

In [3]:
# reindex shows how pandas handles missing data: we derive a second Series
# from the first, asking for two labels ('E' and 'F') that series1 does not have.
# Labels with no existing value come back as NaN.
series2 = series1.reindex(index=list('ABCDEF'))
series2

A    1.0
B    2.0
C    3.0
D    4.0
E    NaN
F    NaN
dtype: float64

pandas gracefully fills the new index labels that have no value with NaN.

In [4]:
# Reindex again, this time passing the fill_value argument so that labels new
# to this call ('G') get 0 instead of NaN.  Labels already present in series2
# ('E', 'F') keep the NaN they already hold.
series2.reindex(index=list('ABCDEFG'), fill_value=0)

A    1.0
B    2.0
C    3.0
D    4.0
E    NaN
F    NaN
G    0.0
dtype: float64

In [5]:
# A Series with gaps in its integer index, used to demonstrate fill methods
series3 = pd.Series(['USA', 'Mexico', 'Canada'], index=[0, 5, 10])
series3

0        USA
5     Mexico
10    Canada
dtype: object

In [8]:
# series3 has a sparse index (0, 5, 10).  To reindex it onto every position,
# the lesson has us build a range covering 0 through 14 ...
ranger = range(15)
# ... and print its members, one per line, to see what it contains
print(*ranger, sep='\n')

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14


In [32]:
# Passing method='ffill' (forward fill) to reindex populates every new label
# with the most recent existing value: each value is carried forward until the
# next existing value is found, then that value is carried forward, and so on.
# The result is bound to a new name because reindex returns a new Series.
series4 = series3.reindex(index=ranger, method='ffill')
series4

0        USA
1        USA
2        USA
3        USA
4        USA
5     Mexico
6     Mexico
7     Mexico
8     Mexico
9     Mexico
10    Canada
11    Canada
12    Canada
13    Canada
14    Canada
dtype: object

On the first run, this was not the expected output and did not match the output of the video (he's using Python 2.7 and likely an earlier version of pandas). The output shown above is from the re-run after the fix described below.

The pandas 0.21.0 docs suggest the output should be as the lecture suggested
http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.reindex.html

# Answer!
In the lecture Q&A, another user discovered that the Series is effectively immutable here: reindex returns a new Series instead of changing the original.  He simply assigned the output to a new variable to achieve the
expected results.  Updating the series3.reindex code and re-running....

In [27]:
# Build a 5x5 DataFrame of random values to demonstrate filling.
# The row index deliberately excludes 'C'.
# NOTE: no seed is set in the visible cells, so the values change on each run.
df_fill = DataFrame(
    randn(5, 5),
    index=list('ABDEF'),
    columns=['column{}'.format(n) for n in range(1, 6)],
)
df_fill

Unnamed: 0,column1,column2,column3,column4,column5
A,1.27159,-1.467061,-0.511015,-0.221016,0.582158
B,2.030178,-0.366906,-1.400952,-0.460804,-0.313815
D,1.098716,-0.908437,1.235179,1.89411,1.082023
E,-0.170423,1.183865,-0.051562,-1.531815,-0.890966
F,-0.11148,-0.946,0.960293,0.766318,-0.338645


In [28]:
# Hey! 'C' was left out of the index -- reindex to add it.
# reindex must be called to add the label, and it is best practice to keep
# the result in a separate variable.  The new 'C' row has no data, so pandas
# fills it with null (NaN) values.
df_fill2 = df_fill.reindex(index=list('ABCDEF'))
df_fill2

Unnamed: 0,column1,column2,column3,column4,column5
A,1.27159,-1.467061,-0.511015,-0.221016,0.582158
B,2.030178,-0.366906,-1.400952,-0.460804,-0.313815
C,,,,,
D,1.098716,-0.908437,1.235179,1.89411,1.082023
E,-0.170423,1.183865,-0.051562,-1.531815,-0.890966
F,-0.11148,-0.946,0.960293,0.766318,-0.338645


In [29]:
# This example adds a new column.
# Best practice: build the full list of column names first
new_columns = ['column{}'.format(n) for n in range(1, 7)]

In [30]:
# NOTE: reindex returns a new DataFrame and leaves df_fill2 untouched.  The
# result is not assigned here, so displaying df_fill2 still shows five columns.
df_fill2.reindex(columns=new_columns)
df_fill2

Unnamed: 0,column1,column2,column3,column4,column5
A,1.27159,-1.467061,-0.511015,-0.221016,0.582158
B,2.030178,-0.366906,-1.400952,-0.460804,-0.313815
C,,,,,
D,1.098716,-0.908437,1.235179,1.89411,1.082023
E,-0.170423,1.183865,-0.051562,-1.531815,-0.890966
F,-0.11148,-0.946,0.960293,0.766318,-0.338645


No change... I double checked my syntax with the lecture notes: same.  On to the docs!

In [33]:
# Assign the reindexed result to a new variable; df_fill3 now carries the
# extra 'column6', which has no data and therefore displays as empty (NaN).
df_fill3 = df_fill2.reindex(columns=new_columns)
df_fill3

Unnamed: 0,column1,column2,column3,column4,column5,column6
A,1.27159,-1.467061,-0.511015,-0.221016,0.582158,
B,2.030178,-0.366906,-1.400952,-0.460804,-0.313815,
C,,,,,,
D,1.098716,-0.908437,1.235179,1.89411,1.082023,
E,-0.170423,1.183865,-0.051562,-1.531815,-0.890966,
F,-0.11148,-0.946,0.960293,0.766318,-0.338645,


# Answer!
After taking a break to solve the 'ffill' issue, I took note that, with the versions of Python/pandas I'm using, the df behaves as if it were immutable here: reindex does not change the df in place, it returns a new DataFrame.  In order to achieve the output as described in the lecture, you have to save the output to a new df variable.