# Intro to Pandas

## Reindex

In [1]:
# Basic Imports
import numpy as np
from pandas import Series, DataFrame
import pandas as pd
import numpy as np

#### Small review about Index Objects

In [14]:
# Build a small Series whose labels are the letters A-D
my_ser = Series(data=[1, 2, 3, 4], index=list('ABCD'))

# Keep a reference to its Index object for the cells below
my_index = my_ser.index

In [15]:
# Display the Index object taken from my_ser
my_index

Index(['A', 'B', 'C', 'D'], dtype='object')

In [16]:
# Grab a single label by position (position 1 is the second label)
my_index[1]

'B'

In [17]:
# Slice the Index by position: every label from position 1 onward
my_index[1:]

Index(['B', 'C', 'D'], dtype='object')

### What happens if we try to change an index value?
It can't be changed using **[ ]**

In [18]:
# An Index is immutable, so item assignment raises TypeError.
# Catch the error and print it: the demonstration stays visible, but the
# notebook still survives a "Restart Kernel -> Run All".
try:
    my_index[1] = 'M'
except TypeError as err:
    print(f"{type(err).__name__}: {err}")

TypeError: Index does not support mutable operations

### Important!

As you can see, a Series's Index cannot be modified through the **"[ ]"** indexing operator. We will learn how to reindex a Series and a DataFrame in the following code.

## Reindex

### The .reindex() method
We can reindex a Series using the reindex() method.

In [19]:
# Let's build a fresh Series to reindex
ser1 = Series(data=[1, 2, 3, 4], index=list('ABCD'))

In [20]:
# Display ser1 before reindexing
ser1

A    1
B    2
C    3
D    4
dtype: int64

In [26]:
# reindex() returns a new Series conformed to the requested labels;
# labels that are absent from ser1 ('E' and 'F') come back as NaN
ser2 = ser1.reindex(index=list('ABCDEF'))

In [27]:
# Display the reindexed Series
ser2

A    1.0
B    2.0
C    3.0
D    4.0
E    NaN
F    NaN
dtype: float64

**Note:** As you can see, pandas handles new index labels by inserting `NaN` for every label that was added. Because `NaN` is a float, the dtype also changes from `int64` to `float64`.

#### Reindexing and filling new values

In [29]:
# We can also fill in values for new index labels with fill_value.
# Note: only 'G' is new relative to ser2, so only 'G' receives 0.
# 'E' and 'F' already exist in ser2 (holding NaN) and keep those values.
ser2.reindex(index = ['A','B','C','D','E','F','G'], fill_value = 0)

A    1.0
B    2.0
C    3.0
D    4.0
E    NaN
F    NaN
G    0.0
dtype: float64

 Another method for filling values

In [30]:
# A Series of country names on a sparse integer index (0, 5, 10),
# used below to show how reindexing handles the gaps
ser3 = Series({0: 'Spain', 5: 'Germany', 10: 'France'})

# Display it
ser3

0       Spain
5     Germany
10     France
dtype: object

In [32]:
# Reindex onto every integer from 0 to 14. No fill method is passed here,
# so the labels between the original indices (0, 5, 10) come back as NaN.

ser3.reindex(index = range(15)) # pass method='ffill' as well to forward-fill the gaps instead

0       Spain
1         NaN
2         NaN
3         NaN
4         NaN
5     Germany
6         NaN
7         NaN
8         NaN
9         NaN
10     France
11        NaN
12        NaN
13        NaN
14        NaN
dtype: object

### Reindexing rows, columns or both

In [36]:
# Import NumPy's standard-normal sampler
from numpy.random import randn

# Seed the global NumPy RNG so that re-running the notebook
# regenerates exactly the same random values
np.random.seed(0)
randn_data = randn(4, 4)

# Let's make a DataFrame with some random values.
# Note that the row labels skip 'C'.
d_frame1 = DataFrame(
    data=randn_data,
    index=['A', 'B', 'D', 'E'],
    columns=['col1', 'col2', 'col3', 'col4'],
)

# Show
d_frame1

Unnamed: 0,col1,col2,col3,col4
A,0.188115,1.150984,-2.078939,0.602193
B,2.063304,-1.185261,-1.093977,1.884081
D,1.157636,-0.745683,-0.669416,-0.680024
E,0.792419,1.304869,0.835689,0.212577


#### Adding an index value that doesn't exist

In [37]:
# Row label 'C' is missing from d_frame1;
# reindexing with it included inserts an all-NaN row for 'C'
d_frame2 = d_frame1.reindex(index=list('ABCDE'))
d_frame2

Unnamed: 0,col1,col2,col3,col4
A,0.188115,1.150984,-2.078939,0.602193
B,2.063304,-1.185261,-1.093977,1.884081
C,,,,
D,1.157636,-0.745683,-0.669416,-0.680024
E,0.792419,1.304869,0.835689,0.212577


#### Reindex column

Adding a column that doesn't exist

In [38]:
# Columns can be reindexed explicitly as well; 'col5' does not exist yet
new_columns = [f'col{n}' for n in range(1, 6)]

d_frame2.reindex(columns=new_columns)

Unnamed: 0,col1,col2,col3,col4,col5
A,0.188115,1.150984,-2.078939,0.602193,
B,2.063304,-1.185261,-1.093977,1.884081,
C,,,,,
D,1.157636,-0.745683,-0.669416,-0.680024,
E,0.792419,1.304869,0.835689,0.212577,


**Note:** Here we keep the existing column names (col1 to col4) spelled exactly as they were, so their data is preserved!

#### What happens if we rename all columns?

In [39]:
# Same column names, but upper-cased: none of them match an existing label
upper_columns = [f'COL{n}' for n in range(1, 6)]

d_frame2.reindex(columns=upper_columns)

Unnamed: 0,COL1,COL2,COL3,COL4,COL5
A,,,,,
B,,,,,
C,,,,,
D,,,,,
E,,,,,


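If we change the name of every column, they will all be considered new columns, so every value comes back as NaN. `reindex` selects by existing labels; it does not rename them.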

In [40]:
# Rows and columns can also be reindexed together in a single call.
# (Label-based selection with .loc is covered later; .iloc is position-based.)

# Show the original DataFrame first
d_frame1

Unnamed: 0,col1,col2,col3,col4
A,0.188115,1.150984,-2.078939,0.602193
B,2.063304,-1.185261,-1.093977,1.884081
D,1.157636,-0.745683,-0.669416,-0.680024
E,0.792419,1.304869,0.835689,0.212577


In [41]:
# Reindex rows and columns in one call: the row labels are unchanged, 'col5' is new
new_columns = [f'col{n}' for n in range(1, 6)]
d_frame1.reindex(index=list('ABDE'), columns=new_columns)

Unnamed: 0,col1,col2,col3,col4,col5
A,0.188115,1.150984,-2.078939,0.602193,
B,2.063304,-1.185261,-1.093977,1.884081,
D,1.157636,-0.745683,-0.669416,-0.680024,
E,0.792419,1.304869,0.835689,0.212577,


### How to rename columns?

#### The .rename() method

In [42]:
# Show the original DataFrame before renaming any columns
d_frame1

Unnamed: 0,col1,col2,col3,col4
A,0.188115,1.150984,-2.078939,0.602193
B,2.063304,-1.185261,-1.093977,1.884081
D,1.157636,-0.745683,-0.669416,-0.680024
E,0.792419,1.304869,0.835689,0.212577


In [43]:
# rename() is NOT in-place: it returns a relabelled DataFrame
# and leaves d_frame1 itself untouched
d_frame1.rename(columns=dict(col1='A1', col2='A2'))

Unnamed: 0,A1,A2,col3,col4
A,0.188115,1.150984,-2.078939,0.602193
B,2.063304,-1.185261,-1.093977,1.884081
D,1.157636,-0.745683,-0.669416,-0.680024
E,0.792419,1.304869,0.835689,0.212577


In [44]:
# d_frame1 still shows its original column names: rename() did not modify it
d_frame1

Unnamed: 0,col1,col2,col3,col4
A,0.188115,1.150984,-2.078939,0.602193
B,2.063304,-1.185261,-1.093977,1.884081
D,1.157636,-0.745683,-0.669416,-0.680024
E,0.792419,1.304869,0.835689,0.212577


# Let's do some exercise!