In [1]:
import pandas as pd

In [2]:
# Load the 2004 data from a CSV file located relative to the working directory.
men2004 = pd.read_csv('men2004.csv')

In [3]:
# Display the 2004 frame (columns: Athlete, Medals).
men2004

Unnamed: 0,Athlete,Medals
0,"PHELPS, Michael",8
1,"THORPE, Ian",4
2,"SCHOEMAN, Roland",3
3,"PEIRSOL, Aaron",3
4,"CROCKER, Ian",3
5,"KITAJIMA, Kosuke",3
6,"HANSEN, Brendan",3
7,"VAN DEN HOOGENBAND, Pieter",3
8,"HACKETT, Grant",3
9,"MORITA, Tomomi",2


In [4]:
# Load the 2008 data from a CSV file located relative to the working directory.
men2008 = pd.read_csv('men2008.csv')

In [5]:
# Display the 2008 frame (columns: Athlete, Medals).
men2008

Unnamed: 0,Athlete,Medals
0,"PHELPS, Michael",8
1,"LOCHTE, Ryan",4
2,"BERNARD, Alain",3
3,"SULLIVAN, Eamon",3
4,"LAUTERSTEIN, Andrew",3
...,...,...
57,"LAGUNOV, Evgeniy",1
58,"BERENS, Ricky",1
59,"LURZ, Thomas",1
60,"MALLET, Gregory",1


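#### The use of `.append()` below stacks the rows of the calling DataFrame on top of the rows of the DataFrame passed in, preserving each frame's original index labels (so index labels are repeated here, because `ignore_index` is set to `False`)

Note: `DataFrame.append()` was deprecated in pandas 1.4 and removed in pandas 2.0; `pd.concat([men2004, men2008])` is the equivalent call.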

In [6]:
# DataFrame.append() was removed in pandas 2.0; pd.concat() is the equivalent.
# Stacks men2004 on top of men2008 and keeps each frame's own index labels,
# so labels are repeated in the result.
pd.concat([men2004, men2008], ignore_index=False)

Unnamed: 0,Athlete,Medals
0,"PHELPS, Michael",8
1,"THORPE, Ian",4
2,"SCHOEMAN, Roland",3
3,"PEIRSOL, Aaron",3
4,"CROCKER, Ian",3
...,...,...
57,"LAGUNOV, Evgeniy",1
58,"BERENS, Ricky",1
59,"LURZ, Thomas",1
60,"MALLET, Gregory",1


#### If we instead set `ignore_index` to `True`, a fresh integer index is created, but the rows keep their original order: the first DataFrame's rows stay on top of the second's (no automatic sorting)

In [7]:
# DataFrame.append() was removed in pandas 2.0; pd.concat() is the equivalent.
# ignore_index=True discards both original indexes and numbers the stacked
# rows with a fresh integer index; row order is unchanged.
pd.concat([men2004, men2008], ignore_index=True)

Unnamed: 0,Athlete,Medals
0,"PHELPS, Michael",8
1,"THORPE, Ian",4
2,"SCHOEMAN, Roland",3
3,"PEIRSOL, Aaron",3
4,"CROCKER, Ian",3
...,...,...
116,"LAGUNOV, Evgeniy",1
117,"BERENS, Ricky",1
118,"LURZ, Thomas",1
119,"MALLET, Gregory",1


#### `pd.concat()` 
`pd.concat()` is a top-level pandas function (not a DataFrame method) and gives us far more flexibility.

`axis=` The default behavior is to combine vertically (stacking rows); passing `axis=1` combines horizontally (side by side) instead.

`keys=[]` This lets us label the rows that came from each original DataFrame, producing a MultiIndex.

`names=[]` This lets us name the levels of the resulting index.

In [11]:
# Stack both frames and tag every row with a key for its source frame;
# 'Year' names the outer index level created by `keys`.
men0408 = pd.concat(
    [men2004, men2008],
    keys=[2004, 2008],
    names=['Year'],
    ignore_index=False,
)

In [12]:
# Display the combined frame with its two-level (Year, original row label) index.
men0408

Unnamed: 0_level_0,Unnamed: 1_level_0,Athlete,Medals
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2004,0,"PHELPS, Michael",8
2004,1,"THORPE, Ian",4
2004,2,"SCHOEMAN, Roland",3
2004,3,"PEIRSOL, Aaron",3
2004,4,"CROCKER, Ian",3
...,...,...,...
2008,57,"LAGUNOV, Evgeniy",1
2008,58,"BERENS, Ricky",1
2008,59,"LURZ, Thomas",1
2008,60,"MALLET, Gregory",1


#### `.reset_index()`
This method moves the existing index levels into new columns of the DataFrame and then creates a brand-new default integer index. We can also discard a former index level afterwards by chaining the `.drop()` method.

In [14]:
# Move both index levels into ordinary columns; the unnamed inner level
# appears as 'level_1' in the result.
men0408.reset_index()  # chain .drop(columns='level_1') to discard the old row labels

Unnamed: 0,Year,level_1,Athlete,Medals
0,2004,0,"PHELPS, Michael",8
1,2004,1,"THORPE, Ian",4
2,2004,2,"SCHOEMAN, Roland",3
3,2004,3,"PEIRSOL, Aaron",3
4,2004,4,"CROCKER, Ian",3
...,...,...,...,...
116,2008,57,"LAGUNOV, Evgeniy",1
117,2008,58,"BERENS, Ricky",1
118,2008,59,"LURZ, Thomas",1
119,2008,60,"MALLET, Gregory",1


In [15]:
# Preview the first rows of the 2004 frame before changing its columns.
men2004.head()

Unnamed: 0,Athlete,Medals
0,"PHELPS, Michael",8
1,"THORPE, Ian",4
2,"SCHOEMAN, Roland",3
3,"PEIRSOL, Aaron",3
4,"CROCKER, Ian",3


In [16]:
# Preview the first rows of the 2008 frame for comparison.
men2008.head()

Unnamed: 0,Athlete,Medals
0,"PHELPS, Michael",8
1,"LOCHTE, Ryan",4
2,"BERNARD, Alain",3
3,"SULLIVAN, Eamon",3
4,"LAUTERSTEIN, Andrew",3


Next, we rename a column in `men2004` (and add an extra one) to show what happens when the two DataFrames' column names are not identical.

In [17]:
# Rename by label instead of assigning a positional list: a positional list
# silently mislabels columns if their order ever differs, and raises a length
# mismatch if this cell is re-run after extra columns have been added.
men2004 = men2004.rename(columns={'Athlete': 'Name'})

In [18]:
# Add a constant column that exists only in men2004, so the two frames no
# longer share the same set of columns.
men2004['Success'] = 'Yes'

In [19]:
# Confirm the changed column layout of the 2004 frame.
men2004.head()

Unnamed: 0,Name,Medals,Success
0,"PHELPS, Michael",8,Yes
1,"THORPE, Ian",4,Yes
2,"SCHOEMAN, Roland",3,Yes
3,"PEIRSOL, Aaron",3,Yes
4,"CROCKER, Ian",3,Yes


Below, we can see that we've stacked the rows on top of each other, but there are NaN values for all of the non-shared columns.

In [20]:
# Concatenate frames whose columns differ: the result holds the union of the
# columns, with NaN wherever a frame lacks a column.
pd.concat(
    [men2004, men2008],
    keys=[2004, 2008],
    names=['Year'],
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Name,Medals,Success,Athlete
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2004,0,"PHELPS, Michael",8,Yes,
2004,1,"THORPE, Ian",4,Yes,
2004,2,"SCHOEMAN, Roland",3,Yes,
2004,3,"PEIRSOL, Aaron",3,Yes,
2004,4,"CROCKER, Ian",3,Yes,
...,...,...,...,...,...
2008,57,,1,,"LAGUNOV, Evgeniy"
2008,58,,1,,"BERENS, Ricky"
2008,59,,1,,"LURZ, Thomas"
2008,60,,1,,"MALLET, Gregory"


#### Below, we tidy up the data in the individual data frames before concatenating them

In [21]:
# Remove the demo column by rebinding the name rather than using inplace=True:
# inplace offers no performance benefit and prevents method chaining.
# `columns=` states the intent more directly than `labels=` + `axis=1`.
men2004 = men2004.drop(columns=['Success'])
men2004.head()

Unnamed: 0,Name,Medals
0,"PHELPS, Michael",8
1,"THORPE, Ian",4
2,"SCHOEMAN, Roland",3
3,"PEIRSOL, Aaron",3
4,"CROCKER, Ian",3


Because the columns are arranged identically and only labeled differently, we can overwrite the column names of one DataFrame with those of the other.

In [23]:
# Give men2008 the same column labels as men2004.
# NOTE(review): this relabels purely by position, so it relies on both frames
# having their columns in the same order - confirm before reusing elsewhere.
men2008.columns = men2004.columns
men2008.head()

Unnamed: 0,Name,Medals
0,"PHELPS, Michael",8
1,"LOCHTE, Ryan",4
2,"BERNARD, Alain",3
3,"SULLIVAN, Eamon",3
4,"LAUTERSTEIN, Andrew",3


In [24]:
# With matching column labels, the stacked result has no NaN-filled extra columns.
pd.concat(
    [men2004, men2008],
    keys=[2004, 2008],
    names=['Year'],
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Name,Medals
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2004,0,"PHELPS, Michael",8
2004,1,"THORPE, Ian",4
2004,2,"SCHOEMAN, Roland",3
2004,3,"PEIRSOL, Aaron",3
2004,4,"CROCKER, Ian",3
...,...,...,...
2008,57,"LAGUNOV, Evgeniy",1
2008,58,"BERENS, Ricky",1
2008,59,"LURZ, Thomas",1
2008,60,"MALLET, Gregory",1


Below, we read in the CSVs again, this time setting the 'Athlete' column as the index.

In [25]:
# Reload both files from disk, using the 'Athlete' column as the row index
# instead of the default integer index.
men2004 = pd.read_csv('men2004.csv', index_col='Athlete')
men2008 = pd.read_csv('men2008.csv', index_col='Athlete')
men2004.head()

Unnamed: 0_level_0,Medals
Athlete,Unnamed: 1_level_1
"PHELPS, Michael",8
"THORPE, Ian",4
"SCHOEMAN, Roland",3
"PEIRSOL, Aaron",3
"CROCKER, Ian",3


In [26]:
# Preview the 2008 frame, now indexed by athlete name.
men2008.head()

Unnamed: 0_level_0,Medals
Athlete,Unnamed: 1_level_1
"PHELPS, Michael",8
"LOCHTE, Ryan",4
"BERNARD, Alain",3
"SULLIVAN, Eamon",3
"LAUTERSTEIN, Andrew",3


`ignore_index=False` (the default) is important in this case, because otherwise the index information (the names of the athletes) would be lost.

In [28]:
# Keep the athlete-name index of each frame and add an outer level holding
# the key (2004 or 2008) of the frame each row came from.
pd.concat(
    [men2004, men2008],
    keys=[2004, 2008],
    ignore_index=False,
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Medals
Unnamed: 0_level_1,Athlete,Unnamed: 2_level_1
2004,"PHELPS, Michael",8
2004,"THORPE, Ian",4
2004,"SCHOEMAN, Roland",3
2004,"PEIRSOL, Aaron",3
2004,"CROCKER, Ian",3
...,...,...
2008,"LAGUNOV, Evgeniy",1
2008,"BERENS, Ricky",1
2008,"LURZ, Thomas",1
2008,"MALLET, Gregory",1
