In [198]:
import numpy as np
import pandas as pd

In [199]:
path = 'titanic_train.csv' #locating csv
df = pd.read_csv(path) #take note that pandas is infering the headers from the get-go. 
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [84]:
#assume that these are the column names at the start of each section below.
old_names = ['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp',
       'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked']

In [200]:
#We can change the old names to these. It's good practice to lowercase and add underscores
new_names = ['passenger_id', 'number_survived', 'p_class', 'passenger_name',
       'passenger_sex', 'passenger_age', 'relative_type', 'parch_number',
       'ticket_number', 'fare_index', 'cabin_number', 'embarked_status']

## These three are the five most straightforward ways to replace columns names

In [201]:
#changing headers when you import the csv
columns = ['passenger_id', 'survived', 'pclass', 'name', 'sex', 'age', 'sib_sp',
       'parch', 'ticket', 'fare', 'cabin', 'embarked'] #creating a list with the new columns names

df = pd.read_csv(path, names=columns, header = 0) #assigning new headers,
                                                        #make sure to use header = 0 
df.head()

Unnamed: 0,passenger_id,survived,pclass,name,sex,age,sib_sp,parch,ticket,fare,cabin,embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [98]:
# replace all of the column names by overwriting the 'columns' attribute
df_cols = ['passenger_id', 'survived', 'p_class', 'name', 'sex', 'age', 'sib_sp',
       'parch', 'ticket', 'fare', 'cabin', 'embarked']
df.columns = df_cols
df.columns

Index(['passenger_id', 'survived', 'p_class', 'name', 'sex', 'age', 'sib_sp',
       'parch', 'ticket', 'fare', 'cabin', 'embarked'],
      dtype='object')

In [203]:
#This will replace the existing names with a list names that you provide, in the order you provide them.
df.columns = ['passenger_id', 'number_survived', 'p_class', 'passenger_name',
       'passenger_sex', 'passenger_age', 'relative_type', 'parch_number',
       'ticket_number', 'fare_index', 'cabin_number', 'embarked_status']

In [204]:
# This will reassign a column name by index
df.columns.values[2] = 'c'
df.columns

Index(['passenger_id', 'number_survived', 'c', 'passenger_name',
       'passenger_sex', 'passenger_age', 'relative_type', 'parch_number',
       'ticket_number', 'fare_index', 'cabin_number', 'embarked_status'],
      dtype='object')

In [205]:
# This is like the step above, but we're reassigning multiple columns
df.columns.values[2:4] = 'd', 'e'
df.columns

Index(['passenger_id', 'number_survived', 'd', 'e', 'passenger_sex',
       'passenger_age', 'relative_type', 'parch_number', 'ticket_number',
       'fare_index', 'cabin_number', 'embarked_status'],
      dtype='object')

In [206]:
#Rename columns by using a dictionary, the 'rename' method and 'inplace = True'
df.rename(columns={'passenger_id':'PASSENGERID', 'survived':'SURVIVED'}, inplace=True)

#you can forgo using 'inplace = True' by setting the new df to itself, df = df.rename.....
df.columns

Index(['PASSENGERID', 'number_survived', 'd', 'e', 'passenger_sex',
       'passenger_age', 'relative_type', 'parch_number', 'ticket_number',
       'fare_index', 'cabin_number', 'embarked_status'],
      dtype='object')

## These are ways to clean and modify the column names

In [208]:
df.columns = new_names

In [209]:
# replace spaces with underscores in the column names by using the 'str.replace' method
df.columns = df.columns.str.replace(' ', '_')
df.columns

Index(['passenger_id', 'number_survived', 'p_class', 'passenger_name',
       'passenger_sex', 'passenger_age', 'relative_type', 'parch_number',
       'ticket_number', 'fare_index', 'cabin_number', 'embarked_status'],
      dtype='object')

In [210]:
#You can remove characters from column headers
df.columns = df.columns.str.replace('_','$')
df.columns

Index(['passenger$id', 'number$survived', 'p$class', 'passenger$name',
       'passenger$sex', 'passenger$age', 'relative$type', 'parch$number',
       'ticket$number', 'fare$index', 'cabin$number', 'embarked$status'],
      dtype='object')

In [211]:
#You can undo what you did above with lambda
df.rename(columns=lambda x: x.replace('$', ' '), inplace=True)
df.columns

Index(['passenger id', 'number survived', 'p class', 'passenger name',
       'passenger sex', 'passenger age', 'relative type', 'parch number',
       'ticket number', 'fare index', 'cabin number', 'embarked status'],
      dtype='object')

In [212]:
#this capitalizes each header
df.columns.str.upper()

Index(['PASSENGER ID', 'NUMBER SURVIVED', 'P CLASS', 'PASSENGER NAME',
       'PASSENGER SEX', 'PASSENGER AGE', 'RELATIVE TYPE', 'PARCH NUMBER',
       'TICKET NUMBER', 'FARE INDEX', 'CABIN NUMBER', 'EMBARKED STATUS'],
      dtype='object')

In [213]:
#this brings each header to lowercase
df.columns.str.lower()

Index(['passenger id', 'number survived', 'p class', 'passenger name',
       'passenger sex', 'passenger age', 'relative type', 'parch number',
       'ticket number', 'fare index', 'cabin number', 'embarked status'],
      dtype='object')

In [214]:
##this capitalizes just the first letter of each word
df.columns.str.capitalize() #OR: df.columns.str.title()

Index(['Passenger id', 'Number survived', 'P class', 'Passenger name',
       'Passenger sex', 'Passenger age', 'Relative type', 'Parch number',
       'Ticket number', 'Fare index', 'Cabin number', 'Embarked status'],
      dtype='object')

## Alternative Ways to Modify Columns Names

In [229]:
df.columns = old_names

In [218]:
#Update columns with a Lambda Function
df.rename(columns=lambda x: x[1:], inplace=True) #this 'chops' the first letter off
df.columns

Index(['assengerId', 'urvived', 'class', 'ame', 'ex', 'ge', 'ibSp', 'arch',
       'icket', 'are', 'abin', 'mbarked'],
      dtype='object')

In [222]:
#this strips any whitespace (including new lines) from the front and back of each string
df.rename(columns=lambda x: x.lstrip(), inplace=True)

In [223]:
#This replaces by zipping and then replacing the old names with new names.
df.rename(columns=dict(zip(old_names, new_names)), inplace=True)
df.columns

Index(['passenger_id', 'number_survived', 'p_class', 'passenger_name',
       'passenger_sex', 'passenger_age', 'relative_type', 'parch_number',
       'ticket_number', 'fare_index', 'cabin_number', 'embarked_status'],
      dtype='object')

In [225]:
#list comprehension. This "strips" capital letter 'S' from each column name. 
#You can replace S with whatever you want
df.columns = [col.strip('S') for col in df.columns]
df.columns

Index(['PassengerId', 'urvived', 'Pclass', 'Name', 'ex', 'Age', 'ibSp',
       'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],
      dtype='object')

In [228]:
#this chops off the first letter of each header
df.columns = df.columns.str.slice(1)
df.columns

Index(['assengerId', 'urvived', 'class', 'ame', 'ex', 'ge', 'ibSp', 'arch',
       'icket', 'are', 'abin', 'mbarked'],
      dtype='object')

In [244]:
df.columns = old_names

In [238]:
#this adds a string 'S' to the front of each column name
df.columns = 'S' + df.columns
df.columns

Index(['SPassengerId', 'SSurvived', 'SPclass', 'SName', 'SSex', 'SAge',
       'SSibSp', 'SParch', 'STicket', 'SFare', 'SCabin', 'SEmbarked'],
      dtype='object')

In [239]:
#This list comprehension will chop off the letter S if found in each column
#and basically undo what I did above
df.columns = [col[1:] if col[0] == 'S' else col for col in df]
df.columns

Index(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp',
       'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],
      dtype='object')

In [251]:
#This concatenates a list of new headers by using keys and setting keys to list of new headers
pd.concat([c for _, c in df.items()], axis=1, keys=new_names).columns

Index(['passenger_id', 'number_survived', 'p_class', 'passenger_name',
       'passenger_sex', 'passenger_age', 'relative_type', 'parch_number',
       'ticket_number', 'fare_index', 'cabin_number', 'embarked_status'],
      dtype='object')

### One to Know About but Never Use: the .name attribute

In [122]:
df.columns = old_names #resetting column names to default. Just ignore this part

In [111]:
#If you set df.columns = [list of column names], then the df.PassengerId.name 
#will be 'PassengerId'. If you set df.PassengerId.name = 'c' then df.columns 
#will still give you ['PassengerId', ...], and df.PassengerId.name will give you 'c'

In [112]:
df.columns

Index(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp',
       'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],
      dtype='object')

In [116]:
df.PassengerId.name = 'c'
df.columns[0] #indexing for just the PassengerId column. As expected, it doesn't say 'C'

'PassengerId'

In [117]:
df.PassengerId.name #but here is does

'c'