## Importing CSV files

In [1]:
# Plain-Python approach: pull every line of the CSV into a list of strings.
with open('titanic.csv') as csv_file:
    text = list(csv_file)

`text` is too long to display here; printing it would take up about 1000 lines of space on GitHub.

In [29]:
# number of lines read from the file (the header line is counted too)
len(text)

892

The pandas method:

In [5]:
import pandas as pd

In [6]:
# With default arguments the first line of the file supplies the column labels
# and pandas generates a 0-based integer row index.
pd.read_csv('titanic.csv')

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.2500,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.9250,S,
3,1,1,female,35.0,1,0,53.1000,S,C
4,0,3,male,35.0,0,0,8.0500,S,
...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,S,
887,1,1,female,19.0,0,0,30.0000,S,B
888,0,3,female,,1,2,23.4500,S,
889,1,1,male,26.0,0,0,30.0000,C,C


index_col parameter

In [7]:
# index_col='pclass' uses the pclass column as the row index instead of the
# generated integer index; pclass values repeat, so the index is not unique.
pd.read_csv('titanic.csv', index_col = 'pclass')

Unnamed: 0_level_0,survived,sex,age,sibsp,parch,fare,embarked,deck
pclass,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
3,0,male,22.0,1,0,7.2500,S,
1,1,female,38.0,1,0,71.2833,C,C
3,1,female,26.0,0,0,7.9250,S,
1,1,female,35.0,1,0,53.1000,S,C
3,0,male,35.0,0,0,8.0500,S,
...,...,...,...,...,...,...,...,...
2,0,male,27.0,0,0,13.0000,S,
1,1,female,19.0,0,0,30.0000,S,B
3,0,female,,1,2,23.4500,S,
1,1,male,26.0,0,0,30.0000,C,C


header parameter

In [16]:
# header=0 makes the first line of the file supply the column labels;
# pass header=None when the file has no header row.
pd.read_csv('titanic.csv', header = 0)

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.2500,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.9250,S,
3,1,1,female,35.0,1,0,53.1000,S,C
4,0,3,male,35.0,0,0,8.0500,S,
...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,S,
887,1,1,female,19.0,0,0,30.0000,S,B
888,0,3,female,,1,2,23.4500,S,
889,1,1,male,26.0,0,0,30.0000,C,C


names parameter to change the column labels

In [17]:
# names= supplies custom column labels (one per column, in file order).
# header=0 is kept so the file's own header line is replaced by these labels
# instead of being read as a data row.
pd.read_csv('titanic.csv', header = 0, names = ['alive', 'class', 'gender', 'age', 'sibsp', 'parch', 'price', 'emb', 'deck'])

Unnamed: 0,alive,class,gender,age,sibsp,parch,price,emb,deck
0,0,3,male,22.0,1,0,7.2500,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.9250,S,
3,1,1,female,35.0,1,0,53.1000,S,C
4,0,3,male,35.0,0,0,8.0500,S,
...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,S,
887,1,1,female,19.0,0,0,30.0000,S,B
888,0,3,female,,1,2,23.4500,S,
889,1,1,male,26.0,0,0,30.0000,C,C


usecols parameter to specify which columns to import

In [23]:
# Import only the four columns of interest and use the passenger class as the
# row index (the index column must itself be listed in usecols).
titanic = pd.read_csv(
    'titanic.csv',
    header=0,
    usecols=['survived', 'pclass', 'sex', 'age'],
    index_col='pclass',
)

In [24]:
# preview the first five rows of the reduced frame
titanic.head()

Unnamed: 0_level_0,survived,sex,age
pclass,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
3,0,male,22.0
1,1,female,38.0
3,1,female,26.0
1,1,female,35.0
3,0,male,35.0


changing column labels

In [25]:
# Relabel the three data columns in positional order (survived, sex, age).
# The index (pclass) is not part of .columns, so it is renamed separately.
titanic.columns = ['alive', 'gender', 'age']

In [27]:
# rename the index label from 'pclass' to 'class'
titanic.index.name = 'class'

In [28]:
# confirm the new column and index labels
titanic.head()

Unnamed: 0_level_0,alive,gender,age
class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
3,0,male,22.0
1,1,female,38.0
3,1,female,26.0
1,1,female,35.0
3,0,male,35.0


## Importing messy CSV files

In [1]:
import pandas as pd

In [11]:
# Messy data: the file starts with lines of free text and ends with a footer
# line, so a default import turns the first text line into the column labels
# and reads the remaining text lines as data rows.
pd.read_csv('titanic_raw.csv')

Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,This,is,the,Titanic,Dataset.
It,contains,all,passengers,that,were,aboard,the,titanic,when
the,tragic,disaster,occured,in,the,year,19,1,2
0,0,3,male,22.0,1,0,$7.25,S,
1,1,1,female,38.0,1,0,$71.2833,C,C
2,1,3,female,26.0,0,0,$7.925,S,
...,...,...,...,...,...,...,...,...,...
891,0,2,male,24.0,0,0,$10.5,S,
892,0,3,male,34.0,1,1,$14.4,S,
893,0,3,male,36.0,0,0,$7.8958,S,
End,of,the,Dataset,,,,,,


In [5]:
# Column labels for the raw file, in file order; the raw file has no usable
# header row of its own.
col_names = [
    'Survived',
    'Class',
    'Gender',
    'Age',
    'Sibsp',
    'Parch',
    'Fare',
    'Emb',
    'Deck',
]

In [12]:
# skiprows=3 drops the three leading free-text lines, skipfooter=2 drops the
# trailing footer lines, header=None because no header row remains, and names=
# provides the column labels.
# skipfooter is only supported by the Python parser engine; requesting it
# explicitly avoids the ParserWarning pandas emits when it has to fall back
# from the default C engine.
titanic = pd.read_csv('titanic_raw.csv', skiprows = 3, skipfooter = 2, header =  None, names = col_names, engine = 'python')


In [13]:
# check that the text lines are gone and the labels from col_names are applied
titanic.head()

Unnamed: 0,Survived,Class,Gender,Age,Sibsp,Parch,Fare,Emb,Deck
0,0,3,male,22.0,1,0,$7.25,S,
1,1,1,female,38.0,1,0,$71.2833,C,C
2,1,3,female,26.0,0,0,$7.925,S,
3,1,1,female,35.0,1,0,$53.1,S,C
4,0,3,male,35.0,0,0,$8.05,S,


In [16]:
titanic.to_csv('titanic_clean.csv')  # saving the clean csv to disk

In [17]:
pd.read_csv('titanic_clean.csv')

Unnamed: 0.1,Unnamed: 0,Survived,Class,Gender,Age,Sibsp,Parch,Fare,Emb,Deck
0,0,0,3,male,22.0,1,0,$7.25,S,
1,1,1,1,female,38.0,1,0,$71.2833,C,C
2,2,1,3,female,26.0,0,0,$7.925,S,
3,3,1,1,female,35.0,1,0,$53.1,S,C
4,4,0,3,male,35.0,0,0,$8.05,S,
...,...,...,...,...,...,...,...,...,...,...
889,889,1,1,male,26.0,0,0,$30.0,C,C
890,890,0,3,male,32.0,0,0,$7.75,Q,
891,891,0,2,male,24.0,0,0,$10.5,S,
892,892,0,3,male,34.0,1,1,$14.4,S,
