In [2]:
%matplotlib inline
import pandas as pd
import numpy as np

In [3]:
from IPython.core.display import HTML
# Read both stylesheets through context managers so each file handle is closed
# as soon as its content has been read (the bare open(...).read() form leaves
# the handles open until garbage collection).
css_chunks = []
for css_path in ('../style/style-table.css', '../style/style-notebook.css'):
    with open(css_path) as css_file:
        css_chunks.append(css_file.read())
css = ''.join(css_chunks)
# Must stay the last expression of the cell so the HTML object is displayed.
HTML('<style>{}</style>'.format(css))

In [4]:
# Show the raw file first, then the parsed frame. The context manager closes
# the handle instead of leaking it.
with open('foo.csv') as csv_file:
    print(csv_file.read())
pd.read_csv('foo.csv')

date,A,B,C
20090101,a,1,2
20090102,b,3,4
20090103,c,4,5



Unnamed: 0,date,A,B,C
0,20090101,a,1,2
1,20090102,b,3,4
2,20090103,c,4,5


In [5]:
# index_col=0 uses the first column ('date') as the row index instead of a
# generated 0..n-1 index.
pd.read_csv('foo.csv', index_col=0)

Unnamed: 0_level_0,A,B,C
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
20090101,a,1,2
20090102,b,3,4
20090103,c,4,5


In [6]:
# A list in index_col builds a two-level index; entries may be given by
# position (0 -> 'date') or by column label ('A').
pd.read_csv('foo.csv', index_col=[0, 'A'])

Unnamed: 0_level_0,Unnamed: 1_level_0,B,C
date,A,Unnamed: 2_level_1,Unnamed: 3_level_1
20090101,a,1,2
20090102,b,3,4
20090103,c,4,5


In [7]:
# StringIO lives in the `StringIO` module on Python 2 and in `io` on Python 3;
# try the original location first so behaviour on Python 2 is unchanged.
try:
    from StringIO import StringIO
except ImportError:
    from io import StringIO

# Records are separated by '~' instead of a newline.
data = 'a,b,c~1,2,3~4,5,6'
# Parenthesised single-argument print works as a statement (Python 2) and as
# a function (Python 3), and matches the print(...) form used earlier.
print(data)
pd.read_csv(StringIO(data), lineterminator='~')

a,b,c~1,2,3~4,5,6


Unnamed: 0,a,b,c
0,1,2,3
1,4,5,6


In [8]:
# Every delimiter is followed by a blank, which the next cell strips.
data = 'a, b, c\n1, 2, 3\n4, 5, 6'
# print(...) with one argument is valid on both Python 2 and Python 3.
print(data)

a, b, c
1, 2, 3
4, 5, 6


In [9]:
# skipinitialspace=True drops the blank that follows each delimiter, so the
# header parses as a/b/c rather than ' b'/' c'.
pd.read_csv(StringIO(data), skipinitialspace=True)

Unnamed: 0,a,b,c
0,1,2,3
1,4,5,6


In [10]:
# Three-column CSV text: a header line followed by three integer rows.
data = '\n'.join(['a,b,c', '1,2,3', '4,5,6', '7,8,9'])

In [11]:
# print(...) with one argument is valid on both Python 2 and Python 3.
print(data)

a,b,c
1,2,3
4,5,6
7,8,9


In [12]:
# dtype=object turns off numeric type inference: values are kept as the
# strings found in the text.
df = pd.read_csv(StringIO(data), dtype=object)
df

Unnamed: 0,a,b,c
0,1,2,3
1,4,5,6
2,7,8,9


In [13]:
# First value of column 'a', fetched with a single label-based lookup.
df.loc[0, 'a']

'1'

In [14]:
# Per-column dtypes: 'b' is kept as object and 'c' is parsed as float64;
# 'a' is not listed in the mapping.
df = pd.read_csv(StringIO(data), dtype={'b': object, 'c': np.float64})

In [15]:
# First value of column 'c', fetched with a single label-based lookup.
df.loc[0, 'c']

3.0

In [16]:
# Header line plus three integer rows, reused by the header/names examples.
data = 'a,b,c\n1,2,3\n4,5,6\n7,8,9'
# print(...) with one argument is valid on both Python 2 and Python 3.
print(data)


a,b,c
1,2,3
4,5,6
7,8,9


In [17]:
# Baseline for the next cells: with default options the first line ('a,b,c')
# supplies the column names.
pd.read_csv(StringIO(data))

Unnamed: 0,a,b,c
0,1,2,3
1,4,5,6
2,7,8,9


In [18]:
# header=0 consumes the existing header line and `names` replaces it, so
# 'a,b,c' does not show up as a data row.
pd.read_csv(StringIO(data), names=['foo', 'bar', 'baz'], header=0)


Unnamed: 0,foo,bar,baz
0,1,2,3
1,4,5,6
2,7,8,9


In [19]:
# header=None says the text has no header line, so the original first line
# ('a,b,c') is read as a data row underneath the supplied names.
pd.read_csv(StringIO(data), names=['foo', 'bar', 'baz'], header=None)

Unnamed: 0,foo,bar,baz
0,a,b,c
1,1,2,3
2,4,5,6
3,7,8,9


In [20]:
# The first line is free text that is not part of the table.
data = 'skip this skip it\na,b,c\n1,2,3\n4,5,6\n7,8,9'
# print(...) with one argument is valid on both Python 2 and Python 3.
print(data)

skip this skip it
a,b,c
1,2,3
4,5,6
7,8,9


In [21]:
# header=1 takes the column names from the second line (0-based index 1);
# the line before it is discarded.
pd.read_csv(StringIO(data), header=1)

Unnamed: 0,a,b,c
0,1,2,3
1,4,5,6
2,7,8,9


In [22]:
# Four columns; the last one holds text.
data = 'a,b,c,d\n1,2,3,foo\n4,5,6,bar\n7,8,9,baz'
# Plain-text dump of the full frame for comparison. The parenthesised call is
# valid on both Python 2 and Python 3.
print(pd.read_csv(StringIO(data)))
# usecols with labels keeps only the named columns.
pd.read_csv(StringIO(data), usecols=['b', 'd'])

   a  b  c    d
0  1  2  3  foo
1  4  5  6  bar
2  7  8  9  baz


Unnamed: 0,b,d
0,2,foo
1,5,bar
2,8,baz


In [23]:
# usecols also accepts 0-based positions: 0, 2 and 3 select columns a, c and d.
pd.read_csv(StringIO(data), usecols=[0, 2, 3])

Unnamed: 0,a,c,d
0,1,3,foo
1,4,6,bar
2,7,9,baz


In [24]:
# CSV text containing an empty first line, a whitespace-only line, a '#'
# comment line and an empty line between the two data rows.
data = '\n'.join([
    '',
    'a,b,c',
    '  ',
    '# commented line',
    '1,2,3',
    '',
    '4,5,6',
])

In [25]:
# print(...) with one argument is valid on both Python 2 and Python 3.
print(data)


a,b,c
  
# commented line
1,2,3

4,5,6


In [26]:
# comment='#' drops the commented line; the empty and whitespace-only lines
# are skipped as well, leaving just the header and two data rows.
pd.read_csv(StringIO(data), comment='#')

Unnamed: 0,a,b,c
0,1,2,3
1,4,5,6


In [27]:
# Blank lines are scattered between the header and the data rows.
data = 'a,b,c\n\n1,2,3\n\n\n4,5,6'
# print(...) with one argument is valid on both Python 2 and Python 3.
print(data)

a,b,c

1,2,3


4,5,6


In [29]:
# skip_blank_lines=True: the empty lines are ignored, so only the two real
# rows are returned.
pd.read_csv(StringIO(data), skip_blank_lines=True)

Unnamed: 0,a,b,c
0,1,2,3
1,4,5,6


In [31]:
# skip_blank_lines=False: every empty line becomes an all-NaN row, and the
# columns are shown as floats (1.0, 2.0, ...) because they now hold NaN.
pd.read_csv(StringIO(data), skip_blank_lines=False)

Unnamed: 0,a,b,c
0,,,
1,1.0,2.0,3.0
2,,,
3,,,
4,4.0,5.0,6.0


In [32]:
# A comment line followed by two candidate header lines and one data row.
data = (
    '#comment\n'
    'a,b,c\n'
    'A,B,C\n'
    '1,2,3'
)

In [35]:
# The comment line is not counted when locating the header: header=1 picks
# 'A,B,C', the second non-comment line.
pd.read_csv(StringIO(data), comment='#', header=1)

Unnamed: 0,A,B,C
0,1,2,3


In [41]:
# Unlike `header`, skiprows counts raw lines including the comment line:
# skiprows=2 removes 'A,B,C' and '#comment', so 'a,b,c' becomes the header.
data = 'A,B,C\n#comment\na,b,c\n1,2,3'
pd.read_csv(StringIO(data), comment='#', skiprows=2)

Unnamed: 0,a,b,c
0,1,2,3


In [42]:
# Three comment lines, then two header/row groups. Adjacent literals inside
# the parentheses are concatenated by the compiler. The third comment reads
# 'emptyline' (no blank), exactly as in the original value.
data = (
    '# empty\n'
    '# second empty line\n'
    '# third emptyline\n'
    'X,Y,Z\n'
    '1,2,3\n'
    'A,B,C\n'
    '1,2.,4.\n'
    '5.,NaN,10.0'
)

In [43]:
# Display the repr so the embedded '\n' escapes are visible.
data

'# empty\n# second empty line\n# third emptyline\nX,Y,Z\n1,2,3\nA,B,C\n1,2.,4.\n5.,NaN,10.0'

In [45]:
# print(...) with one argument is valid on both Python 2 and Python 3.
print(data)


# empty
# second empty line
# third emptyline
X,Y,Z
1,2,3
A,B,C
1,2.,4.
5.,NaN,10.0


In [61]:
# skiprows=4 drops the first four raw lines (three comments and 'X,Y,Z');
# header=1 then picks the second remaining line, 'A,B,C', so '1,2,3' is
# discarded as well.
pd.read_csv(StringIO(data), comment='#', skiprows=4, header=1)

Unnamed: 0,A,B,C
0,1,2.0,4
1,5,,10


In [63]:
# Show the raw file. The context manager closes the handle, and print(...)
# with one argument is valid on both Python 2 and Python 3.
with open('tmp.csv') as tmp_file:
    print(tmp_file.read())

ID,level,category
Patient1,123000,x # really unpleasant
Patient2,23000,y # wouldn't take his medicine
Patient3,1234018,z # awesome



In [65]:
# Without a `comment` option the trailing '# ...' remarks are kept as part of
# the 'category' values.
df = pd.read_csv('tmp.csv')
df

Unnamed: 0,ID,level,category
0,Patient1,123000,x # really unpleasant
1,Patient2,23000,y # wouldn't take his medicine
2,Patient3,1234018,z # awesome


In [68]:
# comment='#' cuts each line at the '#', removing the trailing remarks from
# the 'category' column.
# NOTE(review): the blank that precedes '#' presumably stays in the value
# (e.g. 'x ') -- confirm, and strip it if exact matches are needed.
df = pd.read_csv('tmp.csv', comment='#')
df

Unnamed: 0,ID,level,category
0,Patient1,123000,x
1,Patient2,23000,y
2,Patient3,1234018,z
