In [57]:
import numpy as np
import pandas as pd

In [58]:
# Labels -> data is organized in columns, which can have custom names
# Index -> label for rows, it can be in different formats
df = pd.DataFrame([10,20,30,40], columns=['numbers'], index=['a', 'b', 'c', 'd'])
df

Unnamed: 0,numbers
a,10
b,20
c,30
d,40


In [59]:
df.index

Index([u'a', u'b', u'c', u'd'], dtype='object')

In [60]:
df.columns

Index([u'numbers'], dtype='object')

In [61]:
df.ix['c']

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  """Entry point for launching an IPython kernel.


numbers    30
Name: c, dtype: int64

In [62]:
df.ix[['a', 'd']]

Unnamed: 0,numbers
a,10
d,40


In [63]:
df.ix[df.index[1:3]] # selection via Index object

Unnamed: 0,numbers
b,20
c,30


In [64]:
df.sum() # sum per column

numbers    100
dtype: int64

In [65]:
df.apply(lambda x: x ** 2) # same as df**2

Unnamed: 0,numbers
a,100
b,400
c,900
d,1600


In [66]:
# enlarging the DataFrame object in both dimensions is possible
df['floats'] = (1.4,2.5,3.5,4.5) 
# new column is generated
df

Unnamed: 0,numbers,floats
a,10,1.4
b,20,2.5
c,30,3.5
d,40,4.5


In [67]:
df['floats']

a    1.4
b    2.5
c    3.5
d    4.5
Name: floats, dtype: float64

In [68]:
# we can define the indices
df['names'] = pd.DataFrame(['Yves', 'Guido', 'Felix', 'Francecs'],\
                           index=['d','a', 'b','c'])
df

Unnamed: 0,numbers,floats,names
a,10,1.4,Guido
b,20,2.5,Felix
c,30,3.5,Francecs
d,40,4.5,Yves


In [69]:
# appending data kills indices
df.append({'numbers': 100, 'floats': 5.75, 'names': 'Henry'},\
          ignore_index=True)

Unnamed: 0,numbers,floats,names
0,10,1.4,Guido
1,20,2.5,Felix
2,30,3.5,Francecs
3,40,4.5,Yves
4,100,5.75,Henry


In [70]:
# so, we can append a DataFrame object, providing appropriate index
df = df.append(pd.DataFrame({'numbers': 100, 'floats': 5.75, \
                             'names': 'Henry'}, index=['z',]))
df

Unnamed: 0,floats,names,numbers
a,1.4,Guido,10
b,2.5,Felix,20
c,3.5,Francecs,30
d,4.5,Yves,40
z,5.75,Henry,100


In [71]:
# join adds a new column
df.join(pd.DataFrame([1,4,9,16,25], index=['a','b','c','d','y'],\
                    columns=['squares',]))

Unnamed: 0,floats,names,numbers,squares
a,1.4,Guido,10,1.0
b,2.5,Felix,20,4.0
c,3.5,Francecs,30,9.0
d,4.5,Yves,40,16.0
z,5.75,Henry,100,


In [72]:
# outer join to keep all data
df = df.join(pd.DataFrame([1,4,9,16,25], index=['a','b','c','d','y'],\
                    columns=['squares',]), how='outer')
df

Unnamed: 0,floats,names,numbers,squares
a,1.4,Guido,10.0,1.0
b,2.5,Felix,20.0,4.0
c,3.5,Francecs,30.0,9.0
d,4.5,Yves,40.0,16.0
y,,,,25.0
z,5.75,Henry,100.0,


In [73]:
df[['numbers', 'squares']].mean()
# column-wise mean

numbers    40.0
squares    11.0
dtype: float64