In [1]:
# The only reason for time is so that everything doesn't happen at once.
import numpy as np
import pandas as pd

## First Steps with DataFrame Class

In [2]:
# Build a one-column frame of integers whose rows are labelled 'a' through 'd'.
df = pd.DataFrame({'numbers': [10, 20, 30, 40]}, index=list('abcd'))

In [3]:
df

Unnamed: 0,numbers
a,10
b,20
c,30
d,40


In [4]:
df.index # the index values

Index([u'a', u'b', u'c', u'd'], dtype='object')

In [5]:
df.columns # the column names

Index([u'numbers'], dtype='object')

In [6]:
df.loc['c'] # selection via index

numbers    30
Name: c, dtype: int64

In [7]:
df.loc[['a','d']] # selection of multiple indices

Unnamed: 0,numbers
a,10
d,40


In [8]:
df.loc[df.index[1:3]] # selection via Index object

Unnamed: 0,numbers
b,20
c,30


In [9]:
df.sum() # sum per column

numbers    100
dtype: int64

In [10]:
df.apply(lambda column: column ** 2)  # square every element by applying the function through DataFrame.apply

Unnamed: 0,numbers
a,100
b,400
c,900
d,1600


In [11]:
df ** 2 # again square, this time NumPy-like

Unnamed: 0,numbers
a,100
b,400
c,900
d,1600


Enlarging the DataFrame object in both dimensions is possible:

In [14]:
df['floats'] = (1.5, 2.5, 3.5, 4.5) # new column is generated

In [15]:
df

Unnamed: 0,numbers,floats
a,10,1.5
b,20,2.5
c,30,3.5
d,40,4.5


In [16]:
df['floats']

a    1.5
b    2.5
c    3.5
d    4.5
Name: floats, dtype: float64

A whole DataFrame object can also be taken to define a new column. In such a case, indices are aligned automatically:

In [17]:
# The right-hand frame lists its rows in the order d, a, b, c; the values are matched to df
# by index label rather than by position (see the displayed result: 'a' gets Guido, 'd' gets Yves).
df['names'] = pd.DataFrame(['Yves', 'Guido', 'Felix', 'Francesec'], index=['d','a','b','c'])

In [18]:
df

Unnamed: 0,numbers,floats,names
a,10,1.5,Guido
b,20,2.5,Felix
c,30,3.5,Francesec
d,40,4.5,Yves


Appending data works similarly. However, the following example shows a side effect that is usually best avoided: the index
is replaced by a simple numbered index:

In [19]:
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat is the replacement.
# The dict is wrapped in a one-row DataFrame; ignore_index=True drops the existing row labels
# and renumbers all rows from 0.
pd.concat([df, pd.DataFrame([{'numbers':100, 'floats':5.75, 'names':'Henry'}])], ignore_index=True)
# temporary object; df not changed

Unnamed: 0,numbers,floats,names
0,10,1.5,Guido
1,20,2.5,Felix
2,30,3.5,Francesec
3,40,4.5,Yves
4,100,5.75,Henry


It is often better to append a DataFrame object, providing the appropriate index information. This preserves the index:

In [20]:
# pd.concat replaces DataFrame.append (removed in pandas 2.0). The new row is built with an
# explicit index, so it keeps its 'z' label and the existing labels are preserved.
# NOTE(review): with pd.concat the columns stay in df's order; the recorded output lists them
# alphabetically, presumably an artefact of the older pandas it was produced with.
df = pd.concat([df, pd.DataFrame({'numbers':100, 'floats':5.75, 'names':'Herny'}, index=['z'])])
df

Unnamed: 0,floats,names,numbers
a,1.5,Guido,10
b,2.5,Felix,20
c,3.5,Francesec,30
d,4.5,Yves,40
z,5.75,Herny,100
