In [47]:
import pandas as pd

In [48]:
df = pd.DataFrame([10,20,30,40], columns=['numbers'], index=['a', 'b', 'c', 'd'])

In [49]:
df

Unnamed: 0,numbers
a,10
b,20
c,30
d,40


In [50]:
# Cell selection reverse of RC cola - Column then Row by label
df['numbers']['b']

20

In [51]:
# get rows
df.index

Index(['a', 'b', 'c', 'd'], dtype='object')

In [52]:
# get columns
df.columns

Index(['numbers'], dtype='object')

In [53]:
# lookup by row id
df.loc['c']

numbers    30
Name: c, dtype: int64

In [54]:
df.iloc[1:3] # lookup by row index inclusive to exclusive

Unnamed: 0,numbers
b,20
c,30


In [55]:
df.loc[['a', 'd']] # multi-lookup single row

Unnamed: 0,numbers
a,10
d,40


In [56]:
df.sum() # sum per column

numbers    100
dtype: int64

In [57]:
df['numbers'].apply(lambda x : x**2) # lambda function on all members of column

a     100
b     400
c     900
d    1600
Name: numbers, dtype: int64

In [58]:
df ** 2 # same on whole df

Unnamed: 0,numbers
a,100
b,400
c,900
d,1600


In [59]:
# Adding a column - provide a tuple
df['floats'] = (1.5, 2.5, 3.5, 4.5)
df

Unnamed: 0,numbers,floats
a,10,1.5
b,20,2.5
c,30,3.5
d,40,4.5


In [60]:
# add a column by adding a dataframe, specifying index mapping
df['names'] = pd.DataFrame(['Yves', 'Sandra', 'Lilli', 'Henry'], index=['d', 'a', 'b', 'c'])
df

Unnamed: 0,numbers,floats,names
a,10,1.5,Sandra
b,20,2.5,Lilli
c,30,3.5,Henry
d,40,4.5,Yves


In [78]:
# When appending, always include the index, or you lose all indices
df = df.append(pd.DataFrame({'numbers': 50, 'floats': 5.2, 'names': 'Mork'}, index=['y',]))
df

Unnamed: 0,numbers,floats,names
a,10,1.5,Sandra
b,20,2.5,Lilli
c,30,3.5,Henry
d,40,4.5,Yves
y,50,5.2,Mork
y,50,5.2,Mork
y,50,5.2,Mork


In [84]:
# Mean of specified columns - ignoring NaN
df[['numbers']].mean()

numbers    35.714286
dtype: float64

In [85]:
df[['numbers', 'floats']].std()

numbers    16.183472
dtype: float64

In [86]:
import numpy as np

In [87]:
np.random.seed(100)

In [90]:
a = np.random.standard_normal((9,4)) # standard normal - mean 0, std 1
a

array([[-0.33177714, -0.68921798,  2.03460756, -0.55071441],
       [ 0.75045333, -1.30699234,  0.58057334, -1.10452309],
       [ 0.69012147,  0.68689007, -1.56668753,  0.90497412],
       [ 0.7788224 ,  0.42823287,  0.10887199,  0.02828363],
       [-0.57882582, -1.1994512 , -1.70595201,  0.36916396],
       [ 1.87657343, -0.37690335,  1.83193608,  0.00301743],
       [-0.07602347,  0.00395759, -0.18501411, -2.48715154],
       [-1.70465121, -1.13626101, -2.97331547,  0.03331728],
       [-0.24888867, -0.45017644,  0.1324278 ,  0.02221393]])

In [91]:
# now make it a DataFrame
df = pd.DataFrame(a)
df

Unnamed: 0,0,1,2,3
0,-0.331777,-0.689218,2.034608,-0.550714
1,0.750453,-1.306992,0.580573,-1.104523
2,0.690121,0.68689,-1.566688,0.904974
3,0.778822,0.428233,0.108872,0.028284
4,-0.578826,-1.199451,-1.705952,0.369164
5,1.876573,-0.376903,1.831936,0.003017
6,-0.076023,0.003958,-0.185014,-2.487152
7,-1.704651,-1.136261,-2.973315,0.033317
8,-0.248889,-0.450176,0.132428,0.022214


In [92]:
# Assign columns after df creation
df.columns = ['No. 1', 'No. 2', 'No. 3', 'No. 4']
df

Unnamed: 0,No. 1,No. 2,No. 3,No. 4
0,-0.331777,-0.689218,2.034608,-0.550714
1,0.750453,-1.306992,0.580573,-1.104523
2,0.690121,0.68689,-1.566688,0.904974
3,0.778822,0.428233,0.108872,0.028284
4,-0.578826,-1.199451,-1.705952,0.369164
5,1.876573,-0.376903,1.831936,0.003017
6,-0.076023,0.003958,-0.185014,-2.487152
7,-1.704651,-1.136261,-2.973315,0.033317
8,-0.248889,-0.450176,0.132428,0.022214
