In [1]:
import pandas as pd

In [2]:
import numpy as np

In [3]:
A = pd.Series([2, 4, 6], index=[0, 1, 2])
B = pd.Series([1, 3, 5], index=[1, 2, 3])

In [4]:
A

0    2
1    4
2    6
dtype: int64

In [5]:
B

1    1
2    3
3    5
dtype: int64

In [6]:
A+B

0    NaN
1    5.0
2    9.0
3    NaN
dtype: float64

In [7]:
A.add(B,fill_value=0.0)  ## fill_value stands in for the operand that is missing at a label present in only one Series (label 0 is absent from B, label 3 from A)

0    2.0
1    5.0
2    9.0
3    5.0
dtype: float64

In [8]:
A.add(B,fill_value=30.0)

0    32.0
1     5.0
2     9.0
3    35.0
dtype: float64

In [9]:
A

0    2
1    4
2    6
dtype: int64

In [10]:
B

1    1
2    3
3    5
dtype: int64

In [11]:
rng = np.random.RandomState(10)

In [12]:
rng

<mtrand.RandomState at 0x8533558>

In [13]:
rng.randint(0,20, (2,2))

array([[ 9,  4],
       [15,  0]])

In [14]:
A = pd.DataFrame(rng.randint(0,20, (2,2)), columns=list('AB'))

In [15]:
A

Unnamed: 0,A,B
0,17,16
1,17,8


In [16]:
A.stack() ### Moves the column labels into an inner index level, giving a Series keyed by (row label, column label)

0  A    17
   B    16
1  A    17
   B     8
dtype: int32

In [17]:
A.stack().mean()

14.5

In [18]:
A.mean()

A    17.0
B    12.0
dtype: float64

In [19]:
A

Unnamed: 0,A,B
0,17,16
1,17,8


In [20]:
B = pd.DataFrame(rng.randint(0, 10, (3, 3)),columns=list('BAC'))

In [21]:
B

Unnamed: 0,B,A,C
0,9,0,8
1,6,4,3
2,0,4,6


In [22]:
C = A+B

In [23]:
C

Unnamed: 0,A,B,C
0,17.0,25.0,
1,21.0,14.0,
2,,,


In [24]:
C.stack()

0  A    17.0
   B    25.0
1  A    21.0
   B    14.0
dtype: float64

In [25]:
C.stack().mean()

19.25

In [26]:
C.add(A,fill_value=17.5) ### fill_value is applied only where exactly one operand is missing; here every cell is either present in both frames or missing in both (C is NaN there and A has no such row/column), so 17.5 is never used and those cells stay NaN

Unnamed: 0,A,B,C
0,34.0,41.0,
1,38.0,22.0,
2,,,


In [27]:
A

Unnamed: 0,A,B
0,17,16
1,17,8


In [28]:
B

Unnamed: 0,B,A,C
0,9,0,8
1,6,4,3
2,0,4,6


In [29]:
A.add(B,fill_value=100)

Unnamed: 0,A,B,C
0,17.0,25.0,108.0
1,21.0,14.0,103.0
2,104.0,100.0,106.0


In [30]:
A

Unnamed: 0,A,B
0,17,16
1,17,8


In [31]:
B

Unnamed: 0,B,A,C
0,9,0,8
1,6,4,3
2,0,4,6


In [32]:
C

Unnamed: 0,A,B,C
0,17.0,25.0,
1,21.0,14.0,
2,,,


In [34]:
A ** B

Unnamed: 0,A,B,C
0,1.0,68719480000.0,
1,83521.0,262144.0,
2,,1.0,


# Handling missing data

In [35]:
d = np.array([1,2,None,4])

In [36]:
d

array([1, 2, None, 4], dtype=object)

In [37]:
d.sum()

TypeError: unsupported operand type(s) for +: 'int' and 'NoneType'

In [38]:
s = np.array([1,2,np.nan,3])

In [39]:
s

array([  1.,   2.,  nan,   3.])

In [40]:
s.sum()

nan

In [42]:
np.nansum(s)

6.0

In [43]:
d = pd.Series([1,2,np.nan,3,None])

In [44]:
d

0    1.0
1    2.0
2    NaN
3    3.0
4    NaN
dtype: float64

In [45]:
np.nansum(d)

6.0

In [46]:
d.isnull()

0    False
1    False
2     True
3    False
4     True
dtype: bool

In [47]:
d.notnull()

0     True
1     True
2    False
3     True
4    False
dtype: bool

In [48]:
d[d.notnull()]

0    1.0
1    2.0
3    3.0
dtype: float64

In [49]:
d[d.isnull()]

2   NaN
4   NaN
dtype: float64

In [50]:
d.dropna()

0    1.0
1    2.0
3    3.0
dtype: float64

In [51]:
d = pd.Series([1,2,np.nan,3,None])

In [52]:
d

0    1.0
1    2.0
2    NaN
3    3.0
4    NaN
dtype: float64

In [53]:
d.fillna(0)

0    1.0
1    2.0
2    0.0
3    3.0
4    0.0
dtype: float64

In [54]:
d.fillna(999)

0      1.0
1      2.0
2    999.0
3      3.0
4    999.0
dtype: float64

In [55]:
# Forward-fill: each NaN takes the last valid value above it.
# fillna(method=...) is deprecated in current pandas; .ffill() is the direct equivalent.
d.ffill()

0    1.0
1    2.0
2    2.0
3    3.0
4    3.0
dtype: float64

In [56]:
# Back-fill: each NaN takes the next valid value below it; a trailing NaN has
# nothing after it and therefore stays NaN.
# fillna(method=...) is deprecated in current pandas; .bfill() is the direct equivalent.
d.bfill()

0    1.0
1    2.0
2    3.0
3    3.0
4    NaN
dtype: float64

 ### Combining Datasets

In [57]:
df = pd.DataFrame({'A':['A1','A2','A3'], 'B':['B1','B2','B3']}, index=['idA','idB','idC'])

In [58]:
df

Unnamed: 0,A,B
idA,A1,B1
idB,A2,B2
idC,A3,B3


In [59]:
d1 = pd.Series(['A','B','C'], index=[1,2,3])

In [60]:
d2 = pd.Series(['D','E','F'], index=[3,5,6])

In [62]:
d1

1    A
2    B
3    C
dtype: object

In [63]:
d2

3    D
5    E
6    F
dtype: object

In [61]:
pd.concat([d1,d2])

1    A
2    B
3    C
3    D
5    E
6    F
dtype: object

In [64]:
pd.concat([d2,d1])

3    D
5    E
6    F
1    A
2    B
3    C
dtype: object

In [65]:
pd.concat([d1,d2],axis=1)

Unnamed: 0,0,1
1,A,
2,B,
3,C,D
5,,E
6,,F


In [66]:
pd.concat([d1,d2],axis=0)

1    A
2    B
3    C
3    D
5    E
6    F
dtype: object

In [67]:
# Series.append was removed in pandas 2.0; pd.concat yields the same result
# (d1's rows followed by d2's, duplicate index labels kept).
pd.concat([d1, d2])

1    A
2    B
3    C
3    D
5    E
6    F
dtype: object

In [68]:
# Series.append was removed in pandas 2.0; pd.concat yields the same result
# (d2's rows followed by d1's, duplicate index labels kept).
pd.concat([d2, d1])

3    D
5    E
6    F
1    A
2    B
3    C
dtype: object

In [69]:
## Quickly get a data-frame

def make_df(cols, ind):
    """Build a small demo DataFrame whose cells spell out their own position.

    Every cell holds the column label followed by the row label, so the
    column 'A' at index 0 contains the string 'A0'.

    Parameters
    ----------
    cols : iterable
        Column labels; a string is iterated character by character.
    ind : iterable
        Row index labels. It is iterated once per column, so it must be
        re-iterable (a list or range, not a one-shot generator).

    Returns
    -------
    pandas.DataFrame
        Frame indexed by ``ind`` with one column per entry of ``cols``.
    """
    data = {c: [str(c) + str(i) for i in ind] for c in cols}
    # Call form of print: same text as the Python 2 statement for a single
    # argument, and valid syntax on Python 3.
    print(data)
    return pd.DataFrame(data, index=ind)

In [70]:
d1 = make_df('ABC',range(3))

{'A': ['A0', 'A1', 'A2'], 'C': ['C0', 'C1', 'C2'], 'B': ['B0', 'B1', 'B2']}


In [71]:
d1

Unnamed: 0,A,B,C
0,A0,B0,C0
1,A1,B1,C1
2,A2,B2,C2


In [72]:
d2 = make_df('DEF',range(3))

{'E': ['E0', 'E1', 'E2'], 'D': ['D0', 'D1', 'D2'], 'F': ['F0', 'F1', 'F2']}


In [73]:
d2

Unnamed: 0,D,E,F
0,D0,E0,F0
1,D1,E1,F1
2,D2,E2,F2


In [75]:
pd.concat([d1,d2])

Unnamed: 0,A,B,C,D,E,F
0,A0,B0,C0,,,
1,A1,B1,C1,,,
2,A2,B2,C2,,,
0,,,,D0,E0,F0
1,,,,D1,E1,F1
2,,,,D2,E2,F2


In [76]:
d5 = make_df('ABC',[1,2])

{'A': ['A1', 'A2'], 'C': ['C1', 'C2'], 'B': ['B1', 'B2']}


In [77]:
d5

Unnamed: 0,A,B,C
1,A1,B1,C1
2,A2,B2,C2


In [78]:
df6 = make_df('BCD', [3, 4])

{'C': ['C3', 'C4'], 'B': ['B3', 'B4'], 'D': ['D3', 'D4']}


In [79]:
df6

Unnamed: 0,B,C,D
3,B3,C3,D3
4,B4,C4,D4


In [80]:
pd.concat([d5,df6], join='inner')

Unnamed: 0,B,C
1,B1,C1
2,B2,C2
3,B3,C3
4,B4,C4


In [81]:
pd.concat([d5,df6], join='outer')

Unnamed: 0,A,B,C,D
1,A1,B1,C1,
2,A2,B2,C2,
3,,B3,C3,D3
4,,B4,C4,D4
