In [1]:
import pandas as pd
import numpy as np

In [2]:
# Because pandas is designed to work with NumPy, any NumPy ufunc will work on pandas Series and DataFrame objects.
# Let's start by defining a simple Series and DataFrame.

In [3]:
# Seed the generator so that every random value in this notebook is
# reproducible after Restart Kernel -> Run All.
rng = np.random.RandomState(42)
# A Series of 10 uniform random floats in [0, 1) with the default integer index.
ser = pd.Series(rng.random(10))
print(ser)

0    0.044712
1    0.036464
2    0.716810
3    0.818711
4    0.518332
5    0.408167
6    0.198433
7    0.659751
8    0.729705
9    0.975261
dtype: float64


In [4]:
# Print the identity of the generator object; the number is specific to the
# running Python process and will differ between kernel sessions.
print(id(rng))

1006920197488


In [5]:
# Confirm that `ser` is a pandas Series.
type(ser)

pandas.core.series.Series

In [6]:
# 4x4 frame of random integers drawn from [0, 10), columns labelled A-D.
df = pd.DataFrame(
    rng.randint(0, 10, size=(4, 4)),
    columns=list('ABCD'),
)
df


Unnamed: 0,A,B,C,D
0,2,3,3,0
1,9,7,3,6
2,3,4,2,5
3,9,2,9,2


In [7]:
# Mean of each column: the result is a Series indexed by the column labels.
df.mean(axis=0)

A    5.75
B    4.00
C    4.25
D    3.25
dtype: float64

In [7]:
# Mean of each row, taken across the columns; the result is indexed by row label.
df.mean(axis=1)

0    2.25
1    4.50
2    5.75
3    3.75
dtype: float64

In [8]:
# Median of each row; axis=1 is the integer spelling of axis='columns'.
df.median(axis=1)

0    2.0
1    4.5
2    5.0
3    3.0
dtype: float64

In [9]:
# Applying a NumPy ufunc to a Series returns another Series with the index preserved.
np.exp(ser)

0    1.077949
1    1.328899
2    2.446030
3    2.273499
4    1.235629
5    1.705493
6    2.204084
7    1.998100
8    1.841966
9    1.682486
dtype: float64

In [10]:
# A slightly more complex calculation: the ufunc is applied element-wise and
# the result is a DataFrame with the same index and columns as `df`.
np.sin(df * np.pi / 4)

Unnamed: 0,A,B,C,D
0,1.224647e-16,1.0,0.707107,1.0
1,0.7071068,1.224647e-16,-1.0,-0.7071068
2,-0.7071068,0.7071068,-0.707107,1.224647e-16
3,1.0,1.224647e-16,0.0,0.7071068


In [11]:
# Two Series with partially overlapping state labels, used to demonstrate
# index alignment in binary operations.
area = pd.Series(
    [1723337, 695662, 423967],
    index=['Alaska', 'Texas', 'California'],
    name='area',
)
population = pd.Series(
    [38332521, 26448193, 19651127],
    index=['California', 'Texas', 'New York'],
    name='population',
)
area

Alaska        1723337
Texas          695662
California     423967
Name: area, dtype: int64

In [15]:
# Show the population Series for comparison with `area`; the two share only
# the 'California' and 'Texas' labels.
print(population)

California    38332521
Texas         26448193
New York      19651127
Name: population, dtype: int64


In [13]:
# Population density. The operands are aligned on their index labels: the
# result covers the union of both indexes, and a label present in only one
# Series (Alaska, New York) yields NaN.
population / area

Alaska              NaN
California    90.413926
New York            NaN
Texas         38.018740
dtype: float64

In [17]:
# Union of the two indexes, i.e. the index produced by `population / area`.
# Index.union is used instead of the `|` operator: pandas 2.0 changed `|` on
# Index objects from a set union into an element-wise logical OR, which does
# not work on string labels.
area.index.union(population.index)

Index(['Alaska', 'California', 'New York', 'Texas'], dtype='object')

In [18]:
# Two integer Series whose indexes overlap only on labels 1 and 2.
A = pd.Series({0: 2, 1: 4, 2: 6})
B = pd.Series({1: 1, 2: 3, 3: 5})
A

0    2
1    4
2    6
dtype: int64

In [19]:
# Display B; its index (1, 2, 3) is shifted by one relative to A's (0, 1, 2).
B

1    1
2    3
3    5
dtype: int64

In [20]:
# Addition aligns on index labels: labels found in only one operand (0 and 3)
# give NaN, and the result is upcast to float64 to hold them.
A + B

0    NaN
1    5.0
2    9.0
3    NaN
dtype: float64

In [21]:
# Multiplication follows the same alignment rule as addition.
A*B

0     NaN
1     4.0
2    18.0
3     NaN
dtype: float64

In [22]:
# Print both Series one after the other.
print(A, B, sep='\n')

0    2
1    4
2    6
dtype: int64
1    1
2    3
3    5
dtype: int64


In [23]:
# Mean of A truncated to an integer; used as the fill value for A.add(B, ...).
fill = A.mean().astype(int)
fill

4

In [25]:
# Same as A + B, except that a label missing from one operand is treated as
# `fill` instead of producing NaN (label 0: 2 + 4, label 3: 4 + 5).
A.add(B, fill_value=fill)

0    6.0
1    5.0
2    9.0
3    9.0
dtype: float64

In [26]:
# 3x5 frame of random integers drawn from [0, 20), columns labelled A-E.
# Note: this rebinds the name `A`, which previously held a Series.
A = pd.DataFrame(
    rng.randint(0, 20, size=(3, 5)),
    columns=['A', 'B', 'C', 'D', 'E'],
)
A

Unnamed: 0,A,B,C,D,E
0,15,17,11,19,16
1,13,17,7,4,17
2,9,4,2,19,18


In [27]:
# 3x3 frame of random integers drawn from [0, 10); the columns are deliberately
# given in a different order (B, A, C) than in frame A.
B = pd.DataFrame(
    rng.randint(0, 10, size=(3, 3)),
    columns=['B', 'A', 'C'],
)
B

Unnamed: 0,B,A,C
0,1,0,7
1,3,5,2
2,8,2,3


In [28]:
# DataFrame addition aligns on both the index and the column labels, whatever
# their order; columns present in only one frame (D and E) come out as NaN.
A + B

Unnamed: 0,A,B,C,D,E
0,15,18,18,,
1,18,20,9,,
2,11,12,5,,


In [51]:
# Manual mean over every value in A: stack() collapses the frame into a single
# Series, whose sum is divided by its length.
# Requires `A` to be a DataFrame at this point.
Data = A.stack()
Data.sum()/len(Data)

12.533333333333333

In [53]:
# Overall mean of all values in A, used as the fill value when adding B.
# Requires `A` to be a DataFrame at this point.
fill = A.stack().mean()
fill

12.533333333333333

In [33]:
# Add B to A, substituting `fill` where only one frame has a value
# (columns D and E exist only in A, so B contributes `fill` there).
A.add(B, fill_value=fill,axis=0)

Unnamed: 0,A,B,C,D,E
0,15,18,18,31.533333,28.533333
1,18,20,9,16.533333,29.533333
2,11,12,5,31.533333,30.533333


In [34]:
# Same call with axis given by name; the displayed result is identical to the
# axis=0 form.
A.add(B, fill_value=fill,axis='rows')

Unnamed: 0,A,B,C,D,E
0,15,18,18,31.533333,28.533333
1,18,20,9,16.533333,29.533333
2,11,12,5,31.533333,30.533333


In [35]:
# 3x4 NumPy array of random integers below 10.
# NOTE: this rebinds `A` from a DataFrame to a plain ndarray. Any cell that
# calls A.stack() must be run before this one; executed afterwards it fails
# with "AttributeError: 'numpy.ndarray' object has no attribute 'stack'".
A = rng.randint(10, size=(3,4))
A

array([[6, 3, 3, 1],
       [4, 2, 8, 2],
       [0, 6, 2, 9]])

In [36]:
# First row of the array.
A[0]

array([6, 3, 3, 1])

In [37]:
# NumPy broadcasting: the first row is subtracted from every row of A.
A-A[0]

array([[ 0,  0,  0,  0],
       [-2, -1,  5,  1],
       [-6,  3, -1,  8]])

In [38]:
# Wrap the array in a DataFrame with columns Q, R, S, T.
# Note: this rebinds `df`, replacing the 4x4 frame created earlier.
df =pd.DataFrame(A, columns=list('QRST'))
df

Unnamed: 0,Q,R,S,T
0,6,3,3,1
1,4,2,8,2
2,0,6,2,9


In [39]:
# .iloc selects by integer position; index 0 returns the first row as a
# Series indexed by the column labels.
df.iloc[0]

Q    6
R    3
S    3
T    1
Name: 0, dtype: int32

In [40]:
# DataFrame minus Series works row-wise by default: the Series index is
# matched against the column labels and subtracted from every row.
df - df.iloc[0]

Unnamed: 0,Q,R,S,T
0,0,0,0,0
1,-2,-1,5,1
2,-6,3,-1,8


In [41]:
# Column R as a Series indexed by the row labels.
df['R']

0    3
1    2
2    6
Name: R, dtype: int32

In [42]:
# axis=0 matches the Series against the row index, so column R is subtracted
# from every column (column R itself becomes all zeros).
df.subtract(df['R'], axis=0)

Unnamed: 0,Q,R,S,T
0,3,0,0,-2
1,2,0,6,0
2,-6,0,-4,3


In [43]:
# Counter-example: with axis=1 the Series labels (0, 1, 2) are matched against
# the column labels (Q, R, S, T). Nothing overlaps, so the result has the union
# of both label sets as columns and every entry is NaN.
df.subtract(df['R'], axis=1)

Unnamed: 0,Q,R,S,T,0,1,2
0,,,,,,,
1,,,,,,,
2,,,,,,,


In [44]:
# Display df again; the subtract calls above returned new frames and left it unchanged.
df

Unnamed: 0,Q,R,S,T
0,6,3,3,1
1,4,2,8,2
2,0,6,2,9


In [45]:
# First row, every second column (Q and S).
halfrow = df.iloc[0, ::2]
halfrow

Q    6
S    3
Name: 0, dtype: int32

In [46]:
# halfrow only carries Q and S, so alignment leaves columns R and T as NaN
# and the result is upcast to float.
df - halfrow

Unnamed: 0,Q,R,S,T
0,0.0,,0.0,
1,-2.0,,5.0,
2,-6.0,,-1.0,
