# Pandas Basics 
## Part 1

In [2]:
import numpy as np

In [3]:
import pandas as pd

In [4]:
# Build a Series from a list of values, pairing each value with an explicit string label.
pd.Series(data = [1,2,3,4,5], index = ['a','b','c','d','e'])

a    1
b    2
c    3
d    4
e    5
dtype: int64

In [5]:
# Label -> value mapping; insertion order (f..j) is preserved.
myDict = dict(f=6, g=7, h=8, i=9, j=10)

In [6]:
myDict

{'f': 6, 'g': 7, 'h': 8, 'i': 9, 'j': 10}

In [7]:
# Passing a dict: its keys become the index labels and its values become the data.
pd.Series(myDict)

f     6
g     7
h     8
i     9
j    10
dtype: int64

In [8]:
# Integers 1..5 labelled 'a' through 'e'.
mySeries1 = pd.Series(data=[1, 2, 3, 4, 5], index=list('abcde'))

In [9]:
mySeries1

a    1
b    2
c    3
d    4
e    5
dtype: int64

In [10]:
# Second Series: labels overlap with 'a'..'e' except that 'f' takes the place of 'd'.
mySeries2 = pd.Series(data=[1, 2, 7, 4, 2], index=list('abcfe'))

In [11]:
mySeries2

a    1
b    2
c    7
f    4
e    2
dtype: int64

In [12]:
# Addition is aligned on index labels, not on position. Labels that exist in only
# one operand ('d' and 'f') produce NaN, and the result is float64.
mySeries1 + mySeries2

a     2.0
b     4.0
c    10.0
d     NaN
e     7.0
f     NaN
dtype: float64

In [13]:
mySeries2['f']

4

In [14]:
# Stack the two Series end to end; duplicate index labels are kept as they are.
# Series.append was removed in pandas 2.0, so pd.concat is used instead.
mySeries3 = pd.concat([mySeries1, mySeries2])

In [15]:
mySeries3

a    1
b    2
c    3
d    4
e    5
a    1
b    2
c    7
f    4
e    2
dtype: int64

In [16]:
# reindex returns the values rearranged to follow the given label order (here reversed).
mySeries4 = mySeries1.reindex(['e','d','c','b','a'])

In [17]:
mySeries4

e    5
d    4
c    3
b    2
a    1
dtype: int64

## Part 2

In [18]:
from numpy.random import randn

In [19]:
# 3x4 array of samples drawn from the standard normal distribution.
# NOTE(review): no random seed is set before this call, so re-running the notebook
# will not reproduce the values shown in the recorded outputs.
mydata = randn(3,4)

In [20]:
mydata

array([[ 0.72702551,  0.8109801 , -0.29167381,  0.96218158],
       [-0.81677122, -0.69249581,  0.73772094,  0.5132407 ],
       [-1.4950023 , -0.48898661, -0.72408385, -0.08216776]])

In [21]:
# Wrap the random array in a DataFrame with named rows (R1..R3) and columns (C1..C4).
myDataFrame = pd.DataFrame(
    data=mydata,
    index=['R1', 'R2', 'R3'],
    columns=['C1', 'C2', 'C3', 'C4'],
)

In [22]:
myDataFrame

Unnamed: 0,C1,C2,C3,C4
R1,0.727026,0.81098,-0.291674,0.962182
R2,-0.816771,-0.692496,0.737721,0.513241
R3,-1.495002,-0.488987,-0.724084,-0.082168


In [23]:
# A single column label in single brackets returns that column as a Series.
myDataFrame['C1']

R1    0.727026
R2   -0.816771
R3   -1.495002
Name: C1, dtype: float64

In [24]:
# A list of labels (double brackets) returns a DataFrame, even when the list has one column.
myDataFrame[['C1']]

Unnamed: 0,C1
R1,0.727026
R2,-0.816771
R3,-1.495002


In [25]:
# New column C5: row-wise total of C1 through C4.
myDataFrame['C5'] = (
    myDataFrame['C1']
    + myDataFrame['C2']
    + myDataFrame['C3']
    + myDataFrame['C4']
)

In [26]:
myDataFrame

Unnamed: 0,C1,C2,C3,C4,C5
R1,0.727026,0.81098,-0.291674,0.962182,2.208513
R2,-0.816771,-0.692496,0.737721,0.513241,-0.258305
R3,-1.495002,-0.488987,-0.724084,-0.082168,-2.790241


#### `axis=0` -> rows, `axis=1` -> columns

In [27]:
# drop returns a new frame without column C2; myDataFrame itself still contains C2,
# as the next display shows.
myDataFrame.drop('C2', axis = 1)

Unnamed: 0,C1,C3,C4,C5
R1,0.727026,-0.291674,0.962182,2.208513
R2,-0.816771,0.737721,0.513241,-0.258305
R3,-1.495002,-0.724084,-0.082168,-2.790241


In [28]:
myDataFrame

Unnamed: 0,C1,C2,C3,C4,C5
R1,0.727026,0.81098,-0.291674,0.962182,2.208513
R2,-0.816771,-0.692496,0.737721,0.513241,-0.258305
R3,-1.495002,-0.488987,-0.724084,-0.082168,-2.790241


In [29]:
# With inplace = True the column is removed from myDataFrame itself and nothing is
# returned, so this cell produces no output.
myDataFrame.drop('C2', axis = 1, inplace = True)

In [30]:
myDataFrame

Unnamed: 0,C1,C3,C4,C5
R1,0.727026,-0.291674,0.962182,2.208513
R2,-0.816771,0.737721,0.513241,-0.258305
R3,-1.495002,-0.724084,-0.082168,-2.790241


## Part 3

In [31]:
myDataFrame.iloc[-1] # iloc selects by integer position; -1 is the last row (R3)

C1   -1.495002
C3   -0.724084
C4   -0.082168
C5   -2.790241
Name: R3, dtype: float64

In [32]:
# loc selects by label; 'R3' returns the same row as the positional lookup iloc[-1].
myDataFrame.loc['R3']

C1   -1.495002
C3   -0.724084
C4   -0.082168
C5   -2.790241
Name: R3, dtype: float64

In [33]:
# Label-based sub-frame: first list picks the rows, second list picks the columns.
myDataFrame.loc[['R1','R2'],['C4','C5']]

Unnamed: 0,C4,C5
R1,0.962182,2.208513
R2,0.513241,-0.258305


In [34]:
# Same sub-frame by position: rows 0-1 and columns 2-3, which are C4 and C5 now that
# C2 has been dropped.
myDataFrame.iloc[[0,1],[2,3]]

Unnamed: 0,C4,C5
R1,0.962182,2.208513
R2,0.513241,-0.258305


## Part 4

In [35]:
# Element-wise boolean mask: True wherever the value is strictly positive.
cond = myDataFrame.gt(0)

In [36]:
cond

Unnamed: 0,C1,C3,C4,C5
R1,True,False,True,True
R2,False,True,True,False
R3,False,False,False,False


In [37]:
# Indexing with a boolean frame keeps values where the mask is True and shows NaN elsewhere.
myDataFrame[cond] # prefix the mask with ~ (i.e. ~cond) to negate the condition

Unnamed: 0,C1,C3,C4,C5
R1,0.727026,,0.962182,2.208513
R2,,0.737721,0.513241,
R3,,,,


In [38]:
myDataFrame['C3']

R1   -0.291674
R2    0.737721
R3   -0.724084
Name: C3, dtype: float64

In [40]:
myDataFrame['C3'] > 0

R1    False
R2     True
R3    False
Name: C3, dtype: bool

In [39]:
# Indexing with a boolean Series filters rows: only rows where C3 > 0 are kept.
myDataFrame[myDataFrame['C3'] > 0]

Unnamed: 0,C1,C3,C4,C5
R2,-0.816771,0.737721,0.513241,-0.258305


In [41]:
myDataFrame

Unnamed: 0,C1,C3,C4,C5
R1,0.727026,-0.291674,0.962182,2.208513
R2,-0.816771,0.737721,0.513241,-0.258305
R3,-1.495002,-0.724084,-0.082168,-2.790241


In [42]:
myDataFrame[myDataFrame['C4'] > 0]

Unnamed: 0,C1,C3,C4,C5
R1,0.727026,-0.291674,0.962182,2.208513
R2,-0.816771,0.737721,0.513241,-0.258305


In [43]:
# Intentional failure: Python's `and` needs a single True/False from each operand, but a
# boolean Series has no unambiguous truth value, so this raises ValueError.
myDataFrame[myDataFrame['C3'] > 0 and myDataFrame['C4'] > 0]

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [44]:
# Intentional failure: & binds more tightly than >, so without parentheses this is parsed as
# myDataFrame['C3'] > (0 & myDataFrame['C4']) > 0 and raises the TypeError shown below.
myDataFrame[myDataFrame['C3'] > 0 & myDataFrame['C4'] > 0]

TypeError: cannot compare a dtyped [float64] array with a scalar of type [bool]

In [45]:
# Correct form: parenthesise each comparison, then combine the boolean Series element-wise with &.
myDataFrame[(myDataFrame['C3'] > 0) & (myDataFrame['C4'] > 0)]

Unnamed: 0,C1,C3,C4,C5
R2,-0.816771,0.737721,0.513241,-0.258305


In [46]:
myDataFrame

Unnamed: 0,C1,C3,C4,C5
R1,0.727026,-0.291674,0.962182,2.208513
R2,-0.816771,0.737721,0.513241,-0.258305
R3,-1.495002,-0.724084,-0.082168,-2.790241


In [47]:
# Row mask: True where column C3 is strictly positive.
cond1 = myDataFrame['C3'].gt(0)

In [48]:
# Row mask: True where column C4 is strictly positive.
cond2 = myDataFrame['C4'].gt(0)

In [49]:
# Storing each comparison in its own variable avoids the precedence problem: the masks
# can be combined with & without extra parentheses.
myDataFrame[cond1 & cond2]

Unnamed: 0,C1,C3,C4,C5
R2,-0.816771,0.737721,0.513241,-0.258305


In [50]:
myDataFrame

Unnamed: 0,C1,C3,C4,C5
R1,0.727026,-0.291674,0.962182,2.208513
R2,-0.816771,0.737721,0.513241,-0.258305
R3,-1.495002,-0.724084,-0.082168,-2.790241


In [51]:
# Replacement row labels: 'row1' .. 'row3'.
myNewRowIndex = [f'row{n}' for n in range(1, 4)]

In [52]:
# Replacement column labels: 'col1' .. 'col4'.
myNewColIndex = [f'col{n}' for n in range(1, 5)]

In [53]:
# Add the new labels as an ordinary column first; the following cells promote it to the index.
myDataFrame['NewIndex'] = myNewRowIndex

In [54]:
myDataFrame

Unnamed: 0,C1,C3,C4,C5,NewIndex
R1,0.727026,-0.291674,0.962182,2.208513,row1
R2,-0.816771,0.737721,0.513241,-0.258305,row2
R3,-1.495002,-0.724084,-0.082168,-2.790241,row3


In [55]:
# set_index returns a new frame indexed by the 'NewIndex' column; myDataFrame itself is
# unchanged, as the next display shows.
myDataFrame.set_index('NewIndex')

Unnamed: 0_level_0,C1,C3,C4,C5
NewIndex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
row1,0.727026,-0.291674,0.962182,2.208513
row2,-0.816771,0.737721,0.513241,-0.258305
row3,-1.495002,-0.724084,-0.082168,-2.790241


In [56]:
myDataFrame

Unnamed: 0,C1,C3,C4,C5,NewIndex
R1,0.727026,-0.291674,0.962182,2.208513,row1
R2,-0.816771,0.737721,0.513241,-0.258305,row2
R3,-1.495002,-0.724084,-0.082168,-2.790241,row3


In [57]:
# With inplace = True the index of myDataFrame itself is replaced; the previous R1..R3
# labels no longer appear in the frame.
myDataFrame.set_index('NewIndex', inplace = True)

In [58]:
myDataFrame

Unnamed: 0_level_0,C1,C3,C4,C5
NewIndex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
row1,0.727026,-0.291674,0.962182,2.208513
row2,-0.816771,0.737721,0.513241,-0.258305
row3,-1.495002,-0.724084,-0.082168,-2.790241
