# Pandas basics
## video 1

In [1]:
import numpy as np
import pandas as pd

In [2]:
pd.Series(data=[1, 2, 3, 4], index=['a', 'b', 'c', 'd'])

a    1
b    2
c    3
d    4
dtype: int64

In [3]:
myDict = {'e': 20, 'f': 200, 'g': 2000, 'h':15}

In [4]:
myDict

{'e': 20, 'f': 200, 'g': 2000, 'h': 15}

In [5]:
pd.Series(myDict)

e      20
f     200
g    2000
h      15
dtype: int64

In [7]:
mySeriesOne = pd.Series([1, 2, 3, 4, 5], ['a', 'b', 'c', 'd', 'e'])

In [8]:
mySeriesOne

a    1
b    2
c    3
d    4
e    5
dtype: int64

In [9]:
mySeriesTwo = pd.Series([1, 2, 7, 4, 2], ['a', 'b', 'c', 'f', 'e'])

### how to add two series

In [10]:
mySeriesOne + mySeriesTwo

a     2.0
b     4.0
c    10.0
d     NaN
e     7.0
f     NaN
dtype: float64

In [12]:
mySeriesTwo['f']

4

## Video 2

In [13]:
from numpy.random import randn

In [14]:
randn(3,4)

array([[ 0.07844603,  0.7827474 ,  0.87269125, -0.86623395],
       [ 0.21923202, -1.50033388,  0.59625298, -2.63132901],
       [-0.11361944, -0.73565592, -0.25023278,  1.25048708]])

In [15]:
mydata = randn(3,4)

### how data frames are created

In [16]:
myDataFrame = pd.DataFrame(mydata, ['R1', 'R2', 'R3'], ['C1', 'C2', 'C3', 'C4'])

In [17]:
myDataFrame

Unnamed: 0,C1,C2,C3,C4
R1,-0.997805,0.195875,0.56684,-0.350797
R2,1.808099,-0.781285,0.136599,-0.194857
R3,-0.05853,-0.263455,1.153296,-0.024029


In [18]:
myDataFrame['C1']

R1   -0.997805
R2    1.808099
R3   -0.058530
Name: C1, dtype: float64

In [19]:
myDataFrame[['C1', 'C3']]

Unnamed: 0,C1,C3
R1,-0.997805,0.56684
R2,1.808099,0.136599
R3,-0.05853,1.153296


### insert Column

In [21]:
myDataFrame['C5'] = myDataFrame['C1'] * myDataFrame['C3']

In [23]:
myDataFrame

Unnamed: 0,C1,C2,C3,C4,C5
R1,-0.997805,0.195875,0.56684,-0.350797,-0.565595
R2,1.808099,-0.781285,0.136599,-0.194857,0.246985
R3,-0.05853,-0.263455,1.153296,-0.024029,-0.067503


### delete column

In [24]:
myDataFrame.drop('C2', axis=1)  #axis 0 is row and axis 1 in column

Unnamed: 0,C1,C3,C4,C5
R1,-0.997805,0.56684,-0.350797,-0.565595
R2,1.808099,0.136599,-0.194857,0.246985
R3,-0.05853,1.153296,-0.024029,-0.067503


In [25]:
myDataFrame

Unnamed: 0,C1,C2,C3,C4,C5
R1,-0.997805,0.195875,0.56684,-0.350797,-0.565595
R2,1.808099,-0.781285,0.136599,-0.194857,0.246985
R3,-0.05853,-0.263455,1.153296,-0.024029,-0.067503


In [26]:
myDataFrame.drop('C2', axis=1, inplace=True)  #inplace should be true if you want to replace permanently

In [27]:
myDataFrame

Unnamed: 0,C1,C3,C4,C5
R1,-0.997805,0.56684,-0.350797,-0.565595
R2,1.808099,0.136599,-0.194857,0.246985
R3,-0.05853,1.153296,-0.024029,-0.067503


## Video 3

### row operations

In [29]:
myDataFrame.loc['R2'] # row selection

C1    1.808099
C3    0.136599
C4   -0.194857
C5    0.246985
Name: R2, dtype: float64

In [30]:
myDataFrame.iloc[1]

C1    1.808099
C3    0.136599
C4   -0.194857
C5    0.246985
Name: R2, dtype: float64

In [32]:
myDataFrame.loc[['R1', 'R2'], ['C4', 'C5']]

Unnamed: 0,C4,C5
R1,-0.350797,-0.565595
R2,-0.194857,0.246985


In [33]:
myDataFrame.loc[['R1'], ['C4']]

Unnamed: 0,C4
R1,-0.350797


## Video 4

### conditional selection

In [34]:
myDataFrame > 0

Unnamed: 0,C1,C3,C4,C5
R1,False,True,False,False
R2,True,True,False,True
R3,False,True,False,False


In [35]:
myDataFrame[myDataFrame > 0]

Unnamed: 0,C1,C3,C4,C5
R1,,0.56684,,
R2,1.808099,0.136599,,0.246985
R3,,1.153296,,


In [36]:
myDataFrame[myDataFrame['C3'] > 0]

Unnamed: 0,C1,C3,C4,C5
R1,-0.997805,0.56684,-0.350797,-0.565595
R2,1.808099,0.136599,-0.194857,0.246985
R3,-0.05853,1.153296,-0.024029,-0.067503


In [37]:
myDataFrame[myDataFrame['C1'] > 0]

Unnamed: 0,C1,C3,C4,C5
R2,1.808099,0.136599,-0.194857,0.246985


## Video 5

In [38]:
myDataFrame

Unnamed: 0,C1,C3,C4,C5
R1,-0.997805,0.56684,-0.350797,-0.565595
R2,1.808099,0.136599,-0.194857,0.246985
R3,-0.05853,1.153296,-0.024029,-0.067503


In [39]:
myDataFrame[(myDataFrame['C1'] > 0) & (myDataFrame['C5'] > 0)]

Unnamed: 0,C1,C3,C4,C5
R2,1.808099,0.136599,-0.194857,0.246985


In [41]:
myDataFrame[(myDataFrame['C1'] > 0) & (myDataFrame['C3'] > 0)]

Unnamed: 0,C1,C3,C4,C5
R2,1.808099,0.136599,-0.194857,0.246985


In [42]:
myDataFrame

Unnamed: 0,C1,C3,C4,C5
R1,-0.997805,0.56684,-0.350797,-0.565595
R2,1.808099,0.136599,-0.194857,0.246985
R3,-0.05853,1.153296,-0.024029,-0.067503


In [43]:
myNewIndex = ['row1', 'row2', 'row3']

In [44]:
myDataFrame['NewIndex'] = myNewIndex
myDataFrame

Unnamed: 0,C1,C3,C4,C5,NewIndex
R1,-0.997805,0.56684,-0.350797,-0.565595,row1
R2,1.808099,0.136599,-0.194857,0.246985,row2
R3,-0.05853,1.153296,-0.024029,-0.067503,row3


In [45]:
myDataFrame.set_index('NewIndex')

Unnamed: 0_level_0,C1,C3,C4,C5
NewIndex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
row1,-0.997805,0.56684,-0.350797,-0.565595
row2,1.808099,0.136599,-0.194857,0.246985
row3,-0.05853,1.153296,-0.024029,-0.067503


In [46]:
myDataFrame

Unnamed: 0,C1,C3,C4,C5,NewIndex
R1,-0.997805,0.56684,-0.350797,-0.565595,row1
R2,1.808099,0.136599,-0.194857,0.246985,row2
R3,-0.05853,1.153296,-0.024029,-0.067503,row3


### Ques:- How to setup index system for columns