# Getting Started

Pandas is an essential library for working with tabular data in a variety of formats, e.g. CSV, JSON and Excel. I will focus mainly on two core pandas data structures:

1. Series
2. DataFrame

### Series

In [37]:
import numpy as np
import pandas as pd

In [2]:
obj = pd.Series([4,7,-5,3])
obj

0    4
1    7
2   -5
3    3
dtype: int64

In [3]:
# we can get the values stored inside a pandas Series
obj.values

array([ 4,  7, -5,  3])

In [5]:
# we can also get the index of a pandas Series
obj.index

RangeIndex(start=0, stop=4, step=1)

In [9]:
# an example of boolean filtering
obj[obj>2]

0    4
1    7
3    3
dtype: int64

In [10]:
# we can perform simple arithmetic operations with Series
obj*2

0     8
1    14
2   -10
3     6
dtype: int64

In [15]:
# we can create a Series object from a dict as well
sdata = {'x':-10, 'y':7, 'z':18 }
obj3 = pd.Series(sdata)
obj3

x   -10
y     7
z    18
dtype: int64

In [16]:
index = ['x','y','z','w']
obj4 = pd.Series(sdata,index=index)
obj4

x   -10.0
y     7.0
z    18.0
w     NaN
dtype: float64

In [17]:
# flag, element by element, which entries are null
obj4.isnull()

x    False
y    False
z    False
w     True
dtype: bool

### DataFrame

In [22]:
# create a DataFrame using a dict
dfdata = {'names':['ram','krishna','narasimha'],
         'order':[1,2,0],
         'enemy':['ravana','kansa','hiranyakashipu']}

df = pd.DataFrame(dfdata)
df

Unnamed: 0,names,order,enemy
0,ram,1,ravana
1,krishna,2,kansa
2,narasimha,0,hiranyakashipu


In [23]:
# get the first five rows
df.head()

Unnamed: 0,names,order,enemy
0,ram,1,ravana
1,krishna,2,kansa
2,narasimha,0,hiranyakashipu


In [24]:
# get the last five rows
df.tail()

Unnamed: 0,names,order,enemy
0,ram,1,ravana
1,krishna,2,kansa
2,narasimha,0,hiranyakashipu


In [25]:
# access a particular column
df.names

0          ram
1      krishna
2    narasimha
Name: names, dtype: object

In [26]:
df.iloc[2]

names         narasimha
order                 0
enemy    hiranyakashipu
Name: 2, dtype: object

In [35]:
# creating a new column
df['power'] = 'infinite'
df

Unnamed: 0,names,order,enemy,power
0,ram,1,ravana,infinite
1,krishna,2,kansa,infinite
2,narasimha,0,hiranyakashipu,infinite


In [36]:
# deleting a column
df.drop('power',inplace=True,axis=1)
df

Unnamed: 0,names,order,enemy
0,ram,1,ravana
1,krishna,2,kansa
2,narasimha,0,hiranyakashipu


### Dropping entries from an Axis

In [40]:
obj = pd.Series(np.arange(5),index=list('abcde'))
obj

a    0
b    1
c    2
d    3
e    4
dtype: int64

In [42]:
# drop a row from a series
obj.drop('c')

a    0
b    1
d    3
e    4
dtype: int64

In [43]:
# drop multiple rows from a series
obj.drop(['a','b'])

c    2
d    3
e    4
dtype: int64

For a DataFrame, we can drop both rows and columns quite easily.

In [49]:
data = pd.DataFrame(np.arange(16).reshape((4, 4)),
           index=['Ohio', 'Colorado', 'Utah', 'New York'],
           columns=['one', 'two', 'three', 'four'])
data

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [50]:
# by default, drop removes rows by label and returns a new object; data itself is unchanged
data.drop(['Utah','Ohio'])

Unnamed: 0,one,two,three,four
Colorado,4,5,6,7
New York,12,13,14,15


In [53]:
# we can remove a column as well by specifying the axis as 1
data.drop('three',axis=1)

Unnamed: 0,one,two,four
Ohio,0,1,3
Colorado,4,5,7
Utah,8,9,11
New York,12,13,15


In [55]:
# we can also perform the operation in place, modifying data directly
data.drop('two',axis=1,inplace=True)
data

Unnamed: 0,one,three,four
Ohio,0,2,3
Colorado,4,6,7
Utah,8,10,11
New York,12,14,15


### Indexing, Selection and Filtering

In [56]:
obj = pd.Series(np.arange(4),index=list('abcd'))
obj

a    0
b    1
c    2
d    3
dtype: int64

In [57]:
obj['b']  # select by index label

1

In [58]:
obj[2]  # select by integer position (obj.iloc[2] is the explicit positional form)

2

In [60]:
obj[2:4] # slicing a Series object

c    2
d    3
dtype: int64

In [62]:
obj[list('bd')]  # access specific labels by passing a list

b    1
d    3
dtype: int64