# Pandas Tutorial

In [1]:
# importing libraries
import numpy as np
import pandas as pd

In [2]:
# Build a 5x4 frame holding the integers 0..19 (filled row by row),
# with explicit labels on both axes.
row_names = ['Row1', 'Row2', 'Row3', 'Row4', 'Row5']
column_names = ['Column1', 'Column2', 'Column3', 'Column4']
df = pd.DataFrame(
    np.arange(0, 20).reshape(5, 4),
    index=row_names,
    columns=column_names,
)

In [3]:
# Preview the first rows; head() shows 5 rows by default, which is the whole frame here
df.head()

Unnamed: 0,Column1,Column2,Column3,Column4
Row1,0,1,2,3
Row2,4,5,6,7
Row3,8,9,10,11
Row4,12,13,14,15
Row5,16,17,18,19


In [4]:
# Exporting data to a CSV file (to_csv writes comma-separated text, not an Excel workbook)
df.to_csv('Test.csv')

In [5]:
# Accessing the elements in the dataframe
#
# Two indexers are available:
#   1. .loc[...]  -> select by the names (labels) of the rows/columns
#   2. .iloc[...] -> select by the integer positions of the rows/columns

first_row = df.loc['Row1']   # one row selected by label comes back as a Series
print(first_row)
print(type(first_row))

Column1    0
Column2    1
Column3    2
Column4    3
Name: Row1, dtype: int32
<class 'pandas.core.series.Series'>


In [6]:
# Select the first row by integer position (the row labelled 'Row1')
df.iloc[0]

Column1    0
Column2    1
Column3    2
Column4    3
Name: Row1, dtype: int32

In [7]:
# Read a single element by integer position: row 0, column 3.
# Row and column go into ONE .iloc call. The chained form df.iloc[0][3] first
# builds a Series and then indexes it with the integer 3; on a Series whose
# index holds labels ('Column1'..'Column4') that relies on positional fallback,
# which newer pandas releases deprecate.
print(df.iloc[0, 3])
print(type(df.iloc[0, 3]))

3
<class 'numpy.int32'>


In [8]:
# Accessing the 1st-row, last-column element with both indexers.
# Each lookup passes row and column to a single indexer call instead of
# chaining two [] operations: chaining creates an intermediate Series, and
# the integer key in df.iloc[0][3] depends on deprecated positional fallback.

print("Accessing elements through loc method is {}".format(df.loc['Row1', 'Column4']))
print("Accessing elements through iloc method is {}".format(df.iloc[0, 3]))
print("Type of element is {}".format(type(df.iloc[0, 3])))

Accessing elements through loc method is 3
Accessing elements through iloc method is 3
Type of element is <class 'numpy.int32'>


In [9]:
# Selecting the same block of cells (first three rows, last three columns)
# once by label and once by integer position.

wanted_rows = ['Row1', 'Row2', 'Row3']
wanted_columns = ['Column2', 'Column3', 'Column4']
picked_by_label = df.loc[wanted_rows, wanted_columns]
picked_by_position = df.iloc[0:3, 1:]   # rows 0-2, every column after the first

print("Complete Dataframe is \n {}".format(df))
print("Accessing subset of elements through loc method is \n {}".format(picked_by_label))
print("Accessing subset of elements through iloc method is \n {}".format(picked_by_position))
print("Type of dataframe is {}".format(type(picked_by_position)))

Complete Dataframe is 
       Column1  Column2  Column3  Column4
Row1        0        1        2        3
Row2        4        5        6        7
Row3        8        9       10       11
Row4       12       13       14       15
Row5       16       17       18       19
Accessing subset of elements through loc method is 
       Column2  Column3  Column4
Row1        1        2        3
Row2        5        6        7
Row3        9       10       11
Accessing subset of elements through iloc method is 
       Column2  Column3  Column4
Row1        1        2        3
Row2        5        6        7
Row3        9       10       11
Type of dataframe is <class 'pandas.core.frame.DataFrame'>


In [10]:
# Accessing a column directly by its name: a single name inside [] gives back that column as a Series

df['Column1']

Row1     0
Row2     4
Row3     8
Row4    12
Row5    16
Name: Column1, dtype: int32

In [11]:
# Passing a list of column names selects several columns at once
df[['Column1','Column3']]

Unnamed: 0,Column1,Column3
Row1,0,2
Row2,4,6
Row3,8,10
Row4,12,14
Row5,16,18


In [12]:
# Converting df into an array: this cell only displays the full frame for reference;
# the conversion itself is done in the following cell

df

Unnamed: 0,Column1,Column2,Column3,Column4
Row1,0,1,2,3
Row2,4,5,6,7
Row3,8,9,10,11
Row4,12,13,14,15
Row5,16,17,18,19


In [13]:
# Convert a slice of the frame (rows 1-3, columns from position 2 onward) into a NumPy array.
# to_numpy() is the accessor the pandas documentation recommends over .values;
# it returns the same ndarray for this all-integer selection.
df.iloc[1:4, 2:].to_numpy()

array([[ 6,  7],
       [10, 11],
       [14, 15]])

In [14]:
# Count how often each distinct value occurs in Column4 (each value appears exactly once here)
df['Column4'].value_counts()

7     1
19    1
11    1
3     1
15    1
Name: Column4, dtype: int64

In [15]:
# Finding the unique values of Column4; the result is a NumPy array
df['Column4'].unique()

array([ 3,  7, 11, 15, 19], dtype=int64)

In [16]:
# Checking for null values: isnull() marks missing cells and sum() counts them per column

df.isnull().sum()

Column1    0
Column2    0
Column3    0
Column4    0
dtype: int64

In [17]:
# Checking for NA values: pandas documents isnull() as an alias of isna(),
# so this reports the same per-column counts as the isnull() check
df.isna().sum()

Column1    0
Column2    0
Column3    0
Column4    0
dtype: int64