#Create and Use Pandas Series Objects.
## Pandas = Panel Data package. Used to handle heterogeneous and tabular data.

In [1]:
import numpy as np
import pandas as pd

## Series -- A Series combines the features of a 1D array and a dict.

In [2]:
# Build a Series from a plain Python list; with no index given, pandas
# labels the entries 0..4.
series1 = pd.Series([22, 33, 11, 44, 55])
series1

0    22
1    33
2    11
3    44
4    55
dtype: int64

## The Series *values* attribute returns the values of a Series as a 1D array.

In [3]:
series1.values

array([22, 33, 11, 44, 55])

In [4]:
series1.index

RangeIndex(start=0, stop=5, step=1)

##Slicing the Series Values using Series Index.

In [5]:
series1[0]

22

In [6]:
series1[1:3]

1    33
2    11
dtype: int64

##Filter multiple values using Fancy Indexing.

In [7]:
series1

0    22
1    33
2    11
3    44
4    55
dtype: int64

In [8]:
series1[[0,3,1]]

0    22
3    44
1    33
dtype: int64

#Conditional filtering of a series.

In [9]:
series1[series1 > 30]

1    33
3    44
4    55
dtype: int64

##Create a series object by converting a Dict object into a Series Object.

In [10]:
# PIN code -> city name lookup used to build the next Series.
PIN_code = {
    247667: 'Roorkee',
    248001: 'Dehradun',
    249401: 'Haridwar',
    263001: 'Nainital',
}

In [11]:
# A dict converts directly into a Series: the keys become the index labels
# and the values become the data.
series2 = pd.Series(PIN_code)
series2

247667     Roorkee
248001    Dehradun
249401    Haridwar
263001    Nainital
dtype: object

##Modify Indexes using Index Arguments

In [12]:
# The explicit index fixes the row order; a label that is not a key of the
# dict (249001) is filled with NaN.
series2 = pd.Series(
    data=PIN_code,
    index=[248001, 249401, 263001, 247667, 249001],
)
series2

248001    Dehradun
249401    Haridwar
263001    Nainital
247667     Roorkee
249001         NaN
dtype: object

##Name the series using the name attribute.

In [13]:
series2.name = 'CITY'; series2

248001    Dehradun
249401    Haridwar
263001    Nainital
247667     Roorkee
249001         NaN
Name: CITY, dtype: object

## Name the Series index using the *index.name* attribute.

In [14]:
series2.index.name='PINs'; series2

PINs
248001    Dehradun
249401    Haridwar
263001    Nainital
247667     Roorkee
249001         NaN
Name: CITY, dtype: object

##Modify the Series Index using the index attribute.

In [15]:
series2.index = [0,1,2,3,4]; series2

0    Dehradun
1    Haridwar
2    Nainital
3     Roorkee
4         NaN
Name: CITY, dtype: object

#DATAFRAMES

*   The DataFrame is a very important data structure in the pandas package.
*   It represents 2D data in a tabular format of rows and columns.
*   It combines the features of a 2D array and a dictionary.



###Creating Dataframes from Dictionaries.

In [16]:
# Column-oriented source data: each key is a column name, each list holds
# that column's six row values.
D = {
    'metro': ['Delhi', 'Delhi', 'Delhi', 'Mumbai', 'Mumbai', 'Mumbai'],
    'year': [2011, 2012, 2013, 2011, 2012, 2013],
    'popcr': [1.67, 1.72, 1.77, 2.07, 2.11, 2.15],
}

In [17]:
DF = pd.DataFrame(D)

In [18]:
DF

Unnamed: 0,metro,year,popcr
0,Delhi,2011,1.67
1,Delhi,2012,1.72
2,Delhi,2013,1.77
3,Mumbai,2011,2.07
4,Mumbai,2012,2.11
5,Mumbai,2013,2.15


##To modify Column and Row Indices, we can use columns and index arguments respectively.

In [19]:
# `columns` reorders the existing columns and adds 'area', which has no data
# in D and is therefore all NaN; `index` relabels the rows as 1..6.
DF = pd.DataFrame(
    D,
    columns=['metro', 'popcr', 'year', 'area'],
    index=list(range(1, 7)),
)
DF

Unnamed: 0,metro,popcr,year,area
1,Delhi,1.67,2011,
2,Delhi,1.72,2012,
3,Delhi,1.77,2013,
4,Mumbai,2.07,2011,
5,Mumbai,2.11,2012,
6,Mumbai,2.15,2013,


##Dataframes have *values* attribute, which is a 2D array object.

In [20]:
DF.values

array([['Delhi', 1.67, 2011, nan],
       ['Delhi', 1.72, 2012, nan],
       ['Delhi', 1.77, 2013, nan],
       ['Mumbai', 2.07, 2011, nan],
       ['Mumbai', 2.11, 2012, nan],
       ['Mumbai', 2.15, 2013, nan]], dtype=object)

In [21]:
DF.index

Int64Index([1, 2, 3, 4, 5, 6], dtype='int64')

In [22]:
DF.columns

Index(['metro', 'popcr', 'year', 'area'], dtype='object')

In [23]:
DF.year

1    2011
2    2012
3    2013
4    2011
5    2012
6    2013
Name: year, dtype: int64

In [24]:
DF['popcr']

1    1.67
2    1.72
3    1.77
4    2.07
5    2.11
6    2.15
Name: popcr, dtype: float64

##Slice rows along a column

In [25]:
# Positional slice made explicit with .iloc: positions 1 and 2, i.e. the rows
# labelled 2 and 3 (the row labels start at 1, so plain [1:3] is easy to misread).
DF['popcr'].iloc[1:3]

2    1.72
3    1.77
Name: popcr, dtype: float64

In [26]:
# Same selection through attribute access; .iloc keeps the slice explicitly
# positional (positions 1 and 2, i.e. the rows labelled 2 and 3).
DF.popcr.iloc[1:3]

2    1.72
3    1.77
Name: popcr, dtype: float64

##To rename the row index, use *index.name* attribute

In [27]:
DF.index.name='row_index'; DF

Unnamed: 0_level_0,metro,popcr,year,area
row_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,Delhi,1.67,2011,
2,Delhi,1.72,2012,
3,Delhi,1.77,2013,
4,Mumbai,2.07,2011,
5,Mumbai,2.11,2012,
6,Mumbai,2.15,2013,


##To rename the column index, use *columns.name* attribute

In [28]:
DF.columns.name='metro_data'; DF

metro_data,metro,popcr,year,area
row_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,Delhi,1.67,2011,
2,Delhi,1.72,2012,
3,Delhi,1.77,2013,
4,Mumbai,2.07,2011,
5,Mumbai,2.11,2012,
6,Mumbai,2.15,2013,


##Accessing DataFrame values by using below methods:

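*   loc[] -- *loc* selects by index labels; a label slice such as `1:3` includes both endpoints.
*   iloc[] -- *iloc* selects by integer position (0-based); a position slice such as `1:3` excludes the end position.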



In [29]:
DF

metro_data,metro,popcr,year,area
row_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,Delhi,1.67,2011,
2,Delhi,1.72,2012,
3,Delhi,1.77,2013,
4,Mumbai,2.07,2011,
5,Mumbai,2.11,2012,
6,Mumbai,2.15,2013,


In [30]:
DF.loc[1]

metro_data
metro    Delhi
popcr     1.67
year      2011
area       NaN
Name: 1, dtype: object

In [31]:
DF.loc[1,'metro']

'Delhi'

In [32]:
# Row labelled 1, then its first field by position. The row Series is indexed
# by column names, so a bare [0] would rely on the deprecated positional
# fallback of Series.__getitem__; .iloc states the intent explicitly.
DF.loc[1].iloc[0]

'Delhi'

In [33]:
DF.loc[1:3,'metro']

row_index
1    Delhi
2    Delhi
3    Delhi
Name: metro, dtype: object

##iloc examples

In [34]:
DF.iloc[1,0]

'Delhi'

In [35]:
DF.iloc[1:3,0]

row_index
2    Delhi
3    Delhi
Name: metro, dtype: object

In [36]:
DF.loc[1:3,['metro','popcr','year']]

metro_data,metro,popcr,year
row_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,Delhi,1.67,2011
2,Delhi,1.72,2012
3,Delhi,1.77,2013


In [37]:
DF.loc[[1,3],['metro','popcr','year']]

metro_data,metro,popcr,year
row_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,Delhi,1.67,2011
3,Delhi,1.77,2013


##iloc Examples

In [38]:
DF.iloc[[0,2],[0,1,2]]

metro_data,metro,popcr,year
row_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,Delhi,1.67,2011
3,Delhi,1.77,2013


##Conditional Filtering

In [39]:
DF

metro_data,metro,popcr,year,area
row_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,Delhi,1.67,2011,
2,Delhi,1.72,2012,
3,Delhi,1.77,2013,
4,Mumbai,2.07,2011,
5,Mumbai,2.11,2012,
6,Mumbai,2.15,2013,


In [40]:
# One .loc call does both selections: a boolean row mask (popcr above 1.8)
# and the two columns by label, instead of chaining a filter onto .iloc.
DF.loc[DF['popcr'] > 1.8, ['metro', 'popcr']]

metro_data,metro,popcr
row_index,Unnamed: 1_level_1,Unnamed: 2_level_1
4,Mumbai,2.07
5,Mumbai,2.11
6,Mumbai,2.15


##Series and DataFrame Operations

##Element Addition of values of 2 Series objects.

In [41]:
# Addition aligns on index labels, not on position: label 0 exists only in the
# left operand, so its result is NaN and the result dtype becomes float64.
pd.Series([1,2,3,4,5,6], index=range(6)) + pd.Series([1,2,3,4,5], index=range(1,6))

0     NaN
1     3.0
2     5.0
3     7.0
4     9.0
5    11.0
dtype: float64

In [42]:
series1 * 9

0    198
1    297
2     99
3    396
4    495
dtype: int64

##Initialize a Dataframe column.

In [43]:
DF['area'] = 6000; DF

metro_data,metro,popcr,year,area
row_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,Delhi,1.67,2011,6000
2,Delhi,1.72,2012,6000
3,Delhi,1.77,2013,6000
4,Mumbai,2.07,2011,6000
5,Mumbai,2.11,2012,6000
6,Mumbai,2.15,2013,6000


##Assign sequential values in the new column.

In [44]:
# One area value per row: 6000, 6100, ..., 6500 (six values for the six rows).
DF['area'] = np.arange(6000, 6600, 100)
DF

metro_data,metro,popcr,year,area
row_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,Delhi,1.67,2011,6000
2,Delhi,1.72,2012,6100
3,Delhi,1.77,2013,6200
4,Mumbai,2.07,2011,6300
5,Mumbai,2.11,2012,6400
6,Mumbai,2.15,2013,6500


##Add a non-existing column to a Dataframe using the assignment operator

In [45]:
# Assigning to a column label that does not exist yet creates the column;
# the comparison yields one boolean per row (True where metro is 'Delhi').
DF['northern'] = (DF['metro'] == 'Delhi')
DF

metro_data,metro,popcr,year,area,northern
row_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,Delhi,1.67,2011,6000,True
2,Delhi,1.72,2012,6100,True
3,Delhi,1.77,2013,6200,True
4,Mumbai,2.07,2011,6300,False
5,Mumbai,2.11,2012,6400,False
6,Mumbai,2.15,2013,6500,False


## Delete a column from a DataFrame with the *del* statement.

In [46]:
del DF['northern']
DF

metro_data,metro,popcr,year,area
row_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,Delhi,1.67,2011,6000
2,Delhi,1.72,2012,6100
3,Delhi,1.77,2013,6200
4,Mumbai,2.07,2011,6300
5,Mumbai,2.11,2012,6400
6,Mumbai,2.15,2013,6500


##Drop a column from a Dataframe, use the *drop()* method.

*   drop() does not modify the DataFrame in place: it returns a new DataFrame and the original remains intact.
*   Dropping columns( set parameter as, axis = 1)
*   Dropping rows( set parameter as, axis = 0)



In [47]:
##Dropping the columns
DF.drop(['area','year'], axis = 1)

metro_data,metro,popcr
row_index,Unnamed: 1_level_1,Unnamed: 2_level_1
1,Delhi,1.67
2,Delhi,1.72
3,Delhi,1.77
4,Mumbai,2.07
5,Mumbai,2.11
6,Mumbai,2.15


In [48]:
##Dropping the rows
DF.drop([1,3,5], axis = 0)

metro_data,metro,popcr,year,area
row_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2,Delhi,1.72,2012,6100
4,Mumbai,2.07,2011,6300
6,Mumbai,2.15,2013,6500


In [49]:
DF

metro_data,metro,popcr,year,area
row_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,Delhi,1.67,2011,6000
2,Delhi,1.72,2012,6100
3,Delhi,1.77,2013,6200
4,Mumbai,2.07,2011,6300
5,Mumbai,2.11,2012,6400
6,Mumbai,2.15,2013,6500


## Detect missing values using the *isnull()* and *notnull()* methods

In [50]:
series2

0    Dehradun
1    Haridwar
2    Nainital
3     Roorkee
4         NaN
Name: CITY, dtype: object

In [51]:
series2.isnull()

0    False
1    False
2    False
3    False
4     True
Name: CITY, dtype: bool

In [52]:
series2.notnull()

0     True
1     True
2     True
3     True
4    False
Name: CITY, dtype: bool

##Processing Dataframe and Series

*   *numpy.abs( )* -- Compute the absolute value of each element.
*   *numpy.exp( )* -- Compute the exponential of each element.
*   *div( )* -- A pandas Series/DataFrame method (not a NumPy function) that divides every value by a scalar, e.g. `DF['popcr'].div(1.2)`.

In [53]:
# Draw the 3x4 standard-normal matrix from a seeded generator so this cell and
# everything derived from A_3_4 give the same values on every Restart & Run All.
# NOTE: outputs recorded from an earlier unseeded run will differ until re-executed.
rng = np.random.default_rng(42)
A_3_4 = rng.standard_normal((3, 4))
A_3_4

array([[-0.16488536, -1.49972301,  0.9095036 ,  1.66683706],
       [ 0.24642085, -0.03134912,  1.54277571,  0.30501382],
       [ 0.92254537, -0.50317264,  0.50253909, -0.70487503]])

In [54]:
# Wrap the 2D array in a DataFrame; rows and columns get default integer labels.
DF1 = pd.DataFrame(A_3_4)
DF1

Unnamed: 0,0,1,2,3
0,-0.164885,-1.499723,0.909504,1.666837
1,0.246421,-0.031349,1.542776,0.305014
2,0.922545,-0.503173,0.502539,-0.704875


In [55]:
np.abs(DF1)

Unnamed: 0,0,1,2,3
0,0.164885,1.499723,0.909504,1.666837
1,0.246421,0.031349,1.542776,0.305014
2,0.922545,0.503173,0.502539,0.704875


In [56]:
np.exp(DF1)

Unnamed: 0,0,1,2,3
0,0.847991,0.223192,2.48309,5.295392
1,1.279438,0.969137,4.677556,1.356644
2,2.515686,0.604609,1.652913,0.49417


In [57]:
DF['popcr'].div(1.2)

row_index
1    1.391667
2    1.433333
3    1.475000
4    1.725000
5    1.758333
6    1.791667
Name: popcr, dtype: float64

#BROADCAST a Function to all columns and rows of a Dataframe

*   Use the *apply( )* method, typically with a lambda function.

In [58]:
DF

metro_data,metro,popcr,year,area
row_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,Delhi,1.67,2011,6000
2,Delhi,1.72,2012,6100
3,Delhi,1.77,2013,6200
4,Mumbai,2.07,2011,6300
5,Mumbai,2.11,2012,6400
6,Mumbai,2.15,2013,6500


In [59]:
DF['metro'].apply(lambda x: x.upper())

row_index
1     DELHI
2     DELHI
3     DELHI
4    MUMBAI
5    MUMBAI
6    MUMBAI
Name: metro, dtype: object

In [60]:
# Row-wise range: with axis=1 the lambda receives each row as a Series.
# Series.max()/min() are the vectorized pandas reductions (NaN-skipping by
# default), unlike Python's built-in max()/min(), which iterate element by element.
DF1.apply(lambda row: row.max() - row.min(), axis=1)

0    3.166560
1    1.574125
2    1.627420
dtype: float64