# Learning Pandas

In [16]:
import pandas as pd
from pandas import Series, DataFrame

import numpy as np

In [17]:
# Pandas Series
obj=pd.Series([45,-3,5,1,3])
print(obj)
print(obj.index)
print(obj.values)

0    45
1    -3
2     5
3     1
4     3
dtype: int64
RangeIndex(start=0, stop=5, step=1)
[45 -3  5  1  3]


In [18]:
# Pandas Series overriding index
obj=pd.Series([30,29,18,2,23], index=['Tharani','Balaji','Kamal','Gowtham',"Gowtham"])  
# In "Gowtham" "" will not cause any issue
print(obj)
print(obj.index)
print(obj.values)
# Printing a single index
print("Single Indexing\n",obj['Gowtham'])
print("Multi Indexing\n",obj[['Gowtham','Tharani']])

Tharani    30
Balaji     29
Kamal      18
Gowtham     2
Gowtham    23
dtype: int64
Index(['Tharani', 'Balaji', 'Kamal', 'Gowtham', 'Gowtham'], dtype='object')
[30 29 18  2 23]
Single Indexing
 Gowtham     2
Gowtham    23
dtype: int64
Multi Indexing
 Gowtham     2
Gowtham    23
Tharani    30
dtype: int64


In [19]:
# Math operation in pandas
print("Passing condition for Index\n",obj[obj>20])
print("Multiply each value with\n",obj*2)
print("Taking exponential\n",np.exp(obj)) #Index-value link is preserved even passed to numpy exponential function

Passing condition for Index
 Tharani    30
Balaji     29
Gowtham    23
dtype: int64
Multiply each value with
 Tharani    60
Balaji     58
Kamal      36
Gowtham     4
Gowtham    46
dtype: int64
Taking exponential
 Tharani    1.068647e+13
Balaji     3.931334e+12
Kamal      6.565997e+07
Gowtham    7.389056e+00
Gowtham    9.744803e+09
dtype: float64


In [20]:
# Creating a pandas series using Python dict
dictdata = {'Tharani':30,'Kamal':28,'Balaji':14,'Gowthi':2,'Gowthi':23 } # Duplicate is not allowed in Python Dict
pdseries = pd.Series(dictdata)
print(pdseries)


Tharani    30
Kamal      28
Balaji     14
Gowthi     23
dtype: int64


In [21]:
# Changing the order of the index in Series
statesorder=['somename','Balaji','Gowthi','Kamal','Tharani'] 
dictdata = {'Tharani':30,'Kamal':28,'Balaji':14,'Gowthi':2,'Gowthi':23 } # Duplicate is not allowed in Python Dict
pdseries = pd.Series(dictdata, index=statesorder)
print(pdseries)
# Here somename is not in the dictdata, So it is coming as 'NaN'(Not a Number)
print(pd.isnull(pdseries)) # It is giving True for the somename

somename     NaN
Balaji      14.0
Gowthi      23.0
Kamal       28.0
Tharani     30.0
dtype: float64
somename     True
Balaji      False
Gowthi      False
Kamal       False
Tharani     False
dtype: bool


# DataFrame

In [22]:
data = {'state':['Tamilnadu', 'Kerala', 'Andra','Delhi','Orisha'],
        'Population':[1.5,2.3,4,7.5,34],
        'Year':[2000,2002,2003,2020,2010]}
frame = pd.DataFrame(data)
frame
#  For large table frame.head will give you a first five entries

Unnamed: 0,state,Population,Year
0,Tamilnadu,1.5,2000
1,Kerala,2.3,2002
2,Andra,4.0,2003
3,Delhi,7.5,2020
4,Orisha,34.0,2010


In [23]:
# If you specify a sequence of columns, the DataFrame’s columns will be arranged in
# that order
frame = pd.DataFrame(data,columns=['Year','Population','state'])
frame

Unnamed: 0,Year,Population,state
0,2000,1.5,Tamilnadu
1,2002,2.3,Kerala
2,2003,4.0,Andra
3,2020,7.5,Delhi
4,2010,34.0,Orisha


In [24]:
# Passing extra non containing values will appear as missing values
frame = pd.DataFrame(data,columns=['Year','Population','state','Income'])
frame # Here income displayed as a Missing values

Unnamed: 0,Year,Population,state,Income
0,2000,1.5,Tamilnadu,
1,2002,2.3,Kerala,
2,2003,4.0,Andra,
3,2020,7.5,Delhi,
4,2010,34.0,Orisha,


In [25]:
# Selecting a coloum in the DataFrame
print(frame.Income)
print(frame['Population'])

0    NaN
1    NaN
2    NaN
3    NaN
4    NaN
Name: Income, dtype: object
0     1.5
1     2.3
2     4.0
3     7.5
4    34.0
Name: Population, dtype: float64


In [26]:
print(frame.loc[2]) #Print based on Location
frame['Income'] = 20 # assigning the single value to the complete colume.
print(frame)
frame['Income'] = [2,45,67,32.0,2]
print("\nChanging the value for Entire coloum\n",frame)
frame['Income'] = np.arange(5) # alternativily with nparray
print("\nChanging the value for Entire coloum\n",frame)

Year           2003
Population        4
state         Andra
Income          NaN
Name: 2, dtype: object
   Year  Population      state  Income
0  2000         1.5  Tamilnadu      20
1  2002         2.3     Kerala      20
2  2003         4.0      Andra      20
3  2020         7.5      Delhi      20
4  2010        34.0     Orisha      20

Changing the value for Entire coloum
    Year  Population      state  Income
0  2000         1.5  Tamilnadu     2.0
1  2002         2.3     Kerala    45.0
2  2003         4.0      Andra    67.0
3  2020         7.5      Delhi    32.0
4  2010        34.0     Orisha     2.0

Changing the value for Entire coloum
    Year  Population      state  Income
0  2000         1.5  Tamilnadu       0
1  2002         2.3     Kerala       1
2  2003         4.0      Andra       2
3  2020         7.5      Delhi       3
4  2010        34.0     Orisha       4


In [27]:
# Specifying the index will only update the values specified index
# Other positions are made as Nan(Not a Number)
val  = pd.Series([12,34,54],index=[0,2,3])
frame['Income'] = val
print(frame)
# Assigning a column that doesn’t exist will create a new column.
# Here 'Hello' colume name is newly inserted in the table if not exist
frame['Hello'] = val
print(frame)
# Another example
frame['MyFlag'] = frame.state == 'Kerala'
print(frame)

   Year  Population      state  Income
0  2000         1.5  Tamilnadu    12.0
1  2002         2.3     Kerala     NaN
2  2003         4.0      Andra    34.0
3  2020         7.5      Delhi    54.0
4  2010        34.0     Orisha     NaN
   Year  Population      state  Income  Hello
0  2000         1.5  Tamilnadu    12.0   12.0
1  2002         2.3     Kerala     NaN    NaN
2  2003         4.0      Andra    34.0   34.0
3  2020         7.5      Delhi    54.0   54.0
4  2010        34.0     Orisha     NaN    NaN
   Year  Population      state  Income  Hello  MyFlag
0  2000         1.5  Tamilnadu    12.0   12.0   False
1  2002         2.3     Kerala     NaN    NaN    True
2  2003         4.0      Andra    34.0   34.0   False
3  2020         7.5      Delhi    54.0   54.0   False
4  2010        34.0     Orisha     NaN    NaN   False


In [28]:
# Deleting the existing coloum
del frame['Income']
print(frame)
# del frame.state --> This wont work

   Year  Population      state  Hello  MyFlag
0  2000         1.5  Tamilnadu   12.0   False
1  2002         2.3     Kerala    NaN    True
2  2003         4.0      Andra   34.0   False
3  2020         7.5      Delhi   54.0   False
4  2010        34.0     Orisha    NaN   False


In [29]:
# Nested dict of dict in Pandas DataFrame
dictdata = {'Tamilnadu':{'coimbatore':762,'Kangeyam':876.5,'Housur':334},
            'Karnataka':{'Housur':232,'Madiwala':324,'Ec':933,'Silkboard':342}}
# Here all place names are taken as Index in nested dict
frame = pd.DataFrame(dictdata)
print(frame)
# Even you can tranpose the pd.DataFrame similar to ndarray
print("\nTransposing the frame\n",frame.T)
# If explicit index value is specified then it will inserted accordingly
frame = pd.DataFrame(dictdata, index=['coimbatore', 'Kanheyam','Ec','Silkboard'])
print("\n With given index\n",frame) # Here only specified index values are inserted

dictdata = {'Tamilnadu':frame['Tamilnadu'][:-1],
             'Karnataka':frame['Karnataka'][:]}
frame = pd.DataFrame(dictdata)
frame

            Tamilnadu  Karnataka
coimbatore      762.0        NaN
Kangeyam        876.5        NaN
Housur          334.0      232.0
Madiwala          NaN      324.0
Ec                NaN      933.0
Silkboard         NaN      342.0

Transposing the frame
            coimbatore  Kangeyam  Housur  Madiwala     Ec  Silkboard
Tamilnadu       762.0     876.5   334.0       NaN    NaN        NaN
Karnataka         NaN       NaN   232.0     324.0  933.0      342.0

 With given index
             Tamilnadu  Karnataka
coimbatore      762.0        NaN
Kanheyam          NaN        NaN
Ec                NaN      933.0
Silkboard         NaN      342.0


Unnamed: 0,Tamilnadu,Karnataka
Ec,,933.0
Kanheyam,,
Silkboard,,342.0
coimbatore,762.0,


In [30]:
# Setting name attribute for the 'index' and 'coloums'
frame.index.name='District'
frame.columns.name='State'
frame

State,Tamilnadu,Karnataka
District,Unnamed: 1_level_1,Unnamed: 2_level_1
Ec,,933.0
Kanheyam,,
Silkboard,,342.0
coimbatore,762.0,


In [31]:
# Frame.values will return the values in Two dimentional array
frame.values

array([[ nan, 933.],
       [ nan,  nan],
       [ nan, 342.],
       [762.,  nan]])

# Index Objects

In [32]:
series = pd.Series(range(3), index=['a','b','c'])
print(series)
index=series.index
print("\nPrinting Index\n",index)
index[2:3]

# index[1] = 'x' # This will give error since index objects are immutable

a    0
b    1
c    2
dtype: int64

Printing Index
 Index(['a', 'b', 'c'], dtype='object')


Index(['c'], dtype='object')

In [33]:
# creating a index labels to work with pandas 'Series' and 'DataFrame'
labels = pd.Index(np.arange(3))
print(labels)
series = pd.Series(['Hi','Hello','How'], index=labels)
print(series)
print("\nIs series.index is same as labels\n",series.index is labels)

Int64Index([0, 1, 2], dtype='int64')
0       Hi
1    Hello
2      How
dtype: object

Is series.index is same as labels
 True


In [34]:
# We can have duplicate elements in the Index
labels_second = pd.Index(['bar','foo','bar','far'])
print(labels_second)
# ??labels[:]

Index(['bar', 'foo', 'bar', 'far'], dtype='object')


In [35]:
# some methods used with Index
new_label = labels.append(labels_second)
print("\nAppend\n",new_label)
new_label = labels.difference(labels_second)
print("\n Difference\n",new_label)
new_label = labels_second.difference(labels)
print("\n Difference\n",new_label)


Append
 Index([0, 1, 2, 'bar', 'foo', 'bar', 'far'], dtype='object')

 Difference
 Int64Index([0, 1, 2], dtype='int64')

 Difference
 Index(['bar', 'far', 'foo'], dtype='object')


# Essential Functionality

In [36]:
# Reindexing
?? Duplicate index cannot be reindexed
# print(obj)
# obj.drop(labels=['Gowtham'])
# print(obj)
# reindexed_series = obj.reindex()
# print("\n reindexed_series \n",reindexed_series)

obj = pd.Series([23,45,56,123], index=[0,2,5,7])
print(obj)

new_obj = obj.reindex([2,0,5,7])
print(new_obj)

# Reindexing with forward fill 'ffill'
new_obj = obj.reindex(np.arange(8), method='ffill')
print(new_obj)

Object ` Duplicate index cannot be reindexed` not found.
0     23
2     45
5     56
7    123
dtype: int64
2     45
0     23
5     56
7    123
dtype: int64
0     23
1     23
2     45
3     45
4     45
5     56
6     56
7    123
dtype: int64


# Dropping Entries from an Axis

In [37]:
obj = pd.Series(np.arange(6), index=['a','b','c','d','e','e'])
print(obj)

a    0
b    1
c    2
d    3
e    4
e    5
dtype: int32


In [38]:
new_obj = obj.drop('c')
print(new_obj)
new_obj = new_obj.drop(['b','e']) # Here removes the both index of 'e
print(new_obj)
print("\n Printing old Series \n")
# Here old object stil intact
print(obj)

a    0
b    1
d    3
e    4
e    5
dtype: int32
a    0
d    3
dtype: int32

 Printing old Series 

a    0
b    1
c    2
d    3
e    4
e    5
dtype: int32


In [39]:
frame=pd.DataFrame(np.arange(16).reshape(4,4),index=['Tamilnadu','Kerala','Karnataka','Delhi'],
                    columns=['one','two','three','four'])
frame

Unnamed: 0,one,two,three,four
Tamilnadu,0,1,2,3
Kerala,4,5,6,7
Karnataka,8,9,10,11
Delhi,12,13,14,15


In [40]:
frame.drop(['Kerala'])

Unnamed: 0,one,two,three,four
Tamilnadu,0,1,2,3
Karnataka,8,9,10,11
Delhi,12,13,14,15


In [41]:
print(frame.drop(['three'],axis='columns'))
print(frame) # Actual frame details is still not modified
# Then how to modify the actual frame object without returning new object
frame.drop(['three'],axis='columns',inplace=True)
# Now print the original frame's data
print(frame)

           one  two  four
Tamilnadu    0    1     3
Kerala       4    5     7
Karnataka    8    9    11
Delhi       12   13    15
           one  two  three  four
Tamilnadu    0    1      2     3
Kerala       4    5      6     7
Karnataka    8    9     10    11
Delhi       12   13     14    15
           one  two  four
Tamilnadu    0    1     3
Kerala       4    5     7
Karnataka    8    9    11
Delhi       12   13    15


# Indexing, Selection, and Filtering

In [42]:
# The main difference between the np array and Series is in nparray you cannot use index
print(obj)

a    0
b    1
c    2
d    3
e    4
e    5
dtype: int32


In [43]:
print(obj['b']) # access through index value
print(obj[1]) # access through integer value
print(obj[['b','d']]) # access through index value
print(obj[[1,4]]) # access through integer value
print("Range using interger\n", obj[2:4])
print("Range using index\n", obj['a':'c'])
print("Conditional indexing \n",obj[obj<3])
obj[obj<3]=0 # Modify the element usign index
print(obj)

1
1
b    1
d    3
dtype: int32
b    1
e    4
dtype: int32
Range using interger
 c    2
d    3
dtype: int32
Range using index
 a    0
b    1
c    2
dtype: int32
Conditional indexing 
 a    0
b    1
c    2
dtype: int32
a    0
b    0
c    0
d    3
e    4
e    5
dtype: int32


In [44]:
# Indexing in the DataFrame
print(frame) 

           one  two  four
Tamilnadu    0    1     3
Kerala       4    5     7
Karnataka    8    9    11
Delhi       12   13    15


In [45]:
print(frame['one'])
print(frame[['one','four']])

Tamilnadu     0
Kerala        4
Karnataka     8
Delhi        12
Name: one, dtype: int32
           one  four
Tamilnadu    0     3
Kerala       4     7
Karnataka    8    11
Delhi       12    15


In [46]:
print(frame[frame['four']>7])

           one  two  four
Karnataka    8    9    11
Delhi       12   13    15


In [47]:
print(frame<5)
frame[frame<11]=0
print(frame)

             one    two   four
Tamilnadu   True   True   True
Kerala      True  False  False
Karnataka  False  False  False
Delhi      False  False  False
           one  two  four
Tamilnadu    0    0     0
Kerala       0    0     0
Karnataka    0    0    11
Delhi       12   13    15


### Selection with loc and iloc

In [48]:
# loc and iloc
# Using loc
print(frame.loc['Delhi',['two']]) # selecting a subset of a row element
print(frame.loc['Delhi',['two','four']])

two    13
Name: Delhi, dtype: int32
two     13
four    15
Name: Delhi, dtype: int32


In [49]:
# Using iloc
print(frame.iloc[3,[0,2]])

For the same oper

SyntaxError: invalid syntax (<ipython-input-49-2483c8a92d6f>, line 4)

## Integer Indexes

In [None]:
# Always use loc and iloc for indexing in pandas
# since it is very confusing for integer index
series = pd.Series(np.arange(3.0))
print(series)
# print(series[-1]) # In pandas above code will through error
# This is because inferring what user wants(label-bssed indexing or position-based) is difficult

# so to maintain the consistent if you have an axis index containing integers, data selection will always be label oriented.

# for precise handling use loc(for labels) and iloc(for integers)
print("Using loc\n",series.loc[:2])
print("Using iloc\n",series.iloc[:2])

# Arithmetic and Data Alignment

In [None]:
# In Pandas when you adding together objects, if any index pairs are not same, the respective index
# in the result will be Union of the index pairs.

In [None]:
series1 = pd.Series([7.3, 3.5, -2.5, 9.3, 2.6],index=['a','c','e','f','g'])
series2 = pd.Series([3.0, -2.3, 4.7, 2.1, 8.3],index=['a','e','f','g','h'])
print("series1\n",series1)
print("series2\n",series2)

In [None]:
print(series1+series2)
# Missing values are introduced in the label locations that dont overlap

In [None]:
# In case of DataFrame, alingment is performed on both the rows and the columns.
dataFrame1 = pd.DataFrame(np.arange(9.0).reshape(3,3),columns=list('bcd'),
                          index=['Tamilnadu','Kerala','Mumbai'])

dataFrame2 = pd.DataFrame(np.arange(12.0).reshape(4,3),columns=list('bde'),
                          index=['karnataka','Tamilnadu','Kerala','Delhi'])

In [None]:
print("dataFrame1\n",dataFrame1)
print("dataFrame2\n",dataFrame2)

In [None]:
print("Sum of two DataFrame:\n",dataFrame1 + dataFrame2)
print("Sun of two DataFrame:\n",dataFrame1 - dataFrame2)

In [None]:
# If you add DataFrame object with no row and column label then result object will be all Nulls.
dataFrame1 = pd.DataFrame({'A':[1,2]})
dataFrame2 = pd.DataFrame({'B':[1,2]})
print("dataFrame1\n",dataFrame1)
print("dataFrame2\n",dataFrame2)

In [None]:
print("Sum of two DataFrame:\n",dataFrame1 + dataFrame2)
print("Sub of two DataFrame:\n",dataFrame1 - dataFrame2)

# Arithmetic methods with fill values

In [None]:
?? Needs to be completed

### Operations between DataFrame and Series

In [None]:
# broadcasting in numpy array
arr = np.arange(12.0).reshape(4,3)
print(arr)

In [None]:
print(arr - arr[0])
# Here all the rows are subtracted with a[0]. This is called brodcasting.

In [None]:
# Broadcasting in Series and DataFrame
frame = pd.DataFrame(np.arange(12.).reshape(4,3), columns=list('bde'),index=['Kerala', 'Tamilnadu', 'Karnataka', 'Delhi'])
print(frame)
series = frame.iloc[0]
print(series)

In [None]:
# Performing Arithamatic between DataFrame and Series
# Here index of the series is matched with columns of the DataFrame.
print(frame - series) # Actally broadcasting heppens here.


In [None]:
# If an index value is not found in either the DataFrame’s columns or the Series’s index,
# the objects will be reindexed to form the union
series2 = pd.Series(range(3),index=list('aef'))
print(frame)
print(series2)

In [None]:
print(frame + series2)
print(frame - series2)

In [None]:
# colume wise arithamatic
print(frame)
series3 = frame['d']
print(series3)

In [None]:
# The axis number that you pass is the axis to match on. In this case we mean to match
# on the DataFrame’s row index (axis='index' or axis=0) and broadcast across
frame.sub(series3,axis=0)

## Function Application and Mapping

In [None]:
frame = pd.DataFrame(np.random.randn(4,3),columns=list('bef'),index=['Tamil','English','hindi','Malayalam'])
frame

In [None]:
np.abs(frame)

In [None]:
f = lambda x:x.max() - x.min()
frame.apply(f, axis=0) # here axis='index'

In [None]:
frame.apply(f, axis=1) # here axis='index'

In [None]:
# The function passed to apply need not return a scalar value; it can also return a Series
# with multiple values

def f(x):
    return pd.Series([x.min(), x.max()], index=['min', 'max'])
print(frame)

In [None]:
frame.apply(f)

In [None]:
print(frame)
format1 = lambda x : '%.2f' % x
frame.applymap(format1)

## Sorting and Ranking

In [None]:
series = Series(np.arange(4),index=['r','a','c','s'])
series

In [None]:
# Index is sorted alphabetically.
series.sort_index() 

In [None]:
frame = DataFrame(np.arange(12).reshape(4,3), index=['r','a','c','s'],columns=['one','three','Four'])
frame

In [None]:
frame.sort_index()

In [None]:
frame.sort_index(axis=1)

In [None]:
# You can use ascending order also.
frame.sort_index(axis=1,ascending=False)

In [None]:
series = pd.Series([4,7,-3,1])
print("Original:\n",series)
print("Sorted:\n",series.sort_values())


In [None]:
series = pd.Series([4,7,np.nan,-3,1])
print("Original:\n",series)
# Here any missing values are sorted to the end
print("Sorted:\n",series.sort_values())

In [None]:
frame = pd.DataFrame({'b':[4,7,-3,2], 'a':[0,1,0,1]})
frame

In [None]:
# Here column b is sorted here.
frame.sort_values(by='b')
print(frame)
# To sort a multiple colume pass a list of names
frame.sort_values(by=['a','b']) # here b column is sorted wrongly because in [a,b] a have higher priority
print("Sorting multiple index\n",frame)

### Ranking

###### This below code will be little confsion for first time readers. 
###### Please refer the below link for more explanation
https://www.w3resource.com/pandas/dataframe/dataframe-rank.php

In [None]:
series = pd.Series([7,-5,7,4,2,0,4])
print(obj.rank())
print(obj.rank(method='first'))
print(obj.rank(method='max'))

#### Axis Indexes with Duplicate Labels

In [None]:
# Here duplicate index values are used
obj = pd.Series(range(5),index=['a','a','b','b','c'])
print(obj)

In [None]:
# is_unique will check if index is unique
obj.index.is_unique

In [None]:
# Data selection is the only problem we could encounter when we have duplicate index labels
print("Accessing duplicate index value:\n",obj['b']) # here it will return as a list
print("Accessing non index value:\n",obj['c']) # here it will return as scalar value

In [None]:
# Same duplicate logic for DataFrame
df = DataFrame(np.random.randn(4,3),index=['a','b','b','c'])
print(df)
print("Accessing the Index:\n",df.loc['b'])

### Computing Descriptive Statistics

In [None]:
df = pd.DataFrame([[1.2,np.nan],[1.3, 0.8],[5.3,0.2],[4,np.nan]],
                  index=['a','b','c','d'],
                  columns=['one','two'])
df

In [None]:
df.sum() # By default it will sum the rows

In [None]:
df.sum(axis='columns')


In [None]:
df.sum(axis='columns',skipna=False)

In [None]:
df.idxmax() # Gives the index of maximum value

In [None]:
df.cumsum() # cumulative addition

In [None]:
# It will give the all statistics about the 
df.describe()

In [None]:
obj = pd.Series(['a', 'a', 'b', 'c'])
obj.describe()

In [None]:
?skew Sample skewness (third moment) of values
?kurt Sample kurtosis (fourth moment) of values
print(df.skew())
print(df.kurt())

### Correlation and Covariance

In [None]:
import pandas_datareader.data as web


In [None]:
# This section needs to be updated

# all_data={ticker: web.get_data_yahoo(ticker)
#          for ticker in ['APPL','IBM','MSFT','GOOG']}

### Unique Values, Value Counts, and Membership

In [65]:
obj = pd.Series(['c','a','d','a','a','a','b','b','c'])
print("Print unique index:",obj.unique()) # prints the unique values
print("Print index frequency :\n",obj.value_counts()) # prints the frequncy of a each value
# Value count as Top level pandas method
# Where you can pass any array or sequnce
print("Value count in Pandas:\n",pd.value_counts(obj , sort=False))

Print unique index: ['c' 'a' 'd' 'b']
Print index frequency :
 a    4
b    2
c    2
d    1
dtype: int64
Value count in Pandas:
 a    4
d    1
c    2
b    2
dtype: int64


In [70]:
# isin checkes the given value in each position
print(obj.isin(['b','c']))

0     True
1    False
2    False
3    False
4    False
5    False
6     True
7     True
8     True
dtype: bool


In [72]:
# object masking
mask = obj.isin(['b','c'])
obj[mask]

0    c
6    b
7    b
8    c
dtype: object

In [78]:
# Another related method related to 'isin'
to_match= pd.Series(['c', 'a', 'b', 'b', 'c', 'a'])
unique_vals = pd.Series(['c', 'b', 'a'])
print(pd.Index(unique_vals)) # unique value series. Here c=0, b=1, a=2
print(pd.Index(unique_vals).get_indexer(to_match)) # matching unique_vals into to_match array

Index(['c', 'b', 'a'], dtype='object')
[0 2 1 1 0 2]


# Data Loading, Storage, and File Formats

### Reading and Writing Data in Text Format

In [79]:
# This section describes how to Read and Write Data in Text format
