In [2]:
import pandas as pd

pd.__version__

'1.5.3'

Pandas is a newer library built on top of NumPy. Unlike a plain NumPy array, a Pandas DataFrame can hold columns of heterogeneous types and can represent missing data.

The three main data types in Pandas are built on the NumPy ndarray:

Series

DataFrame

Index

A Series is a one-dimensional array-like object containing a sequence of values (of similar types to NumPy types) and an associated array of data labels, called its index. The simplest Series is formed from only an array of data:

In [29]:
obj = pd.Series([4, 7, -5, 3])
obj

0    4
1    7
2   -5
3    3
dtype: int64

In [30]:
obj[1] #obj[1] is only a SINGLE VALUE from your series

7

In [31]:
obj.index[1] #will give index of that particular location (if required)

1

In [32]:
obj = pd.Series([4])
obj #This is also a series but in obj[1] (previous case) you are accessing a SINGLE VALUE from a series, so index is not relevant. While accessing multiple values, indexes becomes relevant

0    4
dtype: int64

In [None]:
#You can get the array representation and index object of the Series via its values and index attributes, respectively:
obj.values



array([ 4,  7, -5,  3])

In [None]:
obj.index

RangeIndex(start=0, stop=4, step=1)

In [None]:
obj2 = pd.Series([4, 7, -5, 3], index=['d', 'b', 'a', 'c'])
obj2

d    4
b    7
a   -5
c    3
dtype: int64

In [None]:
obj2 = pd.Series([4, 7, -5, 3], index=['1','2','3','4'])
obj2

1    4
2    7
3   -5
4    3
dtype: int64

In [None]:
obj2['1':'3']

1    4
2    7
3   -5
dtype: int64

In [None]:
obj2.index

In [None]:
#Like with a NumPy array, data can be accessed by the associated index via the familiar Python square-bracket notation:
obj2[1:3]

b    7
a   -5
dtype: int64

In [None]:
obj2['d':'c']

d    4
b    7
a   -5
c    3
dtype: int64

In [None]:
obj2['d']

In [None]:
obj2['d'] = 6
obj2[['c', 'a', 'd']]

c    3
a   -5
d    6
dtype: int64

In [None]:
obj2[['a','b','c','d']]

a   -5
b    7
c    3
d    6
dtype: int64

In [None]:
#obj1=obj2[['a','b','c','d']]

Here [‘c’, ‘a’, ‘d’] is interpreted as a list of indices, even though it contains strings instead of integers.

Using NumPy functions or NumPy-like operations, such as filtering with a boolean array, scalar multiplication, or applying math functions, will preserve the index-value link:

In [None]:
#obj1[obj1 > 0]

In [None]:
obj2[obj2 > 0]

d    6
b    7
c    3
dtype: int64

In [None]:
obj2 > 0

d     True
b     True
a    False
c     True
dtype: bool

In [None]:
obj2 * 2

d    12
b    14
a   -10
c     6
dtype: int64

In [None]:
import numpy as np
np.exp(obj2)

d     403.428793
b    1096.633158
a       0.006738
c      20.085537
dtype: float64

In [None]:
np.exp(obj2)

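A Series can also be thought of as a fixed-length, ordered dict that maps index labels to data values, so it can be used in many contexts where you might use a dict: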

In [None]:
'e' in obj2

False

In [None]:
sdata = {'Ohio': 35000, 'Texas': 71000, 'Oregon': 16000, 'Utah': 5000}
obj3 = pd.Series(sdata)
obj3

Ohio      35000
Texas     71000
Oregon    16000
Utah       5000
dtype: int64

In [None]:
states = ['Texas', 'California', 'Ohio', 'Oregon']
obj4 = pd.Series(sdata, index=states)
obj4

Texas         71000.0
California        NaN
Ohio          35000.0
Oregon        16000.0
dtype: float64

Since no value for ‘California’ was found, it appears as NaN (not a number), which pandas uses to mark missing or NA values. Since ‘Utah’ was not included in states, it is excluded from the resulting object.

In [None]:
obj4.values

array([71000.,    nan, 35000., 16000.])

In [None]:
#The isnull and notnull functions in pandas should be used to detect missing data:

pd.isnull(obj4)

Texas         False
California     True
Ohio          False
Oregon        False
dtype: bool

In [None]:

pd.notnull(obj4)

Texas          True
California    False
Ohio           True
Oregon         True
dtype: bool

In [None]:
obj4.isnull()

A useful Series feature for many applications is that it automatically aligns by index label in arithmetic operations:

In [None]:
obj3

Ohio      35000
Texas     71000
Oregon    16000
Utah       5000
dtype: int64

In [None]:
obj4

Texas         71000.0
California        NaN
Ohio          35000.0
Oregon        16000.0
dtype: float64

In [None]:
obj3+obj4

California         NaN
Ohio           70000.0
Oregon         32000.0
Texas         142000.0
Utah               NaN
dtype: float64

Both the Series object itself and its index have a name attribute, which integrates with other key areas of pandas functionality:

In [None]:
obj4.name = 'population'
obj4.index.name = 'state'
# obj4.values.name = 'popu'
# obj4.columns.name = 'popu'
obj4

state
Texas         71000.0
California        NaN
Ohio          35000.0
Oregon        16000.0
Name: population, dtype: float64

A Series’s index can be altered in-place by assignment:

In [None]:
obj

In [None]:
obj.index = ['Bob', 'Steve', 'Jeff', 'Ryan']
obj

Bob      4
Steve    7
Jeff    -5
Ryan     3
dtype: int64

# **The Pandas DataFrame Object**

A DataFrame represents a rectangular table of data and contains an ordered collection of columns, each of which can be a different value type (numeric, string, boolean, etc.). The DataFrame has both a row and column index; it can be thought of as a dict of Series all sharing the same index.

In [None]:
# Column-oriented input: every key becomes a column and every list (all of
# equal length) supplies that column's values.
data = {
    'state': ['Ohio'] * 3 + ['Nevada'] * 3,
    'year': [2000, 2001, 2002, 2001, 2002, 2003],
    'pop': [1.5, 1.7, 3.6, 2.4, 2.9, 3.2],
}
# No index is passed, so the rows get the default integer labels 0..5.
frame = pd.DataFrame(data)

In [None]:
frame

In [None]:
frame.head()

Unnamed: 0,state,year,pop
0,Ohio,2000,1.5
1,Ohio,2001,1.7
2,Ohio,2002,3.6
3,Nevada,2001,2.4
4,Nevada,2002,2.9


In [None]:
# Columns are laid out in the order requested. 'debt' has no matching key in
# data, so it is created and filled with missing values (NaN).
frame2 = pd.DataFrame(
    data,
    index=['one', 'two', 'three', 'four', 'five', 'six'],
    columns=['year', 'state', 'pop', 'debt'],
)
frame2

Unnamed: 0,year,state,pop,debt
one,2000,Ohio,1.5,
two,2001,Ohio,1.7,
three,2002,Ohio,3.6,
four,2001,Nevada,2.4,
five,2002,Nevada,2.9,
six,2003,Nevada,3.2,


A column in a DataFrame can be retrieved as a Series either by dict-like notation or by attribute:

In [None]:
frame2['state'] #preferred

one        Ohio
two        Ohio
three      Ohio
four     Nevada
five     Nevada
six      Nevada
Name: state, dtype: object

In [None]:
frame2.state # attribute access only works when the column name is a valid Python identifier (and does not clash with a DataFrame attribute or method); new columns cannot be created with this syntax

one        Ohio
two        Ohio
three      Ohio
four     Nevada
five     Nevada
six      Nevada
Name: state, dtype: object

Rows can be retrieved by label with the special loc attribute (use iloc to retrieve them by integer position):

In [None]:
frame2

Unnamed: 0,year,state,pop,debt
one,2000,Ohio,1.5,
two,2001,Ohio,1.7,
three,2002,Ohio,3.6,
four,2001,Nevada,2.4,
five,2002,Nevada,2.9,
six,2003,Nevada,3.2,


In [None]:
frame2.loc['three']

year     2002
state    Ohio
pop       3.6
debt      NaN
Name: three, dtype: object

In [None]:
frame2['debt'] = 16.5
frame2

Unnamed: 0,year,state,pop,debt
one,2000,Ohio,1.5,16.5
two,2001,Ohio,1.7,16.5
three,2002,Ohio,3.6,16.5
four,2001,Nevada,2.4,16.5
five,2002,Nevada,2.9,16.5
six,2003,Nevada,3.2,16.5


In [None]:

frame2['debt'] = np.arange(6.)
frame2

Unnamed: 0,year,state,pop,debt
one,2000,Ohio,1.5,0.0
two,2001,Ohio,1.7,1.0
three,2002,Ohio,3.6,2.0
four,2001,Nevada,2.4,3.0
five,2002,Nevada,2.9,4.0
six,2003,Nevada,3.2,5.0


When you are assigning lists or arrays to a column, the value’s length must match the length of the DataFrame. If you assign a Series, its labels will be realigned exactly to the DataFrame’s index, inserting missing values in any holes:

In [None]:
frame2

Unnamed: 0,year,state,pop,debt
one,2000,Ohio,1.5,0.0
two,2001,Ohio,1.7,1.0
three,2002,Ohio,3.6,2.0
four,2001,Nevada,2.4,3.0
five,2002,Nevada,2.9,4.0
six,2003,Nevada,3.2,5.0


In [None]:
val = pd.Series([-1.2, -1.5, -1.7], index=['two', 'four', 'five'])
frame2['debt'] = val
frame2

Unnamed: 0,year,state,pop,debt
one,2000,Ohio,1.5,
two,2001,Ohio,1.7,-1.2
three,2002,Ohio,3.6,
four,2001,Nevada,2.4,-1.5
five,2002,Nevada,2.9,-1.7
six,2003,Nevada,3.2,


In [None]:
# frame2.fillna("0")

Assigning a column that doesn’t exist will create a new column. The del keyword will delete columns as with a dict.

As an example of del, let’s first add a new column of boolean values indicating whether the state column equals ‘Ohio’:

In [None]:
frame2['eastern'] = frame2['state'] == 'Ohio'
frame2

Unnamed: 0,year,state,pop,debt,eastern
one,2000,Ohio,1.5,,True
two,2001,Ohio,1.7,-1.2,True
three,2002,Ohio,3.6,,True
four,2001,Nevada,2.4,-1.5,False
five,2002,Nevada,2.9,-1.7,False
six,2003,Nevada,3.2,,False


In [None]:
# NOTE: attribute-style assignment cannot create a new column. If 'eastern' is
# not already a column, this only sets a plain Python attribute on the DataFrame
# object (pandas emits a UserWarning); use frame2['eastern'] = ... to add a column.
frame2.eastern = frame2['state'] == 'Ohio'
# frame2

In [None]:
frame2

Unnamed: 0,year,state,pop,debt
one,2000,Ohio,1.5,
two,2001,Ohio,1.7,-1.2
three,2002,Ohio,3.6,
four,2001,Nevada,2.4,-1.5
five,2002,Nevada,2.9,-1.7
six,2003,Nevada,3.2,


In [None]:
del frame2['debt']
# frame2.columns
frame2

Unnamed: 0,year,state,pop
one,2000,Ohio,1.5
two,2001,Ohio,1.7
three,2002,Ohio,3.6
four,2001,Nevada,2.4
five,2002,Nevada,2.9
six,2003,Nevada,3.2


If a nested dict is passed to the DataFrame constructor, pandas will interpret the outer dict keys as the columns and the inner keys as the row indices:

In [None]:
pop = {'Nevada': {2001: 2.4, 2002: 2.9},
       'Ohio': {2000: 1.5, 2001: 1.7, 2002: 3.6}}

In [None]:

frame3 = pd.DataFrame(pop)
frame3

Unnamed: 0,Nevada,Ohio
2001,2.4,1.7
2002,2.9,3.6
2000,,1.5


Given a two-dimensional array of data, we can create a DataFrame with any specified column and index names. If omitted, an integer index will be used for each:

In [None]:
pd.DataFrame(np.random.rand(3, 2),
             columns=['foo', 'bar'],
             index=['a', 'b', 'c'])

Unnamed: 0,foo,bar
a,0.86658,0.953733
b,0.752284,0.948972
c,0.531509,0.593248


You can transpose the DataFrame (swap rows and columns) with similar syntax to a NumPy array:

In [None]:
frame3.T

Unnamed: 0,2001,2002,2000
Nevada,2.4,2.9,
Ohio,1.7,3.6,1.5


In [None]:
pd.DataFrame(pop, index=[2003, 2002, 2001])

Unnamed: 0,Nevada,Ohio
2003,,
2002,2.9,3.6
2001,2.4,1.7


Dicts of Series are treated in much the same way:

In [None]:
frame3

Unnamed: 0,Nevada,Ohio
2001,2.4,1.7
2002,2.9,3.6
2000,,1.5


In [None]:
pdata = {'Ohio': frame3['Ohio'][:-1],
         'Nevada': frame3['Nevada'][:2]}
pd.DataFrame(pdata)

Unnamed: 0,Ohio,Nevada
2001,1.7,2.4
2002,3.6,2.9


If a DataFrame’s index and columns have their name attributes set, these will also be displayed:

In [None]:
frame3.index.name = 'year'
frame3.columns.name = 'state'
frame3

state,Nevada,Ohio
year,Unnamed: 1_level_1,Unnamed: 2_level_1
2001,2.4,1.7
2002,2.9,3.6
2000,,1.5


As with Series, the values attribute returns the data contained in the DataFrame as a two-dimensional ndarray:

In [None]:
frame3.values

array([[2.4, 1.7],
       [2.9, 3.6],
       [nan, 1.5]])

In [None]:
type(frame3.values)

numpy.ndarray

# **The Pandas Index Object**

Index objects are responsible for holding the axis labels and other metadata (like the axis name or names). Any array or other sequence of labels you use when constructing a Series or DataFrame is internally converted to an Index:

In [None]:
obj = pd.Series(range(3), index=['a', 'b', 'c'])
index = obj.index
index

Index(['a', 'b', 'c'], dtype='object')

In [None]:
# obj.index=['a', 'b', 'e']
# obj.index

In [None]:
index[1:]


Index(['b', 'c'], dtype='object')

Index objects are immutable and thus can’t be modified by the user:

In [None]:
index[1] = 'd'

TypeError: Index does not support mutable operations

In [None]:
labels = pd.Index(np.arange(3))
labels

Int64Index([0, 1, 2], dtype='int64')

In [None]:
obj2 = pd.Series([1.5, -2.5, 0], index=labels)
obj2

0    1.5
1   -2.5
2    0.0
dtype: float64

Unlike Python sets, a pandas Index can contain duplicate labels:

In [None]:
dup_labels = pd.Index(['foo', 'foo', 'bar', 'bar'])
dup_labels

Index(['foo', 'foo', 'bar', 'bar'], dtype='object')

Selections with duplicate labels will select all occurrences of that label.


**Method**	**Description**

append	Concatenate with additional Index objects, producing a new Index

difference	Compute set difference as an Index

intersection	Compute set intersection

union	Compute set union

isin	Compute boolean array indicating whether each value is contained in the passed collection

delete	Compute new Index with element at index i deleted

drop	Compute new Index by deleting passed values

insert	Compute new Index by inserting element at index i

is_monotonic_increasing	Returns True if each element is greater than or equal to the previous element

is_unique	Returns True if the Index has no duplicate values

unique	Compute the array of unique values in the Index

In [None]:
indA = pd.Index([1, 3, 5, 7, 9])
indB = pd.Index([2, 3, 5, 7, 11])
indA.is_monotonic_increasing

True

In [None]:
indA.intersection(indB)

Int64Index([3, 5, 7], dtype='int64')

# **Essential Functionality**

**Reindexing**

In [None]:

obj = pd.Series([4.5, 7.2, -5.3, 3.6], index=['d', 'b', 'a', 'c'])
obj

d    4.5
b    7.2
a   -5.3
c    3.6
dtype: float64

Calling reindex on this Series rearranges the data according to the new index, introducing missing values if any index values were not already present:

In [None]:
# obj.index=['a', 'b', 'c', 'd', 'e']
# obj

In [None]:
obj2 = obj.reindex(['a', 'b', 'c', 'd', 'e'])
obj2

a   -5.3
b    7.2
c    3.6
d    4.5
e    NaN
dtype: float64

In [None]:
obj3 = pd.Series(['blue', 'purple', 'yellow'], index=[0, 2, 4])
obj3

0      blue
2    purple
4    yellow
dtype: object

In [None]:
obj3.reindex(range(6))

0      blue
1       NaN
2    purple
3       NaN
4    yellow
5       NaN
dtype: object

In [None]:
obj3.reindex(range(6), method='ffill') # method='ffill' forward-fills: each new label takes the value of the nearest preceding existing label

0      blue
1      blue
2    purple
3    purple
4    yellow
5    yellow
dtype: object

In [None]:

# We can also backfill
obj3.reindex(range(6), method='bfill')

0      blue
1    purple
2    purple
3    yellow
4    yellow
5       NaN
dtype: object

In [None]:
# or fill with a specified value
obj3.reindex(range(6), fill_value='white')

0      blue
1     white
2    purple
3     white
4    yellow
5     white
dtype: object

With DataFrame, reindex can alter either the (row) index, columns, or both.

In [None]:
import numpy as np

In [None]:
# 3x3 grid of the integers 0..8, filled row by row; rows are labelled
# 'a', 'c', 'd' (note that 'b' is deliberately absent).
frame = pd.DataFrame(
    np.arange(9).reshape(3, 3),
    columns=['Ohio', 'Texas', 'California'],
    index=['a', 'c', 'd'],
)
frame

Unnamed: 0,Ohio,Texas,California
a,0,1,2
c,3,4,5
d,6,7,8


In [None]:
frame2 = frame.reindex(['a', 'b', 'c', 'd'])
frame2

Unnamed: 0,Ohio,Texas,California
a,0.0,1.0,2.0
b,,,
c,3.0,4.0,5.0
d,6.0,7.0,8.0


In [None]:
states = ['Texas', 'Utah', 'California']
frame.reindex(columns=states)

Unnamed: 0,Texas,Utah,California
a,1,,2
c,4,,5
d,7,,8


**Dropping Entries from an Axis**

The drop method returns a new object with the indicated value or values deleted from an axis:

In [None]:

obj = pd.Series(np.arange(5.), index=['a', 'b', 'c', 'd', 'e'])
obj

a    0.0
b    1.0
c    2.0
d    3.0
e    4.0
dtype: float64

In [None]:

new_obj = obj.drop('c')
new_obj

a    0.0
b    1.0
d    3.0
e    4.0
dtype: float64

In [None]:
obj.drop(['d', 'c'])

a    0.0
b    1.0
e    4.0
dtype: float64

With a DataFrame, index values can be deleted from either axis.

In [None]:
# 4x4 table holding 0..15 row by row; rows are labelled by state name and
# columns by ordinal words.
data = pd.DataFrame(
    np.arange(16).reshape(4, 4),
    columns=['one', 'two', 'three', 'four'],
    index=['Ohio', 'Colorado', 'Utah', 'New York'],
)
data

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [None]:
data.drop(['Colorado', 'Ohio'])

Unnamed: 0,one,two,three,four
Utah,8,9,10,11
New York,12,13,14,15


In [None]:
data

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [None]:
data.drop('two', axis=1)

Unnamed: 0,one,three,four
Ohio,0,2,3
Colorado,4,6,7
Utah,8,10,11
New York,12,14,15


In [None]:
data.drop(['two', 'four'], axis='columns')

Unnamed: 0,one,three
Ohio,0,2
Colorado,4,6
Utah,8,10
New York,12,14


In [None]:
data

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [None]:
obj.drop('c', inplace=True)
obj

a    0.0
b    1.0
d    3.0
e    4.0
dtype: float64

Be careful with inplace=True, as it modifies the object in place and destroys any data that is dropped.

**Indexing, Selection, and Filtering**

Series

In [None]:
obj = pd.Series(np.arange(4.), index=['a', 'b', 'c', 'd'])
obj

a    0.0
b    1.0
c    2.0
d    3.0
dtype: float64

In [None]:
obj['b']

1.0

In [None]:
obj[1]

1.0

In [None]:
obj = pd.Series(np.arange(4.), index=[9,8,7,6])
obj

9    0.0
8    1.0
7    2.0
6    3.0
dtype: float64

In [None]:
obj[[1,3]]

KeyError: "None of [Int64Index([1, 3], dtype='int64')] are in the [index]"

In [None]:
obj = pd.Series(np.arange(4.), index=[1,2,3,4])
obj

1    0.0
2    1.0
3    2.0
4    3.0
dtype: float64

In [None]:
obj.iloc[1:3]

2    1.0
3    2.0
dtype: float64

In [None]:
obj[2:4]

c    2.0
d    3.0
dtype: float64

In [None]:

obj[['b', 'a', 'd']]

b    1.0
a    0.0
d    3.0
dtype: float64

In [None]:
obj

1    0.0
2    1.0
3    2.0
4    3.0
dtype: float64

In [None]:
obj[[1, 3]]

b    1.0
d    3.0
dtype: float64

In [None]:
obj[obj < 2]

a    0.0
b    1.0
dtype: float64

In [None]:
obj

d    4.5
b    7.2
a   -5.3
c    3.6
dtype: float64

Caution: Slicing with labels behaves differently than normal Python slicing in that **the endpoint is inclusive:**

In [None]:
obj

9    0.0
8    1.0
7    2.0
6    3.0
dtype: float64

In [None]:

obj['b':'c']

TypeError: cannot do slice indexing on Int64Index with these indexers [b] of type str

Setting using these methods modifies the corresponding section of the Series:

In [None]:
obj['b':'c'] = 5
obj

a    0.0
b    5.0
c    5.0
d    3.0
dtype: float64

Indexing into a **DataFrame** with [] retrieves one or more columns, using either a single value or a sequence:

In [None]:

data = pd.DataFrame(np.arange(16).reshape((4, 4)),
                    index=['Ohio', 'Colorado', 'Utah', 'New York'],
                    columns=['one', 'two', 'three', 'four'])
data

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [None]:
data['one']

Ohio         0
Colorado     4
Utah         8
New York    12
Name: one, dtype: int64

In [None]:
data[['three', 'one']]

Unnamed: 0,three,one
Ohio,2,0
Colorado,6,4
Utah,10,8
New York,14,12


Selection with loc and iloc

In [None]:

data = pd.Series(['a', 'b', 'c','e'], index=[1, 3, 5,7])
data

1    a
3    b
5    c
7    e
dtype: object

In [None]:
data[1]

'a'

In [None]:
data = pd.Series(['a', 'b', 'c','e'], index=[9, 3, 5,7])
data[1]

KeyError: 1

In [None]:
data = pd.Series(['a', 'b', 'c','e'], index=['a', 'b', 'c','d'])
data[1]

'b'

In [None]:
data[1:3]

b    b
c    c
dtype: object

In [None]:
data[[1,5]]

IndexError: positional indexers are out-of-bounds

In [None]:
data = pd.Series(['a', 'b', 'c','e'], index=[1, 3, 5,7])
data

1    a
3    b
5    c
7    e
dtype: object

In [None]:
data.drop([3])

1    a
5    c
7    e
dtype: object

In [None]:
data.loc[1]

'a'

In [None]:
data.loc[1:3]

1    a
3    b
dtype: object

In [None]:
data.iloc[1:3]

3    b
5    c
dtype: object

With DataFrames

In [None]:
import numpy as np

In [None]:
data = pd.DataFrame(np.arange(16).reshape((4, 4)),
                    index=['Ohio', 'Colorado', 'Utah', 'New York'],
                    columns=['one', 'two', 'three', 'four'])
data

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [None]:
data.loc[1:3]

TypeError: cannot do slice indexing on Index with these indexers [1] of type int

In [None]:
data.loc[Ohio:New York] #Error

SyntaxError: invalid syntax. Perhaps you forgot a comma? (<ipython-input-132-eca001aff409>, line 1)

In [None]:
data.loc['Ohio':'New York']

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [None]:
data.iloc[1:3]

Unnamed: 0,one,two,three,four
Colorado,4,5,6,7
Utah,8,9,10,11


In [None]:
data[1:3]

Unnamed: 0,one,two,three,four
Colorado,4,5,6,7
Utah,8,9,10,11


In [None]:
data[1:3]['one']

Colorado    4
Utah        8
Name: one, dtype: int64

In [None]:
data

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [None]:
# data.loc['Ohio','three'] = np.nan
# data


In [None]:
# data['two']=data['two'].fillna('0')
# data

In [None]:
data[:2]

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7


In [None]:
data['three']>3

Ohio        False
Colorado     True
Utah         True
New York     True
Name: three, dtype: bool

In [None]:
data[data['three']>3]

Unnamed: 0,one,two,three,four
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [None]:
data<5

Unnamed: 0,one,two,three,four
Ohio,True,True,True,True
Colorado,True,False,False,False
Utah,False,False,False,False
New York,False,False,False,False


In [None]:
data[data<5]

Unnamed: 0,one,two,three,four
Ohio,0.0,1.0,2.0,3.0
Colorado,4.0,,,
Utah,,,,
New York,,,,


In [None]:
data

Unnamed: 0,one,two,three,four
Ohio,0,0,0,0
Colorado,0,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [None]:
data[data<5]=0

In [None]:
data

Unnamed: 0,one,two,three,four
Ohio,0,0,0,0
Colorado,0,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [None]:
# del data[0]
# data[data.loc['Ohio','one']]

In [None]:
data.loc['Colorado',['two','three']]

two      5
three    6
Name: Colorado, dtype: int64

In [None]:
data.iloc[1,[1,2]]

two      5
three    6
Name: Colorado, dtype: int64

In [None]:
data.loc['Utah',['four','one','two']]

four    11
one      8
two      9
Name: Utah, dtype: int64

In [None]:
data.iloc[2,[3,0,1]]

four    11
one      8
two      9
Name: Utah, dtype: int64

Exercises from text.

Indexing options with DataFrame (on Note-HW)

df.at[label_i,label_j]

df.iat[i,j]

In [None]:
data.iloc[1,3]

7

In [None]:
data.iat[1,3] #

7

In [None]:
data.iloc[:,3]

Ohio         0
Colorado     7
Utah        11
New York    15
Name: four, dtype: int64

In [None]:
# NOTE: .iat is meant for single-value access by integer position; a slice
# such as ':' is not accepted, so this call raises an error instead of
# returning a column. Use data.iloc[:, 3] to select the whole column.
data.iat[:,3]

Integer Indexes