In [2]:
import pandas as pd
import numpy as np

# pandas.DataFrame.at

> Access a single value for a row/column label pair.

> Similar to loc, in that both provide label-based lookups. Use at if you only need to get or set a single value in a DataFrame or Series.

In [2]:
# Small integer frame whose row labels (4, 5, 6) differ from the row
# positions, so label-based access is easy to tell apart.
df = pd.DataFrame(
    data=[[0, 2, 3], [0, 4, 1], [10, 20, 30]],
    index=[4, 5, 6],
    columns=['A', 'B', 'C'],
)
df

Unnamed: 0,A,B,C
4,0,2,3
5,0,4,1
6,10,20,30


In [3]:
# Get the value at the specified row/column label pair.

df.at[4, 'B']

2

In [4]:
# Set the value at the specified row/column label pair, then read it back.
# This mutates df in place.

df.at[4, 'B'] = 10
df.at[4, 'B']

10

In [5]:
# Get a value within a Series: df.loc[5] selects the row labelled 5,
# and .at then looks up the label 'B' in it.

df.loc[5].at['B']

4

# pandas.DataFrame.iat

In [6]:
# Same data as before, but with the default 0..2 row index so that
# positions and labels coincide.
df = pd.DataFrame(
    data=[[0, 2, 3], [0, 4, 1], [10, 20, 30]],
    columns=['A', 'B', 'C'],
)
df

Unnamed: 0,A,B,C
0,0,2,3
1,0,4,1
2,10,20,30


In [7]:
# Get the value at the specified integer position (row 1, column 2).

df.iat[1, 2]

1

In [8]:
# Set the value at integer position (row 1, column 2), then read it back.
# This mutates df in place.

df.iat[1, 2] = 10
df.iat[1, 2]

10

In [9]:
# Get a value within a Series: take the row labelled 0, then the element
# at integer position 1 of that row.

df.loc[0].iat[1]

2

# pandas.DataFrame.loc


>Access a group of rows and columns by label(s) or a boolean array.

>.loc[] is primarily label based, but may also be used with a boolean array.

>Allowed inputs are:

>> A single label, e.g. 5 or 'a', (note that 5 is interpreted as a label of the index, and never as an integer position along the index).

>>A list or array of labels, e.g. ['a', 'b', 'c'].

>>A slice object with labels, e.g. 'a':'f'.

In [10]:
# Getting values

# Frame with string row labels, used by the .loc lookups that follow.
df = pd.DataFrame(
    data=[[1, 2], [4, 5], [7, 8]],
    index=['cobra', 'viper', 'sidewinder'],
    columns=['max_speed', 'shield'],
)
df

Unnamed: 0,max_speed,shield
cobra,1,2
viper,4,5
sidewinder,7,8


In [16]:
# Single label. Note this returns the row as a Series whose name is the label.
df.loc['viper']

max_speed    4
shield       5
Name: viper, dtype: int64

In [12]:
# List of labels. Note that passing a list ([[...]]) returns a DataFrame.
df.loc[['viper', 'sidewinder']]

Unnamed: 0,max_speed,shield
viper,4,5
sidewinder,7,8


In [18]:
# Single label for the row and single label for the column: returns a scalar.

df.loc['cobra', 'shield']

2

In [19]:
# Slice of row labels with a single column label. Both the start ('cobra')
# and the stop ('viper') of the label slice are included.
df.loc['cobra':'viper', 'max_speed']

cobra    1
viper    4
Name: max_speed, dtype: int64

In [20]:
# Boolean list with the same length as the row axis: keeps rows marked True.
df.loc[[False, False, True]]

Unnamed: 0,max_speed,shield
sidewinder,7,8


In [21]:
# Condition that evaluates to a boolean Series: keeps rows where shield > 6.
df.loc[df['shield'] > 6]

Unnamed: 0,max_speed,shield
sidewinder,7,8


In [22]:
# Boolean Series for the rows plus a list of column labels; the list form
# keeps the result a DataFrame.
df.loc[df['shield'] > 6, ['max_speed']]

Unnamed: 0,max_speed
sidewinder,7


In [23]:
# Callable that receives the DataFrame and returns a boolean Series.
df.loc[lambda df: df['shield'] == 8]

Unnamed: 0,max_speed,shield
sidewinder,7,8


In [25]:
# Setting values

# Set value for all items matching the list of row and column labels.
# This mutates df in place.

df.loc[['viper', 'sidewinder'], ['shield']] = 50
df

Unnamed: 0,max_speed,shield
cobra,1,2
viper,4,50
sidewinder,7,50


In [26]:
# Set value for an entire row: every column of 'cobra' becomes 10.

df.loc['cobra'] = 10
df

Unnamed: 0,max_speed,shield
cobra,10,10
viper,4,50
sidewinder,7,50


In [27]:
# Set value for an entire column: ':' selects every row.

df.loc[:, 'max_speed'] = 30
df

Unnamed: 0,max_speed,shield
cobra,30,10
viper,30,50
sidewinder,30,50


In [28]:
# Set value for rows matching a boolean condition (a boolean Series here,
# not a callable): every column of the matching rows becomes 0.

df.loc[df['shield'] > 35] = 0
df

Unnamed: 0,max_speed,shield
cobra,30,10
viper,0,0
sidewinder,0,0


In [29]:
# Frame with integer row labels (7, 8, 9) that are not positions, to show
# that .loc treats integers as labels.
df = pd.DataFrame(
    data=[[1, 2], [4, 5], [7, 8]],
    index=[7, 8, 9],
    columns=['max_speed', 'shield'],
)
df

Unnamed: 0,max_speed,shield
7,1,2
8,4,5
9,7,8


In [30]:
# Slice with integer labels for rows. The integers are labels, not positions,
# and both the start (7) and the stop (9) of the slice are included.

df.loc[7:9]

Unnamed: 0,max_speed,shield
7,1,2
8,4,5
9,7,8


In [31]:
# Getting values with a MultiIndex

# A number of examples using a DataFrame with a MultiIndex

# One (outer, inner) label pair per row.
tuples = [
    ('cobra', 'mark i'),
    ('cobra', 'mark ii'),
    ('sidewinder', 'mark i'),
    ('sidewinder', 'mark ii'),
    ('viper', 'mark ii'),
    ('viper', 'mark iii'),
]
index = pd.MultiIndex.from_tuples(tuples)

# One [max_speed, shield] pair per row, in the same order as `tuples`.
values = [
    [12, 2],
    [0, 4],
    [10, 20],
    [1, 4],
    [7, 1],
    [16, 36],
]
df = pd.DataFrame(data=values, index=index, columns=['max_speed', 'shield'])
df

Unnamed: 0,Unnamed: 1,max_speed,shield
cobra,mark i,12,2
cobra,mark ii,0,4
sidewinder,mark i,10,20
sidewinder,mark ii,1,4
viper,mark ii,7,1
viper,mark iii,16,36


In [32]:
# Single outer-level label. Note this returns a DataFrame indexed by the
# remaining (inner) level only.

df.loc['cobra']

Unnamed: 0,max_speed,shield
mark i,12,2
mark ii,0,4


In [33]:
# Single index tuple naming both levels. Note this returns a Series.

df.loc[('cobra', 'mark ii')]

max_speed    0
shield       4
Name: (cobra, mark ii), dtype: int64

In [34]:
# Two labels, one per index level (here 'mark i' is the inner index level,
# not a column). Similar to passing in a tuple, this returns a Series.

df.loc['cobra', 'mark i']

max_speed    12
shield        2
Name: (cobra, mark i), dtype: int64

In [35]:
# Single tuple inside a list. Note using [[]] returns a DataFrame.

df.loc[[('cobra', 'mark ii')]]

Unnamed: 0,Unnamed: 1,max_speed,shield
cobra,mark ii,0,4


In [36]:
# Single tuple for the index with a single label for the column:
# returns a scalar.

df.loc[('cobra', 'mark i'), 'shield']

2

In [37]:
# Slice from an index tuple to a single outer label. The stop label 'viper'
# includes every row under 'viper'.

df.loc[('cobra', 'mark i'):'viper']

Unnamed: 0,Unnamed: 1,max_speed,shield
cobra,mark i,12,2
cobra,mark ii,0,4
sidewinder,mark i,10,20
sidewinder,mark ii,1,4
viper,mark ii,7,1
viper,mark iii,16,36


In [42]:
# Slice from index tuple to index tuple. The stop tuple ('viper', 'mark ii')
# is included; rows after it are not.

df.loc[('cobra', 'mark i'):('viper', 'mark ii')]

Unnamed: 0,Unnamed: 1,max_speed,shield
cobra,mark i,12,2
cobra,mark ii,0,4
sidewinder,mark i,10,20
sidewinder,mark ii,1,4
viper,mark ii,7,1


# pandas.DataFrame.iloc

> Purely integer-location based indexing for selection by position.

>.iloc[] is primarily integer position based (from 0 to length-1 of the axis), but may also be used with a boolean array.

> Allowed inputs are:

>> An integer, e.g. 5.

>> A list or array of integers, e.g. [4, 3, 0].

>> A slice object with ints, e.g. 1:7.

>> A boolean array.

In [43]:
# One dict per row; the dict keys become the column labels.
mydict = [
    {'a': 1, 'b': 2, 'c': 3, 'd': 4},
    {'a': 100, 'b': 200, 'c': 300, 'd': 400},
    {'a': 1000, 'b': 2000, 'c': 3000, 'd': 4000},
]
df = pd.DataFrame(data=mydict)
df

Unnamed: 0,a,b,c,d
0,1,2,3,4
1,100,200,300,400
2,1000,2000,3000,4000


In [44]:
# Indexing just the rows

# With a scalar integer: the result is a Series.

type(df.iloc[0])

pandas.core.series.Series

In [45]:
# The row at position 0 as a Series, indexed by the column labels.
df.iloc[0]

a    1
b    2
c    3
d    4
Name: 0, dtype: int64

In [46]:
# With a list of integers: the result is a DataFrame, even for one row.

df.iloc[[0]]

Unnamed: 0,a,b,c,d
0,1,2,3,4


In [47]:
# A list of several positions selects those rows.
df.iloc[[0, 1]]

Unnamed: 0,a,b,c,d
0,1,2,3,4
1,100,200,300,400


In [48]:
# With a slice object: the first three rows.
df.iloc[:3]

Unnamed: 0,a,b,c,d
0,1,2,3,4
1,100,200,300,400
2,1000,2000,3000,4000


In [49]:
# With a boolean mask the same length as the index: keeps rows marked True.
df.iloc[[True, False, True]]

Unnamed: 0,a,b,c,d
0,1,2,3,4
2,1000,2000,3000,4000


In [50]:
# With a callable, useful in method chains. The x passed to the lambda is
# the DataFrame being sliced. This selects the rows whose index label is even.

df.iloc[lambda x: x.index % 2 == 0]

Unnamed: 0,a,b,c,d
0,1,2,3,4
2,1000,2000,3000,4000


In [51]:
# Indexing both axes

# You can mix the indexer types for the index and columns.
# Use : to select the entire axis.

# With scalar integers: returns the single value at (row 0, column 1).

df.iloc[0, 1]

2

In [52]:
# With lists of integers: rows at positions 0 and 2, columns at positions 1 and 3.

df.iloc[[0, 2], [1, 3]]

Unnamed: 0,b,d
0,2,4
2,2000,4000


In [53]:
# With slice objects. Unlike label slices with .loc, the stop position is
# excluded: 0:3 selects columns 'a'..'c' and leaves out 'd' (position 3).

df.iloc[1:3, 0:3]

Unnamed: 0,a,b,c
1,100,200,300
2,1000,2000,3000


In [54]:
# With a boolean array whose length matches the columns; ':' keeps every row.

df.iloc[:, [True, False, True, False]]

Unnamed: 0,a,c
0,1,3
1,100,300
2,1000,3000


In [55]:
# With a callable that receives the DataFrame and returns the column
# positions to select (this one ignores its argument and always returns [0, 2]).

df.iloc[:, lambda df: [0, 2]]


Unnamed: 0,a,c
0,1,3
1,100,300
2,1000,3000


# pandas.DataFrame.empty

> Indicator whether DataFrame is empty.

> True if DataFrame is entirely empty (no items), meaning any of the axes are of length 0.

In [57]:
# An example of an actual empty DataFrame. Notice the index is empty:

df_empty = pd.DataFrame(data={'A': []})
df_empty

Unnamed: 0,A


In [58]:
# True because the frame has a column but no rows.
df_empty.empty

True

In [59]:
# A DataFrame that holds only NaNs is not considered empty! The NaNs must
# be dropped first to make the DataFrame empty:

df = pd.DataFrame(data={'A': [np.nan]})

In [60]:
# Display the frame: a single row whose only value is NaN.
df

Unnamed: 0,A
0,


In [61]:
# False: the frame still has one row, even though its value is NaN.
df.empty

False

In [62]:
# True: dropping the NaN row leaves no rows. dropna() returns a new frame;
# df itself is not modified here.
df.dropna().empty

True

# pandas.DataFrame.ndim

> Return an int representing the number of axes / array dimensions.

> Return 1 if Series. Otherwise return 2 if DataFrame.

In [64]:
# A Series has a single axis, so ndim is 1.
s = pd.Series(data={'a': 1, 'b': 2, 'c': 3})
s.ndim

1

In [65]:
# A DataFrame has two axes (rows and columns), so ndim is 2.
df = pd.DataFrame(data={'col1': [1, 2], 'col2': [3, 4]})
df.ndim

2

# pandas.DataFrame.shape

> Return a tuple representing the dimensionality of the DataFrame.

In [3]:
# Two rows by two columns: shape is reported as (rows, columns).
# The cell ends on df.shape so the displayed output is the computed shape,
# not a hand-typed tuple.
df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
df.shape

(2, 2)

In [4]:
# Adding a third column changes the second element of shape: (2, 3).
df = pd.DataFrame(
    data={
        'col1': [1, 2],
        'col2': [3, 4],
        'col3': [5, 6],
    }
)
df.shape

(2, 3)

# pandas.DataFrame.size

> Return an int representing the number of elements in this object.

> Return the number of rows if Series. Otherwise return the number of rows times number of columns if DataFrame.

In [5]:
# For a Series, size is simply the number of elements.
s = pd.Series(data={'a': 1, 'b': 2, 'c': 3})
s.size

3

In [6]:
# For a DataFrame, size is rows times columns: 2 * 2 = 4.
df = pd.DataFrame(data={'col1': [1, 2], 'col2': [3, 4]})
df.size

4

In [7]:
# All-integer frame, used to show what .values returns when every column
# shares one type.
df = pd.DataFrame(
    data={
        'age': [3, 29],
        'height': [94, 170],
        'weight': [31, 115],
    }
)
df

Unnamed: 0,age,height,weight
0,3,94,31
1,29,170,115


In [8]:
# The frame's data as a 2-D NumPy array; every column is an integer, so
# the array has an integer dtype.
df.values

array([[  3,  94,  31],
       [ 29, 170, 115]], dtype=int64)

In [9]:
# Mixed-type frame: strings, floats (with a NaN), and a column that mixes
# a string, an int and None.
df2 = pd.DataFrame(
    data=[
        ('parrot', 24.0, 'second'),
        ('lion', 80.5, 1),
        ('monkey', np.nan, None),
    ],
    columns=('name', 'max_speed', 'rank'),
)
df2.dtypes

name          object
max_speed    float64
rank          object
dtype: object

In [10]:
# With mixed column types the resulting array falls back to dtype=object.
df2.values

array([['parrot', 24.0, 'second'],
       ['lion', 80.5, 1],
       ['monkey', nan, None]], dtype=object)

### For `DataFrame.values`: a DataFrame where all columns are the same type (e.g., int64) results in an array of the same type.
### A DataFrame with mixed-type columns (e.g., str/object, int64, float32) results in an ndarray of the broadest type that accommodates these mixed types (e.g., object).