In [1]:
import numpy as np 
import pandas as pd 


In [2]:
# DataFrame.loc
# Select a group of rows and columns by label(s) or with a boolean array.

# .loc[] is label based first and foremost; boolean arrays are accepted too.

# Build the example frame column by column; the index carries the row labels.
df = pd.DataFrame(
    {'max_speed': [1, 4, 7], 'shield': [2, 5, 8]},
    index=['cobra', 'viper', 'sidewinder'],
)
df

Unnamed: 0,max_speed,shield
cobra,1,2
viper,4,5
sidewinder,7,8


In [3]:
# Single row label: the matching row is returned as a Series
# indexed by the column labels and named after the row label.


df.loc['viper']

max_speed    4
shield       5
Name: viper, dtype: int64

In [4]:
# List of row labels: wrapping the labels in a list ([[...]]) returns a DataFrame
# containing just those rows.

df.loc[['viper', 'sidewinder']]

Unnamed: 0,max_speed,shield
viper,4,5
sidewinder,7,8


In [5]:
# One row label plus one column label selects a single cell and returns its scalar value.

df.loc['cobra', 'shield']

2

In [6]:
# Label slice for the rows plus a single column label: the result is a Series.
# Note that both the start and the stop label of the slice are included.
df.loc['cobra':'viper', 'max_speed']


cobra    1
viper    4
Name: max_speed, dtype: int64

In [7]:
# Boolean list with the same length as the row axis: only rows flagged True are kept.

df.loc[[False, False, True]]

Unnamed: 0,max_speed,shield
sidewinder,7,8


In [8]:
# DataFrame.axes
# List of the frame's axes.

# The list has exactly two members, in this order: row axis labels, column axis labels.

df = pd.DataFrame([[1, 3], [2, 4]], columns=['col1', 'col2'])
df.axes

[RangeIndex(start=0, stop=2, step=1), Index(['col1', 'col2'], dtype='object')]

In [9]:
# DataFrame.memory_usage(index=True, deep=False)
# Memory footprint of each column, in bytes.

# The index contribution and the elements of object-dtype columns can optionally be included.
dtypes = ['int64', 'float64', 'complex128', 'object', 'bool']
# One 5000-element column of ones per dtype, keyed by the dtype name.
data = {t: np.ones(5000, dtype=int).astype(t) for t in dtypes}
df = pd.DataFrame(data)
df.memory_usage()

Index           128
int64         40000
float64       40000
complex128    80000
object        40000
bool           5000
dtype: int64

In [10]:
# DataFrame.empty
# Flag telling whether the Series/DataFrame is empty.

# It is True when the object holds no items at all, i.e. when any axis has length 0.
df_empty = pd.DataFrame(data={'A': []})
df_empty.empty

True

In [11]:
# DataFrame.infer_objects()
# Try to find a better dtype for object columns.

# This is a soft conversion: non-object columns and columns that cannot be converted are
# left as they are. The inference rules match those of normal Series/DataFrame construction.
# Column "A" mixes a string with integers, so there is nothing narrower to infer here.
df = pd.DataFrame(data={"A": ["a", 1, 2, 3]})
df.infer_objects()

Unnamed: 0,A
0,a
1,1
2,2
3,3


In [12]:
# DataFrame.insert(loc, column, value, allow_duplicates=...)
# Insert a column into the DataFrame at the given position (the frame itself is modified).

df = pd.DataFrame(data={'col1': [1, 2], 'col2': [3, 4]})
# Starting frame:
#    col1  col2
# 0     1     3
# 1     2     4

df.insert(loc=1, column="newcol", value=[99, 99])
# After inserting "newcol" at position 1:
#    col1  newcol  col2
# 0     1      99     3
# 1     2      99     4

# A second column labelled "col1" is added at position 0, which is why
# allow_duplicates=True is passed.
df.insert(loc=0, column="col1", value=[100, 100], allow_duplicates=True)
df

Unnamed: 0,col1,col1.1,newcol,col2
0,100,1,99,3
1,100,2,99,4


In [13]:
# Column labels of df; 'col1' is listed twice because the previous cell inserted
# a duplicate column with allow_duplicates=True.
df.keys()

Index(['col1', 'col1', 'newcol', 'col2'], dtype='object')

In [14]:
# DataFrame.pop(item)
# Drop a column from the frame and return it. A label that is not found raises KeyError.

# One tuple per row; the last row has a missing max_speed.
df = pd.DataFrame(
    data=[
        ('falcon', 'bird', 389.0),
        ('parrot', 'bird', 24.0),
        ('lion', 'mammal', 80.5),
        ('monkey', 'mammal', np.nan),
    ],
    columns=['name', 'class', 'max_speed'],
)
df

Unnamed: 0,name,class,max_speed
0,falcon,bird,389.0
1,parrot,bird,24.0
2,lion,mammal,80.5
3,monkey,mammal,


In [15]:
# Removes the 'class' column from df and returns it as a Series.
# Not idempotent: re-running this cell without rebuilding df raises KeyError,
# because the column is already gone.
df.pop('class')

0      bird
1      bird
2    mammal
3    mammal
Name: class, dtype: object

In [16]:
# df after the pop: only the 'name' and 'max_speed' columns remain.
df

Unnamed: 0,name,max_speed
0,falcon,389.0
1,parrot,24.0
2,lion,80.5
3,monkey,


In [17]:
# DataFrame.xs(key, axis=0, level=None, drop_level=True)
# Cross-section of a Series/DataFrame.

# The key argument picks out data at a particular level of a MultiIndex.
d = {
    'num_legs': [4, 4, 2, 2],
    'num_wings': [0, 0, 2, 2],
    'class': ['mammal', 'mammal', 'mammal', 'bird'],
    'animal': ['cat', 'dog', 'bat', 'penguin'],
    'locomotion': ['walks', 'walks', 'flies', 'walks'],
}
# Move the three descriptive columns into a three-level row MultiIndex.
df = pd.DataFrame(d).set_index(['class', 'animal', 'locomotion'])
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,num_legs,num_wings
class,animal,locomotion,Unnamed: 3_level_1,Unnamed: 4_level_1
mammal,cat,walks,4,0
mammal,dog,walks,4,0
mammal,bat,flies,2,2
bird,penguin,walks,2,2


In [18]:
# Cross-section on the first index level ('class'): keeps the 'bird' rows and
# drops that level from the result's index.
df.xs("bird")

Unnamed: 0_level_0,Unnamed: 1_level_0,num_legs,num_wings
animal,locomotion,Unnamed: 2_level_1,Unnamed: 3_level_1
penguin,walks,2,2


In [19]:
# Get values at several index levels at once: the tuple key matches the first two
# levels ('class', 'animal'), leaving only 'locomotion' in the result's index.
df.xs(('mammal', 'dog'))

  df.xs(('mammal', 'dog'))


Unnamed: 0_level_0,num_legs,num_wings
locomotion,Unnamed: 1_level_1,Unnamed: 2_level_1
walks,4,0


In [20]:
# DataFrame.get(key, default=None)
# Fetch an item from the object by key (for a DataFrame: a column or list of columns).

# If the key is not found, the default value is returned instead.
df = pd.DataFrame(
    data=[
        [24.3, 75.7, "high"],
        [31, 87.8, "high"],
        [22, 71.6, "medium"],
        [35, 95, "medium"],
    ],
    columns=["temp_celsius", "temp_fahrenheit", "windspeed"],
    # Four consecutive days: 2014-02-12 through 2014-02-15.
    index=pd.date_range(start="2014-02-12", periods=4, freq="D"),
)
df.get(["temp_celsius", "windspeed"])

Unnamed: 0,temp_celsius,windspeed
2014-02-12,24.3,high
2014-02-13,31.0,high
2014-02-14,22.0,medium
2014-02-15,35.0,medium


In [22]:
# DataFrame.isin(values)
# Element-wise membership test: is each element of the DataFrame contained in values?
df = pd.DataFrame(
    data={'num_legs': [2, 4], 'num_wings': [2, 0]},
    index=['falcon', 'dog'],
)
df.isin([0, 4])

Unnamed: 0,num_legs,num_wings
falcon,False,False
dog,True,True


In [26]:
# DataFrame.where(cond, other=...)
# Replace values where the condition is False.
# No replacement is passed here, so the entry failing `s > 0` becomes NaN.
s = pd.Series(data=range(5))
s.where(cond=s > 0)

0    NaN
1    1.0
2    2.0
3    3.0
4    4.0
dtype: float64

In [30]:
# Keep the entries equal to 0 and replace every other entry with 99.
s.where(s == 0, other=99)

0     0
1    99
2    99
3    99
4    99
dtype: int64

In [31]:
# The integers 0..9 laid out as five rows of two, under columns A and B.
df = pd.DataFrame(np.arange(10).reshape(5, 2), columns=list('AB'))
df

Unnamed: 0,A,B
0,0,1
1,2,3
2,4,5
3,6,7
4,8,9


In [35]:
# Boolean mask: True where the entry is divisible by 3.
m = (df % 3).eq(0)
# Entries where the mask is True are kept; all others are replaced by their negation.
df.where(m, other=-df)

Unnamed: 0,A,B
0,0,-1
1,-2,3
2,-4,-5
3,6,-7
4,-8,9


In [36]:
# DataFrame.mask(cond, other=nan, *, inplace=False, axis=None, level=None, errors='raise', try_cast=_NoDefault.no_default)
# Replace values where the condition is True.
# With no `other` given, the entries where s > 0 become NaN (see the signature default above).

s.mask(s > 0)

0    0.0
1    NaN
2    NaN
3    NaN
4    NaN
dtype: float64

In [39]:
# DataFrame.query(expr, *, inplace=False, **kwargs)
# Filter the DataFrame with a boolean expression written over its columns.

# Note that the third column label, 'C C', contains a space.
df = pd.DataFrame(
    data={
        'A': range(1, 6),
        'B': range(10, 0, -2),
        'C C': range(10, 5, -1),
    }
)
df

Unnamed: 0,A,B,C C
0,1,10,10
1,2,8,9
2,3,6,8
3,4,4,7
4,5,2,6


In [40]:
# Keep only the rows where column A is less than column B.
df.query('A < B')

Unnamed: 0,A,B,C C
0,1,10,10
1,2,8,9
2,3,6,8


In [None]:
# Keep only the rows where column A equals column B.
# The expression is passed positionally as `expr`; the original cell also had a dangling
# `expr=` keyword with no value, which is a SyntaxError and would duplicate that argument.
# With the frame built in the DataFrame.query cell above, only row 3 (A=4, B=4) matches.
df.query("A == B")