### Streamlining Your Research Laboratory with Python
* Authors:   Mark F. Russo, Ph.D. and William Neil
* Publisher: John Wiley & Sons, Inc.
* License:   MIT (https://opensource.org/licenses/MIT)

In [4]:
# Import modules
import pandas as pd
import random

# Build the init dictionary and create a DataFrame from it.
# The 'group' and 'volume' columns hold random values, so they are drawn
# from a dedicated generator with a fixed seed: every Restart & Run All
# then produces the same table, and the selections made in the cells
# below can be reproduced. Change SEED to get a different data set.
SEED = 42
rng = random.Random(SEED)
init = {'sample': [f's{i+1:02d}' for i in range(8)],      # labels s01 .. s08
        'group' : [rng.randint(1,4) for _ in range(8)],   # integers 1 .. 4, both ends included
        'volume': [rng.random() for _ in range(8)]}       # floats in [0.0, 1.0)
df = pd.DataFrame(init)
df

Unnamed: 0,sample,group,volume
0,s01,2,0.74405
1,s02,2,0.247776
2,s03,1,0.325971
3,s04,1,0.183045
4,s05,3,0.032161
5,s06,2,0.429596
6,s07,2,0.630639
7,s08,1,0.962788


In [5]:
# Inspect the DataFrame's dimensions as (rows, columns) together with
# its column labels
df.shape, df.columns

((8, 3), Index(['sample', 'group', 'volume'], dtype='object'))

In [6]:
# Print a summary of the DataFrame: index range, column names,
# non-null counts, dtypes and memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8 entries, 0 to 7
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   sample  8 non-null      object 
 1   group   8 non-null      int64  
 2   volume  8 non-null      float64
dtypes: float64(1), int64(1), object(1)
memory usage: 324.0+ bytes


In [7]:
# Select a single column using square brackets and its column label.
# The result is one-dimensional and carries the column label as its name.
df['sample']


0    s01
1    s02
2    s03
3    s04
4    s05
5    s06
6    s07
7    s08
Name: sample, dtype: object

In [8]:
# Select a column and compute its descriptive statistics
# (count, mean, std, min, quartiles and max)
df['volume'].describe()


count    8.000000
mean     0.444503
std      0.312674
min      0.032161
25%      0.231593
50%      0.377783
75%      0.658992
max      0.962788
Name: volume, dtype: float64

In [9]:
# Select multiple columns using square brackets and
# a list of column labels (hence the double brackets);
# the result contains only the listed columns, in the listed order
df[['sample','volume']]


Unnamed: 0,sample,volume
0,s01,0.74405
1,s02,0.247776
2,s03,0.325971
3,s04,0.183045
4,s05,0.032161
5,s06,0.429596
6,s07,0.630639
7,s08,0.962788


In [10]:
# Select a single row with the .loc[] indexer, which looks rows up by
# index label. This DataFrame uses the default RangeIndex (0 to 7), so
# label 3 is also the fourth row by position.
df.loc[3]


sample         s04
group            1
volume    0.183045
Name: 3, dtype: object

In [11]:
# Select multiple rows by integer position using .iloc[] with slice
# notation. The stop position is excluded, so 3:6 returns rows 3, 4 and 5.
df.iloc[3:6]


Unnamed: 0,sample,group,volume
3,s04,1,0.183045
4,s05,3,0.032161
5,s06,2,0.429596


In [12]:
# Select rows by passing a list of Booleans to .loc[].
# The list holds one entry per row (8 here); rows paired with True are
# kept and rows paired with False are dropped.
bool_idx = [True, False, True, False, True, False, True, False]
df.loc[bool_idx]


Unnamed: 0,sample,group,volume
0,s01,2,0.74405
2,s03,1,0.325971
4,s05,3,0.032161
6,s07,2,0.630639


In [13]:
# Generate the Boolean mask on the fly and use it for Boolean indexing:
# the comparison yields one True/False per row, and the square brackets
# keep only the rows where it is True (volume of at least 0.5)
df[ df['volume'] >= 0.5 ]


Unnamed: 0,sample,group,volume
0,s01,2,0.74405
6,s07,2,0.630639
7,s08,1,0.962788


In [14]:
# Combine Boolean conditions with the element-wise conjunction operator &;
# only rows that satisfy both conditions are kept.
# Each comparison must be wrapped in parentheses because & binds more
# tightly than >= and ==.
df[ (df['volume'] >= 0.5) & (df['group'] == 4)]


Unnamed: 0,sample,group,volume
