In [1]:
%reload_ext autoreload
%autoreload 2

import getpass
import mysql.connector
import pandas as pd
import MPDB_utils as ut
from MPDB_settings import MPDB_server, particleQuery
from MPDB_procedures import blank_procedure, blind_procedure, update_env_and_blind, make_syn_blind

## Establish a connection to the MPDB
Enter your username and password when prompted.

In [2]:
# Open the MPDB connection with interactively entered credentials; the
# password is read via getpass so it is not echoed or stored in the notebook.
# `password` and `database` are the canonical Connector/Python argument names
# (`passwd` and `db` are only compatibility aliases for other MySQL drivers).
connection = mysql.connector.connect(host=MPDB_server,
                                     user=input('Username: '),
                                     password=getpass.getpass(prompt='Password: '),
                                     database='micropoll')

## All data wrangling happens in the modules of the following pipeline

- starting with the call of all particles in the MPDB
- certain polymers get excluded (i.e. own contamination or unclear dye signatures)
- geometric mean sizes are calculated from size 1 and 2 (length and width of the GEPARD-fitted ellipses)
- sizes outside the target spectrum are excluded (here only particles >= 50 µm in `Size_1` are targeted)
- shape and colour get simplified for later distinct phenotype creation
- the particle ID column gets used as the index

All of the above steps take a combined `MP` dataframe as input AND return it as output. In the last step this dataframe gets filtered to only include the particles relevant to this analysis and is split into different sub-dataframes:
- `env_MP` contains the environmental MP particles (i.e. from "real" samples)
- `IOW_blind_MP` contains the MP particles from procedural IOW blind samples
- `samples_MP` is a concatenation of the two above
- `IPF_blank_MP` contains the MP particles from IPF lab blanks

In [3]:
# Query all particles from the MPDB and push the resulting frame through the
# MPDB_utils pipeline. Every step receives the frame via `.pipe`; the last
# step (`ut.separate_MPs`) yields the four sub-dataframes unpacked here.
env_MP, IOW_blind_MP, samples_MP, IPF_blank_MP = (
    pd.read_sql_query(particleQuery, connection)
    .pipe(ut.poly_exclude)
    .pipe(ut.particle_amplification)
    .pipe(ut.geom_mean)
    .pipe(ut.size_filter)
    .pipe(ut.shape_colour)
    .pipe(ut.set_id_to_index)
    .pipe(ut.separate_MPs)
)

# Report which IOW blind samples are part of this run.
blind_sample_names = IOW_blind_MP.Sample.unique()
print(f'Using {len(blind_sample_names)} IOW Blind samples:  ')
print(blind_sample_names)

Using 8 IOW Blind samples:  
['Blank_20.12.2018_w_IS' 'Blank_11.12.2018_w_IS' 'Blank_5.11.19_IS_2'
 'Blank_6.11.19_1' 'Blank_6.11.19_2' 'Blank_5.11.19_IS_1' 'Blank_11.02.19'
 'Blank_5.5.21']


## Removing particles due to lab blanks...

In [4]:
# Run the lab-blank procedure on the combined sample particles against the
# IPF blank particles; it returns a processed frame plus an elimination list.
(samples_MP_copy,
 IPF_elimination_list) = blank_procedure(samples_MP, IPF_blank_MP)

For blank particle # 26681_0 :  Nothing to clean up.
For blank particle # 26684_0 :  Nothing to clean up.
For blank particle # 26686_0 :  Nothing to clean up.
For blank particle # 26689_0 :  Nothing to clean up.
For blank particle # 26690_0 :  Nothing to clean up.
For blank particle # 26691_0 :  Nothing to clean up.
For blank particle # 26692_0 :  Nothing to clean up.
For blank particle # 26694_0 :  Nothing to clean up.
For blank particle # 28292_0 :  Nothing to clean up.
For blank particle # 42143_0 :  Env. particle # 42107_0 was eliminated.
For blank particle # 42158_0 :  Env. particle # 42107_1 was eliminated.
For blank particle # 42160_0 :  Env. particle # 42106_0 was eliminated.
For blank particle # 42162_0 :  Env. particle # 42097_0 was eliminated.
For blank particle # 42728_0 :  Env. particle # 42819_0 was eliminated.
For blank particle # 42940_0 :  Env. particle # 26498_0 was eliminated.
For blank particle # 42941_0 :  Env. particle # 26579_0 was eliminated.
For blank particle 

## Some intermediate steps...
- The blank procedure was conducted on the combined `samples_MP` dataframe. The results are now ported to the separate `env_MP` and `IOW_blind_MP` dataframes.
- A synthesised blind particle dataframe `syn_blind` is generated for the blind procedure.

In [5]:
# Carry the blank-procedure result over to the separate blind and
# environmental frames.
# NOTE(review): this rebinds `IOW_blind_MP` and `env_MP`, so re-running the
# cell feeds it the already-updated blind frame — confirm that is harmless.
(IOW_blind_MP,
 env_MP) = update_env_and_blind(samples_MP_copy, IOW_blind_MP)

# Build the synthesised blind particle frame used by the blind procedure.
syn_blind = make_syn_blind(IOW_blind_MP)

## Removing particles due to procedural blinds...

In [6]:
# Run the procedural-blind procedure on the environmental particles using the
# synthesised blind frame; it returns a processed frame plus an elimination list.
(env_MP_copy,
 IOW_elimination_list) = blind_procedure(env_MP, syn_blind)

6  particles elimnated in:   Schlei_S10
9  particles elimnated in:   Schlei_S10_15cm
7  particles elimnated in:   Schlei_S11
8  particles elimnated in:   Schlei_S13
7  particles elimnated in:   Schlei_S14
6  particles elimnated in:   Schlei_S15
7  particles elimnated in:   Schlei_S16
6  particles elimnated in:   Schlei_S17
6  particles elimnated in:   Schlei_S19
7  particles elimnated in:   Schlei_S1_15cm
7  particles elimnated in:   Schlei_S2
7  particles elimnated in:   Schlei_S20
7  particles elimnated in:   Schlei_S21
9  particles elimnated in:   Schlei_S22
7  particles elimnated in:   Schlei_S23
6  particles elimnated in:   Schlei_S24
9  particles elimnated in:   Schlei_S25
5  particles elimnated in:   Schlei_S26
7  particles elimnated in:   Schlei_S27
8  particles elimnated in:   Schlei_S29
7  particles elimnated in:   Schlei_S2_15cm
7  particles elimnated in:   Schlei_S3
7  particles elimnated in:   Schlei_S30
6  particles elimnated in:   Schlei_S31
7  particles elimnated in:   

## Export final list of valid MP particles

In [14]:
# Write the particle list left after both procedures to CSV
# (path is relative to the notebook's working directory).
env_MP_copy.to_csv(
    path_or_buf='../data/env_MP_clean_list_SchleiSediments.csv',
)