In [None]:
#import packages

import pandas as pd
pd.set_option('display.max_rows', None)
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt 
import seaborn as sns
import altair as alt
alt.data_transformers.disable_max_rows()
import altair_transform

In [None]:
# Load the quality-checked, blank-subtracted list of MP particles produced by
# Micropoll_SchleiSediment_blank_subtract.ipynb (one row per particle).
env_MP = pd.read_csv('env_MP_clean_list_SchleiSediments.csv',index_col=0)

# Proxy parameters
# Sediment D50 values
sed_d50 = pd.read_csv('Schlei_Sed_D50_new.csv',index_col=0)

# Sediment <63 µm fraction values
# NOTE(review): this reads the same file as sed_d50 above, which looks like a
# copy-paste slip. The intended file name is not visible here; confirm and fix.
sed_63 = pd.read_csv('Schlei_Sed_D50_new.csv',index_col=0)

# Organic matter size, TOC and Hg data
sed_OM = pd.read_csv('Schlei_OM.csv',index_col=0)

# Sampling log data
slogs= pd.read_csv('Schlei_sed_sampling_log.csv',index_col=0)

# Potential MP source data (distance to the WWTP)
Dist_WWTP = pd.read_csv('Schlei_Sed_Dist_WWTP.csv',index_col=0)


# Give the two measured particle dimensions readable names.
env_MP = env_MP.rename(columns={'Size_1_[µm]': 'MPlength', 'Size_2_[µm]': 'MPwidth'})

# Estimate the third particle dimension as height = (width / length) * width.
env_MP['MPheight'] = ((env_MP['MPwidth'] / env_MP['MPlength']) * env_MP['MPwidth']).round()

# Ellipsoid volume. The textbook formula 4/3 * pi * a * b * c takes SEMI-axes;
# length, width and height are full extents, so the factor is
# 4/3 * pi * (1/2)^3 = pi / 6. (The previous 4/3 * pi factor overestimated
# every volume by a factor of 8.)
env_MP['MPvolume'] = ((np.pi / 6) * env_MP['MPwidth'] * env_MP['MPlength'] * env_MP['MPheight']).round()

# Particle subsets split at a geometric mean size of 500 µm.
env_MP_a500 = env_MP.loc[env_MP.size_geom_mean >= 500]
env_MP_b500 = env_MP.loc[env_MP.size_geom_mean < 500]


In [None]:
# Polymer-type frequency table. Alternative groupings are kept for reference:
#   per polymer type and library entry (quality check):
#PolyList=env_MP.groupby(['polymer_type','library_entry'])['polymer_type'].count().sort_values(ascending=False).reset_index(name="Frequency")
#   per polymer type over all stations:
#PolyList=env_MP.groupby(['polymer_type'])['polymer_type'].count().sort_values(ascending=False).reset_index(name="Frequency")

# Active variant: number of particles per station and polymer type.
_per_station_polymer = env_MP.groupby(['Sample','polymer_type'])['polymer_type']
PolyList = _per_station_polymer.count().reset_index(name="Frequency")
print(PolyList.shape)
#PolyList.head(10)

# Sketch of an alternative that also carries the sampling weight:
#PolyGroupy = env_MP.groupby(['Sample', 'polymer_type'])
#PolyList = PolyGroupy.size().to_frame(name='Frequency')
#PolyList.join(PolyGroupy.agg({'Sampling_weight_[kg]': 'mean'})).reset_index()


In [None]:
#function for calculating (T)MP concentration

def p2s(groupy):
    """Aggregate grouped MP particles ("particles to samples") and attach sample metadata.

    Parameters
    ----------
    groupy : pandas DataFrameGroupBy
        Particle rows grouped by 'Sample' (optionally with further keys such as
        'polymer_type'). The grouped frame must provide the columns
        'Site_name', 'size_geom_mean', 'Sampling_weight_[kg]', 'GPS_LON',
        'GPS_LAT', 'Fraction_analysed' and 'MPvolume'.

    Returns
    -------
    pandas.DataFrame
        One row per group with particle counts (total, >= 500 and < 500 by
        'size_geom_mean'), the median particle size ('MP_D50'), the summed
        particle volume, the same quantities normalised by the analysed sample
        mass, the columns merged in from the module-level frames ``slogs``,
        ``sed_d50``, ``sed_OM`` and ``Dist_WWTP`` (joined on 'Sample'), and a
        'Regio_Sep' region label.

    Notes
    -----
    The final region merge is an inner join, so samples that are not listed in
    the region dictionary are dropped from the result.
    """
    s = groupy.agg(
        # 'size' counts every row of the group; 'count' would skip particles
        # whose 'Site_name' is missing and so under-report the frequency.
        Frequency=('Site_name', 'size'),
        FrequencyA500=('size_geom_mean', lambda x: (x>=500).sum()),  # particles >= 500
        FrequencyB500=('size_geom_mean', lambda x: (x<500).sum()),  # particles < 500
        # Mean of per-sample values that are repeated on every particle row.
        Mass=('Sampling_weight_[kg]', 'mean'),
        GPS_LONs=('GPS_LON', 'mean'),
        GPS_LATs=('GPS_LAT', 'mean'),
        Split=('Fraction_analysed', 'mean'),
        MP_D50=('size_geom_mean', 'median'),
        MPvolume=('MPvolume', 'sum'),
    ).reset_index()

    # Normalise counts and volume by the sample mass that was actually analysed.
    analysed_mass = s['Mass'] * s['Split']
    s['Concentration'] = (s['Frequency'] / analysed_mass).round()
    s['ConcentrationA500'] = (s['FrequencyA500'] / analysed_mass).round()
    s['ConcentrationB500'] = (s['FrequencyB500'] / analysed_mass).round()
    s['MPvolume_norm'] = (s['MPvolume'] / analysed_mass).round()

    # Merge with the sediment statistics and sampling metadata (left joins keep
    # every aggregated row even when a table has no entry for the sample).
    s = pd.merge(s,slogs.reset_index(),on=['Sample'], how='left')
    s = pd.merge(s,sed_d50.reset_index(),on=['Sample'], how='left')
    s = pd.merge(s,sed_OM.reset_index(),on=['Sample'], how='left')
    s = pd.merge(s,Dist_WWTP.reset_index(),on=['Sample'], how='left')

    # Region label for each sample.
    Regio_Sep =  {'Schlei_S1_15cm': 'inner',
              'Schlei_S2': 'inner',
              'Schlei_S3': 'inner',
              'Schlei_S5': 'river',
              'Schlei_S8': 'inner',
              'Schlei_S10': 'inner',
              'Schlei_S10_15cm': 'inner',
              'Schlei_S11': 'inner',
              'Schlei_S13': 'inner',
              'Schlei_S14': 'outlier',
              'Schlei_S15': 'inner',
              'Schlei_S17': 'inner',
              'Schlei_S19': 'outlier',
              'Schlei_S22': 'outer',
              'Schlei_S23': 'outer',
              'Schlei_S24': 'outer',
              'Schlei_S25': 'outer',
              'Schlei_S26': 'outer',
              'Schlei_S27': 'outer',
              'Schlei_S30': 'outer',
              'Schlei_S31': 'outer'}

    # Default (inner) merge: rows whose 'Sample' is not a key above are dropped.
    s = s.merge(pd.DataFrame.from_dict(Regio_Sep,orient='index',columns=['Regio_Sep']),left_on='Sample',right_index=True)
    return s



def aggregate_particles(particles):
    """Aggregate a particle table at two levels using ``p2s``.

    Returns a tuple ``(poly_samples, tot_samples)``: the first is aggregated
    per ('Sample', 'polymer_type'), the second per 'Sample' only.
    """
    per_polymer = p2s(particles.groupby(['Sample','polymer_type']))
    per_sample = p2s(particles.groupby(['Sample']))
    return per_polymer, per_sample



In [None]:
# Choose which MP subset (e.g. by shape) feeds the diagrams below.
# Active choice: everything that is not a fibre.
_not_fibre = env_MP['Shape'] != 'fibre'
poly_samples, tot_samples = aggregate_particles(env_MP.loc[_not_fibre])
# Alternatives:
#   fibres only -> aggregate_particles(env_MP.loc[env_MP['Shape']=='fibre'])
#   all shapes  -> aggregate_particles(env_MP)

#tot_samples.to_csv('MP_Stats_SchleiSediments.csv')


In [None]:
# Polymer-type distribution per sample: absolute concentration (left) next to
# the normalised share of MP volume (right).

Sample_order = [
    'Schlei_S1_15cm', 'Schlei_S2', 'Schlei_S3', 'Schlei_S5', 'Schlei_S8',
    'Schlei_S10', 'Schlei_S10_15cm', 'Schlei_S11', 'Schlei_S13', 'Schlei_S14',
    'Schlei_S15', 'Schlei_S17', 'Schlei_S19', 'Schlei_S22', 'Schlei_S23',
    'Schlei_S24', 'Schlei_S25', 'Schlei_S26', 'Schlei_S27', 'Schlei_S30',
    'Schlei_S31',
]

# Clicking a legend entry filters the bars to that polymer type.
selection = alt.selection_multi(fields=['polymer_type'], bind='legend')

_poly_bars = alt.Chart(poly_samples).mark_bar().encode(
    x=alt.X('Sample', sort=Sample_order),
    y=alt.Y('Concentration', scale=alt.Scale(type='linear')),
    color='polymer_type',
    tooltip=['polymer_type', 'Concentration'],
)
Poly_Dist = _poly_bars.add_selection(selection).transform_filter(selection).interactive()

Poly_Dist | Poly_Dist.encode(y=alt.Y('MPvolume', stack='normalize'))


In [None]:
#quick statistics for fibres and irregulars
#mpSampleGroupy = env_MP.groupby(['Sample','Shape'])
#mpStats = mpSampleGroupy['size_geom_mean'].describe()

#idx = pd.IndexSlice
#mpFibreStats = mpStats.loc[idx[:,'fibre'], :].reset_index()
#mpIrrStats = mpStats.loc[idx[:,'irregular'], :].reset_index()


In [None]:
# MP concentration per polymer type against distance to the WWTP, with an
# exponential fit and its parameters printed on the chart.
selection = alt.selection_multi(fields=['polymer_type'], bind='legend')

MP_Dist_WWTP_Plot = alt.Chart(poly_samples).mark_point().encode(
    x= alt.X('Dist_WWTP'),#,scale = alt.Scale(domain=(0,40000))),
    y= alt.Y('Concentration',scale = alt.Scale(type ='linear')),
    color= 'polymer_type',
    tooltip = ['polymer_type', 'Sample']
)

Reg_Line = MP_Dist_WWTP_Plot.transform_regression('Dist_WWTP', 'Concentration',
                                            method="exp",
                                            #groupby=["Regio_Sep"],
                                           ).mark_line(color="red")

# Vega's "exp" model is y = a * e^(b * x) with coef = [a, b]; the label now
# prints that form (it previously showed "a + e^(bx)"). The former trailing
# `+ \n + " "` was an unquoted newline followed by a unary plus on " ", which
# evaluates to 0 and appended a stray "0" to the text, so it is removed.
Reg_Params = MP_Dist_WWTP_Plot.transform_regression('Dist_WWTP', 'Concentration',
                                              method="exp",
                                              params=True
    ).mark_text(align='left', lineBreak='\n').encode(
        x=alt.value(120),  # pixels from left
        y=alt.value(20),  # pixels from top
        text='params:N'
    ).transform_calculate(
        params=(
            '"r² = " + round(datum.rSquared * 100)/100 + '
            '"      y = " + round(datum.coef[0] * 10)/10 + " * e ^ (" + '
            'round(datum.coef[1] * 10000)/10000 + "x" + ")"'
        )
    )

#ParamsDF = altair_transform.extract_data(Reg_Params)
alt.layer(MP_Dist_WWTP_Plot, Reg_Line,Reg_Params).add_selection(selection).transform_filter(selection).interactive()
#MP_Dist_WWTP_Plot #+ Reg_Line + Reg_Params


In [None]:
# TOC against sediment D50 (sample Schlei_S8 excluded), coloured by MP
# concentration, with a power-law fit and its parameters printed on the chart.
TOC_D50_Plot = alt.Chart(tot_samples).mark_point().encode(
    x= 'D50', #alt.Y('Dx50',scale = alt.Scale(type= 'log')),
    y= alt.Y('TOC',scale = alt.Scale(type= 'linear')),
    tooltip='Sample',
    color='Concentration'
    ).transform_filter('datum.Sample != "Schlei_S8"'
)

Reg_Line = TOC_D50_Plot.transform_regression('D50', 'TOC',
                                            method="pow",
                                            #groupby=["Regio_Sep"],
                                           ).mark_line(color="red")

# Vega's "pow" model is y = a * x^b with coef = [a, b]. The label previously
# printed an exponential formula that did not match the fitted model, and a
# stray unquoted `\n` term appended a "0" to the text.
Reg_Params = TOC_D50_Plot.transform_regression('D50', 'TOC',
                                              method="pow",
                                              params=True
    ).mark_text(align='left', lineBreak='\n').encode(
        x=alt.value(120),  # pixels from left
        y=alt.value(20),  # pixels from top
        text='params:N'
    ).transform_calculate(
        params=(
            '"r² = " + round(datum.rSquared * 100)/100 + '
            '"      y = " + round(datum.coef[0] * 10)/10 + " * x ^ " + '
            'round(datum.coef[1] * 10000)/10000'
        )
    )

#ParamsDF = altair_transform.extract_data(Reg_Params)

TOC_D50_Plot + Reg_Line + Reg_Params


In [None]:
# TOC against the median MP particle size (MP_D50) with a linear fit.
TOC_MP_D50_Plot = alt.Chart(tot_samples).mark_point().encode(
    x= 'MP_D50', #alt.Y('Dx50',scale = alt.Scale(type= 'log')),
    y= 'TOC', #alt.Y('Concentration',scale = alt.Scale(type= 'log')),
    tooltip='Sample'
)
# transform_regression(on, regression): `on` is the independent (x) field and
# `regression` the dependent (y) field. They were swapped relative to the
# encodings above, so the drawn line was a fit of MP_D50 on TOC.
TOC_MP_D50_Plot + TOC_MP_D50_Plot.transform_regression('MP_D50',
                          'TOC',method="linear"
).mark_line(color="red")


In [None]:
# MP concentration against Hg with an exponential fit drawn in red.
MP_OM_Plot = alt.Chart(tot_samples).mark_point().encode(
    x='Hg',
    y='Concentration',
    tooltip='Sample',
)
_hg_fit = MP_OM_Plot.transform_regression('Hg', 'Concentration', method="exp")
MP_OM_Plot + _hg_fit.mark_line(color="red")


In [None]:
# Sediment 'Dx 50' against the median MP particle size per sample and polymer
# type (samples Schlei_S24 and Schlei_S31 excluded), with a power-law fit.
# NOTE(review): other cells use a 'D50' column; confirm that 'Dx 50' is the
# intended column name in the merged tables.
MP_OM_Plot = alt.Chart(poly_samples).mark_point().encode(
    x= 'MP_D50', #alt.Y('Dx50',scale = alt.Scale(type= 'log')),
    y= 'Dx 50', #alt.Y('Concentration',scale = alt.Scale(type= 'log')),
    color='Dist_WWTP',
    tooltip='Sample'
# Logical AND (&&) instead of the bitwise & used before.
).transform_filter('datum.Sample != "Schlei_S24" && datum.Sample != "Schlei_S31"'
)
# The regression target was 'Dx 50 ' (trailing space), which names a different
# field from the one plotted on the y axis.
MP_OM_Plot + MP_OM_Plot.transform_regression('MP_D50',
                          'Dx 50',method="pow"
).mark_line(color="red")

In [None]:
# MP concentration per sample and polymer type against sediment 'Dx 50',
# with a linear fit drawn in red.
MP_OM_Plot = alt.Chart(poly_samples).mark_point().encode(
    x='Dx 50',
    y=alt.Y('Concentration', scale=alt.Scale(type='linear')),
    tooltip='Sample',
)
_dx50_fit = MP_OM_Plot.transform_regression('Dx 50', 'Concentration', method="linear")
MP_OM_Plot + _dx50_fit.mark_line(color="red")
#649


In [None]:
# Summed MP volume per sample against sediment D50 (log y axis), coloured by
# region, with a power-law fit and its parameters printed on the chart.

# A legend-bound selection has to project the field shown in the legend. The
# colour encoding here is 'Regio_Sep'; the previous 'polymer_type' field is not
# part of the per-sample aggregation, so the legend could not drive the filter.
selection = alt.selection_multi(fields=['Regio_Sep'], bind='legend')

MP_SED_Plot = alt.Chart(tot_samples).mark_point().encode(
    x='D50',
    y= alt.Y('MPvolume',scale = alt.Scale(type= 'log')),
    color='Regio_Sep'
   # color= 'Dist_WWTP',
   # tooltip = ['polymer_type', 'Sample']
)

Reg_Line = MP_SED_Plot.transform_regression('D50', 'MPvolume',
                                            method="pow",
                                            #groupby=["Dist_WWTP"]
                                            #groupby=["Regio_Sep"],
                                           ).mark_line(color="red")

# The printed parameters must describe the line that is drawn: same method
# ("pow", y = a * x^b) and same grouping (none). They were previously fitted
# with method="exp" grouped by region, so they belonged to a different model.
Reg_Params = MP_SED_Plot.transform_regression('D50', 'MPvolume',
                                              method="pow",
                                              params=True
    ).mark_text(align='left', lineBreak='\n').encode(
        x=alt.value(120),  # pixels from left
        y=alt.value(20),  # pixels from top
        text='params:N'
    ).transform_calculate(
        params=(
            '"r² = " + round(datum.rSquared * 100)/100 + '
            '"      y = " + round(datum.coef[0] * 10)/10 + " * x ^ " + '
            'round(datum.coef[1] * 10000)/10000'
        )
    )

#ParamsDF = altair_transform.extract_data(Reg_Params)

alt.layer(MP_SED_Plot,Reg_Line,Reg_Params).add_selection(selection).transform_filter(selection).interactive()
#MP_Dist_WWTP_Plot #+ Reg_Line + Reg_Params


In [None]:
MP_SED_Plot = alt.Chart(tot_samples).mark_point().encode(
    x='Depth',
    y= alt.Y('Concentration',scale = alt.Scale(type= 'log')),
    color= 'Regio_Sep',
    tooltip='Sample'
)

Reg_Line = MP_SED_Plot.transform_regression('Depth', 'Concentration',
                                            method="exp",
                                            groupby=["Regio_Sep"],
                                           ).mark_line(color="red")

Reg_Params = MP_SED_Plot.transform_regression('Depth', 'Concentration',
                                              method="pow",
                                              groupby=["Regio_Sep"],
                                              params=True                                             
    ).mark_text(align='left', lineBreak='\n').encode(
        x=alt.value(120),  # pixels from left
        y=alt.value(20),  # pixels from top
        text='params:N'
    ).transform_calculate(
        params='"r² = " + round(datum.rSquared * 100)/100 + \
    "      y = " + round(datum.coef[0] * 10)/10 + " + e ^ (" + round(datum.coef[1] * 10000)/10000 + "x" + ")" + \n + " "'
    )

#ParamsDF = altair_transform.extract_data(Reg_Params)

MP_SED_Plot + Reg_Line #+ Reg_Params

In [None]:

#MP_size_Plot = alt.Chart(env_MP).mark_point().encode(
 #   x= 'Size_1_µm', #alt.Y('Dx50',scale = alt.Scale(type= 'log')),
  #  y=  alt.Y(1/'Frequency',scale = alt.Scale(type= 'log')),
   # )

In [None]:
# Size distribution of the MP particles: histogram of particle length in the
# 50-1000 µm range, pooled over all stations.
# The per-sample table `tot_samples` has no particle-size column; the particle
# length lives in env_MP, where 'Size_1_[µm]' was renamed to 'MPlength'.
hist = env_MP['MPlength'].hist(bins=500,range=[50,1000],alpha=0.9)

In [None]:
# Density estimate of the geometric mean particle size per sample, drawn as
# small multiples (7 panels per row).
_size_density = alt.Chart(env_MP, width=100, height=80).transform_density(
    'size_geom_mean',  # alternative: Size_1_µm
    as_=['size', 'density'],
    groupby=['Sample'],
    extent=[50, 2000],
    steps=200,
    bandwidth=0,
    cumulative=False,
    counts=False,
).mark_line().encode(
    x='size:Q',
    y=alt.Y('density:Q', stack=None),
    color='Sample',
)

_size_density.facet('Sample:N', columns=7)

In [None]:
# Compare the median MP particle size (MP_D50) with the sediment D50 per
# sample, coloured by region, with a linear fit per region.

# `mp_sedStats` is not defined anywhere in this notebook, so the cell failed on
# a fresh kernel; `tot_samples` carries every field used here
# (D50, MP_D50, Regio_Sep, Sample).
D50_MP_SED_Plot = alt.Chart(tot_samples).mark_point().encode(
    x='D50',
    y= alt.Y('MP_D50',scale = alt.Scale(zero=False)),
    color= 'Regio_Sep',
    tooltip='Sample'
)
 #scale=alt.Scale(zero=False)
Reg_Line_D50 = D50_MP_SED_Plot.transform_regression('D50', 'MP_D50',
                                            method="linear",
                                            groupby=["Regio_Sep"],
                                           ).mark_line(color="red")

# Parameters of the same model as Reg_Line_D50 ("linear", y = a + b * x); they
# were previously fitted with method="exp" and so described a different curve.
# With groupby there is one parameter row per region, all placed at the same
# pixel position, so the layer is left out of the displayed chart below.
Reg_Params_D50 = D50_MP_SED_Plot.transform_regression('D50', 'MP_D50',
                                              method="linear",
                                              groupby=["Regio_Sep"],
                                              params=True
    ).mark_text(align='left', lineBreak='\n').encode(
        x=alt.value(120),  # pixels from left
        y=alt.value(20),  # pixels from top
        text='params:N'
    ).transform_calculate(
        params=(
            '"r² = " + round(datum.rSquared * 100)/100 + '
            '"      y = " + round(datum.coef[0] * 10)/10 + " + " + '
            'round(datum.coef[1] * 10000)/10000 + " * x"'
        )
    )

#ParamsDF = altair_transform.extract_data(Reg_Params)


D50_MP_SED_Plot + Reg_Line_D50 #+ Reg_Params_D50


In [None]:
# TODO: express MP in weight percent; calculate MP volumes with height = (w/l) * w

#, large table with all possible proxies, density dist plot, comp to other studies worldwide


In [None]:
#make GLM to account for distance to WWTP and GS ?

In [None]:
#check TOC SED literature...

In [None]:
# sorting of sediments (increase in MP)?