# DATA ORGANIZATION

In [1]:
%matplotlib widget

In [2]:
from utils.io import gen_id_dated, gdf_viewer, gdf_geom, gdf_merger, na_col_drop, na_line_drop, col_ren, dble_col_drop
import re, os
import numpy as np
import geopandas as gpd
import pandas as pd
import datetime as dtm
import matplotlib.pyplot as plt

Data format (excel files)

In [639]:
def compute_BH_length(df, length_col_name='Profondeur', top_col='Litho_top', base_col='Litho_base', verbose=False):
    for i in df.index:
        try:
            float(df.loc[i, top_col])
        except ValueError:
            df.loc[i, top_col] = np.nan

        try:
            float(df.loc[i, base_col])
        except ValueError:
            df.loc[i, base_col] = np.nan

    df[top_col] = df[top_col].astype('float64')
    df[base_col] = df[base_col].astype('float64')

    # compute length based on litho_top and litho_base
    id_list = []

    for i in df.index:
        id_ = df.loc[i,'ID']
        
        if verbose : print(i, id_, df.loc[i, top_col], df.loc[i, base_col])
        if id_ not in id_list:
            id_list.append(id_)
            if isinstance(id_, str):
                sql_id = f"{id_}"
            elif isinstance(id_, float) or isinstance(id_, int):
                sql_id = id_
                
            tmp = df[df['ID'] == sql_id]
            
            if verbose : print(len(tmp))
            #if len(tmp) > 0:
            df.loc[tmp.index, length_col_name] = float(max(tmp[base_col])) - float(min(tmp[top_col]))
    
    df.drop(index=df.query(f'{base_col}.isnull() and {top_col}.isnull()').index, inplace=True)
    df.insert(df.columns.to_list().index('ID')+1, length_col_name, df.pop(length_col_name))
    #df.reset_index(drop=True, inplace=True)
    

In [4]:
pol_field_model={'Arsenic': 'As', 'Cadmium': 'Cd', 'Chrome': 'Cr', 'Chrome VI': 'Cr_VI', 'Cuivre': 'Cu', 
'Mercure': 'Hg', 'Plomb': 'Pb', 'Nickel': 'Ni', 'Zinc': 'Zn', 'Cyanure(?:s)? (?libre(?:s)?)?': 'CN_libre', 
'Cyanures (totaux)': 'CN_tot','Cyanure (totaux)': 'CN_tot','CN_totaux':'CN_tot','Cyanures (APE)': 'CN_APE',
'cyanure (totaux)':'CN_tot', 'cyanure (APE)':'CN_APE', 'cyanure complex': 'CN_comp','Cyanure (APE)': 'CN_APE', 
'thiocyanate': 'thioCN',
'Benzène': 'Bnz', 'Toluène': 'Toln', 'Éthylbenzène': 'EthylBnz', 'Orthoxylène': 'O-Xyl', 
'Para- et métaxylène': 'P-M-Xyl', 'Xylènes': 'Xyl', 'Styrène': 'Styr', 'BTEX totaux': 'BTEX_tot', 
'Phénol': 'Phenol', 'Indice phénol': 'Idc_Phenol', 'Naphtalène': 'Naphta', 'Acénaphtylène': 'Acenaphtyl', 
'Acénaphtène': 'Acenaphtn', 'Fluorène': 'Fluorene', 'Phénanthrène': 'Phenanthr', 'Anthracène': 'Anthrc', 
'Fluoranthène': 'Flranth', 'Pyrène': 'Pyr', 'Benzo(a)anthracène': 'Bnz(a)anthrc', 'Chrysène': 'Chrys', 
'Benzo(b)fluoranthène': 'Bnz(b)flranth', 'Benzo(k)fluoranthène': 'Bnz(k)flranth', 
'Benzo(a)pyrène': 'Bnz(a)pyr','Dibenzo(ah)anthracène': 'Dibnz(ah)anthrc',
'Benzo(ghi)pérylène': 'Bnz(ghi)peryl', 
'Indéno(1,2,3-cd)pyrène': 'Indeno(1,2,3-cd)pyr', 'HAP Totaux (16) - EPA': 'HAP_tot_EPA', 
'1,1-Dichloroéthane': '1,1-DCE', '1,2-Dichloroéthane': '1,2-DCE', '1,1-dichloroéthène': '1,1-DCEn', 
'Cis-1,2-dichloroéthène': 'Cis-1,2-DCEn', 'Trans 1,2-dichloroéthylène': 'Trans 1,2-DCEyl', 
'Dichlorométhane': 'DCM', 'Totaux (cis,trans) 1,2-dichloroéthène(?:s)?': '(cis,trans) 1,2-DCE_tot', 
'1,2-dichloropropane': '1,2-DCP', 'Tétrachloroéthylène': 'TetraCEyn', 'Tétrachlorométhane': 'TCM', 
'1,1,1-Trichloroéthane': '1,1,1-TCE', '1,1,2-Trichloroéthane': '1,1,2-TCE', 'Trichloroéthylène': 'TCEyn', 
'Chloroforme': 'Chloroforme', 'Chlorure de vinyle': 'CVinyl', 'EOX': 'EOX', 
'fraction aromat. >C6-C7': 'Arom_C6C7', 'fraction aromat. >C7-C8': 'Arom_C7C8', 
'fraction aromat. >C8-C10': 'Arom_C8C10', 'fraction aliphat. C5-C6': 'Aliphat_C5C6', 
'fraction aliphat. >C6-C8': 'Aliphat_C6C8', 'fraction aliphat. >C8-C10': 'Aliphat_C8C10', 
'Fraction C5 - C8': 'Fract_C5C8', 'Fraction C8-C10': 'Fract_C8C10', 'Fraction C10-C12': 'Fract_C10C12', 
'Fraction C12-C16': 'Fract_C12C16', 'Fraction C16 - C21': 'Fract_C16C21', 'Fraction C21 - C35': 'Fract_C21C35', 
'Fraction C35 - C40': 'Fract_C35C40', 'Hydrocarbures totaux C10-C35': 'HC_tot_C10C35','C5-C8':'Fract_C5C8', 
'C8-C10':'Fract_C8C10','C10-C12':'Fract_C10C12','C12-C16':'Fract_C12C16','C16-C21':'Fract_C16C21', 
'C21-C35':'Fract_C21C35','C35-C40':'Fract_C35C40', 'totaux C10-C35':'HC_tot_C10C35','C12-C22':'Fract_C12C22', 
'C22-C30':'Fract_C22C30','C30-C40':'Fract_C30C40', 'Totaux C10-C40':'HC_tot_C10C40',
'Hydrocarbures totaux C10-C40':'HC_tot_C10C40', 'MTBE': 'MTBE', 'PCB 28': 'PCB_28', 'PCB 52': 'PCB_52', 
'PCB 101': 'PCB_101', 'PCB 118': 'PCB_118', 'PCB 138': 'PCB_138', 'PCB 153': 'PCB_153', 'PCB 180': 'PCB_180', 
'PCB totaux (7)?': 'PCB_tot', 'Chlorure(?:s)?': 'Chlorure', 'Soufre Total': 'S_tot', 'sulfite(?:s)?': 'sulfite', 
'sulfate(?:s)?': 'sulfate', 'COT':'COT','DBO (5 jours)':'DBO_5j','DCO':'DCO', 
'Ammonium':'NH4','ammoniaque libre':'NH3_libre','Nitrate':'HNO3', 'Nitrite':'HNO2','azote Kjeldahl':'N_Kjdl','sulfures totaux':'Sulfure_tot', 
'sulfure(?:s)? (libre(?:s)?)':'Sulfure_libre','calcium':'Ca','potassium':'K', 'magnésium':'Mg', 'manganèse':'Mn', 
'sodium':"Na", 'fer':'Fe','phosphore (total)':'P_tot','carbonate':'CaCO3', 'bicarbonate':'Bicarb','Phoshore':'P',
'fer ((Fe))? total':'Fe_tot', 'fer (2\+)':'Fe2','fluorure(?:s)?':'Fluorure','bromure (libre)':'B_libre'}


source_dfs initialization

In [5]:
_df = pd.DataFrame()
source_mes_pz, source_mes_sol, source_pz, source_prv_eau = _df, _df, _df, _df
source_prv_sol, source_ouv, source_an, source_litho, source_bh = _df, _df, _df, _df, _df

print(f'source_bh:{len(source_bh)} ; source_pz:{len(source_pz)} ; source_litho:{len(source_litho)} ; '
     f'source_Fac-uknw:{len(source_ouv)} ; source_an:{len(source_an)} ;\nsource_prv_sol:{len(source_prv_sol)} ;'
     f'source_prv_eau:{len(source_prv_eau)} ; source_mes_pz:{len(source_mes_pz)} ; source_mes_sol:{len(source_mes_sol)} ;')

source_bh:0 ; source_pz:0 ; source_litho:0 ; source_Fac-uknw:0 ; source_an:0 ;
source_prv_sol:0 ;source_prv_eau:0 ; source_mes_pz:0 ; source_mes_sol:0 ;


## 1- Profils sols et données forages.xls
* **Sheet : 'Données de forage'**

In [6]:
tmp_dir='../../CF_data/Result_traitem/profils_sols_donnees_forages/'
sheet='donnees_forage'

In [7]:
df = pd.read_excel('../../CF_data/Data_UMONS/ouvrages/Profils sols et données forages.xls', 
                   sheet_name='Données de forage')#, skiprows=2)
df.replace(r'<|>','', inplace=True, regex=True)
df.replace(r'-$',np.nan, inplace=True, regex=True)

gdf_viewer(df)

Rows : 25, columns : 15


interactive(children=(IntSlider(value=10, description='rows', max=25, min=10, readout=False), IntSlider(value=…

In [8]:
df.rename(columns={'Date':'Date_ouv','Profondeur':'Long_for', 'Méthode':'Method', 
                        'Diamètre forage':'Diam_for','Niv. Eau p/r sol':'Niv_eau_sol',
                        'PZ Prof.':'Long_pz', 'PZ Diamètre':'Diam_pz','PZ L.crépinée':'Long_crep', 
                        'Société forage':'Societe'}, inplace=True)

df=df[['ID', 'X', 'Y', 'Z', 'Date_ouv', 'Long_for', 'Diam_for', 'Long_pz', 'Diam_pz', 'Long_crep',
                 'Remarque','Niv_eau_sol','Method', 'Societe']]

In [9]:
df['Type'] = df['Long_pz'].apply(lambda x: 'Forage' if pd.isnull(x) else 'Piezo')
df['Refus'] = ''
df['Type_refus']=''

for i in range(len(df['Remarque'])):
    val = str(df.loc[i,'Remarque'])
    if re.search('[Bb]loqué', val) :
        df.loc[i,'Refus'] = 'x'
        
        if re.search('[lL]aitier', val):
            df.loc[i,'Type_refus'] = 'Laitier'
        elif re.search('[Bb]éton', val):
            df.loc[i,'Type_refus'] = 'Béton'
        elif re.search('[Mm]atériaux', val):
            df.loc[i,'Type_refus'] = 'Matériaux indurés' 
    else: 
        df.loc[i,'Refus'] = '' 

df['Diam_int_pz'] = df['Diam_pz'].apply(lambda x: pd.to_numeric(x.replace(' mm','').split('x')[1].strip(' m')) if not pd.isnull(x) else x)
df['Diam_ext_pz'] = df['Diam_pz'].apply(lambda x: pd.to_numeric(x.replace(' mm','').split('x')[0].strip(' m')) if not pd.isnull(x) else x)
df['Diam_for'] = df['Diam_for'].apply(lambda x: pd.to_numeric(x) if not pd.isnull(x) else x)

df.insert(7, 'Diam_ext_pz', df.pop('Diam_ext_pz')) # move to a specified position
df.insert(8, 'Diam_int_pz', df.pop('Diam_int_pz'))
df.drop(columns=['Remarque', 'Diam_pz'], axis=1, inplace=True)
df.drop(df.query("ID!=ID").index, inplace=True) # delete all ID='NaN' lines

gen_id_dated(df,'ID','Date_ouv')  

Generation of ID-dated...
Using column ' Date_ouv ' in the (geo)dataframe !
Process ended, check the (geo)dataframe


In [10]:
pz = df.query("Type=='Piezo'")
bh = df.query("Type!='Piezo'")

pz.reset_index(inplace=True, drop=True)
bh.reset_index(inplace=True, drop=True)

In [11]:
bh.drop(columns=['Diam_ext_pz', 'Diam_int_pz', 'Long_pz', 'Long_crep', 'Niv_eau_sol',], axis=1, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


In [12]:
gdf_viewer(bh, rows=3), gdf_viewer(pz, rows=3)

Rows : 13, columns : 13


interactive(children=(IntSlider(value=3, description='rows', max=13, min=3, readout=False), IntSlider(value=12…

Rows : 12, columns : 18


interactive(children=(IntSlider(value=3, description='rows', max=12, min=3, readout=False), IntSlider(value=12…

(None, None)

In [13]:
source_pz = pz
source_bh = bh

In [14]:
if not os.path.exists(tmp_dir):
    os.makedirs(tmp_dir)
    
pz.to_csv(tmp_dir+sheet+'_Piezometers.csv', index=False)
bh.to_csv(tmp_dir+sheet+'_Boreholes.csv', index=False)
source_bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False) #all Boreholes data in the source
source_pz.to_csv(tmp_dir+'source_Piezometers.csv', index=False) #all Piezometers data in the source
print(f'source_bh:{len(source_bh)} ; source_pz:{len(source_pz)}')

source_bh:13 ; source_pz:12


* **Sheet : 'Piézométrie'**

In [15]:
tmp_dir='../../CF_data/Result_traitem/profils_sols_donnees_forages/'
sheet='piezometrie'

In [16]:
df = pd.read_excel('../../CF_data/Data_UMONS/ouvrages/Profils sols et données forages.xls', sheet_name='Piézométrie', skiprows=1)
df.replace(r'<|>','', inplace=True, regex=True)
df.replace(r'-$',np.nan, inplace=True, regex=True)

gdf_viewer(df)

Rows : 37, columns : 21


interactive(children=(IntSlider(value=10, description='rows', max=37, min=10, readout=False), IntSlider(value=…

In [17]:
df=na_col_drop(df, 3)
sdf=df[:11].copy()
sdf.reset_index(inplace=True, drop=True)
sdf.rename(columns={'z':'Z',}, inplace=True)
sdf=sdf[['ID', 'Z']]
sdf['Type']='Piezo'


Columns dropped :['Label', 'Commentaires ', 'Unnamed: 5', 'Unnamed: 6', 'Unnamed: 7']



In [18]:
a=0
for x in df.columns:
    if pd.isnull(df.loc[16,x]):
        df.loc[16,x]='col'+str(a)
    a+=1

In [19]:
df.loc[16]=df.loc[16].apply(lambda x : x if not pd.isnull(x) else '')
df.columns = df.loc[16]
df=df[17:]
df.reset_index(inplace=True, drop=True)

#df.drop(columns=[df.columns.to_list()[x] for x in range(0,8)
#                      if re.compile(r"col|unnamed").match(df.columns.to_list()[x])], axis=1, inplace=True) 

In [20]:
gdf_viewer(df)

Rows : 20, columns : 16


interactive(children=(IntSlider(value=10, description='rows', max=20, min=10, readout=False), IntSlider(value=…

In [21]:
df.rename(columns={'col3':'Date_prv', 'col4':'Nappe', 'col5':'ID', 'NP/piézo [m]':'Niv_eau_pz', 
                        'dim. piezo hors sol [m]':'haut_pz-sol', 'NP/sol [m]':'Niv_eau_sol', 
                        'Prof. piézo/piézo [m]':'Long_pz', 'Prof. piézo/sol [m]':'Long_pz-sol', 
                        'CE [mS/cm]':'CE','t° [°C]':'Temp', 'Observations':'Organo'}, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


In [22]:
df.insert(0, 'ID', df.pop('ID')) # move to first column
df.replace('-', np.nan, inplace=True)
df['CE']=df['CE [µS/cm]'].apply(lambda x: pd.to_numeric(x)/1000 
                                  if re.search('^\d+', str(x)) and not pd.isnull(x) else np.nan)
df.drop('CE [µS/cm]', axis=1, inplace=True)
df.loc[18, 'Niv_eau_pz']=np.nan
#df['Date_prv']=df['Date_prv'].apply(lambda x : str(x.year) if not pd.isnull(x) else '')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().replace(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['CE']=df['CE [µS/cm]'].apply(lambda x: pd.to_numeric(x)/1000
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/use

In [23]:
df.rename_axis(None, inplace=True, axis=1)
df.drop(df.query("ID!=ID").index, inplace=True) # supprimer les lignes avec ID='NaN'
df.reset_index(inplace=True, drop=True)
df=na_col_drop(df,2)
df['Type_mes'] = 'phys-chim'


Columns dropped :['col0', 'col1', 'col2']



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Type_mes'] = 'phys-chim'


In [24]:
cut_list = ['Nappe', 'haut_pz-sol', 'Long_pz', 'Long_pz-sol']
for x in df.columns:
    if x in cut_list:
        sdf[x] = df[x]
        
df.drop(columns=cut_list, inplace=True)

In [25]:
sdf['ID_2']=df['ID']

In [26]:
mes_pz=df # piezometry and phys-chem measures
pz=sdf # piezometers

In [27]:
source_mes_pz = mes_pz
source_pz, error_df = gdf_merger(source_pz, pz, how='outer', col='ID', )

Ambiguous values in both columns compared, change it manually !
Columns Index(['Long_pz_x', 'Long_pz_y'], dtype='object') must be dropped manually !


In [28]:
error_df

Unnamed: 0,ID,Long_pz_x,Long_pz_y
0,F3M,3.3,2.98
1,F13M,3.5,4.04
2,F15bM,4.0,4.67
3,F16M,4.8,4.85
4,F17dM,3.6,3.97


In [29]:
gdf_viewer(source_pz, rows=3)#, gdf_viewer(error_df)

Rows : 12, columns : 22


interactive(children=(IntSlider(value=3, description='rows', max=12, min=3, readout=False), IntSlider(value=12…

In [30]:
if not os.path.exists(tmp_dir):
    os.makedirs(tmp_dir)
    
pz.to_csv(tmp_dir+sheet+'_Piezometers.csv', index=False)
mes_pz.to_csv(tmp_dir+sheet+'_Measures.csv', index=False)

source_mes_pz.to_csv(tmp_dir+'source_Measures.csv', index=False) #all Boreholes data in the source
source_pz.to_csv(tmp_dir+'source_Piezometers.csv', index=False) #all Piezometers data in the source

print(f'source_bh:{len(source_bh)} ; source_pz:{len(source_pz)} ; source_mes_pz:{len(source_mes_pz)}')

source_bh:13 ; source_pz:12 ; source_mes_pz:17


* **Sheet : 'Equipement'**

In [31]:
tmp_dir='../../CF_data/Result_traitem/profils_sols_donnees_forages/'
sheet='Equipement'

In [32]:
df = pd.read_excel('../../CF_data/Data_UMONS/ouvrages/Profils sols et données forages.xls', sheet_name='Equipement')#, skiprows=1)
df=na_line_drop(df,0)
df=na_col_drop(df,1)
df.replace(r'<|>','', inplace=True, regex=True)
df.replace(r'-$',np.nan, inplace=True, regex=True)

gdf_viewer(df)

Rows : 36, columns : 7


interactive(children=(IntSlider(value=10, description='rows', max=36, min=10, readout=False), IntSlider(value=…

In [33]:
df.drop(columns=['Déplacement'], inplace=True)
name=['ID', 'Equip_top', 'Equip_base', 'Diam_for','Diam_ext_pz', 'Legende']
df=col_ren(df, mode=1, name=name)

In [34]:
compute_BH_length(df, top_col='Equip_top', base_col='Equip_base')

UndefinedVariableError: name 'Litho_base' is not defined

In [None]:
coi = ['ID', 'Profondeur', 'Diam_for', 'Diam_ext_pz']
pz=df[coi].drop_duplicates(['ID'])
pz['Type'] = 'Piezo'

In [None]:
gdf_viewer(df, un_val='ID', rows=3)

In [None]:
pz = pz
equip = df
source_equip = equip

In [None]:
if not os.path.exists(tmp_dir):
    os.makedirs(tmp_dir)
    
equip.to_csv(tmp_dir+sheet+'_Equipments.csv', index=False)
pz.to_csv(tmp_dir+sheet+'_Piezometers.csv', index=False)
#mes_pz.to_csv(tmp_dir+sheet+'_Measures.csv', index=False)

source_equip.to_csv(tmp_dir+'source_Equipments.csv', index=False) #all Piezometers data in the source
#source_mes_pz.to_csv(tmp_dir+'source_Measures.csv', index=False) #all Boreholes data in the source
source_pz.to_csv(tmp_dir+'source_Piezometers.csv', index=False) #all Piezometers data in the source

print(f'source_bh:{len(source_bh)} ; source_pz:{len(source_pz)} ; source_mes_pz:{len(source_mes_pz)} ; '
     f'source_equip:{len(source_equip)}')

* **Sheets: 'Echantillon' + 'Organoleptique'**

In [None]:
tmp_dir='../../CF_data/Result_traitem/profils_sols_donnees_forages/'
sheet='Echant-organo'

In [None]:
df = pd.read_excel('../../CF_data/Data_UMONS/ouvrages/Profils sols et données forages.xls', sheet_name='Echantillon')#, skiprows=1)
df.replace(r'<|>','', inplace=True, regex=True)
df.replace(r'-$',np.nan, inplace=True, regex=True)

gdf_viewer(df)

In [None]:
df.rename(columns={'De':'Ech_top', 'A':'Ech_base', 'Numéro':'ID_ech'}, inplace=True)

In [None]:
sdf = pd.read_excel('../../CF_data/Data_UMONS/ouvrages/Profils sols et données forages.xls', sheet_name='Organoleptique')#, skiprows=1)
df.replace(r'<|>','', inplace=True, regex=True)
df.replace(r'-$',np.nan, inplace=True, regex=True)
gdf_viewer(sdf)

In [None]:
list(sdf.columns)

In [None]:
sdf.rename(columns={'De':'Pol_top', 'A':'Pol_base'}, inplace=True)

In [None]:
mdf, error_df = gdf_merger(df, sdf, 'outer', 'ID')
mdf['Type_ech']='Sol'
mdf.insert(4, 'Type_ech', mdf.pop('Type_ech'))

In [None]:
gdf_viewer(mdf, rows=3)

In [None]:
prv_sol = mdf
source_prv_sol=prv_sol

In [None]:
if not os.path.exists(tmp_dir):
    os.makedirs(tmp_dir)
    
prv_sol.to_csv(tmp_dir+sheet+'_Samples-soil.csv', index=False)
source_prv_sol.to_csv(tmp_dir+'Source_Samples-soil.csv', index=False) #all Samples and organoleptic data in the source
print(f'source_bh:{len(source_bh)} ; source_pz:{len(source_pz)} ; source_mes_pz:{len(source_mes_pz)} ; '
     f'source_prv_sol:{len(source_prv_sol)} ; source_equip:{len(source_equip)}')

* **Sheet : 'Log'**

In [None]:
tmp_dir='../../CF_data/Result_traitem/profils_sols_donnees_forages/'
sheet='Log'

In [None]:
df = pd.read_excel('../../CF_data/Data_UMONS/ouvrages/Profils sols et données forages.xls', sheet_name='Log')#, skiprows=1)
gdf_viewer(df)

In [None]:
df.rename(columns={'De':'Litho_top', 'A':'Litho_base'}, inplace=True)

In [None]:
q=df.query('Keyword.str.contains(".ointe", regex=True)', engine='python').index
df.drop(q, inplace=True)
df.reset_index(drop=True, inplace=True)

In [None]:
compute_BH_length(df)

In [None]:
coi = ['ID', 'Profondeur']
bh=df[coi].drop_duplicates(['ID'])
bh['Type'] = 'Forage'

In [None]:
gdf_viewer(df, rows=3)

In [None]:
litho=df
source_litho=litho

In [None]:
if not os.path.exists(tmp_dir):
    os.makedirs(tmp_dir)

bh.to_csv(tmp_dir+sheet+'_Boreholes.csv', index=False)
litho.to_csv(tmp_dir+sheet+'_Lithology.csv', index=False)
source_litho.to_csv(tmp_dir+'source_Lithology.csv', index=False) #all lithologies or descriptions data in the source

print(f'source_bh:{len(source_bh)} ; source_pz:{len(source_pz)} ; source_mes_pz:{len(source_mes_pz)} ; '
     f'source_prv_sol:{len(source_prv_sol)} ; source_equip:{len(source_equip)} ; source_litho:{len(source_litho)} ;')

## 2-Database MEMORIS3.xlsx
* **Sheet : 'PROFILS_SOL'**

In [None]:
# New file, so the source variables must be overwritten !!
_df=pd.DataFrame()
source_bh, source_pz, source_litho, source_prv_sol, source_mes_pz, source_equip= _df, _df, _df, _df, _df, _df

In [None]:
tmp_dir='../../CF_data/Result_traitem/database_Memoris3/'
sheet='Profils_sol'

In [None]:
df = pd.read_excel('../../CF_data/Data_UMONS/Contamination/Doc_SITEREM/Rapport de synthèse des études de sol et des eaux souterraines. '+
                        'Siterem - 2017/Database MEMORIS3.xlsx', sheet_name='PROFILS_SOL')#, skiprows=2)
df.replace(r'<|>','', inplace=True, regex=True)
df.replace(r'-$',np.nan, inplace=True, regex=True)

gdf_viewer(df, rows=3)

In [None]:
df = df[['Date', 'N°', 'Id', 'Profondeur', 'Description', 'Piézo', 'Unnamed: 6',
                             'Gouge Ø75', 'MFT Ø145', 'carottier', 'tarrière', 'Liner Ø60']] 

In [None]:
df.rename({'Date':'Date_ouv', 'N°':'Ref', 'Id':'idx', 'Piézo':'Type', 'Unnamed: 6':'Societe',
                'MFT Ø145':'MFT_145', 'Gouge Ø75':'Gouge_75', 'Liner Ø60': 'Liner_60'}, axis=1, inplace=True)

In [None]:
print(list(set(df['Date_ouv'].apply(lambda x: x.year if not pd.isnull(x) else x))))

In [None]:
df.loc[df.fillna('').query("Societe.str.contains('x|X')").index, 'Type']='X'

In [None]:
df.loc[df.fillna('').query("Gouge_75.str.contains('SBS|SITER')").index, 'Societe']='SBS Environnement'
df.loc[df.fillna('').query("Gouge_75.str.contains('SBS|SITER')").index, 'Gouge_75']=''

In [None]:
for i in range(len(df['Date_ouv'])-1):
    if not pd.isnull(df.loc[i, 'Date_ouv']) and pd.isnull(df.loc[i+1, 'Date_ouv']):
        df.loc[i+1, 'Date_ouv']=df.loc[i, 'Date_ouv']
        
    if not pd.isnull(df.loc[i, 'Societe']) and pd.isnull(df.loc[i+1, 'Societe']):
        df.loc[i+1, 'Societe']=df.loc[i, 'Societe']
        
    if not pd.isnull(df.loc[i, 'Type']) and pd.isnull(df.loc[i+1, 'Type']) and \
       df.loc[i, 'Ref']==df.loc[i+1, 'Ref']:
        df.loc[i+1, 'Type']=df.loc[i, 'Type']

In [None]:
for i in range(len(df['idx'])-1):    
    if df.loc[i,'Ref']==df.loc[i+1,'Ref'] and not pd.isnull(df.loc[i, 'Profondeur'])\
    and re.findall('Forage',df.loc[i, 'Profondeur']):
        df.loc[i+1,'idx']=df.loc[i, 'Profondeur'][0]+str(df.loc[i, 'Ref'])
        w=df.loc[i, 'Profondeur'][0]
    elif df.loc[i,'Ref']==df.loc[i+1,'Ref'] and not pd.isnull(df.loc[i, 'Profondeur']):
        df.loc[i+1,'idx']=w+str(df.loc[i, 'Ref'])
    
    if df.loc[i,'Ref']==df.loc[i+1,'Ref'] and not pd.isnull(df.loc[i, 'Profondeur'])\
    and re.findall('Tranch',df.loc[i, 'Profondeur']):
        df.loc[i+1,'idx']=df.loc[i, 'Profondeur'][0]+str(df.loc[i, 'Ref'])
        w=df.loc[i, 'Profondeur'][0]
    elif df.loc[i,'Ref']==df.loc[i+1,'Ref'] and not pd.isnull(df.loc[i, 'Profondeur']):
        df.loc[i+1,'idx']=w+str(df.loc[i, 'Ref'])
     
   # if df.loc[i,'Ref']==df.loc[i+1,'Ref'] and not pd.isnull(df.loc[i, 'Profondeur'])\
   # and re.findall('Moni',df.loc[i, 'Profondeur']):
   #     df.loc[i+1,'idx']=df.loc[i, 'Profondeur'][0]+str(df.loc[i, 'Ref'])
   #     w=df.loc[i, 'Profondeur'][0]
   # elif df.loc[i,'Ref']==df.loc[i+1,'Ref'] and not pd.isnull(df.loc[i, 'Profondeur']):
   #     df.loc[i+1,'idx']=w+str(df.loc[i, 'Ref'])

In [None]:
df['Ref']=df['idx'].apply(lambda x : x if re.findall('F|T', str(x)) else '')
df['Ref']=df['idx'].apply(lambda x : x.replace('Monito ', 'Mon') if re.findall('Monit', str(x)) else x)

In [None]:
df['Type']=df['Type'].apply(lambda x: 'Piezo' if not pd.isnull(x) else '')

In [None]:
df['Ref']=df['Ref'].apply(lambda x: re.sub("\.1","a",str(x)) if re.search(r"\.1", str(x)) else x)
df['Ref']=df['Ref'].apply(lambda x: re.sub("\.2","b",str(x)) if re.search(r"\.2", str(x)) else x)
df['Ref']=df['Ref'].apply(lambda x: re.sub("\.3","c",str(x)) if re.search(r"\.3", str(x)) else x)
df['Ref']=df['Ref'].apply(lambda x: re.sub("\.4","d",str(x)) if re.search(r"\.4", str(x)) else x)

In [None]:
gen_id_dated(df, ref_col='Ref', date_col='Date_ouv')

In [None]:
df.loc[df.query('Profondeur!=Profondeur' ).index,'Profondeur']=''

In [None]:
df['Method']=''
            
for i in range(len(df['Method'])):
    if not pd.isnull(df.loc[i, 'Gouge_75']) : df.loc[i, 'Method']='Gouge_75'
    if not pd.isnull(df.loc[i, 'MFT_145']) : df.loc[i, 'Method']='MFT_145'
    if not pd.isnull(df.loc[i, 'Liner_60']) : df.loc[i, 'Method']='Liner_60'
    if not pd.isnull(df.loc[i, 'carottier']) : df.loc[i, 'Method']='carrotier'
    if not pd.isnull(df.loc[i, 'tarrière']) : df.loc[i, 'Method']='tarrière'

In [None]:
df.drop(df.query('Profondeur.str.contains("Forage") and Profondeur!="Forage bloqué"', engine='python').index, inplace=True)
df.drop(df.query('Profondeur.str.contains("Tranc") and Profondeur!="Tranchée bloqué"', engine='python').index, inplace=True)
df.drop(df.query('Profondeur.str.contains(".orage|..ranch", regex=True)', engine='python').index, inplace=True)
df.drop(df.fillna('').query('Description.str.contains("^.orage bloq|^.ranc.* bloq|^.*efus", regex=True)', engine='python').index, inplace=True)
df.drop(df.query('Ref!=Ref').index, inplace=True)
df.drop(columns=['MFT_145','Gouge_75','Liner_60', 'carottier', 'tarrière', 'idx'], axis=1, inplace=True)
df.reset_index(drop=True, inplace=True)

In [None]:
df['Litho_top'] = df['Profondeur'].apply(lambda x: x.replace(',','.').split('-')[0].strip(' m'))
df['Litho_base'] = df['Profondeur'].apply(lambda x: x.replace(',','.').split('-')[-1].strip(' m'))

In [None]:
df.rename({'Ref':'ID'}, axis=1, inplace=True)
if 'Profondeur' in df.columns: df.drop(columns=['Profondeur'], axis=1, inplace=True)

In [None]:
set([x[0] for x in list(set(df.ID)) if isinstance(x,str)])

In [None]:
df.loc[df.query('ID_date.str.contains("T")', engine='python').index, 'Type'] = 'Tranchee'
df.loc[df.query('Type==""', engine='python').index, 'Type'] = 'Forage'

In [None]:
df.loc[1268, ['ID_date','ID']] = df.loc[1267, ['ID_date','ID']]
df.loc[df.query('Description.isnull() or Description.str.len()<1').index, 'Description'] = ''

In [None]:
df.drop(index=df.query('Litho_base.isnull() or Litho_base.str.len()<1 or ' 
                       'Description.str.contains("Bloqu")').index, inplace=True)
df.reset_index(drop=True, inplace=True)

In [None]:
compute_BH_length(df)

In [None]:
# we only have lithologies here, but separate facilities could be interesting for the following
coi = ['ID_date', 'Date_ouv', 'ID', 'Profondeur','Type', 'Societe']

litho = df
bh = df.loc[df.query('Type=="Forage"', engine='python').index] # boreholes 
trch = df.loc[df.query('Type=="Tranchee"', engine='python').index] # trenches
pz = df.loc[df.query('Type=="Piezo"', engine='python').index] # piezometers

bh=bh[coi].drop_duplicates(['ID'])
pz=pz[coi].drop_duplicates(['ID'])
trch=trch[coi].drop_duplicates(['ID'])

In [None]:
len(bh), len(pz), len(trch)

In [None]:
gdf_viewer(pz, un_val='ID', rows=3)

In [None]:
source_litho = df
source_bh = bh
source_pz = pz
source_trch = trch

In [None]:
if not os.path.exists(tmp_dir):
    os.makedirs(tmp_dir)
    
litho.to_csv(tmp_dir+sheet+'_Lithology.csv', index=False)
bh.to_csv(tmp_dir+sheet+'_Boreholes.csv', index=False)
pz.to_csv(tmp_dir+sheet+'_Piezometers.csv', index=False)
trch.to_csv(tmp_dir+sheet+'_Trenches.csv', index=False)

source_litho.to_csv(tmp_dir+'source_Lithology.csv', index=False) #all lithologies or descriptions data in the source
source_bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)
source_pz.to_csv(tmp_dir+'source_Piezometers.csv', index=False)
source_trch.to_csv(tmp_dir+'source_Trenches.csv', index=False)

print(f'source_bh:{len(source_bh)} ; source_pz:{len(source_pz)} ; source_mes_pz:{len(source_mes_pz)} ; '
     f'source_prv_sol:{len(source_prv_sol)} ; source_litho:{len(source_litho)} ;')

* **Sheet : 'DONNEES PIEZOS'**

In [None]:
tmp_dir='../../CF_data/Result_traitem/database_Memoris3/'
sheet='Donnees_piezos'

In [None]:
df = pd.read_excel('../../CF_data/Data_UMONS/Contamination/Doc_SITEREM/Rapport de synthèse des études de sol et des eaux souterraines. '+
                        'Siterem - 2017/Database MEMORIS3.xlsx', sheet_name='DONNEES PIEZOS', skiprows=2)
df.replace(r'<|>','', inplace=True, regex=True)
df.replace(r'-$',np.nan, inplace=True, regex=True)

gdf_viewer(df, rows=3)

In [None]:
sdf=df[[df.columns.to_list()[1]]+df.columns.to_list()[13:17]]
df=df[df.columns.to_list()[1:13]+df.columns.to_list()[17:22]]

In [None]:
df.rename(columns={'Campagne':'Societe','N_piezo.':'ID','X [m]':'X','Y [m]':'Y','Z tête PZ [m]':'Z',
                        'Zsol [m]':'Zsol', 'Prof_PZ [m]':'Long_pz','Section_crépinée [m]':'Long_crep',
                        'Aquifère':'Nappe', 'Caractéristique':'Caractere',
                        'Diamètre_int [m]':'Diam_int_pz','Surnageant [cm]':'Surnageant','Sousnageant [cm]':'Sousnageant',
                        'Description éch. \nOd/turb.':'Opacite_eau','Remarques':'Rmq'}, inplace=True)

df=df.query("ID ==ID")
df.replace('-',np.nan, inplace=True)

In [None]:
df['Type']=df['Long_crep'].apply(lambda x: 'Piezo' if not pd.isnull(x) else 'Unknown')

In [None]:
df=df[['ID','X','Y','Z','Zsol','Type','Societe','Nappe','Long_pz','Long_crep','Diam_int_pz', 'Surnageant', 
         'Sousnageant','Caractere', 'Rmq', 'Opacite_eau','Zone', 'Sous_zone']]
df['Sousnageant']=df['Sousnageant'].apply(lambda x: x/100 if not pd.isnull(x) else x) #convert unit in [m]
df['Surnageant']=df['Surnageant'].apply(lambda x: x/100 if not pd.isnull(x) else x)

In [None]:
gdf_viewer(df, un_val='ID', rows=3) # all units in [m]

In [None]:
prv_eau=df[['ID','Surnageant', 'Sousnageant', 'Caractere','Rmq', 'Opacite_eau']]
prv_eau['Type_ech']='Eau'

pz=df.query("Type=='Piezo'")
pz=pz[['ID','X','Y','Z','Zsol','Type','Societe','Nappe','Long_pz','Long_crep','Diam_int_pz','Zone','Sous_zone']]
pz['Diam_int_pz']=pz['Diam_int_pz'].apply(lambda x: x*1000 if not pd.isnull(x) else x) # millimeters

ouv=df.query("Type!='Piezo'") # unknown facilities' type (it seems they are not boreholes)
ouv=ouv[['ID','X','Y','Z','Type','Societe']]
ouv['Type'] = 'Unknown'

In [None]:
# data in the second part of the initial dataframe
sdf.rename(columns={'N_piezo.':'ID'}, inplace=True)
sdf=sdf.query("ID==ID")

In [None]:
df_tmp=pd.DataFrame()
col=sdf.columns.to_list()
ID_mes=0

for i in range(len(sdf)):
    for j in range(1,len(col)):
        d=col[j].strip('\n|.1').split('/')
        df_tmp.loc[ID_mes,'Date_mes']=dtm.date(int(d[2]), int(d[1]), int(d[0]))
        df_tmp.loc[ID_mes,'ID']=str(sdf.loc[i,'ID'])
        
        if j<=2: 
            df_tmp.loc[ID_mes,'Niv_eau_pz']=sdf.iloc[i,j]
            df_tmp.loc[ID_mes,'Niv_eau_sol']=sdf.iloc[i,j+2]
            ID_mes+=1

df_tmp=df_tmp.sort_values('Date_mes').reset_index(drop=True)
df_tmp['ID_mes']=df_tmp['ID'].apply(lambda x: 'Mes_'+str(x))
df_tmp.insert(0, 'ID_mes', df_tmp.pop('ID_mes'))

mes_pz = df_tmp[:-1]

In [None]:
mes_pz['Type_mes'] = 'Phys-chim'

In [None]:
gdf_viewer(mes_pz, rows=3), gdf_viewer(ouv, rows=3)

In [None]:
source_pz = pz
source_prv_eau = prv_eau
source_ouv = ouv
source_mes_pz = mes_pz

In [None]:
if not os.path.exists(tmp_dir):
    os.makedirs(tmp_dir)

#bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)
#litho.to_csv(tmp_dir+sheet+'_Lithology.csv', index=False)
pz.to_csv(tmp_dir+sheet+'_Piezometers.csv', index=False)
mes_pz.to_csv(tmp_dir+sheet+'_Measures.csv', index=False)
ouv.to_csv(tmp_dir+sheet+'_Unkown-facility.csv', index=False)
prv_eau.to_csv(tmp_dir+sheet+'_Samples-water.csv', index=False)


source_mes_pz.to_csv(tmp_dir+'source_Measures.csv', index=False) #all lithologies or descriptions data in the source
source_pz.to_csv(tmp_dir+'source_Piezometers.csv', index=False)
source_prv_eau.to_csv(tmp_dir+'source_Samples-water.csv', index=False)
source_ouv.to_csv(tmp_dir+'source_Unkown-facility.csv', index=False)

print(f'source_bh:{len(source_bh)} ; source_pz:{len(source_pz)} ; source_litho:{len(source_litho)} ; '
     f'source_Fac-uknw:{len(source_ouv)} ; source_prv_sol:{len(source_prv_sol)} ; source_prv_eau:{len(source_prv_eau)} ;\n'
     f'source_mes_pz:{len(source_mes_pz)} ; ')

* **Sheet : 'DRAINS ET PIEZOS ENEL'**

In [None]:
tmp_dir='../../CF_data/Result_traitem/database_Memoris3/'
sheet='Drains_Pz_ENEL'

In [None]:
df = pd.read_excel('../../CF_data/Data_UMONS/Contamination/Doc_SITEREM/Rapport de synthèse des études de sol et des eaux souterraines. Siterem - 2017/Database MEMORIS3.xlsx', 
                        sheet_name='DRAINS ET PIEZOS ENEL', skiprows=1)

df=na_line_drop(df,0)
df=na_col_drop(df,1)
df.replace(r'<|>','', inplace=True, regex=True)
df.replace(r'-$',np.nan, inplace=True, regex=True)

gdf_viewer(df)

In [None]:
df.rename(columns={'N°':'ID', 'Date ':'Date_prv','Hauteur de la chambre ':'Ht_Chbre','T':'Temp'}, inplace=True)

df_tmp=df[df.columns.to_list()[:2]+df.columns.to_list()[10:-2]]
df=df[df.columns.to_list()[:7]]

In [None]:
df.columns.to_list()[:12]

In [None]:
df=df[['ID', 'X', 'Y', 'Zsol', 'Ht_Chbre']][2:21]
df.replace('-',np.nan, inplace=True)
df.drop(index=[5], inplace=True)
df.loc[:14,'Type']='Unknown'
df.loc[15:,'Type']='Piezo'
#df['Zsol']=df['Zsol'].apply(lambda x: x if not pd.isnull(x) else np.nan)

In [None]:
gdf_viewer(df, un_val='ID', rows=3)

In [None]:
pz=df.query("Type=='Piezo'")
ouv=df.query("Type!='Piezo'")

In [None]:
sdf=df_tmp.query('ID==ID').reset_index(drop=True)

In [None]:
df_tmp=pd.DataFrame()
cols=[sdf.columns.to_list()[2]]+sdf.columns.to_list()[4:6]
ID_mes=0
d=['01/10/2013','01/11/2015','01/12/2016'] # 01/11/2015 added by me (according to data observation)

for i in range(len(sdf)):
    k=0
    for j in cols:
        #df_tmp.loc[ID_mes,'ID_mes']='Mes_'+str(ID_mes)
        df_tmp.loc[ID_mes,'Date_mes']=dtm.date(int(d[k].split('/')[2]), int(d[k].split('/')[1]), 
                                                             int(d[k].split('/')[0]))
        df_tmp.loc[ID_mes,'ID']=str(sdf.loc[i,'ID'])
        df_tmp.loc[ID_mes,'Niv_eau_sol']=sdf.loc[i,j]
                
        if df_tmp.loc[ID_mes, 'Date_mes']==sdf.loc[i, 'Date_prv'] and \
        df_tmp.loc[ID_mes, 'ID']==sdf.loc[i, 'ID']:
            df_tmp.loc[ID_mes, sdf.columns.to_list()[6:-1]]=sdf.iloc[i, 6:-1]
        
        ID_mes+=1
        k+=1
df_tmp.replace('-', np.nan, inplace=True)
df_tmp=df_tmp.sort_values('Date_mes').reset_index(drop=True)
df_tmp['ID_mes']=df_tmp['ID'].apply(lambda x: 'Mes_'+str(x))
df_tmp.insert(0, 'ID_mes', df_tmp.pop('ID_mes'))

In [None]:
df_tmp=na_line_drop(df_tmp, 3)

In [None]:
df_tmp['ID_mes']=df_tmp['ID'].apply(lambda x: 'Mes_'+x)
df_tmp['CE']=df_tmp['CE'].apply(lambda x: pd.to_numeric(x)/1000 
                                  if re.search('^\d+', str(x)) and not pd.isnull(x) else np.nan)

In [None]:
mes_pz=df_tmp[['ID_mes', 'Date_mes', 'ID', 'Niv_eau_sol', 'pH', 'CE', 'Temp', 'ORP','Odiss']]

an=df_tmp[['ID','arsenic', 'cadmium', 'chrome', 'cobalt', 'cuivre', 'mercure',
       'plomb', 'nickel', 'zinc', 'CN_libre', 'CN_totaux', 'CN_totaux.1',
       'CN_totaux.2', 'thiocyanate', 'benzène', 'toluène', 'éthylbenzène',
       'orthoxylène', 'para- et métaxylène', 'xylènes', 'BTEX total',
       'styrène', 'Iph.', 'naphtalène', 'anthracène', 'phénanthrène',
       'fluoranthène', 'benzo(a)anthracène', 'chrysène', 'benzo(a)pyrène',
       'benzo(ghi)pérylène', 'benzo(k)fluoranthène', 'indéno(1,2,3-cd)pyrène',
       '\nC5-C8', 'C8-C10', 'C10-C12', 'C12-C16', ' C16 - C21', 'C21 - C35',
       'C35 - C40', 'totaux C10-C35', 'C10-C12.1', 'C12-C22']]

#another way to do this
#an=df_tmp[df_tmp.columns.to_list()[1:3]+df_tmp.columns.to_list()[9:]]

In [None]:
mes_pz['Type_mes'] = 'Phys-chim' 

In [None]:
an=na_line_drop(an, 2)
an.insert(1, 'Type_ech', 'Eau')
an.rename(columns={'ID':'ID_ech'}, inplace=True)
#an['Anl_ID']=an['ID'].apply(lambda x: 'Anl_'+str(x))
#an.insert(an.columns.to_list().index('ID')+1, 'Type_ech', an.pop('Type_ech'))

In [None]:
an=col_ren(an, name=pol_field_model, mode=1)

In [None]:
an=dble_col_drop(an)

In [None]:
gdf_viewer(source_mes_pz, rows=5)

Data merging

In [None]:
source_pz, error_df=gdf_merger(source_pz, pz, 'outer', 'ID')

In [None]:
source_mes_pz, error_df=gdf_merger(source_mes_pz, mes_pz, 'outer', 'ID_mes', fcol='ID_mes')

In [None]:
source_ouv, error_df=gdf_merger(source_ouv, ouv, 'outer', 'ID')

In [None]:
source_an=an

In [None]:
if not os.path.exists(tmp_dir):
    os.makedirs(tmp_dir)

#bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)
#litho.to_csv(tmp_dir+sheet+'_Lithology.csv', index=False)
pz.to_csv(tmp_dir+sheet+'_Piezometers.csv', index=False)
mes_pz.to_csv(tmp_dir+sheet+'_Measures.csv', index=False)
ouv.to_csv(tmp_dir+sheet+'_Unkown-facility.csv', index=False)
#prv_eau.to_csv(tmp_dir+sheet+'_Samples-water.csv', index=False)
an.to_csv(tmp_dir+sheet+'_Analysis.csv', index=False)


source_mes_pz.to_csv(tmp_dir+'source_Measures.csv', index=False) 
source_pz.to_csv(tmp_dir+'source_Piezometers.csv', index=False)
#source_prv_eau.to_csv(tmp_dir+'source_Samples-water.csv', index=False)
source_ouv.to_csv(tmp_dir+'source_Unkown-facility.csv', index=False)
source_an.to_csv(tmp_dir+'source_Analysis.csv', index=False)

print(f'source_bh:{len(source_bh)} ; source_pz:{len(source_pz)} ; source_litho:{len(source_litho)} ; '
     f'source_Fac-uknw:{len(source_ouv)} ; source_prv_sol:{len(source_prv_sol)} ; source_prv_eau:{len(source_prv_eau)} ;\n'
     f'source_mes_pz:{len(source_mes_pz)} ; source_an:{len(source_an)} ;')

* **Sheet : 'RESULTS_EAU' (F)**

In [None]:
tmp_dir='../../CF_data/Result_traitem/database_Memoris3/'
sheet='Result_eau'

In [None]:
df = pd.read_excel('../../CF_data/Data_UMONS/Contamination/Doc_SITEREM/Rapport de synthèse des études de sol et des eaux souterraines. Siterem - 2017/Database MEMORIS3.xlsx', 
                        sheet_name='RESULTS_EAU', skiprows=1)

df.replace(r'<|>','', inplace=True, regex=True)
df.replace(r'-$',np.nan, inplace=True, regex=True)

gdf_viewer(df, rows=5)

In [None]:
df.drop(0, inplace=True)
df.reset_index(drop=True, inplace=True)

In [None]:
df.rename(columns={'Campagne':'Societe','N_piezo.':'ID','Z tête PZ':'Z','Zsol':'Zsol', 'Prof_PZ':'Long_pz',
                        'Section_crépinée':'Long_crep','Diamètre_int':'Diam_int_pz','Surnageant':'Surnageant',
                        'Sousnageant':'Sousnageant','Description éch.':'Opacite_eau','Remarques':'Rmq',
                        'Aquifère_échantillonné':'Nappe', 'Caractéristique':'Caractere'}, inplace=True)

#df=df[['ID','X', 'Y', 'Z', 'Zsol', 'Long_pz', 'Long_crep', 'Diam_int_pz', 'Societe']]#[:130]
df=df.query("ID ==ID")
df.replace('-',np.nan, inplace=True)

In [None]:
# splitting
sdf=df[[df.columns.to_list()[0]]+df.columns.to_list()[12:16]+df.columns.to_list()[21:26]]
an=df[[df.columns.to_list()[0]]+df.columns.to_list()[26:]]
prv_eau=df[df.columns.to_list()[:3]+df.columns.to_list()[16:21]+['Nappe']]
df=df[df.columns.to_list()[:12]]

In [None]:
df['Type']=df['Long_crep'].apply(lambda x: 'Piezo' if not pd.isnull(x) else 'Unknown')
df.insert(8, 'Type', df.pop('Type'))
df['Diam_int_pz'] = df['Diam_int_pz'].apply(lambda x: x*1000 if not pd.isnull(x) else x)

In [None]:
pz=df.query("Type=='Piezo'")
ouv=df.query("Type!='Piezo'")[['ID', 'Societe', 'Zone', 'Sous_zone', 'X', 'Y', 'Z', 'Type']]

In [None]:
prv_eau['Surnageant']=prv_eau['Surnageant'].apply(lambda x: x/100) # to express value in [m]
prv_eau['Sousnageant']=prv_eau['Sousnageant'].apply(lambda x: x/100)

In [None]:
df_tmp=pd.DataFrame()
cols=sdf.columns.to_list()[5:]
ID_mes=0
d=['27/04/2010', '08/09/2010']

for i in range(len(sdf)):
    for k in [0,1]:
        df_tmp.loc[ID_mes,'ID_mes']='Mes_'+str(ID_mes)
        df_tmp.loc[ID_mes,'ID']=str(sdf.loc[i,'ID'])
        
        dt=d[k].split('/')
        df_tmp.loc[ID_mes,'Date_mes']=dtm.date(int(dt[2]), int(dt[1]),int(dt[0]))
        df_tmp.loc[ID_mes,'Niv_eau_pz']=sdf.iloc[i,k+1]
        df_tmp.loc[ID_mes,'Niv_eau_sol']=sdf.iloc[i,k+3]
        df_tmp.loc[ID_mes, cols]=list(sdf.loc[i,cols])
        ID_mes+=1          
        
df_tmp.replace('-', np.nan, inplace=True)
df_tmp=df_tmp.sort_values('Date_mes').reset_index(drop=True)
df_tmp['ID_mes']=df_tmp['ID'].apply(lambda x: 'Mes_'+str(x))
df_tmp.insert(0, 'ID_mes', df_tmp.pop('ID_mes'))

In [None]:
q=df_tmp.query('Niv_eau_pz.isnull() and Niv_eau_sol.isnull()').index
df_tmp.drop(q, inplace=True)
df_tmp['CE']=df_tmp['CE'].apply(lambda x: pd.to_numeric(x)/1000 
                                  if re.search('^\d+', str(x)) and not pd.isnull(x) else np.nan)
mes_pz=df_tmp
mes_pz['Type_mes'] = 'Phys-chim'

In [None]:
source_mes_pz=source_mes_pz.sort_values('Date_mes').reset_index(drop=True)

In [None]:
gdf_viewer(mes_pz, rows=3), gdf_viewer(source_mes_pz, rows=3)

In [None]:
gdf_viewer(source_mes_pz.merge(mes_pz, on='ID', how='outer').query("ID=='1'"))#[['ID','Date_Mes']]))

In [None]:
gdf_viewer(gdf_merger(source_mes_pz, mes_pz, col='ID', how='outer')[0])

In [None]:
an=na_line_drop(an,col_n=3)

In [None]:
an=dble_col_drop(an)

In [None]:
an.insert(1, 'Type_ech', 'Eau')
an.rename(columns={'ID':'ID_ech'}, inplace=True)
#an['Anl_ID']=an['ID'].apply(lambda x: 'Anl_'+str(x))
#an.insert(an.columns.to_list().index('ID')+1, 'Type_ech', 'Eau')

In [None]:
an=col_ren(an, name=pol_field_model, mode=1)

data merging

In [None]:
source_pz, error_df=gdf_merger(source_pz, pz, 'outer', 'ID')

In [None]:
source_mes_pz, error_df=gdf_merger(source_mes_pz, mes_pz, 'outer', 'ID', fcol='ID_mes', )
if len(error_df)>0 :gdf_viewer(error_df, rows=3)

In [None]:
gdf_viewer(source_mes_pz, rows=3)

In [None]:
source_ouv, error_df=gdf_merger(source_ouv, ouv, 'outer', 'ID')
if len(error_df)>0 :gdf_viewer(error_df, rows=3)

In [None]:
len(source_an.columns),len(set(source_an.columns))
source_an.columns

In [None]:
source_an, error_df=gdf_merger(source_an, an, 'outer', 'ID_ech')
if len(error_df)>0 :gdf_viewer(error_df, rows=3)

In [None]:
gdf_viewer(source_an)

In [None]:
if not os.path.exists(tmp_dir):
    os.makedirs(tmp_dir)

#bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)
#litho.to_csv(tmp_dir+sheet+'_Lithology.csv', index=False)
pz.to_csv(tmp_dir+sheet+'_Piezometers.csv', index=False)
mes_pz.to_csv(tmp_dir+sheet+'_Measures.csv', index=False)
ouv.to_csv(tmp_dir+sheet+'_Unkown-facility.csv', index=False)
prv_eau.to_csv(tmp_dir+sheet+'_Samples-water.csv', index=False)
an.to_csv(tmp_dir+sheet+'_Analysis.csv', index=False)


source_mes_pz.to_csv(tmp_dir+'source_Measures.csv', index=False) #all lithologies or descriptions data in the source
source_pz.to_csv(tmp_dir+'source_Piezometers.csv', index=False)
source_prv_eau.to_csv(tmp_dir+'source_Samples-water.csv', index=False)
source_ouv.to_csv(tmp_dir+'source_Unkown-facility.csv', index=False)
source_an.to_csv(tmp_dir+'source_Analysis.csv', index=False)
#source_litho.to_csv(tmp_dir+'source_Lithology.csv', index=False)
#source_bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)

print(f'source_bh:{len(source_bh)} ; source_pz:{len(source_pz)} ; source_litho:{len(source_litho)} ; '
     f'source_Fac-uknw:{len(source_ouv)} ; source_prv_sol:{len(source_prv_sol)} ; source_prv_eau:{len(source_prv_eau)} ;\n'
     f'source_mes_pz:{len(source_mes_pz)} ; source_an:{len(source_an)} ;')

* **Sheet : 'RESULTS_SOL'**

In [None]:
tmp_dir='../../CF_data/Result_traitem/database_Memoris3/'
sheet='Result_sol'

In [None]:
df = pd.read_excel('../../CF_data/Data_UMONS/Contamination/Doc_SITEREM/Rapport de synthèse des études de sol et des eaux souterraines. Siterem - 2017/'
                   'Database MEMORIS3.xlsx', sheet_name='RESULTS_SOL', skiprows=1)
df=na_line_drop(df,0)
df=na_col_drop(df,1)
df.replace(r'<|>','', inplace=True, regex=True)
df.replace(r'-$',np.nan, inplace=True, regex=True)

gdf_viewer(df)

In [None]:
df.rename(columns={'Unnamed: 92':'EOX', 'Unnamed: 93':'Idc_phenol','Campagne':'Societe','N_forage':'ID','refus':'Refus',
                   'Prof.\nforage':'Long', 'N_ech':'ID_ech', 'Min_Ech':'Ech_top','Max_Ech':'Ech_base',
                  'Terrain':'Nappe','Epaisseur remblais':'Ep_remb', 'Epaisseur alluvions':'Ep_alluv', 
                   'Nature':'Polluant','Min_organo':'Pol_top', 'Max_organo':'Pol_base', 'Fraction   2000 µm':'Fract_2000µ',
                   'Fraction   63 µm':'Fract_63µ', 'Fraction   45 µm':'Fract_45µ','Fraction   16 µm':'Fract_16µ',
                   'Fraction   2 µm':'Fract_2µ'}, inplace=True)

In [None]:
df.drop(columns=[df.columns.to_list()[x] for x in range(len(df.columns))
                      if re.search(r"Unnamed",df.columns.to_list()[x])], axis=1, inplace=True) 
df.replace(r'<|>','', inplace=True, regex=True)
df=df.query('ID==ID')
df['ID']=df['ID'].apply(lambda x : x.replace('Monito ', 'Mon') if re.findall('Monit', str(x)) else x)
df['ID_ech']=df['ID_ech'].apply(lambda x : x.replace('Monito ', 'Mon') if re.findall('Monit', str(x)) else x)
df.replace('-',np.nan, inplace=True)

In [None]:
for i in range(len(df['ID'])):
    #r=re.search('(\w+)/.+',str(df.loc[i, 'ID_ech']))
    #if r : df.loc[i, 'ID']=r.group(1)
    r=re.search('^\d+',str(df.loc[i, 'ID']))
    if r : df.loc[i, 'ID']='F'+str(df.loc[i, 'ID'])

In [None]:
sdf=df[['ID','X','Y','Z','Long','Description','Nappe','Ep_remb','Ep_alluv','Refus','Societe','Zone','Sous_zone']]
sdf.insert(5, 'Type', '')

prv_sol=df[['ID','ID_ech', 'Ech_top', 'Ech_base','Polluant','Intensité', 'Pol_top','Pol_base','MS','pH H2O',
            'T° pH H2O', 'T° pH CaCl2','pH CaCl2','T° pH KCl','pH KCl','T° CE','CE','MO',
       'Résidus chauffage', 'Argile ', 'Fract_2000µ','Fract_63µ','Fract_45µ','Fract_16µ','Fract_2µ']]
#mes_sol=df[['ID','ID_ech','MS','pH H2O', 'T° pH H2O', 'T° pH CaCl2','pH CaCl2','T° pH KCl','pH KCl','T° CE','CE','MO',
#       'Résidus chauffage', 'Argile ', 'Fract_2000','Fract_63','Fract_45','Fract_16','Fract_2']]
prv_sol.insert(2, 'Type_ech', 'Sol')

an=df[['ID','ID_ech','Arsenic','Cadmium','Chrome_total','Chrome_VI','Cobalt','Cuivre','Mercure','Plomb','Nickel','Zinc','Libres',
       'Totaux', 'Non chloro destruct.', 'Thiocyantes', 'Cyanures totaux EPA','Benzène', 'Toluène', 'Ethylbenzène',
       'o-Xylènes','mp-Xylènes','Xylènes','SOM BTEX','Styrène','Naphtalène','Anthracene','Phénanthrène',
       'Fluoranthène', 'Benzoaanthracène', 'Chrysène','Benzo(a)pyrene','Benzo(ghi)pérylène','Benzo(k)fluoranthène',
       'Indéno[123cd]pyrène', 'Acenaphtylene', 'Acenaphthene', 'Fluorène','Pyrène', 'Benzo_b_fluoranthene', 
       'Dibenzo[ah]anthracène','SOM VROM 10', 'SOM EPA 16', 'C5_C8', 'C8_C10', 'C10_C12', 'C12_C16','C16_C21', 
       'C21_C35', 'C35_C40', 'SOM_C5_C35', 'C21_C30', 'C30_C35','SOM C10_C40', 'EOX', 'Idc_phenol']]

dfs processing

In [None]:
sdf['Refus']=sdf['Refus'].apply(lambda x: 'x' if not pd.isnull(x) else '')

In [None]:
for i in range(len(sdf['Nappe'])):
    x=sdf.loc[i,'Nappe']
    if re.search('[R|r]em', str(x)) : sdf.loc[i,'Nappe']='Remblais'
    elif re.search('[A|a]ll', str(x)) : sdf.loc[i,'Nappe']='Alluvions'
    elif re.search('[S|s]oc', str(x)) : sdf.loc[i,'Nappe']='Socle'
    elif re.search('[A|a]rg', str(x)) : sdf.loc[i,'Nappe']='Argile'
    else : sdf.loc[i,'Nappe']=''

In [None]:
litho=sdf #lithologies and all facilities without distinction here (because type of facility not defined clearly !)

In [None]:
prv_sol=na_line_drop(prv_sol, 3)

In [None]:
prv_sol=na_col_drop(prv_sol, non_na=5, verbose=False)

In [None]:
an.replace('#',np.nan, inplace=True)
an=na_line_drop(an, 2)
an.insert(1, 'Type_ech', 'Sol')
#an['Anl_ID']=an['ID'].apply(lambda x: 'Anl_'+str(x))
#an.insert(0, 'Anl_ID', an.pop('Anl_ID'))

data merging

In [None]:
#source_mes_sol=mes_sol
source_prv_sol=prv_sol

In [None]:
source_litho, error_df=gdf_merger(source_litho, litho, 'outer', 'ID', )

In [None]:
source_an, error_df=gdf_merger(source_an, an, 'outer', 'ID_ech', ) 

In [None]:
gdf_viewer(prv_sol, un_val='ID', rows=3), gdf_viewer(an, un_val='ID', rows=3) 

In [None]:
if not os.path.exists(tmp_dir):
    os.makedirs(tmp_dir)

#bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)
litho.to_csv(tmp_dir+sheet+'_Lithology.csv', index=False)
pz.to_csv(tmp_dir+sheet+'_Piezometers.csv', index=False)
mes_pz.to_csv(tmp_dir+sheet+'_Measures.csv', index=False)
#mes_sol.to_csv(tmp_dir+sheet+'_Measures-soil.csv', index=False)
ouv.to_csv(tmp_dir+sheet+'_Unkown-facility.csv', index=False)
prv_eau.to_csv(tmp_dir+sheet+'_Samples-water.csv', index=False)
prv_sol.to_csv(tmp_dir+sheet+'_Samples-soil.csv', index=False)
an.to_csv(tmp_dir+sheet+'_Analysis.csv', index=False)


source_mes_pz.to_csv(tmp_dir+'source_Measures.csv', index=False)
#source_mes_sol.to_csv(tmp_dir+'source_Measures-soil.csv', index=False)
source_pz.to_csv(tmp_dir+'source_Piezometers.csv', index=False)
source_prv_eau.to_csv(tmp_dir+'source_Samples-water.csv', index=False)
source_prv_sol.to_csv(tmp_dir+'source_Samples-soil.csv', index=False)
source_ouv.to_csv(tmp_dir+'source_Unkown-facility.csv', index=False)
source_an.to_csv(tmp_dir+'source_Analysis.csv', index=False)
source_litho.to_csv(tmp_dir+'source_Lithology.csv', index=False)
#source_bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)

print(f'source_bh:{len(source_bh)} ; source_pz:{len(source_pz)} ; source_litho:{len(source_litho)} ; '
     f'source_Fac-uknw:{len(source_ouv)} ; source_an:{len(source_an)} ;\nsource_prv_sol:{len(source_prv_sol)} ;'
     f'source_prv_eau:{len(source_prv_eau)} ; source_mes_pz:{len(source_mes_pz)} ; source_mes_sol:{len(source_mes_sol)} ;')

--------------------------------------------------------------------------------------------------------

## 3-obsrevations terrain et mesures piézos phase 2.xlsx

* **Sheet : 'Piézométrie'**

In [35]:
# New file, so the source variables must be overwritten !!
_df=pd.DataFrame()
source_mes_pz, source_mes_sol, source_pz, source_prv_eau, source_prv_sol =  _df, _df, _df, _df, _df
source_ouv, source_an, source_litho, source_bh = _df, _df, _df, _df

In [36]:
tmp_dir='../../CF_data/Result_traitem/observ_terrain/'
sheet='Piezometrie'

In [37]:
df = pd.read_excel('../../CF_data/Data_UMONS/Contamination/Résultats et mesures Siterem/résultats phase 2/'
                   'obsrevations terrain et mesures piézos phase 2.xlsx', sheet_name='Piézométrie', skiprows=1)
df=na_line_drop(df,0)
df=na_col_drop(df,1)
df.replace(r'<|>','', inplace=True, regex=True)
df.replace(r'-$',np.nan, inplace=True, regex=True)

gdf_viewer(df)

18 NaN lines dropped

Columns dropped :['Unnamed: 3', 'Unnamed: 4', 'Unnamed: 5', 'Unnamed: 6', 'Unnamed: 23', 'Unnamed: 24', 'Unnamed: 25', 'Unnamed: 26']

Rows : 31, columns : 19


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


interactive(children=(IntSlider(value=10, description='rows', max=31, min=10, readout=False), IntSlider(value=…

In [38]:
sdf=df[df.columns.to_list()[:3]]
sdf=na_line_drop(sdf,0)
sdf.rename(columns={'Niveau \npiézométrique':'Niv_eau_sol', 'Commentaires ':'Date_prv'}, inplace=True)

9 NaN lines dropped


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['line_na'] = False
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


In [39]:
sdf2=df.loc[:11, df.columns.to_list()[3:-1]]
sdf2.rename(columns={'Unnamed: 7':'Date_prv', 'Unnamed: 8':'Nappe', 'Unnamed: 9':'ID', 'NP/piézo [m]':'Niv_eau_pz',
       'dim. piezo hors sol [m]':'Dim_pz_sol', 'NP/sol [m]':'Niv_eau_sol', 'Prof. piézo/piézo [m]':'Long_pz',
       'Prof. piézo/sol [m]':'Long_pz_sol', 'CE [mS/cm]':'CE','t° [°C]':'Temp','O2 dissous\n[%]':'O_diss', 
        'Observations':'Rmq'}, 
           inplace=True)

In [40]:
for i in range(len(sdf2['ID'])):
    sdf2.loc[i,'ID']=re.sub(r'^P','F', sdf2.loc[i,'ID'])
    
    if pd.isnull(sdf2.loc[i,'CE']) and not pd.isnull(sdf2.loc[i,'CE [µS/cm]']):
        sdf2.loc[i,'CE']=sdf2.loc[i,'CE [µS/cm]']/1000

sdf2.drop(['CE [µS/cm]'], axis=1, inplace=True)

In [41]:
df=df.loc[14:, df.columns.to_list()[3:-1]]
df.rename(columns={'Unnamed: 7':'Date_prv', 'Unnamed: 8':'Nappe', 'Unnamed: 9':'ID', 'NP/piézo [m]':'Niv_eau_pz',
       'dim. piezo hors sol [m]':'Dim_pz_sol', 'NP/sol [m]':'Niv_eau_sol', 'Prof. piézo/piézo [m]':'Long_pz',
       'Prof. piézo/sol [m]':'Long_pz_sol', 'CE [mS/cm]':'CE','t° [°C]':'Temp','O2 dissous\n[%]':'O_diss', 
        'Observations':'Rmq'}, 
           inplace=True)
df.drop([19,20], inplace=True)
df.reset_index(drop=True, inplace=True)

In [42]:
df['ID']

0      P2Msup
1      P4Msup
2     P12Msup
3        P537
4         P99
5        P18c
6         P19
7         P20
8         P21
9         P22
10        P23
11       P24b
12        P25
13        P26
14       P27d
Name: ID, dtype: object

In [43]:
for i in range(len(df['ID'])):
    df.loc[i,'ID']=re.sub(r'^P','F', df.loc[i,'ID'])
    
    if pd.isnull(df.loc[i,'CE']) and not pd.isnull(df.loc[i,'CE [µS/cm]']):
        df.loc[i,'CE']=df.loc[i,'CE [µS/cm]']/1000
        
df.drop(['CE [µS/cm]', 'O_diss'], axis=1, inplace=True)

In [44]:
df=na_col_drop(df, 5)
sdf2=na_col_drop(sdf2, 5,)


Columns dropped :['ORP', 'O_diss']



In [45]:
prv_eau, error_df=gdf_merger(sdf2, df, how='outer', col='ID', fcol='ID')

In [46]:
gdf_viewer(prv_eau, rows=5, un_val='ID')

Rows : 27, columns : 13, Unique col 'ID': 27


interactive(children=(IntSlider(value=5, description='rows', max=27, min=5, readout=False), IntSlider(value=12…

In [47]:
prv_eau=prv_eau[['ID','Date_prv','Long_pz', 'Long_pz_sol','Dim_pz_sol','Nappe','Niv_eau_sol', 'Niv_eau_pz',
                 'pH', 'Temp', 'CE', 'ORP','Rmq']]
prv_eau.insert(1,'Type_ech','Eau')

source_prv_eau=prv_eau

In [48]:
if not os.path.exists(tmp_dir):
    os.makedirs(tmp_dir)

prv_eau.to_csv(tmp_dir+sheet+'_Samples-water.csv', index=False)

source_prv_eau.to_csv(tmp_dir+'source_Samples-water.csv', index=False)

print(f'source_bh:{len(source_bh)} ; source_pz:{len(source_pz)} ; source_litho:{len(source_litho)} ; '
     f'source_Fac-uknw:{len(source_ouv)} ; source_an:{len(source_an)} ;\nsource_prv_sol:{len(source_prv_sol)} ;'
     f'source_prv_eau:{len(source_prv_eau)} ; source_mes_pz:{len(source_mes_pz)} ; source_mes_sol:{len(source_mes_sol)} ;')

source_bh:0 ; source_pz:0 ; source_litho:0 ; source_Fac-uknw:0 ; source_an:0 ;
source_prv_sol:0 ;source_prv_eau:27 ; source_mes_pz:0 ; source_mes_sol:0 ;


--------------------------------------------------------------------------------------------------------

## 4-profondeur de contact campagne de forages octobre 2019.xlsx

* **Sheet : 'Feuil1'**

In [49]:
# New file, so the source variables must be overwritten !!
_df=pd.DataFrame()
source_mes_pz, source_mes_sol, source_pz, source_prv_eau, source_prv_sol =  _df, _df, _df, _df, _df
source_ouv, source_an, source_litho, source_bh = _df, _df, _df, _df

In [50]:
tmp_dir='../../CF_data/Result_traitem/Prof_contact_sol_forage/'
sheet='Feuil1'

In [51]:
df = pd.read_excel('../../CF_data/Data_UMONS/ouvrages/profondeur de contact campagne de forages octobre 2019.xlsx', 
                   sheet_name='Feuil1', skiprows=2)
df=na_line_drop(df,0)
df=na_col_drop(df,1)
df.replace(r'<|>','', inplace=True, regex=True)
df.replace(r'-$',np.nan, inplace=True, regex=True)

gdf_viewer(df)

2 NaN lines dropped
Rows : 8, columns : 5


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


interactive(children=(IntSlider(value=8, description='rows', max=8, min=8, readout=False), IntSlider(value=5, …

In [52]:
df.rename(columns={'n°forage ':'ID','profondeur(m)':'Long_for','x':'X', 'y':'Y', 'z':'Z'}, inplace=True)
df['Type']='Forage' # type is not defined clearly in data
df['ID']=df['ID'].apply(lambda x: 'F'+str(x).replace('.0',''))

bh=df

In [53]:
source_bh=bh

In [54]:
if not os.path.exists(tmp_dir):
    os.makedirs(tmp_dir)

bh.to_csv(tmp_dir+sheet+'_Boreholes.csv', index=False)
source_bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)

print(f'source_bh:{len(source_bh)} ; source_pz:{len(source_pz)} ; source_litho:{len(source_litho)} ; '
     f'source_Fac-uknw:{len(source_ouv)} ; source_an:{len(source_an)} ;\nsource_prv_sol:{len(source_prv_sol)} ;'
     f'source_prv_eau:{len(source_prv_eau)} ; source_mes_pz:{len(source_mes_pz)} ; source_mes_sol:{len(source_mes_sol)} ;')

source_bh:8 ; source_pz:0 ; source_litho:0 ; source_Fac-uknw:0 ; source_an:0 ;
source_prv_sol:0 ;source_prv_eau:0 ; source_mes_pz:0 ; source_mes_sol:0 ;


--------------------------------------------------------------------------------------------------------

## 5-Forages_Pilote_Decoupe.xlsx

* **Sheet : 'leve'**

In [55]:
# New file, so the source variables must be overwritten !!
_df=pd.DataFrame()
source_mes_pz, source_mes_sol, source_pz, source_prv_eau, source_prv_sol =  _df, _df, _df, _df, _df
source_ouv, source_an, source_litho, source_bh = _df, _df, _df, _df

In [56]:
tmp_dir='../../CF_data/Result_traitem/Forage_Pilote/'
sheet='leve_Z_elect_pos'

In [57]:
df = pd.read_excel('../../CF_data/Data_UMONS/geometrie_electrodes_et_sondes/Forages_Pilote_Decoupe.xlsx', 
                   sheet_name='leve')#, skiprows=0)
df=na_line_drop(df,0)
df=na_col_drop(df,1)
df.replace(r'<|>','', inplace=True, regex=True)
df.replace(r'-$',np.nan, inplace=True, regex=True)

gdf_viewer(df, rows=5)


Columns dropped :['Unnamed: 10']

Rows : 72, columns : 11


interactive(children=(IntSlider(value=5, description='rows', max=72, min=5, readout=False), IntSlider(value=11…

In [58]:
df.columns

Index(['Ref_puits', 'Niveau mesuré', 'Niveau corrigé', 'Bouteille',
       'decoupage [m]', 'Z_diff [m] repere_local', 'long_fin [m]',
       'Pos_Inox_#1 [m]', 'Pos_Inox_#6 [m]', 'Pos_Impol_#3 [m]',
       'Unnamed: 11'],
      dtype='object')

In [59]:
df.rename(columns={'Ref_puits':'ID','Niveau mesuré':'Z_mes', 'Niveau corrigé':'Z','Z_diff [m] repere_local':'Diff_Z_local',
                   'long_fin [m]':'Long_for','Pos_Inox_#1 [m]':'Pos_Inox_#1', 
                   'Pos_Inox_#6 [m]':'Pos_Inox_#6', 'Pos_Impol_#3 [m]':'Pos_Impol_#3'}, inplace=True)

In [60]:
df['Type']='Forage' # type is not defined clearly in data
df['ID']=df['ID'].apply(lambda x: 'F'+str(x).replace('.0',''))

elc = df[['ID','Pos_Inox_#6', 'Pos_Impol_#3']] # 'ID' is for boreholes
bh = df[['ID','Z','Diff_Z_local','Long_for', 'Type']]# Z_local origin = 145.5 [m]

In [61]:
source_bh = bh
source_elc = elc

In [62]:
if not os.path.exists(tmp_dir):
    os.makedirs(tmp_dir)

bh.to_csv(tmp_dir+sheet+'_Boreholes.csv', index=False)
elc.to_csv(tmp_dir+sheet+'_Electrodes.csv', index=False)

source_bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)
source_elc.to_csv(tmp_dir+'source_Electrodes.csv', index=False)

print(f'source_bh:{len(source_bh)} ; source_pz:{len(source_pz)} ; source_litho:{len(source_litho)} ; '
     f'source_Fac-uknw:{len(source_ouv)} ; source_an:{len(source_an)} ;\nsource_prv_sol:{len(source_prv_sol)} ;'
     f'source_prv_eau:{len(source_prv_eau)} ; source_mes_pz:{len(source_mes_pz)} ; source_mes_sol:{len(source_mes_sol)} ;')

source_bh:72 ; source_pz:0 ; source_litho:0 ; source_Fac-uknw:0 ; source_an:0 ;
source_prv_sol:0 ;source_prv_eau:0 ; source_mes_pz:0 ; source_mes_sol:0 ;


--------------------------------------------------------------------------------------------------------

## 6-Liste XY investigations.xlsx
* **Sheet : 'SOL_EAU'**

In [63]:
# New file, so the source variables must be overwritten !!
_df=pd.DataFrame()
source_mes_pz, source_mes_sol, source_pz, source_prv_eau, source_prv_sol =  _df, _df, _df, _df, _df
source_ouv, source_an, source_litho, source_bh = _df, _df, _df, _df

In [64]:
tmp_dir='../../CF_data/Result_traitem/Liste_XY/'
sheet='Sol_Eau'

In [65]:
df = pd.read_excel('../../CF_data/Data_UMONS/Contamination/Doc_SITEREM/Etude de caracterisation. SITEREM - 2011/Documents '
                   'supplémentaires/Liste XY investigations.xlsx', sheet_name='SOL')#, skiprows=4)
df['Type_ech']='Sol'

df1 = pd.read_excel('../../CF_data/Data_UMONS/Contamination/Doc_SITEREM/Etude de caracterisation. SITEREM - 2011/Documents '
                   'supplémentaires/Liste XY investigations.xlsx', sheet_name='EAU PR')#, skiprows=4)
df1['Type_ech']='Eau'
df1['Nappe']='Socle'

df2 = pd.read_excel('../../CF_data/Data_UMONS/Contamination/Doc_SITEREM/Etude de caracterisation. SITEREM - 2011/Documents '
                   'supplémentaires/Liste XY investigations.xlsx', sheet_name='EAU RB')#, skiprows=4)
df2['Type_ech']='Eau'
df2['Nappe']='remblais'

df3 = pd.read_excel('../../CF_data/Data_UMONS/Contamination/Doc_SITEREM/Etude de caracterisation. SITEREM - 2011/Documents '
                   'supplémentaires/Liste XY investigations.xlsx', sheet_name='EAU ALL')#, skiprows=4)
df3['Type_ech']='Eau'
df3['Nappe']='Alluvions'

In [66]:
df2=na_line_drop(df2,0)
df2=na_col_drop(df2,1)


Columns dropped :['Unnamed: 3', 'Unnamed: 4']



In [67]:
mdf, error_df=gdf_merger(df1, df2, 'outer', 'N°')

In [68]:
mdf=mdf.append(df3)
mdf=mdf.dropna(how='any', subset=['N°'])

In [69]:
mdf, error_df=gdf_merger(mdf, df, 'outer', 'N°') 

In [70]:
mdf.rename(columns={'N°':'ID'}, inplace=True)
mdf['Type'] = 'Piezo'
pz=mdf
source_pz = pz # we only have boreholes 'ID' here, no Z, no date

In [71]:
gdf_viewer(source_bh, rows=5)

Rows : 0, columns : 0


interactive(children=(IntSlider(value=0, description='rows', max=0, readout=False), IntSlider(value=0, descrip…

In [72]:
if not os.path.exists(tmp_dir):
    os.makedirs(tmp_dir)

pz.to_csv(tmp_dir+sheet+'_Piezometers.csv', index=False)    
source_pz.to_csv(tmp_dir+'source_Piezometers.csv', index=False)

print(f'source_bh:{len(source_bh)} ; source_pz:{len(source_pz)} ; source_litho:{len(source_litho)} ; '
     f'source_Fac-uknw:{len(source_ouv)} ; source_an:{len(source_an)} ;\nsource_prv_sol:{len(source_prv_sol)} ;'
     f'source_prv_eau:{len(source_prv_eau)} ; source_mes_pz:{len(source_mes_pz)} ; source_mes_sol:{len(source_mes_sol)} ;')

source_bh:0 ; source_pz:257 ; source_litho:0 ; source_Fac-uknw:0 ; source_an:0 ;
source_prv_sol:0 ;source_prv_eau:0 ; source_mes_pz:0 ; source_mes_sol:0 ;


--------------------------------------------------------------------------------------------------------

## 7-Résultats phase 1_MEMORIS.xls
* **Sheet : 'Résult SOL'**

In [73]:
# New file, so the source variables must be overwritten !!
_df=pd.DataFrame()
source_mes_pz, source_mes_sol, source_pz, source_prv_eau, source_prv_sol =  _df, _df, _df, _df, _df
source_ouv, source_an, source_litho, source_bh = _df, _df, _df, _df

In [74]:
tmp_dir='../../CF_data/Result_traitem/Phase_1_Memoris/'
sheet='Result_sol'

In [75]:
df = pd.read_excel('../../CF_data/Data_UMONS/Contamination/Résultats et mesures Siterem/résultats phase 1/'
                   'Résultats phase 1_MEMORIS.xls', sheet_name='Résult SOL', skiprows=4)
df=na_line_drop(df,0)
df=na_col_drop(df,1)
df.replace(r'<|>','', inplace=True, regex=True)
df.replace(r'-$',np.nan, inplace=True, regex=True)

gdf_viewer(df, rows=5)

1 NaN lines dropped

Columns dropped :['Unnamed: 0', 'Unnamed: 2']

Rows : 135, columns : 35


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


interactive(children=(IntSlider(value=5, description='rows', max=135, min=5, readout=False), IntSlider(value=1…

In [76]:
prv_sol=df.loc[:35]
an=df.loc[36:]

In [77]:
an.loc[0.5] = df.loc[0] # put data on first line
an = an.sort_index().reset_index(drop=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value, self.name)


In [78]:
prv_sol=prv_sol.transpose()
prv_sol.reset_index(drop=True, inplace=True)
prv_sol=col_ren(prv_sol, 1)

In [79]:
prv_sol.drop(list(range(5)), axis=0, inplace=True)
prv_sol.reset_index(drop=True, inplace=True)
prv_sol=na_col_drop(prv_sol,1)
prv_sol=na_line_drop(prv_sol,3)
prv_sol.reset_index(drop=True, inplace=True)


Columns dropped :['col_1', "Nom / description d'échantillon", 'Date de prélèvement', "Nature de l'étude (*)", 'Terrain', 'Epaisseur de remblais', 'Epaisseur alluvions', "Nature de l'observation organoleptique", 'LOCALISATION/AFFECTATION(S) - USAGE(S)', 'zone', 'parcelle (selon dénom.)', "Type d'usage de fait actuel", "Type d'usage de fait futur", "Type d'usage utilisé pour comparer les résultats", 'Lieux de prélèvement', 'X Lambert', 'Y Lambert', 'DESCRIPTION SOMMAIRE', 'Type de recouvrement de surface (***)', 'Sol en place (S : souillé, NS : non souillé)', 'Remblais de terre (S : souillé, NS : non souillé)', 'Remblais technique (S : souillé, NS : non souillé)', 'Déchet', 'Matières organiques', 'GRANULOMETRIE', 'Fraction argileuse']



In [80]:
name=['ID_ech', 'Description','Organo', 'Long_for', 'Refus','Ech_top', 'Ech_base', 'MS','Fract_2','Fract_2+']
prv_sol=col_ren(prv_sol, name=name, mode=1)

In [81]:
for i in range(len(prv_sol['Description'])):
    x = prv_sol.loc[i,'Description']
    if x=='R': prv_sol.loc[i,'Description']='Remblais'
    elif x=='L': prv_sol.loc[i,'Description']='Limons'

prv_sol['Refus']=prv_sol['Refus'].apply(lambda x: 'x' if not re.search('x|X', str(x)) else '')
prv_sol.insert(1,'Type_ech','Sol')

In [82]:
gdf_viewer(prv_sol, rows=3)

Rows : 29, columns : 11


interactive(children=(IntSlider(value=3, description='rows', max=29, min=3, readout=False), IntSlider(value=11…

In [83]:
an=an.transpose()
an.reset_index(drop=True, inplace=True)

In [84]:
an=col_ren(an, 1)

In [85]:
an.replace(r'<|>','', inplace=True, regex=True)
an.replace(r'-',np.nan, inplace=True, regex=True)
an.rename(columns={an.columns[0]:'ID_ech'}, inplace=True)

In [86]:
an.drop(list(range(5)), axis=0, inplace=True)
an.reset_index(drop=True, inplace=True)
an=na_col_drop(an,1)
an.insert(1,'Type_ech','Sol')


Columns dropped :['METAUX LOURDS', 'CYANURES', 'COMPOSES AROMATIQUES VOLATILS', 'BTEX totaux', 'PHENOLS', 'Indice phénol', 'HYDROCARBURES AROMATIQUES POLYCYCLIQUES', 'COMPOSES ORGANOHALOGENES VOLATILS', 'col_63', 'EOX (****)', 'HYDROCARBURES TOTAUX', 'fraction aromat. C6-C7', 'fraction aromat. C7-C8', 'fraction aromat. C8-C10', 'fraction aliphat. C5-C6', 'fraction aliphat. C6-C8', 'fraction aliphat. C8-C10', 'Fraction C35 - C40', 'Hydrocarbures totaux C10-C40', 'METHYL-TERT-BUTYL-ETHER', 'POLYCHLOROBIPHENYL (PCB)', 'PCB 28', 'PCB 52', 'PCB 101', 'PCB 118', 'PCB 138', 'PCB 153', 'PCB 180', 'PCB totaux (7)', 'F4/2M*', 'Teneur mesurée', 'Teneur mesurée (souligne/gras)', 'Teneur mesurée (gras/grisé)', "(*) CP = Campagne de prélèvements; Ex = Expertise; ES = Etude de sol; EO = Etude d'orientation; EC = Etude de caractérisation; SA = Suivi d'assainissement", '(**) L: limon, A: Argile, S: Sable, R: Remblai', '(***) ib : imperméable (béton) ; ih : imperméable hydrocarboné ; p : perméable (gra

In [87]:
an=col_ren(an, name=pol_field_model, mode=1)

In [88]:
gdf_viewer(an, rows=5) 

Rows : 29, columns : 64


interactive(children=(IntSlider(value=5, description='rows', max=29, min=5, readout=False), IntSlider(value=12…

In [89]:
source_prv_sol=prv_sol
source_an=an

In [90]:
if not os.path.exists(tmp_dir):
    os.makedirs(tmp_dir)

#bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)
#pz.to_csv(tmp_dir+sheet+'_Piezometers.csv', index=False)
#ouv.to_csv(tmp_dir+sheet+'_Unkown-facility.csv', index=False)
#litho.to_csv(tmp_dir+sheet+'_Lithology.csv', index=False)
#mes_pz.to_csv(tmp_dir+sheet+'_Measures.csv', index=False)
#mes_sol.to_csv(tmp_dir+sheet+'_Measures-soil.csv', index=False)
#prv_eau.to_csv(tmp_dir+sheet+'_Samples-water.csv', index=False)
prv_sol.to_csv(tmp_dir+sheet+'_Samples-soil.csv', index=False)
an.to_csv(tmp_dir+sheet+'_Analysis.csv', index=False)


#source_bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)
#source_pz.to_csv(tmp_dir+'source_Piezometers.csv', index=False)
#source_ouv.to_csv(tmp_dir+'source_Unkown-facility.csv', index=False)
#source_litho.to_csv(tmp_dir+'source_Lithology.csv', index=False)
#source_mes_pz.to_csv(tmp_dir+'source_Measures.csv', index=False)
#source_mes_sol.to_csv(tmp_dir+'source_Measures-soil.csv', index=False)
#source_prv_eau.to_csv(tmp_dir+'source_Samples-water.csv', index=False)
source_prv_sol.to_csv(tmp_dir+'source_Samples-soil.csv', index=False)
source_an.to_csv(tmp_dir+'source_Analysis.csv', index=False)


print(f'source_bh:{len(source_bh)} ; source_pz:{len(source_pz)} ; source_litho:{len(source_litho)} ; '
     f'source_Fac-uknw:{len(source_ouv)} ; source_an:{len(source_an)} ;\nsource_prv_sol:{len(source_prv_sol)} ;'
     f'source_prv_eau:{len(source_prv_eau)} ; source_mes_pz:{len(source_mes_pz)} ; source_mes_sol:{len(source_mes_sol)} ;')

source_bh:0 ; source_pz:0 ; source_litho:0 ; source_Fac-uknw:0 ; source_an:29 ;
source_prv_sol:29 ;source_prv_eau:0 ; source_mes_pz:0 ; source_mes_sol:0 ;


* **Sheet : 'Résult EAU'**

In [91]:
tmp_dir='../../CF_data/Result_traitem/Phase_1_Memoris/'
sheet='Result_eau'

In [92]:
df = pd.read_excel('../../CF_data/Data_UMONS/Contamination/Résultats et mesures Siterem/résultats phase 1/'
                   'Résultats phase 1_MEMORIS.xls', sheet_name='Résult EAU', skiprows=4)
df=na_line_drop(df,0)
df=na_col_drop(df,1)
df.replace(r'<|>','', inplace=True, regex=True)
df.replace(r'-$',np.nan, inplace=True, regex=True)
gdf_viewer(df, rows=5)

1 NaN lines dropped

Columns dropped :['Unnamed: 0', 'Unnamed: 2']

Rows : 136, columns : 23


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


interactive(children=(IntSlider(value=5, description='rows', max=136, min=5, readout=False), IntSlider(value=1…

In [93]:
prv_eau=df.loc[:32]
an=df.loc[33:]

In [94]:
an.loc[0.5] = df.loc[0] # put data on first line
an = an.sort_index().reset_index(drop=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value, self.name)


In [95]:
prv_eau=prv_eau.transpose()
prv_eau.reset_index(drop=True, inplace=True)
prv_eau=col_ren(prv_eau, 1)

In [96]:
prv_eau['CE']=prv_eau['CE'].apply(lambda x: pd.to_numeric(x)/1000 
                                  if re.search('^\d+', str(x)) and not pd.isnull(x) else np.nan)

In [97]:
prv_eau.drop(list(range(5)), axis=0, inplace=True)
prv_eau.reset_index(drop=True, inplace=True)

In [98]:
prv_eau=dble_col_drop(prv_eau)

column(s) dropped: ['21:DESCRIPTION SOMMAIRE', '27:Prof. arrêt du forage']


In [99]:
prv_eau=na_col_drop(prv_eau,1)
prv_eau=na_line_drop(prv_eau,3)
prv_eau.reset_index(drop=True, inplace=True)


Columns dropped :['col_1', "Nature de l'étude (*)", 'Observations organoleptiques', 'LOCALISATION/AFFECTATION(S) - USAGE(S)', 'parcelle (selon dénom.)', "Type d'usage de fait actuel", "Type d'usage de fait futur", "Type d'usage utilisé pour comparer les résultats", 'Lieux de prélèvement', 'DESCRIPTION SOMMAIRE', 'Type de recouvrement de surface (**)', 'Sol en place (S : souillé, NS : non souillé)', 'Remblais de terre (S : souillé, NS : non souillé)', 'Remblais technique (S : souillé, NS : non souillé)', 'Déchet', 'PARAMETRES PHYSICO-CHIMIQUES ', 'ORP']



In [100]:
name=['ID_ech','Date_prv','Num_maille','Affectation','X','Y','Zsol','Long_for','Prof_crep','Long_pz',
      'Niv_eau_sol','pH','CE','T']
prv_eau=col_ren(prv_eau, name=name, mode=1)
prv_eau.insert(1,'Type_ech','Eau')

In [101]:
prv_eau.columns

Index(['ID_ech', 'Type_ech', 'Date_prv', 'Num_maille', 'Affectation', 'X', 'Y',
       'Zsol', 'Long_for', 'Prof_crep', 'Long_pz', 'Niv_eau_sol', 'pH', 'CE',
       'T'],
      dtype='object')

In [102]:
prv_eau['Prof_crep'].replace('\[|\]','', regex=True, inplace=True)
for i in range(len(prv_eau)):
    c=prv_eau.loc[i,'Prof_crep']
    prv_eau.loc[i,'Equip_top']=c.split('-')[0]
    prv_eau.loc[i,'Equip_base']=c.split('-')[1]

prv_eau['Type_equip'] = 'Crepine'
prv_eau.drop(columns=['Prof_crep'], inplace=True)

In [103]:
prv_eau['ID_ech'].replace('Canne ', 'Can', inplace=True, regex=True)
prv_eau['ID_ech'].replace('\n', ' ', inplace=True, regex=True)

In [104]:
pz=prv_eau[['ID_ech','X', 'Y', 'Zsol', 'Long_for','Long_pz', 'Equip_top', 'Equip_base', 'Type_equip']]
pz.rename(columns={'ID_ech':'ID'}, inplace=True)
pz['Type'] = 'Piezo'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pz['Type'] = 'Piezo'


In [105]:
for i in range(len(pz.ID)):
    c=pz.loc[i, 'ID']
    pz.loc[i, 'ID']=re.search("(\w+\d+(?:\w)?)",c).group(1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


In [106]:
pz.drop_duplicates(subset=['ID'], keep='first', ignore_index=True, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pz.drop_duplicates(subset=['ID'], keep='first', ignore_index=True, inplace=True)


In [107]:
gdf_viewer(prv_eau, rows=5)

Rows : 17, columns : 17


interactive(children=(IntSlider(value=5, description='rows', max=17, min=5, readout=False), IntSlider(value=12…

In [108]:
an=an.transpose()
an.reset_index(drop=True, inplace=True)

In [109]:
an=col_ren(an, 1)

In [110]:
an.replace(r'<|>','', inplace=True, regex=True)
an.replace(r'-',np.nan, inplace=True, regex=True)
an.rename(columns={an.columns[0]:'ID_ech'}, inplace=True)

In [111]:
an.drop(list(range(5)), axis=0, inplace=True)
an.reset_index(drop=True, inplace=True)

In [112]:
an.drop(columns=an.columns[[-6,-5]], axis=1, inplace=True)
an=na_col_drop(an,1)
an.insert(1,'Type_ech','Eau')


Columns dropped :['METAUX LOURDS', 'CYANURES', 'COMPOSES AROMATIQUES VOLATILS', 'PHENOLS', 'crésols (total)', 'CHLOROPHENOLS', '2-chlorophénol', 'monochlorophénol total', 'dichlorophénol total', '2,4,5-trichlorophénol', '2,4,6-trichlorophénol', 'trichlorophénol total', '2,3,4,6- tétrachlorophénol', 'tétrachlorophénol total', 'pentachlorophénol', 'HYDROCARBURES AROMATIQUES POLYCYCLIQUES', 'COMPOSES ORGANOCHLORES VOLATILS', 'CHLOROBENZENES', 'monochlorobenzène', '1,3-dichlorobenzène', '1,2-dichlorobenzène', '1,4-Dichlorobenzène', '1,2,4,5- et 1,2,3,5-tétrachlorobenzènes', 'pentachlorobenzène', 'hexachlorobenzène', 'HYDROCARBURES TOTAUX', 'METHYL-TERT-BUTYL-ETHER', 'POLYCHLOROBIPHENYLS (PCB)', 'PCB totaux (7)', 'Teneur mesurée (souligne/gras)', 'Teneur mesurée (gras/grisé)', "(*) CP = Campagne de prélèvements; Ex = Expertise; ES = Etude de sol; EO = Etude d'orientation; EC = Etude de caractérisation; SA = Suivi d'assainissement", '(**) ib : imperméable (béton) ; ih : imperméable hydrocar

In [113]:
an=col_ren(an, name=pol_field_model, mode=1)

In [114]:
gdf_viewer(an, rows=5) 

Rows : 17, columns : 70


interactive(children=(IntSlider(value=5, description='rows', max=17, min=5, readout=False), IntSlider(value=12…

In [115]:
source_pz=pz
source_prv_eau=prv_eau
source_an=source_an.append(an)

In [116]:
if not os.path.exists(tmp_dir):
    os.makedirs(tmp_dir)

#bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)
pz.to_csv(tmp_dir+sheet+'_Piezometers.csv', index=False)
#ouv.to_csv(tmp_dir+sheet+'_Unkown-facility.csv', index=False)
#litho.to_csv(tmp_dir+sheet+'_Lithology.csv', index=False)
#mes_pz.to_csv(tmp_dir+sheet+'_Measures.csv', index=False)
#mes_sol.to_csv(tmp_dir+sheet+'_Measures-soil.csv', index=False)
prv_eau.to_csv(tmp_dir+sheet+'_Samples-water.csv', index=False)
#prv_sol.to_csv(tmp_dir+sheet+'_Samples-soil.csv', index=False)
an.to_csv(tmp_dir+sheet+'_Analysis.csv', index=False)


#source_bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)
source_pz.to_csv(tmp_dir+'source_Piezometers.csv', index=False)
#source_ouv.to_csv(tmp_dir+'source_Unkown-facility.csv', index=False)
#source_litho.to_csv(tmp_dir+'source_Lithology.csv', index=False)
#source_mes_pz.to_csv(tmp_dir+'source_Measures.csv', index=False)
#source_mes_sol.to_csv(tmp_dir+'source_Measures-soil.csv', index=False)
source_prv_eau.to_csv(tmp_dir+'source_Samples-water.csv', index=False)
#source_prv_sol.to_csv(tmp_dir+'source_Samples-soil.csv', index=False)
source_an.to_csv(tmp_dir+'source_Analysis.csv', index=False)


print(f'source_bh:{len(source_bh)} ; source_pz:{len(source_pz)} ; source_litho:{len(source_litho)} ; '
     f'source_Fac-uknw:{len(source_ouv)} ; source_an:{len(source_an)} ;\nsource_prv_sol:{len(source_prv_sol)} ;'
     f'source_prv_eau:{len(source_prv_eau)} ; source_mes_pz:{len(source_mes_pz)} ; source_mes_sol:{len(source_mes_sol)} ;')

source_bh:0 ; source_pz:14 ; source_litho:0 ; source_Fac-uknw:0 ; source_an:46 ;
source_prv_sol:29 ;source_prv_eau:17 ; source_mes_pz:0 ; source_mes_sol:0 ;


## 8-Résultats phase 2_MEMORIS.xls
* **Sheet : 'Résult SOL'**

In [117]:
# New file, so the source variables must be overwritten !!
_df=pd.DataFrame()
source_mes_pz, source_mes_sol, source_pz, source_prv_eau, source_prv_sol =  _df, _df, _df, _df, _df
source_ouv, source_an, source_litho, source_bh = _df, _df, _df, _df

In [118]:
tmp_dir='../../CF_data/Result_traitem/Phase_2_Memoris/'
sheet='Result_SOL'

In [119]:
df = pd.read_excel('../../CF_data/Data_UMONS/Contamination/Résultats et mesures Siterem/résultats phase 2/'
                   'Résultats phase 2_MEMORIS.xls', sheet_name='Résult SOL', skiprows=4)
df=na_line_drop(df,0)
df=na_col_drop(df,1)
df.replace(r'<|>','', inplace=True, regex=True)
df.replace(r'-$',np.nan, inplace=True, regex=True)
gdf_viewer(df, rows=5)

1 NaN lines dropped

Columns dropped :['Unnamed: 0', 'Unnamed: 2']

Rows : 135, columns : 31


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


interactive(children=(IntSlider(value=5, description='rows', max=135, min=5, readout=False), IntSlider(value=1…

In [120]:
prv_sol=df.loc[:35]
an=df.loc[36:]

In [121]:
an.loc[0.5] = df.loc[1] # put data on first line
an = an.sort_index().reset_index(drop=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value, self.name)


In [122]:
prv_sol=prv_sol.transpose()
prv_sol.reset_index(drop=True, inplace=True)
prv_sol=col_ren(prv_sol, 1)

In [123]:
prv_sol.drop(list(range(5)), axis=0, inplace=True)
prv_sol.reset_index(drop=True, inplace=True)
prv_sol=na_col_drop(prv_sol,1)
prv_sol=na_line_drop(prv_sol,3)
prv_sol.reset_index(drop=True, inplace=True)


Columns dropped :['col_0', "Nom / description d'échantillon", "Nature de l'étude (*)", 'Terrain', 'Epaisseur de remblais', 'Epaisseur alluvions', "Nature de l'observation organoleptique", 'LOCALISATION/AFFECTATION(S) - USAGE(S)', 'zone', 'parcelle (selon dénom.)', "Type d'usage de fait actuel", "Type d'usage de fait futur", "Type d'usage utilisé pour comparer les résultats", 'Lieux de prélèvement', 'X Lambert', 'Y Lambert', 'DESCRIPTION SOMMAIRE', 'Type de recouvrement de surface (***)', 'Sol en place (S : souillé, NS : non souillé)', 'Remblais de terre (S : souillé, NS : non souillé)', 'Remblais technique (S : souillé, NS : non souillé)', 'Déchet', 'Matières organiques', 'GRANULOMETRIE', 'Fraction argileuse']



In [124]:
name=['ID_ech', 'Date_prv', 'Description','Organo', 'Long_for', 'Refus','Ech_top', 'Ech_base', 'MS','Fract_2','Fract_2+']
prv_sol=col_ren(prv_sol, name=name, mode=1)

In [125]:
set(prv_sol['Description'])

{'L', 'LA', 'LS', 'R'}

In [126]:
for i in range(len(prv_sol['Description'])):
    x = prv_sol.loc[i,'Description']
    if x=='R': prv_sol.loc[i,'Description']='Remblais'
    elif x=='L': prv_sol.loc[i,'Description']='Limons'
    elif x=='LA': prv_sol.loc[i,'Description']='Limons et argiles'
    elif x=='LS': prv_sol.loc[i,'Description']='Limons et sables'

prv_sol['Refus']=prv_sol['Refus'].apply(lambda x: 'x' if not re.search('#', str(x)) else '')
prv_sol.insert(1,'Type_ech','Sol')

In [127]:
gdf_viewer(prv_sol, rows=3)

Rows : 25, columns : 12


interactive(children=(IntSlider(value=3, description='rows', max=25, min=3, readout=False), IntSlider(value=12…

In [128]:
an=an.transpose()
an.reset_index(drop=True, inplace=True)

In [129]:
an=col_ren(an, 1)

In [130]:
an.replace(r'<|>','', inplace=True, regex=True)
an.replace(r'-',np.nan, inplace=True, regex=True)
an.rename(columns={an.columns[0]:'ID_ech'}, inplace=True)

In [131]:
an.drop(list(range(5)), axis=0, inplace=True)
an=na_col_drop(an,1)
an.insert(1,'Type_ech','Sol')


Columns dropped :['METAUX LOURDS', 'CYANURES', 'COMPOSES AROMATIQUES VOLATILS', 'BTEX totaux', 'PHENOLS', 'Phénol', 'Indice phénol', 'HYDROCARBURES AROMATIQUES POLYCYCLIQUES', 'COMPOSES ORGANOHALOGENES VOLATILS', '1,1-Dichloroéthane', '1,2-Dichloroéthane', '1,1-dichloroéthène', 'Cis-1,2-dichloroéthène', 'Trans 1,2-dichloroéthylène', 'Dichlorométhane', 'Totaux (cis,trans) 1,2-dichloroéthènes', '1,2-dichloropropane', 'Tétrachloroéthylène', 'Tétrachlorométhane', '1,1,1-Trichloroéthane', '1,1,2-Trichloroéthane', 'Trichloroéthylène', 'Chloroforme', 'Chlorure de vinyle', 'col_63', 'EOX (****)', 'HYDROCARBURES TOTAUX', 'Fraction C35 - C40', 'Hydrocarbures totaux C10-C40', 'METHYL-TERT-BUTYL-ETHER', 'MTBE', 'POLYCHLOROBIPHENYL (PCB)', 'PCB 28', 'PCB 52', 'PCB 101', 'PCB 118', 'PCB 138', 'PCB 153', 'PCB 180', 'PCB totaux (7)', 'F4/2M*', 'Teneur mesurée', 'Teneur mesurée (souligne/gras)', 'Teneur mesurée (gras/grisé)', "(*) CP = Campagne de prélèvements; Ex = Expertise; ES = Etude de sol; EO = 

In [132]:
an=col_ren(an, name=pol_field_model, mode=1)

In [133]:
gdf_viewer(an, rows=5, cols=20) 

Rows : 25, columns : 53


interactive(children=(IntSlider(value=5, description='rows', max=25, min=5, readout=False), IntSlider(value=20…

In [134]:
source_prv_sol=prv_sol
source_an=an

In [135]:
if not os.path.exists(tmp_dir):
    os.makedirs(tmp_dir)

#bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)
#pz.to_csv(tmp_dir+sheet+'_Piezometers.csv', index=False)
#ouv.to_csv(tmp_dir+sheet+'_Unkown-facility.csv', index=False)
#litho.to_csv(tmp_dir+sheet+'_Lithology.csv', index=False)
#mes_pz.to_csv(tmp_dir+sheet+'_Measures.csv', index=False)
#mes_sol.to_csv(tmp_dir+sheet+'_Measures-soil.csv', index=False)
#prv_eau.to_csv(tmp_dir+sheet+'_Samples-water.csv', index=False)
prv_sol.to_csv(tmp_dir+sheet+'_Samples-soil.csv', index=False)
an.to_csv(tmp_dir+sheet+'_Analysis.csv', index=False)


#source_bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)
#source_pz.to_csv(tmp_dir+'source_Piezometers.csv', index=False)
#source_ouv.to_csv(tmp_dir+'source_Unkown-facility.csv', index=False)
#source_litho.to_csv(tmp_dir+'source_Lithology.csv', index=False)
#source_mes_pz.to_csv(tmp_dir+'source_Measures.csv', index=False)
#source_mes_sol.to_csv(tmp_dir+'source_Measures-soil.csv', index=False)
#source_prv_eau.to_csv(tmp_dir+'source_Samples-water.csv', index=False)
source_prv_sol.to_csv(tmp_dir+'source_Samples-soil.csv', index=False)
source_an.to_csv(tmp_dir+'source_Analysis.csv', index=False)


print(f'source_bh:{len(source_bh)} ; source_pz:{len(source_pz)} ; source_litho:{len(source_litho)} ; '
     f'source_Fac-uknw:{len(source_ouv)} ; source_an:{len(source_an)} ;\nsource_prv_sol:{len(source_prv_sol)} ;'
     f'source_prv_eau:{len(source_prv_eau)} ; source_mes_pz:{len(source_mes_pz)} ; source_mes_sol:{len(source_mes_sol)} ;')

source_bh:0 ; source_pz:0 ; source_litho:0 ; source_Fac-uknw:0 ; source_an:25 ;
source_prv_sol:25 ;source_prv_eau:0 ; source_mes_pz:0 ; source_mes_sol:0 ;


* **Sheet : 'Résult EAU'**

In [136]:
tmp_dir='../../CF_data/Result_traitem/Phase_2_Memoris/'
sheet='Result_eau'

In [137]:
df = pd.read_excel('../../CF_data/Data_UMONS/Contamination/Résultats et mesures Siterem/résultats phase 2/'
                   'Résultats phase 2_MEMORIS.xls', sheet_name='Résult EAU', skiprows=4)
df=na_line_drop(df,0)
df=na_col_drop(df,1)
df.replace(r'<|>','', inplace=True, regex=True)
df.replace(r'-$',np.nan, inplace=True, regex=True)

gdf_viewer(df, rows=5)


Columns dropped :['Unnamed: 0', 'Unnamed: 2']

Rows : 138, columns : 17


interactive(children=(IntSlider(value=5, description='rows', max=138, min=5, readout=False), IntSlider(value=1…

In [138]:
prv_eau=df.loc[:32]
an=df.loc[33:]

In [139]:
an.loc[0.5] = df.loc[1] # put data on first line
an = an.sort_index().reset_index(drop=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value, self.name)


In [140]:
prv_eau=prv_eau.transpose()
prv_eau.reset_index(drop=True, inplace=True)
prv_eau=col_ren(prv_eau, 1)

In [141]:
prv_eau['CE']=prv_eau['CE'].apply(lambda x: pd.to_numeric(x)/1000 
                                  if re.search('^\d+', str(x)) and not pd.isnull(x) else np.nan)

In [142]:
prv_eau.drop(list(range(5)), axis=0, inplace=True)
prv_eau.reset_index(drop=True, inplace=True)

In [143]:
prv_eau=dble_col_drop(prv_eau)

column(s) dropped: ['21:DESCRIPTION SOMMAIRE', '27:Prof. arrêt du forage']


In [144]:
prv_eau=na_col_drop(prv_eau,1)
prv_eau=na_line_drop(prv_eau,3)
prv_eau.reset_index(drop=True, inplace=True)


Columns dropped :['Nom du piézomètre', "Nature de l'étude (*)", 'Observations organoleptiques', 'LOCALISATION/AFFECTATION(S) - USAGE(S)', 'Numéro de maille', 'parcelle (selon dénom.)', "Type d'usage de fait actuel", "Type d'usage de fait futur", "Type d'usage utilisé pour comparer les résultats", 'Lieux de prélèvement', 'DESCRIPTION SOMMAIRE', 'Type de recouvrement de surface (**)', 'Sol en place (S : souillé, NS : non souillé)', 'Remblais de terre (S : souillé, NS : non souillé)', 'Remblais technique (S : souillé, NS : non souillé)', 'Déchet', 'PARAMETRES PHYSICO-CHIMIQUES ', 'ORP']



In [145]:
prv_eau.columns

Index(['col_1', 'Date de prélèvement', 'Type d'affectation (Plan de secteur)',
       'X Lambert', 'Y Lambert', 'Z Sol', 'Prof. arrêt du forage',
       'Profondeur crépine ', 'Prof. piézo/sol mesurée sur site',
       'Niveau de la nappe/sol', 'pH', 'CE', 'T'],
      dtype='object')

In [146]:
name=['ID_ech', 'Date_prv','Affectation','X', 'Y','Zsol', 'Long_for','Prof_crep','Long_pz_sol',
      'Niv_eau_sol','pH', 'CE', 'T']
prv_eau=col_ren(prv_eau, name=name, mode=1)
prv_eau.insert(1,'Type_ech','Eau')

In [147]:
prv_eau['ID_ech'].replace('Canne ', 'Can', inplace=True, regex=True)
prv_eau['ID_ech'].replace('\n', ' ', inplace=True, regex=True)

In [148]:
prv_eau['Prof_crep'].replace('\[|\]','', regex=True, inplace=True)
for i in range(len(prv_eau)):
    c=prv_eau.loc[i,'Prof_crep']
    prv_eau.loc[i,'Equip_top']=c.split('-')[0]
    prv_eau.loc[i,'Equip_base']=c.split('-')[1]
    
prv_eau.drop(columns=['Prof_crep'], inplace=True)
prv_eau['Type_equip'] = 'Crepine'

In [149]:
prv_eau['ID_ech'].replace('Canne ', 'Can', inplace=True, regex=True)
prv_eau['ID_ech'].replace('\n', ' ', inplace=True, regex=True)

In [150]:
pz=prv_eau[['ID_ech', 'X', 'Y', 'Zsol', 'Long_for','Long_pz_sol', 'Equip_top', 'Equip_base']]
pz.rename(columns={'ID_ech':'ID'}, inplace=True)
pz['Type']='Piezo'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pz['Type']='Piezo'


In [151]:
for i in range(len(pz.ID)):
    c=pz.loc[i, 'ID']
    pz.loc[i, 'ID']=re.search("(\w+\d+(?:\w)?)",c).group(1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


In [152]:
pz.drop_duplicates(subset=['ID'], keep='first', ignore_index=True, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pz.drop_duplicates(subset=['ID'], keep='first', ignore_index=True, inplace=True)


In [153]:
prv_eau=prv_eau[['ID_ech', 'Date_prv', 'X', 'Y', 'Zsol','Niv_eau_sol', 'pH', 'CE', 'T','Affectation']]

In [154]:
gdf_viewer(prv_eau, rows=5)

Rows : 11, columns : 10


interactive(children=(IntSlider(value=5, description='rows', max=11, min=5, readout=False), IntSlider(value=10…

In [155]:
an=an.transpose()
an.reset_index(drop=True, inplace=True)

In [156]:
an=col_ren(an, 1)

In [157]:
an.replace(r'<|>','', inplace=True, regex=True)
an.replace(r'-',np.nan, inplace=True, regex=True)
an.rename(columns={an.columns[0]:'ID_ech'}, inplace=True)

In [158]:
an.drop(list(range(5)), axis=0, inplace=True)
an.reset_index(drop=True, inplace=True)

In [159]:
an.drop(columns=an.columns[[-6,-5]], axis=1, inplace=True)
an=na_col_drop(an,1)
an.insert(1,'Type_ech','Eau')


Columns dropped :['METAUX LOURDS', 'Chrome VI', 'CYANURES', 'COMPOSES AROMATIQUES VOLATILS', 'PHENOLS', 'crésols (total)', 'CHLOROPHENOLS', '2-chlorophénol', 'monochlorophénol total', 'dichlorophénol total', '2,4,5-trichlorophénol', '2,4,6-trichlorophénol', 'trichlorophénol total', '2,3,4,6- tétrachlorophénol', 'tétrachlorophénol total', 'pentachlorophénol', 'HYDROCARBURES AROMATIQUES POLYCYCLIQUES', 'COMPOSES ORGANOCHLORES VOLATILS', 'CHLOROBENZENES', 'monochlorobenzène', '1,3-dichlorobenzène', '1,2-dichlorobenzène', '1,4-Dichlorobenzène', '1,2,4,5- et 1,2,3,5-tétrachlorobenzènes', 'pentachlorobenzène', 'hexachlorobenzène', 'HYDROCARBURES TOTAUX', 'METHYL-TERT-BUTYL-ETHER', 'POLYCHLOROBIPHENYLS (PCB)', 'PCB totaux (7)', 'AUTRES ANALYSES ', 'Teneur mesurée (souligne/gras)', 'Teneur mesurée (gras/grisé)', "(*) CP = Campagne de prélèvements; Ex = Expertise; ES = Etude de sol; EO = Etude d'orientation; EC = Etude de caractérisation; SA = Suivi d'assainissement", '(**) ib : imperméable (b

In [160]:
an=col_ren(an, name=pol_field_model, mode=1)

In [161]:
gdf_viewer(an, rows=5) 

Rows : 11, columns : 70


interactive(children=(IntSlider(value=5, description='rows', max=11, min=5, readout=False), IntSlider(value=12…

In [162]:
source_prv_eau=prv_eau
source_an=source_an.append(an)

In [163]:
if not os.path.exists(tmp_dir):
    os.makedirs(tmp_dir)

#bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)
pz.to_csv(tmp_dir+sheet+'_Piezometers.csv', index=False)
#ouv.to_csv(tmp_dir+sheet+'_Unkown-facility.csv', index=False)
#litho.to_csv(tmp_dir+sheet+'_Lithology.csv', index=False)
#mes_pz.to_csv(tmp_dir+sheet+'_Measures.csv', index=False)
#mes_sol.to_csv(tmp_dir+sheet+'_Measures-soil.csv', index=False)
prv_eau.to_csv(tmp_dir+sheet+'_Samples-water.csv', index=False)
#prv_sol.to_csv(tmp_dir+sheet+'_Samples-soil.csv', index=False)
an.to_csv(tmp_dir+sheet+'_Analysis.csv', index=False)


#source_bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)
#source_pz.to_csv(tmp_dir+'source_Piezometers.csv', index=False)
#source_ouv.to_csv(tmp_dir+'source_Unkown-facility.csv', index=False)
#source_litho.to_csv(tmp_dir+'source_Lithology.csv', index=False)
#source_mes_pz.to_csv(tmp_dir+'source_Measures.csv', index=False)
#source_mes_sol.to_csv(tmp_dir+'source_Measures-soil.csv', index=False)
source_prv_eau.to_csv(tmp_dir+'source_Samples-water.csv', index=False)
#source_prv_sol.to_csv(tmp_dir+'source_Samples-soil.csv', index=False)
source_an.to_csv(tmp_dir+'source_Analysis.csv', index=False)


print(f'source_bh:{len(source_bh)} ; source_pz:{len(source_pz)} ; source_litho:{len(source_litho)} ; '
     f'source_Fac-uknw:{len(source_ouv)} ; source_an:{len(source_an)} ;\nsource_prv_sol:{len(source_prv_sol)} ;'
     f'source_prv_eau:{len(source_prv_eau)} ; source_mes_pz:{len(source_mes_pz)} ; source_mes_sol:{len(source_mes_sol)} ;')

source_bh:0 ; source_pz:0 ; source_litho:0 ; source_Fac-uknw:0 ; source_an:36 ;
source_prv_sol:25 ;source_prv_eau:11 ; source_mes_pz:0 ; source_mes_sol:0 ;


## 9-Ensemble des résultats Memoris version Seafile.xls
* **Sheet : 'Résult SOL'**

In [164]:
# New file, so the source variables must be overwritten !!
_df=pd.DataFrame()
source_mes_pz, source_mes_sol, source_pz, source_prv_eau, source_prv_sol =  _df, _df, _df, _df, _df
source_ouv, source_an, source_litho, source_bh = _df, _df, _df, _df

In [165]:
tmp_dir='../../CF_data/Result_traitem/Memoris_seafile/'
sheet='Result_SOL'

In [166]:
df = pd.read_excel('../../CF_data/Data_UMONS/Contamination/Résultats et mesures Siterem/'
                   'Ensemble des résultats Memoris version Seafile.xls', sheet_name='Résult SOL', skiprows=4)
df=na_line_drop(df,0)
df=na_col_drop(df,1)
gdf_viewer(df, rows=5)

2 NaN lines dropped

Columns dropped :['Unnamed: 0', 'Unnamed: 2']

Rows : 138, columns : 66


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


interactive(children=(IntSlider(value=5, description='rows', max=138, min=5, readout=False), IntSlider(value=1…

In [167]:
prv_sol=df.loc[:37]
an=df.loc[38:]

In [168]:
an.loc[0.5] = df.loc[0] # put data on first line
an = an.sort_index().reset_index(drop=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value, self.name)


In [169]:
prv_sol=prv_sol.transpose()
prv_sol.reset_index(drop=True, inplace=True)
prv_sol=col_ren(prv_sol, 1)

In [170]:
prv_sol.drop(list(range(5)), axis=0, inplace=True)
prv_sol.reset_index(drop=True, inplace=True)
prv_sol=na_col_drop(prv_sol,1)
prv_sol=na_line_drop(prv_sol,3)
prv_sol.reset_index(drop=True, inplace=True)


Columns dropped :['col_1', "Nom / description d'échantillon", "Nature de l'étude (*)", 'Terrain', 'Epaisseur de remblais', 'Epaisseur alluvions', "Nature de l'observation organoleptique", 'LOCALISATION/AFFECTATION(S) - USAGE(S)', 'zone', 'parcelle (selon dénom.)', "Type d'usage de fait actuel", "Type d'usage de fait futur", "Type d'usage utilisé pour comparer les résultats", 'Lieux de prélèvement', 'X Lambert', 'Y Lambert', 'DESCRIPTION SOMMAIRE', 'Type de recouvrement de surface (***)', 'Sol en place (S : souillé, NS : non souillé)', 'Remblais de terre (S : souillé, NS : non souillé)', 'Remblais technique (S : souillé, NS : non souillé)', 'Déchet', 'Matières organiques', 'GRANULOMETRIE', 'Fraction argileuse']



In [171]:
prv_sol.drop(columns=prv_sol.columns[[-3,-4]], axis=1, inplace=True)

In [172]:
name=['ID_ech', 'Date_prv', 'Description','Organo', 'Long_for', 'Refus','Ech_top', 'Ech_base', 'MS','Fract_2','Fract_2+']
prv_sol=col_ren(prv_sol, name=name, mode=1)

In [173]:
set(prv_sol['Description'])

{'L', 'LA', 'LS', 'R', 'R '}

In [174]:
for i in range(len(prv_sol['Description'])):
    x = prv_sol.loc[i,'Description']
    if x=='R' or x=='R ': prv_sol.loc[i,'Description']='Remblais'
    elif x=='L': prv_sol.loc[i,'Description']='Limons'
    elif x=='LA': prv_sol.loc[i,'Description']='Limons et argiles'
    elif x=='LS': prv_sol.loc[i,'Description']='Limons et sables'

prv_sol['Refus']=prv_sol['Refus'].apply(lambda x: 'x' if not re.search('#', str(x)) else '')
prv_sol.insert(1,'Type_ech','Sol')

In [175]:
gdf_viewer(prv_sol, rows=3)

Rows : 60, columns : 12


interactive(children=(IntSlider(value=3, description='rows', max=60, min=3, readout=False), IntSlider(value=12…

In [176]:
an=an.transpose()
an.reset_index(drop=True, inplace=True)

In [177]:
an=col_ren(an, 1)

In [178]:
an.replace(r'<|>','', inplace=True, regex=True)
an.replace(r'-',np.nan, inplace=True, regex=True)
an.rename(columns={an.columns[0]:'ID_ech'}, inplace=True)

In [179]:
an=dble_col_drop(an)

column(s) dropped: []


In [180]:
an.drop(list(range(5)), axis=0, inplace=True)
an.reset_index(drop=True, inplace=True)
an=na_col_drop(an,1)
an.insert(1,'Type_ech','Sol')


Columns dropped :['METAUX LOURDS', 'CYANURES', 'COMPOSES AROMATIQUES VOLATILS', 'BTEX totaux', 'PHENOLS', 'Indice phénol', 'HYDROCARBURES AROMATIQUES POLYCYCLIQUES', 'COMPOSES ORGANOHALOGENES VOLATILS', 'col_63', 'EOX (****)', 'HYDROCARBURES TOTAUX', 'Hydrocarbures totaux C10-C40', 'METHYL-TERT-BUTYL-ETHER', 'POLYCHLOROBIPHENYL (PCB)', 'PCB 28', 'PCB 52', 'PCB 101', 'PCB 118', 'PCB 138', 'PCB 153', 'PCB 180', 'PCB totaux (7)', 'F4/2M*', 'Teneur mesurée', 'Teneur mesurée (souligne/gras)', 'Teneur mesurée (gras/grisé)', "(*) CP = Campagne de prélèvements; Ex = Expertise; ES = Etude de sol; EO = Etude d'orientation; EC = Etude de caractérisation; SA = Suivi d'assainissement", '(**) L: limon, A: Argile, S: Sable, R: Remblai', '(***) ib : imperméable (béton) ; ih : imperméable hydrocarboné ; p : perméable (gravier, fissuré,…) ; tvh : terres végétation haute ; tvb : terres végétation basse ', '(****) 3 mg/kg = Seuil limite défini dans le GREO ', "(1) l'échantillon n'a pas pu être extrait ni

In [181]:
an=col_ren(an, name=pol_field_model, mode=1)

In [182]:
gdf_viewer(an, rows=5) 

Rows : 60, columns : 71


interactive(children=(IntSlider(value=5, description='rows', max=60, min=5, readout=False), IntSlider(value=12…

In [183]:
source_prv_sol=prv_sol
source_an=an

In [184]:
if not os.path.exists(tmp_dir):
    os.makedirs(tmp_dir)

#bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)
#pz.to_csv(tmp_dir+sheet+'_Piezometers.csv', index=False)
#ouv.to_csv(tmp_dir+sheet+'_Unkown-facility.csv', index=False)
#litho.to_csv(tmp_dir+sheet+'_Lithology.csv', index=False)
#mes_pz.to_csv(tmp_dir+sheet+'_Measures.csv', index=False)
#mes_sol.to_csv(tmp_dir+sheet+'_Measures-soil.csv', index=False)
#prv_eau.to_csv(tmp_dir+sheet+'_Samples-water.csv', index=False)
prv_sol.to_csv(tmp_dir+sheet+'_Samples-soil.csv', index=False)
an.to_csv(tmp_dir+sheet+'_Analysis.csv', index=False)


#source_bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)
#source_pz.to_csv(tmp_dir+'source_Piezometers.csv', index=False)
#source_ouv.to_csv(tmp_dir+'source_Unkown-facility.csv', index=False)
#source_litho.to_csv(tmp_dir+'source_Lithology.csv', index=False)
#source_mes_pz.to_csv(tmp_dir+'source_Measures.csv', index=False)
#source_mes_sol.to_csv(tmp_dir+'source_Measures-soil.csv', index=False)
#source_prv_eau.to_csv(tmp_dir+'source_Samples-water.csv', index=False)
source_prv_sol.to_csv(tmp_dir+'source_Samples-soil.csv', index=False)
source_an.to_csv(tmp_dir+'source_Analysis.csv', index=False)


print(f'source_bh:{len(source_bh)} ; source_pz:{len(source_pz)} ; source_litho:{len(source_litho)} ; '
     f'source_Fac-uknw:{len(source_ouv)} ; source_an:{len(source_an)} ;\nsource_prv_sol:{len(source_prv_sol)} ;'
     f'source_prv_eau:{len(source_prv_eau)} ; source_mes_pz:{len(source_mes_pz)} ; source_mes_sol:{len(source_mes_sol)} ;')

source_bh:0 ; source_pz:0 ; source_litho:0 ; source_Fac-uknw:0 ; source_an:60 ;
source_prv_sol:60 ;source_prv_eau:0 ; source_mes_pz:0 ; source_mes_sol:0 ;


* **Sheet : 'Résult EAU'**

In [185]:
tmp_dir='../../CF_data/Result_traitem/Memoris_seafile/'
sheet='Result_eau'

In [186]:
df = pd.read_excel('../../CF_data/Data_UMONS/Contamination/Résultats et mesures Siterem/'
                   'Ensemble des résultats Memoris version Seafile.xls', sheet_name='Résult EAU', skiprows=4)
df=na_line_drop(df,0)
df=na_col_drop(df,1)
df.replace(r'<|>','', inplace=True, regex=True)
df.replace(r'-$',np.nan, inplace=True, regex=True)

gdf_viewer(df, rows=5)

4 NaN lines dropped

Columns dropped :['Unnamed: 0', 'Unnamed: 2']

Rows : 154, columns : 51


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


interactive(children=(IntSlider(value=5, description='rows', max=154, min=5, readout=False), IntSlider(value=1…

In [187]:
prv_eau=df.loc[:32]
an=df.loc[33:]

In [188]:
an.loc[0.5] = df.loc[0] # put data on first line
an = an.sort_index().reset_index(drop=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value, self.name)


In [189]:
prv_eau=prv_eau.transpose()
prv_eau.reset_index(drop=True, inplace=True)
prv_eau=col_ren(prv_eau, 1)

In [190]:
prv_eau['CE']=prv_eau['CE'].apply(lambda x: pd.to_numeric(x)/1000 
                                  if re.search('^\d+', str(x)) and not pd.isnull(x) else np.nan)

In [191]:
prv_eau.drop(list(range(5)), axis=0, inplace=True)
prv_eau.reset_index(drop=True, inplace=True)

In [192]:
prv_eau=dble_col_drop(prv_eau)

column(s) dropped: ['21:DESCRIPTION SOMMAIRE', '27:Prof. arrêt du forage']


In [193]:
prv_eau=na_col_drop(prv_eau,1)
prv_eau=na_line_drop(prv_eau,3)
prv_eau.reset_index(drop=True, inplace=True)


Columns dropped :['col_1', "Nature de l'étude (*)", 'Observations organoleptiques', 'LOCALISATION/AFFECTATION(S) - USAGE(S)', 'parcelle (selon dénom.)', "Type d'usage de fait actuel", "Type d'usage de fait futur", "Type d'usage utilisé pour comparer les résultats", 'Lieux de prélèvement', 'DESCRIPTION SOMMAIRE', 'Type de recouvrement de surface (**)', 'Sol en place (S : souillé, NS : non souillé)', 'Remblais de terre (S : souillé, NS : non souillé)', 'Remblais technique (S : souillé, NS : non souillé)', 'Déchet', 'PARAMETRES PHYSICO-CHIMIQUES ', 'ORP']



In [194]:
prv_eau.drop(columns=prv_eau.columns[[2]], axis=2, inplace=True)

In [195]:
name=['ID_ech', 'Date_prv','Affectation','X', 'Y','Zsol', 'Long_for','Prof_crep','Long_pz_sol', 
      'Niv_eau_sol','pH', 'CE', 'T']
prv_eau=col_ren(prv_eau, name=name, mode=1)
prv_eau.insert(1,'Type_ech','Eau')

In [196]:
prv_eau['Prof_crep'].replace('\[|\]','', regex=True, inplace=True)
for i in range(len(prv_eau)):
    c=prv_eau.loc[i,'Prof_crep']
    prv_eau.loc[i,'Equip_top']=c.split('-')[0]
    prv_eau.loc[i,'Equip_base']=c.split('-')[1]
    
prv_eau.drop(columns=['Prof_crep'], inplace=True)
prv_eau['Type_equip'] = 'Crepine'

In [197]:
prv_eau['ID_ech'].replace('Canne ', 'Can', inplace=True, regex=True)
prv_eau['ID_ech'].replace('\n', ' ', inplace=True, regex=True)

In [198]:
pz=prv_eau[['ID_ech', 'X', 'Y', 'Zsol', 'Long_for','Long_pz_sol', 'Equip_top', 'Equip_base', 'Type_equip']]
pz.rename(columns={'ID_ech':'ID'}, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


In [199]:
for i in range(len(pz.ID)):
    c=pz.loc[i, 'ID']
    pz.loc[i, 'ID']=re.search("(\w+\d+)",c).group(1)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value, self.name)


In [200]:
pz.drop_duplicates(subset=['ID'], keep='first', ignore_index=True, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pz.drop_duplicates(subset=['ID'], keep='first', ignore_index=True, inplace=True)


In [201]:
prv_eau=prv_eau[['ID_ech', 'Date_prv', 'X', 'Y', 'Zsol','Niv_eau_sol', 'pH', 'CE', 'T','Affectation']]

In [202]:
gdf_viewer(prv_eau, rows=3)

Rows : 45, columns : 10


interactive(children=(IntSlider(value=3, description='rows', max=45, min=3, readout=False), IntSlider(value=10…

In [203]:
an=an.transpose()
an.reset_index(drop=True, inplace=True)

In [204]:
an=col_ren(an, 1)

In [205]:
an.replace(r'<|>','', inplace=True, regex=True)
an.replace(r'-',np.nan, inplace=True, regex=True)
an.rename(columns={an.columns[0]:'ID_ech'}, inplace=True)

In [206]:
an.drop(list(range(5)), axis=0, inplace=True)
an.reset_index(drop=True, inplace=True)

In [207]:
an=dble_col_drop(an)

column(s) dropped: ['104:nitrite', '106:nitrate', '112:ammonium', '117:Teneur mesurée']


In [208]:
an.drop(columns=an.columns[[-6,-5]], axis=1, inplace=True)
an=na_col_drop(an,1)
an.insert(1,'Type_ech','Eau')


Columns dropped :['METAUX LOURDS', 'CYANURES', 'COMPOSES AROMATIQUES VOLATILS', 'PHENOLS', 'crésols (total)', 'CHLOROPHENOLS', '2-chlorophénol', 'monochlorophénol total', 'dichlorophénol total', '2,4,5-trichlorophénol', '2,4,6-trichlorophénol', 'trichlorophénol total', '2,3,4,6- tétrachlorophénol', 'tétrachlorophénol total', 'pentachlorophénol', 'HYDROCARBURES AROMATIQUES POLYCYCLIQUES', 'COMPOSES ORGANOCHLORES VOLATILS', 'CHLOROBENZENES', 'monochlorobenzène', '1,3-dichlorobenzène', '1,2-dichlorobenzène', '1,4-Dichlorobenzène', '1,2,4,5- et 1,2,3,5-tétrachlorobenzènes', 'pentachlorobenzène', 'hexachlorobenzène', 'HYDROCARBURES TOTAUX', 'METHYL-TERT-BUTYL-ETHER', 'POLYCHLOROBIPHENYLS (PCB)', 'PCB totaux (7)', 'AUTRES ANALYSES ', 'azote Kjeldahl', 'COMPOSES INORGANIQUES ', 'sulfures totaux', 'Teneur mesurée (souligne/gras)', 'Teneur mesurée (gras/grisé)', "(*) CP = Campagne de prélèvements; Ex = Expertise; ES = Etude de sol; EO = Etude d'orientation; EC = Etude de caractérisation; SA = 

In [209]:
an['ID_ech'].replace('Canne ', 'Can', inplace=True, regex=True)
an['ID_ech'].replace('\n', ' ', inplace=True, regex=True)

In [210]:
an=col_ren(an, name=pol_field_model, mode=1)

In [211]:
gdf_viewer(an, rows=5) 

Rows : 45, columns : 80


interactive(children=(IntSlider(value=5, description='rows', max=45, min=5, readout=False), IntSlider(value=12…

In [212]:
source_pz=pz
source_prv_eau=prv_eau
source_an=source_an.append(an)

In [213]:
if not os.path.exists(tmp_dir):
    os.makedirs(tmp_dir)

#bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)
pz.to_csv(tmp_dir+sheet+'_Piezometers.csv', index=False)
#ouv.to_csv(tmp_dir+sheet+'_Unkown-facility.csv', index=False)
#litho.to_csv(tmp_dir+sheet+'_Lithology.csv', index=False)
#mes_pz.to_csv(tmp_dir+sheet+'_Measures.csv', index=False)
#mes_sol.to_csv(tmp_dir+sheet+'_Measures-soil.csv', index=False)
prv_eau.to_csv(tmp_dir+sheet+'_Samples-water.csv', index=False)
#prv_sol.to_csv(tmp_dir+sheet+'_Samples-soil.csv', index=False)
an.to_csv(tmp_dir+sheet+'_Analysis.csv', index=False)


#source_bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)
source_pz.to_csv(tmp_dir+'source_Piezometers.csv', index=False)
#source_ouv.to_csv(tmp_dir+'source_Unkown-facility.csv', index=False)
#source_litho.to_csv(tmp_dir+'source_Lithology.csv', index=False)
#source_mes_pz.to_csv(tmp_dir+'source_Measures.csv', index=False)
#source_mes_sol.to_csv(tmp_dir+'source_Measures-soil.csv', index=False)
source_prv_eau.to_csv(tmp_dir+'source_Samples-water.csv', index=False)
#source_prv_sol.to_csv(tmp_dir+'source_Samples-soil.csv', index=False)
source_an.to_csv(tmp_dir+'source_Analysis.csv', index=False)


print(f'source_bh:{len(source_bh)} ; source_pz:{len(source_pz)} ; source_litho:{len(source_litho)} ; '
     f'source_Fac-uknw:{len(source_ouv)} ; source_an:{len(source_an)} ;\nsource_prv_sol:{len(source_prv_sol)} ;'
     f'source_prv_eau:{len(source_prv_eau)} ; source_mes_pz:{len(source_mes_pz)} ; source_mes_sol:{len(source_mes_sol)} ;')

source_bh:0 ; source_pz:30 ; source_litho:0 ; source_Fac-uknw:0 ; source_an:105 ;
source_prv_sol:60 ;source_prv_eau:45 ; source_mes_pz:0 ; source_mes_sol:0 ;


## 10-Résultats SOL container phyto t=0_décret sol.xls
* **Sheet : 'Résult SOL'**

In [214]:
# New file, so the source variables must be overwritten !!
_df=pd.DataFrame()
source_mes_pz, source_mes_sol, source_pz, source_prv_eau, source_prv_sol =  _df, _df, _df, _df, _df
source_ouv, source_an, source_litho, source_bh = _df, _df, _df, _df

In [215]:
tmp_dir='../../CF_data/Result_traitem/Container_phyto/'
sheet='Result_SOL'

In [216]:
df = pd.read_excel('../../CF_data/Data_UMONS/Contamination/Résultats et mesures Siterem/'
                   'Résultats SOL container phyto t=0_décret sol.xls', sheet_name='Résult SOL', skiprows=4)
df=na_line_drop(df,0)
df=na_col_drop(df,1)
df.replace(r'<|>','', inplace=True, regex=True)
df.replace(r'-$',np.nan, inplace=True, regex=True)

gdf_viewer(df, rows=5)

2 NaN lines dropped

Columns dropped :['Unnamed: 0', 'Unnamed: 2', 'Unnamed: 16', 'Unnamed: 17']

Rows : 121, columns : 15


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


interactive(children=(IntSlider(value=5, description='rows', max=121, min=5, readout=False), IntSlider(value=1…

In [217]:
prv_sol=df.loc[:21]
an=df.loc[22:]

In [218]:
an.loc[0.5] = df.loc[0] # put data on first line
an = an.sort_index().reset_index(drop=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value, self.name)


In [219]:
prv_sol=prv_sol.transpose()
prv_sol.reset_index(drop=True, inplace=True)
prv_sol=col_ren(prv_sol, 1)

In [220]:
prv_sol=dble_col_drop(prv_sol)

column(s) dropped: ['8:Autre zone suspecte investiguée']


In [221]:
prv_sol.drop(list(range(5)), axis=0, inplace=True)
prv_sol.reset_index(drop=True, inplace=True)
prv_sol=na_col_drop(prv_sol,2)
prv_sol=na_line_drop(prv_sol,3)
prv_sol.reset_index(drop=True, inplace=True)


Columns dropped :['Matières organiques', 'SPP/zone suspecte invetiguée', 'Autre zone suspecte investiguée', 'Parcelle', 'X Lambert', 'Y Lambert', "Type d'étude (*)", 'Refus de forage', 'Terrain naturel/Remblai (**)', 'Organoleptique couleur suspecte', 'Organoleptique odeur intensité (***)', 'Organoleptique odeur type', 'GRANULOMETRIE']

4 NaN lines dropped


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


In [222]:
prv_sol.drop(columns=prv_sol.columns[[-3]], axis=1, inplace=True)

In [223]:
name=['ID_ech', 'Ech_top', 'Ech_base','MS','Date_prv','Fract_2','Fract_2+']
prv_sol=col_ren(prv_sol, name=name, mode=1)
prv_sol=prv_sol.query('ID_ech==ID_ech')
prv_sol.insert(1,'Type_ech','Sol')

In [224]:
gdf_viewer(prv_sol, rows=3)

Rows : 5, columns : 8


interactive(children=(IntSlider(value=3, description='rows', max=5, min=3, readout=False), IntSlider(value=8, …

In [225]:
an=an.transpose()
an.reset_index(drop=True, inplace=True)

In [226]:
an=col_ren(an, 1)

In [227]:
an.replace(r'<|>','', inplace=True, regex=True)
an.replace(r'-',np.nan, inplace=True, regex=True)
an.rename(columns={an.columns[0]:'ID_ech'}, inplace=True)

In [228]:
an=dble_col_drop(an)

column(s) dropped: ['92:Teneur mesurée', '93:Teneur mesurée', '94:Teneur mesurée']


In [229]:
an.drop(list(range(5)), axis=0, inplace=True)
an.reset_index(drop=True, inplace=True)
an=na_col_drop(an,1)
an.insert(1,'Type_ech','Sol')


Columns dropped :['METAUX LOURDS', 'Cobalt', 'CYANURES', 'COMPOSES AROMATIQUES VOLATILS', 'BTEX totaux', 'PHENOLS', 'Indice phénol', 'HYDROCARBURES AROMATIQUES POLYCYCLIQUES', 'COMPOSES ORGANOHALOGENES VOLATILS', 'col_64', 'EOX (****)', 'HYDROCARBURES TOTAUX', 'METHYL-TERT-BUTYL-ETHER', 'POLYCHLOROBIPHENYL (PCB)', 'PCB 28', 'PCB 52', 'PCB 101', 'PCB 118', 'PCB 138', 'PCB 153', 'PCB 180', 'PCB totaux (7)', 'Teneur mesurée', "VR : Valeur de référence; VS : Valeur seuil; VI : Valeur d'intervention", "(*) RP : Rapport de prélèvements; ES : Etude de sol; EO : Etude d'orientation; EC : Etude de caractérisation; SA : Suivi d'assainissement; EF : Evaluation finale", '(**) TN : Terrain naturel; R : Remblai', "(***) - : Pas d'impression organoleptique; + : Impression organoleptique faible; ++ : Impression organoleptique forte", '(****) 3 mg/kg = Seuil limite défini dans le GREO ']



In [230]:
an=col_ren(an, name=pol_field_model, mode=1)

In [231]:
gdf_viewer(an, rows=5) 

Rows : 9, columns : 70


interactive(children=(IntSlider(value=5, description='rows', max=9, min=5, readout=False), IntSlider(value=12,…

In [232]:
source_prv_sol=prv_sol
source_an=an

In [233]:
if not os.path.exists(tmp_dir):
    os.makedirs(tmp_dir)

#bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)
#pz.to_csv(tmp_dir+sheet+'_Piezometers.csv', index=False)
#ouv.to_csv(tmp_dir+sheet+'_Unkown-facility.csv', index=False)
#litho.to_csv(tmp_dir+sheet+'_Lithology.csv', index=False)
#mes_pz.to_csv(tmp_dir+sheet+'_Measures.csv', index=False)
#mes_sol.to_csv(tmp_dir+sheet+'_Measures-soil.csv', index=False)
#prv_eau.to_csv(tmp_dir+sheet+'_Samples-water.csv', index=False)
prv_sol.to_csv(tmp_dir+sheet+'_Samples-soil.csv', index=False)
an.to_csv(tmp_dir+sheet+'_Analysis.csv', index=False)


#source_bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)
#source_pz.to_csv(tmp_dir+'source_Piezometers.csv', index=False)
#source_ouv.to_csv(tmp_dir+'source_Unkown-facility.csv', index=False)
#source_litho.to_csv(tmp_dir+'source_Lithology.csv', index=False)
#source_mes_pz.to_csv(tmp_dir+'source_Measures.csv', index=False)
#source_mes_sol.to_csv(tmp_dir+'source_Measures-soil.csv', index=False)
#source_prv_eau.to_csv(tmp_dir+'source_Samples-water.csv', index=False)
source_prv_sol.to_csv(tmp_dir+'source_Samples-soil.csv', index=False)
source_an.to_csv(tmp_dir+'source_Analysis.csv', index=False)


print(f'source_bh:{len(source_bh)} ; source_pz:{len(source_pz)} ; source_litho:{len(source_litho)} ; '
     f'source_Fac-uknw:{len(source_ouv)} ; source_an:{len(source_an)} ;\nsource_prv_sol:{len(source_prv_sol)} ;'
     f'source_prv_eau:{len(source_prv_eau)} ; source_mes_pz:{len(source_mes_pz)} ; source_mes_sol:{len(source_mes_sol)} ;')

source_bh:0 ; source_pz:0 ; source_litho:0 ; source_Fac-uknw:0 ; source_an:9 ;
source_prv_sol:5 ;source_prv_eau:0 ; source_mes_pz:0 ; source_mes_sol:0 ;


* **Sheet : 'Paramètres agro.'**

In [234]:
tmp_dir='../../CF_data/Result_traitem/Container_phyto/'
sheet='Param_agro'

In [235]:
df = pd.read_excel('../../CF_data/Data_UMONS/Contamination/Résultats et mesures Siterem/'
                   'Résultats SOL container phyto t=0_décret sol.xls', sheet_name='Paramètres agro.', skiprows=4)
df=na_line_drop(df,0)
df=na_col_drop(df,1)
df.replace(r'<|>','', inplace=True, regex=True)
df.replace(r'-$',np.nan, inplace=True, regex=True)

gdf_viewer(df, rows=5)

1 NaN lines dropped

Columns dropped :['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2']

Rows : 28, columns : 10


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


interactive(children=(IntSlider(value=5, description='rows', max=28, min=5, readout=False), IntSlider(value=10…

In [236]:
df=df.transpose()
df.reset_index(drop=True, inplace=True)
df=col_ren(df, 0)

In [237]:
df.drop(list(range(1)), axis=0, inplace=True)
df.reset_index(drop=True, inplace=True)

In [238]:
df=dble_col_drop(df)

column(s) dropped: ['22:température pour mes. pH']


In [239]:
df=na_col_drop(df,1)
df=na_line_drop(df,3)
df.reset_index(drop=True, inplace=True)


Columns dropped :['Matières organiques', 'GRANULOMETRIE', 'pH', 'Composés inorganiques ', 'Autres analyses chimiques ']

3 NaN lines dropped


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


In [240]:
df.drop(columns=df.columns[[5,6]], axis=2, inplace=True)

In [241]:
df.columns

Index(['Nom de l'échantillon', 'Profondeur échantillon dans container de',
       '                                           à', 'Matière sèche',
       'Date de prélèvement', 'Matières organiques ',
       'résidu après perte au feu', 'COT', 'fraction argileuse',
       'parties min. 2µm', 'parties min. 50µm', 'parties min. 2mm',
       'fraction  2 mm (prép. séché à 40°C) ',
       'fraction 2 mm (prép. séché à 40°C) ', 'pH (KCl)',
       'température pour mes. pH', 'pH (H20)', 'sulfures totaux', 'chlorures',
       'azote Kjeldahl'],
      dtype='object')

In [242]:
name=['ID_ech','Ech_top','Ech_base','MS','Date_prv','MO','Residu_perte_feu','COT','Fract_arg','Fract_min_2µ', 
      'Fract_min_50µ', 'Fract_min_2', 'Fract_2', 'Fract_2+', 'pH_KCl','Tem_pH_mes', 'pH_H20', 'sulfures_tot', 
      'chlorures', 'azote_Kjeldahl']
df=col_ren(df, name=name, mode=1)
df.insert(1,'Type_ech','Sol')

In [243]:
prv_sol=df

In [244]:
gdf_viewer(prv_sol, rows=5)

Rows : 5, columns : 21


interactive(children=(IntSlider(value=5, description='rows', max=5, min=5, readout=False), IntSlider(value=12,…

In [245]:
gdf_merger(source_prv_sol, prv_sol, col='ID_ech', how='outer', )[0]

Ambiguous values in both columns compared, change it manually !
Columns Index(['Fract_2+_x', 'Fract_2+_y', 'Fract_2_x', 'Fract_2_y', 'MS_x', 'MS_y'], dtype='object') must be dropped manually !


Unnamed: 0,ID_ech,pH_KCl,azote_Kjeldahl,pH_H20,Fract_min_50µ,Fract_min_2µ,sulfures_tot,Fract_min_2,Tem_pH_mes,Fract_arg,...,MO,chlorures,Residu_perte_feu,Fract_2+,Fract_2,Ech_base,Ech_top,Date_prv,Type_ech,MS
0,Ech. 1,11.8,1320,11.3,4.8,1,130,57,20.7,1.0,...,6.3,34,93.3,99999.0,99999.0,15,12,2017-12-14 00:00:00,Sol,99999.0
1,Ech. 2,8.2,3810,8.6,13.0,1,78,56,20.6,1.9,...,11.5,18,88.2,53.0,47.0,12,9,2017-12-14 00:00:00,Sol,99999.0
2,Ech. 3,11.0,3040,11.0,10.0,1,86,72,20.6,1.0,...,10.2,36,89.4,99999.0,99999.0,9,6,2017-12-14 00:00:00,Sol,99999.0
3,Ech. 4,11.1,2550,11.1,9.0,1,60,65,20.4,1.0,...,9.0,36,90.6,99999.0,99999.0,6,3,2017-12-14 00:00:00,Sol,99999.0
4,Ech. 5,11.4,2300,11.4,8.1,1,80,65,20.9,1.0,...,8.4,43,91.2,99999.0,99999.0,3,0,2017-12-14 00:00:00,Sol,99999.0


In [246]:
if not os.path.exists(tmp_dir):
    os.makedirs(tmp_dir)

#bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)
#pz.to_csv(tmp_dir+sheet+'_Piezometers.csv', index=False)
#ouv.to_csv(tmp_dir+sheet+'_Unkown-facility.csv', index=False)
#litho.to_csv(tmp_dir+sheet+'_Lithology.csv', index=False)
#mes_pz.to_csv(tmp_dir+sheet+'_Measures.csv', index=False)
#mes_sol.to_csv(tmp_dir+sheet+'_Measures-soil.csv', index=False)
#prv_eau.to_csv(tmp_dir+sheet+'_Samples-water.csv', index=False)
prv_sol.to_csv(tmp_dir+sheet+'_Samples-soil.csv', index=False)
#an.to_csv(tmp_dir+sheet+'_Analysis.csv', index=False)


#source_bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)
#source_pz.to_csv(tmp_dir+'source_Piezometers.csv', index=False)
#source_ouv.to_csv(tmp_dir+'source_Unkown-facility.csv', index=False)
#source_litho.to_csv(tmp_dir+'source_Lithology.csv', index=False)
#source_mes_pz.to_csv(tmp_dir+'source_Measures.csv', index=False)
#source_mes_sol.to_csv(tmp_dir+'source_Measures-soil.csv', index=False)
#source_prv_eau.to_csv(tmp_dir+'source_Samples-water.csv', index=False)
source_prv_sol.to_csv(tmp_dir+'source_Samples-soil.csv', index=False)
#source_an.to_csv(tmp_dir+'source_Analysis.csv', index=False)


print(f'source_bh:{len(source_bh)} ; source_pz:{len(source_pz)} ; source_litho:{len(source_litho)} ; '
     f'source_Fac-uknw:{len(source_ouv)} ; source_an:{len(source_an)} ;\nsource_prv_sol:{len(source_prv_sol)} ;'
     f'source_prv_eau:{len(source_prv_eau)} ; source_mes_pz:{len(source_mes_pz)} ; source_mes_sol:{len(source_mes_sol)} ;')

source_bh:0 ; source_pz:0 ; source_litho:0 ; source_Fac-uknw:0 ; source_an:9 ;
source_prv_sol:5 ;source_prv_eau:0 ; source_mes_pz:0 ; source_mes_sol:0 ;


--------------------------------------------------------------------------------------------------------

## 11-Resultats_Siterem_eau_extension_pilote_jusque_decembre_2020.xlsx
* **Sheet : 'Résult EAU'**

In [247]:
# New file, so the source variables must be overwritten !!
_df=pd.DataFrame()
source_mes_pz, source_mes_sol, source_pz, source_prv_eau, source_prv_sol =  _df, _df, _df, _df, _df
source_ouv, source_an, source_litho, source_bh = _df, _df, _df, _df

In [248]:
tmp_dir='../../CF_data/Result_traitem/Siterem_Ext_Pilote/'
sheet='Result_eau'

In [249]:
df = pd.read_excel('../../CF_data/Data_SITEREM/Resultats_Siterem_eau_extension_pilote_jusque_decembre_2020.xlsx', 
                   sheet_name='Résult EAU', skiprows=2)
df=na_line_drop(df,0)
df=na_col_drop(df,1)
df.replace(r'<|>','', inplace=True, regex=True)
df.replace(r'-$',np.nan, inplace=True, regex=True)

gdf_viewer(df, rows=5)

  warn(msg)


11 NaN lines dropped

Columns dropped :['Unnamed: 0', 'Unnamed: 2', 'Unnamed: 39', 'Unnamed: 40', 'Unnamed: 41', 'Unnamed: 42', 'Unnamed: 43', 'Unnamed: 44', 'Unnamed: 45', 'Unnamed: 46', 'Unnamed: 47', 'Unnamed: 48', 'Unnamed: 49', 'Unnamed: 50', 'Unnamed: 51', 'Unnamed: 52', 'Unnamed: 53', 'Unnamed: 54', 'Unnamed: 55', 'Unnamed: 56', 'Unnamed: 57', 'Unnamed: 58', 'Unnamed: 59', 'Unnamed: 60', 'Unnamed: 61', 'Unnamed: 62', 'Unnamed: 63', 'Unnamed: 64', 'Unnamed: 65', 'Unnamed: 66', 'Unnamed: 67', 'Unnamed: 68', 'Unnamed: 69']

Rows : 115, columns : 37


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


interactive(children=(IntSlider(value=5, description='rows', max=115, min=5, readout=False), IntSlider(value=1…

In [250]:
prv_eau=df.loc[:31]
an=df.loc[32:]

In [251]:
an.loc[0.5] = df.loc[0] # put data on first line
an = an.sort_index().reset_index(drop=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value, self.name)


In [252]:
prv_eau=prv_eau.transpose()
prv_eau.reset_index(drop=True, inplace=True)
prv_eau=col_ren(prv_eau, 1)

In [253]:
prv_eau=dble_col_drop(prv_eau)

column(s) dropped: ['6:Autre zone suspecte investiguée', '25:pH', '30:pH']


In [254]:
prv_eau.drop(list(range(5)), axis=0, inplace=True)
prv_eau.reset_index(drop=True, inplace=True)
prv_eau=na_col_drop(prv_eau,2)
prv_eau=na_line_drop(prv_eau,3)
prv_eau.reset_index(drop=True, inplace=True)


Columns dropped :['SPP/zone suspecte investiguée', 'Autre zone suspecte investiguée', 'Parcelle', 'X Lambert', 'Y Lambert', "Type d'étude (*)", 'Profondeur minimum de la crépine', 'Profondeur maximum de la crépine', 'Profondeur du fond du piézomètre', 'Profondeur de la nappe/piezo', 'Profondeur de la nappe/chambre visite', 'Organoleptique odeur intensité (**)', 'Organoleptique odeur type', 'Limpidité (***)', 'Température', 'Conductivité électrique', 'Profondeur de la nappe/sol ', 'PARAMETRES PHYSICO-CHIMIQUES \n(lors du prélèvement)', 'CE', 'ORP', 'Oxygène dissous', 'PARAMETRES PHYSICO-CHIMIQUES \n(mesures au labo)']



In [255]:
prv_eau.columns

Index(['Nom échantillon', 'Période ', 'Emplacement ', 'Date de prélèvement',
       'pH', 'Température de prélèvement ', 'température pour mes. pH'],
      dtype='object')

In [256]:
name=['ID_ech', 'Periode', 'Emplacement','Date_prv','pH','Temp_prv','Temp_pH_mes']
prv_eau=col_ren(prv_eau, name=name, mode=1)
prv_eau=prv_eau.query('ID_ech==ID_ech')
prv_eau.insert(1,'Type_ech','Eau')

In [257]:
gdf_viewer(prv_eau, rows=3)

Rows : 31, columns : 8


interactive(children=(IntSlider(value=3, description='rows', max=31, min=3, readout=False), IntSlider(value=8,…

In [258]:
an=an.transpose()
an.reset_index(drop=True, inplace=True)

In [259]:
an=col_ren(an, 1)

In [260]:
an.replace(r'<|>','', inplace=True, regex=True)
an.replace(r'-',np.nan, inplace=True, regex=True)
an.rename(columns={an.columns[0]:'ID_ech'}, inplace=True)

In [261]:
an=dble_col_drop(an)

column(s) dropped: []


In [262]:
# rename in a first time before dropping columns (because of names like 'col_xx' in columns)
name=['ID_ech', 'METAUX LOURDS', 'Arsenic', 'Cadmium', 'Chrome', 'Cuivre','Mercure', 'Plomb', 'Nickel', 'Zinc', 
      'CYANURES',"Cyanures (libres)  -  NEN-EN-ISO 14403", "CN_totaux - NEN-EN-ISO 14403", 
      "cyanure (APE) - méthode basée sur EPA 335.3", "cyanure complex - méthode interne ", 
      "thiocyanate - méthode interne", "COMPOSES AROMATIQUES VOLATILS", "Benzène", "Toluène", "Éthylbenzène",
      "Orthoxylène", "Para- et métaxylène", "Xylènes", "Styrène", "PHENOLS", "Phénol", "Indice phénol", 
      "HYDROCARBURES AROMATIQUES POLYCYCLIQUES", "Naphtalène", "Acénaphtylène", "Acénaphtène", "Fluorène", 
      "Phénanthrène", "Anthracène", "Fluoranthène", "Pyrène", "Benzo(a)anthracène", "Chrysène", 
      "Benzo(b)fluoranthène", "Benzo(k)fluoranthène", "Benzo(a)pyrène", "Dibenzo(ah)anthracène", 
      "Benzo(ghi)pérylène", "Indéno(1,2,3-cd)pyrène", "HAP Totaux (16) - EPA", 
      "COMPOSES ORGANOCHLORES VOLATILS", "1,1-Dichloroéthane", "1,2-Dichloroéthane", 
      "1,1-dichloroéthène", "Cis-1,2-dichloroéthène", "Totaux (cis,trans) 1,2-dichloroéthènes", 
      "Trans 1,2-dichloroéthylène", "Dichlorométhane", "1,2-dichloropropane", "Tétrachloroéthylène ", 
      "Tétrachlorométhane", "1,1,1-Trichloroéthane", "1,1,2-Trichloroéthane", "Trichloroéthylène",
      "Chloroforme", "Chlorure de vinyle", "HYDROCARBURES TOTAUX", "fraction aromat. >C6-C7", 
      "fraction aromat. >C7-C8", "fraction aromat. >C8-C10", "fraction aliphat. C5-C6", "fraction aliphat. >C6-C8",
      "fraction aliphat. >C8-C10", "Fraction C5 - C8", "Fraction C8 - C10", "Fraction C10-C12", "Fraction C12-C16",
      "Fraction C16 - C21", "Fraction C21 - C35", "Hydrocarbures totaux C10-C35", "METHYL-TERT-BUTYL-ETHER", "MTBE",
     "a","b","c","d","e","f","g"]

an=col_ren(an, name=name, mode=1)
an=an.iloc[:,:-7]

In [263]:
an.drop(list(range(3)), axis=0, inplace=True)
an.reset_index(drop=True, inplace=True)
an=na_col_drop(an,1)
an.insert(1,'Type_ech','Eau')


Columns dropped :['METAUX LOURDS', 'CYANURES', 'COMPOSES AROMATIQUES VOLATILS', 'Styrène', 'PHENOLS', 'Indice phénol', 'HYDROCARBURES AROMATIQUES POLYCYCLIQUES', 'COMPOSES ORGANOCHLORES VOLATILS', 'HYDROCARBURES TOTAUX', 'METHYL-TERT-BUTYL-ETHER', 'MTBE']



In [264]:
an=col_ren(an, name=pol_field_model, mode=1)

In [265]:
gdf_viewer(an, rows=3)

Rows : 33, columns : 67


interactive(children=(IntSlider(value=3, description='rows', max=33, min=3, readout=False), IntSlider(value=12…

In [266]:
source_prv_eau=prv_eau
source_an=an

In [267]:
if not os.path.exists(tmp_dir):
    os.makedirs(tmp_dir)

#bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)
#pz.to_csv(tmp_dir+sheet+'_Piezometers.csv', index=False)
#ouv.to_csv(tmp_dir+sheet+'_Unkown-facility.csv', index=False)
#litho.to_csv(tmp_dir+sheet+'_Lithology.csv', index=False)
#mes_pz.to_csv(tmp_dir+sheet+'_Measures.csv', index=False)
#mes_sol.to_csv(tmp_dir+sheet+'_Measures-soil.csv', index=False)
prv_eau.to_csv(tmp_dir+sheet+'_Samples-water.csv', index=False)
#prv_sol.to_csv(tmp_dir+sheet+'_Samples-soil.csv', index=False)
an.to_csv(tmp_dir+sheet+'_Analysis.csv', index=False)


#source_bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)
#source_pz.to_csv(tmp_dir+'source_Piezometers.csv', index=False)
#source_ouv.to_csv(tmp_dir+'source_Unkown-facility.csv', index=False)
#source_litho.to_csv(tmp_dir+'source_Lithology.csv', index=False)
#source_mes_pz.to_csv(tmp_dir+'source_Measures.csv', index=False)
#source_mes_sol.to_csv(tmp_dir+'source_Measures-soil.csv', index=False)
source_prv_eau.to_csv(tmp_dir+'source_Samples-water.csv', index=False)
#source_prv_sol.to_csv(tmp_dir+'source_Samples-soil.csv', index=False)
source_an.to_csv(tmp_dir+'source_Analysis.csv', index=False)


print(f'source_bh:{len(source_bh)} ; source_pz:{len(source_pz)} ; source_litho:{len(source_litho)} ; '
     f'source_Fac-uknw:{len(source_ouv)} ; source_an:{len(source_an)} ;\nsource_prv_sol:{len(source_prv_sol)} ;'
     f'source_prv_eau:{len(source_prv_eau)} ; source_mes_pz:{len(source_mes_pz)} ; source_mes_sol:{len(source_mes_sol)} ;')

source_bh:0 ; source_pz:0 ; source_litho:0 ; source_Fac-uknw:0 ; source_an:33 ;
source_prv_sol:0 ;source_prv_eau:31 ; source_mes_pz:0 ; source_mes_sol:0 ;


* **Sheet : 'Param physico'**

In [268]:
tmp_dir='../../CF_data/Result_traitem/Siterem_Ext_Pilote/'
sheet='Param_physico'

In [269]:
df = pd.read_excel('../../CF_data/Data_SITEREM/Resultats_Siterem_eau_extension_pilote_jusque_decembre_2020.xlsx', 
                   sheet_name='param. physico', skiprows=2)
df=na_line_drop(df,0)
df=na_col_drop(df,1)
df.replace(r'<|>','', inplace=True, regex=True)
df.replace(r'-$',np.nan, inplace=True, regex=True)

gdf_viewer(df, rows=5)

8 NaN lines dropped

Columns dropped :['Unnamed: 0', 'Unnamed: 2', 'Unnamed: 79', 'Unnamed: 80', 'Unnamed: 81', 'Unnamed: 82', 'Unnamed: 83', 'Unnamed: 84', 'Unnamed: 85', 'Unnamed: 86', 'Unnamed: 87', 'Unnamed: 88', 'Unnamed: 89', 'Unnamed: 90', 'Unnamed: 91', 'Unnamed: 92', 'Unnamed: 93', 'Unnamed: 94', 'Unnamed: 95', 'Unnamed: 96', 'Unnamed: 97', 'Unnamed: 98', 'Unnamed: 99', 'Unnamed: 100', 'Unnamed: 101', 'Unnamed: 102']

Rows : 53, columns : 77


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


interactive(children=(IntSlider(value=5, description='rows', max=53, min=5, readout=False), IntSlider(value=12…

In [270]:
df=df.transpose()
df.reset_index(drop=True, inplace=True)

In [271]:
df=col_ren(df, 1)

In [272]:
df.drop(list(range(2)), axis=0, inplace=True)
df.reset_index(drop=True, inplace=True)

In [273]:
sdf=df.iloc[:,:33]
df=df.iloc[:,34:]

In [274]:
df=dble_col_drop(df)
sdf=dble_col_drop(sdf)

column(s) dropped: ['3:Période ']
column(s) dropped: ['6:Autre zone suspecte investiguée', '26:pH', '32:pH']


In [275]:
df=na_line_drop(df,1)
sdf=na_line_drop(sdf,1)

41 NaN lines dropped
27 NaN lines dropped


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


In [276]:
df=na_col_drop(df,1)
sdf=na_col_drop(sdf,1)


Columns dropped :['Profondeur de la nappe/chambre visite', 'Organoleptique odeur intensité (**)', 'Organoleptique odeur type', 'Profondeur de la nappe/sol ', 'PARAMETRES PHYSICO-CHIMIQUES \n(lors du prélèvement)', "*paramètres n'ont pas été pris en débit continu (dans seau) - peu de débit", 'col_52']


Columns dropped :['SPP/zone suspecte investiguée', 'Autre zone suspecte investiguée', 'Parcelle', 'X Lambert', 'Y Lambert', "Type d'étude (*)", 'Profondeur minimum de la crépine', 'Profondeur maximum de la crépine', 'Profondeur du fond du piézomètre', 'Organoleptique odeur intensité (**)', 'Organoleptique odeur type', 'Limpidité (***)', 'Température', 'Conductivité électrique', 'PARAMETRES PHYSICO-CHIMIQUES \n(lors du prélèvement)', 'PARAMETRES PHYSICO-CHIMIQUES \n(mesures au labo)']



In [277]:
df.columns

Index(['Nom échantillon', 'Période ',
       'Emplacement \n- S : Simulateur \n- HZS : Hors zone simulateur',
       'Date de prélèvement', 'Profondeur de la nappe/piezo',
       'Profondeur piezo/piezo', 'Température de prélèvement ', 'pH', 'CE',
       'ORP', 'Oxygène dissous'],
      dtype='object')

In [278]:
name=['ID_ech','Periode','Emplacement','Date_prv','Niv_eau_pz','Long_pz','Temp_prv ','pH', 'CE', 'ORP','O_diss']
df=col_ren(df, mode=1, name=name)

In [279]:
sdf=sdf.iloc[:,:-1]
name=['ID_ech','Periode','Emplacement','Date_prv','Niv_eau_pz','Niv_eau_chbre','pH','Niv_eau_sol','Long_pz',
      'Temp_prv ','CE','ORP','O_diss']
sdf=col_ren(sdf, mode=1, name=name)

In [280]:
df['CE']=df['CE'].apply(lambda x: pd.to_numeric(x)/1000 
                                  if re.search('^\d+', str(x)) and not pd.isnull(x) else np.nan)
sdf['CE']=sdf['CE'].apply(lambda x: pd.to_numeric(x)/1000 
                                  if re.search('^\d+', str(x)) and not pd.isnull(x) else np.nan)

In [281]:
sdf['Periode'].replace('\n',' ', regex=True, inplace=True)
sdf.replace('\n','', regex=True, inplace=True)

In [282]:
data=[df, sdf]
for d in data:
    d['Rmq']=''
    for i in range(len(d['ID_ech'])):
        e=str(d.loc[i, 'Emplacement'])
        n=str(d.loc[i, 'ID_ech'])
        d.loc[i,'ID_ech']=n.replace('*', '')
        
        if re.match('S',e, re.I): 
            d.loc[i,'Emplacement']='Simulateur'
        elif re.match('HZS',e, re.I): 
            d.loc[i,'Emplacement']='Hors Simulateur'
        else:
            d.loc[i,'Emplacement']=np.nan
        
        if re.match('\d+\*{1}$',n, re.I): 
            d.loc[i,'Rmq']="mesures faites dans un seau (débit non continu ou peu de débit)"
        elif re.match('\d+\*{2}$',n, re.I): 
            d.loc[i,'Rmq']="mésures faites dans une eau quasi-stagnante (Piezo rempli de sédiment et débit très faible)"

In [283]:
df.insert(1, 'Type_ech', 'Eau')
sdf.insert(1, 'Type_ech', 'Eau')

In [284]:
prv_eau=gdf_merger(sdf, df, 'outer', 'ID_ech')[0]

In [285]:
prv_eau=na_col_drop(df,2)
prv_eau=na_line_drop(df,1)
prv_eau.reset_index(drop=True, inplace=True)

In [286]:
gdf_viewer(prv_eau, rows=3)

Rows : 33, columns : 13


interactive(children=(IntSlider(value=3, description='rows', max=33, min=3, readout=False), IntSlider(value=12…

In [287]:
source_prv_eau=gdf_merger(source_prv_eau, prv_eau, col='ID_ech', how='outer', )[0]

Ambiguous values in both columns compared, change it manually !
Columns Index(['Date_prv_x', 'Date_prv_y', 'Emplacement_x', 'Emplacement_y',
       'Periode_x', 'Periode_y', 'pH_x', 'pH_y'],
      dtype='object') must be dropped manually !


In [288]:
if not os.path.exists(tmp_dir):
    os.makedirs(tmp_dir)

#bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)
#pz.to_csv(tmp_dir+sheet+'_Piezometers.csv', index=False)
#ouv.to_csv(tmp_dir+sheet+'_Unkown-facility.csv', index=False)
#litho.to_csv(tmp_dir+sheet+'_Lithology.csv', index=False)
#mes_pz.to_csv(tmp_dir+sheet+'_Measures.csv', index=False)
#mes_sol.to_csv(tmp_dir+sheet+'_Measures-soil.csv', index=False)
prv_eau.to_csv(tmp_dir+sheet+'_Samples-water.csv', index=False)
#prv_sol.to_csv(tmp_dir+sheet+'_Samples-soil.csv', index=False)
#an.to_csv(tmp_dir+sheet+'_Analysis.csv', index=False)


#source_bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)
#source_pz.to_csv(tmp_dir+'source_Piezometers.csv', index=False)
#source_ouv.to_csv(tmp_dir+'source_Unkown-facility.csv', index=False)
#source_litho.to_csv(tmp_dir+'source_Lithology.csv', index=False)
#source_mes_pz.to_csv(tmp_dir+'source_Measures.csv', index=False)
#source_mes_sol.to_csv(tmp_dir+'source_Measures-soil.csv', index=False)
source_prv_eau.to_csv(tmp_dir+'source_Samples-water.csv', index=False)
#source_prv_sol.to_csv(tmp_dir+'source_Samples-soil.csv', index=False)
#source_an.to_csv(tmp_dir+'source_Analysis.csv', index=False)


print(f'source_bh:{len(source_bh)} ; source_pz:{len(source_pz)} ; source_litho:{len(source_litho)} ; '
     f'source_Fac-uknw:{len(source_ouv)} ; source_an:{len(source_an)} ;\nsource_prv_sol:{len(source_prv_sol)} ;'
     f'source_prv_eau:{len(source_prv_eau)} ; source_mes_pz:{len(source_mes_pz)} ; source_mes_sol:{len(source_mes_sol)} ;')

source_bh:0 ; source_pz:0 ; source_litho:0 ; source_Fac-uknw:0 ; source_an:33 ;
source_prv_sol:0 ;source_prv_eau:162 ; source_mes_pz:0 ; source_mes_sol:0 ;


In [289]:
# continue here

* **Sheet : 'Inorganiques et composés majeurs'**

In [290]:
tmp_dir='../../CF_data/Result_traitem/Siterem_Ext_Pilote/'
sheet='Inorganic_major'

In [291]:
df = pd.read_excel('../../CF_data/Data_SITEREM/Resultats_Siterem_eau_extension_pilote_jusque_decembre_2020.xlsx', 
                   sheet_name='inorganiques et composés majeur', skiprows=2)
df=na_line_drop(df,0)
df=na_col_drop(df,1)
df.replace(r'<|>','', inplace=True, regex=True)
df.replace(r'-$',np.nan, inplace=True, regex=True)

gdf_viewer(df, rows=5)

9 NaN lines dropped

Columns dropped :['Unnamed: 0', 'Unnamed: 2', 'Unnamed: 29', 'Unnamed: 30', 'Unnamed: 31', 'Unnamed: 32', 'Unnamed: 33', 'Unnamed: 34', 'Unnamed: 36', 'Unnamed: 37', 'Unnamed: 38', 'Unnamed: 39', 'Unnamed: 40', 'Unnamed: 41', 'Unnamed: 42', 'Unnamed: 43', 'Unnamed: 44', 'Unnamed: 45', 'Unnamed: 46', 'Unnamed: 47', 'Unnamed: 48', 'Unnamed: 49', 'Unnamed: 50', 'Unnamed: 51', 'Unnamed: 52', 'Unnamed: 53', 'Unnamed: 54', 'Unnamed: 55', 'Unnamed: 56', 'Unnamed: 57', 'Unnamed: 58', 'Unnamed: 59', 'Unnamed: 60', 'Unnamed: 61', 'Unnamed: 62', 'Unnamed: 63', 'Unnamed: 64', 'Unnamed: 65', 'Unnamed: 66', 'Unnamed: 67', 'Unnamed: 68', 'Unnamed: 69', 'Unnamed: 70', 'Unnamed: 71', 'Unnamed: 72', 'Unnamed: 73', 'Unnamed: 74', 'Unnamed: 75', 'Unnamed: 76', 'Unnamed: 77', 'Unnamed: 78', 'Unnamed: 79', 'Unnamed: 80', 'Unnamed: 81', 'Unnamed: 82', 'Unnamed: 83', 'Unnamed: 84', 'Unnamed: 85', 'Unnamed: 86', 'Unnamed: 87', 'Unnamed: 88', 'Unnamed: 89', 'Unnamed: 90', 'Unnamed: 91', 'Un

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


interactive(children=(IntSlider(value=5, description='rows', max=68, min=5, readout=False), IntSlider(value=12…

In [292]:
prv_eau=df.loc[:21]
an=df.loc[22:]

In [293]:
an.loc[0.5] = df.loc[0] # put data on first line
an = an.sort_index().reset_index(drop=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value, self.name)


In [294]:
prv_eau=prv_eau.transpose()
prv_eau.reset_index(drop=True, inplace=True)
prv_eau=col_ren(prv_eau, 1)

In [295]:
prv_eau=dble_col_drop(prv_eau)

column(s) dropped: ['6:Autre zone suspecte investiguée']


In [296]:
prv_eau.drop(list(range(2)), axis=0, inplace=True)
prv_eau.reset_index(drop=True, inplace=True)
prv_eau=na_col_drop(prv_eau,2)
prv_eau=na_line_drop(prv_eau,2)
prv_eau.reset_index(drop=True, inplace=True)


Columns dropped :['SPP/zone suspecte investiguée', 'Autre zone suspecte investiguée', 'Parcelle', 'X Lambert', 'Y Lambert', "Type d'étude (*)", 'Profondeur minimum de la crépine', 'Profondeur maximum de la crépine', 'Profondeur du fond du piézomètre', 'Profondeur de la nappe', 'Organoleptique odeur intensité (**)', 'Organoleptique odeur type', 'Limpidité (***)', 'Température', 'Conductivité électrique', 'pH']

1 NaN lines dropped


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


In [297]:
prv_eau.columns

Index(['Nom échantillon', 'Période ', 'Emplacement ', 'Date de prélèvement',
       'Température de prélèvement '],
      dtype='object')

In [298]:
name=['ID_ech', 'Periode', 'Emplacement','Date_prv','Temp_prv']
prv_eau=col_ren(prv_eau, name=name, mode=1)
prv_eau=prv_eau.query('ID_ech==ID_ech')
prv_eau.insert(1,'Type_ech','Eau')

In [299]:
gdf_viewer(prv_eau, rows=3)

Rows : 24, columns : 6


interactive(children=(IntSlider(value=3, description='rows', max=24, min=3, readout=False), IntSlider(value=6,…

In [300]:
an=an.transpose()
an.reset_index(drop=True, inplace=True)

In [301]:
an=col_ren(an, 1)

In [302]:
an.replace(r'<|>','', inplace=True, regex=True)
an.replace(r'-',np.nan, inplace=True, regex=True)
an.rename(columns={an.columns[0]:'ID_ech'}, inplace=True)

In [303]:
an=dble_col_drop(an)

column(s) dropped: ['8:ammonium', '12:nitrite', '14:nitrate']


In [304]:
an=na_col_drop(an,3)


Columns dropped :['CARBONE ORGANIQUE', 'DEMANDE EN O2', 'COMPOSES AZOTES', 'COMPOSES SOUFRES ', 'ELEMENTS MAJEURS', 'AUTRES ANALYSES', 'cyanure (libre)', 'METHYL-TERT-BUTYL-ETHER', 'MTBE', 'Teneur mesurée ', 'Teneur mesurée', 'VS : Valeur seuil', "(*) RP : Rapport de prélèvements; ES : Etude de sol; EO : Etude d'orientation; EC : Etude de caractérisation; SA : Suivi d'assainissement; EF : Evaluation finale", "(**) - : Pas d'impression organoleptique; + : Impression organoleptique faible; ++ : Impression organoleptique forte", '(***) + : Limpide; - : Trouble; -- : Opaque', "Le contenu des tableaux est conforme au modèle repris à l'annexe IX du GREO V03. Le formalisme a été adapté par SITEREM tout en garantissant la lisibilité du document imprimé. "]



In [305]:
an.columns

Index(['ID_ech', 'COT', 'DBO (5 jours)', 'DCO', 'ammonium',
       'ammoniaque - libre', 'azote Kjeldahl', 'nitrite', 'nitrate',
       'sulfures totaux', 'sulfures (libre)', 'Soufre Total', 'sulfites',
       'sulfate', 'calcium', 'potassium', 'magnésium', 'manganèse', 'sodium',
       'fer', 'fer (Fe) total', 'fer (2+)', 'chlorures', 'fluorures',
       'bromure (libre)', 'phosphore (total)', 'carbonate', 'bicarbonate'],
      dtype='object')

In [306]:
an.rename(columns={'col_9':'ammoniaque libre'}, inplace=True)

In [307]:
an.drop(list(range(2)), axis=0, inplace=True)
an.reset_index(drop=True, inplace=True)
an=na_col_drop(an,1)
an.insert(1,'Type_ech','Eau')

In [308]:
an=col_ren(an, name=pol_field_model, mode=1)
#an=an.iloc[:,:-7]

In [309]:
gdf_viewer(an, rows=3)

Rows : 25, columns : 29


interactive(children=(IntSlider(value=3, description='rows', max=25, min=3, readout=False), IntSlider(value=12…

In [310]:
gdf_viewer(source_prv_eau)

Rows : 162, columns : 15


interactive(children=(IntSlider(value=10, description='rows', max=162, min=10, readout=False), IntSlider(value…

$\color{red}{\text{error on merge (because of temporal data). I must check it after}}$

In [311]:
if not os.path.exists(tmp_dir):
    os.makedirs(tmp_dir)

#bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)
#pz.to_csv(tmp_dir+sheet+'_Piezometers.csv', index=False)
#ouv.to_csv(tmp_dir+sheet+'_Unkown-facility.csv', index=False)
#litho.to_csv(tmp_dir+sheet+'_Lithology.csv', index=False)
#mes_pz.to_csv(tmp_dir+sheet+'_Measures.csv', index=False)
#mes_sol.to_csv(tmp_dir+sheet+'_Measures-soil.csv', index=False)
prv_eau.to_csv(tmp_dir+sheet+'_Samples-water.csv', index=False)
#prv_sol.to_csv(tmp_dir+sheet+'_Samples-soil.csv', index=False)
an.to_csv(tmp_dir+sheet+'_Analysis.csv', index=False)


#source_bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)
#source_pz.to_csv(tmp_dir+'source_Piezometers.csv', index=False)
#source_ouv.to_csv(tmp_dir+'source_Unkown-facility.csv', index=False)
#source_litho.to_csv(tmp_dir+'source_Lithology.csv', index=False)
#source_mes_pz.to_csv(tmp_dir+'source_Measures.csv', index=False)
#source_mes_sol.to_csv(tmp_dir+'source_Measures-soil.csv', index=False)
source_prv_eau.to_csv(tmp_dir+'source_Samples-water.csv', index=False)
#source_prv_sol.to_csv(tmp_dir+'source_Samples-soil.csv', index=False)
source_an.to_csv(tmp_dir+'source_Analysis.csv', index=False)


print(f'source_bh:{len(source_bh)} ; source_pz:{len(source_pz)} ; source_litho:{len(source_litho)} ; '
     f'source_Fac-uknw:{len(source_ouv)} ; source_an:{len(source_an)} ;\nsource_prv_sol:{len(source_prv_sol)} ;'
     f'source_prv_eau:{len(source_prv_eau)} ; source_mes_pz:{len(source_mes_pz)} ; source_mes_sol:{len(source_mes_sol)} ;')

source_bh:0 ; source_pz:0 ; source_litho:0 ; source_Fac-uknw:0 ; source_an:33 ;
source_prv_sol:0 ;source_prv_eau:162 ; source_mes_pz:0 ; source_mes_sol:0 ;


## 12-Resultats_Siterem_eau_pilote_jusque_decembre_2020.xlsx
* **Sheet : 'Résult EAU'**

In [312]:
# New file, so the source variables must be overwritten !!
_df=pd.DataFrame()
source_mes_pz, source_mes_sol, source_pz, source_prv_eau, source_prv_sol =  _df, _df, _df, _df, _df
source_ouv, source_an, source_litho, source_bh = _df, _df, _df, _df

In [313]:
tmp_dir='../../CF_data/Result_traitem/Siterem_Pilote/'
sheet='Result_eau'

In [314]:
df = pd.read_excel('../../CF_data/Data_SITEREM/Resultats_Siterem_eau_pilote_jusque_decembre_2020.xlsx', 
                   sheet_name='Résult EAU', skiprows=2)
df=na_line_drop(df,0)
df=na_col_drop(df,1)
df.replace(r'<|>','', inplace=True, regex=True)
df.replace(r'-$',np.nan, inplace=True, regex=True)

gdf_viewer(df, rows=5)

  warn(msg)


9 NaN lines dropped

Columns dropped :['Unnamed: 0', 'Unnamed: 2', 'Unnamed: 93', 'Unnamed: 94', 'Unnamed: 95', 'Unnamed: 96', 'Unnamed: 97', 'Unnamed: 98', 'Unnamed: 99', 'Unnamed: 100', 'Unnamed: 101', 'Unnamed: 102', 'Unnamed: 103', 'Unnamed: 104', 'Unnamed: 105', 'Unnamed: 106', 'Unnamed: 107', 'Unnamed: 108', 'Unnamed: 109', 'Unnamed: 110', 'Unnamed: 111', 'Unnamed: 112', 'Unnamed: 113']

Rows : 117, columns : 91


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


interactive(children=(IntSlider(value=5, description='rows', max=117, min=5, readout=False), IntSlider(value=1…

In [315]:
prv_eau=df.loc[:32]
an=df.loc[33:]

In [316]:
an.loc[0.5] = df.loc[0] # put data on first line
an = an.sort_index().reset_index(drop=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value, self.name)


In [317]:
prv_eau=prv_eau.transpose()
prv_eau.reset_index(drop=True, inplace=True)
prv_eau=col_ren(prv_eau, 1)

In [318]:
prv_eau=dble_col_drop(prv_eau)

column(s) dropped: ['6:Autre zone suspecte investiguée', '25:pH', '31:pH']


In [319]:
prv_eau.drop(list(range(3)), axis=0, inplace=True)
prv_eau.reset_index(drop=True, inplace=True)
prv_eau=na_col_drop(prv_eau,2)
prv_eau=na_line_drop(prv_eau,3)
prv_eau.reset_index(drop=True, inplace=True)


Columns dropped :['SPP/zone suspecte investiguée', 'Autre zone suspecte investiguée', 'Parcelle', 'X Lambert', 'Y Lambert', "Type d'étude (*)", 'Profondeur minimum de la crépine', 'Profondeur maximum de la crépine', 'Profondeur du fond du piézomètre', 'Organoleptique odeur intensité (**)', 'Organoleptique odeur type', 'Limpidité (***)', 'Température', 'Conductivité électrique', 'Profondeur de la nappe/sol ', 'PARAMETRES PHYSICO-CHIMIQUES \n(lors du prélèvement)', 'PARAMETRES PHYSICO-CHIMIQUES \n(mesures au labo)']



In [320]:
prv_eau.columns

Index(['Nom échantillon', 'Période ',
       'Emplacement \n- P : Pilote \n- HZP : Hors zone pilote',
       'Date de prélèvement', 'Profondeur de la nappe/piezo',
       'Profondeur de la nappe/chambre visite', 'pH',
       'Température de prélèvement ', 'CE', 'ORP', 'Oxygène dissous', 'col_29',
       'température pour mes. pH'],
      dtype='object')

In [321]:
name=['ID_ech', 'Periode', 'Emplacement','Date_prv','Niv_eau_pz','Niv_eau_chbre','pH','Temp_prv','CE','ORP',
      'O_diss','col_29','Temp_pH_mes']
prv_eau=col_ren(prv_eau, name=name, mode=1)
prv_eau=prv_eau.query('ID_ech==ID_ech')
prv_eau.insert(1,'Type_ech','Eau')

In [322]:
prv_eau.drop(columns=['col_29'], inplace=True)
prv_eau['CE']=prv_eau['CE'].apply(lambda x: pd.to_numeric(x)/1000 
                                  if re.search('^\d+', str(x)) and not pd.isnull(x) else np.nan)

In [323]:
prv_eau['Periode'].replace('\n',' ', regex=True, inplace=True)
prv_eau.replace('\n','', regex=True, inplace=True)

In [324]:
data=[prv_eau]
for d in data:
    for i in range(len(d['ID_ech'])):
        e=str(d.loc[i, 'Emplacement'])        
        if re.match('P',e, re.I): 
            d.loc[i,'Emplacement']='Pilote'
        elif re.match('HZP',e, re.I): 
            d.loc[i,'Emplacement']='Hors Pilote'
        else:
            d.loc[i,'Emplacement']=np.nan

In [325]:
gdf_viewer(prv_eau, rows=3)

Rows : 87, columns : 13


interactive(children=(IntSlider(value=3, description='rows', max=87, min=3, readout=False), IntSlider(value=12…

In [326]:
an=an.transpose()
an.reset_index(drop=True, inplace=True)

In [327]:
an=col_ren(an, 1)

In [328]:
an.replace(r'<|>','', inplace=True, regex=True)
an.replace(r'-',np.nan, inplace=True, regex=True)
an.rename(columns={an.columns[0]:'ID_ech'}, inplace=True)

In [329]:
an=dble_col_drop(an)

column(s) dropped: []


In [330]:
# rename in a first time before dropping columns (because of names like 'col_xx' in columns)
name=['ID_ech', 'METAUX LOURDS', 'Arsenic', 'Cadmium', 'Chrome', 'Cuivre','Mercure', 'Plomb', 'Nickel', 'Zinc', 
      'CYANURES',"Cyanures (libres)  -  NEN-EN-ISO 14403", "CN_totaux - NEN-EN-ISO 14403", 
      "cyanure (APE) - méthode basée sur EPA 335.3", "cyanure complex - méthode interne ", 
      "thiocyanate - méthode interne", "COMPOSES AROMATIQUES VOLATILS", "Benzène", "Toluène", "Éthylbenzène",
      "Orthoxylène", "Para- et métaxylène", "Xylènes", "Styrène", "PHENOLS", "Phénol", "Indice phénol", 
      "HYDROCARBURES AROMATIQUES POLYCYCLIQUES", "Naphtalène", "Acénaphtylène", "Acénaphtène", "Fluorène", 
      "Phénanthrène", "Anthracène", "Fluoranthène", "Pyrène", "Benzo(a)anthracène", "Chrysène", 
      "Benzo(b)fluoranthène", "Benzo(k)fluoranthène", "Benzo(a)pyrène", "Dibenzo(ah)anthracène", 
      "Benzo(ghi)pérylène", "Indéno(1,2,3-cd)pyrène", "HAP Totaux (16) - EPA", 
      "COMPOSES ORGANOCHLORES VOLATILS", "1,1-Dichloroéthane", "1,2-Dichloroéthane", 
      "1,1-dichloroéthène", "Cis-1,2-dichloroéthène", "Totaux (cis,trans) 1,2-dichloroéthènes", 
      "Trans 1,2-dichloroéthylène", "Dichlorométhane", "1,2-dichloropropane", "Tétrachloroéthylène ", 
      "Tétrachlorométhane", "1,1,1-Trichloroéthane", "1,1,2-Trichloroéthane", "Trichloroéthylène",
      "Chloroforme", "Chlorure de vinyle", "HYDROCARBURES TOTAUX", "fraction aromat. >C6-C7", 
      "fraction aromat. >C7-C8", "fraction aromat. >C8-C10", "fraction aliphat. C5-C6", "fraction aliphat. >C6-C8",
      "fraction aliphat. >C8-C10", "Fraction C5 - C8", "Fraction C8 - C10", "Fraction C10-C12", "Fraction C12-C16",
      "Fraction C16 - C21", "Fraction C21 - C35", "Hydrocarbures totaux C10-C35", "METHYL-TERT-BUTYL-ETHER", "MTBE",
     "a","b","c","d","e","f","g","h"]

In [331]:
an=col_ren(an, name=name, mode=1)
an=an.iloc[:,:-8]

In [332]:
an.drop(list(range(3)), axis=0, inplace=True)
an.reset_index(drop=True, inplace=True)
an=na_col_drop(an,1)
an.insert(1,'Type_ech','Eau')


Columns dropped :['METAUX LOURDS', 'CYANURES', 'COMPOSES AROMATIQUES VOLATILS', 'PHENOLS', 'Indice phénol', 'HYDROCARBURES AROMATIQUES POLYCYCLIQUES', 'COMPOSES ORGANOCHLORES VOLATILS', 'HYDROCARBURES TOTAUX', 'METHYL-TERT-BUTYL-ETHER', 'MTBE']



In [333]:
pol_field_model={'Arsenic': 'As', 'Cadmium': 'Cd', 'Chrome': 'Cr', 'Chrome VI': 'Cr_VI', 'Cuivre': 'Cu', 
'Mercure': 'Hg', 'Plomb': 'Pb', 'Nickel': 'Ni', 'Zinc': 'Zn', 'Cyanure(s) (?libre(s))?': 'CN_libre', 
'Cyanures (libres)  -  NEN-EN-ISO 14403':'CN_libre', 'cyanure (APE) - méthode basée sur EPA 335.3':'CN_EPA', 
'Cyanure(s) (totaux)': 'CN_tot','CN_totaux':'CN_tot','cyanure(s) (APE)': 'CN_EPA', 'cyanure complex': 'CN_comp',
'thiocyanate': 'thioCN','Benzène': 'Bnz', 'Toluène': 'Toln', 'Éthylbenzène': 'EthylBnz', 'Orthoxylène': 'O-Xyl', 
'Para- et métaxylène': 'P-M-Xyl', 'Xylènes': 'Xyl', 'Styrène': 'Styr', 'BTEX totaux': 'BTEX_tot', 
'Phénol': 'Phenol', 'Indice phénol': 'Idc_Phenol', 'Naphtalène': 'Naphta', 'Acénaphtylène': 'Acenaphtyl', 
'Acénaphtène': 'Acenaphtn', 'Fluorène': 'Fluorene', 'Phénanthrène': 'Phenanthr', 'Anthracène': 'Anthrc', 
'Fluoranthène': 'Flranth', 'Pyrène': 'Pyr', 'Benzo(a)anthracène': 'Bnz(a)anthrc', 'Chrysène': 'Chrys', 
'Benzo(b)fluoranthène': 'Bnz(b)flranth', 'Benzo(k)fluoranthène': 'Bnz(k)flranth', 
'Benzo(a)pyrène': 'Bnz(a)pyr','Dibenzo(ah)anthracène': 'Dibnz(ah)anthrc',
'Benzo(ghi)pérylène': 'Bnz(ghi)peryl', 
'Indéno(1,2,3-cd)pyrène': 'Indeno(1,2,3-cd)pyr', 'HAP Totaux (?:(16) - EPA)?': 'HAP_tot_EPA', 
'1,1-Dichloroéthane': '1,1-DCE', '1,2-Dichloroéthane': '1,2-DCE', '1,1-dichloroéthène': '1,1-DCEn', 
'Cis-1,2-dichloroéthène': 'Cis-1,2-DCEn', 'Trans 1,2-dichloroéthylène': 'Trans 1,2-DCEyl', 
'Dichlorométhane': 'DCM', 'Totaux (cis,trans) 1,2-dichloroéthène(s)': '(cis,trans) 1,2-DCE_tot', 
'1,2-dichloropropane': '1,2-DCP', 'Tétrachloroéthylène': 'TetraCEyn', 'Tétrachlorométhane': 'TCM', 
'1,1,1-Trichloroéthane': '1,1,1-TCE', '1,1,2-Trichloroéthane': '1,1,2-TCE', 'Trichloroéthylène': 'TCEyn', 
'Chloroforme': 'Chloroforme', 'Chlorure de vinyle': 'CVinyl', 'EOX': 'EOX', 
'fraction aromat. >C6-C7': 'Arom_C6C7', 'fraction aromat. >C7-C8': 'Arom_C7C8', 
'fraction aromat. >C8-C10': 'Arom_C8C10', 'fraction aliphat. C5-C6': 'Aliphat_C5C6', 
'fraction aliphat. >C6-C8': 'Aliphat_C6C8', 'fraction aliphat. >C8-C10': 'Aliphat_C8C10', 
'Fraction C5 - C8': 'Fract_C5C8', 'Fraction C8-C10': 'Fract_C8C10', 'Fraction C10-C12': 'Fract_C10C12', 
'Fraction C12-C16': 'Fract_C12C16', 'Fraction C16 - C21': 'Fract_C16C21', 'Fraction C21 - C35': 'Fract_C21C35', 
'Fraction C35 - C40': 'Fract_C35C40', 'Hydrocarbures totaux C10-C35': 'HC_tot_C10C35','C5-C8':'Fract_C5C8', 
'C8-C10':'Fract_C8C10','C10-C12':'Fract_C10C12','C12-C16':'Fract_C12C16','C16-C21':'Fract_C16C21', 
'C21-C35':'Fract_C21C35','C35-C40':'Fract_C35C40', 'totaux C10-C35':'HC_tot_C10C35','C12-C22':'Fract_C12C22', 
'C22-C30':'Fract_C22C30','C30-C40':'Fract_C30C40', 'Totaux C10-C40':'HC_tot_C10C40',
'Hydrocarbures totaux C10-C40':'HC_tot_C10C40', 'MTBE': 'MTBE', 'PCB 28': 'PCB_28', 'PCB 52': 'PCB_52', 
'PCB 101': 'PCB_101', 'PCB 118': 'PCB_118', 'PCB 138': 'PCB_138', 'PCB 153': 'PCB_153', 'PCB 180': 'PCB_180', 
'PCB totaux (7)?': 'PCB_tot', 'Chlorure(s)': 'Chlorure', 'Soufre Total': 'S_tot', 'sulfite(s)': 'sulfite', 
'sulfate(s)': 'sulfate', 'COT':'COT','DBO (5 jours)':'DBO_5j','DCO':'DCO', 
'Ammonium':'NH4','ammoniaque libre':'NH3_libre','azote Kjeldahl':'N_Kjdl','sulfures totaux':'Sulfure_tot', 
'sulfure(s) (libre)':'Sulfure_libre','sulfure(s) (libre(s))':'Sulfure_libre','calcium':'Ca','potassium':'K', 'magnésium':'Mg', 'manganèse':'Mn', 
'sodium':"Na", 'fer':'Fe','phosphore (total)':'P_tot','carbonate':'CaCO3', 'bicarbonate':'Bicarb','Phoshore':'P',
'fer ((Fe))? total':'Fe_tot', 'fer (2\+)':'Fe2','fluorure(s)':'Fluorure','bromure (libre)':'B_libre'}


In [334]:
an=col_ren(an, name=pol_field_model, mode=1)

In [335]:
gdf_viewer(an, rows=3)

Rows : 87, columns : 68


interactive(children=(IntSlider(value=3, description='rows', max=87, min=3, readout=False), IntSlider(value=12…

In [336]:
source_prv_eau=prv_eau
source_an=an

In [337]:
if not os.path.exists(tmp_dir):
    os.makedirs(tmp_dir)

#bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)
#pz.to_csv(tmp_dir+sheet+'_Piezometers.csv', index=False)
#ouv.to_csv(tmp_dir+sheet+'_Unkown-facility.csv', index=False)
#litho.to_csv(tmp_dir+sheet+'_Lithology.csv', index=False)
#mes_pz.to_csv(tmp_dir+sheet+'_Measures.csv', index=False)
#mes_sol.to_csv(tmp_dir+sheet+'_Measures-soil.csv', index=False)
prv_eau.to_csv(tmp_dir+sheet+'_Samples-water.csv', index=False)
#prv_sol.to_csv(tmp_dir+sheet+'_Samples-soil.csv', index=False)
an.to_csv(tmp_dir+sheet+'_Analysis.csv', index=False)


#source_bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)
#source_pz.to_csv(tmp_dir+'source_Piezometers.csv', index=False)
#source_ouv.to_csv(tmp_dir+'source_Unkown-facility.csv', index=False)
#source_litho.to_csv(tmp_dir+'source_Lithology.csv', index=False)
#source_mes_pz.to_csv(tmp_dir+'source_Measures.csv', index=False)
#source_mes_sol.to_csv(tmp_dir+'source_Measures-soil.csv', index=False)
source_prv_eau.to_csv(tmp_dir+'source_Samples-water.csv', index=False)
#source_prv_sol.to_csv(tmp_dir+'source_Samples-soil.csv', index=False)
source_an.to_csv(tmp_dir+'source_Analysis.csv', index=False)


print(f'source_bh:{len(source_bh)} ; source_pz:{len(source_pz)} ; source_litho:{len(source_litho)} ; '
     f'source_Fac-uknw:{len(source_ouv)} ; source_an:{len(source_an)} ;\nsource_prv_sol:{len(source_prv_sol)} ;'
     f'source_prv_eau:{len(source_prv_eau)} ; source_mes_pz:{len(source_mes_pz)} ; source_mes_sol:{len(source_mes_sol)} ;')

source_bh:0 ; source_pz:0 ; source_litho:0 ; source_Fac-uknw:0 ; source_an:87 ;
source_prv_sol:0 ;source_prv_eau:87 ; source_mes_pz:0 ; source_mes_sol:0 ;


* **Sheet : 'Param physico'**

In [338]:
tmp_dir='../../CF_data/Result_traitem/Siterem_Pilote/'
sheet='Param_physico'

In [339]:
df = pd.read_excel('../../CF_data/Data_SITEREM/Resultats_Siterem_eau_pilote_jusque_decembre_2020.xlsx', 
                   sheet_name='param. physico', skiprows=2)
df=na_line_drop(df,0)
df=na_col_drop(df,1)
df.replace(r'<|>','', inplace=True, regex=True)
df.replace(r'-$',np.nan, inplace=True, regex=True)

gdf_viewer(df, rows=5)

7 NaN lines dropped

Columns dropped :['Unnamed: 0', 'Unnamed: 2']

Rows : 52, columns : 92


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


interactive(children=(IntSlider(value=5, description='rows', max=52, min=5, readout=False), IntSlider(value=12…

In [340]:
df=df.transpose()
df.reset_index(drop=True, inplace=True)

In [341]:
df=col_ren(df, 1)

In [342]:
df.drop(list(range(2)), axis=0, inplace=True)
df.reset_index(drop=True, inplace=True)

In [343]:
sdf=df.iloc[:,:33]
df=df.iloc[:,34:]

In [344]:
df=dble_col_drop(df)
sdf=dble_col_drop(sdf)

column(s) dropped: []
column(s) dropped: ['6:Autre zone suspecte investiguée', '25:pH', '31:pH']


In [345]:
df=na_line_drop(df,1)
sdf=na_line_drop(sdf,1)

83 NaN lines dropped


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


In [346]:
df=na_col_drop(df,1)
sdf=na_col_drop(sdf,1)


Columns dropped :['Profondeur de la nappe/chambre visite', 'Organoleptique odeur intensité (**)', 'Organoleptique odeur type', 'Profondeur de la nappe/sol ', 'PARAMETRES PHYSICO-CHIMIQUES \n(lors du prélèvement)', 'col_51']


Columns dropped :['SPP/zone suspecte investiguée', 'Autre zone suspecte investiguée', 'Parcelle', 'X Lambert', 'Y Lambert', "Type d'étude (*)", 'Profondeur minimum de la crépine', 'Profondeur maximum de la crépine', 'Profondeur du fond du piézomètre', 'Organoleptique odeur intensité (**)', 'Organoleptique odeur type', 'Limpidité (***)', 'Température', 'Conductivité électrique', 'PARAMETRES PHYSICO-CHIMIQUES \n(lors du prélèvement)', 'PARAMETRES PHYSICO-CHIMIQUES \n(mesures au labo)']



In [347]:
df=df.iloc[:,:-1]
name=['ID_ech','Periode','Emplacement','Date_prv','Niv_eau_pz','Long_pz','Temp_prv ','pH', 'CE', 'ORP','O_diss']
df=col_ren(df, mode=1, name=name)

In [348]:
sdf.drop(columns=['col_29'], inplace=True)
name=['ID_ech','Periode','Emplacement','Date_prv','Niv_eau_pz','Long_pz','pH','Niv_eau_sol','Temp_prv ','CE',
      'ORP','O_diss','Temp_pH_mes']
sdf=col_ren(sdf, mode=1, name=name)

In [349]:
df['CE']=df['CE'].apply(lambda x: pd.to_numeric(x)/1000 
                                  if re.search('^\d+', str(x)) and not pd.isnull(x) else np.nan)
sdf['CE']=sdf['CE'].apply(lambda x: pd.to_numeric(x)/1000 
                                  if re.search('^\d+', str(x)) and not pd.isnull(x) else np.nan)

In [350]:
sdf['Periode'].replace('\n',' ', regex=True, inplace=True)
sdf.replace('\n','', regex=True, inplace=True)
sdf.drop(columns=["Niv_eau_sol"], inplace=True)

In [351]:
set(sdf['Emplacement'])

{'HZP', 'P'}

In [352]:
data=[df, sdf]
for d in data:
    for i in range(len(d['ID_ech'])):
        e=str(d.loc[i, 'Emplacement'])
        
        if re.match('P',e, re.I): 
            d.loc[i,'Emplacement']='Pilote'
        elif re.match('HZP',e, re.I): 
            d.loc[i,'Emplacement']='Hors Pilote'
        else:
            d.loc[i,'Emplacement']=np.nan

In [353]:
df.insert(1, 'Type_ech', 'Eau')
sdf.insert(1, 'Type_ech', 'Eau')

In [354]:
df.replace('\*|à compléter',np.nan, inplace=True, regex=True)

In [355]:
prv_eau=gdf_merger(sdf, df, 'outer', 'ID_ech', debug=True)[0]

column: ORP_x
column: pH_x
column: Long_pz_x
column: O_diss_x
column: Temp_prv _x
column: Date_prv_x
column: Emplacement_x
column: Type_ech_x
column: Periode_x
column: Niv_eau_pz_x
column: CE_x


In [356]:
gdf_viewer(df, rows=3)

Rows : 6, columns : 12


interactive(children=(IntSlider(value=3, description='rows', max=6, min=3, readout=False), IntSlider(value=12,…

$\color{red}{\text{error on merge (because of temporal data). I must check it after}}$

In [357]:
if not os.path.exists(tmp_dir):
    os.makedirs(tmp_dir)

#bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)
#pz.to_csv(tmp_dir+sheet+'_Piezometers.csv', index=False)
#ouv.to_csv(tmp_dir+sheet+'_Unkown-facility.csv', index=False)
#litho.to_csv(tmp_dir+sheet+'_Lithology.csv', index=False)
#mes_pz.to_csv(tmp_dir+sheet+'_Measures.csv', index=False)
#mes_sol.to_csv(tmp_dir+sheet+'_Measures-soil.csv', index=False)
prv_eau.to_csv(tmp_dir+sheet+'_Samples-water.csv', index=False)
#prv_sol.to_csv(tmp_dir+sheet+'_Samples-soil.csv', index=False)
#an.to_csv(tmp_dir+sheet+'_Analysis.csv', index=False)


#source_bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)
#source_pz.to_csv(tmp_dir+'source_Piezometers.csv', index=False)
#source_ouv.to_csv(tmp_dir+'source_Unkown-facility.csv', index=False)
#source_litho.to_csv(tmp_dir+'source_Lithology.csv', index=False)
#source_mes_pz.to_csv(tmp_dir+'source_Measures.csv', index=False)
#source_mes_sol.to_csv(tmp_dir+'source_Measures-soil.csv', index=False)
source_prv_eau.to_csv(tmp_dir+'source_Samples-water.csv', index=False)
#source_prv_sol.to_csv(tmp_dir+'source_Samples-soil.csv', index=False)
#source_an.to_csv(tmp_dir+'source_Analysis.csv', index=False)


print(f'source_bh:{len(source_bh)} ; source_pz:{len(source_pz)} ; source_litho:{len(source_litho)} ; '
     f'source_Fac-uknw:{len(source_ouv)} ; source_an:{len(source_an)} ;\nsource_prv_sol:{len(source_prv_sol)} ;'
     f'source_prv_eau:{len(source_prv_eau)} ; source_mes_pz:{len(source_mes_pz)} ; source_mes_sol:{len(source_mes_sol)} ;')

source_bh:0 ; source_pz:0 ; source_litho:0 ; source_Fac-uknw:0 ; source_an:87 ;
source_prv_sol:0 ;source_prv_eau:87 ; source_mes_pz:0 ; source_mes_sol:0 ;


* **Sheet : 'Inorganiques et composés majeurs'**

In [358]:
tmp_dir='../../CF_data/Result_traitem/Siterem_Pilote/'
sheet='Inorganic_major'

In [359]:
df = pd.read_excel('../../CF_data/Data_SITEREM/Resultats_Siterem_eau_pilote_jusque_decembre_2020.xlsx', 
                   sheet_name='inorganiques et composés majeur', skiprows=2)
df=na_line_drop(df,0)
df=na_col_drop(df,1)
df.replace(r'<|>','', inplace=True, regex=True)
df.replace(r'-$',np.nan, inplace=True, regex=True)

gdf_viewer(df, rows=5)

10 NaN lines dropped

Columns dropped :['Unnamed: 0', 'Unnamed: 2', 'Unnamed: 56', 'Unnamed: 57', 'Unnamed: 58', 'Unnamed: 59', 'Unnamed: 60', 'Unnamed: 61', 'Unnamed: 62', 'Unnamed: 63', 'Unnamed: 64', 'Unnamed: 65', 'Unnamed: 66', 'Unnamed: 67', 'Unnamed: 68', 'Unnamed: 69', 'Unnamed: 70', 'Unnamed: 71', 'Unnamed: 72', 'Unnamed: 73', 'Unnamed: 74', 'Unnamed: 75', 'Unnamed: 76', 'Unnamed: 77', 'Unnamed: 78', 'Unnamed: 79', 'Unnamed: 80', 'Unnamed: 81']

Rows : 68, columns : 54


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


interactive(children=(IntSlider(value=5, description='rows', max=68, min=5, readout=False), IntSlider(value=12…

In [360]:
prv_eau=df.loc[:21]
an=df.loc[22:]

In [361]:
an.loc[0.5] = df.loc[0] # put data on first line
an = an.sort_index().reset_index(drop=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value, self.name)


In [362]:
prv_eau=prv_eau.transpose()
prv_eau.reset_index(drop=True, inplace=True)
prv_eau=col_ren(prv_eau, 1)

In [363]:
prv_eau=dble_col_drop(prv_eau)

column(s) dropped: ['6:Autre zone suspecte investiguée']


In [364]:
prv_eau.drop(list(range(2)), axis=0, inplace=True)
prv_eau.reset_index(drop=True, inplace=True)
prv_eau=na_col_drop(prv_eau,2)
prv_eau=na_line_drop(prv_eau,2)
prv_eau.reset_index(drop=True, inplace=True)


Columns dropped :['SPP/zone suspecte investiguée', 'Autre zone suspecte investiguée', 'Parcelle', 'X Lambert', 'Y Lambert', "Type d'étude (*)", 'Profondeur minimum de la crépine', 'Profondeur maximum de la crépine', 'Profondeur du fond du piézomètre', 'Profondeur de la nappe', 'Organoleptique odeur intensité (**)', 'Organoleptique odeur type', 'Limpidité (***)', 'Température', 'Conductivité électrique', 'pH']



In [365]:
prv_eau.columns

Index(['Nom échantillon', 'Période ',
       'Emplacement \n- S : Simulateur \n- HZS : Hors zone simulateur',
       'Date de prélèvement', 'Température de prélèvement '],
      dtype='object')

In [366]:
name=['ID_ech', 'Periode', 'Emplacement','Date_prv','Temp_prv']
prv_eau.replace(r'\n',' ', inplace=True, regex=True)
prv_eau=col_ren(prv_eau, name=name, mode=1)
prv_eau=prv_eau.query('ID_ech==ID_ech')
prv_eau.insert(1,'Type_ech','Eau')

In [367]:
gdf_viewer(prv_eau, rows=3)

Rows : 51, columns : 6


interactive(children=(IntSlider(value=3, description='rows', max=51, min=3, readout=False), IntSlider(value=6,…

In [368]:
an=an.transpose()
an.reset_index(drop=True, inplace=True)

In [369]:
an=col_ren(an, 1)

In [370]:
an.replace(r'<|>','', inplace=True, regex=True)
an.replace(r'-',np.nan, inplace=True, regex=True)
an.rename(columns={an.columns[0]:'ID_ech'}, inplace=True)

In [371]:
an=dble_col_drop(an)

column(s) dropped: ['8:ammonium', '12:nitrite', '14:nitrate']


In [372]:
an=na_col_drop(an,3)


Columns dropped :['CARBONE ORGANIQUE', 'DEMANDE EN O2', 'COMPOSES AZOTES', 'COMPOSES SOUFRES ', 'ELEMENTS MAJEURS', 'AUTRES ANALYSES', 'cyanure (libre)', 'METHYL-TERT-BUTYL-ETHER', 'MTBE', 'Teneur mesurée ', 'Teneur mesurée', 'VS : Valeur seuil', "(*) RP : Rapport de prélèvements; ES : Etude de sol; EO : Etude d'orientation; EC : Etude de caractérisation; SA : Suivi d'assainissement; EF : Evaluation finale", "(**) - : Pas d'impression organoleptique; + : Impression organoleptique faible; ++ : Impression organoleptique forte", '(***) + : Limpide; - : Trouble; -- : Opaque', "Le contenu des tableaux est conforme au modèle repris à l'annexe IX du GREO V03. Le formalisme a été adapté par SITEREM tout en garantissant la lisibilité du document imprimé. "]



In [373]:
an.rename(columns={'col_9':'ammoniaque libre'}, inplace=True)

In [374]:
an.drop(list(range(2)), axis=0, inplace=True)
an.reset_index(drop=True, inplace=True)
an=na_col_drop(an,1)
an.insert(1,'Type_ech','Eau')

In [375]:
an=col_ren(an, name=pol_field_model, mode=1)
#an=an.iloc[:,:-7]

In [376]:
gdf_viewer(an, rows=3)

Rows : 51, columns : 29


interactive(children=(IntSlider(value=3, description='rows', max=51, min=3, readout=False), IntSlider(value=12…

In [377]:
gdf_viewer(source_prv_eau)

Rows : 87, columns : 13


interactive(children=(IntSlider(value=10, description='rows', max=87, min=10, readout=False), IntSlider(value=…

$\color{red}{\text{error on merge (mainly because of temporal data, can't compare col ..._x, ...y). I must check it after}}$

In [378]:
if not os.path.exists(tmp_dir):
    os.makedirs(tmp_dir)

#bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)
#pz.to_csv(tmp_dir+sheet+'_Piezometers.csv', index=False)
#ouv.to_csv(tmp_dir+sheet+'_Unkown-facility.csv', index=False)
#litho.to_csv(tmp_dir+sheet+'_Lithology.csv', index=False)
#mes_pz.to_csv(tmp_dir+sheet+'_Measures.csv', index=False)
#mes_sol.to_csv(tmp_dir+sheet+'_Measures-soil.csv', index=False)
prv_eau.to_csv(tmp_dir+sheet+'_Samples-water.csv', index=False)
#prv_sol.to_csv(tmp_dir+sheet+'_Samples-soil.csv', index=False)
an.to_csv(tmp_dir+sheet+'_Analysis.csv', index=False)


#source_bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)
#source_pz.to_csv(tmp_dir+'source_Piezometers.csv', index=False)
#source_ouv.to_csv(tmp_dir+'source_Unkown-facility.csv', index=False)
#source_litho.to_csv(tmp_dir+'source_Lithology.csv', index=False)
#source_mes_pz.to_csv(tmp_dir+'source_Measures.csv', index=False)
#source_mes_sol.to_csv(tmp_dir+'source_Measures-soil.csv', index=False)
source_prv_eau.to_csv(tmp_dir+'source_Samples-water.csv', index=False)
#source_prv_sol.to_csv(tmp_dir+'source_Samples-soil.csv', index=False)
source_an.to_csv(tmp_dir+'source_Analysis.csv', index=False)


print(f'source_bh:{len(source_bh)} ; source_pz:{len(source_pz)} ; source_litho:{len(source_litho)} ; '
     f'source_Fac-uknw:{len(source_ouv)} ; source_an:{len(source_an)} ;\nsource_prv_sol:{len(source_prv_sol)} ;'
     f'source_prv_eau:{len(source_prv_eau)} ; source_mes_pz:{len(source_mes_pz)} ; source_mes_sol:{len(source_mes_sol)} ;')

source_bh:0 ; source_pz:0 ; source_litho:0 ; source_Fac-uknw:0 ; source_an:87 ;
source_prv_sol:0 ;source_prv_eau:87 ; source_mes_pz:0 ; source_mes_sol:0 ;


## 13-Resultats_Siterem_SOL.xlsx
* **Sheet : 'Résult SOL ext. pilote'**

In [379]:
# New file, so the source variables must be overwritten !!
_df=pd.DataFrame()
source_mes_pz, source_mes_sol, source_pz, source_prv_eau, source_prv_sol =  _df, _df, _df, _df, _df
source_ouv, source_an, source_litho, source_bh = _df, _df, _df, _df

In [380]:
tmp_dir='../../CF_data/Result_traitem/Siterem_Result_Sol/'
sheet='Result_sol_ExtP'

In [381]:
df = pd.read_excel('../../CF_data/Data_SITEREM/Resultats_Siterem_SOL.xlsx', 
                   sheet_name='Résult SOL ext. pilote', skiprows=5)
df=na_line_drop(df,0)
df=na_col_drop(df,1)
df.replace(r'<|>','', inplace=True, regex=True)
df.replace(r'-$',np.nan, inplace=True, regex=True)

gdf_viewer(df, rows=5)

  warn(msg)


7 NaN lines dropped

Columns dropped :['Unnamed: 0', 'Unnamed: 2', 'Unnamed: 49', 'Unnamed: 51', 'Unnamed: 52', 'Unnamed: 53', 'Unnamed: 54', 'Unnamed: 55', 'Unnamed: 56', 'Unnamed: 57', 'Unnamed: 58', 'Unnamed: 59', 'Unnamed: 60', 'Unnamed: 61', 'Unnamed: 62', 'Unnamed: 63', 'Unnamed: 64', 'Unnamed: 65', 'Unnamed: 66', 'Unnamed: 67', 'Unnamed: 68', 'Unnamed: 69', 'Unnamed: 70', 'Unnamed: 71', 'Unnamed: 72', 'Unnamed: 73', 'Unnamed: 74', 'Unnamed: 75', 'Unnamed: 76', 'Unnamed: 77', 'Unnamed: 78', 'Unnamed: 79', 'Unnamed: 80', 'Unnamed: 81', 'Unnamed: 82', 'Unnamed: 83', 'Unnamed: 84', 'Unnamed: 85', 'Unnamed: 86', 'Unnamed: 87', 'Unnamed: 88', 'Unnamed: 89', 'Unnamed: 90', 'Unnamed: 91', 'Unnamed: 92', 'Unnamed: 93', 'Unnamed: 94', 'Unnamed: 95', 'Unnamed: 96', 'Unnamed: 97', 'Unnamed: 98', 'Unnamed: 99', 'Unnamed: 100', 'Unnamed: 101', 'Unnamed: 102', 'Unnamed: 103', 'Unnamed: 104', 'Unnamed: 105', 'Unnamed: 106', 'Unnamed: 107', 'Unnamed: 108', 'Unnamed: 109', 'Unnamed: 110', 'Unname

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


interactive(children=(IntSlider(value=5, description='rows', max=103, min=5, readout=False), IntSlider(value=1…

In [382]:
prv_sol=df.loc[:22]
an=df.loc[23:]

In [383]:
an.loc[0.5] = df.loc[0] # put data on first line
an = an.sort_index().reset_index(drop=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value, self.name)


In [384]:
prv_sol=prv_sol.transpose()
prv_sol.reset_index(drop=True, inplace=True)
prv_sol=col_ren(prv_sol, 1)

In [385]:
prv_sol=dble_col_drop(prv_sol)

column(s) dropped: ['16:température pour mes. pH']


In [386]:
prv_sol.drop(list(range(3)), axis=0, inplace=True)
prv_sol.reset_index(drop=True, inplace=True)
prv_sol=na_col_drop(prv_sol,1)
prv_sol=na_line_drop(prv_sol,3)
prv_sol.reset_index(drop=True, inplace=True)


Columns dropped :['MO et COT', 'pH', 'GRANULOMETRIE']



In [387]:
gdf_viewer(prv_sol, rows=3)

Rows : 44, columns : 19


interactive(children=(IntSlider(value=3, description='rows', max=44, min=3, readout=False), IntSlider(value=12…

In [388]:
prv_sol=prv_sol[:-1]
prv_sol.drop(columns=['broyage'], inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


In [389]:
name=['ID_ech','Ech_top', 'Ech_base','MS','Date_prv','Long_for','Refus','Description','MO','COT','pH_KCl', 
      'Temp_pH_mes','pH_H20','Fract_2','Fract_2+', 'Fract_min_2µ','Fract_min_50µ','Fract_min_2']
prv_sol=col_ren(prv_sol, name=name, mode=1)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


In [390]:
set(prv_sol.Description)

{'R', 'R ', 'TN', 'TN '}

In [391]:
for i in range(len(prv_sol['Description'])):
    x = prv_sol.loc[i,'Description']
    if x in ['R','R ']: prv_sol.loc[i,'Description']='Remblais'
    elif x in ['TN','TN ']: prv_sol.loc[i,'Description']='Terrain naturel'

prv_sol['Refus']=prv_sol['Refus'].apply(lambda x: 'x' if not pd.isnull(x) else '')
prv_sol.insert(1,'Type_ech','Sol')#

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value, self.name)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  prv_sol['Refus']=prv_sol['Refus'].apply(lambda x: 'x' if not pd.isnull(x) else '')


In [392]:
gdf_viewer(prv_sol, rows=3)

Rows : 43, columns : 19


interactive(children=(IntSlider(value=3, description='rows', max=43, min=3, readout=False), IntSlider(value=12…

In [393]:
an=an.transpose()
an.reset_index(drop=True, inplace=True)

In [394]:
an=col_ren(an, 1)

In [395]:
an.rename(columns={an.columns[0]:'ID_ech'}, inplace=True)

In [396]:
name=['ID_ech','METAUX LOURDS','Arsenic','Cadmium','Chrome','Chrome VI','Cuivre','Mercure','Plomb','Nickel',
'Zinc','CYANURES','cyanure (libre)','cyanure (totaux)','cyanure (APE)','cyanure complex','thiocyanate',
'COMPOSES AROMATIQUES VOLATILS','Benzène','Toluène','Éthylbenzène','Orthoxylène','Para- et métaxylène','Xylènes',
'Styrène','BTEX totaux','PHENOLS','Phénol','HYDROCARBURES AROMATIQUES POLYCYCLIQUES','Naphtalène','Acénaphtylène',
'Acénaphtène','Fluorène','Phénanthrène','Anthracène','Fluoranthène','Pyrène','Benzo(a)anthracène','Chrysène',
'Benzo(b)fluoranthène','Benzo(k)fluoranthène','Benzo(a)pyrène','Dibenzo(ah)anthracène','Benzo(ghi)pérylène',
'Indéno(1,2,3-cd)pyrène','HAP Totaux (16) - EPA','COMPOSES ORGANOHALOGENES VOLATILS','Tétrachloroéthylène',
'Trichloroéthylène','1,1-dichloroéthène','Cis-1,2-dichloroéthène','Trans 1,2-dichloroéthylène',
'Totaux (cis,trans) 1,2-dichloroéthènes','Chlorure de vinyle','1,1,1-Trichloroéthane','1,1,2-Trichloroéthane',
'1,1-Dichloroéthane','1,2-Dichloroéthane','Tétrachlorométhane','Chloroforme','Dichlorométhane',
'1,2-dichloropropane','HYDROCARBURES TOTAUX','fraction aromat. >C6-C7','fraction aromat. >C7-C8',
'fraction aromat. >C8-C10','fraction aliphat. C5-C6','fraction aliphat. >C6-C8','fraction aliphat. >C8-C10',
'Fraction C5 - C8','Fraction C8 - C10','Fraction C10-C12','Fraction C12-C16','Fraction C16 - C21',
'Fraction C21 - C35','Fraction C35 - C40','Hydrocarbures totaux C10-C35','Hydrocarbures totaux C10-C40',
'Teneur mesurée','Teneur mesurée','VS : Valeur seuil']

an=col_ren(an, name=name, mode=1)

In [397]:
an=dble_col_drop(an)

column(s) dropped: ['79:Teneur mesurée']


In [398]:
an.drop(list(range(3)), axis=0, inplace=True)
an.reset_index(drop=True, inplace=True)
an=na_col_drop(an,1)
an.insert(1,'Type_ech','Sol')


Columns dropped :['METAUX LOURDS', 'CYANURES', 'COMPOSES AROMATIQUES VOLATILS', 'BTEX totaux', 'PHENOLS', 'HYDROCARBURES AROMATIQUES POLYCYCLIQUES', 'COMPOSES ORGANOHALOGENES VOLATILS', 'HYDROCARBURES TOTAUX', 'Teneur mesurée', 'VS : Valeur seuil']



In [399]:
an=col_ren(an, name=pol_field_model, mode=1)

In [400]:
an.rename(columns={'cyanure (totaux)':'CN_tot', 'cyanure (APE)':'CN_EPA'}, inplace=True)

In [401]:
gdf_viewer(an, rows=5) 

Rows : 44, columns : 71


interactive(children=(IntSlider(value=5, description='rows', max=44, min=5, readout=False), IntSlider(value=12…

In [402]:
source_prv_sol=prv_sol
source_an=an

In [403]:
if not os.path.exists(tmp_dir):
    os.makedirs(tmp_dir)

#bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)
#pz.to_csv(tmp_dir+sheet+'_Piezometers.csv', index=False)
#ouv.to_csv(tmp_dir+sheet+'_Unkown-facility.csv', index=False)
#litho.to_csv(tmp_dir+sheet+'_Lithology.csv', index=False)
#mes_pz.to_csv(tmp_dir+sheet+'_Measures.csv', index=False)
#mes_sol.to_csv(tmp_dir+sheet+'_Measures-soil.csv', index=False)
#prv_eau.to_csv(tmp_dir+sheet+'_Samples-water.csv', index=False)
prv_sol.to_csv(tmp_dir+sheet+'_Samples-soil.csv', index=False)
an.to_csv(tmp_dir+sheet+'_Analysis.csv', index=False)


#source_bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)
#source_pz.to_csv(tmp_dir+'source_Piezometers.csv', index=False)
#source_ouv.to_csv(tmp_dir+'source_Unkown-facility.csv', index=False)
#source_litho.to_csv(tmp_dir+'source_Lithology.csv', index=False)
#source_mes_pz.to_csv(tmp_dir+'source_Measures.csv', index=False)
#source_mes_sol.to_csv(tmp_dir+'source_Measures-soil.csv', index=False)
#source_prv_eau.to_csv(tmp_dir+'source_Samples-water.csv', index=False)
source_prv_sol.to_csv(tmp_dir+'source_Samples-soil.csv', index=False)
source_an.to_csv(tmp_dir+'source_Analysis.csv', index=False)


print(f'source_bh:{len(source_bh)} ; source_pz:{len(source_pz)} ; source_litho:{len(source_litho)} ; '
     f'source_Fac-uknw:{len(source_ouv)} ; source_an:{len(source_an)} ;\nsource_prv_sol:{len(source_prv_sol)} ;'
     f'source_prv_eau:{len(source_prv_eau)} ; source_mes_pz:{len(source_mes_pz)} ; source_mes_sol:{len(source_mes_sol)} ;')

source_bh:0 ; source_pz:0 ; source_litho:0 ; source_Fac-uknw:0 ; source_an:44 ;
source_prv_sol:43 ;source_prv_eau:0 ; source_mes_pz:0 ; source_mes_sol:0 ;


* **Sheet : 'SOL T1 pilote'**

In [404]:
tmp_dir='../../CF_data/Result_traitem/Siterem_Result_Sol/'
sheet='SOL_T1_Pilote'

In [405]:
df = pd.read_excel('../../CF_data/Data_SITEREM/Resultats_Siterem_SOL.xlsx', 
                   sheet_name='SOL T1 pilote', skiprows=5)
df=na_line_drop(df,0)
df=na_col_drop(df,1)
df.replace(r'<|>','', inplace=True, regex=True)
df.replace(r'-$',np.nan, inplace=True, regex=True)

gdf_viewer(df, rows=5)

  warn(msg)


4 NaN lines dropped

Columns dropped :['Unnamed: 0', 'Unnamed: 2', 'Unnamed: 30', 'Unnamed: 31', 'Unnamed: 32', 'Unnamed: 33', 'Unnamed: 34', 'Unnamed: 35', 'Unnamed: 36', 'Unnamed: 38', 'Unnamed: 39', 'Unnamed: 40', 'Unnamed: 41', 'Unnamed: 42', 'Unnamed: 43', 'Unnamed: 44', 'Unnamed: 45', 'Unnamed: 46', 'Unnamed: 47', 'Unnamed: 48', 'Unnamed: 49', 'Unnamed: 50', 'Unnamed: 51', 'Unnamed: 52', 'Unnamed: 53', 'Unnamed: 54', 'Unnamed: 55', 'Unnamed: 56', 'Unnamed: 57', 'Unnamed: 58', 'Unnamed: 59', 'Unnamed: 60', 'Unnamed: 61', 'Unnamed: 62', 'Unnamed: 63', 'Unnamed: 64', 'Unnamed: 65', 'Unnamed: 66', 'Unnamed: 67', 'Unnamed: 68', 'Unnamed: 69', 'Unnamed: 70', 'Unnamed: 71', 'Unnamed: 72', 'Unnamed: 73', 'Unnamed: 74', 'Unnamed: 75', 'Unnamed: 76', 'Unnamed: 77', 'Unnamed: 78', 'Unnamed: 79', 'Unnamed: 80', 'Unnamed: 81', 'Unnamed: 82', 'Unnamed: 83', 'Unnamed: 84', 'Unnamed: 85', 'Unnamed: 86', 'Unnamed: 87', 'Unnamed: 88', 'Unnamed: 89', 'Unnamed: 90', 'Unnamed: 91', 'Unnamed: 92', 'Un

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


interactive(children=(IntSlider(value=5, description='rows', max=135, min=5, readout=False), IntSlider(value=1…

In [406]:
prv_sol=df.loc[:35]
an=df.loc[36:]

In [407]:
an.loc[0.5] = df.loc[0] # put data on first line
an = an.sort_index().reset_index(drop=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value, self.name)


In [408]:
prv_sol=prv_sol.transpose()
prv_sol.reset_index(drop=True, inplace=True)
prv_sol=col_ren(prv_sol, 1)

In [409]:
prv_sol=dble_col_drop(prv_sol)

column(s) dropped: ['9:Autre zone suspecte investiguée', '27:température pour mes. pH', '30:pH (H20)']


In [410]:
prv_sol.drop(list(range(3)), axis=0, inplace=True)
prv_sol.reset_index(drop=True, inplace=True)
prv_sol=na_col_drop(prv_sol,1)
prv_sol=na_line_drop(prv_sol,3)
prv_sol.reset_index(drop=True, inplace=True)


Columns dropped :['Matières organiques', 'SPP/zone suspecte investiguée', 'Autre zone suspecte investiguée', 'Parcelle', 'X Lambert', 'Y Lambert', "Type d'étude (*)", 'Organoleptique couleur suspecte', 'Organoleptique odeur intensité (***)', 'Organoleptique odeur type', 'MO et COT', 'matières organiques', 'COT', 'pH', 'pH (KCl)', 'température pour mes. pH', 'pH (H20)', 'GRANULOMETRIE', 'Fraction argileuse', 'parties min. 2µm', 'parties min. 50µm', 'parties min. 2mm']

9 NaN lines dropped


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


In [411]:
prv_sol=prv_sol[:-1]
prv_sol.drop(columns=['broyage'], inplace=True)

In [412]:
prv_sol.columns

Index(['Nom de l'échantillon', 'Profondeur échantillon de', 'à',
       'Matière sèche', 'Date de prélèvement', 'Profondeur d'arrêt du forage',
       'Refus de forage (seulement si oui)', 'Terrain naturel/Remblai (**)',
       'fraction  2 mm (prép. séché à 40°C) ',
       'fraction 2 mm (prép. séché à 40°C) '],
      dtype='object')

In [413]:
name=['ID_ech','Ech_top', 'Ech_base','MS','Date_prv','Long_for','Refus','Nature_ech','Fract_2','Fract_2+']
prv_sol=col_ren(prv_sol, name=name, mode=1)

In [414]:
for i in range(len(prv_sol['Nature_ech'])):
    x = prv_sol.loc[i,'Nature_ech']
    if x in ['R','R ']: prv_sol.loc[i,'Nature_ech']='Remblais'
    elif x in ['TN','TN ']: prv_sol.loc[i,'Nature_ech']='Terrain naturel'

prv_sol['Refus']=prv_sol['Refus'].apply(lambda x: 'x' if not pd.isnull(x) else '')
prv_sol.insert(1,'Type_ech','Sol')#

In [415]:
gdf_viewer(prv_sol, rows=3)

Rows : 15, columns : 11


interactive(children=(IntSlider(value=3, description='rows', max=15, min=3, readout=False), IntSlider(value=11…

In [416]:
an=an.transpose()
an.reset_index(drop=True, inplace=True)

In [417]:
an=col_ren(an, 1)

In [418]:
an.rename(columns={an.columns[0]:'ID_ech'}, inplace=True)

In [419]:
name=['ID_ech','METAUX LOURDS','Arsenic','Cadmium','Chrome','Chrome VI','Cobalt','Cuivre','Mercure','Plomb', 
'Nickel','Zinc','CYANURES','cyanure (libre)','cyanure (totaux)','cyanure (APE)','cyanure complex','thiocyanate',
'COMPOSES AROMATIQUES VOLATILS','Benzène','Toluène','Éthylbenzène', 'Orthoxylène','Para- et métaxylène','Xylènes',
'Styrène','BTEX totaux','PHENOLS','Phénol','Indice phénol','HYDROCARBURES AROMATIQUES POLYCYCLIQUES','Naphtalène',
'Acénaphtylène','Acénaphtène', 'Fluorène','Phénanthrène','Anthracène','Fluoranthène','Pyrène','Benzo(a)anthracène',
'Chrysène','Benzo(b)fluoranthène','Benzo(k)fluoranthène','Benzo(a)pyrène','Dibenzo(ah)anthracène',
'Benzo(ghi)pérylène','Indéno(1,2,3-cd)pyrène','HAP Totaux (16) - EPA','COMPOSES ORGANOHALOGENES VOLATILS',
'Tétrachloroéthylène','Trichloroéthylène','1,1-dichloroéthène','Cis-1,2-dichloroéthène',
'Trans 1,2-dichloroéthylène','Totaux (cis,trans) 1,2-dichloroéthènes','Chlorure de vinyle',
'1,1,1-Trichloroéthane','1,1,2-Trichloroéthane','1,1-Dichloroéthane','1,2-Dichloroéthane','Tétrachlorométhane',
'Chloroforme','Dichlorométhane','1,2-dichloropropane','EOX','HYDROCARBURES TOTAUX',
'fraction aromat. >C6-C7','fraction aromat. >C7-C8','fraction aromat. >C8-C10','fraction aliphat. C5-C6',
'fraction aliphat. >C6-C8','fraction aliphat. >C8-C10','Fraction C5 - C8','Fraction C8 - C10','Fraction C10-C12',
'Fraction C12-C16','Fraction C16 - C21','Fraction C21 - C35','Fraction C35 - C40','Hydrocarbures totaux C10-C35',
'Hydrocarbures totaux C10-C40','METHYL-TERT-BUTYL-ETHER','MTBE']

an=an.iloc[:,:-17]
an=col_ren(an, name=name, mode=1)

In [420]:
an=dble_col_drop(an)

column(s) dropped: []


In [421]:
an.drop(list(range(3)), axis=0, inplace=True)
an.reset_index(drop=True, inplace=True)
an=na_col_drop(an,3)
an.insert(1,'Type_ech','Sol')


Columns dropped :['METAUX LOURDS', 'Chrome VI', 'Cobalt', 'CYANURES', 'COMPOSES AROMATIQUES VOLATILS', 'BTEX totaux', 'PHENOLS', 'Phénol', 'Indice phénol', 'HYDROCARBURES AROMATIQUES POLYCYCLIQUES', 'COMPOSES ORGANOHALOGENES VOLATILS', 'EOX', 'HYDROCARBURES TOTAUX', 'METHYL-TERT-BUTYL-ETHER']



In [422]:
an=col_ren(an, name=pol_field_model, mode=1)

In [423]:
an.rename(columns={'cyanure (totaux)':'CN_tot', 'cyanure (APE)':'CN_EPA'}, inplace=True)

In [424]:
gdf_viewer(an, rows=5) 

Rows : 25, columns : 70


interactive(children=(IntSlider(value=5, description='rows', max=25, min=5, readout=False), IntSlider(value=12…

In [425]:
#source_prv_sol.info()#, prv_sol.info()

In [426]:
source_prv_sol=source_prv_sol[['ID_ech', 'Type_ech', 'Ech_top', 'Ech_base', 'MS', 'Date_prv','Long_for', 
                               'Refus', 'Description', 'Fract_2', 'Fract_2+']]

In [427]:
source_prv_sol=gdf_merger(source_prv_sol, prv_sol, col='ID_ech', how='outer')[0]

In [428]:
source_an=gdf_merger(source_an,an,col='ID_ech', how='outer')[0]
source_an=source_an.query('ID_ech==ID_ech')

In [429]:
if not os.path.exists(tmp_dir):
    os.makedirs(tmp_dir)

#bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)
#pz.to_csv(tmp_dir+sheet+'_Piezometers.csv', index=False)
#ouv.to_csv(tmp_dir+sheet+'_Unkown-facility.csv', index=False)
#litho.to_csv(tmp_dir+sheet+'_Lithology.csv', index=False)
#mes_pz.to_csv(tmp_dir+sheet+'_Measures.csv', index=False)
#mes_sol.to_csv(tmp_dir+sheet+'_Measures-soil.csv', index=False)
#prv_eau.to_csv(tmp_dir+sheet+'_Samples-water.csv', index=False)
prv_sol.to_csv(tmp_dir+sheet+'_Samples-soil.csv', index=False)
an.to_csv(tmp_dir+sheet+'_Analysis.csv', index=False)


#source_bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)
#source_pz.to_csv(tmp_dir+'source_Piezometers.csv', index=False)
#source_ouv.to_csv(tmp_dir+'source_Unkown-facility.csv', index=False)
#source_litho.to_csv(tmp_dir+'source_Lithology.csv', index=False)
#source_mes_pz.to_csv(tmp_dir+'source_Measures.csv', index=False)
#source_mes_sol.to_csv(tmp_dir+'source_Measures-soil.csv', index=False)
#source_prv_eau.to_csv(tmp_dir+'source_Samples-water.csv', index=False)
source_prv_sol.to_csv(tmp_dir+'source_Samples-soil.csv', index=False)
source_an.to_csv(tmp_dir+'source_Analysis.csv', index=False)


print(f'source_bh:{len(source_bh)} ; source_pz:{len(source_pz)} ; source_litho:{len(source_litho)} ; '
     f'source_Fac-uknw:{len(source_ouv)} ; source_an:{len(source_an)} ;\nsource_prv_sol:{len(source_prv_sol)} ;'
     f'source_prv_eau:{len(source_prv_eau)} ; source_mes_pz:{len(source_mes_pz)} ; source_mes_sol:{len(source_mes_sol)} ;')

source_bh:0 ; source_pz:0 ; source_litho:0 ; source_Fac-uknw:0 ; source_an:58 ;
source_prv_sol:58 ;source_prv_eau:0 ; source_mes_pz:0 ; source_mes_sol:0 ;


## 14-Logs_forages_vUmons_2018-03-20.xlsx
* **Sheet : 'Analyse_eau_Phases1&2'**

In [430]:
# New file, so the source variables must be overwritten !!
_df=pd.DataFrame()
source_mes_pz, source_mes_sol, source_pz, source_prv_eau, source_prv_sol =  _df, _df, _df, _df, _df
source_ouv, source_an, source_litho, source_bh = _df, _df, _df, _df

In [431]:
tmp_dir='../../CF_data/Result_traitem/vUmons_logsFor/'
sheet='Analyse_eau_Phases1&2'

In [432]:
df = pd.read_excel('../../CF_data/Data_UMONS/Logs_forages_vUmons_2018-03-20.xlsx', 
                   sheet_name='Analyse_eau_Phases1&2', skiprows=0)
df=na_line_drop(df,0)
df=na_col_drop(df,1)
df.replace(r'<|>','', inplace=True, regex=True)
df.replace(r'-$',np.nan, inplace=True, regex=True)

gdf_viewer(df, rows=5)

1 NaN lines dropped
Rows : 51, columns : 85


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


interactive(children=(IntSlider(value=5, description='rows', max=51, min=5, readout=False), IntSlider(value=12…

In [433]:
df.drop(list(range(4)), axis=0, inplace=True)
df.reset_index(drop=True, inplace=True)

In [434]:
df.replace(9999,np.nan, inplace=True, regex=True) #int
df.replace(f'[{9999}|9999].',np.nan, inplace=True, regex=True) #float, str

In [435]:
df['CE']=df['CE'].apply(lambda x: pd.to_numeric(x)/1000 
                                  if re.search('^\d+', str(x)) and not pd.isnull(x) else np.nan)

In [436]:
df=col_ren(df,mode=1,name=[re.sub('9999','-',x) for x in df.columns])
df=col_ren(df,mode=1, name=pol_field_model)

In [437]:
name=['ID', 'ID_ech', 'Date_prv', 'X', 'Y', 'Z', 'Long_for','Long_pz_sol', 'Niv_eau_sol', 'pH', 'CE', 'T', 
      'As', 'Cd', 'Cr', 'Cr_VI', 'Cu', 'Hg','Pb', 'Ni', 'Zn', 'CN_libre', 'CN_tot', 'CN_APE', 'CN_comp',
      'thioCN', 'Bnz_vn', 'Bnz', 'Toln_vn', 'Toln', 'EthylBnz','O-Xyl', 'P-M-Xyl', 'Xyl_vn', 'Xyl', 'Styr', 
      'Phenol','Naphta_vn', 'Naphta', 'Acenaphtyl', 'Acenaphtn', 'Fluorene',
       'Phenanthr', 'Anthrc', 'Flranth', 'Pyr', 'Bnz(a)anthrc', 'Chrys',
       'Bnz(b)flranth', 'Bnz(k)flranth', 'Bnz(a)pyr', 'Dibnz(ah)anthrc',
       'Bnz(ghi)peryl', 'Indeno(1,2,3-cd)pyr', 'HAP_tot_EPA',
       '1,1-DCE', '1,2-DCE', '1,1-DCEn', 'Cis-1,2-DCEn',
       '(cis,trans) 1,2-DCE_tot', 'Trans 1,2-DCEyl', 'DCM', '1,2-DCP',
       'TetraCEyn', 'TCM', '1,1,1-TCE', '1,1,2-TCE', 'TCEyn', 'Chloroforme',
       'CVinyl', 'Arom_C6C7', 'Arom_C7C8', 'Arom_C8C10', 'Aliphat_C5C6',
       'Aliphat_C6C8', 'Aliphat_C8C10', 'Fract_C5C8', 'Fract_C8C10',
       'Fract_C10C12', 'Fract_C12C16', 'Fract_C16C21', 'Fract_C21C35',
       'HC_tot_C10C35', 'MTBE', 'Chlorure']
df=col_ren(df, mode=1,name=name)

In [438]:
df['ID_ech'].replace('Canne ', 'Can', inplace=True, regex=True)
df['ID_ech'].replace('\n', ' ', inplace=True, regex=True)
df.insert(1,'Type_ech','Eau')

In [439]:
df.drop([20,39], axis=0,inplace=True)
df.reset_index(drop=True, inplace=True)

In [440]:
df.loc[38:,'Date_prv']=df.loc[38:,'Date_prv'].apply(lambda x : dtm.datetime.fromordinal(dtm.datetime(1900, 1, 1).toordinal() + x - 2))

In [441]:
for i in range(len(df['ID_ech'])):
    if pd.isnull(df.loc[i,'ID_ech']): 
        df.loc[i,'ID_ech']=df.loc[i,'ID']

In [442]:
pz=df[['ID', 'X', 'Y', 'Z', 'Long_for','Long_pz_sol']]
pz['Type'] = 'Piezo'

prv_eau=df[['ID_ech','Type_ech','Date_prv', 'X', 'Y', 'Z','Niv_eau_sol', 'pH', 'CE', 'T']]
an=df[['ID_ech','Type_ech','Date_prv','As', 'Cd', 'Cr', 'Cr_VI', 'Cu', 'Hg','Pb', 'Ni', 'Zn', 'CN_libre', 'CN_tot', 'CN_APE', 
       'CN_comp','thioCN', 'Bnz_vn', 'Bnz', 'Toln_vn', 'Toln', 'EthylBnz','O-Xyl', 'P-M-Xyl', 'Xyl_vn', 'Xyl',
       'Styr', 'Phenol','Naphta_vn', 'Naphta', 'Acenaphtyl', 'Acenaphtn', 'Fluorene',
       'Phenanthr', 'Anthrc', 'Flranth', 'Pyr', 'Bnz(a)anthrc', 'Chrys',
       'Bnz(b)flranth', 'Bnz(k)flranth', 'Bnz(a)pyr', 'Dibnz(ah)anthrc',
       'Bnz(ghi)peryl', 'Indeno(1,2,3-cd)pyr', 'HAP_tot_EPA',
       '1,1-DCE', '1,2-DCE', '1,1-DCEn', 'Cis-1,2-DCEn',
       '(cis,trans) 1,2-DCE_tot', 'Trans 1,2-DCEyl', 'DCM', '1,2-DCP',
       'TetraCEyn', 'TCM', '1,1,1-TCE', '1,1,2-TCE', 'TCEyn', 'Chloroforme',
       'CVinyl', 'Arom_C6C7', 'Arom_C7C8', 'Arom_C8C10', 'Aliphat_C5C6',
       'Aliphat_C6C8', 'Aliphat_C8C10', 'Fract_C5C8', 'Fract_C8C10',
       'Fract_C10C12', 'Fract_C12C16', 'Fract_C16C21', 'Fract_C21C35',
       'HC_tot_C10C35', 'MTBE', 'Chlorure']]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pz['Type'] = 'Piezo'


In [443]:
pz.drop_duplicates(subset=['ID'], keep='first', ignore_index=True, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pz.drop_duplicates(subset=['ID'], keep='first', ignore_index=True, inplace=True)


In [444]:
gdf_viewer(an, rows=5)

Rows : 45, columns : 76


interactive(children=(IntSlider(value=5, description='rows', max=45, min=5, readout=False), IntSlider(value=12…

In [445]:
source_an=an
source_pz=pz
source_prv_eau=prv_eau

In [446]:
if not os.path.exists(tmp_dir):
    os.makedirs(tmp_dir)

#bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)
pz.to_csv(tmp_dir+sheet+'_Piezometers.csv', index=False)
#ouv.to_csv(tmp_dir+sheet+'_Unkown-facility.csv', index=False)
#litho.to_csv(tmp_dir+sheet+'_Lithology.csv', index=False)
#mes_pz.to_csv(tmp_dir+sheet+'_Measures.csv', index=False)
#mes_sol.to_csv(tmp_dir+sheet+'_Measures-soil.csv', index=False)
prv_eau.to_csv(tmp_dir+sheet+'_Samples-water.csv', index=False)
#prv_sol.to_csv(tmp_dir+sheet+'_Samples-soil.csv', index=False)
an.to_csv(tmp_dir+sheet+'_Analysis.csv', index=False)


#source_bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)
source_pz.to_csv(tmp_dir+'source_Piezometers.csv', index=False)
#source_ouv.to_csv(tmp_dir+'source_Unkown-facility.csv', index=False)
#source_litho.to_csv(tmp_dir+'source_Lithology.csv', index=False)
#source_mes_pz.to_csv(tmp_dir+'source_Measures.csv', index=False)
#source_mes_sol.to_csv(tmp_dir+'source_Measures-soil.csv', index=False)
source_prv_eau.to_csv(tmp_dir+'source_Samples-water.csv', index=False)
#source_prv_sol.to_csv(tmp_dir+'source_Samples-soil.csv', index=False)
source_an.to_csv(tmp_dir+'source_Analysis.csv', index=False)


print(f'source_bh:{len(source_bh)} ; source_pz:{len(source_pz)} ; source_litho:{len(source_litho)} ; '
     f'source_Fac-uknw:{len(source_ouv)} ; source_an:{len(source_an)} ;\nsource_prv_sol:{len(source_prv_sol)} ;'
     f'source_prv_eau:{len(source_prv_eau)} ; source_mes_pz:{len(source_mes_pz)} ; source_mes_sol:{len(source_mes_sol)} ;')

source_bh:0 ; source_pz:29 ; source_litho:0 ; source_Fac-uknw:0 ; source_an:45 ;
source_prv_sol:0 ;source_prv_eau:45 ; source_mes_pz:0 ; source_mes_sol:0 ;


* **Sheet : 'Analyse_sol_Phases1&2'**

In [447]:
tmp_dir='../../CF_data/Result_traitem/vUmons_logsFor/'
sheet='Analyse_sol_Phases1&2'

In [448]:
df = pd.read_excel('../../CF_data/Data_UMONS/Logs_forages_vUmons_2018-03-20.xlsx', 
                   sheet_name='Analyse_sol_Phases1&2', skiprows=0)
df=na_line_drop(df,0)
df=na_col_drop(df,1)
df.replace(r'<|>','', inplace=True, regex=True)
df.replace(r'-$',np.nan, inplace=True, regex=True)

gdf_viewer(df, rows=5)

Rows : 64, columns : 84


interactive(children=(IntSlider(value=5, description='rows', max=64, min=5, readout=False), IntSlider(value=12…

In [449]:
df=col_ren(df, mode=1, name=pol_field_model)

In [450]:
name=['ID_ech','Date_prv','ID','X','Y','Z','Nature_ech','Organo','Long_for','Refus','Ech_top','Ech_base',
      'MS','Broyage < 150 µm','Broyage ','Fract_2','Fract_2+','As','Cd','Cr','Cr_VI','Cu',
       'Hg','Pb','Ni','Zn','CN_libre','CN_tot','CN_APE',
       'CN_comp','thioCN','Bnz','Toln','EthylBnz','O-Xyl','P-M-Xyl',
       'Xyl','Styr','Phenol','Naphta','Acenaphtyl','Acenaphtn',
       'Fluorene','Phenanthr','Anthrc','Flranth','Pyr','Bnz(a)anthrc',
       'Chrys','Bnz(b)flranth','Bnz(k)flranth','Bnz(a)pyr',
       'Dibnz(ah)anthrc','Bnz(ghi)peryl','Indeno(1,2,3-cd)pyr',
       'HAP_tot_EPA','1,1-DCE','1,2-DCE','1,1-DCEn',
       'Cis-1,2-DCEn','Trans 1,2-DCEyl','DCM',
       '(cis,trans) 1,2-DCE_tot','1,2-DCP','TetraCEyn','TCM',
       '1,1,1-TCE','1,1,2-TCE','TCEyn','Chloroforme','CVinyl','Arom_C6C7',
       'Arom_C7C8','Arom_C8C10','Aliphat_C5C6','Aliphat_C6C8',
       'Aliphat_C8C10','Fract_C5C8','Fract_C8C10','Fract_C10C12',
       'Fract_C12C16','Fract_C16C21','Fract_C21C35','HC_tot_C10C35']
df=col_ren(df, mode=1, name=name)

In [451]:
df.drop(list(range(4)), axis=0, inplace=True)
df.reset_index(drop=True, inplace=True)

In [452]:
df.replace(9999,np.nan, inplace=True, regex=True) #int
df.replace(f'[{9999}|9999].',np.nan, inplace=True, regex=True) #float, str

In [453]:
for i in range(len(df['Nature_ech'])):
    x = df.loc[i,'Nature_ech']
    if x in ['R','R ']: df.loc[i,'Nature_ech']='Remblais'
    elif x in ['L']: df.loc[i,'Nature_ech']='Limons'
    elif x in ['LA']: df.loc[i,'Nature_ech']='Limons et argiles'
    elif x in ['LS']: df.loc[i,'Nature_ech']='Limons et sables'

df['Refus']=df['Refus'].apply(lambda x: 'x' if not pd.isnull(x) else '')
df.insert(1,'Type_ech','Sol')

In [454]:
df.drop(14, axis=0, inplace=True)
df.drop(['Broyage < 150 µm', 'Broyage '], axis=1, inplace=True)
df.reset_index(drop=True, inplace=True)

In [455]:
df.loc[8, 'ID_ech']='F4/2M'
df.loc[31, 'ID_ech']='P19/1'
df.loc[32, 'ID_ech']='P19/2'

In [456]:
pz=df[['ID', 'X', 'Y', 'Z', 'Long_for','Refus']]
pz['Type'] = 'Piezo'

prv_sol=df[['ID_ech', 'Type_ech', 'Date_prv', 'X', 'Y', 'Z', 'Nature_ech','Organo', 
            'Ech_top', 'Ech_base', 'MS', 'Fract_2','Fract_2+']]
an=df[['ID_ech', 'Type_ech','As', 'Cd', 'Cr', 'Cr_VI', 'Cu', 'Hg', 'Pb', 'Ni', 'Zn',
       'CN_libre', 'CN_tot', 'CN_APE', 'CN_comp', 'thioCN', 'Bnz', 'Toln',
       'EthylBnz', 'O-Xyl', 'P-M-Xyl', 'Xyl', 'Styr', 'Phenol', 'Naphta',
       'Acenaphtyl', 'Acenaphtn', 'Fluorene', 'Phenanthr', 'Anthrc', 'Flranth',
       'Pyr', 'Bnz(a)anthrc', 'Chrys', 'Bnz(b)flranth', 'Bnz(k)flranth',
       'Bnz(a)pyr', 'Dibnz(ah)anthrc', 'Bnz(ghi)peryl', 'Indeno(1,2,3-cd)pyr',
       'HAP_tot_EPA', '1,1-DCE', '1,2-DCE', '1,1-DCEn', 'Cis-1,2-DCEn',
       'Trans 1,2-DCEyl', 'DCM', '(cis,trans) 1,2-DCE_tot', '1,2-DCP',
       'TetraCEyn', 'TCM', '1,1,1-TCE', '1,1,2-TCE', 'TCEyn', 'Chloroforme',
       'CVinyl', 'Arom_C6C7', 'Arom_C7C8', 'Arom_C8C10', 'Aliphat_C5C6',
       'Aliphat_C6C8', 'Aliphat_C8C10', 'Fract_C5C8', 'Fract_C8C10',
       'Fract_C10C12', 'Fract_C12C16', 'Fract_C16C21', 'Fract_C21C35',
       'HC_tot_C10C35']]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pz['Type'] = 'Piezo'


In [457]:
gdf_viewer(df, rows=5)

Rows : 59, columns : 83


interactive(children=(IntSlider(value=5, description='rows', max=59, min=5, readout=False), IntSlider(value=12…

In [458]:
source_an=source_an.append(an, ignore_index=True)
source_pz=pz
source_prv_sol=prv_sol

In [459]:
if not os.path.exists(tmp_dir):
    os.makedirs(tmp_dir)

#bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)
pz.to_csv(tmp_dir+sheet+'_Piezometers.csv', index=False)
#ouv.to_csv(tmp_dir+sheet+'_Unkown-facility.csv', index=False)
#litho.to_csv(tmp_dir+sheet+'_Lithology.csv', index=False)
#mes_pz.to_csv(tmp_dir+sheet+'_Measures.csv', index=False)
#mes_sol.to_csv(tmp_dir+sheet+'_Measures-soil.csv', index=False)
#prv_eau.to_csv(tmp_dir+sheet+'_Samples-water.csv', index=False)
prv_sol.to_csv(tmp_dir+sheet+'_Samples-soil.csv', index=False)
an.to_csv(tmp_dir+sheet+'_Analysis.csv', index=False)


#source_bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)
source_pz.to_csv(tmp_dir+'source_Piezometers.csv', index=False)
#source_ouv.to_csv(tmp_dir+'source_Unkown-facility.csv', index=False)
#source_litho.to_csv(tmp_dir+'source_Lithology.csv', index=False)
#source_mes_pz.to_csv(tmp_dir+'source_Measures.csv', index=False)
#source_mes_sol.to_csv(tmp_dir+'source_Measures-soil.csv', index=False)
#source_prv_eau.to_csv(tmp_dir+'source_Samples-water.csv', index=False)
source_prv_sol.to_csv(tmp_dir+'source_Samples-soil.csv', index=False)
source_an.to_csv(tmp_dir+'source_Analysis.csv', index=False)


print(f'source_bh:{len(source_bh)} ; source_pz:{len(source_pz)} ; source_litho:{len(source_litho)} ; '
     f'source_Fac-uknw:{len(source_ouv)} ; source_an:{len(source_an)} ;\nsource_prv_sol:{len(source_prv_sol)} ;'
     f'source_prv_eau:{len(source_prv_eau)} ; source_mes_pz:{len(source_mes_pz)} ; source_mes_sol:{len(source_mes_sol)} ;')

source_bh:0 ; source_pz:59 ; source_litho:0 ; source_Fac-uknw:0 ; source_an:104 ;
source_prv_sol:59 ;source_prv_eau:45 ; source_mes_pz:0 ; source_mes_sol:0 ;


* **Sheet : 'Synthèse'**

In [460]:
tmp_dir='../../CF_data/Result_traitem/vUmons_logsFor/'
sheet='Synthese'

In [461]:
df = pd.read_excel('../../CF_data/Data_UMONS/Logs_forages_vUmons_2018-03-20.xlsx', 
                   sheet_name='Synthèse', skiprows=1)
df=na_line_drop(df,0)
df=na_col_drop(df,1)
df.replace(r'<|>','', inplace=True, regex=True)
df.replace(r'-$',np.nan, inplace=True, regex=True)

gdf_viewer(df, rows=5)

4 NaN lines dropped

Columns dropped :['Unnamed: 0', 'Unnamed: 15', 'Unnamed: 16']

Rows : 33, columns : 14


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


interactive(children=(IntSlider(value=5, description='rows', max=33, min=5, readout=False), IntSlider(value=12…

In [462]:
df=df[:29]
df.replace('\*','', inplace=True, regex=True)
df['Refus']=df['Refus'].apply(lambda x: 'x' if not pd.isnull(x) else '')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().replace(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Refus']=df['Refus'].apply(lambda x: 'x' if not pd.isnull(x) else '')


In [463]:
name=['ID','X','Y','Z', 'Refus','Long_for', 'RB', 'ALL', 'S_A', 'S_S', 
      'Rb_base','All_top', 'Soc_alt_top','Soc_sn_top']
df=col_ren(df, mode=1, name=name)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


In [464]:
cols=['ID','X','Y','Z', 'Refus','Long_for']

for i in range(len(df)):
    if not pd.isnull(df.loc[i, 'RB']): 
        df.loc[i, 'Nappe']='Remblais'
        df.loc[i, 'Litho_top']=0
        if not pd.isnull(df.loc[i, 'Rb_base']):
            df.loc[i, 'Litho_base']=df.loc[i, 'Rb_base']
        else:
            df.loc[i, 'Litho_base']=df.loc[i, 'Long_for']
    
    if not pd.isnull(df.loc[i, 'ALL']):
        df.loc[i+.2,cols]=df.loc[i,cols]
        df.loc[i+.2, 'Nappe']='Alluvions'
        df.loc[i+.2, 'Litho_top']=df.loc[i, 'All_top']
        if not pd.isnull(df.loc[i, 'S_A']):
            df.loc[i+.2, 'Litho_base']=df.loc[i, 'Soc_alt_top']
        else:
            df.loc[i+.2, 'Litho_base']=df.loc[i, 'Long_for']
    
    if not pd.isnull(df.loc[i, 'S_A']):
        df.loc[i+.5,cols]=df.loc[i,cols]
        df.loc[i+.5, 'Nappe']='Socle altéré'
        df.loc[i+.5, 'Litho_top']=df.loc[i, 'Soc_alt_top']
        if not pd.isnull(df.loc[i, 'S_S']):
            df.loc[i+.5, 'Litho_base']=df.loc[i, 'Soc_sn_top']
        else:
            df.loc[i+.5, 'Litho_base']=df.loc[i, 'Long_for']
            
    if not pd.isnull(df.loc[i, 'S_S']):
        df.loc[i+.7,cols]=df.loc[i,cols]
        df.loc[i+.7, 'Nappe']='Socle sain'
        df.loc[i+.7, 'Litho_top']=df.loc[i, 'Soc_sn_top']
        df.loc[i+.7, 'Litho_base']=df.loc[i, 'Long_for']

df.drop(columns=['RB', 'ALL', 'S_A', 'S_S', 'Rb_base','All_top', 'Soc_alt_top','Soc_sn_top'], inplace=True)
df.sort_index(inplace=True)
df.reset_index(drop=True, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.loc[i+.2,cols]=df.loc[i,cols]


In [465]:
gdf_viewer(df, rows=5, cols=15)

Rows : 51, columns : 9


interactive(children=(IntSlider(value=5, description='rows', max=51, min=5, readout=False), IntSlider(value=9,…

In [466]:
df.columns

Index(['ID', 'X', 'Y', 'Z', 'Refus', 'Long_for', 'Nappe', 'Litho_top',
       'Litho_base'],
      dtype='object')

In [467]:
bh=df[['ID','X','Y','Z','Long_for','Refus']]
bh['Type']='Forage'

litho=df[['ID','X','Y','Z','Litho_top','Litho_base','Nappe']]
source_litho=litho

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bh['Type']='Forage'


In [468]:
if not os.path.exists(tmp_dir):
    os.makedirs(tmp_dir)

bh.to_csv(tmp_dir+sheet+'source_Boreholes.csv', index=False)
#pz.to_csv(tmp_dir+sheet+'_Piezometers.csv', index=False)
#ouv.to_csv(tmp_dir+sheet+'_Unkown-facility.csv', index=False)
litho.to_csv(tmp_dir+sheet+'_Lithology.csv', index=False)
#mes_pz.to_csv(tmp_dir+sheet+'_Measures.csv', index=False)
#mes_sol.to_csv(tmp_dir+sheet+'_Measures-soil.csv', index=False)
#prv_eau.to_csv(tmp_dir+sheet+'_Samples-water.csv', index=False)
#prv_sol.to_csv(tmp_dir+sheet+'_Samples-soil.csv', index=False)
#an.to_csv(tmp_dir+sheet+'_Analysis.csv', index=False)


#source_bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)
#source_pz.to_csv(tmp_dir+'source_Piezometers.csv', index=False)
#source_ouv.to_csv(tmp_dir+'source_Unkown-facility.csv', index=False)
source_litho.to_csv(tmp_dir+'source_Lithology.csv', index=False)
#source_mes_pz.to_csv(tmp_dir+'source_Measures.csv', index=False)
#source_mes_sol.to_csv(tmp_dir+'source_Measures-soil.csv', index=False)
#source_prv_eau.to_csv(tmp_dir+'source_Samples-water.csv', index=False)
#source_prv_sol.to_csv(tmp_dir+'source_Samples-soil.csv', index=False)
#source_an.to_csv(tmp_dir+'source_Analysis.csv', index=False)


print(f'source_bh:{len(source_bh)} ; source_pz:{len(source_pz)} ; source_litho:{len(source_litho)} ; '
     f'source_Fac-uknw:{len(source_ouv)} ; source_an:{len(source_an)} ;\nsource_prv_sol:{len(source_prv_sol)} ;'
     f'source_prv_eau:{len(source_prv_eau)} ; source_mes_pz:{len(source_mes_pz)} ; source_mes_sol:{len(source_mes_sol)} ;')

source_bh:0 ; source_pz:59 ; source_litho:51 ; source_Fac-uknw:0 ; source_an:104 ;
source_prv_sol:59 ;source_prv_eau:45 ; source_mes_pz:0 ; source_mes_sol:0 ;


* **Sheet : 'Sond2017v2'**

In [469]:
tmp_dir='../../CF_data/Result_traitem/vUmons_logsFor/'
sheet='Sond2017v2'

In [470]:
df = pd.read_excel('../../CF_data/Data_UMONS/Logs_forages_vUmons_2018-03-20.xlsx', 
                   sheet_name='Sond2017v2', skiprows=0)
df=na_line_drop(df,0)
df=na_col_drop(df,1)
df.replace(r'<|>','', inplace=True, regex=True)
df.replace(r'-$',np.nan, inplace=True, regex=True)

gdf_viewer(df, rows=5)


Columns dropped :['Unnamed: 18', 'Unnamed: 19', 'Unnamed: 20', 'Unnamed: 21', 'Unnamed: 22', 'Unnamed: 23', 'Unnamed: 24', 'Unnamed: 25', 'Unnamed: 26', 'Unnamed: 27', 'Unnamed: 28', 'Unnamed: 29', 'Unnamed: 30', 'Unnamed: 31']

Rows : 71, columns : 18


interactive(children=(IntSlider(value=5, description='rows', max=71, min=5, readout=False), IntSlider(value=12…

In [471]:
df.replace('\*','', inplace=True, regex=True)
df['Refus']=df['Refus'].apply(lambda x: 'x' if x==1 else '')

In [472]:
name=['R_ID','ID','X','Y','Z','Refus','Date_ouv','Long_for','Z_fond','RB','ALL', 'S_A', 'S_S', 
      'Rb_base','cote_rb','All_top', 'Soc_alt_top','Soc_sn_top']
df=col_ren(df, mode=1, name=name)
df=df[['ID','X','Y','Z','Refus','Date_ouv','Long_for','Z_fond','RB','ALL', 'S_A', 'S_S', 
      'Rb_base','All_top', 'Soc_alt_top','Soc_sn_top']]

In [473]:
cols=['ID','Date_ouv','X','Y','Z','Z_fond','Refus','Long_for']

for i in range(len(df)):    
    if df.loc[i, 'RB']==1: 
        df.loc[i, 'Nappe']='Remblais'
        df.loc[i, 'Litho_top']=0
        
        if not pd.isnull(df.loc[i, 'Rb_base']): df.loc[i, 'Litho_base']=df.loc[i, 'Rb_base']
        else: df.loc[i, 'Litho_base']=df.loc[i, 'Long_for']
    
    val_def=df.loc[i, 'Litho_base'] # temporary value of litho_base if nan
    
    if df.loc[i, 'ALL']==1:
        df.loc[i+.2,cols]=df.loc[i,cols]
        df.loc[i+.2, 'Nappe']='Alluvions'
        
        if not pd.isnull(df.loc[i, 'All_top']): df.loc[i+.2, 'Litho_top']=df.loc[i, 'All_top']
        else: df.loc[i+.2, 'Litho_top']=val_def #df.loc[i, 'litho_base']
            
        if df.loc[i, 'S_A']==1: df.loc[i+.2, 'Litho_base']=df.loc[i, 'Soc_alt_top']
        else: df.loc[i+.2, 'Litho_base']=df.loc[i, 'Long_for']
    
    if df.loc[i, 'S_A']==1:
        df.loc[i+.5,cols]=df.loc[i,cols]
        df.loc[i+.5, 'Nappe']='Socle altéré'
        
        if not pd.isnull(df.loc[i, 'Soc_alt_top']): df.loc[i+.5, 'Litho_top']=df.loc[i, 'Soc_alt_top']
        else: df.loc[i+.5, 'Litho_top']=val_def #df.loc[i+.2, 'litho_base']
        
        if df.loc[i, 'S_S']==1: df.loc[i+.5, 'Litho_base']=df.loc[i, 'Soc_sn_top']
        else: df.loc[i+.5, 'Litho_base']=df.loc[i, 'Long_for']
            
    if df.loc[i, 'S_S']==1:
        df.loc[i+.7,cols]=df.loc[i,cols]
        df.loc[i+.7, 'Nappe']='Socle sain'
        df.loc[i+.7, 'Litho_top']=df.loc[i, 'Soc_sn_top']
        df.loc[i+.7, 'Litho_base']=df.loc[i, 'Long_for']

df.drop(columns=['RB', 'ALL', 'S_A', 'S_S','Rb_base','All_top', 'Soc_alt_top','Soc_sn_top'], inplace=True)
df.sort_index(inplace=True)
df.reset_index(drop=True, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


In [474]:
df=df[:-1]

In [475]:
gdf_viewer(df, rows=5, cols=15)

Rows : 109, columns : 11


interactive(children=(IntSlider(value=5, description='rows', max=109, min=5, readout=False), IntSlider(value=1…

In [476]:
bh=df[['ID','X','Y','Z','Z_fond','Date_ouv','Long_for','Refus']]
bh['Type']='Forage'

litho=df[['ID','X','Y','Z','Litho_top','Litho_base','Nappe']]
source_litho = source_litho.merge(df, 'outer')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bh['Type']='Forage'


In [477]:
if not os.path.exists(tmp_dir):
    os.makedirs(tmp_dir)

bh.to_csv(tmp_dir+sheet+'source_Boreholes.csv', index=False)
#pz.to_csv(tmp_dir+sheet+'_Piezometers.csv', index=False)
#ouv.to_csv(tmp_dir+sheet+'_Unkown-facility.csv', index=False)
litho.to_csv(tmp_dir+sheet+'_Lithology.csv', index=False)
#mes_pz.to_csv(tmp_dir+sheet+'_Measures.csv', index=False)
#mes_sol.to_csv(tmp_dir+sheet+'_Measures-soil.csv', index=False)
#prv_eau.to_csv(tmp_dir+sheet+'_Samples-water.csv', index=False)
#prv_sol.to_csv(tmp_dir+sheet+'_Samples-soil.csv', index=False)
#an.to_csv(tmp_dir+sheet+'_Analysis.csv', index=False)


#source_bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)
#source_pz.to_csv(tmp_dir+'source_Piezometers.csv', index=False)
#source_ouv.to_csv(tmp_dir+'source_Unkown-facility.csv', index=False)
source_litho.to_csv(tmp_dir+'source_Lithology.csv', index=False)
#source_mes_pz.to_csv(tmp_dir+'source_Measures.csv', index=False)
#source_mes_sol.to_csv(tmp_dir+'source_Measures-soil.csv', index=False)
#source_prv_eau.to_csv(tmp_dir+'source_Samples-water.csv', index=False)
#source_prv_sol.to_csv(tmp_dir+'source_Samples-soil.csv', index=False)
#source_an.to_csv(tmp_dir+'source_Analysis.csv', index=False)


print(f'source_bh:{len(source_bh)} ; source_pz:{len(source_pz)} ; source_litho:{len(source_litho)} ; '
     f'source_Fac-uknw:{len(source_ouv)} ; source_an:{len(source_an)} ;\nsource_prv_sol:{len(source_prv_sol)} ;'
     f'source_prv_eau:{len(source_prv_eau)} ; source_mes_pz:{len(source_mes_pz)} ; source_mes_sol:{len(source_mes_sol)} ;')

source_bh:0 ; source_pz:59 ; source_litho:119 ; source_Fac-uknw:0 ; source_an:104 ;
source_prv_sol:59 ;source_prv_eau:45 ; source_mes_pz:0 ; source_mes_sol:0 ;


# Processing for new data added - April 2021

## 15-Profils de sol et données de terrain 2019.xlsx
* **Sheet : 'Log'**

In [575]:
# New file, so the source variables must be overwritten !!
_df=pd.DataFrame()
source_mes_pz, source_mes_sol, source_pz, source_prv_eau, source_prv_sol =  _df, _df, _df, _df, _df
source_ouv, source_an, source_litho, source_bh = _df, _df, _df, _df

In [576]:
tmp_dir='../../CF_data/Result_traitem/donnees_terrain_2019/'
sheet='Log'

In [577]:
df = pd.read_excel('../../CF_data/Data_UMONS/Profils_de_sol_Siterem- extension_pilote_et_pilote/'
                   'Profils de sol et donnees de terrain 2019.xlsx', 
                   sheet_name='Log', skiprows=0)
df=na_line_drop(df,0)
df=na_col_drop(df,2)
df.replace(r'<|>','', inplace=True, regex=True)
df.replace(r'-$',np.nan, inplace=True, regex=True)

gdf_viewer(df, rows=5)

153 NaN lines dropped

Columns dropped :['Unnamed: 5', 'Unnamed: 6']

Rows : 98, columns : 5


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


interactive(children=(IntSlider(value=5, description='rows', max=98, min=5, readout=False), IntSlider(value=5,…

In [578]:
name = ['ID','Litho_top', 'Litho_base', 'Keyword', 'Description']
df = col_ren(df, name=name, mode=1, )
df = df[1:]
df['Date_ouv'] = dtm.datetime(2019,12,18)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Date_ouv'] = dtm.datetime(2019,12,18)


In [579]:
df['ID'] = df['ID'].apply(lambda x: 'F'+str(x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['ID'] = df['ID'].apply(lambda x: 'F'+str(x))


In [580]:
df.drop(index=df.query('Litho_base.isnull() or Litho_top.isnull()').index, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


In [581]:
compute_BH_length(df)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[top_col] = df[top_col].astype('float64')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[base_col] = df[base_col].astype('float64')
A value is trying to be set on a copy of a slice from a DataFrame.
Try 

In [582]:
df.query('Litho_base.isnull() or Litho_top.isnull()')

Unnamed: 0,ID,Profondeur,Litho_top,Litho_base,Keyword,Description,Date_ouv


In [583]:
gdf_viewer(df, rows=5, cols=15)

Rows : 93, columns : 7


interactive(children=(IntSlider(value=5, description='rows', max=93, min=5, readout=False), IntSlider(value=7,…

In [584]:
bh = df[1:62]
pza = df[65:80] #piezair
pz = df[83:]

In [585]:
bh.reset_index(drop=True, inplace=True)
pza.reset_index(drop=True, inplace=True)
pz.reset_index(drop=True, inplace=True)

In [586]:
bh.insert(1,'Type', 'Forage')
bh.insert(1,'Zone', 'Extension Pilote')
pza.insert(1,'Type', 'Piezair')
pza.insert(1,'Zone', 'Extension Pilote')
pz.insert(1,'Type', 'Piezo')
pz.insert(1,'Zone', 'Mini-Pilote')

In [587]:
litho=bh.append(pza)
litho=litho.append(pz)
litho=litho[['ID','Type','Zone','Litho_top','Litho_base','Description','Keyword']]

In [588]:
gdf_viewer(litho, rows=3)

Rows : 86, columns : 7


interactive(children=(IntSlider(value=3, description='rows', max=86, min=3, readout=False), IntSlider(value=7,…

In [589]:
source_litho=litho

In [590]:
if not os.path.exists(tmp_dir):
    os.makedirs(tmp_dir)

bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)
pz.to_csv(tmp_dir+sheet+'_Piezometers.csv', index=False)
pza.to_csv(tmp_dir+sheet+'_Piezairs.csv', index=False)
#ouv.to_csv(tmp_dir+sheet+'_Unkown-facility.csv', index=False)
litho.to_csv(tmp_dir+sheet+'_Lithology.csv', index=False)
#mes_pz.to_csv(tmp_dir+sheet+'_Measures.csv', index=False)
#mes_sol.to_csv(tmp_dir+sheet+'_Measures-soil.csv', index=False)
#prv_eau.to_csv(tmp_dir+sheet+'_Samples-water.csv', index=False)
#prv_sol.to_csv(tmp_dir+sheet+'_Samples-soil.csv', index=False)
#an.to_csv(tmp_dir+sheet+'_Analysis.csv', index=False)


source_bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)
source_pz.to_csv(tmp_dir+'source_Piezometers.csv', index=False)
#source_ouv.to_csv(tmp_dir+'source_Unkown-facility.csv', index=False)
source_litho.to_csv(tmp_dir+'source_Lithology.csv', index=False)
#source_mes_pz.to_csv(tmp_dir+'source_Measures.csv', index=False)
#source_mes_sol.to_csv(tmp_dir+'source_Measures-soil.csv', index=False)
#source_prv_eau.to_csv(tmp_dir+'source_Samples-water.csv', index=False)
#source_prv_sol.to_csv(tmp_dir+'source_Samples-soil.csv', index=False)
#source_an.to_csv(tmp_dir+'source_Analysis.csv', index=False)


print(f'source_bh:{len(source_bh)} ; source_pz:{len(source_pz)} ; source_litho:{len(source_litho)} ; '
     f'source_Fac-uknw:{len(source_ouv)} ; source_an:{len(source_an)} ;\nsource_prv_sol:{len(source_prv_sol)} ;'
     f'source_prv_eau:{len(source_prv_eau)} ; source_mes_pz:{len(source_mes_pz)} ; source_mes_sol:{len(source_mes_sol)} ;')

source_bh:0 ; source_pz:0 ; source_litho:86 ; source_Fac-uknw:0 ; source_an:0 ;
source_prv_sol:0 ;source_prv_eau:0 ; source_mes_pz:0 ; source_mes_sol:0 ;


* **Sheet : 'Echantillon'+'Organoleptique**

In [591]:
tmp_dir='../../CF_data/Result_traitem/donnees_terrain_2019/'
sheet='Echantillon'

In [592]:
df = pd.read_excel('../../CF_data/Data_UMONS/Profils_de_sol_Siterem- extension_pilote_et_pilote/'
                   'Profils de sol et donnees de terrain 2019.xlsx', 
                   sheet_name='Echantillon', skiprows=1)
df=na_line_drop(df,0)
df=na_col_drop(df,2)
df.replace(r'<|>','', inplace=True, regex=True)
df.replace(r'-$',np.nan, inplace=True, regex=True)

gdf_viewer(df, rows=5)

16 NaN lines dropped

Columns dropped :['Unnamed: 4', 'Unnamed: 5']

Rows : 67, columns : 4


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


interactive(children=(IntSlider(value=5, description='rows', max=67, min=5, readout=False), IntSlider(value=4,…

In [593]:
name=['ID','Ech_top', 'Ech_base', 'ID_ech']
df=col_ren(df, name=name, mode=1)
df.insert(1,'Type_ech','Sol')

In [594]:
df.drop(index=[43,44,55,56,66], inplace=True)
df.reset_index(drop=True, inplace=True)

In [595]:
ech=df.copy()

In [596]:
df = pd.read_excel('../../CF_data/Data_UMONS/Profils_de_sol_Siterem- extension_pilote_et_pilote/'
                   'Profils de sol et donnees de terrain 2019.xlsx', 
                   sheet_name='Organoleptique', skiprows=1)
df=na_line_drop(df,0)
df=na_col_drop(df,4)
df.replace(r'<|>','', inplace=True, regex=True)
df.replace(r'-$',np.nan, inplace=True, regex=True)

gdf_viewer(df, rows=5)

20 NaN lines dropped

Columns dropped :['Unnamed: 5', 'Unnamed: 6', 'Unnamed: 7', 'Unnamed: 8', 'Unnamed: 9', 'Unnamed: 10', 'Unnamed: 11', 'Unnamed: 12', 'Unnamed: 13', 'Unnamed: 14', 'Unnamed: 15']

Rows : 20, columns : 5


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


interactive(children=(IntSlider(value=5, description='rows', max=20, min=5, readout=False), IntSlider(value=5,…

In [597]:
name=['ID','Pol_top', 'Pol_base','Polluant','Intensite']
df=col_ren(df, name=name, mode=1)

In [598]:
df.drop(index=[10,11,14,15], inplace=True)
df.reset_index(drop=True, inplace=True)

In [599]:
mdf=gdf_merger(ech, df, col='ID', how='outer')[0]

In [600]:
gdf_viewer(mdf)

Rows : 70, columns : 9


interactive(children=(IntSlider(value=10, description='rows', max=70, min=10, readout=False), IntSlider(value=…

In [601]:
prv_sol=mdf
source_prv_sol=prv_sol

In [602]:
if not os.path.exists(tmp_dir):
    os.makedirs(tmp_dir)

#bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)
#pz.to_csv(tmp_dir+sheet+'_Piezometers.csv', index=False)
#ouv.to_csv(tmp_dir+sheet+'_Unkown-facility.csv', index=False)
#litho.to_csv(tmp_dir+sheet+'_Lithology.csv', index=False)
#mes_pz.to_csv(tmp_dir+sheet+'_Measures.csv', index=False)
#mes_sol.to_csv(tmp_dir+sheet+'_Measures-soil.csv', index=False)
#prv_eau.to_csv(tmp_dir+sheet+'_Samples-water.csv', index=False)
prv_sol.to_csv(tmp_dir+sheet+'_Samples-soil.csv', index=False)
#an.to_csv(tmp_dir+sheet+'_Analysis.csv', index=False)


#source_bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)
#source_pz.to_csv(tmp_dir+'source_Piezometers.csv', index=False)
#source_ouv.to_csv(tmp_dir+'source_Unkown-facility.csv', index=False)
#source_litho.to_csv(tmp_dir+'source_Lithology.csv', index=False)
#source_mes_pz.to_csv(tmp_dir+'source_Measures.csv', index=False)
#source_mes_sol.to_csv(tmp_dir+'source_Measures-soil.csv', index=False)
#source_prv_eau.to_csv(tmp_dir+'source_Samples-water.csv', index=False)
source_prv_sol.to_csv(tmp_dir+'source_Samples-soil.csv', index=False)
#source_an.to_csv(tmp_dir+'source_Analysis.csv', index=False)


print(f'source_bh:{len(source_bh)} ; source_pz:{len(source_pz)} ; source_litho:{len(source_litho)} ; '
     f'source_Fac-uknw:{len(source_ouv)} ; source_an:{len(source_an)} ;\nsource_prv_sol:{len(source_prv_sol)} ;'
     f'source_prv_eau:{len(source_prv_eau)} ; source_mes_pz:{len(source_mes_pz)} ; source_mes_sol:{len(source_mes_sol)} ;')

source_bh:0 ; source_pz:0 ; source_litho:86 ; source_Fac-uknw:0 ; source_an:0 ;
source_prv_sol:70 ;source_prv_eau:0 ; source_mes_pz:0 ; source_mes_sol:0 ;


* **Sheet : 'Données de forage'**

In [603]:
tmp_dir='../../CF_data/Result_traitem/donnees_terrain_2019/'
sheet='Donnees_forage'

In [604]:
df = pd.read_excel('../../CF_data/Data_UMONS/Profils_de_sol_Siterem- extension_pilote_et_pilote/'
                   'Profils de sol et donnees de terrain 2019.xlsx', 
                   sheet_name='Données de forage', skiprows=1)
df=na_line_drop(df,0)
df=na_col_drop(df,2)
df.replace(r'<|>','', inplace=True, regex=True)
df.replace(r'-$',np.nan, inplace=True, regex=True)

gdf_viewer(df, rows=5)

25 NaN lines dropped

Columns dropped :['RAS', 'RAS.1', 'Niv. Eau p/r sol', 'RAS.2', 'Unnamed: 18', 'PZ Prof.\nmesurée']

Rows : 27, columns : 14


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


interactive(children=(IntSlider(value=5, description='rows', max=27, min=5, readout=False), IntSlider(value=12…

In [605]:
name=['ID', 'X', 'Y', 'Z', 'Date_ouv', 'Long_for', 'Methode', 'Diam_for','Rmq', 'Long_pz', 'Diam_pz', 
      'Crep_long','Societe', 'Resp_chantier']
df=col_ren(df, name=name, mode=1)
df.drop(index=[16,23], inplace=True)
df.reset_index(drop=True, inplace=True)

In [606]:
df.insert(5, 'Type', '')
df.loc[:15,'Type']='Forage'
df.loc[16:21,'Type']='Piezair'
df.loc[22:,'Type']='Piezo'

In [607]:
df.loc[9,'ID']='224 bis'

In [608]:
df['Refus'] = ''
df['Type_refus']=''

for i in range(len(df['Rmq'])):
    val = str(df.loc[i,'Rmq'])
    if re.search('[Bb]loqué', val) :
        df.loc[i,'Refus'] = 'x'
        
        if re.search('[lL]aitier', val):
            df.loc[i,'Type_refus'] = 'Laitier'
        elif re.search('[Bb]éton', val):
            df.loc[i,'Type_refus'] = 'Béton'
        elif re.search('[Mm]atériaux', val):
            df.loc[i,'Type_refus'] = 'Matériaux indurés' 
    else: 
        df.loc[i,'Refus'] = '' 

df['Diam_int_pz'] = df['Diam_pz'].apply(lambda x: pd.to_numeric(x.replace('mm','').split('x')[1]) if not pd.isnull(x) else x)
df['Diam_ext_pz'] = df['Diam_pz'].apply(lambda x: pd.to_numeric(x.replace('mm','').split('x')[0]) if not pd.isnull(x) else x)
df['Diam_for'] = df['Diam_for'].apply(lambda x: pd.to_numeric(x) if not pd.isnull(x) else x)

df.insert(10, 'Diam_ext_pz', df.pop('Diam_ext_pz')) # move to a specified position
df.insert(11, 'Diam_int_pz', df.pop('Diam_int_pz'))
df.drop(columns=['Rmq', 'Diam_pz'], axis=1, inplace=True)
df.drop(df.query("ID!=ID").index, inplace=True) # delete all ID='NaN' lines
df.reset_index(drop=True, inplace=True)

gen_id_dated(df,'ID','Date_ouv')  

Generation of ID-dated...
Using column ' Date_ouv ' in the (geo)dataframe !
Process ended, check the (geo)dataframe


In [609]:
pz = df.query("Type=='Piezo'")
pza=df.query("Type=='Piezair'")
bh = df.query("Type=='Forage'")

pz.reset_index(inplace=True, drop=True)
pza.reset_index(inplace=True, drop=True)
bh.reset_index(inplace=True, drop=True)

In [610]:
gdf_viewer(df, rows=3)

Rows : 25, columns : 18


interactive(children=(IntSlider(value=3, description='rows', max=25, min=3, readout=False), IntSlider(value=12…

In [611]:
source_pz = pz
source_pza = pza
source_bh = bh

In [612]:
if not os.path.exists(tmp_dir):
    os.makedirs(tmp_dir)

bh.to_csv(tmp_dir+sheet+'_Boreholes.csv', index=False)
pz.to_csv(tmp_dir+sheet+'_Piezometers.csv', index=False)
pza.to_csv(tmp_dir+sheet+'_Piezairs.csv', index=False)
#ouv.to_csv(tmp_dir+sheet+'_Unkown-facility.csv', index=False)
#litho.to_csv(tmp_dir+sheet+'_Lithology.csv', index=False)
#mes_pz.to_csv(tmp_dir+sheet+'_Measures.csv', index=False)
#mes_sol.to_csv(tmp_dir+sheet+'_Measures-soil.csv', index=False)
#prv_eau.to_csv(tmp_dir+sheet+'_Samples-water.csv', index=False)
#prv_sol.to_csv(tmp_dir+sheet+'_Samples-soil.csv', index=False)
#an.to_csv(tmp_dir+sheet+'_Analysis.csv', index=False)


source_bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)
source_pz.to_csv(tmp_dir+'source_Piezometers.csv', index=False)
source_pza.to_csv(tmp_dir+'source_Piezairs.csv', index=False)
#source_ouv.to_csv(tmp_dir+'source_Unkown-facility.csv', index=False)
#source_litho.to_csv(tmp_dir+'source_Lithology.csv', index=False)
#source_mes_pz.to_csv(tmp_dir+'source_Measures.csv', index=False)
#source_mes_sol.to_csv(tmp_dir+'source_Measures-soil.csv', index=False)
#source_prv_eau.to_csv(tmp_dir+'source_Samples-water.csv', index=False)
#source_prv_sol.to_csv(tmp_dir+'source_Samples-soil.csv', index=False)
#source_an.to_csv(tmp_dir+'source_Analysis.csv', index=False)


print(f'source_bh:{len(source_bh)} ; source_pz:{len(source_pz)}; source_pza:{len(source_pza)} ;'
      f'source_litho:{len(source_litho)} ; '
     f'source_Fac-uknw:{len(source_ouv)} ; source_an:{len(source_an)} ;\nsource_prv_sol:{len(source_prv_sol)} ;'
     f'source_prv_eau:{len(source_prv_eau)} ; source_mes_pz:{len(source_mes_pz)} ; source_mes_sol:{len(source_mes_sol)} ;')

source_bh:16 ; source_pz:3; source_pza:6 ;source_litho:86 ; source_Fac-uknw:0 ; source_an:0 ;
source_prv_sol:70 ;source_prv_eau:0 ; source_mes_pz:0 ; source_mes_sol:0 ;


* **Sheet : 'Equipement'**

In [613]:
tmp_dir='../../CF_data/Result_traitem/donnees_terrain_2019/'
sheet='Equipement'

In [614]:
df = pd.read_excel('../../CF_data/Data_UMONS/Profils_de_sol_Siterem- extension_pilote_et_pilote/'
                   'Profils de sol et donnees de terrain 2019.xlsx', 
                   sheet_name='Equipement', skiprows=1)
df=na_line_drop(df,0)
df=na_col_drop(df,2)
df.replace(r'<|>','', inplace=True, regex=True)
df.replace(r'-$',np.nan, inplace=True, regex=True)

gdf_viewer(df, rows=5)

37 NaN lines dropped
Rows : 35, columns : 7


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


interactive(children=(IntSlider(value=5, description='rows', max=35, min=5, readout=False), IntSlider(value=7,…

In [615]:
df.drop(columns=['Déplacement'], inplace=True)
name=['ID','Equip_top', 'Equip_base', 'Diam_for', 'Diam_ext_pz', 'Legende']
df=col_ren(df, mode=1, name=name)

In [616]:
df.drop(index=[24,25], inplace=True)
df.reset_index(drop=True, inplace=True)

In [638]:
compute_BH_length(df, top_col='Equip_top', base_col='Equip_base')

In [641]:
coi = ['ID', 'Profondeur', 'Diam_for', 'Diam_ext_pz']
pz=df[coi].drop_duplicates(['ID'])
pz['Type'] = 'Piezo'

In [643]:
gdf_viewer(df)

Rows : 33, columns : 7


interactive(children=(IntSlider(value=10, description='rows', max=33, min=10, readout=False), IntSlider(value=…

In [644]:
equip=df
source_equip=equip

In [645]:
if not os.path.exists(tmp_dir):
    os.makedirs(tmp_dir)

#bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)
pz.to_csv(tmp_dir+sheet+'_Piezometers.csv', index=False)
#ouv.to_csv(tmp_dir+sheet+'_Unkown-facility.csv', index=False)
#litho.to_csv(tmp_dir+sheet+'_Lithology.csv', index=False)
#mes_pz.to_csv(tmp_dir+sheet+'_Measures.csv', index=False)
#mes_sol.to_csv(tmp_dir+sheet+'_Measures-soil.csv', index=False)
#prv_eau.to_csv(tmp_dir+sheet+'_Samples-water.csv', index=False)
#prv_sol.to_csv(tmp_dir+sheet+'_Samples-soil.csv', index=False)
#an.to_csv(tmp_dir+sheet+'_Analysis.csv', index=False)
equip.to_csv(tmp_dir+sheet+'_Equipment.csv', index=False)

#source_bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)
source_pz.to_csv(tmp_dir+'source_Piezometers.csv', index=False)
#source_ouv.to_csv(tmp_dir+'source_Unkown-facility.csv', index=False)
#source_litho.to_csv(tmp_dir+'source_Lithology.csv', index=False)
#source_mes_pz.to_csv(tmp_dir+'source_Measures.csv', index=False)
#source_mes_sol.to_csv(tmp_dir+'source_Measures-soil.csv', index=False)
#source_prv_eau.to_csv(tmp_dir+'source_Samples-water.csv', index=False)
#source_prv_sol.to_csv(tmp_dir+'source_Samples-soil.csv', index=False)
#source_an.to_csv(tmp_dir+'source_Analysis.csv', index=False)
source_equip.to_csv(tmp_dir+'source_Equipment.csv', index=False)

print(f'source_bh:{len(source_bh)} ; source_pz:{len(source_pz)} ; source_litho:{len(source_litho)} ; '
     f'source_Fac-uknw:{len(source_ouv)} ; source_an:{len(source_an)} ;\nsource_prv_sol:{len(source_prv_sol)} ;'
     f'source_prv_eau:{len(source_prv_eau)} ; source_mes_pz:{len(source_mes_pz)} ; source_mes_sol:{len(source_mes_sol)} ;')

source_bh:16 ; source_pz:3 ; source_litho:86 ; source_Fac-uknw:0 ; source_an:0 ;
source_prv_sol:70 ;source_prv_eau:0 ; source_mes_pz:0 ; source_mes_sol:0 ;


* **Sheet : 'Piézométrie'**

In [646]:
tmp_dir='../../CF_data/Result_traitem/donnees_terrain_2019/'
sheet='piezometrie'

In [647]:
df = pd.read_excel('../../CF_data/Data_UMONS/Profils_de_sol_Siterem- extension_pilote_et_pilote/'
                   'Profils de sol et donnees de terrain 2019.xlsx', 
                   sheet_name='Piézométrie', skiprows=1)
df=na_line_drop(df,0)
df=na_col_drop(df,2)
df.replace(r'<|>','', inplace=True, regex=True)
df.replace(r'-$',np.nan, inplace=True, regex=True)

gdf_viewer(df, rows=5)

4 NaN lines dropped

Columns dropped :['Unnamed: 4', 'Unnamed: 5', 'Unnamed: 6', 'Unnamed: 7', 'Unnamed: 8']

Rows : 3, columns : 4


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


interactive(children=(IntSlider(value=3, description='rows', max=3, min=3, readout=False), IntSlider(value=4, …

In [648]:
name=['ID','Niv_pz_sol', 'Type_ech', 'Date_mes']
df=col_ren(df, name=name, mode=1)

In [649]:
mes_pz=df
source_mes_pz=mes_pz

In [650]:
if not os.path.exists(tmp_dir):
    os.makedirs(tmp_dir)

#bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)
#pz.to_csv(tmp_dir+sheet+'_Piezometers.csv', index=False)
#ouv.to_csv(tmp_dir+sheet+'_Unkown-facility.csv', index=False)
#litho.to_csv(tmp_dir+sheet+'_Lithology.csv', index=False)
mes_pz.to_csv(tmp_dir+sheet+'_Measures.csv', index=False)
#mes_sol.to_csv(tmp_dir+sheet+'_Measures-soil.csv', index=False)
#prv_eau.to_csv(tmp_dir+sheet+'_Samples-water.csv', index=False)
#prv_sol.to_csv(tmp_dir+sheet+'_Samples-soil.csv', index=False)
#an.to_csv(tmp_dir+sheet+'_Analysis.csv', index=False)


#source_bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)
#source_pz.to_csv(tmp_dir+'source_Piezometers.csv', index=False)
#source_ouv.to_csv(tmp_dir+'source_Unkown-facility.csv', index=False)
#source_litho.to_csv(tmp_dir+'source_Lithology.csv', index=False)
source_mes_pz.to_csv(tmp_dir+'source_Measures.csv', index=False)
#source_mes_sol.to_csv(tmp_dir+'source_Measures-soil.csv', index=False)
#source_prv_eau.to_csv(tmp_dir+'source_Samples-water.csv', index=False)
#source_prv_sol.to_csv(tmp_dir+'source_Samples-soil.csv', index=False)
#source_an.to_csv(tmp_dir+'source_Analysis.csv', index=False)


print(f'source_bh:{len(source_bh)} ; source_pz:{len(source_pz)} ; source_litho:{len(source_litho)} ; '
     f'source_Fac-uknw:{len(source_ouv)} ; source_an:{len(source_an)} ;\nsource_prv_sol:{len(source_prv_sol)} ;'
     f'source_prv_eau:{len(source_prv_eau)} ; source_mes_pz:{len(source_mes_pz)} ; source_mes_sol:{len(source_mes_sol)} ;')

source_bh:16 ; source_pz:3 ; source_litho:86 ; source_Fac-uknw:0 ; source_an:0 ;
source_prv_sol:70 ;source_prv_eau:0 ; source_mes_pz:3 ; source_mes_sol:0 ;


## 16-Résultats SOL extension pilote et piézairs.xlsx
* **Sheet : 'Résult SOL'**

In [651]:
# New file, so the source variables must be overwritten !!
_df=pd.DataFrame()
source_mes_pz, source_mes_sol, source_pz, source_prv_eau, source_prv_sol =  _df, _df, _df, _df, _df
source_ouv, source_an, source_litho, source_bh = _df, _df, _df, _df

In [652]:
tmp_dir='../../CF_data/Result_traitem/donnees_terrain_2019/'
sheet='Result_Sol'

In [653]:
df = pd.read_excel('../../CF_data/Data_UMONS/Profils_de_sol_Siterem- extension_pilote_et_pilote/'
                   'Resultats SOL extension pilote et piezairs.xlsx', 
                   sheet_name='Résult SOL', skiprows=5)
df=na_line_drop(df,0)
df=na_col_drop(df,1)
df.replace(r'<|>','', inplace=True, regex=True)
df.replace(r'-$',np.nan, inplace=True, regex=True)

gdf_viewer(df, rows=5)

  warn(msg)


4 NaN lines dropped

Columns dropped :['Unnamed: 0', 'Unnamed: 2', 'Unnamed: 61', 'Unnamed: 62', 'Unnamed: 63', 'Unnamed: 64', 'Unnamed: 65', 'Unnamed: 66', 'Unnamed: 67', 'Unnamed: 69', 'Unnamed: 70', 'Unnamed: 71', 'Unnamed: 72', 'Unnamed: 73', 'Unnamed: 74', 'Unnamed: 75', 'Unnamed: 76', 'Unnamed: 77', 'Unnamed: 78', 'Unnamed: 79', 'Unnamed: 80', 'Unnamed: 81', 'Unnamed: 82', 'Unnamed: 83', 'Unnamed: 84', 'Unnamed: 85', 'Unnamed: 86', 'Unnamed: 87', 'Unnamed: 88', 'Unnamed: 89', 'Unnamed: 90', 'Unnamed: 91', 'Unnamed: 92', 'Unnamed: 93', 'Unnamed: 94', 'Unnamed: 95', 'Unnamed: 96', 'Unnamed: 97', 'Unnamed: 98', 'Unnamed: 99', 'Unnamed: 100', 'Unnamed: 101', 'Unnamed: 102', 'Unnamed: 103', 'Unnamed: 104', 'Unnamed: 105', 'Unnamed: 106', 'Unnamed: 107', 'Unnamed: 108', 'Unnamed: 109', 'Unnamed: 110', 'Unnamed: 111', 'Unnamed: 112', 'Unnamed: 113', 'Unnamed: 114', 'Unnamed: 115', 'Unnamed: 116', 'Unnamed: 117', 'Unnamed: 118', 'Unnamed: 119', 'Unnamed: 120', 'Unnamed: 121', 'Unnamed: 1

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


interactive(children=(IntSlider(value=5, description='rows', max=135, min=5, readout=False), IntSlider(value=1…

In [654]:
prv_sol=df.loc[:35]
an=df.loc[36:]

In [655]:
an.loc[0.5] = df.loc[0] # put data on first line
an = an.sort_index().reset_index(drop=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value, self.name)


In [656]:
prv_sol=prv_sol.transpose()
prv_sol.reset_index(drop=True, inplace=True)
prv_sol=col_ren(prv_sol, 1)

In [657]:
prv_sol=dble_col_drop(prv_sol)

column(s) dropped: ['9:Autre zone suspecte investiguée', '27:température pour mes. pH', '30:pH (H20)']


In [658]:
prv_sol.drop(list(range(3)), axis=0, inplace=True)
prv_sol.reset_index(drop=True, inplace=True)
prv_sol=na_col_drop(prv_sol,3)
prv_sol=na_line_drop(prv_sol,3)
prv_sol.reset_index(drop=True, inplace=True)


Columns dropped :['Matières organiques', 'SPP/zone suspecte investiguée', 'Autre zone suspecte investiguée', 'Parcelle', 'X Lambert', 'Y Lambert', "Type d'étude (*)", 'Organoleptique couleur suspecte', 'Organoleptique odeur intensité (***)', 'Organoleptique odeur type', 'MO et COT', 'pH', 'GRANULOMETRIE', 'Fraction argileuse']



In [659]:
prv_sol=prv_sol[:-1]
prv_sol.drop(columns=['broyage'], inplace=True)

In [660]:
name=['ID_ech','Ech_top', 'Ech_base','MS','Date_prv','Long_for','Refus','Description','MO','COT','pH_KCl', 
      'Temp_pH_mes','pH_H20','Fract_2','Fract_2+', 'Fract_min_2µ','Fract_min_50µ','Fract_min_2']
prv_sol=col_ren(prv_sol, name=name, mode=1)

In [661]:
set(prv_sol.Description)

{'R', 'R ', 'TN', 'TN ', nan}

In [662]:
for i in range(len(prv_sol['Description'])):
    x = prv_sol.loc[i,'Description']
    if x in ['R','R ']: prv_sol.loc[i,'Description']='Remblais'
    elif x in ['TN','TN ']: prv_sol.loc[i,'Description']='Terrain naturel'

prv_sol['Refus']=prv_sol['Refus'].apply(lambda x: 'x' if not pd.isnull(x) else '')
prv_sol.insert(1,'Type_ech','Sol')#

In [663]:
for i in range(len(prv_sol)):
    x=prv_sol.loc[i,'ID_ech']
    r=re.search('([\w|\d]+)\n.+$', x)
    if r: 
        prv_sol.loc[i,'ID_ech']='226/'+r.group(1) # Rename borehole 304 to 226 because of conflict with piezair

In [664]:
prv_sol.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 55 entries, 0 to 54
Data columns (total 19 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   ID_ech         55 non-null     object
 1   Type_ech       55 non-null     object
 2   Ech_top        53 non-null     object
 3   Ech_base       53 non-null     object
 4   MS             55 non-null     object
 5   Date_prv       55 non-null     object
 6   Long_for       53 non-null     object
 7   Refus          55 non-null     object
 8   Description    53 non-null     object
 9   MO             5 non-null      object
 10  COT            4 non-null      object
 11  pH_KCl         4 non-null      object
 12  Temp_pH_mes    4 non-null      object
 13  pH_H20         4 non-null      object
 14  Fract_2        55 non-null     object
 15  Fract_2+       55 non-null     object
 16  Fract_min_2µ   5 non-null      object
 17  Fract_min_50µ  5 non-null      object
 18  Fract_min_2    5 non-null      o

In [665]:
gdf_viewer(prv_sol, rows=3)

Rows : 55, columns : 19


interactive(children=(IntSlider(value=3, description='rows', max=55, min=3, readout=False), IntSlider(value=12…

In [666]:
an=an.transpose()
an.reset_index(drop=True, inplace=True)

In [667]:
an=col_ren(an, 1)

In [668]:
an.rename(columns={an.columns[0]:'ID_ech'}, inplace=True)

In [669]:
an.columns

Index(['ID_ech', 'METAUX LOURDS', 'Arsenic', 'Cadmium', 'Chrome', 'Chrome VI',
       'Cobalt', 'Cuivre', 'Mercure', 'Plomb', 'Nickel', 'Zinc', 'CYANURES',
       'cyanure (libre)', 'cyanure (totaux)', 'cyanure (APE)',
       'cyanure complex', 'thiocyanate', 'COMPOSES AROMATIQUES VOLATILS',
       'Benzène', 'Toluène', 'Éthylbenzène', 'Orthoxylène',
       'Para- et métaxylène', 'Xylènes', 'Styrène', 'BTEX totaux', 'PHENOLS',
       'Phénol', 'Indice phénol', 'HYDROCARBURES AROMATIQUES POLYCYCLIQUES',
       'Naphtalène', 'Acénaphtylène', 'Acénaphtène', 'Fluorène', 'col_35',
       'Anthracène', 'Fluoranthène', 'Pyrène', 'Benzo(a)anthracène',
       'Chrysène', 'Benzo(b)fluoranthène', 'Benzo(k)fluoranthène',
       'Benzo(a)pyrène', 'Dibenzo(ah)anthracène', 'Benzo(ghi)pérylène',
       'Indéno(1,2,3-cd)pyrène', 'HAP Totaux (16) - EPA',
       'COMPOSES ORGANOHALOGENES VOLATILS', 'Tétrachloroéthylène',
       'Trichloroéthylène', '1,1-dichloroéthène', 'Cis-1,2-dichloroéthène',
       '

In [670]:
an=an[an.columns[:-17]]
an.rename(columns={'col_35':'Phénanthrène'}, inplace=True)

In [671]:
an=col_ren(an, name=pol_field_model, mode=1)

In [672]:
an=dble_col_drop(an)

column(s) dropped: []


In [673]:
an.drop(list(range(3)), axis=0, inplace=True)
an.reset_index(drop=True, inplace=True)
an=na_col_drop(an,1)
an.insert(1,'Type_ech','Sol')


Columns dropped :['METAUX LOURDS', 'CYANURES', 'COMPOSES AROMATIQUES VOLATILS', 'BTEX_tot', 'PHENOLS', 'HYDROCARBURES AROMATIQUES POLYCYCLIQUES', 'COMPOSES ORGANOHALOGENES VOLATILS', 'HYDROCARBURES TOTAUX', 'METHYL-TERT-BUTYL-ETHER']



In [674]:
gdf_viewer(an, rows=5) 

Rows : 56, columns : 75


interactive(children=(IntSlider(value=5, description='rows', max=56, min=5, readout=False), IntSlider(value=12…

In [675]:
source_prv_sol=prv_sol
source_an=an

In [676]:
if not os.path.exists(tmp_dir):
    os.makedirs(tmp_dir)

#bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)
#pz.to_csv(tmp_dir+sheet+'_Piezometers.csv', index=False)
#ouv.to_csv(tmp_dir+sheet+'_Unkown-facility.csv', index=False)
#litho.to_csv(tmp_dir+sheet+'_Lithology.csv', index=False)
#mes_pz.to_csv(tmp_dir+sheet+'_Measures.csv', index=False)
#mes_sol.to_csv(tmp_dir+sheet+'_Measures-soil.csv', index=False)
#prv_eau.to_csv(tmp_dir+sheet+'_Samples-water.csv', index=False)
prv_sol.to_csv(tmp_dir+sheet+'_Samples-soil.csv', index=False)
an.to_csv(tmp_dir+sheet+'_Analysis.csv', index=False)


#source_bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)
#source_pz.to_csv(tmp_dir+'source_Piezometers.csv', index=False)
#source_ouv.to_csv(tmp_dir+'source_Unkown-facility.csv', index=False)
#source_litho.to_csv(tmp_dir+'source_Lithology.csv', index=False)
#source_mes_pz.to_csv(tmp_dir+'source_Measures.csv', index=False)
#source_mes_sol.to_csv(tmp_dir+'source_Measures-soil.csv', index=False)
#source_prv_eau.to_csv(tmp_dir+'source_Samples-water.csv', index=False)
source_prv_sol.to_csv(tmp_dir+'source_Samples-soil.csv', index=False)
source_an.to_csv(tmp_dir+'source_Analysis.csv', index=False)


print(f'source_bh:{len(source_bh)} ; source_pz:{len(source_pz)} ; source_litho:{len(source_litho)} ; '
     f'source_Fac-uknw:{len(source_ouv)} ; source_an:{len(source_an)} ;\nsource_prv_sol:{len(source_prv_sol)} ;'
     f'source_prv_eau:{len(source_prv_eau)} ; source_mes_pz:{len(source_mes_pz)} ; source_mes_sol:{len(source_mes_sol)} ;')

source_bh:0 ; source_pz:0 ; source_litho:0 ; source_Fac-uknw:0 ; source_an:56 ;
source_prv_sol:55 ;source_prv_eau:0 ; source_mes_pz:0 ; source_mes_sol:0 ;


* **Sheet : 'inorganiques et composés majeur'**

In [699]:
tmp_dir='../../CF_data/Result_traitem/donnees_terrain_2019/'
sheet='Inorg_comp_majeur'

In [700]:
df = pd.read_excel('../../CF_data/Data_UMONS/Profils_de_sol_Siterem- extension_pilote_et_pilote/'
                   'Resultats SOL extension pilote et piezairs.xlsx', 
                   sheet_name='inorganiques et composés majeur', skiprows=1)
df=na_line_drop(df,0)
df=na_col_drop(df,1)
df.replace(r'<|>','', inplace=True, regex=True)
df.replace(r'-$',np.nan, inplace=True, regex=True)

gdf_viewer(df, rows=5)

11 NaN lines dropped

Columns dropped :['Unnamed: 0', 'Unnamed: 2', 'Unnamed: 48', 'Unnamed: 49', 'Unnamed: 50', 'Unnamed: 51', 'Unnamed: 52', 'Unnamed: 53', 'Unnamed: 54', 'Unnamed: 55', 'Unnamed: 56', 'Unnamed: 57', 'Unnamed: 58', 'Unnamed: 59', 'Unnamed: 60', 'Unnamed: 61', 'Unnamed: 62', 'Unnamed: 63', 'Unnamed: 64', 'Unnamed: 65', 'Unnamed: 66', 'Unnamed: 67', 'Unnamed: 68', 'Unnamed: 69', 'Unnamed: 70', 'Unnamed: 71', 'Unnamed: 72', 'Unnamed: 73', 'Unnamed: 74', 'Unnamed: 75', 'Unnamed: 76', 'Unnamed: 77', 'Unnamed: 78', 'Unnamed: 79', 'Unnamed: 80', 'Unnamed: 81', 'Unnamed: 82', 'Unnamed: 83', 'Unnamed: 84', 'Unnamed: 85', 'Unnamed: 86', 'Unnamed: 87', 'Unnamed: 88', 'Unnamed: 89', 'Unnamed: 90', 'Unnamed: 91', 'Unnamed: 92', 'Unnamed: 93', 'Unnamed: 94']

Rows : 64, columns : 46


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


interactive(children=(IntSlider(value=5, description='rows', max=64, min=5, readout=False), IntSlider(value=12…

In [701]:
prv_sol=df.loc[:20]
an=df.loc[21:]

In [702]:
an.loc[0.5] = df.loc[0] # put data on first line
an = an.sort_index().reset_index(drop=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value, self.name)


In [703]:
prv_sol=prv_sol.transpose()
prv_sol.reset_index(drop=True, inplace=True)
prv_sol=col_ren(prv_sol, 1)

In [704]:
prv_sol=dble_col_drop(prv_sol)

column(s) dropped: ['5:Autre zone suspecte investiguée']


this part of the file is unusefull (look above)

In [705]:
an=an.transpose()
an.reset_index(drop=True, inplace=True)

In [706]:
an=col_ren(an, 1)

In [707]:
an.rename(columns={an.columns[0]:'ID_ech'}, inplace=True)

In [709]:
an=an[an.columns[:-7]]

In [710]:
an=col_ren(an, name=pol_field_model, mode=1)

In [711]:
an=dble_col_drop(an)

column(s) dropped: ['3:NH4', '4:NH4', '5:NH4', '9:nitrite', '10:nitrite', '11:nitrite', '13:nitrate', '14:nitrate', '15:nitrate']


In [712]:
an.drop(list(range(3)), axis=0, inplace=True)
an.reset_index(drop=True, inplace=True)
an=na_col_drop(an,2)
an = na_line_drop(an,1)
an.insert(1,'Type_ech','Sol')


Columns dropped :['COMPOSES AZOTES', 'NH3_libre', 'nitrite', 'nitrate', 'COMPOSES SOUFRES ', 'Sulfure_tot', 'Sulfure_libre', 'S_tot', 'sulfite', 'ELEMENTS MAJEURS', 'Fe_tot', 'Fe2', 'AUTRES ANALYSES', 'Fluorure', 'CN_libre', 'B_libre', 'CaCO3', 'Bicarb', 'METHYL-TERT-BUTYL-ETHER', 'MTBE']

35 NaN lines dropped


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


In [714]:
gdf_viewer(an, rows=5) 

Rows : 7, columns : 15


interactive(children=(IntSlider(value=5, description='rows', max=7, min=5, readout=False), IntSlider(value=12,…

In [694]:
#source_prv_sol=prv_sol
source_an=gdf_merger(source_an, an, how='outer', col='ID_ech')[0]

In [695]:
if not os.path.exists(tmp_dir):
    os.makedirs(tmp_dir)

#bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)
#pz.to_csv(tmp_dir+sheet+'_Piezometers.csv', index=False)
#ouv.to_csv(tmp_dir+sheet+'_Unkown-facility.csv', index=False)
#litho.to_csv(tmp_dir+sheet+'_Lithology.csv', index=False)
#mes_pz.to_csv(tmp_dir+sheet+'_Measures.csv', index=False)
#mes_sol.to_csv(tmp_dir+sheet+'_Measures-soil.csv', index=False)
#prv_eau.to_csv(tmp_dir+sheet+'_Samples-water.csv', index=False)
#prv_sol.to_csv(tmp_dir+sheet+'_Samples-soil.csv', index=False)
an.to_csv(tmp_dir+sheet+'_Analysis.csv', index=False)


#source_bh.to_csv(tmp_dir+'source_Boreholes.csv', index=False)
#source_pz.to_csv(tmp_dir+'source_Piezometers.csv', index=False)
#source_ouv.to_csv(tmp_dir+'source_Unkown-facility.csv', index=False)
#source_litho.to_csv(tmp_dir+'source_Lithology.csv', index=False)
#source_mes_pz.to_csv(tmp_dir+'source_Measures.csv', index=False)
#source_mes_sol.to_csv(tmp_dir+'source_Measures-soil.csv', index=False)
#source_prv_eau.to_csv(tmp_dir+'source_Samples-water.csv', index=False)
#source_prv_sol.to_csv(tmp_dir+'source_Samples-soil.csv', index=False)
source_an.to_csv(tmp_dir+'source_Analysis.csv', index=False)


print(f'source_bh:{len(source_bh)} ; source_pz:{len(source_pz)} ; source_litho:{len(source_litho)} ; '
     f'source_Fac-uknw:{len(source_ouv)} ; source_an:{len(source_an)} ;\nsource_prv_sol:{len(source_prv_sol)} ;'
     f'source_prv_eau:{len(source_prv_eau)} ; source_mes_pz:{len(source_mes_pz)} ; source_mes_sol:{len(source_mes_sol)} ;')

source_bh:0 ; source_pz:0 ; source_litho:0 ; source_Fac-uknw:0 ; source_an:61 ;
source_prv_sol:55 ;source_prv_eau:0 ; source_mes_pz:0 ; source_mes_sol:0 ;
