# Data Gathering

In [1]:
from utils.io import dataframe_viewer, files_search, data_merger, data_validation, data_overview, \
data_filter, fix_duplicates

import re, os
import numpy as np
import pandas as pd
import datetime as dtm
from definitions import ROOT_DIR

In [2]:
def create_df(files, verbose=True): # TODO: find a more descriptive name for this function
    """
    Read each CSV file into a DataFrame and optionally report whether it has coordinates.

    Parameters
    ----------
    files : list of str
        Paths of the comma-delimited CSV files to read, in the order the
        resulting dataframes should be returned.
    verbose : bool, default True
        If True, print one line per file, labelled ``df1``, ``df2``, ... by
        position in `files`, stating whether the file has an 'X' column.

    Returns
    -------
    list of pandas.DataFrame
        One dataframe per input file, in input order (empty list for no files).
    """
    dfs = []
    for position, path in enumerate(files, start=1):
        df = pd.read_csv(path, delimiter=',')
        dfs.append(df)

        if verbose:
            # Only the 'X' column is tested; its presence is taken to mean the
            # file carries position information.
            msg = ' --> Coordinates' if 'X' in df.columns else ' --> No coordinates'

            # Label by position so each line can be matched to its file
            # (the label used to be hard-coded to "df1" for every file).
            print(f"df{position} : {msg}")

    return dfs

## Reading files

In [3]:
# Input folder that is searched for the processed CSV files, and output folder
# for the merged result; both are anchored at the project root and end with '/'
# so file names can be appended directly.
work_dir = f"{ROOT_DIR}/CF_data/Result_traitem/"
save_dir = f"{ROOT_DIR}/CF_data/Donnees_fusionnees/"

In [4]:
# Object-type keywords used to sort the files; every entry starts at 0.
# The exact key spelling matters: this dict is handed to files_search() in the
# next cell, after which files_dict[key] is used as a list of file paths.
files_dict = dict.fromkeys(
    ['Borehole', 'Piezometer', 'Piezair', 'Trench', 'Litho', 'Equipm',
     'Measure', 'Sample', 'Analysis', 'Facility'],
    0,
)

In [5]:
files_search(work_dir, files_dict, prefix='', skip='source')

Borehole  	:  7
Piezometer  	:  17
Piezair  	:  2
Trench  	:  1
Litho  	:  7
Equipm  	:  3
Measure  	:  6
Sample  	:  27
Analysis  	:  21
Facility  	:  4


In [6]:
# Join strategies passed as the `how=` argument of data_merger(); the cells
# below select one by position (how[1] is 'outer').
how=['inner', 'outer', 'left', 'right']

In [7]:
# One-letter aliases for the booleans passed as `view=` to dataframe_viewer()
# in the cells below.
f = False
t = True

# PIEZOMETERS PROCESSING

In [8]:
key = 'Piezometer'                     # entry of files_dict handled in this section
save_file = 'Merged_Piezometers.csv'   # output file name, appended to save_dir when saving
coi = ['ID', 'ID_date', 'X', 'Y', 'Z', 'Type', 'Long_for', 'Diam_for', 'Refus']  # columns of interest
dataset = pd.DataFrame()               # placeholder; overwritten by the first merge result below
print(f"{len(files_dict[key])} files")

17 files


In [9]:
files_dict[key]

['/home/yanathan/Projects/GSDMA/CF_data/Result_traitem/Liste_XY/Sol_Eau_Piezometers.csv',
 '/home/yanathan/Projects/GSDMA/CF_data/Result_traitem/Memoris_seafile/Result_eau_Piezometers.csv',
 '/home/yanathan/Projects/GSDMA/CF_data/Result_traitem/Phase_1_Memoris/Result_eau_Piezometers.csv',
 '/home/yanathan/Projects/GSDMA/CF_data/Result_traitem/Phase_2_Memoris/Result_eau_Piezometers.csv',
 '/home/yanathan/Projects/GSDMA/CF_data/Result_traitem/database_Memoris3/Donnees_piezos_Piezometers.csv',
 '/home/yanathan/Projects/GSDMA/CF_data/Result_traitem/database_Memoris3/Drains_Pz_ENEL_Piezometers.csv',
 '/home/yanathan/Projects/GSDMA/CF_data/Result_traitem/database_Memoris3/Profils_sol_Piezometers.csv',
 '/home/yanathan/Projects/GSDMA/CF_data/Result_traitem/database_Memoris3/Result_eau_Piezometers.csv',
 '/home/yanathan/Projects/GSDMA/CF_data/Result_traitem/database_Memoris3/Result_sol_Piezometers.csv',
 '/home/yanathan/Projects/GSDMA/CF_data/Result_traitem/donnees_terrain_2019/Donnees_forage_

In [10]:
data_overview(files_dict[key])

Same files:[(7, 8)]
Files with coordinates:[0, 1, 2, 3, 4, 5, 7, 8, 9, 13, 15, 16]
Files without coordinates:[6, 10, 11, 12, 14]


#### $\color{green}{\textbf{Read and merge}}$

In [11]:
file1= work_dir + 'Phase_1_Memoris/Result_eau_Piezometers.csv' # 2
file2= work_dir + 'Memoris_seafile/Result_eau_Piezometers.csv' # 1  


df1, df2 = create_df([file1, file2])
dataframe_viewer(df1, rows=3, un_val='ID', view=t), dataframe_viewer(df2, rows=3, un_val='ID', view=t)

df1 :  --> Coordinates
df1 :  --> Coordinates
Rows : 14, columns : 10, Unique col 'ID': 14


interactive(children=(IntSlider(value=3, description='rows', max=14, min=3, readout=False), IntSlider(value=10…

Rows : 30, columns : 9, Unique col 'ID': 30


interactive(children=(IntSlider(value=3, description='rows', max=30, min=3, readout=False), IntSlider(value=9,…

(None, None)

In [12]:
fix_duplicates(df1, df2, drop_old_id=True)

14 duplicate objects fixed!


In [13]:
mdf, conflict_df=data_merger(df1, df2, how=how[1], on='ID', dist_max=1)

#### First object dataset save

In [14]:
dataset = mdf.copy() #saving

#### $\color{green}{\textbf{Read and merge}}$

In [15]:
file1= work_dir + 'database_Memoris3/Donnees_piezos_Piezometers.csv' # 4
file2= work_dir + 'Liste_XY/Sol_Eau_Piezometers.csv' # 0  


df1, df2 = create_df([file1, file2])
dataframe_viewer(df1, rows=3, un_val='ID', view=t), dataframe_viewer(df2, rows=3, un_val='ID', view=t)

df1 :  --> Coordinates
df1 :  --> Coordinates
Rows : 117, columns : 13, Unique col 'ID': 117


interactive(children=(IntSlider(value=3, description='rows', max=117, min=3, readout=False), IntSlider(value=1…

Rows : 257, columns : 6, Unique col 'ID': 254


interactive(children=(IntSlider(value=3, description='rows', max=257, min=3, readout=False), IntSlider(value=6…

(None, None)

In [16]:
df1, check = data_filter(df1, position=True, id_col='ID', expression='sup|prof', dist_max=1, drop=True, drop_old_id=True)

same objects at indices:[62, 65, 67, 70, 72, 74, 108, 113, 114, 116], will be dropped if drop is set True!
Rows : 107 ; Columns : 14 ; Unique on 'ID' : 104 ; 


In [17]:
df2, check = data_filter(df2, position=True, id_col='ID', expression='sup|prof', dist_max=1, drop=True)

same objects at indices:[1, 8, 10, 12, 256, 106, 113], will be dropped if drop is set True!
Rows : 250 ; Columns : 7 ; Unique on 'ID' : 245 ; 


In [18]:
df2

Unnamed: 0,ID,Origin_ID,Nappe,X,Y,Type_ech,Type
0,160,160prof,Socle,152395.000,122839.000,Eau,Piezo
1,502,502,Socle,152365.000,122855.000,Eau,Piezo
2,502,502,Alluvions,152366.396,122857.132,Eau,Piezo
3,508,508,Socle,152467.000,122850.000,Eau,Piezo
4,512,512,Socle,152428.000,122766.000,Eau,Piezo
...,...,...,...,...,...,...,...
245,519,519,,152656.000,122837.000,Sol,Piezo
246,520,520,,152644.000,122791.000,Sol,Piezo
247,524,524,,152570.000,122789.000,Sol,Piezo
248,525,525,,152548.000,122783.000,Sol,Piezo


In [19]:
fix_duplicates(df1, df2)

3 duplicate objects fixed!


In [20]:
# NOTE(review): scratch comparison of two date strings; the result is only
# displayed, never assigned, so this cell looks like leftover debugging.
'2021-07-03' == '2021-07-04'

False

In [21]:
mdf, conflict_df=data_merger(df1, df2, how=how[1], on='ID', dist_max=1)

Conflict values present. Please resolve this manually !


#### $\color{blue}{\textbf{Manage conflicts}}$

In [22]:
conflict_df

Unnamed: 0,Check_col,ID,Origin_ID_x,Origin_ID_y,Nappe_x,Nappe_y
8,"Origin_ID, Nappe",59,59,P59,Remblai_All,remblais
37,Origin_ID,186,186,503,Remblais,
61,Origin_ID,FP49,FP 49 PROF,FP49prof,Socle,Socle
64,Nappe,FP14,FP14prof,FP14prof,All_limoneuses_graveleuses,Alluvions
67,Origin_ID,FP63,FP63 prof,FP63prof,Socle,Socle
68,Origin_ID,FP76,FP76 prof,FP76prof,Socle,Socle
70,Nappe,501,501,501,Remblai_All,remblais
72,Nappe,509,509,509,All_limoneuse,Alluvions
73,Nappe,510,510,510,All_limoneuse,Alluvions
74,Nappe,511,511,511,Remblai_All,remblais


In [23]:
# Validate conflicts for the first 18 rows of conflict_df (by index label),
# keyed on the 'Nappe_y' column.
# NOTE(review): presumably this keeps the 'Nappe_y' value for those rows (the
# next display shows 'Done' in both Nappe columns) — confirm against
# utils.io.data_validation.
# NOTE(review): 18 is a hard-coded count tied to the current data; it must be
# re-checked whenever the input files change.
data_validation(overall_data=mdf, conflict_data=conflict_df, index_col='index', 
                valid_dict={'Nappe_y':list(conflict_df.index)[:18]})

Validation done, but conflicts remain!


In [24]:
conflict_df

Unnamed: 0,Check_col,ID,Origin_ID_x,Origin_ID_y,Nappe_x,Nappe_y
8,Origin_ID,59,59,P59,Done,Done
102,Nappe,539,539,539,All_limoneuse,Alluvions
104,Nappe,533,533,533,All_limoneuses_graveleuses,Alluvions
105,Origin_ID,502,502prof,502,Socle,Socle
107,"Origin_ID, Nappe",512,512sup,512,Remblai_All,Socle
110,"Origin_ID, Nappe",595,595prof,595,Socle,Alluvions
111,"Origin_ID, Nappe",540,540sup Al,540sup,All_limoneuse,remblais


#### Merge with object dataset

In [25]:
dataset, conflict_df=data_merger(dataset, mdf, how=how[1], on='ID', dist_max=1)

In [26]:
# When the merged frame carries a 'level_0' column, that column takes over the
# 'index' name; a pre-existing 'index' column (if any) is discarded first.
if 'level_0' in dataset.columns:
    dataset.drop(columns='index', errors='ignore', inplace=True)
    dataset.rename(columns={'level_0': 'index'}, inplace=True)

In [27]:
dataframe_viewer(dataset, rows=10, un_val='ID', view=t)

Rows : 361, columns : 21, Unique col 'ID': 346


interactive(children=(IntSlider(value=10, description='rows', max=361, min=10, readout=False), IntSlider(value…

#### $\color{green}{\textbf{Read and merge}}$

In [28]:
file1= work_dir + 'Phase_2_Memoris/Result_eau_Piezometers.csv' # 3
file2= work_dir + 'database_Memoris3/Drains_Pz_ENEL_Piezometers.csv' # 5  


df1, df2 = create_df([file1, file2])
dataframe_viewer(df1, rows=3, un_val='ID', view=t), dataframe_viewer(df2, rows=3, un_val='ID', view=t)

df1 :  --> Coordinates
df1 :  --> Coordinates
Rows : 10, columns : 9, Unique col 'ID': 10


interactive(children=(IntSlider(value=3, description='rows', max=10, min=3, readout=False), IntSlider(value=9,…

Rows : 6, columns : 6, Unique col 'ID': 6


interactive(children=(IntSlider(value=3, description='rows', max=6, min=3, readout=False), IntSlider(value=6, …

(None, None)

In [29]:
mdf, conflict_df=data_merger(df1, df2, how=how[1], on='ID', dist_max=1)

#### Merge with object dataset

In [30]:
dataset, conflict_df=data_merger(dataset, mdf, how=how[1], on='ID', dist_max=1)

In [31]:
# When the merged frame carries a 'level_0' column, that column takes over the
# 'index' name; a pre-existing 'index' column (if any) is discarded first.
if 'level_0' in dataset.columns:
    dataset.drop(columns='index', errors='ignore', inplace=True)
    dataset.rename(columns={'level_0': 'index'}, inplace=True)

In [32]:
dataframe_viewer(dataset, rows=10, un_val='ID', view=t)

Rows : 372, columns : 22, Unique col 'ID': 355


interactive(children=(IntSlider(value=10, description='rows', max=372, min=10, readout=False), IntSlider(value…

#### $\color{green}{\textbf{Read and merge}}$

In [33]:
file1= work_dir + 'donnees_terrain_2019/Donnees_forage_Piezometers.csv' # 9
file2= work_dir + 'database_Memoris3/Result_eau_Piezometers.csv' # 7  

df1, df2 = create_df([file1, file2])
dataframe_viewer(df1, rows=3, un_val='ID', view=t), dataframe_viewer(df2, rows=3, un_val='ID', view=t)

df1 :  --> Coordinates
df1 :  --> Coordinates
Rows : 3, columns : 18, Unique col 'ID': 3


interactive(children=(IntSlider(value=3, description='rows', max=3, min=3, readout=False), IntSlider(value=12,…

Rows : 117, columns : 13, Unique col 'ID': 117


interactive(children=(IntSlider(value=3, description='rows', max=117, min=3, readout=False), IntSlider(value=1…

(None, None)

In [34]:
# Cast the ID column of both frames to str before filtering and merging
# (presumably so both sides of the merge on 'ID' share one dtype).
df1['ID'] = df1['ID'].astype(str)
df2['ID'] = df2['ID'].astype(str)

In [35]:
df2, check = data_filter(df2, position=True, id_col='ID', expression='sup|prof', dist_max=1, error_max=1.1, drop=True, drop_old_id=True)

Rows : 117 ; Columns : 14 ; Unique on 'ID' : 104 ; 


In [36]:
dataframe_viewer(df2, rows=10, un_val='ID', view=t)

Rows : 117, columns : 14, Unique col 'ID': 104


interactive(children=(IntSlider(value=10, description='rows', max=117, min=10, readout=False), IntSlider(value…

In [37]:
fix_duplicates(df1, df2)

0 duplicate objects fixed!


In [38]:
mdf, conflict_df=data_merger(df1, df2, how=how[1], on='ID', dist_max=1)

#### Merge with object dataset

In [39]:
dataset, conflict_df=data_merger(dataset, mdf, how=how[1], on='ID', dist_max=1)

Conflict values present. Please resolve this manually !


#### $\color{blue}{\textbf{Manage conflicts}}$

In [40]:
conflict_df

Unnamed: 0,Check_col,ID,Origin_ID_x,Origin_ID_y,Nappe_x,Nappe_y,Z_x,Z_y,Zsol_x,Zsol_y
36,"Origin_ID, Nappe",59,#conflict,59,remblais,Remblai_All,102.616,102.616,101.96,101.96
65,Origin_ID,186,#conflict,186,,Remblais,110.33,110.33,109.881,109.881
89,Origin_ID,FP49,#conflict,FP 49 PROF,Socle,Socle,101.704,101.704,,
90,"Origin_ID, Nappe, Z",FP49,#conflict,FP 49 SUP,Socle,Remblai_All,101.704,101.677,,
93,"Origin_ID, Z",FP14,FP14prof,FP14sup,,Remblais,103.095,103.019,102.32,102.32
95,"Origin_ID, Nappe, Z",FP160,FP160PROF,FP160SUP,Socle,Remblai_All,104.458,104.457,103.81,103.81
96,Origin_ID,FP63,#conflict,FP63 prof,,Socle,103.331,103.331,102.803,102.803
97,"Origin_ID, Z",FP63,#conflict,FP63 sup,,Remblais,103.331,103.346,102.803,102.803
98,Origin_ID,FP76,#conflict,FP76 prof,,Socle,103.18,103.18,102.73,102.73
99,"Origin_ID, Z",FP76,#conflict,FP76 sup,,Remblais,103.18,103.227,102.73,102.73


In [41]:
print(list(conflict_df.index)[:18])

[36, 65, 89, 90, 93, 95, 96, 97, 98, 99, 101, 103, 105, 106, 119, 122, 124, 130]


In [42]:
data_validation(overall_data=mdf, conflict_data=conflict_df, index_col='index', 
                valid_dict={'Nappe_y':list(conflict_df.index)[:18]})

Validation done, but conflicts remain!


In [43]:
conflict_df

Unnamed: 0,Check_col,ID,Origin_ID_x,Origin_ID_y,Nappe_x,Nappe_y,Z_x,Z_y,Zsol_x,Zsol_y
36,Origin_ID,59,#conflict,59,Done,Done,102.616,102.616,101.96,101.96
90,"Origin_ID, Z",FP49,#conflict,FP 49 SUP,Done,Done,101.704,101.677,,
93,"Origin_ID, Z",FP14,FP14prof,FP14sup,Done,Done,103.095,103.019,102.32,102.32
95,"Origin_ID, Z",FP160,FP160PROF,FP160SUP,Done,Done,104.458,104.457,103.81,103.81
97,"Origin_ID, Z",FP63,#conflict,FP63 sup,Done,Done,103.331,103.346,102.803,102.803
99,"Origin_ID, Z",FP76,#conflict,FP76 sup,Done,Done,103.18,103.227,102.73,102.73
124,"Origin_ID, Z",541,541,541sup R,Done,Done,101.79,101.76,101.41,101.41
132,Nappe,533,533,533,#conflict,All_limoneuses_graveleuses,103.11,103.11,102.62,102.62
133,Origin_ID,502,#conflict,502prof,Socle,Socle,105.8,105.8,105.28,105.28
134,"Origin_ID, Nappe, Z",502,#conflict,502sup,Socle,All_limoneuses_graveleuses,105.8,105.91,105.28,105.28


In [44]:
# When the merged frame carries a 'level_0' column, that column takes over the
# 'index' name; a pre-existing 'index' column (if any) is discarded first.
if 'level_0' in dataset.columns:
    dataset.drop(columns='index', errors='ignore', inplace=True)
    dataset.rename(columns={'level_0': 'index'}, inplace=True)

In [45]:
dataframe_viewer(dataset, rows=10, un_val='ID', view=t)

Rows : 421, columns : 31, Unique col 'ID': 358


interactive(children=(IntSlider(value=10, description='rows', max=421, min=10, readout=False), IntSlider(value…

In [46]:
# Pick the next two files by their position in files_dict[key] and show their
# paths relative to work_dir.
a, b = 13, 15
file1, file2 = files_dict[key][a], files_dict[key][b]
print(file1.replace(work_dir, ""), '||', file2.replace(work_dir, ""))

profils_sols_donnees_forages/donnees_forage_Piezometers.csv || vUmons_logsFor/Analyse_eau_Phases1&2_Piezometers.csv


In [47]:
# NOTE(review): `pause` is not defined in any cell shown here, so this line
# raises NameError (see the recorded output) and halts a "Run All" at this
# point — presumably on purpose, as a stop marker for unfinished work below.
pause # continue from here

NameError: name 'pause' is not defined

#### $\color{green}{\textbf{Read and merge}}$

In [None]:
#file1= work_dir + 'donnees_terrain_2019/Donnees_forage_Piezometers.csv' # 9
#file2= work_dir + 'database_Memoris3/Result_eau_Piezometers.csv' # 7  

df1, df2 = create_df([file1, file2])
dataframe_viewer(df1, rows=3, un_val='ID', view=t), dataframe_viewer(df2, rows=3, un_val='ID', view=t)

In [None]:
df1.ID = df1.ID.astype(str)
df2.ID = df2.ID.astype(str)

In [None]:
df2, check = data_filter(df2, position=True, id_col='ID', expression='sup|prof', dist_max=1, error_max=1.1, drop=True, drop_old_id=True)

In [None]:
dataframe_viewer(df2, rows=10, un_val='ID', view=t)

In [None]:
fix_duplicates(df1, df2)

In [None]:
mdf, conflict_df=data_merger(df1, df2, how=how[1], on='ID', dist_max=1)

#### Merge with object dataset

In [None]:
dataset, conflict_df=data_merger(dataset, mdf, how=how[1], on='ID', dist_max=1)

#### $\color{blue}{\textbf{Manage conflicts}}$

In [None]:
conflict_df

In [None]:
data_validation(overall_data=mdf, conflict_data=conflict_df, index_col='index', 
                valid_dict={'Nappe_y':list(conflict_df.index)[:18]})

In [None]:
conflict_df

In [None]:
# When the merged frame carries a 'level_0' column, that column takes over the
# 'index' name; a pre-existing 'index' column (if any) is discarded first.
if 'level_0' in dataset.columns:
    dataset.drop(columns='index', errors='ignore', inplace=True)
    dataset.rename(columns={'level_0': 'index'}, inplace=True)

In [None]:
dataframe_viewer(dataset, rows=10, un_val='ID', view=t)

##### ==================================== TEST ZONE ========================================

In [None]:
pause

#### $\color{green}{\textbf{Read and merge}}$

In [None]:
a, b = 13, 15
file1= files_dict[key][a]
file2= files_dict[key][b]
print(files_dict[key][a].replace(work_dir,""),'||', files_dict[key][b].replace(work_dir,""))

In [None]:
#file1= work_dir + 'donnees_terrain_2019/Donnees_forage_Piezometers.csv' # 9
#file2= work_dir + 'database_Memoris3/Result_eau_Piezometers.csv' # 7  

df1, df2 = create_df([file1, file2])
dataframe_viewer(df1, rows=3, un_val='ID', view=t), dataframe_viewer(df2, rows=3, un_val='ID', view=t)

In [None]:
fix_duplicates(df1, df2)

In [None]:
mdf, conflict_df=data_merger(df1, df2, how=how[1], on='ID', dist_max=1)

#### Merge with object dataset

In [None]:
dataset, conflict_df=data_merger(dataset, mdf, how=how[1], on='ID', dist_max=1)

#### $\color{blue}{\textbf{Manage conflicts}}$

In [None]:
conflict_df # I think these are not the same objects, but there is no date or position to distinguish them!
# --> check the borehole sheets (PDF)

In [None]:
dataframe_viewer(dataset, rows=3, un_val='ID', view=t)

####  $\color{red}{\textbf{Save final object dataset}}$

In [None]:
# Remove the helper 'index' column from the merged data before it is saved.
# The merges above accumulate into `dataset`; `piezometers` is not defined at
# this point in a top-to-bottom run, so referring to it raised NameError.
if 'index' in dataset.columns:
    dataset.drop(columns='index', inplace=True)

In [None]:
# Create the output folder if it is missing (no error when it already exists),
# then write the merged data without the pandas row index.
os.makedirs(save_dir, exist_ok=True)

# The merges above accumulate into `dataset`; `piezometers` is not defined at
# this point in a top-to-bottom run, so saving it raised NameError.
dataset.to_csv(save_dir + save_file, index=False)

###  ------------------------------------- Testing area ------------------------------

In [None]:
a, b = 3, 0
file1= files_dict[key][a]
file2= files_dict[key][b]
print(files_dict[key][a].replace(work_dir,""),'||', files_dict[key][b].replace(work_dir,""))

#### $\color{green}{\textbf{Read and merge}}$

In [None]:
#file1= work_dir + 'Memoris_seafile/Result_SOL_Samples-soil.csv' # 3
#file2= work_dir + 'Phase_1_Memoris/Result_sol_Samples-soil.csv' # 6

df1, df2 = create_df([file1, file2])
dataframe_viewer(df1, rows=3, un_val='ID', view=t), dataframe_viewer(df2, rows=3, un_val='ID', view=t)

In [None]:
pause

#### $\color{green}{\textbf{Read and merge}}$

In [None]:
df1.ID_ech = df1.ID_ech.astype(str)
df2.ID_ech = df2.ID_ech.astype(str)

#### $\color{blue}{\textbf{Manage conflicts}}$

In [None]:
# Load entries 0 and 3 of files_dict[key] and preview both frames.
file1= files_dict[key][0]
file2= files_dict[key][3]

df1, df2 = create_df([file1, file2])
# Print the paths of the files actually loaded (the print used to show
# entries 1 and 2, which did not match file1/file2).
print(file1.replace(work_dir,""),'||', file2.replace(work_dir,""))
dataframe_viewer(df1, rows=3, un_val='ID', view=t), dataframe_viewer(df2, rows=3, un_val='ID', view=t)

In [None]:
fix_duplicates(df1, df2)

In [None]:
dataframe_viewer(df1, rows=3, un_val='ID', view=t), dataframe_viewer(df2, rows=3, un_val='ID', view=t)

In [None]:
df1.rename(columns={'Profondeur':'Long_for'}, inplace=True)
df2.rename(columns={'Profondeur':'Long_for'}, inplace=True)

###  °°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°

In [None]:
file1= files_dict[key][6]
file2= files_dict[key][4]

df1, df2 = create_df([file1, file2])
dataframe_viewer(df1, rows=3, un_val='ID', view=t), dataframe_viewer(df2, rows=3, un_val='ID', view=t)

In [None]:
file1= files_dict[key][0]
file2= files_dict[key][1]

df1, df2 = create_df([file1, file2])
dataframe_viewer(df1, rows=3, un_val='ID', view=t), dataframe_viewer(df2, rows=3, un_val='ID', view=t)

In [None]:
mdf, conflict_df=data_merger(df1, df2, how=how[1], on='ID', dist_max=1)
check_col(mdf)

##### check and validate duplicate objects
- The function "data_filter()" doesn't work in some cases, so we use the function "double_objects_check()" instead
- Here we have objects with the same name but different positions

In [None]:
# NOTE(review): this call passes `id_on=` and `rapp_val=`, whereas the
# data_filter() calls earlier in this notebook pass `id_col=` and `error_max=`
# — confirm these keywords still match the current utils.io.data_filter signature.
mdf, check = data_filter(mdf, position=True, id_on='ID', expression='sup|prof', dist_max=1, drop=True, rapp_val=1)

In [None]:
double_objects_check(mdf)

In [None]:
drop_id = [2,25,30] # objects are seemingly the same, but is it possible to get 2 objects so close (~ 1m)?
mdf.drop(index=drop_id, inplace=True)
mdf.reset_index(drop=True, inplace=True)

In [None]:
dataframe_viewer(mdf, rows=3, cols=13, un_val='ID', view=t)

#### Merge with object type dataset

In [None]:
piezometers = mdf.copy() #saving

In [None]:
file1= files_dict[key][2]
file2= files_dict[key][3]

df1, df2 = create_df([file1, file2])
dataframe_viewer(df1, rows=3, un_val='ID', view=t), dataframe_viewer(df2, rows=3, un_val='ID', view=t)

In [None]:
mdf, conflict_df=data_merger(df1, df2, how=how[1], on='ID', dist_max=2)
check_col(mdf)

In [None]:
dataframe_viewer(mdf, rows=3, cols=13, un_val='ID', view=f)

In [None]:
dataset, conflict_df=data_merger(dataset, mdf, how=how[1], on='ID', dist_max=1)
check_col(mdf)

In [None]:
dataframe_viewer(dataset, rows=3, cols=13, un_val='ID', view=t)

In [None]:
double_objects_check(piezometers)

In [None]:
drop_id = [292, 293]
piezometers.drop(index=drop_id, inplace=True)
dataframe_viewer(dataset, rows=5, un_val='ID', view=f)

In [None]:
file1= files_dict[key][4]
file2= files_dict[key][5]

df1, df2 = create_df([file1, file2])
dataframe_viewer(df1, rows=3, un_val='ID', view=t), dataframe_viewer(df2, rows=3, un_val='ID', view=t)

In [None]:
mdf, conflict_df=data_merger(df1, df2, how=how[1], on='ID', dist_max=2)
check_col(mdf)

In [None]:
dataset, conflict_df=data_merger(dataset, mdf, how=how[1], on='ID', dist_max=1)
check_col(mdf)

In [None]:
dataframe_viewer(dataset, rows=3, cols=13, un_val='ID', view=t)

In [None]:
dataset, check = data_filter(dataset, position=True, id_on='ID', expression='sup|prof', dist_max=1, drop=True)
#dataframe_viewer(dataset, rows=5, un_val='ID', view=t)

In [None]:
double_objects_check(piezometers)

In [None]:
drop_id = [2,4,30,94,106]
piezometers.drop(index=drop_id, inplace=True)
dataframe_viewer(dataset, rows=5, un_val='ID', view=f)

In [None]:
file1= files_dict[key][6]
file2= files_dict[key][9]

df1, df2 = create_df([file1, file2])
dataframe_viewer(df1, rows=3, un_val='ID', view=t), dataframe_viewer(df2, rows=3, un_val='ID', view=t)

In [None]:
df2['ID'] = df2.ID.astype('object')

In [None]:
mdf, conflict_df=data_merger(df1, df2, how=how[1], on='ID', dist_max=2)
check_col(mdf)

In [None]:
dataset, conflict_df=data_merger(dataset, mdf, how=how[1], on='ID', dist_max=1)
check_col(mdf)

In [None]:
dataframe_viewer(dataset, rows=3, cols=13, un_val='ID', view=t)

In [None]:
file1= files_dict[key][10]
file2= files_dict[key][11]

df1, df2 = create_df([file1, file2])
dataframe_viewer(df1, rows=3, un_val='ID', view=t), dataframe_viewer(df2, rows=3, un_val='ID', view=t)

In [None]:
df1['ID'] = df1.ID.astype('object')

In [None]:
mdf, conflict_df=data_merger(df1, df2, how=how[1], on='ID', dist_max=2)
check_col(mdf)

In [None]:
dataset, conflict_df=data_merger(dataset, mdf, how=how[1], on='ID', dist_max=1)
check_col(mdf)

In [None]:
conflict_df

In [None]:
dataframe_viewer(dataset, rows=3, cols=13, un_val='ID', view=t)

In [None]:
file1= files_dict[key][12]
file2= files_dict[key][13]

df1, df2 = create_df([file1, file2])
dataframe_viewer(df1, rows=3, un_val='ID', view=t), dataframe_viewer(df2, rows=3, un_val='ID', view=t)

In [None]:
mdf, conflict_df=data_merger(df1, df2, how=how[1], on='ID', dist_max=2)
check_col(mdf)

In [None]:
conflict_df

In [None]:
dataset, conflict_df=data_merger(dataset, mdf, how=how[1], on='ID', dist_max=1)
check_col(mdf)

In [None]:
dataframe_viewer(dataset, rows=3, cols=13, un_val='ID', view=t)

In [None]:
file1= files_dict[key][14]
file2= files_dict[key][15]

df1, df2 = create_df([file1, file2])
dataframe_viewer(df1, rows=3, un_val='ID', view=t), dataframe_viewer(df2, rows=3, un_val='ID', view=t)

In [None]:
mdf, conflict_df=data_merger(df1, df2, how=how[1], on='ID', dist_max=2)
check_col(mdf)

In [None]:
dataset, conflict_df=data_merger(dataset, mdf, how=how[1], on='ID', dist_max=1)
check_col(mdf)

In [None]:
conflict_df

In [None]:
dataframe_viewer(dataset, rows=3, cols=13, un_val='ID', view=t)

In [None]:
file1= files_dict[key][16]
df1 = pd.read_csv(file1, delimiter=',')

print(f"df1 : {file1.replace(work_dir,'')}")
dataframe_viewer(df1, rows=3, un_val='ID', view=t)

#### Last merging

In [None]:
dataset, conflict_df=data_merger(dataset, df1, how=how[1], on='ID', dist_max=1)
check_col(mdf)

In [None]:
conflict_df

In [None]:
dataframe_viewer(dataset, rows=3, cols=13, un_val='ID', view=t)

####  $\color{red}{\textbf{Save final Piezometers data}}$

In [None]:
# Create the output folder if it does not exist yet, then write the result to
# save_dir + save_file without the pandas row index.
if not os.path.exists(save_dir):
    os.makedirs(save_dir)
    
# NOTE(review): this saves `piezometers`, while the merges in the cells above
# accumulate into `dataset` — confirm which frame is the intended final output.
piezometers.to_csv(save_dir+save_file, index=False)