# Concatenate boreholes

In [1]:
%matplotlib widget

In [76]:
from ipywidgets import interact, IntSlider
from IPython.display import display  

import re, copy
import datetime
import numpy as np
import geopandas as gpd
from shapely.geometry import Point
import pandas as pd
import matplotlib.pyplot as plt

In [3]:
def gdf_viewer(df, rows=10, cols=15, step_r=1, step_c=1):# Afficher les dataframes au moyen d'un widget (affichage dynamique)
    
    print(f'Rows : {df.shape[0]}, columns : {df.shape[1]}')
    
    @interact(last_row=IntSlider(min=min(rows, df.shape[0]),max=df.shape[0],step=step_r,description='rows',
                                 readout=False,disabled=False,continuous_update=True,orientation='horizontal',
                                 slider_color='blue'),
              
              last_column=IntSlider(min=min(cols, df.shape[1]),max=df.shape[1],step=step_c,
                                    description='columns',readout=False,disabled=False,continuous_update=True,
                                    orientation='horizontal',slider_color='blue'))
    
    def _freeze_header(last_row, last_column):
        display(df.iloc[max(0, last_row-rows):last_row,
                        max(0, last_column-cols):last_column])

## Extraction des dates de forages du type de forage et du foreur de l'étude de caractérisation

### Lecture des fichiers de position des forages du pilote (version 8)

In [4]:
v8 = gpd.read_file('../../CF_data/Data_UMONS/SIG/Pilote_v8.0.shp')
v8.query('GPS2021=="NON"')

Unnamed: 0,Id,Type,Pilote,Ref,diam,Valid,Syscal,Impo,Inox,Impo_perm,GPS2021,geometry
18,0,4,1,400,0,0,0,0,0,0,NON,POINT (152885.032 122585.367)
32,0,5,1,500,0,0,0,0,0,0,NON,POINT (152890.824 122583.580)
63,0,3,1,306,0,0,0,0,0,0,NON,POINT (152867.452 122588.877)


### Informations sur les forages de l'étude de caractérisation de 2010

In [5]:
Logs_forages = pd.read_excel('../../CF_data/Data_SITEREM/Logs forages.xls',sheet_name='Forages, tranchées')

In [6]:
Logs_forages = Logs_forages[['Date', 'N°', 'Id', 'Profondeur', 'Description', 'Piézo', 'Unnamed: 6',
                             'Gouge Ø75', 'MFT Ø145', 'carottier', 'tarrière', 'Liner Ø60']] 

In [7]:
Logs_forages.rename({'Gouge Ø75':'Gouge_75', 'Liner Ø60': 'Liner_60'}, axis=1, inplace=True)

In [8]:
Logs_forages.loc[Logs_forages.query('Profondeur!=Profondeur' ).index,'Profondeur']=''

In [9]:
forages = Logs_forages.query('Profondeur.str.contains("Forage") and Profondeur!="Forage bloqué"', engine='python')

In [10]:
forages['Ref'] = forages['Profondeur'].apply(lambda x: x.split('Forage ')[1].replace(' ', ''))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  forages['Ref'] = forages['Profondeur'].apply(lambda x: x.split('Forage ')[1].replace(' ', ''))


In [11]:
forages['tmp'] = forages['Piézo'].replace(np.nan, '', regex=True) + '-' + forages['Unnamed: 6'].replace(np.nan, '', regex=True) + '-' + forages['Gouge_75'].replace(np.nan, '', regex=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  forages['tmp'] = forages['Piézo'].replace(np.nan, '', regex=True) + '-' + forages['Unnamed: 6'].replace(np.nan, '', regex=True) + '-' + forages['Gouge_75'].replace(np.nan, '', regex=True)


In [12]:
forages['Piezo'] = forages['tmp'].apply(lambda x: x.lstrip(' ').split('-')[0].lower()=='x') 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  forages['Piezo'] = forages['tmp'].apply(lambda x: x.lstrip(' ').split('-')[0].lower()=='x')


In [13]:
forages['Company'] = forages['tmp'].apply(lambda x: x.rstrip('-').split('-')[-1].upper()) 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  forages['Company'] = forages['tmp'].apply(lambda x: x.rstrip('-').split('-')[-1].upper())


In [14]:
forages.loc[123, 'Company']='SITEREM'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


In [415]:
forages = forages[['Date', 'Ref', 'Company', 'Piezo']]
forages

Unnamed: 0,Date,Ref,Company,Piezo
0,2010-03-11,2010-1,SBS ENVIRONNEMENT,True
9,2010-03-11,2010-2,SBS ENVIRONNEMENT,False
19,2010-03-11,2010-3,SBS ENVIRONNEMENT,False
27,2010-03-10,2010-4,SBS ENVIRONNEMENT,False
33,2010-03-12,2010-5,SBS ENVIRONNEMENT,False
...,...,...,...,...
1973,2010-08-03,2010-612c,SBS ENVIRONNEMENT,False
1977,2010-08-03,2010-734,SITEREM,True
1984,2010-09-01,2010-FP595,SITEREM,True
1994,2010-09-02,2010-540,SITEREM,True


In [546]:
str(forages['Date'][0].year)

'2010'

In [16]:
forages['Ref'] = forages['Date'].apply(lambda x : str(x.year) + '-') + forages['Ref']

In [574]:
forages

2010

### forages phase 1

In [18]:
v2017 = gpd.read_file('../../CF_data/Data_UMONS/SIG/sondages_v2017-05-18.shp')
gdf_viewer(v2017, 10, 15)

Rows : 71, columns : 19


interactive(children=(IntSlider(value=10, description='rows', max=71, min=10, readout=False), IntSlider(value=…

In [19]:
v2017['Ref'] = v2017['Date'].apply(lambda x : str(datetime.datetime.strptime(x, '%Y-%m-%d').year) + '-') + v2017['Ref_sond']

In [20]:
sond_ant = pd.read_excel('../../CF_data/Data_UMONS/Logs_forages_vUmons_2018-03-20.xlsx', sheet_name='Sond_ant')

In [21]:
sond_ant['Ref'] = sond_ant['Ref_siterem'].apply(lambda x: '2010-' + str(x)) # Identifiant sur base de l'année

In [22]:
sond_ant

Unnamed: 0,Ref_GIS,Ref_siterem,X,Y,Type,Ref
0,1,500,152324.0,122975.0,Eau_RB,2010-500
1,2,501,152368.0,122909.0,Eau_RB,2010-501
2,3,511,152371.0,122771.0,Eau_RB,2010-511
3,4,513,152424.0,122752.0,Eau_RB,2010-513
4,5,514,152476.0,122786.0,Eau_RB,2010-514
...,...,...,...,...,...,...
252,253,520,152644.0,122791.0,SOL,2010-520
253,254,524,152570.0,122789.0,SOL,2010-524
254,255,525,152548.0,122783.0,SOL,2010-525
255,256,526,152553.0,122757.0,SOL,2010-526


In [23]:
sond_ant_gdf = gpd.GeoDataFrame(sond_ant, geometry=gpd.points_from_xy(sond_ant.X, sond_ant.Y, crs=str('EPSG:31370')))

### Lecture de l'extension du projet MEMORIS

In [24]:
ext = gpd.read_file('../../CF_data/Data_UMONS/SIG/Site_Memoris.shp')

### Affichage carte

In [25]:
def label(gdf, field, ax, size='xx-small'):
    for x, y, label in zip(gdf.geometry.x, gdf.geometry.y, gdf[field]):
        ax.annotate(label, xy=(x, y), xytext=(3, 3), textcoords='offset points', size=size)

In [26]:
fig, ax = plt.subplots(figsize=(6,6))
ext.plot(ax=ax, color='grey')
v2017.plot(ax=ax, color='r')

#for x, y, label in zip(v2017.geometry.x, v2017.geometry.y, v2017.Ref_sond):
#    ax.annotate(label, xy=(x, y), xytext=(3, 3), textcoords="offset points")
label(v2017, 'Ref', ax)
v8.plot(ax=ax, color='g')
label(v8, 'Ref', ax)  
sond_ant_gdf.plot(ax=ax, color='b', markersize=1)
label(sond_ant_gdf, 'Ref', ax)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

En ignorant Sond_ant_gdf

In [27]:
v8['new_Ref'] = 'F'+v8['Ref']

In [28]:
BH = []
BH.extend(v2017['Ref_sond'].to_list())
BH.extend(v8['new_Ref'].to_list())

In [29]:
v8['ID'] = v8.new_Ref
v2017['ID'] = v2017.Ref_sond

In [30]:
v8['Z']=101.

In [31]:
gdf = gpd.GeoDataFrame(pd.concat([v8[['geometry','ID', 'Z']], v2017[['geometry','ID', 'Z']]], ignore_index=True))

In [32]:
fig, ax = plt.subplots(figsize=(6,6))
ext.plot(ax=ax, color='lightgrey')
gdf.plot(ax=ax, markersize=1)
label(gdf, 'ID', ax)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [33]:
gdf['X']=gdf.geometry.x
gdf['Y']=gdf.geometry.y

In [34]:
pd.DataFrame(gdf)[['ID', 'X', 'Y', 'Z']].to_csv('../../CF_data/synthese/boreholes.csv', index=False)

### Liste de tous les ouvrages références y compris les doublons entre jeux de données (et dans chaque jeu de donnée) 

In [35]:
BH = []
BH.extend(v2017['Ref_sond'].to_list())
BH.extend(v8['Ref'].to_list())
BH.extend(sond_ant_gdf['Ref_siterem'].to_list())

In [36]:
gdf = v2017.merge(sond_ant_gdf, how='outer', left_on='Ref_sond', right_on='Ref_siterem')
gdf[['Ref_sond', 'Ref_siterem']][0:100]

Unnamed: 0,Ref_sond,Ref_siterem
0,75,
1,76,
2,76b,76b
3,76c,76c
4,FP76,
...,...,...
95,,P1
96,,P7
97,,P8
98,,P12


In [37]:
gdf.query('Ref_sond==Ref_siterem')['Ref_sond']

2      76b
3      76c
7      78b
9      P80
10     P81
16     97b
18     P99
24    P106
25    P107
29    538a
61     P22
65     P25
Name: Ref_sond, dtype: object

In [38]:
gdf.query('Ref_sond!=Ref_sond')['Ref_siterem'].to_list()

[500,
 501,
 511,
 513,
 514,
 517,
 521,
 522,
 523,
 528,
 529,
 530,
 531,
 531,
 532,
 '540sup',
 541,
 541,
 542,
 544,
 545,
 'FP115',
 'MW1',
 'MW2',
 'P1',
 'P7',
 'P8',
 'P12',
 'P15a',
 'P32',
 'P34',
 'P44',
 'P48b',
 'P51',
 'P58',
 'P59',
 'P64',
 'P65',
 'P67',
 'P74',
 'P86',
 'P91',
 'P109',
 'P110',
 'P112',
 'P125',
 'P143',
 'P152',
 'P162',
 'P164',
 'P165',
 'P168',
 'P170',
 'P175',
 'P181b',
 'P186',
 'P188',
 'P197',
 'P200',
 'P205',
 'P209',
 'P214',
 'P219b',
 'P400',
 'P405',
 'P406',
 'P410',
 'P411',
 'P595',
 '76sup',
 '63sup',
 '49sup',
 '14sup',
 'P512',
 504,
 509,
 510,
 533,
 534,
 536,
 537,
 539,
 540,
 '540a',
 543,
 595,
 734,
 '26275b',
 'FP14prof',
 'FP14sup',
 'P28',
 'P129',
 'P142',
 'P166',
 'P167',
 'P172',
 'P180',
 'P403',
 'P412',
 502,
 502,
 '160prof',
 '160sup',
 508,
 512,
 'FP15',
 'FP49prof',
 'FP49sup',
 'FP63prof',
 'FP63sup',
 'FP76prof',
 'FP76sup',
 'FP117prof',
 'FP600',
 'FP595',
 2,
 3,
 4,
 5,
 6,
 9,
 10,
 11,
 13,
 19,


In [39]:
len(BH)

416

In [40]:
len(set(BH)) # uniques values

395

In [41]:
gdf.query('Ref_siterem!=Ref_siterem')['Ref_sond'].to_list()

['75',
 '76',
 'FP76',
 '77',
 '78',
 '79',
 '82',
 '83',
 '95',
 '96',
 '97',
 '98',
 '100',
 '102',
 '103',
 '104',
 '105',
 '108',
 '536',
 '537',
 'P1aM',
 'P1bM',
 'P2M',
 'P3M',
 'P4M',
 'P5M',
 'P6M',
 'P7aM',
 'P7bM',
 'P8M',
 'P9aM',
 'P9bM',
 'P10M',
 'P11M',
 'P12M',
 'P13M',
 'P14M',
 'P15bM',
 'P15aM',
 "P15a'M",
 'P16M',
 'P17aM',
 'P17bM',
 'P17cM',
 'P17dM',
 'F18a',
 'F18b',
 'P18c',
 'P19',
 'P20',
 'P21',
 'P23',
 'F24a',
 'P24b',
 'P26',
 'F27a',
 'F27b',
 'F27c',
 'P27d']

In [42]:
gdf.columns

Index(['Ref_GIS_x', 'Ref_sond', 'XL72', 'YL72', 'Z', 'Refus', 'Date',
       'Prof_sond', 'Cote_fond', 'RB', 'ALL', 'S_A', 'S_S', 'Base_RB',
       'Cote_B_RB', 'Top_All', 'Top_SA', 'Top_SS', 'geometry_x', 'Ref_x', 'ID',
       'Ref_GIS_y', 'Ref_siterem', 'X', 'Y', 'Type', 'Ref_y', 'geometry_y'],
      dtype='object')

In [43]:
for idx, row in gdf.iterrows():
    if row['geometry_x'] is None:
        if row['geometry_y'] is not None:
            gdf.loc[idx, 'distance'] = 0.
        else:
            gdf.loc[idx, 'distance'] = None
    else:
        if row['geometry_y'] is None:
            gdf.loc[idx, 'distance'] = 0.
        else:
            gdf.loc[idx, 'distance'] = row['geometry_x'].distance(row['geometry_y'])

In [44]:
gdf.query('distance != distance') # recherche de forages n'ayant aucune coordonnées

Unnamed: 0,Ref_GIS_x,Ref_sond,XL72,YL72,Z,Refus,Date,Prof_sond,Cote_fond,RB,...,Ref_x,ID,Ref_GIS_y,Ref_siterem,X,Y,Type,Ref_y,geometry_y,distance


In [725]:
gdf_viewer(gdf.query('distance>0.'))

Rows : 8, columns : 29


interactive(children=(IntSlider(value=8, description='rows', max=8, min=8, readout=False), IntSlider(value=15,…

## Lecture d'autres fichiers (1 ou plusieurs feuilles)

1- Regrouper les infos sur les 'ID, X, Y, Z, Long' pour définir la géométrie des BH

2- Regrouper les infos sur les 'ID, X, Y, Z, Type' pour définir la géométrie des BH

3- Regrouper les infos sur la lithologie (intervales) et les lier aux ID

4- ...

### 1- Collecte des informations sur la géométrie des forages

#### ../../CF_data//Data_UMONS/Logs_forages_vUmons_2018-03-20.xlsx
* Feuille : 'Sond2017v3'

In [950]:
For_synth=gpd.GeoDataFrame({})

In [951]:
tmp_for = pd.read_excel('../../CF_data//Data_UMONS/Logs_forages_vUmons_2018-03-20.xlsx', sheet_name='Sond2017v3')#, skiprows=1)
gdf_viewer(tmp_for)

Rows : 89, columns : 32


interactive(children=(IntSlider(value=10, description='rows', max=89, min=10, readout=False), IntSlider(value=…

In [952]:
tmp_for.columns

Index(['Ref_GIS', 'Ref_sond', 'XL72', 'YL72', 'Z', 'Refus', 'Date',
       'Prof_sond', 'Cote_fond', 'RB', 'ALL', 'S_A', 'S_S', 'Base_RB',
       'Cote_B_RB', 'Top_All', 'Top_SA', 'Top_SS', 'Unnamed: 18',
       'Unnamed: 19', 'Unnamed: 20', 'Unnamed: 21', 'Unnamed: 22',
       'Unnamed: 23', 'Unnamed: 24', 'Unnamed: 25', 'Unnamed: 26',
       'Unnamed: 27', 'Unnamed: 28', 'Unnamed: 29', 'Unnamed: 30',
       'Unnamed: 31'],
      dtype='object')

In [953]:
tmp_for.rename(columns={'Ref_sond':'Ref_x','XL72':'X', 'YL72':'Y', 'Z (m)':'Z',
        'Prof_sond':'Long', 'Base des Remblais (m)':'Base_RB',
        'Sommet des Alluvions (m)':'Top_All', 'Sommet du Socle altéré (m)':'Top_SA',
        'Sommet du Socle sain (m)':'Top_SS'}, inplace=True)

tmp_for.drop(columns=[tmp_for.columns.to_list()[x] for x in range(len(tmp_for.columns)) 
       if re.compile(r"Unnamed|Ref_GIS").match(tmp_for.columns.to_list()[x])], axis=1, inplace=True) 

tmp_for.columns

Index(['Ref_x', 'X', 'Y', 'Z', 'Refus', 'Date', 'Long', 'Cote_fond', 'RB',
       'ALL', 'S_A', 'S_S', 'Base_RB', 'Cote_B_RB', 'Top_All', 'Top_SA',
       'Top_SS'],
      dtype='object')

In [954]:
tmp_for=tmp_for[['Date', 'Ref_x', 'X', 'Y', 'Z', 'Long', 'Refus', 'Base_RB','Top_All', 'Top_SA','Top_SS',]]

In [955]:
tmp_for

Unnamed: 0,Date,Ref_x,X,Y,Z,Long,Refus,Base_RB,Top_All,Top_SA,Top_SS
0,2016-12-31,75,152836.000000,122609.000000,102.652,5.7,0,4.3,4.3,,
1,2016-12-31,76,152867.000000,122611.000000,102.751,1.2,1,,,,
2,2016-12-31,76b,152867.000000,122610.000000,102.751,3.0,1,,,,
3,2016-12-31,76c,152867.000000,122608.000000,102.751,2.5,1,,,,
4,2016-12-31,FP76,152860.000000,122608.000000,102.727,18.5,0,8.0,,8.0,12.5
...,...,...,...,...,...,...,...,...,...,...,...
84,NaT,F30,152887.892567,122581.184654,9999.000,9999.0,9,,,,
85,NaT,F31,152885.916612,122578.766886,9999.000,9999.0,9,,,,
86,NaT,F32,152885.673928,122581.757054,9999.000,9999.0,9,,,,
87,NaT,F40,152883.685780,122577.833364,9999.000,9999.0,9,,,,


In [956]:
for i,j in tmp_for.iterrows():
    if tmp_for.loc[i, 'Refus']==1:
        tmp_for.loc[i, 'Refus']='R'
    else :
        tmp_for.loc[i, 'Refus']='-'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


In [957]:
For_synth=copy.deepcopy(tmp_for) #save tmp_for

In [958]:
#tmp_for = tmp_for.replace(np.nan, '', regex=True) #remove all 'NAN'
#pd.concat([v8[['geometry','ID', 'Z']], v2017[['geometry','ID', 'Z']]], ignore_index=True)

* Feuille : ''DatabaseSOL20101005''

In [959]:
tmp_for = pd.read_excel('../../CF_data//Data_UMONS/Logs_forages_vUmons_2018-03-20.xlsx', sheet_name='DatabaseSOL20101005')#, skiprows=1)
gdf_viewer(tmp_for)

Rows : 758, columns : 26


interactive(children=(IntSlider(value=10, description='rows', max=758, min=10, readout=False), IntSlider(value…

In [960]:
tmp_for.columns

Index(['ID', 'CAMPAGNE', 'ZONE', 'N', 'AFFECTATIO', 'X', 'Y', 'Z',
       'PROF_FORAG', 'N_ECH', 'MIN_ECH', 'MAX_ECH', 'COUCHE', 'SOUMIS',
       'TERRAIN', 'DESCRIPTIO', 'INTENSITÉ', 'MIN_ORGANO', 'MAX_ORGANO', 'ML',
       'CN', 'BTEXS', 'HAP', 'EOX', 'HT', 'IPH'],
      dtype='object')

In [961]:
tmp_for=tmp_for[['N', 'X', 'Y', 'Z', 'PROF_FORAG', 'CAMPAGNE']]
tmp_for.rename(columns={'N':'Ref_y','PROF_FORAG':'Long', 'CAMPAGNE':'Societe'}, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


In [963]:
set(tmp_for['Societe'])

{'SBS', 'SITERE'}

In [968]:
for i,j in tmp_for.iterrows():
    if tmp_for.loc[i, 'Societe']=='SITERE':
        tmp_for.loc[i, 'Societe']='SITEREM'
    #else :
    #    tmp_for.loc[i, 'Refus']='-'

In [969]:
merge_tmp=For_synth.merge(tmp_for, how='outer', left_on='Ref_x', right_on='Ref_y')
gdf_viewer(merge_tmp)

Rows : 846, columns : 17


interactive(children=(IntSlider(value=10, description='rows', max=846, min=10, readout=False), IntSlider(value…

In [970]:
len(set(merge_tmp.query("Ref_x==Ref_y")['Ref_y'])), len(merge_tmp)

(1, 846)

In [971]:
geom = gpd.GeoSeries(merge_tmp.apply(lambda x: Point(x['X_x'], x['Y_x']),1),crs={'init': 'epsg:31370'})
gtmp = gpd.GeoDataFrame(merge_tmp, geometry=geom, crs="EPSG:31370")

  return _prepare_from_string(" ".join(pjargs))


In [972]:
gtmp['geometry_y'] = gpd.GeoSeries(merge_tmp.apply(lambda x: Point(x['X_y'], x['Y_y']),1),crs={'init': 'epsg:31370'})
gtmp.rename_geometry('geometry_x', inplace=True)

In [973]:
for idx, row in gtmp.iterrows():
    if row['geometry_x'] is None:
        if row['geometry_y'] is not None:
            gtmp.loc[idx, 'distance'] = 0.
        else:
            gtmp.loc[idx, 'distance'] = None
    else:
        if row['geometry_y'] is None:
            gtmp.loc[idx, 'distance'] = 0.
        else:
            gtmp.loc[idx, 'distance'] = row['geometry_x'].distance(row['geometry_y'])

In [975]:
gtmp.query('distance<5')

Unnamed: 0,Date,Ref_x,X_x,Y_x,Z_x,Long_x,Refus,Base_RB,Top_All,Top_SA,Top_SS,Ref_y,X_y,Y_y,Z_y,Long_y,Societe,geometry_x,geometry_y,distance
4,2016-12-31,FP76,152860.0,122608.0,102.727,18.5,-,8.0,,8.0,12.5,FP76,152861.0,122608.0,10273,19.0,SITEREM,POINT (152860.000 122608.000),POINT (152861.000 122608.000),1.0
5,2016-12-31,FP76,152860.0,122608.0,102.727,18.5,-,8.0,,8.0,12.5,FP76,152861.0,122608.0,10273,19.0,SITEREM,POINT (152860.000 122608.000),POINT (152861.000 122608.000),1.0
6,2016-12-31,FP76,152860.0,122608.0,102.727,18.5,-,8.0,,8.0,12.5,FP76,152861.0,122608.0,10273,19.0,SITEREM,POINT (152860.000 122608.000),POINT (152861.000 122608.000),1.0
7,2016-12-31,FP76,152860.0,122608.0,102.727,18.5,-,8.0,,8.0,12.5,FP76,152861.0,122608.0,10273,19.0,SITEREM,POINT (152860.000 122608.000),POINT (152861.000 122608.000),1.0
8,2016-12-31,FP76,152860.0,122608.0,102.727,18.5,-,8.0,,8.0,12.5,FP76,152861.0,122608.0,10273,19.0,SITEREM,POINT (152860.000 122608.000),POINT (152861.000 122608.000),1.0
9,2016-12-31,FP76,152860.0,122608.0,102.727,18.5,-,8.0,,8.0,12.5,FP76,152861.0,122608.0,10273,3.0,SBS,POINT (152860.000 122608.000),POINT (152861.000 122608.000),1.0
10,2016-12-31,FP76,152860.0,122608.0,102.727,18.5,-,8.0,,8.0,12.5,FP76,152861.0,122608.0,10273,3.0,SBS,POINT (152860.000 122608.000),POINT (152861.000 122608.000),1.0
11,2016-12-31,FP76,152860.0,122608.0,102.727,18.5,-,8.0,,8.0,12.5,FP76,152861.0,122608.0,10273,3.0,SBS,POINT (152860.000 122608.000),POINT (152861.000 122608.000),1.0


In [976]:
gdf_viewer(gtmp)

Rows : 846, columns : 20


interactive(children=(IntSlider(value=10, description='rows', max=846, min=10, readout=False), IntSlider(value…

In [979]:
gtmp.to_csv('../../CF_data/synthese/Boreholes_2.csv', index=False)

* Feuille : 'Sond_all_loc'

In [926]:
tmp_for = pd.read_excel('../../CF_data//Data_UMONS/Logs_forages_vUmons_2018-03-20.xlsx', sheet_name='Sond_all_loc')#, skiprows=1)
gdf_viewer(tmp_for)

Rows : 89, columns : 6


interactive(children=(IntSlider(value=10, description='rows', max=89, min=10, readout=False), IntSlider(value=…

In [927]:
tmp_for.rename(columns={'Ref_sond':'Ref','XL72':'X', 'YL72':'Y'}, inplace=True)
tmp_for=tmp_for[['Ref_x', 'X', 'Y', 'Z']]

In [928]:
gdf_viewer(tmp_for), gdf_viewer(gtmp)

Rows : 89, columns : 4


interactive(children=(IntSlider(value=10, description='rows', max=89, min=10, readout=False), IntSlider(value=…

Rows : 846, columns : 19


interactive(children=(IntSlider(value=10, description='rows', max=846, min=10, readout=False), IntSlider(value…

(None, None)

In [None]:
datedef='2010'

if 'Date' in gtmp.columns:
    gtmp['Ref'] = gtmp['Date'].apply(lambda x : str(x.year))+ '-' + gtmp['Ref'].apply(lambda x : str(x))
else :
    print("'Date' is not in columns' name, using default date")
    gtmp['Ref'] = datedef + '-' + gtmp['Ref'].apply(lambda x : str(x))

In [940]:
merge_tmp = gtmp.merge(tmp_for, how='outer', left_on='Ref', right_on='Ref')
gdf_viewer(merge_tmp)

Rows : 846, columns : 22


interactive(children=(IntSlider(value=10, description='rows', max=846, min=10, readout=False), IntSlider(value…

In [941]:
merge_tmp.query("X_x==X and Y_y==Y")

Unnamed: 0,Date,Ref,X_x,Y_x,Z_x,Long_x,Refus,Base_RB,Top_All,Top_SA,...,Y_y,Z_y,Long_y,Societe,geometry_x,geometry_y,distance,X,Y,Z
4,2016-12-31,FP76,152860.0,122608.0,102.727,18.5,-,8.0,,8.0,...,122608.0,10273,19.0,SITEREM,POINT (152860.000 122608.000),POINT (152861.000 122608.000),1.0,152860.0,122608.0,102.727
5,2016-12-31,FP76,152860.0,122608.0,102.727,18.5,-,8.0,,8.0,...,122608.0,10273,19.0,SITEREM,POINT (152860.000 122608.000),POINT (152861.000 122608.000),1.0,152860.0,122608.0,102.727
6,2016-12-31,FP76,152860.0,122608.0,102.727,18.5,-,8.0,,8.0,...,122608.0,10273,19.0,SITEREM,POINT (152860.000 122608.000),POINT (152861.000 122608.000),1.0,152860.0,122608.0,102.727
7,2016-12-31,FP76,152860.0,122608.0,102.727,18.5,-,8.0,,8.0,...,122608.0,10273,19.0,SITEREM,POINT (152860.000 122608.000),POINT (152861.000 122608.000),1.0,152860.0,122608.0,102.727
8,2016-12-31,FP76,152860.0,122608.0,102.727,18.5,-,8.0,,8.0,...,122608.0,10273,19.0,SITEREM,POINT (152860.000 122608.000),POINT (152861.000 122608.000),1.0,152860.0,122608.0,102.727
9,2016-12-31,FP76,152860.0,122608.0,102.727,18.5,-,8.0,,8.0,...,122608.0,10273,3.0,SBS,POINT (152860.000 122608.000),POINT (152861.000 122608.000),1.0,152860.0,122608.0,102.727
10,2016-12-31,FP76,152860.0,122608.0,102.727,18.5,-,8.0,,8.0,...,122608.0,10273,3.0,SBS,POINT (152860.000 122608.000),POINT (152861.000 122608.000),1.0,152860.0,122608.0,102.727
11,2016-12-31,FP76,152860.0,122608.0,102.727,18.5,-,8.0,,8.0,...,122608.0,10273,3.0,SBS,POINT (152860.000 122608.000),POINT (152861.000 122608.000),1.0,152860.0,122608.0,102.727


In [948]:
fig, ax = plt.subplots(figsize=(6,6))
merge_tmp.plot(ax=ax, color='r', markersize=1)
#tmp_for.plot(ax=ax, markersize=1)
label(merge_tmp, 'Ref', ax)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [253]:
tmp_for.columns, len(tmp_for), "---------------------------", For_synth.columns, len(For_synth)

(Index(['Ref_sond', 'X', 'Y', 'Z', 'Refus', 'Date', 'Prof', 'Cote_fond', 'RB',
        'ALL', 'S_A', 'S_S', 'Base_RB', 'Cote_B_RB', 'Top_All', 'Top_SA',
        'Top_SS'],
       dtype='object'),
 89,
 '---------------------------',
 Index(['Ref_sond', 'X', 'Y', 'Z', 'Refus', 'Prof', 'RB', 'ALL', 'S_A', 'S_S',
        'Base_RB', 'Top_All', 'Top_SA', 'Top_SS'],
       dtype='object'),
 30)

In [243]:
#gdf_viewer(pd.concat([For_synth, tmp_for], ignore_index=True))

In [722]:
tmp_for.rename(columns={'Réf. sond.':'Ref','XL72 (m)':'X', 'YL72 (m)':'Y', 'Z (m)':'Z',
        'Prof. sond. (m)':'Long', 'Base des Remblais (m)':'Base_RB',
        'Sommet des Alluvions (m)':'Top_All', 'Sommet du Socle altéré (m)':'Top_SA',
        'Sommet du Socle sain (m)':'Top_SS'}, inplace=True)
tmp_for.columns

Index(['Ref', 'X', 'Y', 'Z', 'Refus', 'Date', 'Long', 'Cote_fond', 'RB', 'ALL',
       'S_A', 'S_S', 'Base_RB', 'Cote_B_RB', 'Top_All', 'Top_SA', 'Top_SS'],
      dtype='object')

In [192]:
For_synth = For_synth.merge(tmp_for, how='outer', left_on='Ref_sond', right_on='Ref_sond')

In [193]:
tmp_for = pd.read_excel('../../CF_data//Data_UMONS/Logs_forages_vUmons_2018-03-20.xlsx', sheet_name='Sond2017v2')#, skiprows=1)
tmp_for.rename(columns={'XL72':'X', 'YL72':'Y', 'Prof_sond':'Long'}, inplace=True)
tmp_for.drop(columns=[tmp_for.columns.to_list()[x] for x in range(len(tmp_for.columns)) 
       if re.compile(r"Unnamed|Ref_GIS").match(tmp_for.columns.to_list()[x])], axis=1, inplace=True) 

In [252]:
For_synth.rename(columns={'XL72 (m)':'X', 'YL72 (m)':'Y', 'Z (m)':'Z',
        'Prof. sond. (m)':'Long', 'Base des Remblais (m)':'Base_RB',
        'Sommet des Alluvions (m)':'Top_All', 'Sommet du Socle altéré (m)':'Top_SA',
        'Sommet du Socle sain (m)':'Top_SS'}, inplace=True)

In [256]:
For_synth.merge(tmp_for, how='outer', left_on='Ref_sond', right_on='Ref_sond')

Unnamed: 0,Ref_sond,X_x,Y_x,Z_x,Refus_x,Prof_x,RB_x,ALL_x,S_A_x,S_S_x,...,Cote_fond,RB_y,ALL_y,S_A_y,S_S_y,Base_RB_y,Cote_B_RB,Top_All_y,Top_SA_y,Top_SS_y
0,75,152836.0,122609.0,102.652,-,5.7,x,x,,,...,96.952,1.0,1.0,0.0,0.0,4.3,98.352,4.3,,
1,76,152867.0,122611.0,102.751,R,1.2,x,,,,...,101.551,1.0,0.0,0.0,0.0,,,,,
2,76b,152867.0,122610.0,102.751,R,3.0,x,,,,...,99.751,1.0,0.0,0.0,0.0,,,,,
3,76c,152867.0,122608.0,102.751,R,2.5,x,,,,...,100.251,1.0,0.0,0.0,0.0,,,,,
4,FP76*,152860.0,122608.0,102.727,-,18.5,x,,x,x,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
86,F30,,,,,,,,,,...,9999.000,9.0,9.0,9.0,9.0,,,,,
87,F31,,,,,,,,,,...,9999.000,9.0,9.0,9.0,9.0,,,,,
88,F32,,,,,,,,,,...,9999.000,9.0,9.0,9.0,9.0,,,,,
89,F40,,,,,,,,,,...,9999.000,9.0,9.0,9.0,9.0,,,,,


In [180]:
For_synth = For_synth.merge(tmp_for, how='outer', left_on='Réf. sond.', right_on='Ref_sond')

In [49]:
v8['X'], v8['Y'] = v8.geometry.x, v8.geometry.y
v8, tmp_gdf2  = v8.astype({'Ref':str}), tmp_gdf2.astype({'Ref_puits':str})

NameError: name 'tmp_gdf2' is not defined

In [None]:
gdf_viewer(tmp_gdf1)

In [None]:
req_gdf = tmp_gdf1.merge(tmp_gdf2, how='inner', left_on='N_piezo.', right_on='Ref_puits')
req_gdf = req_gdf.merge(v8, how='inner', left_on='Ref_puits', right_on='Ref')
req_gdf.rename({'X [m]':'X', 'Y [m]':'Y'}, inplace=True, axis=1)
gdf_viewer(req_gdf)