# Concatenate boreholes

In [1]:
%matplotlib widget

In [2]:
from ipywidgets import interact, IntSlider
from IPython.display import display  

import re, copy
import datetime
import numpy as np
import geopandas as gpd
from shapely.geometry import Point
import pandas as pd
import matplotlib.pyplot as plt

In [3]:
def gdf_viewer(df, rows=10, cols=14, step_r=1, step_c=1):# Afficher les dataframes au moyen d'un widget (affichage dynamique)
    
    print(f'Rows : {df.shape[0]}, columns : {df.shape[1]}')
    
    @interact(last_row=IntSlider(min=min(rows, df.shape[0]),max=df.shape[0],step=step_r,description='rows',
                                 readout=False,disabled=False,continuous_update=True,orientation='horizontal',
                                 slider_color='blue'),
              
              last_column=IntSlider(min=min(cols, df.shape[1]),max=df.shape[1],step=step_c,
                                    description='columns',readout=False,disabled=False,continuous_update=True,
                                    orientation='horizontal',slider_color='blue'))
    
    def _freeze_header(last_row, last_column):
        display(df.iloc[max(0, last_row-rows):last_row,
                        max(0, last_column-cols):last_column])

In [4]:
def genID_dated(gdf, col='Ref', datedef='No_date', datecol=None):
    """
    Generate a ID-dated reference for a (geo)dataframe
    
    Parameters
    -----------

    gdf : pandas.(Geo)Dataframe
    col : Reference column
    datedef : Default data's date
    datecol: Column containing dates
    """
    print('Generation of ID-dated...')
    
    if 'Date' in gdf.columns and datedef=='No_date' and datecol is None:
        print("Using 'Date' column in the (geo)dataframe !")
        gdf[col] = gdf['Date'].apply(lambda x : str(x.year))+ '-' + gdf[col].apply(lambda x : str(x))
        
    elif datedef!='No_date':
        print("Using default date given !")
        gdf[col] = datedef + '-' + gdf[col].apply(lambda x : str(x))
        
    elif datecol is not None:
        print("Using column '", datecol, "' in the (geo)dataframe !")
        gdf[col] = gdf[datecol].apply(lambda x : str(x.year))+ '-' + gdf[col].apply(lambda x : str(x))
        
    else:
        print("No date given and no column 'Date' is the (geo)dataframe, Process cancelled !")
        
    return gdf[col]

In [5]:
def gdf_geom(gdf):
    geom = gpd.GeoSeries(gdf.apply(lambda x: Point(x['X'], x['Y']),1),crs={'init': 'epsg:31370'})
    gdf = gpd.GeoDataFrame(gdf, geometry=geom, crs="EPSG:31370")
    
    return gdf.head(5)

## Extraction des dates de forages du type de forage et du foreur de l'étude de caractérisation

### Informations sur les forages de l'étude de caractérisation de 2010

In [19]:
Logs_forages = pd.read_excel('../../CF_data/Data_SITEREM/Logs forages.xls',sheet_name='Forages, tranchées')
gdf_viewer(Logs_forages)

Rows : 2016, columns : 16


interactive(children=(IntSlider(value=10, description='rows', max=2016, min=10, readout=False), IntSlider(valu…

In [20]:
Logs_forages = Logs_forages[['Date', 'N°', 'Id', 'Profondeur', 'Description', 'Piézo', 'Unnamed: 6',
                             'Gouge Ø75', 'MFT Ø145', 'carottier', 'tarrière', 'Liner Ø60']] 

In [21]:
Logs_forages.rename({'Gouge Ø75':'Gouge_75', 'Liner Ø60': 'Liner_60'}, axis=1, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


In [22]:
Logs_forages.loc[Logs_forages.query('Profondeur!=Profondeur' ).index,'Profondeur']=''

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


In [23]:
forages = Logs_forages.query('Profondeur.str.contains("Forage") and Profondeur!="Forage bloqué"', engine='python')
#Logs_forages["Id"].astype(str).str.contains('T')

In [24]:
forages['Ref'] = forages['Profondeur'].apply(lambda x: x.split('Forage ')[1].replace(' ', ''))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  forages['Ref'] = forages['Profondeur'].apply(lambda x: x.split('Forage ')[1].replace(' ', ''))


In [25]:
forages['tmp'] = forages['Piézo'].replace(np.nan, '', regex=True) + '-' + forages['Unnamed: 6'].replace(np.nan, '', regex=True) + '-' + forages['Gouge_75'].replace(np.nan, '', regex=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  forages['tmp'] = forages['Piézo'].replace(np.nan, '', regex=True) + '-' + forages['Unnamed: 6'].replace(np.nan, '', regex=True) + '-' + forages['Gouge_75'].replace(np.nan, '', regex=True)


In [26]:
#forages['Piezo'] = forages['tmp'].apply(lambda x: x.lstrip(' ').split('-')[0].lower()=='x') #1er code
forages['Type'] = forages['tmp'].apply(lambda x: 'Piezo' if x.lstrip(' ').split('-')[0].lower()=='x' else '')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  forages['Type'] = forages['tmp'].apply(lambda x: 'Piezo' if x.lstrip(' ').split('-')[0].lower()=='x' else '')


In [27]:
forages['Societe'] = forages['tmp'].apply(lambda x: x.rstrip('-').split('-')[-1].upper()) 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  forages['Societe'] = forages['tmp'].apply(lambda x: x.rstrip('-').split('-')[-1].upper())


In [28]:
forages.loc[123, 'Societe']='SITEREM'

In [29]:
forages = forages[['Date', 'Ref', 'Societe', 'Type']]
forages

Unnamed: 0,Date,Ref,Societe,Type
0,2010-03-11,1,SBS ENVIRONNEMENT,Piezo
9,2010-03-11,2,SBS ENVIRONNEMENT,
19,2010-03-11,3,SBS ENVIRONNEMENT,
27,2010-03-10,4,SBS ENVIRONNEMENT,
33,2010-03-12,5,SBS ENVIRONNEMENT,
...,...,...,...,...
1973,2010-08-03,612c,SBS ENVIRONNEMENT,
1977,2010-08-03,734,SITEREM,Piezo
1984,2010-09-01,FP595,SITEREM,Piezo
1994,2010-09-02,540,SITEREM,Piezo


In [30]:
str(forages['Date'][0].year)

'2010'

In [31]:
forages['ID'] = forages['Date'].apply(lambda x : str(x.year) + '-') + forages['Ref']
forages.drop('Ref', axis=1, inplace=True)

In [32]:
forages

Unnamed: 0,Date,Societe,Type,ID
0,2010-03-11,SBS ENVIRONNEMENT,Piezo,2010-1
9,2010-03-11,SBS ENVIRONNEMENT,,2010-2
19,2010-03-11,SBS ENVIRONNEMENT,,2010-3
27,2010-03-10,SBS ENVIRONNEMENT,,2010-4
33,2010-03-12,SBS ENVIRONNEMENT,,2010-5
...,...,...,...,...
1973,2010-08-03,SBS ENVIRONNEMENT,,2010-612c
1977,2010-08-03,SITEREM,Piezo,2010-734
1984,2010-09-01,SITEREM,Piezo,2010-FP595
1994,2010-09-02,SITEREM,Piezo,2010-540


In [52]:
forages[['ID', 'Type', 'Societe']].to_csv('../../CF_data/synthese/Result_traitem/boreholes_No_XYZ.csv', index=False) #No position

### Lecture des fichiers de position des forages du pilote (version 8)

In [53]:
v8 = gpd.read_file('../../CF_data/Data_UMONS/SIG/Pilote_v8.0.shp')
v8.query('GPS2021=="NON"')

Unnamed: 0,Id,Type,Pilote,Ref,diam,Valid,Syscal,Impo,Inox,Impo_perm,GPS2021,geometry
18,0,4,1,400,0,0,0,0,0,0,NON,POINT (152885.032 122585.367)
32,0,5,1,500,0,0,0,0,0,0,NON,POINT (152890.824 122583.580)
63,0,3,1,306,0,0,0,0,0,0,NON,POINT (152867.452 122588.877)


### forages phase 1

In [54]:
v2017 = gpd.read_file('../../CF_data/Data_UMONS/SIG/sondages_v2017-05-18.shp')
gdf_viewer(v2017, 10, 15)

Rows : 71, columns : 19


interactive(children=(IntSlider(value=10, description='rows', max=71, min=10, readout=False), IntSlider(value=…

In [55]:
v2017['Ref'] = v2017['Date'].apply(lambda x : str(datetime.datetime.strptime(x, '%Y-%m-%d').year) + '-') + v2017['Ref_sond']

### lecture des données des sondages antérieurs

In [56]:
sond_ant = pd.read_excel('../../CF_data/Data_UMONS/Logs_forages_vUmons_2018-03-20.xlsx', sheet_name='Sond_ant')

In [57]:
sond_ant['Ref'] = sond_ant['Ref_siterem'].apply(lambda x: '2010-' + str(x)) # Identifiant sur base de l'année

In [58]:
sond_ant

Unnamed: 0,Ref_GIS,Ref_siterem,X,Y,Type,Ref
0,1,500,152324.0,122975.0,Eau_RB,2010-500
1,2,501,152368.0,122909.0,Eau_RB,2010-501
2,3,511,152371.0,122771.0,Eau_RB,2010-511
3,4,513,152424.0,122752.0,Eau_RB,2010-513
4,5,514,152476.0,122786.0,Eau_RB,2010-514
...,...,...,...,...,...,...
252,253,520,152644.0,122791.0,SOL,2010-520
253,254,524,152570.0,122789.0,SOL,2010-524
254,255,525,152548.0,122783.0,SOL,2010-525
255,256,526,152553.0,122757.0,SOL,2010-526


In [59]:
sond_ant_gdf = gpd.GeoDataFrame(sond_ant, geometry=gpd.points_from_xy(sond_ant.X, sond_ant.Y, crs=str('EPSG:31370')))

### Lecture de l'extension du projet MEMORIS

In [60]:
ext = gpd.read_file('../../CF_data/Data_UMONS/SIG/Site_Memoris.shp')

### Affichage carte

In [61]:
def label(gdf, field, ax, size='xx-small'):
    for x, y, label in zip(gdf.geometry.x, gdf.geometry.y, gdf[field]):
        ax.annotate(label, xy=(x, y), xytext=(3, 3), textcoords='offset points', size=size)

In [62]:
fig, ax = plt.subplots(figsize=(6,6))
ext.plot(ax=ax, color='grey')
v2017.plot(ax=ax, color='r')

#for x, y, label in zip(v2017.geometry.x, v2017.geometry.y, v2017.Ref_sond):
#    ax.annotate(label, xy=(x, y), xytext=(3, 3), textcoords="offset points")
label(v2017, 'Ref', ax)
v8.plot(ax=ax, color='g')
label(v8, 'Ref', ax)  
sond_ant_gdf.plot(ax=ax, color='b', markersize=1)
label(sond_ant_gdf, 'Ref', ax)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

En ignorant Sond_ant_gdf

In [63]:
v8['new_Ref'] = 'F'+v8['Ref']

In [64]:
BH = []
BH.extend(v2017['Ref_sond'].to_list())
BH.extend(v8['new_Ref'].to_list())

In [65]:
v8['ID'] = v8.new_Ref
v2017['ID'] = v2017.Ref_sond

In [66]:
v8['Z']=101.

In [67]:
gdf = gpd.GeoDataFrame(pd.concat([v8[['geometry','ID', 'Z']], v2017[['geometry','ID', 'Z']]], ignore_index=True))

In [68]:
fig, ax = plt.subplots(figsize=(6,6))
ext.plot(ax=ax, color='lightgrey')
gdf.plot(ax=ax, markersize=1)
label(gdf, 'ID', ax)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [69]:
gdf['X']=gdf.geometry.x
gdf['Y']=gdf.geometry.y

In [71]:
gdf[['ID', 'X', 'Y', 'Z']].to_csv('../../CF_data/synthese/Result_traitem/boreholes_1.csv', index=False)

### Liste de tous les ouvrages références y compris les doublons entre jeux de données (et dans chaque jeu de donnée) 

In [72]:
BH = []
BH.extend(v2017['Ref_sond'].to_list())
BH.extend(v8['Ref'].to_list())
BH.extend(sond_ant_gdf['Ref_siterem'].to_list())

In [73]:
gdf = v2017.merge(sond_ant_gdf, how='outer', left_on='Ref_sond', right_on='Ref_siterem')
gdf[['Ref_sond', 'Ref_siterem']][0:100]

Unnamed: 0,Ref_sond,Ref_siterem
0,75,
1,76,
2,76b,76b
3,76c,76c
4,FP76,
...,...,...
95,,P1
96,,P7
97,,P8
98,,P12


In [74]:
gdf.query('Ref_sond==Ref_siterem')['Ref_sond']

2      76b
3      76c
7      78b
9      P80
10     P81
16     97b
18     P99
24    P106
25    P107
29    538a
61     P22
65     P25
Name: Ref_sond, dtype: object

In [75]:
gdf.query('Ref_sond!=Ref_sond')['Ref_siterem'].to_list()

[500,
 501,
 511,
 513,
 514,
 517,
 521,
 522,
 523,
 528,
 529,
 530,
 531,
 531,
 532,
 '540sup',
 541,
 541,
 542,
 544,
 545,
 'FP115',
 'MW1',
 'MW2',
 'P1',
 'P7',
 'P8',
 'P12',
 'P15a',
 'P32',
 'P34',
 'P44',
 'P48b',
 'P51',
 'P58',
 'P59',
 'P64',
 'P65',
 'P67',
 'P74',
 'P86',
 'P91',
 'P109',
 'P110',
 'P112',
 'P125',
 'P143',
 'P152',
 'P162',
 'P164',
 'P165',
 'P168',
 'P170',
 'P175',
 'P181b',
 'P186',
 'P188',
 'P197',
 'P200',
 'P205',
 'P209',
 'P214',
 'P219b',
 'P400',
 'P405',
 'P406',
 'P410',
 'P411',
 'P595',
 '76sup',
 '63sup',
 '49sup',
 '14sup',
 'P512',
 504,
 509,
 510,
 533,
 534,
 536,
 537,
 539,
 540,
 '540a',
 543,
 595,
 734,
 '26275b',
 'FP14prof',
 'FP14sup',
 'P28',
 'P129',
 'P142',
 'P166',
 'P167',
 'P172',
 'P180',
 'P403',
 'P412',
 502,
 502,
 '160prof',
 '160sup',
 508,
 512,
 'FP15',
 'FP49prof',
 'FP49sup',
 'FP63prof',
 'FP63sup',
 'FP76prof',
 'FP76sup',
 'FP117prof',
 'FP600',
 'FP595',
 2,
 3,
 4,
 5,
 6,
 9,
 10,
 11,
 13,
 19,


In [76]:
len(BH)

416

In [77]:
len(set(BH)) # uniques values

395

In [78]:
gdf.query('Ref_siterem!=Ref_siterem')['Ref_sond'].to_list()

['75',
 '76',
 'FP76',
 '77',
 '78',
 '79',
 '82',
 '83',
 '95',
 '96',
 '97',
 '98',
 '100',
 '102',
 '103',
 '104',
 '105',
 '108',
 '536',
 '537',
 'P1aM',
 'P1bM',
 'P2M',
 'P3M',
 'P4M',
 'P5M',
 'P6M',
 'P7aM',
 'P7bM',
 'P8M',
 'P9aM',
 'P9bM',
 'P10M',
 'P11M',
 'P12M',
 'P13M',
 'P14M',
 'P15bM',
 'P15aM',
 "P15a'M",
 'P16M',
 'P17aM',
 'P17bM',
 'P17cM',
 'P17dM',
 'F18a',
 'F18b',
 'P18c',
 'P19',
 'P20',
 'P21',
 'P23',
 'F24a',
 'P24b',
 'P26',
 'F27a',
 'F27b',
 'F27c',
 'P27d']

In [79]:
gdf.columns

Index(['Ref_GIS_x', 'Ref_sond', 'XL72', 'YL72', 'Z', 'Refus', 'Date',
       'Prof_sond', 'Cote_fond', 'RB', 'ALL', 'S_A', 'S_S', 'Base_RB',
       'Cote_B_RB', 'Top_All', 'Top_SA', 'Top_SS', 'geometry_x', 'Ref_x', 'ID',
       'Ref_GIS_y', 'Ref_siterem', 'X', 'Y', 'Type', 'Ref_y', 'geometry_y'],
      dtype='object')

In [80]:
for idx, row in gdf.iterrows():
    if row['geometry_x'] is None:
        if row['geometry_y'] is not None:
            gdf.loc[idx, 'distance'] = 0.
        else:
            gdf.loc[idx, 'distance'] = None
    else:
        if row['geometry_y'] is None:
            gdf.loc[idx, 'distance'] = 0.
        else:
            gdf.loc[idx, 'distance'] = row['geometry_x'].distance(row['geometry_y'])

In [81]:
gdf.query('distance != distance') # recherche de forages n'ayant aucune coordonnées

Unnamed: 0,Ref_GIS_x,Ref_sond,XL72,YL72,Z,Refus,Date,Prof_sond,Cote_fond,RB,...,Ref_x,ID,Ref_GIS_y,Ref_siterem,X,Y,Type,Ref_y,geometry_y,distance


In [86]:
gdf.rename(columns={'Prof_sond':'Long'}, inplace=True)
gdf.loc[gdf.query('Refus==1').index, 'Refus']=True
gdf.loc[gdf.query('Refus==0').index, 'Refus']=False

In [87]:
gdf_viewer(gdf)

Rows : 316, columns : 29


interactive(children=(IntSlider(value=10, description='rows', max=316, min=10, readout=False), IntSlider(value…

In [88]:
pd.DataFrame(gdf)[['ID', 'X', 'Y', 'Z', 'Long','Refus']].to_csv('../../CF_data/synthese/Result_traitem/boreholes_1geom.csv', index=False)

//////////////////////////////////////--- Recupération de données des autres fichiers ---///////////////////////////////////////////////////////////////////////

## Lecture d'autres fichiers (1 ou plusieurs feuilles)

- Regrouper les infos sur les (ID, X, Y, Z, Long, Type) pour définir la géométrie des BH

- Regrouper les infos sur les (ID, X, Y, Z_top, Z_base, Type) pour définir les échantillons

- Regrouper les infos sur la lithologie (Z_top, Z_base) et les lier aux ID

- ...

### 1- Fichier : ../../CF_data//Data_UMONS/Logs_forages_vUmons_2018-03-20.xlsx
* **Feuille : 'Sond2017v3' (F)**

In [106]:
For_synth=gpd.GeoDataFrame({})

In [107]:
tmp_for = pd.read_excel('../../CF_data//Data_UMONS/Logs_forages_vUmons_2018-03-20.xlsx', sheet_name='Sond2017v3')#, skiprows=1)
gdf_viewer(tmp_for)

Rows : 89, columns : 32


interactive(children=(IntSlider(value=10, description='rows', max=89, min=10, readout=False), IntSlider(value=…

In [108]:
tmp_for.columns

Index(['Ref_GIS', 'Ref_sond', 'XL72', 'YL72', 'Z', 'Refus', 'Date',
       'Prof_sond', 'Cote_fond', 'RB', 'ALL', 'S_A', 'S_S', 'Base_RB',
       'Cote_B_RB', 'Top_All', 'Top_SA', 'Top_SS', 'Unnamed: 18',
       'Unnamed: 19', 'Unnamed: 20', 'Unnamed: 21', 'Unnamed: 22',
       'Unnamed: 23', 'Unnamed: 24', 'Unnamed: 25', 'Unnamed: 26',
       'Unnamed: 27', 'Unnamed: 28', 'Unnamed: 29', 'Unnamed: 30',
       'Unnamed: 31'],
      dtype='object')

In [109]:
tmp_for.rename(columns={'Ref_sond':'Ref_x','XL72':'X', 'YL72':'Y', 'Z (m)':'Z',
        'Prof_sond':'Long', 'Base des Remblais (m)':'Base_RB',
        'Sommet des Alluvions (m)':'Top_All', 'Sommet du Socle altéré (m)':'Top_SA',
        'Sommet du Socle sain (m)':'Top_SS'}, inplace=True)

tmp_for.drop(columns=[tmp_for.columns.to_list()[x] for x in range(len(tmp_for.columns)) 
       if re.compile(r"Unnamed|Ref_GIS").match(tmp_for.columns.to_list()[x])], axis=1, inplace=True) 

tmp_for.columns

Index(['Ref_x', 'X', 'Y', 'Z', 'Refus', 'Date', 'Long', 'Cote_fond', 'RB',
       'ALL', 'S_A', 'S_S', 'Base_RB', 'Cote_B_RB', 'Top_All', 'Top_SA',
       'Top_SS'],
      dtype='object')

In [110]:
tmp_for=tmp_for[['Date', 'Ref_x', 'X', 'Y', 'Z', 'Long', 'Refus', 'Base_RB','Top_All', 'Top_SA','Top_SS',]]

In [111]:
tmp_for.loc[tmp_for.query('Long==9999').index, 'Long']=np.nan
tmp_for.loc[tmp_for.query('Z==9999').index, 'Z']=np.nan

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


In [112]:
for i,j in tmp_for.iterrows():
    if tmp_for.loc[i, 'Refus']==1:
        tmp_for.loc[i, 'Refus']=True
    else :
        tmp_for.loc[i, 'Refus']=False

In [113]:
geom = gpd.GeoSeries(tmp_for.apply(lambda x: Point(x['X'], x['Y']),1),crs={'init': 'epsg:31370'})
tmp_for = gpd.GeoDataFrame(tmp_for, geometry=geom, crs="EPSG:31370")
tmp_for

  return _prepare_from_string(" ".join(pjargs))


Unnamed: 0,Date,Ref_x,X,Y,Z,Long,Refus,Base_RB,Top_All,Top_SA,Top_SS,geometry
0,2016-12-31,75,152836.000000,122609.000000,102.652,5.7,False,4.3,4.3,,,POINT (152836.000 122609.000)
1,2016-12-31,76,152867.000000,122611.000000,102.751,1.2,True,,,,,POINT (152867.000 122611.000)
2,2016-12-31,76b,152867.000000,122610.000000,102.751,3.0,True,,,,,POINT (152867.000 122610.000)
3,2016-12-31,76c,152867.000000,122608.000000,102.751,2.5,True,,,,,POINT (152867.000 122608.000)
4,2016-12-31,FP76,152860.000000,122608.000000,102.727,18.5,False,8.0,,8.0,12.5,POINT (152860.000 122608.000)
...,...,...,...,...,...,...,...,...,...,...,...,...
84,NaT,F30,152887.892567,122581.184654,,,False,,,,,POINT (152887.893 122581.185)
85,NaT,F31,152885.916612,122578.766886,,,False,,,,,POINT (152885.917 122578.767)
86,NaT,F32,152885.673928,122581.757054,,,False,,,,,POINT (152885.674 122581.757)
87,NaT,F40,152883.685780,122577.833364,,,False,,,,,POINT (152883.686 122577.833)


In [114]:
len(set(tmp_for['Ref_x']))

89

In [115]:
For_synth=copy.deepcopy(tmp_for) #save tmp_for

In [116]:
#tmp_for = tmp_for.replace(np.nan, '', regex=True) #remove all 'NAN'
#pd.concat([v8[['geometry','ID', 'Z']], v2017[['geometry','ID', 'Z']]], ignore_index=True)

* **Feuille : 'DatabaseSOL20101005' (F,E)**

In [117]:
tmp_for = pd.read_excel('../../CF_data//Data_UMONS/Logs_forages_vUmons_2018-03-20.xlsx', sheet_name='DatabaseSOL20101005')#, skiprows=1)
gdf_viewer(tmp_for)

Rows : 758, columns : 26


interactive(children=(IntSlider(value=10, description='rows', max=758, min=10, readout=False), IntSlider(value…

In [118]:
tmp_for.columns

Index(['ID', 'CAMPAGNE', 'ZONE', 'N', 'AFFECTATIO', 'X', 'Y', 'Z',
       'PROF_FORAG', 'N_ECH', 'MIN_ECH', 'MAX_ECH', 'COUCHE', 'SOUMIS',
       'TERRAIN', 'DESCRIPTIO', 'INTENSITÉ', 'MIN_ORGANO', 'MAX_ORGANO', 'ML',
       'CN', 'BTEXS', 'HAP', 'EOX', 'HT', 'IPH'],
      dtype='object')

In [119]:
tmp_for=tmp_for[['N', 'X', 'Y', 'Z', 'PROF_FORAG', 'CAMPAGNE']]
tmp_for.rename(columns={'N':'Ref_y','PROF_FORAG':'Long', 'CAMPAGNE':'Societe'}, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


In [120]:
set(tmp_for['Societe'])

{'SBS', 'SITERE'}

In [121]:
for i,j in tmp_for.iterrows():
    if tmp_for.loc[i, 'Societe']=='SITERE':
        tmp_for.loc[i, 'Societe']='SITEREM'
    #else :
    #    tmp_for.loc[i, 'Refus']='-'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


In [122]:
geom = gpd.GeoSeries(tmp_for.apply(lambda x: Point(x['X'], x['Y']),1),crs={'init': 'epsg:31370'})
tmp_for = gpd.GeoDataFrame(tmp_for, geometry=geom, crs="EPSG:31370")
tmp_for.head(5)

  return _prepare_from_string(" ".join(pjargs))


Unnamed: 0,Ref_y,X,Y,Z,Long,Societe,geometry
0,T220,152842.0,122770.0,,1,SITEREM,POINT (152842.000 122770.000)
1,T219,152799.0,122808.0,,1,SITEREM,POINT (152799.000 122808.000)
2,T218,152754.0,122758.0,,2,SITEREM,POINT (152754.000 122758.000)
3,T218,152754.0,122758.0,,2,SITEREM,POINT (152754.000 122758.000)
4,T217,152750.0,122713.0,,3,SITEREM,POINT (152750.000 122713.000)


In [123]:
print('unique: ',len(set(tmp_for['Ref_y'])),'/', len(tmp_for))

unique:  233 / 758


In [124]:
tmp_for.drop_duplicates('Ref_y', inplace=True)
tmp_for.reset_index(inplace=True, drop=True)
tmp_for

Unnamed: 0,Ref_y,X,Y,Z,Long,Societe,geometry
0,T220,152842.0,122770.0,,1,SITEREM,POINT (152842.000 122770.000)
1,T219,152799.0,122808.0,,1,SITEREM,POINT (152799.000 122808.000)
2,T218,152754.0,122758.0,,2,SITEREM,POINT (152754.000 122758.000)
3,T217,152750.0,122713.0,,3,SITEREM,POINT (152750.000 122713.000)
4,T216,152702.0,122709.0,,1,SITEREM,POINT (152702.000 122709.000)
...,...,...,...,...,...,...,...
228,5,153139.0,122614.0,10141,8,SBS,POINT (153139.000 122614.000)
229,4,153117.0,122614.0,10188,8,SBS,POINT (153117.000 122614.000)
230,3,153141.0,122640.0,10168,8,SBS,POINT (153141.000 122640.000)
231,2,153112.0,122638.0,10209,7,SBS,POINT (153112.000 122638.000)


In [125]:
For_synth

Unnamed: 0,Date,Ref_x,X,Y,Z,Long,Refus,Base_RB,Top_All,Top_SA,Top_SS,geometry
0,2016-12-31,75,152836.000000,122609.000000,102.652,5.7,False,4.3,4.3,,,POINT (152836.000 122609.000)
1,2016-12-31,76,152867.000000,122611.000000,102.751,1.2,True,,,,,POINT (152867.000 122611.000)
2,2016-12-31,76b,152867.000000,122610.000000,102.751,3.0,True,,,,,POINT (152867.000 122610.000)
3,2016-12-31,76c,152867.000000,122608.000000,102.751,2.5,True,,,,,POINT (152867.000 122608.000)
4,2016-12-31,FP76,152860.000000,122608.000000,102.727,18.5,False,8.0,,8.0,12.5,POINT (152860.000 122608.000)
...,...,...,...,...,...,...,...,...,...,...,...,...
84,NaT,F30,152887.892567,122581.184654,,,False,,,,,POINT (152887.893 122581.185)
85,NaT,F31,152885.916612,122578.766886,,,False,,,,,POINT (152885.917 122578.767)
86,NaT,F32,152885.673928,122581.757054,,,False,,,,,POINT (152885.674 122581.757)
87,NaT,F40,152883.685780,122577.833364,,,False,,,,,POINT (152883.686 122577.833)


In [126]:
genID_dated(tmp_for, 'Ref_y', '2010'), genID_dated(For_synth, 'Ref_x')

Generation of ID-dated...
Using default date given !
Generation of ID-dated...
Using 'Date' column in the (geo)dataframe !


(0      2010-T220
 1      2010-T219
 2      2010-T218
 3      2010-T217
 4      2010-T216
          ...    
 228       2010-5
 229       2010-4
 230       2010-3
 231       2010-2
 232       2010-1
 Name: Ref_y, Length: 233, dtype: object,
 0       2016-75
 1       2016-76
 2      2016-76b
 3      2016-76c
 4     2016-FP76
         ...    
 84      nan-F30
 85      nan-F31
 86      nan-F32
 87      nan-F40
 88      nan-F41
 Name: Ref_x, Length: 89, dtype: object)

In [127]:
For_synth=For_synth.append(tmp_for)

In [128]:
For_synth.reset_index(inplace=True, drop=True)
For_synth.loc[For_synth.query('Ref_x!=Ref_x').index, 'Ref_x']=For_synth.loc[For_synth.query('Ref_x!=Ref_x').index, 'Ref_y']
For_synth.drop('Ref_y', axis=1, inplace=True)

In [129]:
For_synth.loc[For_synth.query("Date >= '2017-02-22' and Date <= '2017-05-18'").index, "Societe"]='ECOPLANNING'
For_synth.rename(columns={'Ref_x':'ID'}, inplace=True)
#df.query()'Profondeur.str.contains("Forage") and Profondeur!="Forage bloqué"', engine='python')
#df.loc[(df['date'] >= '2020-09-01') & (df['date'] < '2020-09-15')] 
#df.query("date >= '2020-08-01' and date < '2020-09-01'") 

In [130]:
gdf_viewer(For_synth)

Rows : 322, columns : 13


interactive(children=(IntSlider(value=10, description='rows', max=322, min=10, readout=False), IntSlider(value…

In [131]:
pd.DataFrame(For_synth)[['ID', 'X', 'Y', 'Z', 'Long', 'Refus', 'Societe']].to_csv('../../CF_data/synthese/Result_traitem/boreholes_2.csv', index=False)

* **Feuilles : 'Sond_all_loc' (F) / 'Feuil1' {autre fichier}(F,E) / 'Obs_siterem_phase2' (F)**

In [132]:
tmp_for = pd.read_excel('../../CF_data//Data_UMONS/Logs_forages_vUmons_2018-03-20.xlsx', sheet_name='Sond_all_loc')#, skiprows=1)
gdf_viewer(tmp_for)

Rows : 89, columns : 6


interactive(children=(IntSlider(value=10, description='rows', max=89, min=10, readout=False), IntSlider(value=…

In [133]:
tmp_for.rename(columns={'Ref_sond':'Ref_x','XL72':'X', 'YL72':'Y'}, inplace=True)
tmp_for=tmp_for[['Ref_x', 'X', 'Y', 'Z']]
len(tmp_for)

89

In [134]:
gdf_geom(tmp_for)

  return _prepare_from_string(" ".join(pjargs))


Unnamed: 0,Ref_x,X,Y,Z,geometry
0,75,152836.0,122609.0,102.652,POINT (152836.000 122609.000)
1,76,152867.0,122611.0,102.751,POINT (152867.000 122611.000)
2,77,152898.0,122606.0,102.837,POINT (152898.000 122606.000)
3,78,152864.0,122589.0,102.709,POINT (152864.000 122589.000)
4,79,152893.0,122592.0,102.69,POINT (152893.000 122592.000)


In [135]:
For_synth=copy.deepcopy(tmp_for) #save tmp_for

In [136]:
tmp_for = pd.read_excel('../../CF_data//Data_UMONS/ouvrages/profondeur de contact campagne de forages octobre 2019.xlsx', sheet_name='Feuil1', skiprows=2)
gdf_viewer(tmp_for)

Rows : 10, columns : 5


interactive(children=(IntSlider(value=10, description='rows', max=10, min=10, readout=False), IntSlider(value=…

In [137]:
tmp_for.rename(columns={'n°forage ':'Ref_y','x':'X', 'y':'Y', 'z':'Z', 'profondeur(m)':'Long'}, inplace=True)
tmp_for=tmp_for[['Ref_y', 'X', 'Y', 'Z', 'Long']]

In [138]:
tmp_for=tmp_for[:8]

In [139]:
gdf_geom(tmp_for)

  return _prepare_from_string(" ".join(pjargs))


Unnamed: 0,Ref_y,X,Y,Z,Long,geometry
0,205.0,152887.693,122594.62,101.804,3.2,POINT (152887.693 122594.620)
1,208.0,152885.296,122592.986,101.848,3.4,POINT (152885.296 122592.986)
2,212.0,152882.85,122591.453,101.93,3.4,POINT (152882.850 122591.453)
3,207.0,152892.925,122592.662,101.889,3.4,POINT (152892.925 122592.662)
4,214.0,152888.082,122588.486,101.854,3.6,POINT (152888.082 122588.486)


In [140]:
For_synth.merge(tmp_for, how='inner', left_on='Ref_x', right_on='Ref_y')

Unnamed: 0,Ref_x,X_x,Y_x,Z_x,geometry_x,Ref_y,X_y,Y_y,Z_y,Long,geometry_y


In [141]:
genID_dated(tmp_for, 'Ref_y', '2019')

Generation of ID-dated...
Using default date given !


0    2019-205.0
1    2019-208.0
2    2019-212.0
3    2019-207.0
4    2019-214.0
5    2019-217.0
6    2019-225.0
7    2019-304.0
Name: Ref_y, dtype: object

In [142]:
For_synth=For_synth.append(tmp_for)

In [143]:
For_synth.reset_index(inplace=True, drop=True)
For_synth.loc[For_synth.query('Ref_x!=Ref_x').index, 'Ref_x']=For_synth.loc[For_synth.query('Ref_x!=Ref_x').index, 'Ref_y']
For_synth.drop('Ref_y', axis=1, inplace=True)

In [144]:
gdf_viewer(For_synth)

Rows : 97, columns : 6


interactive(children=(IntSlider(value=10, description='rows', max=97, min=10, readout=False), IntSlider(value=…

In [145]:
tmp_for = pd.read_excel('../../CF_data//Data_UMONS/Logs_forages_vUmons_2018-03-20.xlsx', sheet_name='Obs_siterem_phase2')#, skiprows=2)
gdf_viewer(tmp_for)

Rows : 22, columns : 13


interactive(children=(IntSlider(value=10, description='rows', max=22, min=10, readout=False), IntSlider(value=…

In [146]:
tmp_for.rename(columns={'Date':'Date_Prv','ref_piezo':'Ref_Pz', 'Prof_piézo':'Long_Pz', 'Nappe':'Aquifer'}, inplace=True)
tmp_for=tmp_for[['Date_Prv','Ref_Pz', 'X', 'Y', 'Z', 'Long_Pz', 'Niv_nappe', 'Aquifer']]
tmp_for['Type']='Piezo'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tmp_for['Type']='Piezo'


In [147]:
tmp_for.columns

Index(['Date_Prv', 'Ref_Pz', 'X', 'Y', 'Z', 'Long_Pz', 'Niv_nappe', 'Aquifer',
       'Type'],
      dtype='object')

In [148]:
#merge_tmp=For_synth.merge(tmp_for, how='inner', left_on='Ref_x', right_on='Ref_Pz', indicator=True)
#gdf_viewer(merge_tmp[['Ref_x', 'Ref_Pz','geometry_x', 'geometry_y']])

In [149]:
merge_tmp=For_synth.merge(tmp_for, how='outer', left_on='Ref_x', right_on='Ref_Pz')
merge_tmp.loc[merge_tmp.query('Ref_x==Ref_Pz').index, 'Long']=merge_tmp['Long_Pz']
merge_tmp=merge_tmp[['Date_Prv', 'Ref_x', 'X_x', 'Y_x', 'Z_x', 'Long', 'Niv_nappe', 'Type', 'Aquifer']]

In [150]:
#gdf_viewer(merge_tmp.query('Ref_x==Ref_Pz'))
gdf_viewer(merge_tmp)

Rows : 97, columns : 9


interactive(children=(IntSlider(value=10, description='rows', max=97, min=10, readout=False), IntSlider(value=…

In [151]:
len(set(merge_tmp['Ref_x']))

97

In [152]:
merge_tmp.rename(columns={'Ref_x':'ID', 'X_x':'X', 'Y_x':'Y', 'Z_x':'Z'}, inplace=True)

In [153]:
merge_tmp[['ID','X','Y','Z','Long','Type']].to_csv('../../CF_data/synthese/Result_traitem/boreholes_3.csv', index=False)

* **Feuille : 'Analyse_eau_Phases1&2' (F,E)**

In [199]:
tmp_for = pd.read_excel('../../CF_data//Data_UMONS/Logs_forages_vUmons_2018-03-20.xlsx', sheet_name='Analyse_eau_Phases1&2')#, skiprows=2)
gdf_viewer(tmp_for)

Rows : 52, columns : 85


interactive(children=(IntSlider(value=10, description='rows', max=52, min=10, readout=False), IntSlider(value=…

In [200]:
tmp_for.rename(columns={'Date':'Date_Prv','Ref_forage':'Ref_Pz','XL72':'X', 'YL72':'Y','Prof_forage':'Long','Prof_ piézo':'Long_Pz', 'Niveau_nappe':'Niv_nappe'}, inplace=True)
merge_tmp=tmp_for[46:][['Date_Prv', 'Ref_Pz', 'X','Y','Z','Long','Long_Pz']]
merge_tmp.reset_index(inplace=True, drop=True)
merge_tmp['Type']='Piezo'
merge_tmp['Date_Prv'] = merge_tmp['Date_Prv'].apply(lambda x : datetime.date(1900, 1, 1) + datetime.timedelta(days=x-2))

# ici c'est la date de prélèvement des échantillons d'eau !!!
# il faut enlever le niveau de la nappe pour l'instant. on l'integrera dans un objet spécifique 'param_PhysChim'
merge_tmp

Unnamed: 0,Date_Prv,Ref_Pz,X,Y,Z,Long,Long_Pz,Type
0,2018-01-25,F25,152885.424348,122579.479399,,6,4.31,Piezo
1,2018-01-25,F20,152886.408877,122578.054372,,6,3.82,Piezo
2,2018-01-25,F22,152888.384832,122580.472141,,6,4.15,Piezo
3,2018-01-25,F24,152886.166193,122581.04454,,6,4.18,Piezo
4,2018-01-25,F21,152888.135251,122578.194486,,6,4.59,Piezo
5,2018-01-25,F23,152888.263489,122581.967225,,6,3.01,Piezo


In [201]:
tmp_for=tmp_for[4:43][['Date_Prv', 'Ref_Pz', 'X','Y','Z','Long','Long_Pz']]
tmp_for.reset_index(inplace=True, drop=True)
tmp_for['Type']='Piezo'
tmp_for.loc[tmp_for.query('Long_Pz==9999').index, 'Long_Pz']=np.nan
gdf_viewer(tmp_for)

Rows : 39, columns : 8


interactive(children=(IntSlider(value=10, description='rows', max=39, min=10, readout=False), IntSlider(value=…

In [202]:
len(set(merge_tmp['Ref_Pz']))

6

In [203]:
merge_tmp.merge(tmp_for, how='inner', left_on='Ref_Pz', right_on='Ref_Pz')

Unnamed: 0,Date_Prv_x,Ref_Pz,X_x,Y_x,Z_x,Long_x,Long_Pz_x,Type_x,Date_Prv_y,X_y,Y_y,Z_y,Long_y,Long_Pz_y,Type_y


In [204]:
tmp_for=tmp_for.append(merge_tmp)
tmp_for.drop_duplicates('Ref_Pz', inplace=True)
tmp_for.reset_index(inplace=True, drop=True)

In [205]:
For_synth=copy.deepcopy(tmp_for) #save tmp_for

In [206]:
gdf_viewer(For_synth)

Rows : 30, columns : 8


interactive(children=(IntSlider(value=10, description='rows', max=30, min=10, readout=False), IntSlider(value=…

* **Feuille : 'Analyse_eau_Phases1&2_toSIG' (F,E)**

In [207]:
tmp_for = pd.read_excel('../../CF_data//Data_UMONS/Logs_forages_vUmons_2018-03-20.xlsx', sheet_name='Analyse_eau_Phases1&2_toSIG')#, skiprows=2)
gdf_viewer(tmp_for)

Rows : 45, columns : 87


interactive(children=(IntSlider(value=10, description='rows', max=45, min=10, readout=False), IntSlider(value=…

In [208]:
tmp_for['Date'][0].year

2017

In [209]:
# ici c'est la date de prélèvement des échantillons d'eau !!!
tmp_for.rename(columns={'Date':'Date_Prv','Ref_forage':'Ref_Pz','XL72':'X', 'YL72':'Y','Prof_forage':'Long','Prof_ piezo':'Long_Pz'}, inplace=True)
tmp_for['Type']='Piezo'
#tmp_for['Date_Prv'] = tmp_for['Date_Prv'].apply(lambda x : datetime.date(1900, 1, 1) + datetime.timedelta(days=x-2))
tmp_for=tmp_for[['Date_Prv','Ref_Pz', 'X', 'Y','Z', 'Long', 'Type', 'Long_Pz','Crep_min', 'Crep_max']]
gdf_viewer(tmp_for)

Rows : 45, columns : 10


interactive(children=(IntSlider(value=10, description='rows', max=45, min=10, readout=False), IntSlider(value=…

In [210]:
len(set(tmp_for['Ref_Pz']))

30

In [211]:
tmp_for.drop_duplicates('Ref_Pz', inplace=True)
tmp_for.reset_index(inplace=True, drop=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tmp_for.drop_duplicates('Ref_Pz', inplace=True)


In [212]:
For_synth['Crep_top']=tmp_for['Crep_min'] # on recupère l'info sur les crepines
For_synth['Crep_base']=tmp_for['Crep_max']
For_synth.loc[For_synth.query('Crep_top==999').index, 'Crep_top']=np.nan
For_synth.loc[For_synth.query('Crep_base==999').index, 'Crep_base']=np.nan
For_synth.loc[For_synth.query('Long_Pz==999').index, 'Long_Pz']=np.nan
For_synth.rename(columns={'Ref_Pz':'ID'}, inplace=True)
For_synth.drop('Date_Prv', axis=1, inplace=True)

In [213]:
#gdf_viewer(For_synth.merge(tmp_for, how='inner', left_on='Ref_Pz', right_on='Ref_Pz'))
gdf_viewer(For_synth)

Rows : 30, columns : 9


interactive(children=(IntSlider(value=10, description='rows', max=30, min=10, readout=False), IntSlider(value=…

In [214]:
For_synth.to_csv('../../CF_data/synthese/Result_traitem/boreholes_4.csv', index=False)

* **Feuille : 'Analyse_Sol_Phases1&2' (F,E)**

In [215]:
tmp_for = pd.read_excel('../../CF_data//Data_UMONS/Logs_forages_vUmons_2018-03-20.xlsx', sheet_name='Analyse_sol_Phases1&2', skiprows=4)
gdf_viewer(tmp_for)

Rows : 60, columns : 84


interactive(children=(IntSlider(value=10, description='rows', max=60, min=10, readout=False), IntSlider(value=…

In [216]:
# ici c'est la date de prélèvement des échantillons d'eau !!!
tmp_for.rename(columns={'Date':'Date_Prv','Ref_forage':'Ref_x','XL72':'X', 'YL72':'Y','Unnamed: 8':'Long',
                        'Unnamed: 9':'Refus'}, inplace=True)
#tmp_for['Type']='Piezo'
#tmp_for['Date_Prv'] = tmp_for['Date_Prv'].apply(lambda x : datetime.date(1900, 1, 1) + datetime.timedelta(days=x-2))
tmp_for=tmp_for[['Date_Prv','Ref_x', 'X', 'Y','Z', 'Long', 'Refus']]
gdf_viewer(tmp_for)

Rows : 60, columns : 7


interactive(children=(IntSlider(value=10, description='rows', max=60, min=10, readout=False), IntSlider(value=…

In [217]:
len(set(tmp_for['Ref_x']))

33

In [218]:
tmp_for.drop_duplicates('Ref_x', inplace=True)
tmp_for.reset_index(inplace=True, drop=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tmp_for.drop_duplicates('Ref_x', inplace=True)


In [219]:
tmp_for['Refus'] = tmp_for['Refus'].apply(lambda x: True if x.lstrip(' ').split('-')[0].lower()=='x' else False)
tmp_for.loc[tmp_for.query('Z==9999').index, 'Z']=np.nan
tmp_for.loc[[31,32], 'Refus']=True

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tmp_for['Refus'] = tmp_for['Refus'].apply(lambda x: True if x.lstrip(' ').split('-')[0].lower()=='x' else False)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


In [227]:
merge_tmp=For_synth.merge(tmp_for, how='outer', left_on='ID', right_on='Ref_x')
merge_tmp.loc[]
gdf_viewer(merge_tmp.query('ID==Ref_x'),cols=15) #correspond à (query 'how=inner')!

Rows : 25, columns : 16


interactive(children=(IntSlider(value=10, description='rows', max=25, min=10, readout=False), IntSlider(value=…

In [225]:
gdf_viewer(tmp_for), gdf_viewer(For_synth)

Rows : 33, columns : 7


interactive(children=(IntSlider(value=10, description='rows', max=33, min=10, readout=False), IntSlider(value=…

Rows : 30, columns : 9


interactive(children=(IntSlider(value=10, description='rows', max=30, min=10, readout=False), IntSlider(value=…

(None, None)

In [196]:
For_synth=copy.deepcopy(tmp_for)

In [197]:
gdf_viewer(For_synth)

Rows : 33, columns : 7


interactive(children=(IntSlider(value=10, description='rows', max=33, min=10, readout=False), IntSlider(value=…

* **Feuille : 'Analyse_Sol_Phases1&2_toSIG' (F,E)**

In [198]:
tmp_for = pd.read_excel('../../CF_data//Data_UMONS/Logs_forages_vUmons_2018-03-20.xlsx', sheet_name='Feuil2')#, skiprows=4)
gdf_viewer(tmp_for)

Rows : 25, columns : 61


interactive(children=(IntSlider(value=10, description='rows', max=25, min=10, readout=False), IntSlider(value=…

In [538]:
# ici c'est la date de prélèvement des échantillons d'eau !!!
tmp_for.rename(columns={'Date':'Date_Prv','Ref_forage':'Ref_x','XL72':'X', 'YL72':'Y','Prof_sond':'Long',
                        'Bloque':'Refus'}, inplace=True)
#tmp_for['Type']='Piezo'
#tmp_for['Date_Prv'] = tmp_for['Date_Prv'].apply(lambda x : datetime.date(1900, 1, 1) + datetime.timedelta(days=x-2))
tmp_for=tmp_for[['Date_Prv','Ref_x', 'X', 'Y','Z', 'Long', 'Refus']]
gdf_viewer(tmp_for)

Rows : 60, columns : 7


interactive(children=(IntSlider(value=10, description='rows', max=60, min=10, readout=False), IntSlider(value=…

In [540]:
len(set(tmp_for['Ref_x']))

33

In [541]:
gdf_viewer(For_synth.merge(tmp_for, how='inner', left_on='Ref_x', right_on='Ref_x'))

Rows : 60, columns : 13


interactive(children=(IntSlider(value=10, description='rows', max=60, min=10, readout=False), IntSlider(value=…

In [501]:
tmp_for.drop_duplicates('Ref_x', inplace=True)
tmp_for.reset_index(inplace=True, drop=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tmp_for.drop_duplicates('Ref_x', inplace=True)


In [502]:
tmp_for['Refus'] = tmp_for['Refus'].apply(lambda x: True if x.lstrip(' ').split('-')[0].lower()=='x' else False)
tmp_for.loc[tmp_for.query('Z==9999').index, 'Z']=np.nan
tmp_for.loc[[31,32], 'Refus']=True

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tmp_for['Refus'] = tmp_for['Refus'].apply(lambda x: True if x.lstrip(' ').split('-')[0].lower()=='x' else False)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


In [503]:
gdf_viewer(tmp_for)

Rows : 33, columns : 7


interactive(children=(IntSlider(value=10, description='rows', max=33, min=10, readout=False), IntSlider(value=…

In [426]:
#gdf_viewer(For_synth.merge(tmp_for, how='inner', left_on='Ref_Pz', right_on='Ref_Pz'))
gdf_viewer(For_synth)

Rows : 30, columns : 10


interactive(children=(IntSlider(value=10, description='rows', max=30, min=10, readout=False), IntSlider(value=…

/////////////////////////////----- Brouillon -----//////////////////////////////////////////////////////////////

In [None]:
tmp_for=tmp_for.append(merge_tmp)

In [None]:
For_synth=For_synth.append(tmp_for)

In [None]:
gdf_viewer(For_synth)

In [None]:
merge_tmp=copy.deepcopy(For_synth)