In [1]:
import os
import re
import unicodedata

import pandas as pd

In [2]:
# Outer wrapper of the generated .th file. writeTemplate fills it in with
# str.format using three fields: surveyName (used both as the survey id and as
# its -title), entranceStation, and Centrelines (all centreline blocks
# concatenated into one string).
template = """
survey {surveyName} -title {surveyName} -entrance {entranceStation}
{Centrelines}
endsurvey
"""

In [3]:
# Skeleton of a single centreline block. writeCentreline fills it in with
# str.format using: surveyDate, surveyTeams, dataheader (one of the `data ...`
# header lines), data (the shot lines) and lrud (the per-station dimension
# lines that follow the fixed `data dimensions` header).
# Length and angle units are hard-coded here as metres and degrees.
# NOTE(review): nothing in the visible code reads units from the source file --
# confirm every input really uses metres/degrees.
centreline_template = """
    centreline
    {surveyDate}
    {surveyTeams}
    \tunits length meters
    \tunits compass clino degrees
    {dataheader}
    {data}
    \tdata dimensions station left right up down
    {lrud}
    endcentreline
"""

In [4]:
# `data` header lines substituted into centreline_template's {dataheader} field
# by writeCentreline. The column order of each header matches the order in
# which writeCentreline writes the fields of the corresponding shot line.
dive_topo = "\tdata diving to todepth from fromdepth tape compass"
normal_topo = "\tdata normal from to tape compass clino"

In [5]:
def writeDate(explodate):
    """Turn a day-month-year string into a ``date YYYY.MM.DD`` line.

    Parameters
    ----------
    explodate : str
        Date laid out as ``DD?MM?YY`` or ``DD?MM?YYYY`` where ``?`` is any
        single separator character (``.``, ``/``, ``-`` ...).

    Returns
    -------
    str
        ``'date YYYY.MM.DD\\n'`` on success, or ``''`` when ``explodate`` is
        empty, not a string, or not laid out as described above.

    Notes
    -----
    Two-digit years are expanded with a pivot at 50: ``00``-``49`` become
    20xx and ``50``-``99`` become 19xx. Previously years >= 50 were silently
    discarded because the four-digit year was never assigned for them.
    """
    if not isinstance(explodate, str):
        return ''

    parts = re.fullmatch(r"(\d\d).(\d\d).(\d{4}|\d\d)", explodate, flags=re.ASCII)
    if parts is None:
        # Best-effort contract: an unusable date yields no date line at all.
        return ''

    day, month, year = parts.groups()
    if len(year) == 2:
        century = '20' if int(year) < 50 else '19'
        year4digits = century + year
    else:
        year4digits = year

    return 'date {}.{}.{}\n'.format(year4digits, month, day)

In [6]:
def writeSurveyors(surveyors):
    """Build one ``team "<name>"`` line per surveyor.

    Parameters
    ----------
    surveyors : iterable of str
        Surveyor name tokens. An empty string (used by the caller when no
        team was found) simply produces no lines.

    Returns
    -------
    str
        The concatenated ``'\\t team "<name>"\\n'`` lines, possibly empty.

    Notes
    -----
    The French conjunction ``et`` is skipped. Surrounding whitespace is
    stripped and blank tokens are dropped, because splitting free text on
    single spaces leaves empty strings and trailing newlines behind, which
    would otherwise produce ``team ""`` lines or names broken across lines.
    """
    team_lines = []
    for surveyor in surveyors:
        name = surveyor.strip()
        if not name or name == 'et':
            continue
        team_lines.append('\t team "{}"\n'.format(name))

    return ''.join(team_lines)

In [7]:
def getData(data,s,e):
    """Collect the shot columns of one section of ``data``.

    Parameters
    ----------
    data : list of str
        All lines of the source file.
    s, e : int
        ``data[s-1]`` is the header line that decides the style; the lines
        ``data[s+1:e]`` are parsed as shots.

    Returns
    -------
    tuple of list
        If ``'Prof'`` is absent from the header line:
        ``(stn1, stn2, tape, compass, clino, left, right, up, down)``.
        Otherwise (diving style):
        ``(stn2, todepth, stn1, fromdepth, tape, compass, left, right, up, down)``.

    Notes
    -----
    * Lines are split on single spaces; lines with nine or fewer non-empty
      tokens are ignored.
    * A first token of ``*`` is replaced by the second token of the line
      that precedes it in ``data``.
    * In diving style the tenth token is overwritten with the "from" depth:
      the fifth token of the preceding line when that line has more than
      five tokens, otherwise ``'not known'`` (and the shot is dropped
      entirely if both station names are equal).
    """
    def tokens(text):
        # Split on single spaces and discard the empty pieces.
        return [piece for piece in text.split(' ') if piece]

    style = data[s-1] # either 'normal' or 'diving'
    diving = 'Prof' in style
    # Column order while collecting:
    # stn1, stn2, tape, compass, clino/todepth, left, right, up, down, fromdepth
    columns = tuple([] for _ in range(10))
    print(style)

    for offset, raw_line in enumerate(data[s+1:e]):
        fields = tokens(raw_line)
        if len(fields) <= 9:
            continue

        # data[s+offset] is the line immediately before raw_line.
        if fields[0] == '*':
            fields[0] = tokens(data[s+offset])[1]

        if diving:
            print('found diving data')
            before = tokens(data[s+offset])
            if len(before) > 5:
                fields[9] = before[4]
            elif fields[0] != fields[1]:
                fields[9] = 'not known'
            else:
                # No previous depth and a self-referencing station: skip.
                continue
            targets = columns
        else:
            targets = columns[:9]

        for column, value in zip(targets, fields):
            column.append(value)

    stn1, stn2, tape, compass, fifth, left, right, up, down, fromdepth = columns
    if diving:
        return (stn2, fifth, stn1, fromdepth, tape, compass, left, right, up, down)
    return (stn1, stn2, tape, compass, fifth, left, right, up, down)

In [8]:
def FindEntrancestn(data):
    """Return the text that follows ``Entree`` in the file's lines.

    Parameters
    ----------
    data : iterable of str
        Lines of the source file.

    Returns
    -------
    list of str
        The matches from the last line that contains ``Entree`` followed by
        whitespace and some text. Returns an empty list when there is no
        such line (previously this raised ``UnboundLocalError``).
    """
    entrance_stations = []
    for line in data:
        if 'Entree' not in line:
            continue
        found = re.findall(r"(?<=Entree\s).+", line)
        # Keep an earlier good match if a later 'Entree' line has no value.
        if found:
            entrance_stations = found
    return entrance_stations

In [9]:
def returnCentrelineparams(data):
    """Locate every ``Param`` section and pull out its date and survey team.

    Parameters
    ----------
    data : list of str
        Lines of the source file.

    Returns
    -------
    tuple
        ``(surveyor_groups, survey_dates, start, end)``, four parallel lists
        with one entry per line containing ``Param``:

        * ``surveyor_groups`` -- the words following ``Topo réalisée par``
          split on single spaces, or ``''`` when that phrase is absent;
        * ``survey_dates`` -- the first date-like token on the line with
          ``-`` replaced by ``.``, or ``''`` when none is found;
        * ``start`` -- index of the line after the ``Param`` line;
        * ``end`` -- index of the line before the next ``Param`` line or
          before the ``Configuration`` line.

    Notes
    -----
    The date pattern accepts an optional four-digit year. Without it a date
    such as ``12/02/2019`` was captured as ``12/02/20`` and later read as the
    year 2020.

    If the file ends without a ``Configuration`` line, the final section is
    closed at ``len(data)`` so it is not dropped when callers zip the four
    lists together.
    """
    start,end = [],[]
    survey_dates = []
    surveyor_groups = []

    for c,l in enumerate(data):
        if 'Param' in l:
            # A new section closes the previous one.
            if start:
                end.append(c-1)
            start.append(c+1)

            explodate = re.findall(r"\d\d.\d\d.\d\d(?:\d\d)?", l)
            if explodate:
                survey_dates.append(explodate[0].replace("-", "."))
            else:
                survey_dates.append('')

            tp = re.findall(r"(?<=Topo réalisée par )[\w+\s]*",l)
            if tp:
                surveyor_groups.append(tp[0].split(' '))
            else:
                surveyor_groups.append('')
        elif 'Configuration' in l:
            end.append(c-1)

    # Unterminated last section: close it at end-of-file.
    if len(end) < len(start):
        end.append(len(data))

    return surveyor_groups,survey_dates,start,end

In [10]:
def cleanNames(string):
    """Strip diacritics from ``string`` (e.g. ``'é'`` -> ``'e'``, ``'ï'`` -> ``'i'``).

    Parameters
    ----------
    string : str
        Text to clean, typically a file name. Both precomposed characters
        and base-letter + combining-mark sequences are handled.

    Returns
    -------
    str
        ``string`` with every combining mark removed. Characters without a
        decomposition, spaces and punctuation are returned unchanged.

    Notes
    -----
    This replaces a hand-written list of UTF-8 byte substitutions that only
    covered a few accents (a grave accent, for example, passed through
    untouched). The previous body also ended with a ``str.replace`` whose
    result was discarded; whitespace is therefore still left as-is, and
    callers that want underscores must replace spaces themselves.
    """
    decomposed = unicodedata.normalize('NFD', string)
    stripped = ''.join(ch for ch in decomposed if not unicodedata.combining(ch))
    # Recompose whatever is left so unrelated characters keep their usual form.
    return unicodedata.normalize('NFC', stripped)




In [11]:
def writeCentreline(data,start,end,surveyor_group,survey_date):
    """Render one centreline block for the section ``data[start:end]``.

    Parameters
    ----------
    data : list of str
        Lines of the source file.
    start, end : int
        Section bounds, passed straight to ``getData``; ``data[start-1]`` is
        the section's header line.
    surveyor_group : iterable of str
        Surveyor names, passed to ``writeSurveyors``.
    survey_date : str
        Date string, passed to ``writeDate``.

    Returns
    -------
    str
        ``centreline_template`` filled in with the date line, team lines,
        data header, shot lines and station-dimension lines.

    Notes
    -----
    * Header without ``Prof``: shots are written in the normal layout.
    * Header containing ``Param Deca Degd Prof``: shots are written in the
      diving layout; a shot whose text contains ``not known`` is prefixed
      with ``#``.
    * Any other header containing ``Prof``: no shots are written and the
      normal data header is used.
    * In station-dimension lines every ``*`` is replaced by ``-``.
    """
    shot = "{stn1}\t{stn2}\t{tape}\t{compass}\t{clino}\n\t\t"
    dive_shot = "{stn2}\t{todepth}\t{stn1}\t{fromdepth}\t{tape}\t{compass}\n\t\t"
    station_dims = "{stn}\t{left}\t{right}\t{up}\t{down}\n\t\t"
    shot_rows = []
    lrud_rows = []
    param_line = data[start-1]

    def add_dimensions(stn, left, right, up, down):
        # One LRUD row for the "to" station, with '*' placeholders turned into '-'.
        row = station_dims.format(stn=stn, left=left, right=right, up=up, down=down)
        lrud_rows.append('\t' + row.replace('*', '-'))

    if 'Prof' not in param_line:
        (from_stns, to_stns, lengths, bearings, inclinations,
         lefts, rights, ups, downs) = getData(data,start,end)
        header = normal_topo
        print(len(from_stns))

        for frm, to, length, bearing, incl, lft, rgt, upp, dwn in zip(
                from_stns, to_stns, lengths, bearings, inclinations,
                lefts, rights, ups, downs):
            row = shot.format(stn1=frm, stn2=to, tape=length,
                              compass=bearing, clino=incl)
            shot_rows.append('\t' + row)
            add_dimensions(to, lft, rgt, upp, dwn)

    elif 'Param Deca Degd Prof' in param_line:
        print('found diving data')
        header = dive_topo
        print(header)

        (to_stns, to_depths, from_stns, from_depths, lengths, bearings,
         lefts, rights, ups, downs) = getData(data,start,end)

        for to, to_depth, frm, from_depth, length, bearing, lft, rgt, upp, dwn in zip(
                to_stns, to_depths, from_stns, from_depths, lengths, bearings,
                lefts, rights, ups, downs):
            row = dive_shot.format(stn2=to, todepth=to_depth, stn1=frm,
                                   fromdepth=from_depth, tape=length,
                                   compass=bearing)
            # Shots with an unknown starting depth are commented out.
            prefix = '#\t' if 'not known' in row else '\t'
            shot_rows.append(prefix + row)
            add_dimensions(to, lft, rgt, upp, dwn)

    else:
        header = normal_topo

    date_line = writeDate(survey_date)
    team_lines = writeSurveyors(surveyor_group)
    print(header)
    return centreline_template.format(surveyTeams=team_lines,
                                      surveyDate=date_line,
                                      data=''.join(shot_rows),
                                      lrud=''.join(lrud_rows),
                                      dataheader=header)

In [12]:
"gouffralaïl".encode('utf-8')

b'gouffrala\xc3\xafl'

In [13]:
def writeTemplate(filepath, new_filepath,cave_name):
    """Convert one source survey file into a ``.th`` file.

    Parameters
    ----------
    filepath : str
        Path of the source file; it is read as latin-1.
    new_filepath : str
        Path used to derive the output name: its extension (whatever it is)
        is replaced by ``.th``.
    cave_name : str
        Survey name inserted into ``template``.

    Raises
    ------
    ValueError
        If no entrance station could be found in the source file.

    Notes
    -----
    The output name used to be built with ``new_filepath[:-4]``, which only
    works for three-letter extensions: passing ``'Foo.th'`` wrote ``'F.th'``
    style truncated names. ``os.path.splitext`` handles any extension.
    """
    with open(filepath, 'r', encoding='latin-1') as source:
        data = source.readlines()

    surveyor_groups,survey_dates,starts,ends = returnCentrelineparams(data)

    entrance = FindEntrancestn(data)
    if not entrance:
        raise ValueError("no 'Entree' station found in {!r}".format(filepath))

    centrelines = "".join(
        "\n" + writeCentreline(data, start, end, surveyor_group, survey_date)
        for start, end, surveyor_group, survey_date
        in zip(starts, ends, surveyor_groups, survey_dates)
    )

    TEMPLATE = template.format(surveyName=cave_name,
                               Centrelines=centrelines,
                               entranceStation=entrance[0])
    print(new_filepath)

    output_path = os.path.splitext(new_filepath)[0] + '.th'
    # The with-block closes the file; no explicit close() is needed.
    with open(output_path, 'w', encoding='utf-8') as target:
        target.write(TEMPLATE)

In [263]:
names =[]
# Walk the survey tree and convert every .tro file found.
for root, dirs, files in os.walk("../../Americas/TOPOGRAPHIES-2/", topdown=False):
    for name in files:
        # Match on the extension rather than on the substring 'tro', which
        # also matched unrelated files whose name merely contains "tro".
        # Names containing '._' are skipped, as before.
        if not name.endswith('.tro') or '._' in name:
            continue
        fp = os.path.join(root, name)
        clean_name = cleanNames(name)
        cave_name = os.path.splitext(clean_name)[0].replace(' ','_')
        writeTemplate(fp, os.path.join('../data/raw_convert/', clean_name), cave_name)

Param Deca Deg Clino Deg 0.0000 Dir,Dir,Dir Dep Std

11
	data normal from to tape compass clino
../data/raw_convert/La grande traversee.tro
Param Deca Degd Clino Degd 0.0000 Dir,Dir,Dir Inc Std

32
	data normal from to tape compass clino
../data/raw_convert/4 plus 1.tro
Param Deca Degd Clino Degd 0.0000 Dir,Dir,Dir Inc Std 12/02/2019 M

6
	data normal from to tape compass clino
../data/raw_convert/Topo Pissotière.tro
Param Deca Degd Clino Degd 0.0000 Dir,Dir,Dir Arr Std ;Topographie le 06 fvrier 2017, Buldo, Natalia, Anel, Franck. Topo de l'entrée vers le fond.;

54
	data normal from to tape compass clino
Param Deca Degd Clino Degd 0.0000 Dir,Dir,Dir Dep Std ;Explo et topographie du 08 février 2017, Angel, Buldo et Natalia.;

52
	data normal from to tape compass clino
Param Deca Degd Clino Degd 0.0000 Dir,Dir,Dir Dep Std ;Relevé des stations 1000 à 1039 réalisée par Yannick Campan et Thomas Fischer le 11/02/2017;

49
	data normal from to tape compass clino
Param Deca Degd Clino Degd 0.

In [14]:
# One-off conversion of a single survey file; the output path is derived from
# the second argument, so the result lands under ../data/109/.
writeTemplate('../../Americas/TOPOGRAPHIES-2/GOUFFRE_4_ENTREE_PLUS_UNE_2019/4 plus 1.tro','../data/109/QuatreEntreesPlusUne.th','QuatreEntreesPlusUne')


Param Deca Degd Clino Degd 0.0000 Dir,Dir,Dir Inc Std

32
	data normal from to tape compass clino
../data/109/QuatreEntreesPlusUne.th


In [268]:
# Load the cadastre table from CSV, decoding it as latin-1.
# NOTE(review): requires `pd` to be bound to pandas before this cell runs.
names = pd.read_csv('../data/cadastre/UP_MDD_cadastre.csv', encoding='latin-1')

In [302]:
# Build one `fix` line per cadastre row, grouped by cadastre id.
cadastre_id_names = names['Cadastre_I'].dropna().unique()

# cave_id, X and Y are printed without decimals; Z is printed as-is and the
# name goes into a trailing `##` comment.
formatted_line = "\t\tfix ENT_{cave_id:.0f}\t{X:.0f}\t{Y:.0f}\t{Z}\t## {name}\n"

index_lines = []
for n in cadastre_id_names:
    # Rows belonging to cadastre id `n` (non-matching rows become NaN in
    # `where` and are then dropped).
    cavites = names.where(names['Cadastre_I']==n).dropna(subset =['Cadastre_I'])

    # k numbers the rows within this cadastre id, starting at 1, so the
    # station id is <cadastre id * 1000> + <running number>.
    for k, (c, line) in enumerate(cavites.iterrows(), start=1):
        cave_id = n*1000 + k
        # Columns are taken by position; .iloc is used because integer keys on
        # a label-indexed Series are deprecated as positional access.
        # assumes columns 2..5 are name, X, Y, Z -- TODO confirm against the CSV
        NAME = line.iloc[2]
        X = line.iloc[3]
        Y = line.iloc[4]
        Z = line.iloc[5]

        #index map
        index_lines.append(formatted_line.format(cave_id= cave_id,X=X,Y=Y,Z =Z,name = NAME))

In [304]:
# Write every `fix` line into a single survey/centreline wrapper.
# utf-8 is given explicitly so the result does not depend on the platform
# default and matches the encoding used for the other generated .th files.
with open('../data/index.th', 'w', encoding='utf-8') as f:
    f.write('survey UP_main\n\n\tcentreline\n\n')
    for line in index_lines:
        # Missing coordinates are formatted as 'nan'; write them as 0. Only the
        # fields before the '## <name>' comment are touched, so a cave name
        # that happens to contain "nan" is no longer corrupted.
        fields, separator, comment = line.partition('\t## ')
        f.write(re.sub(r'\bnan\b', '0', fields) + separator + comment)
    f.write('\n\tendcentreline\n\nendsurvey')
    # No explicit close(): the with-block closes the file.