# In [1]:
# test update/commit
# coding: utf-8

# In[1]:

from xml.etree import ElementTree
from xml.dom import minidom
import glob, os, sys, time
from shutil import copy
from xml.etree.ElementTree import Element, SubElement, Comment
import subprocess as sp
import pandas as pd
from datetime import datetime, timedelta
from osgeo import ogr
import logging
import re
from itertools import *
from pathlib import Path
import itertools
from osgeo import gdal
import numpy as np
non_decimal = re.compile(r'[^\d.,-]+')


# In[2]:

def get_features(shape):
    """Open a shapefile read-only and collect the OBJECTID of every feature.

    :param shape: path of the ESRI Shapefile to open.
    :return: tuple ``(dataSource, layer, features_shape)`` where
        ``features_shape`` is the list of OBJECTID values, one per feature.
        The data source is returned as well so the caller can keep it
        referenced while using ``layer``. When the file cannot be opened the
        error is written to the module-level ``logger`` and ``None`` is
        returned, so tuple-unpacking the result raises ``TypeError``.
    """
    driver = ogr.GetDriverByName('ESRI Shapefile')

    dataSource = driver.Open(shape, 0) # 0 means read-only. 1 means writeable.

    # Check to see if shapefile is found.
    if dataSource is None:
        logger.error ('Could not open %s' % (shape))
        return None

    print ('Opened %s' % (shape))
    layer = dataSource.GetLayer()
    shape_features = layer.GetFeatureCount()
    print ('Name of layer: %s' % layer.GetDescription())
    print ("Number of features in %s: %d" % (os.path.basename(shape),shape_features))

    # Iterate the layer itself instead of calling GetFeature(i) for
    # i in range(count): that lookup assumes the feature ids are exactly
    # 0..count-1 and raises AttributeError on the first id that is missing.
    layer.ResetReading()
    features_shape = [feat.GetField('OBJECTID') for feat in layer]
    return dataSource, layer, features_shape

def iter_incrementing_file_names(path):
    """
    Generate candidate file names derived from path, without end.

    The first value is path itself. Every following value inserts "_NN"
    in front of the extension, where NN is a counter that starts at 1 and
    is zero-padded to at least two digits ("_01", "_02", ..., "_100").

    :param path: Some path
    :return: An iterator that never terminates.
    """
    yield path
    stem, suffix = os.path.splitext(path)
    counter = 1
    while True:
        yield '{0}_{1}{2}'.format(stem, str(counter).zfill(2), suffix)
        counter += 1

def get_unique_filename(file_in):
    """Return file_in, or its first "_NN" variant, that is not an existing file.

    Candidates come from iter_incrementing_file_names, which never runs out,
    so a name is always returned.
    """
    candidates = iter_incrementing_file_names(file_in)
    return next(name for name in candidates if not Path(name).is_file())


# In[4]:

def filterbyvalue(seq, value, end_prefix):
    """Yield the elements of seq whose first end_prefix items equal value.

    :param seq: iterable of sliceable elements (e.g. file names).
    :param value: prefix to compare against.
    :param end_prefix: length of the leading slice that is compared.
    """
    yield from (item for item in seq if item[:end_prefix] == value)


# In[5]:

def get_unique_lists(in_files, end_prefix = 10):
    """Group names that share the same leading ``end_prefix`` characters.

    :param in_files: sequence of names (e.g. file names).
    :param end_prefix: number of leading characters that define a group.
    :return: list of groups (each a list of names in their input order);
        only groups with more than one member are returned. Groups appear in
        the order in which their prefix is first seen in ``in_files``.
    """
    # Single pass grouping: every name is looked at once instead of
    # re-filtering the whole input for every element.
    groups = {}
    prefix_order = []
    for name in in_files:
        prefix = name[0:end_prefix]
        if prefix not in groups:
            groups[prefix] = []
            prefix_order.append(prefix)
        groups[prefix].append(name)
    return [groups[prefix] for prefix in prefix_order if len(groups[prefix]) > 1]


# In[6]:
def saveRaster(path, array, dsSource, datatype=3, formatraster="GTiff", nan=None): 
    """
    Write array to a new raster file that copies size, band count,
    geotransform and projection from dsSource.

    Only band 1 of the new raster is written; when dsSource has more than
    one band the remaining bands are created but left untouched.

    :param path: file name of the raster to create.
    :param array: array written to band 1.
    :param dsSource: open GDAL dataset used as template.
    :param datatype: GDAL data type code of the output (see list below).
    :param formatraster: short name of the GDAL driver (see list below).
    :param nan: nodata value registered on band 1; not set when None.
    :raises ValueError: when formatraster is not a known GDAL driver.
    :raises IOError: when the output raster cannot be created.

    Datatypes:
    unknown = 0
    byte = 1
    unsigned int16 = 2
    signed int16 = 3
    unsigned int32 = 4
    signed int32 = 5
    float32 = 6
    float64 = 7
    complex int16 = 8
    complex int32 = 9
    complex float32 = 10
    complex float64 = 11

    Formatraster:
    GeoTIFF = GTiff
    Erdas = HFA (output = .img)
    OGC web map service = WMS
    png = PNG
    """
    # Set Driver
    driver = gdal.GetDriverByName(formatraster)
    if driver is None:
        raise ValueError('Unknown GDAL raster format: %s' % (formatraster))

    # Set Metadata for Raster output
    cols = dsSource.RasterXSize
    rows = dsSource.RasterYSize
    bands = dsSource.RasterCount

    # Create the output and copy georeferencing from the template
    outDataset = driver.Create(path, cols, rows, bands, datatype)
    if outDataset is None:
        raise IOError('Could not create raster %s' % (path))
    outDataset.SetGeoTransform(dsSource.GetGeoTransform())
    outDataset.SetProjection(dsSource.GetProjection())

    # Write output to band 1 of new Raster and write NaN value
    outBand = outDataset.GetRasterBand(1)
    if nan is not None:
        outBand.SetNoDataValue(nan)
    outBand.WriteArray(array) #save input array
    outBand.FlushCache()

    # Close and finalise newly created Raster: GDAL writes and closes the
    # file once the last reference to the dataset is dropped.
    outBand = None
    outDataset = None
        
def prettify(elem):
    """Serialise an Element to an XML string indented with two spaces.
    """
    document = minidom.parseString(ElementTree.tostring(elem, encoding='utf-8'))
    return document.toprettyxml(indent="  ")

def writeOGRVRT(basename, fullpath):
    """Build the XML text of an OGR VRT that exposes a column file as points.

    The VRT holds one layer named basename whose source is fullpath. The
    point geometry is taken from the columns field_1 (x), field_2 (y) and
    field_3 (z), and the layer SRS is set to EPSG:28992.

    :param basename: name given to the OGRVRTLayer.
    :param fullpath: text of the SrcDataSource element.
    :return: pretty-printed XML string.
    """
    root = Element('OGRVRTDataSource')
    vrt_layer = SubElement(root, 'OGRVRTLayer')
    vrt_layer.set('name', basename)

    # plain text children of the layer, in document order
    for tag, text in (('SrcDataSource', fullpath),
                      ('GeometryType', 'wkbPoint'),
                      ('LayerSRS', 'EPSG:28992')):
        SubElement(vrt_layer, tag).text = text

    # geometry is assembled from three columns of the source
    geometry = SubElement(vrt_layer, 'GeometryField')
    for attribute, value in (('encoding', 'PointFromColumns'),
                             ('x', 'field_1'),
                             ('y', 'field_2'),
                             ('z', 'field_3')):
        geometry.set(attribute, value)

    return prettify(root)

# def gridXml(filename, header, top):
    # reg = SubElement(top,'regular')
    # reg.set('locationId','l.'+filename[0:7])
    # child_1 = SubElement(reg, 'rows').text = str(header[1])
    # child_2 = SubElement(reg, 'columns').text = str(header[0])
    # child_3 = SubElement(reg, 'geoDatum').text = 'Rijks Driehoekstelsel'
    # child_4 = SubElement(reg, 'firstCellCenter')
    # child_4_sub = SubElement(child_4, 'x').text = str(header[2]+0.5)
    # child_4_sub = SubElement(child_4, 'y').text = str(header[3]+header[1]-0.5)
    # child_5 = SubElement(reg, 'xCellSize').text = str(header[4])
    # child_6 = SubElement(reg, 'yCellSize').text = str(header[4])
    # return top

# def idMapXml(filename, top):
    # map_ = SubElement(top,'map')
    # map_.set('internalLocation','l.'+filename[0:7])
    # map_.set('externalParameter',filename[0:7])
    # map_.set('internalParameter','h.m')
    # map_.set('externalLocation','ARC_INFO_LOC')
    # return top

# def LocationsXml(filename, top):
    # map_ = SubElement(top,'location')
    # map_.set('id','l.'+filename[0:7])
    # map_.set('name','Baggervak '+filename[4:7])
    # map_sub = SubElement(map_, 'x').text = str(0)
    # map_sub = SubElement(map_, 'y').text = str(0)
    # return top

# def LocationSetsXml(filename, top, child):
    # locId = SubElement(child,'locationId').text = 'l.'+filename[0:7]
    # return top


# In[4]:

# Set INPUT parameters (all paths are hard-coded Windows locations)
# rootDir           :: base directory the import/interim paths below are joined onto
# configDir         :: base directory the background shapefile path is joined onto
# ruweDataDir       :: input raw survey data (walked recursively for .pts files)
# ruweDataDirBackup :: backup location; only referenced by the commented-out backup step
# asciiDataDir      :: output folder of the ArcInfo ASCII grids
# workdir           :: working directory (emptied each run)
# y                 :: background polygon shapefile of baggervakken
# logFile           :: file used to store logs
# xmldir            :: folder for generated XML; not used by the code visible in this script

#ruweDataDir = r'D:\Projects\Pr\3317.20\BodempeilingScript_v4\survey_ruweData'
rootDir = r'D:\OmsWaddenzee\trunk\fews'
configDir = r'D:\OmsWaddenzee\trunk\fews\Config'
ruweDataDir = os.path.join(rootDir, r'Import\geoxyz\bodempeilingen\ruweData')
ruweDataDirBackup = os.path.join(rootDir, r'ImportBackup\geoxyz\bodempeilingen\ruweData')
asciiDataDir = os.path.join(rootDir, r'Import\geoxyz\bodempeilingen\asciiData')
workdir = os.path.join(rootDir, r'ImportInterim\geoxyz\bodempeilingen\tmpData')
y = os.path.join(configDir, r'MapLayerFiles\Achtergrond_polygonen//Achtergrond_polygonen.shp')
logFile = os.path.join(rootDir, r'ImportInterim\geoxyz\bodempeilingen//log_file.out')
xmldir = os.path.join(rootDir, r'ImportInterim\geoxyz\bodempeilingen\XMLGenerated')


# In[5]:

# Set path OGR/GDAL files 
# ogr2ogr        :: converts simple features data between file formats
# gdalwarp       :: image reprojection and warping utility
# gdal_rasterize :: burns vector geometries into a raster
# gdal_translate :: converts raster data between different formats
# gdalbuildvrt   :: builds a VRT from a list of datasets
# gdalinfo       :: lists information about a raster dataset
# ogrinfo        :: lists information about an OGR supported data source

# Folder that holds the GDAL/OGR command line executables; every tool path
# below is this folder joined with the executable's file name.
rootOgrGdal = r'C:\Python35\Lib\site-packages\osgeo'
(ogr2ogr,
 gdalwarp,
 gdal_rasterize,
 gdal_translate,
 gdalbuildvrt,
 gdalinfo,
 ogrinfo) = (os.path.join(rootOgrGdal, exe_name) for exe_name in (
     'ogr2ogr.exe',
     'gdalwarp.exe',
     'gdal_rasterize.exe',
     'gdal_translate.exe',
     'gdalbuildvrt.exe',
     'gdalinfo.exe',
     'ogrinfo.exe',
 ))


# In[6]:

# Logger 'survey2arcinfoascii': INFO and above is written to logFile as
# "<time> <level> <message>".
formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
hdlr = logging.FileHandler(logFile)
hdlr.setFormatter(formatter)

logger = logging.getLogger('survey2arcinfoascii')
logger.setLevel(logging.INFO)
logger.addHandler(hdlr)


# In[7]:

# Remove the files left in the working directory by a previous run.
print ('Empty tmp dir %s' % (workdir))
filelist = glob.glob(workdir+'//*')
for f in filelist:
    # os.remove raises on directories and would abort the whole script,
    # so only plain files are deleted; sub-directories are left in place.
    if os.path.isfile(f):
        os.remove(f)


# In[8]:

# Convert every raw survey file below ruweDataDir into ArcInfo ASCII grids,
# one grid per background polygon that the survey touches. Each step shells
# out to a GDAL/OGR command line tool and hands its result to the next step
# through a file in workdir; only the final .asc is written to asciiDataDir.
# The return codes of the external tools are never inspected.
for root, dirs, files in os.walk(ruweDataDir):
    for file in files:
        # only .pts files whose name does not contain 'SILAS' are processed
        if file.endswith('.pts') and r'SILAS' not in file:
            print (file)  
            
            init_file = os.path.join( os.path.abspath(root), file )
            base, extension = os.path.splitext(file)
            print (init_file)
            
            # 0 get timestamp
            # The survey date is read from fixed positions of the file name:
            # characters 7-12 are taken as yymmdd and '20' is prefixed to the
            # year. A name that does not fit raises ValueError, which is not
            # caught here.
            #tmod = os.path.getmtime(init_file)            
            #t = time.strftime('%Y%m%d%H%M%S', time.localtime(int(tmod)))
#           print (tmod.strftime("%Y%m%d%H%M%S"))            
            dateISO = datetime(int('20'+base[7:9]), int(base[9:11]), int(base[11:13]))
            # dateISO_yd = dateISO - timedelta(1)  # weird bug in fews
            t = dateISO.strftime("%Y%m%d%H%M%S")
            print (t)            
            
            # 1 convert survey data to CSV format
            # (tab separated, no header -> ';' separated, no header, no index)
            a = get_unique_filename(os.path.join(workdir,t+'.csv'))
            df = pd.read_csv(init_file, header=None, sep='\t')
            df.to_csv(a, header=False, index=False, sep=';')
            
            # 2 build OGRVRT from CSV file
            # NOTE(review): unlike the CSV, the VRT name is not made unique and
            # its layer is always named t. When two input files share a date
            # the VRT is overwritten and points at '<t>_01.csv'; presumably the
            # layer name then no longer matches the CSV layer - confirm.
            b = os.path.join(workdir,t+'.vrt')
            with open(b, 'w') as the_file:
                the_file.write(writeOGRVRT(t, a))
            
            # 2.1 get Extent from OGRVRT            
            # The ogrinfo summary is piped through the shell's find command so
            # that only the line containing "Extent" is captured.
            command = ogrinfo+' -so ' + b + ' ' + t + ' | find "Extent"'
            print (command)
            norm = sp.Popen(command, stdout=sp.PIPE, shell=True).communicate()            
            print (norm)
            # Strip everything except digits, '.', ',' and '-' from the text,
            # then turn the '-' between the two corner points into ',' so the
            # result splits into the four numbers of the Extent line.
            # A '-' sign of a negative coordinate would be replaced as well.
            extent = non_decimal.sub('', str(norm[0])).replace('-',',')
            bb = [x.strip() for x in extent.split(',')]
            
            # 2.2 spatial query extent feature achtergrond SHP
            # Copies the background polygons selected by the -spat bounding
            # box into z. The try also covers bb[1..3]: when no extent was
            # parsed the IndexError is printed and this file is skipped.
            try:
                z = get_unique_filename(os.path.join(workdir,t+'bg_sel.shp'))
                command = [ogr2ogr, z, y,'-spat',bb[0],bb[1],bb[2],bb[3]]
                print (sp.list2cmdline(command))
                norm = sp.Popen(sp.list2cmdline(command),stdout=sp.PIPE, shell=True)
                norm.communicate()                      
            except Exception as ex:
                print (ex)
                continue
                                
            # 3 create RASTER from OGRVRT
            # burns column field_3 of the points into a raster with 1.0 x 1.0 cells
            c = get_unique_filename(os.path.join(workdir,t+'.tif'))
            command = [gdal_rasterize, '-a','field_3','-tr','1.0','1.0', '-l',t,b,c]
            print (sp.list2cmdline(command))
            norm = sp.Popen(sp.list2cmdline(command),stdout=sp.PIPE, shell=True)
            norm.communicate()

            # get_features returns None when z cannot be opened; unpacking
            # None raises TypeError, which is printed and skips this file.
            try:
                ds, layer, z_features = get_features(z)
            except Exception as ex:
                print (ex)
                continue
                
            # one output grid per selected background polygon (OBJECTID)
            for obID in (z_features):
                print (obID)
                
                # 4 clip point RASTER with feature achtergrond SHP
                # source value 0 is treated as nodata and written as -9999
                d = get_unique_filename(os.path.join(workdir,'grid'+str(obID).zfill(3)+'_'+t+'.tif.vrt'))
                command = [gdalwarp, '-srcnodata', '0', '-dstnodata', '-9999', '-overwrite','-of', 'VRT', '-crop_to_cutline', 
                           '-cutline', z, '-cwhere', 'OBJECTID = '+str(obID), c, d]            
                print (sp.list2cmdline(command))
                norm = sp.Popen(sp.list2cmdline(command),stdout=sp.PIPE, shell=True)
                norm.communicate()
                
                # 5A convert feature achtergrond SHP to RASTER
                # burns the polygon's 'NoDataValu' attribute into 1.0 x 1.0 cells
                e = get_unique_filename(os.path.join(workdir,'grid'+str(obID).zfill(3)+'_'+t+'_bg_tmp'+'.tif'))
                command = [gdal_rasterize, '-a', 'NoDataValu', '-a_srs', 'EPSG:28992', '-where', 'OBJECTID = '+str(obID), 
                           '-tr', '1.0', '1.0', '-l', layer.GetDescription(), z, e]         
                print (sp.list2cmdline(command))
                norm = sp.Popen(sp.list2cmdline(command),stdout=sp.PIPE, shell=True)
                norm.communicate()
                
                # 5B clip achtergrond RASTER
                # (this rebinds f, the loop variable of the tmp-dir cleanup)
                f = get_unique_filename(os.path.join(workdir,'grid'+str(obID).zfill(3)+'_'+t+'_bg'+'.tif.vrt'))
                command = [gdalwarp, '-srcnodata', '-9999', '-dstnodata', '-9999', '-of', 'VRT', '-tr', '1.0', '1.0', 
                           '-overwrite', '-crop_to_cutline', '-cutline', z, '-cwhere', 'OBJECTID = '+str(obID), e, f]            
                print (sp.list2cmdline(command))
                norm = sp.Popen(sp.list2cmdline(command),stdout=sp.PIPE, shell=True)
                norm.communicate()                                

                # 6 Build VRT data source of point RASTER and feature achtergrond RASTER                
                g = get_unique_filename(os.path.join(workdir,'grid'+str(obID).zfill(3)+'_'+t+'.vrt'))
                command = [gdalbuildvrt, '-srcnodata', '-9999', g, d, f]
                print (sp.list2cmdline(command))
                norm = sp.Popen(sp.list2cmdline(command),stdout=sp.PIPE, shell=True)
                norm.communicate()

                # 7 Convert VRT to ArcInfoASCII
                h = get_unique_filename(os.path.join(asciiDataDir,'grid'+str(obID).zfill(3)+'_'+t+'.asc'))
                command = [gdal_translate, '-of', 'AAIGrid', '-tr', '1.0', '1.0', g, h]
                print (sp.list2cmdline(command))
                norm = sp.Popen(sp.list2cmdline(command),stdout=sp.PIPE, shell=True)
                norm.communicate()
                
                # 8 Only keep the ArcInfoASCIIs that contains data
                # When the gdalinfo -mm output has no line containing
                # "Computed", every file whose name starts with h minus its
                # 4-character extension is deleted (the .asc and its siblings).
                command = gdalinfo + ' -mm ' + h + ' | find "Computed"'
                print (command)
                norm = sp.Popen(command, stdout=sp.PIPE, shell=True).communicate()
                print (norm)
                if len(norm[0]) == 0:                    
                    for fl in glob.glob(h[0:-4]+'*'):
                        os.remove(fl)
            
#             # 9 Move file to ImportBackup
#             backup_file = os.path.join( os.path.abspath(ruweDataDirBackup), file )
#             try:
#                 os.remove(backup_file)
#             except OSError:
#                 pass
                
#             try:
#                 os.rename(init_file, backup_file)
#             except Exception as ex:
#                 print (ex)
#                 continue
                
#             # 10 Purge all tmp files            
#             try:
#                 os.remove(a) # csv-file
#                 os.remove(b) # OGR vrt-file
#                 os.remove(c) # bg-clip shp-file
#                 os.remove(d) # point-clip vrt-file
#                 os.remove(e) # rasterized bg-clip 
#                 os.remove(f) # wrap to cutline bg-clip
#                 os.remove(g) # clip point-clip vrt with bg-clip
#             except Exception as ex:
#                 print (ex)
#                 continue                


# merge ascis from same date and same grid
# Merge ascii grids that belong to the same grid and the same survey date.
# The names are sorted so the merge order does not depend on the order in
# which the operating system lists the directory.
asc_files = sorted(
    name for name in os.listdir(asciiDataDir) if name.endswith(".asc"))

# Grids are written as 'grid' + zero-padded OBJECTID + '_' + yyyymmddHHMMSS,
# so the first 16 characters hold the grid id plus the survey date (yyyymmdd).
grid_date_groups = get_unique_lists(asc_files, 16)

# Nothing happens when no group exists: the loop body simply never runs.
for group in grid_date_groups:
    # The first grid of the group is the base the other grids are merged into.
    i = os.path.join(asciiDataDir, group[0])
    ds_i = gdal.Open(i, gdal.GA_ReadOnly)
    if ds_i is None:
        logger.error('Could not open %s' % (i))
        continue
    merged = np.copy(ds_i.ReadAsArray())

    # Overlay the remaining grids in order: every cell that holds data
    # (is not -9999) replaces the value collected so far. Plain boolean
    # indexing is used instead of masked arrays, whose mask collapses to a
    # scalar when a grid has no nodata cell at all and then breaks the
    # assignment.
    all_readable = True
    for grid in group[1:]:
        j = os.path.join(asciiDataDir, grid)
        ds_j = gdal.Open(j, gdal.GA_ReadOnly)
        if ds_j is None:
            logger.error('Could not open %s' % (j))
            all_readable = False
            break
        update = ds_j.ReadAsArray()
        ds_j = None  # release the file before it is deleted further down
        has_data = update != -9999.
        merged[has_data] = update[has_data]

    if not all_readable:
        # skip the whole group so that no unmerged source grid is deleted
        ds_i = None
        continue

    # create file
    m = get_unique_filename(i)[:-4]+'.tif'
    saveRaster(m, merged, ds_i, datatype=7, formatraster="GTiff", nan=-9999.)

    # flush files
    ds_i = None
    del merged

    # covert to AAIGrid
    # 11 Merge VRT to ascii-grid
    n = m[:-4]+'.asc' # create unique asc file
    command = [gdal_translate, '-of', 'AAIGrid', '-a_nodata', '-9999', m, n]
    print (sp.list2cmdline(command))
    norm = sp.Popen(sp.list2cmdline(command),stdout=sp.PIPE, shell=True)
    norm.communicate()

    # remove grids from before merge and change grid from new_name to org_name
    try:
        os.remove(m)
        if not os.path.isfile(n):
            # the conversion failed: keep the source grids instead of
            # deleting them and then failing on the rename
            logger.error('Merged grid %s was not created, source grids are kept' % (n))
            continue
        for grid in group:
            print (os.path.join(asciiDataDir, grid))
            os.remove(os.path.join(asciiDataDir, grid))
        os.rename(n, i)
    except OSError as ex:
        print (ex)
        continue


# purge redundant xml and prj files 
# Collect every .xml and .prj file below asciiDataDir first, then delete
# them: all xml files, followed by all prj files.
xmlFiles = []
prjFiles = []
for folder, _subdirs, names in os.walk(asciiDataDir):
    xmlFiles.extend(os.path.join(folder, nm) for nm in names if nm.endswith(".xml"))
    prjFiles.extend(os.path.join(folder, nm) for nm in names if nm.endswith(".prj"))

for stale_file in xmlFiles + prjFiles:
    os.remove(stale_file)

print ('END OF SCRIPT')


# In[ ]:

# Empty tmp dir D:\OmsWaddenzee\trunk\fews\ImportInterim\geoxyz\bodempeilingen\tmpData
# GEOXYZ_160901_NL0067-304_17111_DHVH_A-B_160901_RD_NAP_MB_#1.pts
# D:\OmsWaddenzee\trunk\fews\Import\geoxyz\bodempeilingen\ruweData\GEOXYZ_160901_NL0067-304_17111_DHVH_A-B_160901_RD_NAP_MB_#1.pts
# 20160901000000
# C:\Python35\Lib\site-packages\osgeo\ogrinfo.exe -so D:\OmsWaddenzee\trunk\fews\ImportInterim\geoxyz\bodempeilingen\tmpData\20160901000000.vrt 20160901000000 | find "Extent"
# (b'Extent: (182715.500000, 603312.500000) - (182718.500000, 603314.500000)\r\n', None)
# C:\Python35\Lib\site-packages\osgeo\ogr2ogr.exe D:\OmsWaddenzee\trunk\fews\ImportInterim\geoxyz\bodempeilingen\tmpData\20160901000000bg_sel.shp D:\OmsWaddenzee\trunk\fews\Config\MapLayerFiles\Achtergrond_polygonen//Achtergrond_polygonen.shp -spat 182715.500000 603312.500000 182718.500000 603314.500000
# C:\Python35\Lib\site-packages\osgeo\gdal_rasterize.exe -a field_3 -tr 1.0 1.0 -l 20160901000000 D:\OmsWaddenzee\trunk\fews\ImportInterim\geoxyz\

# Check the NumPy 1.11 release notes for more information.
