In [None]:
from xml.etree import ElementTree
from xml.dom import minidom
import glob
import os
from fnmatch import fnmatch
import sys
import time
from shutil import copy
from xml.etree.ElementTree import Element, SubElement, Comment
import subprocess as sp
import pandas as pd
import geopandas as gpd
from datetime import datetime, timedelta
from osgeo import ogr
import logging
import re
from itertools import *
from pathlib import Path
import itertools
from osgeo import gdal
import numpy as np
import ntpath
from IPython.core.debugger import set_trace
# Matches every run of characters that is not a digit, '.', ',' or '-'.
# Substituting these runs with '' leaves only the numeric part of a text
# (used on the ogrinfo "Extent" output in datafolder_to_tiles).
non_decimal = re.compile(r'[^\d.,-]+')

In [None]:
def cmd(command, parse_directly=False):
    """Echo a command, run it through the shell and return its output.

    :param command: A finished command-line string when ``parse_directly``
        is true; otherwise a sequence of arguments that is joined into one
        command line with ``subprocess.list2cmdline``.
    :param parse_directly: Hand ``command`` to the shell unchanged.
    :return: The ``(stdout, stderr)`` tuple of ``Popen.communicate``. Only
        stdout is piped, so the second item is ``None``.
    """
    command_line = command if parse_directly == True else sp.list2cmdline(command)
    print(command_line)
    process = sp.Popen(command_line, stdout=sp.PIPE, shell=True)
    return process.communicate()

In [None]:
def get_features(shape):
    """Open a shapefile read-only and collect its distinct OBJECTID values.

    :param shape: Path of the shapefile to open.
    :return: ``(dataSource, layer, object_ids)`` where ``object_ids`` is a
        list of the unique ``OBJECTID`` field values, or ``None`` (after
        logging an error) when the file could not be opened.
    """
    shp_driver = ogr.GetDriverByName('ESRI Shapefile')
    dataSource = shp_driver.Open(shape, 0)  # 0 = read-only, 1 = writeable

    # Guard: nothing to read when the shapefile could not be opened.
    if dataSource is None:
        logger.error('Could not open %s' % (shape))
        return

    logger.info('Opened %s' % (shape))
    layer = dataSource.GetLayer()
    feature_count = layer.GetFeatureCount()
    logger.info('Name of layer: %s' % layer.GetDescription())
    logger.info("Number of features in %s: %d" %
                (os.path.basename(shape), feature_count))

    # Features are fetched by index 0..count-1; the set removes duplicate ids.
    object_ids = {layer.GetFeature(idx).GetField('OBJECTID')
                  for idx in range(feature_count)}
    return dataSource, layer, list(object_ids)


def iter_incrementing_file_names(path):
    """
    Endlessly generate candidate file names derived from ``path``.

    The first candidate is ``path`` itself. Every following candidate has
    ``_NN`` inserted before the extension, where NN is a counter starting at
    1 and zero-padded to at least two digits (``name_01.ext``,
    ``name_02.ext``, ...).

    :param path: Some path
    :return: An iterator that never ends.
    """
    yield path
    stem, suffix = os.path.splitext(path)
    counter = 1
    while True:
        yield stem + '_' + '%02d' % counter + suffix
        counter += 1


def get_unique_filename(file_in):
    """Return the first candidate name for ``file_in`` that is not an existing file.

    Candidates are taken in order from ``iter_incrementing_file_names``:
    ``file_in`` itself first, then its numbered variants.

    :param file_in: Desired file path.
    :return: ``file_in`` or the first numbered variant for which no file exists.
    """
    return next(
        candidate
        for candidate in iter_incrementing_file_names(file_in)
        if not Path(candidate).is_file()
    )


# In[4]:

def filterbyvalue(seq, value, end_prefix):
    """Lazily yield the items of ``seq`` whose leading slice equals ``value``.

    :param seq: Iterable of sliceable items (e.g. file names).
    :param value: Prefix to compare against.
    :param end_prefix: Length of the leading slice of each item to compare.
    """
    yield from (item for item in seq if item[:end_prefix] == value)


# In[5]:

def get_unique_lists(in_files, end_prefix=10):
    """Group items that share the same leading ``end_prefix`` characters.

    Only groups with more than one member are returned. Members keep their
    input order and groups are returned in order of first appearance, so the
    result is deterministic. (De-duplicating through ``set()`` would make
    the group order depend on string hash randomisation.)

    :param in_files: Iterable of sliceable, hashable items (file names).
    :param end_prefix: Length of the leading slice used as the grouping key.
    :return: List of lists; each inner list holds the items of one prefix.
    """
    groups = {}
    first_seen = []  # prefixes in order of first appearance
    for name in in_files:
        prefix = name[0:end_prefix]
        if prefix not in groups:
            groups[prefix] = []
            first_seen.append(prefix)
        groups[prefix].append(name)

    return [groups[prefix] for prefix in first_seen if len(groups[prefix]) > 1]


# In[6]:
def saveRaster(path, array, dsSource, datatype=3, formatraster="GTiff", nan=None):
    """
    Write ``array`` to band 1 of a new raster file.

    The new raster takes its size, band count, geotransform and projection
    from ``dsSource``. Only band 1 is written, also when ``dsSource`` has
    more than one band.

    :param path: File name of the raster to create.
    :param array: Array passed to ``WriteArray`` of band 1.
    :param dsSource: Open GDAL dataset used as template.
    :param datatype: GDAL data type code, see the list below.
    :param formatraster: Short name of the GDAL driver, see the list below.
    :param nan: NoData value to set on band 1; ``None`` sets no NoData value.
    :raises RuntimeError: When the driver is not available or the output
        raster cannot be created.

    Datatypes:
    unknown = 0
    byte = 1
    unsigned int16 = 2
    signed int16 = 3
    unsigned int32 = 4
    signed int32 = 5
    float32 = 6
    float64 = 7
    complex int16 = 8
    complex int32 = 9
    complex float32 = 10
    complex float64 = 11

    Formatraster:
    GeoTIFF = GTiff
    Erdas = HFA (output = .img)
    OGC web map service = WMS
    png = PNG
    """
    driver = gdal.GetDriverByName(formatraster)
    if driver is None:
        raise RuntimeError('GDAL driver not available: {}'.format(formatraster))

    # Create the output with the same dimensions and band count as the source
    outDataset = driver.Create(path,
                               dsSource.RasterXSize,
                               dsSource.RasterYSize,
                               dsSource.RasterCount,
                               datatype)
    if outDataset is None:
        raise RuntimeError('Could not create raster: {}'.format(path))

    # Copy georeferencing from the source
    outDataset.SetGeoTransform(dsSource.GetGeoTransform())
    outDataset.SetProjection(dsSource.GetProjection())

    # Write output to band 1 of the new raster and set the NoData value
    outBand = outDataset.GetRasterBand(1)
    if nan is not None:
        outBand.SetNoDataValue(nan)
    outBand.WriteArray(array)

    # Flush and release band before dataset so the file is finalised on disk
    outBand.FlushCache()
    outBand = None
    outDataset = None


def prettify(elem):
    """Serialise an ElementTree element to an indented XML string.

    The element is dumped as UTF-8, re-parsed with minidom and pretty-printed
    with two spaces of indentation per level.
    """
    return minidom.parseString(
        ElementTree.tostring(elem, 'utf-8')
    ).toprettyxml(indent="  ")


def writeOGRVRT(basename, fullpath):
    """Build the XML text of an OGR VRT that exposes a csv file as a point layer.

    The layer is named ``basename``, reads from ``fullpath``, is declared as
    ``wkbPoint`` in ``EPSG:28992`` and takes x, y and z from the columns
    ``field_1``, ``field_2`` and ``field_3``.

    :param basename: Name given to the OGRVRTLayer.
    :param fullpath: Text of the SrcDataSource element.
    :return: Pretty-printed XML string.
    """
    datasource = Element('OGRVRTDataSource')
    vrt_layer = SubElement(datasource, 'OGRVRTLayer', {'name': basename})

    # plain text children, in document order
    for tag, text in (('SrcDataSource', fullpath),
                      ('GeometryType', 'wkbPoint'),
                      ('LayerSRS', 'EPSG:28992')):
        SubElement(vrt_layer, tag).text = text

    # geometry is assembled from the first three csv columns
    SubElement(vrt_layer, 'GeometryField', {
        'encoding': 'PointFromColumns',
        'x': 'field_1',
        'y': 'field_2',
        'z': 'field_3',
    })

    return prettify(datasource)

In [None]:
def get_WD_folder(base, workdir):
    """Derive the working folder of a coefficient file from its base name.

    ``base`` is split on ``_``. Part 5 selects the coefficient folder
    (``a`` -> ``ruweData_A``, ``b`` -> ``ruweData_B``); parts 6 and 7 are
    zero-padded to three characters and form the ``WD_xxx_yyy`` folder name.

    :param base: File name without extension, with at least 8 ``_``-separated parts.
    :param workdir: Root folder the result is placed under.
    :return: ``<workdir>/<coefficient folder>/WD_<part6>_<part7>``.
    :raises ValueError: When part 5 is neither ``a`` nor ``b``.
    :raises IndexError: When ``base`` has fewer than 8 parts.
    """
    coef_folders = {'a': 'ruweData_A', 'b': 'ruweData_B'}

    base_list = base.split('_')
    coef_key = base_list[5]
    if coef_key not in coef_folders:
        # without this check the function would fail on an unbound local
        raise ValueError(
            "Unknown coefficient '{}' in file name '{}'; expected 'a' or 'b'".format(
                coef_key, base))
    coef = coef_folders[coef_key]

    wd_dir = 'WD_{}_{}'.format(base_list[6].zfill(3),
                               base_list[7].zfill(3))

    return os.path.join(workdir, coef, wd_dir)

In [None]:
def _remove_quietly(*paths):
    """Delete each path on its own; a failure is printed and does not stop the rest."""
    for path in paths:
        try:
            os.remove(path)
        except Exception as ex:
            print(ex)


def datafolder_to_tiles(sourceRoot, coef, wd, asciiRoot, tmpRoot, backupRoot, y):
    """
    Convert every csv file below ``<sourceRoot>/<coef>/<wd>`` to ArcInfo ASCII
    grids, one grid per selected background polygon.

    Steps per csv file (the same numbers are printed while running):
      0    timestamp from the first 8 characters of the file name (yyyymmdd)
      1    re-write the csv as a clean 3-column, ';'-separated file
      2    write an OGR VRT for it, print its extent, and select the
           background polygons of ``y`` that contain points
      3    rasterize the points (1.0 x 1.0 cells)
      4-7  per selected polygon: clip the point raster, rasterize and clip
           the polygon, stack both in a VRT and translate it to AAIGrid
      8    delete grids for which ``gdalinfo -mm`` reports nothing
      9    move the source csv to the backup folder
      10   delete the temporary files of this csv

    Uses the module-level tool paths (``ogrinfo``, ``ogr2ogr``,
    ``gdal_rasterize``, ``gdalwarp``, ``gdalbuildvrt``, ``gdal_translate``,
    ``gdalinfo``) and helpers (``cmd``, ``get_unique_filename``,
    ``get_features``, ``writeOGRVRT``, ``non_decimal``).
    NOTE(review): the ``| find "..."`` pipelines presumably rely on the
    Windows ``find`` utility, and the tool/file paths are inserted unquoted.

    :param sourceRoot: Root folder of the csv input.
    :param coef: Coefficient sub-folder name.
    :param wd: WD sub-folder name.
    :param asciiRoot: Root folder for the resulting ``.asc`` grids.
    :param tmpRoot: Root folder for temporary files.
    :param backupRoot: Root folder the processed csv files are moved to.
    :param y: Path of the background polygon shapefile.
    :raises SystemExit: When a csv file cannot be parsed into exactly 3 columns.
    """
    sourcedir = os.path.join(sourceRoot, coef, wd)
    workdir = os.path.join(tmpRoot, coef, wd)
    backupdir = os.path.join(backupRoot, coef, wd)
    asciiDataDir = os.path.join(asciiRoot, coef, wd)

    # every step below writes into these folders, so make sure they exist
    for out_dir in (workdir, backupdir, asciiDataDir):
        os.makedirs(out_dir, exist_ok=True)

    # layer name of the background polygons; identical for every csv file
    layer_polygon = os.path.splitext(ntpath.basename(y))[0]

    for root, dirs, files in os.walk(sourcedir):
        for file in files:
            if not file.endswith('.csv'):
                continue
            print(file)

            init_file = os.path.join(os.path.abspath(root), file)
            base, extension = os.path.splitext(file)
            print(init_file)

            # 0 get timestamp
            print('# 0 get timestamp')
            dateISO = datetime(
                int(base[0:4]), int(base[4:6]), int(base[6:8]))
            t = dateISO.strftime("%Y%m%d%H%M%S")
            print(t)

            # 1 convert survey data to CSV format
            print('# 1 convert survey data to CSV format')
            a = get_unique_filename(os.path.join(workdir, 'step1_{}.csv'.format(t)))
            df = pd.read_csv(init_file, header=None, sep=';')
            if df.shape[1] != 3:
                print('no 3 columns, try delimiting whitespaces while parsing')
                # sep=r'\s+' is the replacement for the deprecated delim_whitespace=True
                df = pd.read_csv(init_file, header=None, sep=r'\s+')
                if df.shape[1] != 3:
                    print(df.head())
                    sys.exit(
                        'Source file contains not exactly 3 columns. Check datasource')
            print('file succesfully read and three columns are parsed')
            df = df.dropna()
            df.to_csv(a, header=False, index=False, sep=';')

            # 2 build OGRVRT from CSV file
            print('# 2 build OGRVRT from CSV file')
            basename_a = os.path.basename(a)
            base_a, extension = os.path.splitext(basename_a)
            b = os.path.join(workdir, 'step2_{}.vrt'.format(t))
            with open(b, 'w') as the_file:
                the_file.write(writeOGRVRT(base_a, a))

            # 2.1 get Extent from OGRVRT (informational only: printed, not used further)
            print('# 2.1 get Extent from OGRVRT')
            command = '{0} -so {1} {2} | find "Extent"'.format(
                ogrinfo, b, base_a)
            norm = cmd(command, parse_directly=True)
            extent = non_decimal.sub('', str(norm[0])).replace('-', ',')
            bb = [x.strip() for x in extent.split(',')]
            print(bb)

            # 2.2 spatial query extent feature achtergrond SHP
            print('# 2.2 spatial query extent feature achtergrond SHP')
            # These names are needed after the try block as well, so they are
            # bound before it; a failure inside must not leave them undefined.
            z = get_unique_filename(
                os.path.join(workdir, 'step22_{}bg_sel.shp'.format(t)))
            layer_point = os.path.splitext(ntpath.basename(a))[0]
            file_layer_point = "'{}'.{}".format(b, layer_point)
            try:
                command = [ogr2ogr, '-f', "ESRI Shapefile", z, y, '-dialect', 'sqlite', '-sql',
                           "SELECT g.Geometry, g.OBJECTID, g.NoDataValu FROM {} g, {} p WHERE ST_Within(p.geometry, g.geometry)".format(layer_polygon, file_layer_point)]
                cmd(command)

                # the join returns one row per point; keep one row per polygon
                gdf_z = gpd.read_file(z)
                gdf_z = gdf_z.drop_duplicates(["OBJECTID"])
                try:
                    os.remove(z)
                except OSError:
                    pass
                gdf_z.to_file(z)
                gdf_z = None
            except Exception as ex:
                print(ex)

            # 3 create RASTER from OGRVRT
            print('# 3 create RASTER from OGRVRT')
            c = get_unique_filename(os.path.join(workdir, 'step3_{}.tif'.format(t)))
            command = [gdal_rasterize, '-a', 'field_3',
                       '-tr', '1.0', '1.0', '-l', layer_point, b, c]
            cmd(command)

            # Read the selected polygons. Start from an empty selection so a
            # failure here can never reuse the features of a previous csv file.
            z_features = []
            layer_name = None
            ds = layer = None
            try:
                ds, layer, z_features = get_features(z)
                print(z_features)
                layer_name = layer.GetDescription()
            except Exception as ex:
                print(ex)
            finally:
                # Only the layer name is needed below. Release the OGR objects so
                # the shapefile is not held open while tools use and purge it.
                layer = None
                ds = None

            for obID in z_features:
                print(obID)
                tile_id = str(obID).zfill(3)
                where_obid = 'OBJECTID=' + str(obID)

                # 4 clip point RASTER with feature achtergrond SHP
                print('# 4 clip point RASTER with feature achtergrond SHP')
                d = get_unique_filename(os.path.join(
                    workdir, 'step4_' + tile_id + '_' + t + '.tif'))
                command = [gdalwarp, '-srcnodata', '0', '-dstnodata', '-9999', '-overwrite', '-of', 'GTiff', '-crop_to_cutline',
                           '-cutline', z, '-cwhere', where_obid, c, d]
                cmd(command)

                # 5A convert feature achtergrond SHP to RASTER
                print('# 5A convert feature achtergrond SHP to RASTER')
                e = get_unique_filename(os.path.join(
                    workdir, 'step5A_' + tile_id + '_' + t + '_bg_tmp' + '.tif'))
                command = [gdal_rasterize, '-a', 'NoDataValu', '-a_srs', 'EPSG:28992', '-where', where_obid,
                           '-tr', '1.0', '1.0', '-l', layer_name, z, e]
                cmd(command)

                # 5B clip achtergrond RASTER
                print('# 5B clip achtergrond RASTER')
                f = get_unique_filename(os.path.join(
                    workdir, 'step5B_' + tile_id + '_' + t + '_bg' + '.tif'))
                command = [gdalwarp, '-srcnodata', '-9999', '-dstnodata', '-9999', '-of', 'GTiff', '-tr', '1.0', '1.0',
                           '-overwrite', '-crop_to_cutline', '-cutline', z, '-cwhere', where_obid, e, f]
                cmd(command)

                # 6 Build VRT data source of point RASTER and feature achtergrond RASTER
                print(
                    '# 6 Build VRT data source of point RASTER and feature achtergrond RASTER')
                g = get_unique_filename(os.path.join(
                    workdir, 'step6_' + tile_id + '_' + t + '.vrt'))
                command = [gdalbuildvrt, '-srcnodata', '-9999', g, d, f]
                cmd(command)

                # 7 Convert VRT to ArcInfoASCII
                print('# 7 Convert VRT to ArcInfoASCII')
                h = get_unique_filename(os.path.join(
                    asciiDataDir, 'grid' + tile_id + '_' + t + '.asc'))
                command = [gdal_translate, '-of',
                           'AAIGrid', '-tr', '1.0', '1.0', g, h]
                cmd(command)

                # 8 Only keep the ArcInfoASCIIs that contains data
                print('# 8 Only keep the ArcInfoASCIIs that contains data')
                command = gdalinfo + ' -mm ' + h + ' | find "Computed"'
                norm = cmd(command, parse_directly=True)
                if len(norm[0]) == 0:
                    # '<stem>.*' removes the grid and its sidecar files only;
                    # '<stem>*' would also hit siblings such as '<stem>_01.asc'.
                    for fl in glob.glob(glob.escape(h[0:-4]) + '.*'):
                        print('remove grid since it doesnt contain data')
                        os.remove(fl)

                # 8A Purge the per-polygon tmp files
                print('# 8A Purge all tmp files')
                _remove_quietly(
                    d,  # clipped point raster
                    e,  # rasterized background polygon
                    f,  # background raster cropped to the cutline
                    g,  # VRT stacking d and f
                )

            # 9 Move file to ImportBackup
            print('# 9 Move file to ImportBackup')
            backup_file = os.path.join(
                os.path.abspath(backupdir), file)
            try:
                os.remove(backup_file)
            except OSError:
                pass

            try:
                os.rename(init_file, backup_file)
            except Exception as ex:
                print(ex)

            # 10 Purge all tmp files
            print('# 10 Purge all tmp files')
            tmp_files = [a, b]                                # cleaned csv, OGR vrt
            tmp_files += glob.glob(
                glob.escape(os.path.splitext(z)[0]) + '.*')   # selection shp + sidecars
            tmp_files.append(c)                               # point raster
            _remove_quietly(*tmp_files)

    print('function complete')

In [None]:
def multi_tile_to_single_tile(asciiDataDir):
    """
    Merge ``.asc`` grids in ``asciiDataDir`` that share their first 16 name
    characters into one grid, then delete leftover ``.xml`` and ``.prj`` files.

    For every group the first grid is the base; each following grid
    overwrites the base wherever it holds a value other than -9999. The
    result is written as a temporary GeoTIFF, translated to AAIGrid and
    renamed to the first file name of the group after the group's original
    files have been removed.

    Uses the module-level ``gdal_translate`` path, ``logger`` and the helpers
    ``get_unique_lists``, ``get_unique_filename``, ``saveRaster`` and ``cmd``.

    :param asciiDataDir: Folder that holds the ``.asc`` grids.
    """
    nodata = -9999.

    asc_files = [name for name in os.listdir(asciiDataDir)
                 if name.endswith(".asc")]

    # datafolder_to_tiles names grids 'grid' + 3-digit id + '_' + yyyymmddHHMMSS,
    # so 16 characters cover grid id + date (for ids of at most 3 digits)
    grid_date_groups = get_unique_lists(asc_files, 16)

    for group in grid_date_groups:
        i = os.path.join(asciiDataDir, group[0])
        ds_i = gdal.Open(i, gdal.GA_ReadOnly)
        merged = np.copy(ds_i.ReadAsArray())

        for grid in group[1:]:
            j = os.path.join(asciiDataDir, grid)
            tile = gdal.Open(j, gdal.GA_ReadOnly).ReadAsArray()

            # Plain boolean mask instead of a masked array: the mask of a
            # masked array collapses to a scalar when a tile has no NoData
            # cell at all, which breaks the indexed assignment.
            has_data = tile != nodata
            merged[has_data] = tile[has_data]

        # write the merged grid to a temporary GeoTIFF next to a free .asc name
        m = get_unique_filename(i)[:-4] + '.tif'
        saveRaster(m, merged, ds_i, datatype=7, formatraster="GTiff", nan=nodata)

        # release the source dataset before its file is removed below
        ds_i = None

        # 11 convert the merged GeoTIFF to ascii-grid
        n = m[:-4] + '.asc'
        command = [gdal_translate, '-of', 'AAIGrid', '-a_nodata', '-9999', m, n]
        cmd(command)

        # remove grids from before merge and change grid from new_name to org_name
        try:
            os.remove(m)
            for grid in group:
                logger.info(os.path.join(asciiDataDir, grid))
                os.remove(os.path.join(asciiDataDir, grid))
            os.rename(n, i)
        except Exception as ex:
            logger.info(ex)
            continue

    # purge redundant xml and prj files (collected first, deleted afterwards)
    redundant = []
    for root, dirs, files in os.walk(asciiDataDir):
        for file in files:
            if file.endswith((".xml", ".prj")):
                redundant.append(os.path.join(root, file))

    for path in redundant:
        os.remove(path)

In [None]:
# --- FEWS installation and its configuration folder
rootDir = r'D:\FEWSProjecten\OmsWaddenzee\trunk\fews'
configDir = r'D:\FEWSProjecten\OmsWaddenzee\trunk\fews\Config'

# --- csv input folders and the backup location for processed input
ruweDataDir = os.path.join(
    rootDir, r'Import\hkv\coefficienten\bronCsv')
ruweDataDir_fixed_lines = os.path.join(
    rootDir, r'Import\hkv\coefficienten\bronFixedlineCsv')
ruweDataDirBackup = os.path.join(rootDir, r'ImportBackup\hkv\coefficienten\ruweDataBackup')

# --- output grids and temporary working data
asciiDataDir = os.path.join(
    rootDir, r'Import\hkv\coefficienten\asciiData')
workdir = os.path.join(
    rootDir, r'ImportInterim\hkv\coefficienten\tmpData')

# --- background polygon shapefile (passed to datafolder_to_tiles as `y`)
y = os.path.join(configDir, r'MapLayerFiles\Achtergrond_polygonen//Achtergrond_polygonen.shp')

# --- log file and folder for generated xml
logFile = os.path.join(rootDir, r'ImportInterim\hkv\coefficienten//log_file.out')
xmldir = os.path.join(rootDir, r'ImportInterim\hkv\coefficienten\XMLGenerated')

In [None]:
# Folder that holds the GDAL/OGR command-line executables
rootOgrGdal = r'C:\Python35\Lib\site-packages\osgeo'

# Full path of every executable that is called through cmd()
(ogr2ogr,
 gdalwarp,
 gdal_rasterize,
 gdal_translate,
 gdalbuildvrt,
 gdalinfo,
 ogrinfo) = (
    os.path.join(rootOgrGdal, exe_name)
    for exe_name in ('ogr2ogr.exe',
                     'gdalwarp.exe',
                     'gdal_rasterize.exe',
                     'gdal_translate.exe',
                     'gdalbuildvrt.exe',
                     'gdalinfo.exe',
                     'ogrinfo.exe')
)

In [None]:
# File logger shared by the functions in this notebook
logger = logging.getLogger('survey2arcinfoascii')
logger.setLevel(logging.INFO)

# getLogger returns the same logger object on every call, so re-running this
# cell would stack one more FileHandler each time and write every record
# once per handler. Drop (and close) handlers of earlier runs first.
for old_handler in list(logger.handlers):
    logger.removeHandler(old_handler)
    old_handler.close()

formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
hdlr = logging.FileHandler(logFile)
hdlr.setFormatter(formatter)
logger.addHandler(hdlr)

In [None]:
# first get the tmp-csv files
# Every csv below ruweDataDir is parsed into 3 columns, stripped of incomplete
# rows and written to its WD folder in chunks of at most `max_rows` rows.
max_rows = 1000000

for root, dirs, files in os.walk(ruweDataDir):
    for file in files:
        if not file.endswith('.csv'):
            continue
        print(file)

        init_file = os.path.join(os.path.abspath(root), file)
        base, extension = os.path.splitext(file)
        print(init_file)

        wd_folder = get_WD_folder(base, workdir)
        print(wd_folder)

        # 0 get timestamp (characters 14-21 of the file name: yyyymmdd)
        print('# 0 get timestamp')
        dateISO = datetime(
            int(base[14:18]), int(base[18:20]), int(base[20:22]))
        t = dateISO.strftime("%Y%m%d%H%M%S")
        print(t)

        # 1 convert coeffients data to CSV format
        print('# 1 convert coeffients data to CSV format')
        df = pd.read_csv(init_file, header=None)
        if df.shape[1] != 3:
            print('no 3 columns, try delimiting whitespaces while parsing')
            # sep=r'\s+' is the replacement for the deprecated delim_whitespace=True
            df = pd.read_csv(init_file, header=None, sep=r'\s+')
            if df.shape[1] != 3:
                print(df.head())
                sys.exit(
                    'Source file contains not exactly 3 columns. Check datasource')
        print('file succesfully read and three columns are parsed')
        df = df.dropna()

        if df.empty:
            # nothing to write; an empty csv cannot be parsed again later
            print('no complete rows left after dropping NaN, file skipped')
            continue

        # 1.1 make tmp-csv files not longer than 1000000 rows
        print('# 1.1 make tmp-csv files not longer than 1000000 rows')
        os.makedirs(wd_folder, exist_ok=True)
        for start in range(0, len(df), max_rows):
            frame = df.iloc[start:start + max_rows]
            # each chunk gets the next free name: <t>.csv, <t>_01.csv, ...
            a = get_unique_filename(os.path.join(wd_folder, t + '.csv'))
            frame.to_csv(a, header=False, index=False, sep=';', float_format='%.10f')

In [None]:
sourceRoot = ruweDataDir_fixed_lines
tmpRoot = workdir
asciiRoot = asciiDataDir
backupRoot = ruweDataDirBackup

# Selection of coefficient folders and WD folders to process in this run
coef_selection = ['ruweData_A']
wd_selection = ['WD_000_090']

for coef in coef_selection:
    coef_dir = os.path.join(sourceRoot, coef)
    if not os.path.isdir(coef_dir):
        continue
    print(coef)

    # Only the direct sub-folders matter: all paths below are built as
    # <sourceRoot>/<coef>/<wd>, so a recursive walk is not needed.
    wd_dirs = [name for name in os.listdir(coef_dir)
               if os.path.isdir(os.path.join(coef_dir, name))]
    print(wd_dirs)

    for wd in wd_dirs:
        if wd not in wd_selection:
            continue

        # check if file exist in folder otherwise skip the folder
        if os.listdir(os.path.join(coef_dir, wd)):
            print('WD folder exist:{0}{1}'.format(coef, wd))
            # following line execute function to create ASCII file from csv file
            datafolder_to_tiles(sourceRoot, coef, wd, asciiRoot, tmpRoot, backupRoot, y)
            asciiCoefWD = os.path.join(asciiRoot, coef, wd)
            print(asciiCoefWD)
            # following line execute function to combine ASCII files into a single ASCII file
            multi_tile_to_single_tile(asciiCoefWD)