https://gis.stackexchange.com/questions/15135/using-field-to-rgb-mapping-for-symbology-in-qgis

In [179]:
%pylab inline
import logging as log
import os
import attr
log.getLogger().setLevel(log.INFO)

Populating the interactive namespace from numpy and matplotlib


In [180]:
# --- file types to collect (extensions written without the leading dot) ---
vectors_exts = ["shp"]  # add more if needed
rasters_exts = ["tiff", "tif"]
documents_exts = ["pdf"]

# --- input project folder and output location ---
infolder = "/data/planmap/Planmap/DATA/mercury/PM-MER-MS-H05_3cc_01/".strip()
outfolder = "/data/tmppkg3"

# The project name is the last path component of the input folder; normpath
# removes the trailing slash first, otherwise basename would be empty.
# Assign a literal string to `projectname` here to override it if needed.
projectname = os.path.basename(os.path.normpath(infolder))
outfname = projectname + ".gpkg"
outfullpath = os.path.join(outfolder, outfname)

log.info(f"working on project {projectname}, output will be in {outfolder}")

INFO:root:working on project PM-MER-MS-H05_3cc_01, output will be in /data/tmppkg3


In [181]:
import glob
def find_types(extensions, folder):
    """Recursively collect the files below ``folder`` that have one of the given extensions.

    Parameters
    ----------
    extensions : iterable of str
        File extensions without the leading dot (e.g. ``"shp"``).
    folder : str
        Root folder of the search; all its subfolders are searched too.

    Returns
    -------
    list of str
        The paths returned by ``glob``, grouped by extension in the order
        in which the extensions were given.
    """
    return [
        path
        for vtype in extensions
        for path in glob.glob(folder + f"/**/*.{vtype}", recursive = True)
    ]

In [182]:
# Scan the input folder once per category of file: vectors, rasters, documents.
shapefiles, rasters, pdfs = (
    find_types(exts, infolder)
    for exts in (vectors_exts, rasters_exts, documents_exts)
)

import regex
def remove_filter_matches(fnames, filters=[".*5_classes.*"]):
    """Return the entries of ``fnames`` that match none of the ``filters``.

    A name is rejected as soon as one pattern matches it, and kept only when
    no pattern matches. Each kept name appears exactly once per occurrence in
    ``fnames``; an empty ``filters`` keeps everything.

    Parameters
    ----------
    fnames : iterable of str
        File names (or paths) to screen.
    filters : iterable of str
        Regular-expression patterns, tested with ``regex.match`` (i.e.
        anchored at the start of the name). The default list is only read,
        never modified.

    Returns
    -------
    list of str
        The surviving names, in their original order.
    """
    out = []
    for fname in fnames:
        # The first pattern that matches (if any) is reported as the reason
        # for the rejection.
        rejected_by = next(
            (pattern for pattern in filters if regex.match(pattern, fname)),
            None,
        )
        if rejected_by is None:
            out.append(fname)
        else:
            log.warning(f"removed from list {fname} due to filter {rejected_by}")

    return out
    
# Drop the vector files matched by the default exclusion pattern (".*5_classes.*").
shapefiles = remove_filter_matches(shapefiles)



In [183]:
# Log, for each category, how many files were found followed by their paths.
for noun, found in (("vectors", shapefiles), ("raster", rasters), ("docs", pdfs)):
    log.info(f"found {len(found)} {noun}")
    for path in found:
        log.info(f"--> {path}")



INFO:root:found 6 vectors
INFO:root:--> /data/planmap/Planmap/DATA/mercury/PM-MER-MS-H05_3cc_01/vector/H05_surface_features/H05_surface_features.shp
INFO:root:--> /data/planmap/Planmap/DATA/mercury/PM-MER-MS-H05_3cc_01/vector/H05_linear_features/H05_linear_features.shp
INFO:root:--> /data/planmap/Planmap/DATA/mercury/PM-MER-MS-H05_3cc_01/vector/H05_outline/H05_outline.shp
INFO:root:--> /data/planmap/Planmap/DATA/mercury/PM-MER-MS-H05_3cc_01/vector/H05_geological_units/3_classes/H05_geological_units_3_classes.shp
INFO:root:--> /data/planmap/Planmap/DATA/mercury/PM-MER-MS-H05_3cc_01/vector/H05_contacts/H05_contacts.shp
INFO:root:--> /data/planmap/Planmap/DATA/mercury/PM-MER-MS-H05_3cc_01/vector/H05_nomenclature/H05_nomenclature.shp
INFO:root:found 1 raster
INFO:root:--> /data/planmap/Planmap/DATA/mercury/PM-MER-MS-H05_3cc_01/raster/PM-MER-MS-H05_3cc_01_basemap_mosaic.tif
INFO:root:found 2 docs
INFO:root:--> /data/planmap/Planmap/DATA/mercury/PM-MER-MS-H05_3cc_01/document/PM-MER-MS-H05_3c

In [184]:
def get_required_output_dirs(outfolder):
    """List the output directories needed for the files that were found.

    The result always starts with ``outfolder`` itself. A ``vector``,
    ``raster`` or ``document`` subfolder is added only when the corresponding
    module-level list (``shapefiles``, ``rasters``, ``pdfs``) is not empty.

    Parameters
    ----------
    outfolder : str
        Root output folder.

    Returns
    -------
    list of str
        Directory paths, root first.
    """
    categories = (("vector", shapefiles), ("raster", rasters), ("document", pdfs))
    subfolders = [
        os.path.join(outfolder, subfolder)
        for subfolder, files in categories
        if len(files) > 0
    ]
    return [outfolder] + subfolders
        
def create_paths(folders):
    """Create every directory listed in ``folders``.

    Missing parent directories are created as well, and directories that
    already exist are left untouched (no error is raised for them).
    """
    for directory in folders:
        os.makedirs(directory, exist_ok=True)
        

In [185]:
# Work out which output folders are needed for the files found, then create them.
flds = get_required_output_dirs(outfolder)
create_paths(flds)

In [186]:
from shutil import copyfile


def copy_files_to_folder(files, folder):
    """Copy each file in ``files`` into ``folder``, keeping only its base name.

    Parameters
    ----------
    files : iterable of str
        Paths of the files to copy.
    folder : str
        Destination directory; the source directory structure is not
        reproduced inside it.
    """
    for source in files:
        destination = os.path.join(folder, os.path.basename(source))
        copyfile(source, destination)
    



In [187]:
# Documents and rasters are copied as they are into their own subfolders of the output folder.
copy_files_to_folder(pdfs, os.path.join(outfolder, "document"))
copy_files_to_folder(rasters, os.path.join(outfolder, "raster"))

In [188]:
# All the vector data are now unified in a single GeoPackage, stored in the
# "vector" subfolder of the output folder. Note that this path differs from
# `outfullpath`, which places the file directly inside `outfolder`.
outgpkg = os.path.join(outfolder, "vector", outfname)
log.info(outgpkg)

INFO:root:/data/tmppkg3/vector/PM-MER-MS-H05_3cc_01.gpkg


In [189]:
def prepare_layer_name(fullname, prefix_length=4): # might want to personalize this
    """Derive a human-readable layer name from the path of a vector file.

    The directory part and the file extension are removed, the first
    ``prefix_length`` characters of what remains are dropped, underscores
    become spaces and every word is capitalised, e.g.
    ``".../H05_surface_features.shp"`` -> ``"Surface Features"``.

    Parameters
    ----------
    fullname : str
        Path (or bare name) of the file.
    prefix_length : int, optional
        Number of leading characters to discard from the base name. The
        default of 4 removes a prefix such as ``"H05_"``.

    Returns
    -------
    str
        The layer name.
    """
    # splitext handles extensions of any length; slicing off a fixed four
    # characters only worked for three-letter extensions such as ".shp".
    stem, _extension = os.path.splitext(os.path.basename(fullname))
    stem = stem[prefix_length:]
    return stem.replace("_", " ").title()

In [190]:
import geopandas as gp
# Read every shapefile into memory, keyed by the layer name it will get in the GeoPackage.
tosave = {}
for shp_path in shapefiles:
    layer = prepare_layer_name(shp_path)
    log.info(f"{shp_path},\n   --> layer_name : {layer}")
    tosave[layer] = gp.GeoDataFrame.from_file(shp_path)




    

INFO:root:/data/planmap/Planmap/DATA/mercury/PM-MER-MS-H05_3cc_01/vector/H05_surface_features/H05_surface_features.shp,
   --> layer_name : Surface Features
INFO:fiona.ogrext:Failed to auto identify EPSG: 7
INFO:root:/data/planmap/Planmap/DATA/mercury/PM-MER-MS-H05_3cc_01/vector/H05_linear_features/H05_linear_features.shp,
   --> layer_name : Linear Features
INFO:fiona.ogrext:Failed to auto identify EPSG: 7
INFO:root:/data/planmap/Planmap/DATA/mercury/PM-MER-MS-H05_3cc_01/vector/H05_outline/H05_outline.shp,
   --> layer_name : Outline
INFO:fiona.ogrext:Failed to auto identify EPSG: 7
INFO:root:/data/planmap/Planmap/DATA/mercury/PM-MER-MS-H05_3cc_01/vector/H05_geological_units/3_classes/H05_geological_units_3_classes.shp,
   --> layer_name : Geological Units 3 Classes
INFO:fiona.ogrext:Failed to auto identify EPSG: 7
INFO:root:/data/planmap/Planmap/DATA/mercury/PM-MER-MS-H05_3cc_01/vector/H05_contacts/H05_contacts.shp,
   --> layer_name : Contacts
INFO:fiona.ogrext:Failed to auto identi

In [191]:
# Geometry type of each layer, read from its first feature only
# (all the features of a layer are assumed to be of a similar type).
types = [layer_table.geom_type[0] for layer_table in tosave.values()]
log.info(types)



INFO:root:['Polygon', 'LineString', 'LineString', 'Polygon', 'LineString', 'Point']


In [192]:
# Sort rank per geometry type: layers with a lower rank come first.
# Multi-part types share the rank of their single-part counterpart, so a layer
# whose first feature is e.g. a MultiPolygon no longer raises a KeyError.
sort_pref = {
    "Point": 0,
    "MultiPoint": 0,
    "LineString": 10,
    "MultiLineString": 10,
    "Polygon": 20,
    "MultiPolygon": 20,
}
scores = [sort_pref[key] for key in types]

# A stable sort keeps layers of equal rank in their original (load) order,
# which makes the resulting layer order reproducible between runs.
sorting = np.argsort(scores, kind="stable")
sorting

array([5, 1, 2, 4, 0, 3])

In [193]:
# (layer name, table) pairs, indexable by the positions stored in `sorting`.
alldata = list(tosave.items())

# Write the layers to the GeoPackage one by one, in the sorted order.
for position in sorting:
    layer, frame = alldata[position]
    log.info(f"saving {layer}")
    frame.to_file(outgpkg, driver="GPKG", layer=layer)

INFO:root:saving Nomenclature
INFO:root:saving Linear Features
INFO:root:saving Outline
INFO:root:saving Contacts
INFO:root:saving Surface Features
INFO:root:saving Geological Units 3 Classes


# TODO: check that the number of layers written to the GeoPackage matches the number of input layers

In [194]:
# CSV lookup table; its "Geological" column is used as the join key with the layer.
tabfile = "/home/luca/Documenti-local/notebooks.git/mapping/maptable_3_classes.csv"
layername = "Geological Units 3 Classes"
# Read back, from the GeoPackage, the layer that is going to be enriched.
table = gp.GeoDataFrame.from_file( outgpkg, layer=layername)
# Strip surrounding whitespace from the unit codes before they are compared.
table.Geological = table.Geological.str.strip()


INFO:fiona.ogrext:Failed to auto identify EPSG: 7


In [195]:
import pandas as pd
# `DataFrame.from_csv` was deprecated in pandas 0.21 and removed in pandas 1.0;
# `pd.read_csv` is its replacement. index_col=None keeps a plain integer
# index, so every CSV column is loaded as a regular column.
newtab = pd.read_csv(tabfile, sep=",", index_col=None)
# Strip surrounding whitespace from the unit codes so that they compare equal
# to the (also stripped) codes of the vector layer.
newtab["Geological"] = newtab["Geological"].str.strip()
newtab

  


Unnamed: 0,Extended_Label,Geological,R,G,B
0,Smooth Plains,sp,255,190,190
1,Intermediate Plains,imp,245,122,122
2,Intercrater Plains,icp,137,90,68
3,Crater material-well preserved,c3,255,255,115
4,Crater material-degraded,c2,92,137,68
5,Crater material-heavily degraded,c1,115,0,0
6,Degraded catenae,dc,223,128,255
7,Crater floor material-smooth,cfs,255,255,175
8,Crater floor material-hummocky,cfh,205,170,102


In [196]:
matchname = "Geological"

# Codes used by the vector layer vs. codes listed in the lookup table.
tomatch = np.unique(table[matchname])
testmatch = list(newtab[matchname])

# Report, for every code used by the layer, whether the lookup table covers it.
for code in tomatch:
    print(f"{code} is ok" if code in testmatch else f"---> Check {code}")

c1 is ok
c2 is ok
c3 is ok
cfh is ok
cfs is ok
dc is ok
icp is ok
imp is ok
sp is ok


In [197]:
# Attach the lookup-table columns to the features of the layer, joining on the
# unit code. This is an inner join: features whose code is missing from the
# lookup table are not kept.
fulldata = pd.merge(table, newtab, on="Geological")
if len(fulldata) != len(table):
    log.warning(
        f"merge changed the number of features from {len(table)} to {len(fulldata)}"
    )

# DataFrame.drop returns a new object and leaves the original untouched: the
# result has to be assigned back, otherwise the two columns are still written.
fulldata = fulldata.drop(columns=["Join_Count", "TARGET_FID"])

fulldata.to_file(outgpkg, layer=layername, driver="GPKG")


In [None]:
table

In [None]:
newtab

In [None]:
pd.merge?a

In [None]:
table[matchname]
newtab[matchname]

In [None]:
newtab[matchname]

In [None]:
newtab["Extended_Label"]

In [None]:
newtab