In [1]:
%load_ext autoreload
%autoreload 2   # Change to %autoreload when development phase is over

# Table of Contents

<div id="toc"></div>

The following cell is a Javascript section of code for building the Jupyter notebook's table of contents.

In [2]:
%%javascript
// Load the table-of-contents builder script from its remote URL.
// NOTE(review): this needs network access when the cell runs, and `$` is presumably the jQuery object
// exposed by the notebook front-end — confirm it is available in the front-end used.
$.getScript('https://kmahelona.github.io/ipython_notebook_goodies/ipython_notebook_toc.js')

<IPython.core.display.Javascript object>

**-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-**

# Define working environment

**Import libraries**

In [3]:
## Import libraries needed for setting parameters of operating system 
import os
import sys
## Import library for temporary files creation 
import tempfile 
## Import library for time management 
import time
## Import multiprocessing and functools libraries
import multiprocessing
from multiprocessing import Pool
from functools import partial

In [4]:
## Import Pandas library
import pandas as pd
## Import Numpy library
import numpy as np

In [5]:
## Import Matplotlib 
import matplotlib as mpl 
## agg backend is used to create plot as a .png file
mpl.use('agg')
## Import Matplotlib.pyplot for creating graphs
import matplotlib.pyplot as plt 

In [6]:
%matplotlib inline

**Add the folder containing the source modules (SRC) provided with this notebook**

In [7]:
# Add local module to the path
# NOTE: '../SRC' is resolved against the current working directory at the time this cell runs,
# so this cell has to be executed before anything changes the working directory.
src = os.path.abspath('../SRC')
if src not in sys.path:
    sys.path.append(src)

**Environment variables when working on Linux Mint**

In [8]:
import environ_variables as envi

In [9]:
# Set environmental variables
envi.setup_environmental_variables() 
# Display current environment variables of your computer
# NOTE(review): the printed values include the user name, home directory and host name;
# consider clearing the output of this cell before sharing the notebook.
envi.print_environmental_variables()

MDMSESSION = mate 	
MANDATORY_PATH = /usr/share/gconf/mate.mandatory.path 	
MATE_DESKTOP_SESSION_ID = this-is-deprecated 	
LESSOPEN = | /usr/bin/lesspipe %s 	
MDM_LANG = fr_BE.UTF-8 	
LOGNAME = tais 	
USER = tais 	
HOME = /home/tais 	
XDG_VTNR = 9 	
PATH = /usr/local/bin:/home/tais/BIN:/home/tais/bin:/home/tais/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/home/tais/SRC/GRASS/grass_trunk/dist.x86_64-pc-linux-gnu/bin:/home/tais/SRC/GRASS/grass_trunk/dist.x86_64-pc-linux-gnu/script:/home/tais/SRC/GRASS/grass_trunk/dist.x86_64-pc-linux-gnu/lib 	
CLICOLOR = 1 	
DISPLAY = :0.0 	
SSH_AGENT_PID = 25466 	
LANG = fr_BE.UTF-8 	
TERM = xterm-color 	
SHELL = /bin/bash 	
GIS_LOCK = $$ 	
XAUTHORITY = /home/tais/.Xauthority 	
SESSION_MANAGER = local/tais-HP-Z620-Workstation:@/tmp/.ICE-unix/25383,unix/tais-HP-Z620-Workstation:/tmp/.ICE-unix/25383 	
SHLVL = 1 	
QT_LINUX_ACCESSIBILITY_ALWAYS_ON = 1 	
INSIDE_CAJA_PYTHON =  	
QT_ACCESSIBILITY = 1 	
LD

**GRASS GIS Python libraries**

In [10]:
## Import libraries needed to launch GRASS GIS in the jupyter notebook
import grass.script.setup as gsetup
## Import libraries needed to call GRASS using Python
import grass.script as gscript

**Functions defined by the user**

In [11]:
# Import function that check existance and create GRASS GIS database folder if needed
from grass_database import check_gisdb
from grass_database import check_location
from grass_database import check_mapset
from grass_database import working_mapset

In [12]:
## Import functions for processing time information
from processing_time import start_processing
from processing_time import print_processing_time

In [13]:
# Import function that generate a random name in the GRASS GIS environement
from random_layer_name import random_layer_name

In [14]:
# Import function that create color file for raster
from colorise_raster import create_color_rule

In [15]:
# Import function that clip multiple raster according to extention of a vector layer
from create_clumped_grid import create_clumped_grid

**-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-**

# User inputs

Hereafter:
- Enter the path to the directory you want to use as "[GRASSDATA](https://grass.osgeo.org/programming7/loc_struct.png)". 
- Enter the name of the location in which you want to work and its projection information in [EPSG code](http://spatialreference.org/ref/epsg/) format. Please note that the GRASSDATA folder and locations will be automatically created if not existing yet. If the location name already exists, the projection information will not be used.  
- Enter the name you want for the mapsets which will be used later for Unsupervised Segmentation Parameter Optimization (USPO), Segmentation and Classification steps.

In [16]:
## Define an empty dictionary for saving user inputs
user={}

In [17]:
## Enter the path to GRASSDATA folder (absolute path specific to this machine: adapt it to your own setup)
user["gisdb"] = "/media/tais/My_Book_1/MAUPP/Traitement/Population_modelling_dasymetry/GRASSDATA"
## Enter the name of the location (existing or for a new one)
user["location"] = "Dakar_32628"
## Enter the EPSG code for this location (as a string)
user["locationepsg"] = "32628"
## Enter the name of the permanent mapset
user["permanent_mapset"] = "PERMANENT"
## Enter the name of the working mapset used for the dasymetry steps
user["dasym_mapset"] = "DASYMETRY"

**Check for existence of GRASSDATA folder, location and mapsets**

Hereafter, the Python script will check if the GRASSDATA folder, location and mapsets already exist. If not, they will be automatically created.

In [18]:
# Check if the GRASS GIS database folder (user["gisdb"]) exists and create it if not
check_gisdb(user["gisdb"])

GRASSDATA folder already exist


In [19]:
# Check if the location exists in the GRASS GIS database and create it if not, with the CRS defined by the EPSG code
check_location(user["gisdb"],user["location"],user["locationepsg"])

Location Dakar_32628 already exist


In [20]:
# Check if the permanent mapset exists in the location and create it if not
check_mapset(user["gisdb"],user["location"],user["permanent_mapset"])

'PERMANENT' mapset already exists in location 'Dakar_32628'


In [21]:
# Check if the working (dasymetry) mapset exists in the location and create it if not
check_mapset(user["gisdb"],user["location"],user["dasym_mapset"])

'DASYMETRY' mapset already exists in location 'Dakar_32628'


**-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-**

# Launch GRASS GIS working session on DASYMETRY mapset

In [22]:
# Switch the current GRASS GIS working session to the working (dasymetry) mapset
working_mapset(user["gisdb"],user["location"],user["dasym_mapset"])

You are now working in mapset 'DASYMETRY'


### Set the name of main layers

In [23]:
# Set the name of the layers
Land_cover = 'landcover'  # VHR Land cover map
Land_use = 'landuse' # VHR Land use map
mr_builtup = 'MR_builtup' # MR built-up map
mr_built_pixelvalue = '1'  # Pixel value (as a string) of the MR built-up map for which the proportion is computed
grid = 'clumped_grid'  # Name of the grid raster on which the computational region is aligned when gridding the administrative units
## Name of the column containing the population count
population_column="POPULATION"

### Set the resolution of the prediction grid

In [24]:
# Tile size passed to 'create_clumped_grid' (presumably expressed in map units, i.e. metres for this location — TODO confirm)
tile_size = 100

### Set several variables and parameters

In [25]:
# Declare the lists that will contain the names of temporary layers (TMP_MAPS) and the paths to temporary .csv files (TMP_CSV)
TMP_MAPS = []
TMP_CSV = []

In [26]:
# Declare strings that will contain the log of the processing
# NOTE: re-running this cell resets both logs to empty strings
log_text = ""
log_text_extend = ""

In [27]:
# Set the number of jobs that can be run in parallel
n_jobs = 15
# Check that the computer has enough cores: the number of jobs has to stay below the number of available cores
if n_jobs >= multiprocessing.cpu_count():
    # The message is a plain string (no gettext `_()` wrapper): in a notebook, IPython binds `_`
    # to a previous output value, so calling it is not reliable.
    # Adjacent string literals are used instead of a backslash inside the string, which embedded
    # the indentation of the continuation line in the message.
    gscript.fatal("Requested number of jobs is > or = to available ressources. "
                  "Try to reduce to at maximum <%s> jobs" % (int(multiprocessing.cpu_count())-1))

### Create temporary directories for output

In [28]:
import os

def create_tempdirs(list_of_directories):
    '''
    Create the requested folders inside the directory returned by 'gscript.tempdir()'.

    'list_of_directories' waits for a list of folder names (strings).
    Folders that already exist are left untouched.
    Returns the list of paths to the folders, in the same order as the requested names.
    Those paths have to be kept, as other functions depend on them.
    '''
    base_dir = gscript.tempdir()
    # Build every path first, then create the folders that are still missing
    paths = [os.path.join(base_dir, name) for name in list_of_directories]
    for path in paths:
        if os.path.exists(path):
            continue
        os.makedirs(path)
    return paths

In [29]:
# Create the temporary folders (one for the administrative level, one for the grid level) and get their paths
outputdirectory_admin, outputdirectory_grid = create_tempdirs(["admin_level", "grid_level"])

# Data preparation

## Create a clumped grid for dasymetry

This grid will be the reference grid for re-allocation of population count.

In [30]:
# Set a name for the clumped grid layer (same layer name as the one stored in the 'grid' variable)
clumped_grid = 'clumped_grid'

In [31]:
# Create the clumped grid with the tile size set above; the land cover raster is passed as 'mask_raster'
create_clumped_grid(tile_size=tile_size, mask_raster=Land_cover, output=clumped_grid)

## Create layers with grid boundary for both levels

In [36]:
def gridded_admin_boundaries(input_vector, id, pop_column, grid):
    '''
    Function converting the vector to raster then raster to vector: boundaries will have a "staircase" appearance
    so that each tile of the gridded vector will be contained in only one administrative unit.

    'input_vector' waits for the name of the vector layer with the administrative units
    'id' waits for the name of the attribute column used as raster value when rasterizing
    'pop_column' waits for the name of the column with the population count, joined to the gridded vector
    'grid' waits for the name of the raster on which the computational region is set

    The function returns nothing. It sets two global variables:
    'gridded_admin_units' (name of the rasterized administrative units) and
    'gridded_vector' (name of the gridded vector, '<input name>_<resolution>m_gridded').
    It also changes the current computational region.
    '''
    
    def check_no_missing_zones(vector_origin, vector_gridded, resolution):
        '''
        Function checking if the number of items (admin zones) in the original vector provided by the user is well conserved after the rasterization.
        If the original vector contains small sized polygons (or very tight) and desired 'tile_size' is too large, some polygons could disappear during the rasterization process.
        When the counts differ, the gridded vector is removed and the processing is stopped with 'gscript.fatal'.
        '''
        origin_n=gscript.parse_command('v.db.univar', flags='g', map=vector_origin, column='cat')['n']
        gridded_n=gscript.parse_command('v.db.univar', flags='g', map=vector_gridded, column='cat')['n']
        if origin_n != gridded_n:
            gscript.run_command('g.remove', quiet=True, type='vector', name=vector_gridded, flags='fb')
            # Messages are plain strings (no gettext `_()` wrapper): in a notebook, IPython binds `_`
            # to a previous output value, so calling it is not reliable.
            message="A tile size of %s m seems too large and produce loss of some polygons when rasterizing them.\n" % resolution
            message+="Try to reduce the 'tile_size' parameter or edit the <%s> vector to merge smallest administrative units with their neighoring units" % vector_origin
            gscript.fatal(message)
        
    current_mapset = gscript.gisenv()['MAPSET']
    # Align the computational region on the grid and read its north-south resolution
    gscript.run_command('g.region', raster=grid)
    resolution = int(float(gscript.parse_command('g.region', flags='pg')['nsres']))
    # Names of the outputs are shared with the rest of the notebook through global variables
    global gridded_admin_units
    global gridded_vector
    gridded_admin_units = random_layer_name(prefix='gridded_admin_units')
    gridded_vector = input_vector.split("@")[0]+'_'+str(resolution)+'m_gridded'
    # Vector to raster (using the 'id' column as value), then raster back to vector
    gscript.run_command('v.to.rast', quiet=True, input=input_vector, type='area', 
                        output=gridded_admin_units, use='attr', 
                        attribute_column=id, overwrite=True)
    gscript.run_command('r.to.vect', quiet=True, input=gridded_admin_units, 
                        output='%s@%s'%(gridded_vector,current_mapset), type='area', column=id, 
                        flags='v',overwrite=True)
    # Join the population count through a temporary copy of the input vector
    tmp_name=random_layer_name()
    gscript.run_command('g.copy', quiet=True, vector='%s,%s'%(input_vector,tmp_name))
    gscript.run_command('v.db.join', quiet=True, map_=gridded_vector, column='cat', other_table=tmp_name, other_column=id, subset_columns=pop_column) #join the population count
    gscript.run_command('g.remove', quiet=True, flags='f', type='vector', name=tmp_name+'@'+current_mapset)
    check_no_missing_zones(input_vector, gridded_vector, resolution)    

In [37]:
# Create layer with boundaries corresponding to the grid, from the 'admin_level0' vector and its 'cluster' id column
gridded_admin_boundaries("admin_level0", 'cluster', population_column, grid)

## Get list of values from categorical raster (land cover and land use)

In [38]:
def Data_prep(categorical_raster):
    '''
    Function that extracts resolution and sorted list of classes of 
    a categorical raster (like land cover or land use information).

    Returns a tuple (nsres, ewres, classes): the two resolutions as reported by 'gscript.raster_info'
    and the list of class values (strings) sorted in ascending numerical order.
    '''
    info = gscript.raster_info(categorical_raster)
    nsres=info.nsres
    ewres=info.ewres
    # Keep what precedes the first tab of each item returned for 'r.category' (presumably "<value><TAB><label>" — TODO confirm).
    # The tab is written as an explicit "\t" escape so that it stays visible and survives editors converting tabs to spaces.
    # Values are encoded to make sure the format is UTF8 and not Unicode.
    classes=[cl.split("\t")[0].encode('UTF8') for cl in gscript.parse_command('r.category',map=categorical_raster)]
    classes.sort(key=float) #Sort the raster categories in ascending order.
    return nsres, ewres, classes

In [39]:
# Data preparation : extract list of classes from the Land Cover
lc_classes_list = Data_prep(Land_cover)[2]
message="Classes of raster '"+str(Land_cover)+"' used: "+",".join(lc_classes_list)
# NOTE: the message is appended to the log each time this cell is run
log_text+=message+'\n'
# Parenthesised single-argument print: valid both as a Python 2 statement and as a Python 3 function call
print(message)

Classes of raster 'landcover' used: 10,22,23,33,34,45,111,112,113


In [40]:
# Data preparation : extract list of classes from the land use
lu_classes_list = Data_prep(Land_use)[2]
message="Classes of raster '"+str(Land_use)+"' used: "+",".join(lu_classes_list)
# NOTE: the message is appended to the log each time this cell is run
log_text+=message+'\n'
# Parenthesised single-argument print: valid both as a Python 2 statement and as a Python 3 function call
print(message)

Classes of raster 'landuse' used: 1,2,3,4,5,6,7,8


## Compute proportion of each land cover and land use class

In [41]:
def proportion_class(rasterLayer, cl):
    '''
    Function extracting a binary map for class 'cl' in raster 'rasterLayer', then computing the proportion of this class in both administrative units and in grids.
    The computational region should be defined properly before running this function.

    'rasterLayer' waits for the name of the categorical raster (without mapset)
    'cl' waits for the class value (string)

    The function relies on global variables of the notebook: 'Land_cover', 'Land_use', 'outputdirectory_admin',
    'outputdirectory_grid', 'gridded_admin_units' and 'clumped_grid'.
    Returns a tuple (name of the binary raster, path to the .csv with proportions at administrative level,
    path to the .csv with proportions at grid level).
    '''
    ### Adaptative prefix according to the input raster (land cover, land use, or anything else)
    if rasterLayer == Land_cover.split("@")[0]:
        prefix = 'LC' 
    elif rasterLayer == Land_use.split("@")[0]:
        prefix = 'LU' 
    else:
        prefix = 'MR'  
    ### Create a binary raster for the current class
    binary_raster = prefix+"_"+cl  # Set the name of the binary raster
    gscript.run_command('r.mapcalc', expression='%s=if(%s==%s,1,0)'%(binary_raster,rasterLayer,cl),
                        overwrite=True,quiet=True) # Mapcalc to create binary raster for the expected class 'cl'
    ### Create a temporary copy of the current binary raster with all pixels values equal to 1 (to be used for computing proportion of current binary class)
    tmplayer = random_layer_name(prefix='tmp_%s'%binary_raster)
    gscript.run_command('r.mapcalc', expression='%s=if(%s==1,1,1)'%(tmplayer,binary_raster),
                        overwrite=True,quiet=True)
    # Fill potential remaining null values with 0 value (when using r.mapcalc, null values existing in the 'rasterLayer' will remain null in the binary)
    gscript.run_command('r.null', quiet=True, map=binary_raster, null='0')
    gscript.run_command('r.null', quiet=True, map=tmplayer, null='0')
    
    ### Compute proportion of pixels of the current class - first for administrative units, then for grids
    # The grid layer name comes from the 'clumped_grid' variable instead of being hard-coded,
    # so that renaming the grid in the notebook does not silently break this step.
    output_csv=[]
    for outputdirectory, ref_map in ((outputdirectory_admin, gridded_admin_units), (outputdirectory_grid, clumped_grid)):
        stat_csv=os.path.join(outputdirectory,"%s_%s.csv"%(prefix,cl))
        gscript.run_command('i.segment.stats', flags='s', map=ref_map, rasters='%s,%s'%(tmplayer,binary_raster), raster_statistics='sum', csvfile=stat_csv, separator='comma', quiet=True, overwrite=True)
        output_csv.append(compute_proportion_csv(stat_csv)) #Create a new csv containing the proportion
    
    ### Remove temporary layer
    gscript.run_command('g.remove', quiet=True, flags='f',type='raster',name=tmplayer)
    # Return the name of the binary raster and the paths to the two .csv files
    return (binary_raster,output_csv[0],output_csv[1])

In [42]:
import csv
def compute_proportion_csv(infile):
    '''
    Function used in 'proportion_class' function. It takes as input the csv from i.segment.stats with, for each unit,
    its identifier (first column), its area in number of pixels (second column) and the sum of pixels of the binary
    raster (third column), and creates a new csv with the identifier and the proportion (in percent, 5 decimals).

    The new csv is written next to the input file with the suffix '_prop' added to its name; the input file is removed.
    When the area of a unit is zero the proportion is set to zero and the identifier is reported in a printed notification.
    Returns the path to the new csv file.
    '''
    # Set the path to the outputfile
    head, tail = os.path.split(infile)
    root, ext = os.path.splitext(tail)
    outfile=os.path.join(head,root+"_prop"+ext)
    # Initialize empty lists
    crash_report=[]
    content=[]
    # Read the input inside a 'with' block so that the file is closed before it is removed
    with open(infile,'r') as fin:
        reader=csv.reader(fin, delimiter=",")
        # Save the first line as header and create the new header
        header=next(reader)
        index=header[2].find("_sum")
        # Keep the whole column name if it does not contain '_sum' (a -1 index would drop its last character)
        class_name=header[2][:index] if index != -1 else header[2]
        content.append([header[0],class_name+'_proportion'])  #New header with first original column and current class related name for proportion
        # Loop through the rest of the rows (header is passed)
        for row in reader:
            pix_nb=float(row[1]) #Area of the unit (in number of pixels)
            class_nb=float(row[2]) #Number of pixels of current class (binary raster)
            try:
                prop=100*class_nb/pix_nb
            except ZeroDivisionError:  #If computation of proportion failed because of 'ZeroDivisionError'
                crash_report.append(row[0])
                prop=0.0  # Set the proportion to zero to avoid errors in next steps
            content.append([row[0],"{0:.5f}".format(prop)])
    # Write the output inside a 'with' block so that the content is flushed to disk before returning
    with open(outfile,'w') as fout:
        csv.writer(fout, delimiter=",").writerows(content)
    os.remove(infile)
    # Print notification of ZeroDivisionError if it happened (one identifier per line, after the column name)
    if len(crash_report)>0:
        print("An 'ZeroDivisionError' has been registered for the following <%s>:\n%s"%(header[0],"\n".join(crash_report)))
    # Return the path to the temporary csv file
    return outfile

### VHR Land cover proportions

In [43]:
# Save time at starting
start = start_processing() 
## Compute proportion of each class of categorical raster (parallel processing).
gscript.run_command('g.region', raster=Land_cover.split("@")[0])  #Set the region to match the extend of the raster
p=Pool(n_jobs) #Create a 'pool' of processes and launch them using 'map' function
func=partial(proportion_class,Land_cover.split("@")[0]) # Set fixed argument of the function
try:
    output=p.map(func,lc_classes_list) # Launch the processes for as many items in the list (if function with a return, the returned results are ordered thanks to 'map' function)
finally:
    # Release the worker processes even if one of the jobs failed
    p.close()
    p.join()
temp_rasterlist,temp_csvlist_1,temp_csvlist_2=zip(*output)
TMP_MAPS.extend(temp_rasterlist)  # Append the name of binary rasters to the list of temporary maps
TMP_CSV.extend(temp_csvlist_1)  # Append the paths to .csv files (administrative level) to the list of temporary .csv
TMP_CSV.extend(temp_csvlist_2)  # Append the paths to .csv files (grid level) to the list of temporary .csv
# Print processing time
print_processing_time(begintime=start, printmessage="Proportion of each class of categorical raster computed in: ")  

'Proportion of each class of categorical raster computed in: 13 minutes and 26.9 seconds'

### VHR Land use proportions

In [44]:
# Save time at starting
start = start_processing() 
## Compute proportion of each class of categorical raster (parallel processing).
gscript.run_command('g.region', raster=Land_use.split("@")[0])  #Set the region to match the extend of the raster
p=Pool(n_jobs) #Create a 'pool' of processes and launch them using 'map' function
func=partial(proportion_class,Land_use.split("@")[0]) # Set fixed argument of the function
try:
    output=p.map(func,lu_classes_list) # Launch the processes for as many items in the list (if function with a return, the returned results are ordered thanks to 'map' function)
finally:
    # Release the worker processes even if one of the jobs failed
    p.close()
    p.join()
temp_rasterlist,temp_csvlist_1,temp_csvlist_2=zip(*output)
TMP_MAPS.extend(temp_rasterlist)  # Append the name of binary rasters to the list of temporary maps
TMP_CSV.extend(temp_csvlist_1)  # Append the paths to .csv files (administrative level) to the list of temporary .csv
TMP_CSV.extend(temp_csvlist_2)  # Append the paths to .csv files (grid level) to the list of temporary .csv
# Print processing time
print_processing_time(begintime=start, printmessage="Proportion of each class of categorical raster computed in: ")  

'Proportion of each class of categorical raster computed in: 11 minutes and 55.0 seconds'

### MR Built-up proportion

In [45]:
# Save time at starting
start = start_processing() 
## Compute proportion of the built-up value of the MR raster (single call, no parallel processing).
gscript.run_command('g.region', raster=mr_builtup.split("@")[0])  #Set the region to match the extend of the raster
# NOTE: despite their names, the three variables below each hold a single value here (one raster name and two .csv paths)
temp_rasterlist,temp_csvlist_1,temp_csvlist_2 = proportion_class(mr_builtup.split("@")[0],mr_built_pixelvalue)
TMP_MAPS.append(temp_rasterlist)  # Append the name of the binary raster to the list of temporary maps
TMP_CSV.append(temp_csvlist_1)  # Append the path to the .csv file (administrative level) to the list of temporary .csv
TMP_CSV.append(temp_csvlist_2)  # Append the path to the .csv file (grid level) to the list of temporary .csv
# Print processing time
print_processing_time(begintime=start, printmessage="Proportion of MR Built-up raster computed in: ")  

An 'ZeroDivisionError' has been registered for the following <cat>2769
2077
2076
2210
2770
2916
2209
2485
2626
2627
2915
2345
2346
2484


'Proportion of MR Built-up raster computed in: 13 minutes and 14.5 seconds'

### Export layers for archive or visualisation in another GIS

**Binary layers**

In [62]:
# Set output folder (absolute path specific to this machine: adapt it to your own setup)
output_folder = '/media/tais/My_Book_1/MAUPP/Traitement/Population_modelling_dasymetry/Results/Dakar/binary_rasters'
# NOTE(review): the 'maskcopy' raster is not created in the visible part of this notebook — it presumably has to exist beforehand
gscript.run_command('r.mask', overwrite=True, raster='maskcopy')  # Apply mask
# Export all binary rasters listed in TMP_MAPS, one GeoTIFF per raster
for binary_raster in TMP_MAPS:
    output_file = os.path.join(output_folder,'Dakar_binary_%s.tiff'%binary_raster)
    gscript.run_command('g.region', raster=binary_raster)  # Set the region to match the raster to export
    gscript.run_command('r.out.gdal', overwrite=True, input=binary_raster, output=output_file, format='GTiff', createopt='COMPRESS=DEFLATE')
gscript.run_command('r.mask', flags='r')  # Remove mask

0

**Create VHR built-up binary (classes 111,112,113)**

In [65]:
# Set output folder (absolute path specific to this machine: adapt it to your own setup)
output_folder = '/media/tais/My_Book_1/MAUPP/Traitement/Population_modelling_dasymetry/Results/Dakar/binary_rasters'
# NOTE(review): the 'maskcopy' raster is not created in the visible part of this notebook — it presumably has to exist beforehand
gscript.run_command('r.mask', overwrite=True, raster='maskcopy')  # Apply mask
vhr_builtup = 'VHR_builtup'
gscript.use_temp_region()  # Define temp region
gscript.run_command('g.region', raster=Land_cover)
gscript.run_command('r.mapcalc', 
                    expression='%s=if(%s==111,111,if(%s==112,112,if(%s==113,113,0)))'%(vhr_builtup,Land_cover,Land_cover,Land_cover),
                    overwrite=True, quiet=True) # Mapcalc keeping the land cover values 111, 112 and 113 and setting the other values to 0
# Export raster
output_file = os.path.join(output_folder,'Dakar_%s.tiff'%vhr_builtup)
gscript.run_command('r.out.gdal', overwrite=True, input=vhr_builtup, output=output_file, format='GTiff', createopt='COMPRESS=DEFLATE')
gscript.run_command('r.mask', flags='r')  # Remove mask
gscript.del_temp_region() # Remove temp region

NameError: name 'grass' is not defined

**Clumped Grid**

In [47]:
# Set output folder (absolute path specific to this machine: adapt it to your own setup)
output_folder = '/media/tais/My_Book_1/MAUPP/Traitement/Population_modelling_dasymetry/Results/Dakar'
# Export clumped grid as a GeoTIFF
output_file = os.path.join(output_folder,'Dakar_%s.tiff'%clumped_grid)
gscript.run_command('g.region', raster=clumped_grid)  # Set the region to match the raster to export
gscript.run_command('r.out.gdal', overwrite=True, input=clumped_grid, output=output_file, format='GTiff', createopt='COMPRESS=DEFLATE')

0

**Gridded administrative units (level0)**

In [48]:
# Set output folder (absolute path specific to this machine: adapt it to your own setup)
output_folder = '/media/tais/My_Book_1/MAUPP/Traitement/Population_modelling_dasymetry/Results/Dakar'
# Export gridded admin units (level0)
# NOTE: 'gridded_vector' is a global variable set by the 'gridded_admin_boundaries' function
output_file = os.path.join(output_folder,'Dakar_%s.shp'%gridded_vector)
gscript.run_command('v.out.ogr', overwrite=True, flags='m', type='area', 
                    input=gridded_vector, output=output_file, format='ESRI_Shapefile')

0

### Join all .csv files with statistics

In [49]:
import glob
import os 

def atoi(text):
    '''
    Convert 'text' to an integer when it contains digits only; otherwise return it unchanged.
    '''
    if text.isdigit():
        return int(text)
    return text


def natural_keys(text):
    '''
    Return the key to be used for sorting strings containing numerical values in human ("natural") order,
    e.g. alist.sort(key=natural_keys). Used in 'ordered_list_of_path' and 'join_2csv' functions.

    The text is split into runs of digits and the pieces of text between them; each piece is passed to 'atoi',
    which converts the runs of digits to integers.

    Trick was found here
    https://stackoverflow.com/questions/5967500/how-to-correctly-sort-a-string-with-a-number-inside
    http://nedbatchelder.com/blog/200712/human_sorting.html
    (See Toothy's implementation in the comments)
    '''
    import re  #Import needed library
    # Raw string for the pattern, so that '\d' is read by the regular expression engine and not as a string escape
    return [ atoi(c) for c in re.split(r'(\d+)', text) ]  #Split the string


def ordered_list_of_path(indir,pattern_A,pattern_B="",pattern_C=""):
    '''
    Function that return an ordered list of the files of the folder 'indir' matching up to three filename patterns.

    'indir' waits for the path to the folder containing the files
    'pattern_A', 'pattern_B', 'pattern_C' wait for filename patterns (strings, as understood by 'glob.glob').
    'pattern_B' and 'pattern_C' are ignored when left empty.

    Files matching 'pattern_A' come first, then those matching 'pattern_B', then those matching 'pattern_C'.
    Within each group, names are sorted in human natural order (see 'natural_keys').

    Be careful: the function changes the current working directory to 'indir', and the returned names
    are relative to that folder.
    '''
    os.chdir(indir) # Change the current directory to the folder containing the files
    # First pattern is always used
    ordered=glob.glob(pattern_A)
    ordered.sort(key=natural_keys)
    # Optional patterns are appended afterwards, each group being sorted on its own
    for extra_pattern in (pattern_B, pattern_C):
        if extra_pattern == "":
            continue
        ordered.extend(sorted(glob.glob(extra_pattern), key=natural_keys))
    return ordered

In [50]:
def join_2csv(file1,file2,separator=";",join='inner',fillempty='NULL'):
    '''
    Function that join two csv files according to the first column (primary key).
    'file1' and 'file2' wait for complete path (strings) to the corresponding files. Please note that 'file1' is assumed to be the left-one in the join
    'separator' wait for the character to be considered as .csv delimiter (string)
    'join' parameter wait either for 'left' or 'inner' according to type of join:
        - 'left' keeps the keys found in 'file1' only
        - 'inner' keeps the keys found in 'file1' or in 'file2' (be careful: keys missing in one of the files are kept and filled with 'fillempty')
    'fillempty' wait for the string to be used to fill the blank when no occurrence is found for the join operation

    The first line of each file is considered as header. If a key appears several times in a file, the last row is used.
    Rows of the result are sorted on the key in human natural order (see 'natural_keys').
    The result is written in a file obtained from 'gscript.tempfile()' whose path is returned.
    Raises ValueError if 'join' is neither 'inner' nor 'left'.
    '''
    import csv
    # Refuse unknown type of join right away (it used to fail later with a NameError)
    if join not in ('inner','left'):
        raise ValueError("'join' parameter should be either 'inner' or 'left' (got '%s')"%join)

    def read_table(path):
        # Read a csv file once and return its header and its rows; the file is closed when leaving the 'with' block
        with open(path) as fin:
            reader=csv.reader(fin, delimiter=separator)
            header=next(reader)
            rows=[row for row in reader]
        return header, rows

    # Headers and rows of both files (the key column of file 2 is not repeated in the output header)
    header_list1, rows1=read_table(file1)
    header2, rows2=read_table(file2)
    header_list2=header2[1:]
    # Make a list of unique IDs from the first and second table according to type of join
    ids=set(row[0] for row in rows1)
    if join=='inner':
        ids.update(row[0] for row in rows2)
    id_list=sorted(ids, key=natural_keys)
    # Build dictionaries with the values of each file
    values_dict1=dict((row[0],row[1:]) for row in rows1)
    values_dict2=dict((row[0],row[1:]) for row in rows2)
    # Build new content
    new_content=[header_list1+header_list2]
    for key in id_list:
        new_row=[key]
        # Only a missing key is replaced by the filling value; any other error is not hidden
        try:
            new_row.extend(values_dict1[key])
        except KeyError:
            new_row.extend(['%s'%fillempty for x in header_list1[1:]])
        try:
            new_row.extend(values_dict2[key])
        except KeyError:
            new_row.extend(['%s'%fillempty for x in header_list2])
        new_content.append(new_row)
    # Write the result; leaving the 'with' block closes the file, so the content is complete when the path is returned
    outfile=gscript.tempfile()
    with open(outfile,"w") as fout:
        writer=csv.writer(fout, delimiter=separator)
        writer.writerows(new_content) #Write multiples rows in the file
    return outfile

In [51]:
def join_multiplecsv(fileList,outfile,separator=";",join='inner', fillempty='NULL', overwrite=False):
    '''
    Function that apply join on multiple csv files (see 'join_2csv' function for the join itself).
    'fileList' waits for a list of paths to at least two .csv files; the first one is the left-one of the first join
    'outfile' waits for the path to the output .csv file
    'separator', 'join' and 'fillempty' are passed to 'join_2csv'
    'overwrite' waits for a boolean; if the output file exists and 'overwrite' is not enabled, a message is printed and nothing is done

    The function stops with 'sys.exit' if there are less than two files in the list.
    An existing output file is replaced only after the inputs were checked and joined.
    '''
    import os, sys, shutil
    # Stop execution if outputfile exists and can not be overwritten
    if os.path.isfile(outfile) and not overwrite:
        # Parenthesised single-argument print: valid both as a Python 2 statement and as a Python 3 function call
        print("File '%s' aleady exists and overwrite option is not enabled."%outfile)
        return
    # Check if there are at least 2 files in the list, before touching any existing output
    nbfile=len(fileList)
    if nbfile<=1:
        sys.exit("This function require at least two .csv files to be jointed together.")
    # Copy the list of file in a queue list
    queue_list=list(fileList)
    # Join the first file with the second one, then the result with each of the remaining files
    tmp_file=queue_list.pop(0)
    for next_file in queue_list:
        tmp_file=join_2csv(tmp_file,next_file,separator=separator,join=join, fillempty=fillempty)
    # Remove the existing output (overwrite is enabled at this point), then copy the temporary file to the desired output path
    if os.path.isfile(outfile):
        os.remove(outfile)
    shutil.copy2(tmp_file,outfile)

In [52]:
## Join .csv files of statistics (one 'all_stats.csv' per level: grid, then administrative units)
for directory in [outputdirectory_grid, outputdirectory_admin]:
    allstatfile=os.path.join(directory,"all_stats.csv")
    pattern_A="LC_*_prop.csv"   #Add all csv with proportions of Land cover classes
    pattern_B="LU_*_prop.csv"  #Add all csv with proportions of Land use classes
    pattern_C="MR_*_prop.csv"  #Add csv with proportion of MR built-up
    # NOTE: 'ordered_list_of_path' changes the current working directory to 'directory'; the returned names are relative to it
    list_paths = ordered_list_of_path(directory,pattern_A,pattern_B,pattern_C)
    join_multiplecsv(list_paths,allstatfile,separator=",",join='inner', fillempty='NULL', overwrite=True)

### Visualisation of .csv files with statistics to be used

**Admin level 0**

In [58]:
## Use pandas for preview of .csv content (administrative level)
df_level0 = pd.read_csv(os.path.join(outputdirectory_admin,"all_stats.csv"))
# Parenthesised single-argument print: valid both as a Python 2 statement and as a Python 3 function call
print("There are %s rows in the dataframe"%len(df_level0))

There are 92 rows in the dataframe


In [59]:
## Display the last 8 rows of the dataframe
df_level0.tail(8)

Unnamed: 0,cat,LC_10_proportion,LC_22_proportion,LC_23_proportion,LC_33_proportion,LC_34_proportion,LC_45_proportion,LC_111_proportion,LC_112_proportion,LC_113_proportion,LU_1_proportion,LU_2_proportion,LU_3_proportion,LU_4_proportion,LU_5_proportion,LU_6_proportion,LU_7_proportion,LU_8_proportion,MR_1_proportion
84,90,4.14186,2.5389,3.49121,0.0,0.00837,47.07824,18.13912,16.40363,8.19866,0.0,0.0,1.48147,29.29234,26.56825,8.89439,3.28561,30.47794,91.56137
85,92,6.16394,2.32306,5.33746,0.0,0.00288,38.47639,27.32008,16.67622,3.69996,13.22556,0.0,2.82596,2.69384,54.6732,6.84909,16.33996,3.39238,92.63615
86,93,1.43904,1.11119,2.31706,0.0,0.0,46.07337,29.22652,15.45421,4.3786,0.0,2.10331,5.92561,0.0,57.30684,6.51643,20.75077,7.39703,88.18728
87,95,13.90201,8.18194,7.58257,0.19749,0.00095,33.79304,20.68137,10.10989,5.55074,0.0,0.03407,2.38112,45.81027,34.61187,11.39264,0.36809,5.40193,92.56219
88,96,2.42438,2.32184,8.4156,0.15801,0.0,38.04187,41.01151,7.38604,0.24075,0.0,0.30591,0.50372,0.50732,67.14672,19.87354,0.74341,10.91938,99.18418
89,97,5.22106,5.08049,7.81146,4.35,0.00173,35.53913,35.86044,5.80834,0.32735,0.0,7.74403,0.0,0.08942,24.73067,10.65976,50.86511,5.911,90.82697
90,98,7.96489,5.06971,15.86843,0.96281,0.00174,29.11977,37.21976,3.67354,0.11935,0.0,1.49157,0.83214,0.52367,7.66176,18.49773,67.06992,3.92321,97.80284
91,99,4.02054,3.35277,1.9005,0.0,0.00206,48.92888,16.84599,20.04965,4.8996,0.0,0.0,6.23071,6.34851,69.79553,14.26131,0.33849,3.02545,79.02314


**Grid level**

In [60]:
## Use pandas for preview of .csv content (grid level)
df_grid = pd.read_csv(os.path.join(outputdirectory_grid,"all_stats.csv"))
# Parenthesised single-argument print: valid both as a Python 2 statement and as a Python 3 function call
print("There are %s rows in the dataframe"%len(df_grid))

There are 20049 rows in the dataframe


In [61]:
## Display the last 8 rows of the dataframe
df_grid.tail(8)

Unnamed: 0,cat,LC_10_proportion,LC_22_proportion,LC_23_proportion,LC_33_proportion,LC_34_proportion,LC_45_proportion,LC_111_proportion,LC_112_proportion,LC_113_proportion,LU_1_proportion,LU_2_proportion,LU_3_proportion,LU_4_proportion,LU_5_proportion,LU_6_proportion,LU_7_proportion,LU_8_proportion,MR_1_proportion
20041,20042,0.0,23.5875,0.0,0.0,0.0,8.61,24.44,43.2875,0.075,0.0,9.3275,0.0,28.84,61.8325,0.0,0.0,0.0,88.125
20042,20043,0.0,45.385,8.41,0.0,0.0,29.85,13.76,2.595,0.0,0.0,29.66,0.0,66.845,3.495,0.0,0.0,0.0,13.75
20043,20044,0.0,0.0,20.76581,0.0,0.0,78.59745,0.63674,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,45.56763
20044,20045,0.0,9.60881,24.84916,0.0,0.0,14.17438,0.15084,1.58136,49.63546,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,85.5625
20045,20046,0.155,14.555,0.37,0.0,0.0,47.6275,10.875,20.8375,5.58,0.0,0.0,0.0,65.1375,0.0,34.8625,0.0,0.0,63.375
20046,20047,0.0,11.4525,31.26,0.0,0.0,56.0875,0.8625,0.3375,0.0,0.0,0.0,0.0,80.405,0.0,19.595,0.0,0.0,4.125
20047,20048,0.0,6.325,2.6225,0.0,0.0,80.555,0.3925,1.6825,8.4225,0.0,0.0,0.0,9.59,0.0,90.41,0.0,0.0,3.52423
20048,20049,2.74968,0.0,49.92149,0.0,0.0,47.32883,0.0,0.0,0.0,0.0,0.0,0.0,80.05456,0.0,19.94544,0.0,0.0,4.54545


# Random Forest model

In [None]:


## Random Forest
# NOTE(review): 'RandomForest', 'output_weighting_layer' and 'vector' are not defined in the visible part of this notebook,
# and 'id' is not assigned at notebook level (it would resolve to Python's built-in function).
# This cell was never executed and looks like an unfinished placeholder — TODO complete or remove.
RandomForest(output_weighting_layer,vector.split("@")[0],id)

In [None]:
# NOTE(review): this line is a GRASS GIS command line, not Python code: it cannot run as it is in a Python cell.
# It also refers to Ouagadougou layers and paths, whereas the rest of this notebook works on Dakar — TODO adapt before use.
r.population.density.py -a -f --overwrite vector=admin_level0@PERMANENT land_cover=landcover@PERMANENT land_use=landuse@PERMANENT tile_size=100 id=Sect_ID population=pop_Total output=Ouaga_lc_lu_100m_weighting_RF plot=/media/tais/My_Book_1/MAUPP/Traitement/Population_modelling_dasymetry/Results/Ouagadougou/RF_feature_importance.png log_file=/media/tais/My_Book_1/MAUPP/Traitement/Population_modelling_dasymetry/Results/Ouagadougou/RF_log n_jobs=8

# Re-allocate population count from level 0 to the grid level

In [None]:
# NOTE(review): this line is a GRASS GIS command line, not Python code: it cannot run as it is in a Python cell.
# It also refers to Ouagadougou layer names, whereas the rest of this notebook works on Dakar — TODO adapt before use.
v.area.weigh --overwrite vector=admin_level0_100m_gridded@PERMANENT column=pop_Total weight=Ouaga_lc_lu_100m_weighting_RF@PERMANENT output=Ouaga_lc_lu_100m_pop_estimation

# Cleaning mapset

In [None]:
### Remove all temporary layers
# NOTE(review): only the raster of gridded administrative units is removed here; the layers listed in TMP_MAPS
# and the files listed in TMP_CSV are not cleaned by this cell.
# raster corresponding to administrative units (level0)
gscript.run_command('g.remove', quiet=True, flags='f',type='raster',name=gridded_admin_units)