In [1]:
from PIL import Image
import os

import numpy as np
import pandas as pd
from numba import cuda
from geotiff import GeoTiff
from tensorflow import keras
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from tqdm.keras import TqdmCallback

from inputs import models

Image.MAX_IMAGE_PIXELS = None

0epoch [00:00, ?epoch/s]

0batch [00:00, ?batch/s]

In [2]:
def kernelCopyArray(limsy, limsx):
    """Build a CUDA kernel that copies one 2D array into another, element-wise.

    The returned kernel is launched as ``kernel[grid, block](popseeds, result)``
    and writes ``result[y, x]`` into ``popseeds[y, x]`` for every position
    inside the ``limsy`` x ``limsx`` bounds.

    Note: a later cell defines another ``kernelCopyArray`` taking four arrays;
    once that cell has run, this name refers to that version instead.
    """
    @cuda.jit
    def copyKernel(dst, src):
        row, col = cuda.grid(2)

        # The launch grid is rounded up to whole blocks, so some threads fall
        # outside the array; they must not touch memory.
        if row >= limsy or col >= limsx:
            return

        dst[row, col] = src[row, col]
    return copyKernel
def kernelReduceToPoint(limsy, limsx):
    """Build a CUDA kernel that performs one erosion pass over a seed mask.

    The returned kernel ``f(popseeds, result)`` reads only from ``popseeds``
    and writes only to ``result``: every non-zero pixel that has exactly one
    or two non-zero 4-neighbours is set to 0 in ``result``. All other pixels
    of ``result`` are left untouched, so ``result`` must start as a copy of
    ``popseeds``.

    Parameters
    ----------
    limsy, limsx : int
        Height and width of the arrays the kernel will be launched on.
    """
    @cuda.jit
    def f(popseeds, result):
        posy, posx = cuda.grid(2)

        # The launch grid is rounded up to whole blocks; surplus threads exit.
        if posy >= limsy or posx >= limsx:
            return
        if popseeds[posy, posx] == 0:
            return

        # Count non-zero 4-neighbours, skipping neighbours outside the array.
        count = 0
        if posy - 1 >= 0 and popseeds[posy - 1, posx] != 0:
            count += 1
        if posx - 1 >= 0 and popseeds[posy, posx - 1] != 0:
            count += 1
        # The guard must test the neighbour index (pos + 1), not pos itself,
        # otherwise the last row/column reads one element past the array.
        if posy + 1 < limsy and popseeds[posy + 1, posx] != 0:
            count += 1
        if posx + 1 < limsx and popseeds[posy, posx + 1] != 0:
            count += 1

        if count == 1 or count == 2:
            result[posy, posx] = 0
    return f

def reduceToPointGPU(popseeds, passes=10):
    """Erode the seed blobs in ``popseeds`` on the GPU and return the result.

    Runs ``passes`` rounds of the ``kernelReduceToPoint`` kernel. Each round
    reads the current mask and writes into a second buffer, which is then
    copied back so the next round sees the updated mask.

    Parameters
    ----------
    popseeds : 2D array
        Seed mask; it is converted to ``uint8`` before upload.
    passes : int, optional
        Number of erosion rounds (default 10, the value previously hard-coded).

    Returns
    -------
    numpy.ndarray
        ``uint8`` array with the same shape as ``popseeds``.
    """
    limsy, limsx = popseeds.shape

    hostSeeds = np.uint8(popseeds)
    gpuPopseeds = cuda.to_device(hostSeeds)
    gpuResult = cuda.to_device(hostSeeds)

    threadsperblock = (16, 16)
    # Ceil division so the grid covers the whole array.
    blockspergrid = (
        (limsy + threadsperblock[0] - 1) // threadsperblock[0],
        (limsx + threadsperblock[1] - 1) // threadsperblock[1],
    )

    # Build the kernel once: calling the factory inside the loop would create
    # (and compile) a brand new jitted function on every pass.
    erode = kernelReduceToPoint(limsy, limsx)[blockspergrid, threadsperblock]

    for _ in range(passes):
        erode(gpuPopseeds, gpuResult)
        # Device-to-device copy of the written buffer back into the read buffer.
        # Using the array method avoids depending on `kernelCopyArray`, a name
        # that a later cell rebinds to a kernel with a different signature.
        gpuPopseeds.copy_to_device(gpuResult)

    reduced = gpuPopseeds.copy_to_host()

    # Deallocation: drop the device array references.
    gpuPopseeds, gpuResult = None, None
    return reduced

In [3]:
# Load the seed image and the masks as 8-bit grayscale arrays.
# The seed image contains gray-128 circles marking where the province seeds lie
# (value 255 is treated as a "special" seed further down). It's important that
# each circle has a unique center pixel, which usually means an odd brush width
# such as 15 or 5.
popseeds = np.array(Image.open('intermediate/popseeds.png').convert('L'))
seaMask = np.array(Image.open('intermediate/seaMask.png').convert('L'))
riverMask = np.array(Image.open('intermediate/lakeRiverCenterlines.png').convert('L'))
impassableMask = np.array(Image.open('intermediate/impassablesMask.png').convert('L'))
lakeMask = np.array(Image.open('intermediate/waterNoRivers.png').convert('L'))
# Remove sea pixels from the lake mask.
lakeMask[seaMask==255] = 0

# Reduce circles into isolated pixels; widen to uint16 so ids above 255 fit
popseeds = np.uint16(reduceToPointGPU(popseeds))
# Debug helper (disabled): dump the reduced seeds as an image
#Image.fromarray(np.uint8(popseeds/np.max(popseeds)*255)).save('testtttt.png')

# Build-process values stored in popseeds (ids 0-10 are reserved for these).
# Each assignment overwrites whatever is already there, including seed pixels,
# and later assignments win over earlier ones.
#                                               # 0: available seas
popseeds[(popseeds==0) & (seaMask==0)] = 1      # 1: available land
popseeds[riverMask==255] = 2                      # 2: rivers
popseeds[lakeMask==255] = 3                     # 3: lakes
popseeds[impassableMask==255] = 10            # 10: impassables

# Find out how many provinces we have by counting the remaining seed pixels.
# Seeds on pixels where seaMask is neither 0 nor 255 are not counted here.
nSeaProvinces = np.sum((popseeds==128) & (seaMask==255))
nLandProvinces = np.sum((popseeds==128) & (seaMask==0))
nSpecialProvinces = np.sum(popseeds==255)
nProvinces = nSeaProvinces + nLandProvinces + nSpecialProvinces

# Generate unique random colors for each province. Sets are used to discard
# duplicates; they are turned into lists afterwards so they can be indexed.
# NOTE(review): no random seed is set, so the colors differ on every run.
colors = {
    'sea': set(),
    'land': set(),
    'special': set()
}
# Sea colors: R = 1, G in [1, 254], B in [150, 254]
while len(colors['sea']) < nSeaProvinces:
    color = tuple(np.uint8(x) for x in [1]+list(np.random.randint(1, 255, size=1))+list(np.random.randint(150, 255, size=1)))
    colors['sea'].add(color)
colors['sea'] = list(colors['sea'])
# Land colors: R and G in [1, 254], B = 1
while len(colors['land']) < nLandProvinces:
    color = tuple(np.uint8(x) for x in list(np.random.randint(1, 255, size=2))+[1])
    colors['land'].add(color)
colors['land'] = list(colors['land'])
# Special colors: R and G in [100, 254], B = 100
while len(colors['special']) < nSpecialProvinces:
    color = tuple(np.uint8(x) for x in list(np.random.randint(100, 255, size=2))+[100])
    colors['special'].add(color)
colors['special'] = list(colors['special'])

# Assign an id to each seed pixel. Ids 0-10 are reserved for the build process,
# so numbering starts at 11: special provinces first, then land, then sea.
# The seed locations are collected before any id is written, so a freshly
# assigned id that happens to equal 128 or 255 is never mistaken for a seed.
locations = np.where((popseeds==128) | (popseeds==255))
countSea = 0
countLand = 0
countSpecial = 11
for i in tqdm(range(len(locations[0]))):
    y, x = locations[0][i], locations[1][i]
    if popseeds[y,x] == 255:
        popseeds[y,x] = countSpecial
        countSpecial += 1
    elif seaMask[y,x] == 0:
        popseeds[y,x] = countLand+nSpecialProvinces+11
        countLand += 1
    elif seaMask[y,x] == 255:
        popseeds[y,x] = countSea+nLandProvinces+nSpecialProvinces+11
        countSea += 1

print(f'Provinces: {nProvinces} ({nSeaProvinces} sea / {nLandProvinces} land / {nSpecialProvinces} special)')

  0%|          | 0/1034 [00:00<?, ?it/s]

Provinces: 1034 (366 sea / 643 land / 25 special)


In [4]:
def kernelPaintFromId(limsy, limsx, nSpecialProvinces, nLandProvinces):
    """Build a CUDA kernel that turns a province-id map into an RGB image.

    Id layout expected in ``provinces``:

    * ``0..10``  - build-process values, painted as the gray ``(id, id, id)``
    * ``11 ..``  - special provinces (``nSpecialProvinces`` of them)
    * then       - land provinces (``nLandProvinces`` of them)
    * then       - sea provinces

    The returned kernel is launched as
    ``f(provinces, result, colorSpecial, colorLand, colorSea)`` where
    ``result`` is a ``(limsy, limsx, 3)`` array and each palette is indexed by
    the province's position within its own category.
    """
    # First id of each category; computed once, outside the kernel.
    specialStart = 11
    landStart = specialStart + nSpecialProvinces
    seaStart = landStart + nLandProvinces

    @cuda.jit
    def f(provinces, result, colorSpecial, colorLand, colorSea):
        posy, posx = cuda.grid(2)

        # The launch grid is rounded up to whole blocks; surplus threads exit.
        if posy >= limsy or posx >= limsx:
            return

        ids = provinces[posy, posx]
        if ids > 10:
            if ids >= seaStart:
                color = colorSea[int(ids - seaStart)]
            elif ids >= landStart:
                color = colorLand[int(ids - landStart)]
            else:
                # Special ids start at 11, so the palette offset is 11
                # (subtracting 1 here picked the wrong row and ran past the
                # end of the palette).
                color = colorSpecial[int(ids - specialStart)]
            result[posy, posx, 0] = color[0]
            result[posy, posx, 1] = color[1]
            result[posy, posx, 2] = color[2]
        else:
            result[posy, posx, 0] = ids
            result[posy, posx, 1] = ids
            result[posy, posx, 2] = ids
    return f

def paintFromIdGPU(provinces, colors, nSpecialProvinces, nLandProvinces):
    """Render a province-id map as an RGB image on the GPU.

    Parameters
    ----------
    provinces : 2D array
        Province ids (see ``kernelPaintFromId`` for the id layout).
    colors : dict
        Maps ``'special'``, ``'land'`` and ``'sea'`` to a sequence of RGB
        triples, one per province of that category.
    nSpecialProvinces, nLandProvinces : int
        Category sizes, used to translate ids into palette indices.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of shape ``(height, width, 3)``.
    """
    # Use the shape of the array being painted, not of a notebook global.
    limsy, limsx = provinces.shape

    gpuProvinces = cuda.to_device(provinces)
    # Force an (n, 3) uint8 layout so an empty category still yields a 2D palette.
    gpuColorSpecial = cuda.to_device(np.array(colors['special'], dtype=np.uint8).reshape(-1, 3))
    gpuColorLand = cuda.to_device(np.array(colors['land'], dtype=np.uint8).reshape(-1, 3))
    gpuColorSea = cuda.to_device(np.array(colors['sea'], dtype=np.uint8).reshape(-1, 3))
    gpuResult = cuda.to_device(np.zeros((limsy, limsx, 3), dtype=np.uint8))

    threadsperblock = (16, 16)
    # Ceil division so the grid covers the whole array.
    blockspergrid = (
        (limsy + threadsperblock[0] - 1) // threadsperblock[0],
        (limsx + threadsperblock[1] - 1) // threadsperblock[1],
    )

    kernelPaintFromId(limsy, limsx, nSpecialProvinces, nLandProvinces)[blockspergrid, threadsperblock](
        gpuProvinces, gpuResult, gpuColorSpecial, gpuColorLand, gpuColorSea
    )
    result = gpuResult.copy_to_host()

    # Deallocation: drop the device array references.
    gpuProvinces, gpuColorSpecial, gpuColorLand, gpuColorSea, gpuResult = None, None, None, None, None
    return result

In [5]:
import concurrent.futures
from numba.cuda.random import create_xoroshiro128p_states, xoroshiro128p_uniform_float32
def kernelCopyArray(limsy, limsx):
    """Build a CUDA kernel that commits the write buffers into the read buffers.

    The returned kernel is launched as
    ``kernel[grid, block](provinces, provincesRes, condition, conditionRes)``
    and copies ``provincesRes`` into ``provinces`` and ``conditionRes`` into
    ``condition`` for every position inside the ``limsy`` x ``limsx`` bounds.

    Note: this definition replaces the two-array ``kernelCopyArray`` defined
    in an earlier cell.
    """
    @cuda.jit
    def commitKernel(provinces, provincesRes, condition, conditionRes):
        row, col = cuda.grid(2)

        # The launch grid is rounded up to whole blocks, so some threads fall
        # outside the arrays; they must not touch memory.
        if row >= limsy or col >= limsx:
            return

        provinces[row, col] = provincesRes[row, col]
        condition[row, col] = conditionRes[row, col]
    return commitKernel
def kernelVoronoi(limsy, limsx, nSpecialProvinces, nLandProvinces):
    """Build the CUDA kernel that performs one growth step of the province fill.

    The returned kernel ``f`` is launched over the whole map. Each active pixel
    (``condition`` true, province id > 10) may claim its four direct neighbours
    for its own province, subject to a per-pixel random value and to a
    terrain check that depends on the province category:

    * sea provinces claim pixels whose value is 0,
    * land provinces claim pixels whose value is 1,
    * special provinces claim pixels whose value is 1 or 2.

    Reads come from ``provinces`` / ``condition``; writes go to
    ``provincesRes`` / ``conditionRes`` / ``colored``. The caller copies the
    result buffers back into the read buffers after each launch.

    Ids are interpreted as: 11.. special, then land, then sea, using
    ``nSpecialProvinces`` and ``nLandProvinces`` as the category sizes.
    """
    # NOTE(review): the helpers below are called from inside the kernel but are
    # declared with plain `@cuda.jit`; Numba documents `@cuda.jit(device=True)`
    # for device functions - confirm this form works on the installed version.
    @cuda.jit
    def seaCheck(ids2):
        # A sea province may take a pixel only if its value is 0.
        if ids2==0:
            return True
        return False
    @cuda.jit
    def landCheck(ids2):
        # A land province may take a pixel only if its value is 1.
        if ids2==1:
            return True
        return False
    @cuda.jit
    def specialCheck(ids2):
        # A special province may take pixels whose value is 1 or 2.
        if ids2==1 or ids2==2:
            return True
        return False
    @cuda.jit
    def applySingleChange(provinces, provincesRes, colored, conditionRes, ids, color, y, x, neighborCheck):
        # Try to claim the single pixel (y, x) for province `ids`.
        ids2 = provinces[y,x]
        if neighborCheck(ids2):
            provincesRes[y,x] = ids
            # Pixels whose value was 2 get the province id but keep their current color.
            if ids2!=2: colored[y,x] = (color[0], color[1], color[2])
            # The claimed pixel becomes active for the following step.
            conditionRes[y,x] = True
    @cuda.jit
    def applyChanges(y, x, conditionRes, ids, provinces, provincesRes, colored, color, neighborCheck):
        # Try to claim each of the four direct neighbours that lies inside the map.
        if y-1>=0: applySingleChange(provinces, provincesRes, colored, conditionRes, ids, color, y-1, x, neighborCheck)
        if x-1>=0: applySingleChange(provinces, provincesRes, colored, conditionRes, ids, color, y, x-1, neighborCheck)
        if y+1<limsy: applySingleChange(provinces, provincesRes, colored, conditionRes, ids, color, y+1, x, neighborCheck)
        if x+1<limsx: applySingleChange(provinces, provincesRes, colored, conditionRes, ids, color, y, x+1, neighborCheck)
    @cuda.jit
    def f(provinces, provincesRes, condition, conditionRes, randomStates, colored, colorSpecial, colorLand, colorSea):
        y, x = cuda.grid(2)
        
        if y < limsy and x < limsx: # Must have or crashes out of bounds
            # Only active pixels grow.
            if condition[y,x]==False: return
            # `randomStates` is indexed per pixel, so it must cover at least (limsy, limsx).
            randomState = randomStates[y,x]
            # Pixels whose random value is above 0.5 skip this step and stay active.
            if randomState>0.5: return
            ids = provinces[y,x]
            if ids>10:
                if ids>nSpecialProvinces+nLandProvinces+10: # Sea
                    ids2 = int(ids-nSpecialProvinces-nLandProvinces-11)
                    color = colorSea[ids2]
                    #colored[y,x] = (color[0], color[1], color[2])
                    applyChanges(y, x, conditionRes, ids, provinces, provincesRes, colored, color, seaCheck)
                    # This pixel has had its turn and is deactivated.
                    conditionRes[y,x] = False
                elif ids>nSpecialProvinces+10: # Land
                    ids2 = int(ids-nSpecialProvinces-11)
                    color = colorLand[ids2]
                    #colored[y,x] = (color[0], color[1], color[2])
                    applyChanges(y, x, conditionRes, ids, provinces, provincesRes, colored, color, landCheck)
                    # This pixel has had its turn and is deactivated.
                    conditionRes[y,x] = False
                else: # Special
                    # Special provinces use a stricter threshold (0.15 instead of 0.5).
                    if randomState>0.15: return
                    ids2 = int(ids-11)
                    color = colorSpecial[ids2]
                    #colored[y,x] = (color[0], color[1], color[2])
                    applyChanges(y, x, conditionRes, ids, provinces, provincesRes, colored, color, specialCheck)
                    # Unlike the sea and land branches, conditionRes[y,x] is not
                    # cleared here, so special pixels remain active.
    return f

def saveImage(coloredProvinces, i):
    """Save an RGB array as ``intermediate/imgs/<i, zero-padded to 4 digits>.png``.

    The output directory is created if it does not exist yet. This function is
    run on worker threads, where a missing directory would otherwise raise an
    error that nobody sees.
    """
    # exist_ok=True keeps this safe when several workers get here at once.
    os.makedirs('intermediate/imgs', exist_ok=True)
    Image.fromarray(coloredProvinces).save(f'intermediate/imgs/{i:04d}.png')

def voronoiGPU(provinces, nSpecialProvinces, nLandProvinces, cycles, printImgs=False):
    """Grow every province seed over the map on the GPU.

    Runs ``cycles`` growth steps. Each step launches the ``kernelVoronoi``
    kernel (reads the current state, writes the next one) followed by the
    ``kernelCopyArray`` kernel (commits the next state into the current one).

    Parameters
    ----------
    provinces : 2D array
        Seed map: values 0-10 are build-process values, values above 10 are
        province ids. It is not modified.
    nSpecialProvinces, nLandProvinces : int
        Category sizes used to interpret the ids.
    cycles : int
        Number of growth steps.
    printImgs : bool, optional
        When true, snapshots of the colored map are saved through
        ``saveImage`` at the cycles listed in ``imgTakes`` and after the last
        cycle.

    Returns
    -------
    numpy.ndarray
        New array with the same shape and dtype as ``provinces`` holding the
        grown province ids.

    Notes
    -----
    Reads the module-level ``colors`` dictionary for the palettes.
    Errors raised while saving snapshots are re-raised once all cycles have run.
    """
    limsy, limsx = provinces.shape

    coloredProvinces = paintFromIdGPU(provinces, colors, nSpecialProvinces, nLandProvinces)

    cuda.close()
    gpuProvinces = cuda.to_device(provinces)
    gpuProvincesRes = cuda.to_device(provinces)
    active = provinces > 10  # only seed pixels are active at the start
    gpuCondition = cuda.to_device(active)
    gpuConditionRes = cuda.to_device(active)
    gpuColored = cuda.to_device(coloredProvinces)
    gpuColorSpecial = cuda.to_device(np.array(colors['special']))
    gpuColorLand = cuda.to_device(np.array(colors['land']))
    gpuColorSea = cuda.to_device(np.array(colors['sea']))

    # Per-pixel random field, sized to the map (it used to be hard-coded to
    # 8192x16384). It starts as zeros, so every active pixel grows until the
    # first refresh. float16 from the start keeps the kernel argument type
    # identical across launches.
    gpuRandomNumbers = cuda.to_device(np.zeros((limsy, limsx), dtype=np.float16))

    threadsperblock = (32, 32)
    # Ceil division so the grid covers the whole map.
    blockspergrid = (
        (limsy + threadsperblock[0] - 1) // threadsperblock[0],
        (limsx + threadsperblock[1] - 1) // threadsperblock[1],
    )

    # Build both kernels once: calling the factories inside the loop would
    # create (and compile) new jitted functions on every cycle.
    grow = kernelVoronoi(limsy, limsx, nSpecialProvinces, nLandProvinces)[blockspergrid, threadsperblock]
    commit = kernelCopyArray(limsy, limsx)[blockspergrid, threadsperblock]

    refreshEvery = 5  # cycles between two random-field refreshes
    imgTakes = {0, 1, 2, 4, 6, 8, 12, 16, 20, 28, 36, 44, 56, 68, 80, 100, 120, 160, 200, 240, 300, 360, 420, 500, 600, 700, 800, 900, 1000, 1100, 1200}
    pendingSaves = []
    with concurrent.futures.ThreadPoolExecutor() as executor:
        for i in tqdm(range(cycles)):
            # The previous test, `i % 5 and i>0 == 0`, parsed as
            # `(i % 5 != 0) and (i > 0) and (0 == 0)`. The condition below is
            # the assumed intent: refresh every fifth cycle, but not at cycle 0.
            if i > 0 and i % refreshEvery == 0:
                gpuRandomNumbers = None  # release the old buffer before allocating the new one
                gpuRandomNumbers = cuda.to_device(np.float16(np.random.rand(limsy, limsx)))

            grow(
                gpuProvinces,
                gpuProvincesRes,
                gpuCondition,
                gpuConditionRes,
                gpuRandomNumbers,
                gpuColored,
                gpuColorSpecial,
                gpuColorLand,
                gpuColorSea
            )
            # This copy is fundamental. Reading and writing the provinces and
            # condition arrays in the same kernel gave very strange memory
            # errors, so reads and writes use different arrays.
            commit(gpuProvinces, gpuProvincesRes, gpuCondition, gpuConditionRes)

            if printImgs and i in imgTakes:
                # copy_to_host returns a fresh host array, so it can be handed
                # to the worker thread directly.
                pendingSaves.append(executor.submit(saveImage, gpuColored.copy_to_host(), i))

        # Final snapshot, named after the last cycle. It is skipped when that
        # cycle was already saved above, to avoid two workers writing the same file.
        lastCycle = max(cycles - 1, 0)
        if printImgs and not (cycles > 0 and lastCycle in imgTakes):
            pendingSaves.append(executor.submit(saveImage, gpuColored.copy_to_host(), lastCycle))

    # Surface any error raised by a save task instead of dropping it silently.
    for pending in pendingSaves:
        pending.result()

    # Return a new array rather than overwriting the caller's input in place.
    grown = gpuProvinces.copy_to_host()

    # Deallocation: drop the device array references.
    gpuProvinces, gpuRandomNumbers = None, None
    return grown

In [None]:
# Grow the province seeds for 2000 cycles, saving intermediate snapshots along the way.
provinces = voronoiGPU(popseeds, nSpecialProvinces, nLandProvinces, cycles=2000, printImgs=True)

# Repaint the final id map from scratch and open it in the default image viewer.
coloredProvinces = paintFromIdGPU(provinces, colors, nSpecialProvinces, nLandProvinces)
Image.fromarray(coloredProvinces).show()