# Import packages

In [1]:
import numpy as np 
from matplotlib import pyplot as plt
import pandas as pd
import geopandas as gpd
import gdal
from shapely.geometry import Point, Polygon, box, MultiPolygon
import os
import rasterio
from rasterio.mask import mask
cwd = os.getcwd()

Enable inline rendering of matplotlib figures

In [2]:
%matplotlib inline

# Define constants

json file locations

In [3]:
# Location of the GeoJSON file, relative to the directory the notebook was started in.
json_file = "{}/1/Building info/testdata1_bbox.geojson".format(cwd)




get image path

In [4]:
# Directory that holds the modified "After" rasters.
image_path = "{}/1/After/Modified".format(cwd)

# NOTE: this changes the working directory of the whole kernel; `cwd` above
# was captured before this call and is therefore unaffected by it.
os.chdir(image_path)

Select the images to process from the image directory

In [6]:
# List the raster directory explicitly rather than whatever the current
# working directory happens to be, so this cell does not depend on the
# earlier os.chdir call having been executed.
image_list_raw = os.listdir(image_path)

# Keep only the two rasters of interest. os.listdir returns entries in
# arbitrary order, so sort to make the processing order reproducible.
image_list = sorted(
    name for name in image_list_raw
    if name in ('georeference_after%2FRescUAV_22917_DutchCul_modified.tif',
                'georeference_after%2FRescUAV_17917_Middle_modified.tif')
)

In [7]:
# Display the selected raster file names as a sanity check.
image_list

['georeference_after%2FRescUAV_17917_Middle_modified.tif',
 'georeference_after%2FRescUAV_22917_DutchCul_modified.tif']

# Geojson

Read in geojson file

In [8]:
# Load the GeoJSON at `json_file` into a GeoDataFrame (one row per feature).
df_training_raw = gpd.read_file(json_file)

Remove lines without geometries

In [9]:
# Rows whose geometry is missing cannot be clipped from a raster; drop them.
has_geometry = df_training_raw.geometry.notna()
df_training = df_training_raw.loc[has_geometry]

# .tiff file

Compute the extent of an opened .tiff file

In [11]:
def image_extent(file):
    """Return the georeferenced extent of an opened GDAL raster.

    Parameters
    ----------
    file : GDAL dataset (e.g. the object returned by ``gdal.Open``)
        Must expose ``RasterXSize``, ``RasterYSize`` and ``GetGeoTransform()``.

    Returns
    -------
    list
        ``[minx, maxx, maxy, miny]`` in the raster's coordinate system.
        The geotransform origin ``(gt[0], gt[3])`` is used as the
        ``(minx, maxy)`` corner, which holds for north-up rasters.
    """
    # Read everything from the argument. The previous version ignored `file`
    # and silently relied on a notebook-level variable named `image`.
    # get width and height (in pixels)
    width = file.RasterXSize
    height = file.RasterYSize

    # get geotransform: (x origin, pixel width, row rotation,
    #                    y origin, column rotation, pixel height)
    gt = file.GetGeoTransform()

    # get image extent: origin corner plus the offset of the opposite corner
    minx = gt[0]
    miny = gt[3] + width*gt[4] + height*gt[5]
    maxx = gt[0] + width*gt[1] + height*gt[2]
    maxy = gt[3]

    return [minx, maxx, maxy, miny]

In [12]:
def create_bounding_box(extent):
    """Build the rectangular polygon that covers a raster extent.

    ``extent`` is indexed as ``[minx, maxx, maxy, miny]``, the order
    returned by ``image_extent``. The ring is built from the (minx, maxy)
    corner, then (minx, miny), (maxx, miny) and (maxx, maxy).
    """
    minx, maxx = extent[0], extent[1]
    maxy, miny = extent[2], extent[3]

    corners = [
        [minx, maxy],
        [minx, miny],
        [maxx, miny],
        [maxx, maxy],
    ]
    return Polygon(corners)

In [13]:
def extract_house(df, file, image_name,
                  output_dir='/Users/apancham002/Documents/Projecten/Hackathon/test_images2/'):
    """Clip every geometry of ``df`` out of a raster and save the usable clips.

    Parameters
    ----------
    df : GeoDataFrame
        Its ``'geometry'`` column supplies the shapes to clip.
    file : str
        Path of the raster; opened with ``rasterio.open``.
    image_name : str
        File name of the raster. ``image_name[19:-4]`` is used as the prefix
        of every output file name.
    output_dir : str, optional
        Directory that receives the clips. Defaults to the location that was
        previously hard-coded, so existing calls behave as before.

    Returns
    -------
    list of float
        For each geometry, in row order, the fraction of zero-valued samples
        in its clip. Entries are returned for rejected clips as well, so the
        list lines up with the rows of ``df``.
    """
    black_list = []
    graphcount = 0
    # Prefix of the output names: drops the first 19 characters and the
    # 4-character extension of the raster file name.
    stem = image_name[19:-4]

    os.makedirs(output_dir, exist_ok=True)

    # Open the raster once instead of once per polygon.
    with rasterio.open(file) as src:
        for geom in df['geometry']:
            # mask() expects an iterable of shapes. Passing a one-element
            # list works for Polygon and MultiPolygon rows alike, whereas
            # wrapping the row in MultiPolygon([...]) fails for multi-part
            # rows and is not iterable under shapely 2.
            out_image, out_transform = mask(src, [geom], crop=True)

            # Fraction of zero-valued samples over all bands; out_image.size
            # replaces the hard-coded assumption of exactly 3 bands.
            percentage_black = np.count_nonzero(out_image == 0) / out_image.size
            black_list.append(percentage_black)

            # Keep this threshold in sync with the caller, which numbers the
            # accepted rows with the same `< 0.2` rule.
            if percentage_black < 0.2:
                out_meta = src.meta.copy()
                out_meta.update({"driver": "GTiff",
                                 "height": out_image.shape[1],
                                 "width": out_image.shape[2],
                                 "transform": out_transform})

                graphcount = graphcount + 1
                # NOTE(review): the data is written with the GTiff driver
                # although the name ends in .jpg; the name is kept unchanged
                # because downstream labels are derived from it.
                output_path = os.path.join(
                    output_dir,
                    stem + '_' + str(graphcount) + '_' + "masked.jpg")
                with rasterio.open(output_path, "w", **out_meta) as dest:
                    dest.write(out_image)
    return black_list

In [14]:
# Collect one frame of output image names per processed raster.
list_df = []
for ind, image_name in enumerate(image_list):
    print("{}: {}".format(ind, image_name))
    # get abs image path
    abs_image_path = os.path.join(image_path, image_name)

    # open file; gdal.Open returns None instead of raising when it fails
    image = gdal.Open(abs_image_path)
    if image is None:
        raise RuntimeError("GDAL could not open {}".format(abs_image_path))

    # get image extent
    extent = image_extent(image)

    # get bounding box
    bounding_box = create_bounding_box(extent)

    # get overlap as first filter: keep geometries whose centroid lies inside
    # the raster footprint. .copy() makes this an independent frame, so the
    # column added below is not written into a slice of df_training
    # (avoids pandas' SettingWithCopyWarning).
    df_overlap = df_training[df_training.geometry.centroid.within(bounding_box)].copy()

    # one black-fraction value per row of df_overlap, in row order
    damage_list = extract_house(df_overlap, abs_image_path, image_name)

    df_overlap['damage_list'] = damage_list

    # Keep the rows whose clip was saved (same `< 0.2` rule as extract_house)
    # and number them from 0 in row order.
    df_csv = (
        df_overlap[df_overlap.damage_list < 0.2]
        .reset_index(drop=True)
        .reset_index()
        .rename(columns={'index': 'number', '_damage': 'tags'})
    )

    # number + 1 reproduces the 1-based counter used in the saved file names
    df_csv['image_name'] = image_name[19:-4] + '_' + (df_csv.number + 1).astype(str) + '_' + 'masked'

    list_df.append(df_csv[['image_name']])

0: georeference_after%2FRescUAV_17917_Middle_modified.tif
1: georeference_after%2FRescUAV_22917_DutchCul_modified.tif


In [15]:
# Stack the per-image name tables. Each of them is indexed from 0, so
# ignore_index=True is needed to give the combined table unique row labels.
df_overview = pd.concat(list_df, ignore_index=True)

In [None]:
# Write the overview of image names to disk without the row index.
# NOTE(review): this targets .../test_images/ while extract_house writes its
# clips to .../test_images2/ by default - confirm that this is intended.
df_overview.to_csv(
    r'/Users/apancham002/Documents/Projecten/Hackathon/test_images/test_label.csv',
    index=False,
)

In [None]:
# DutchCul overlaps with:
# - Cul
# - Colehill
# - Cayhill

# testfile1 is Middle and overlaps with:
# - MtWilliam