# Import packages

In [1]:
import numpy as np 
from matplotlib import pyplot as plt
import pandas as pd
import geopandas as gpd
import gdal
from shapely.geometry import Point, Polygon, box, MultiPolygon
import os
import rasterio
from rasterio.mask import mask
# Working directory at the time this cell runs; the data paths below are built from it.
cwd = os.getcwd()

Enable inline rendering of matplotlib figures

In [2]:
%matplotlib inline

# Define constants

Location of the GeoJSON file

In [3]:
# Path of the GeoJSON file that is read into df_training_raw further down.
json_file = cwd + '/1/Building info/training_bbox.geojson'

Set the image directory and make it the working directory

In [4]:
# Directory holding the images that are processed in this notebook.
image_path = cwd + '/1/After/Modified'
# Switch the working directory to it: the next cell lists os.getcwd() to find the images.
os.chdir(image_path)

List the files in the image directory

In [5]:
# Every entry of the current working directory (the image directory at this point).
image_list_raw = os.listdir(os.getcwd())

# Keep everything except the macOS '.DS_Store' metadata file.
image_list = [entry for entry in image_list_raw if entry != '.DS_Store']

# Geojson

Read in geojson file

In [6]:
# Load the GeoJSON file at json_file into a GeoDataFrame.
df_training_raw = gpd.read_file(json_file)

Remove rows without geometries

In [7]:
# Boolean mask: True for rows whose geometry is present.
has_geometry = df_training_raw.geometry.notna()
df_training = df_training_raw[has_geometry]

In [8]:
# Preview the first rows of the filtered training frame.
df_training.head()

Unnamed: 0,OBJECTID,osm_way_id,name,building,_damage,geometry
0,2,7553886,Auto Depot,yes,none,"POLYGON ((493363.4892000108 1993911.488412196,..."
1,4,7554274,Island Finance,commercial,significant,"POLYGON ((494159.3213000094 1993344.12256257, ..."
2,5,7564898,,residential,none,"POLYGON ((486365.6538000216 1995800.083562638,..."
3,7,7575764,,yes,partial,"POLYGON ((495659.9875000064 1995598.881562633,..."
4,8,7580073,,yes,none,"POLYGON ((493368.3394000101 1996769.319962667,..."


# .tiff file

List the .tif files to process

In [9]:
# Show the file names collected from the image directory.
image_list

['georeference_after%2FRescUAV_17917_Pt.Blanche_modified.tif',
 'georeference_after%2FRescUAV_24917_Ebenezer_modified.tif',
 'georeference_after%2FRescUAV_29917_Almond_modified.tif',
 'georeference_after%2FRescUAV_29917_Almond_modified (1).tif',
 'georeference_after%2FRescUAV_29917_ColeW_modified.tif',
 'georeference_after%2FRescUAV_23917_ColeHill_modified.tif',
 'georeference_after%2FRescUAV_13917_PhilNE_modified.tif',
 'georeference_after%2FRescUAV_17917_Middle_modified.tif',
 'georeference_after%2FRescUAV_21017_BillyFolly_modified.tif',
 'georeference_after%2FRescUAV_16917_Pt.Blanche3_modified.tif',
 'georeference_after%2FRescUAV_21017_Beacon_modified.tif',
 'georeference_after%2FRescUAV_20917_PhilipsBurgE_modified.tif',
 'georeference_after%2FRescUAV_11917_Lowlands_modified.tif',
 'georeference_after%2FRescUAV_26917_StPeters_modified.tif',
 'georeference_after%2FRescUAV_21917_Illidge_modified.tif',
 'georeference_after%2FRescUAV_22917_DutchCul_modified.tif',
 'georeference_after%2F

In [10]:
def image_extent(file):
    """Return the georeferenced extent of an opened raster.

    Parameters
    ----------
    file : object
        An opened GDAL-style dataset exposing ``RasterXSize``,
        ``RasterYSize`` and ``GetGeoTransform()``.

    Returns
    -------
    list
        ``[minx, maxx, maxy, miny]``. The origin of the geotransform is used
        as ``(minx, maxy)`` and the opposite raster corner as
        ``(maxx, miny)``.
        NOTE(review): this presumes a north-up raster (negative ``gt[5]``);
        confirm for the input data.
    """
    # Read from the dataset that was passed in. The body used to read a
    # global named ``image`` and silently ignored this argument.
    width = file.RasterXSize
    height = file.RasterYSize

    # 6-element affine geotransform of the raster
    gt = file.GetGeoTransform()

    # Origin corner gives minx / maxy; applying the transform to
    # (width, height) gives the opposite corner.
    minx = gt[0]
    miny = gt[3] + width*gt[4] + height*gt[5]
    maxx = gt[0] + width*gt[1] + height*gt[2]
    maxy = gt[3]

    return [minx, maxx, maxy, miny]

In [11]:
def create_bounding_box(extent):
    """Build a rectangular shapely ``Polygon`` from an extent sequence.

    ``extent[0]`` and ``extent[1]`` are used as the two x coordinates,
    ``extent[2]`` and ``extent[3]`` as the two y coordinates. The corners
    are listed in the order (x0, y0), (x0, y1), (x1, y1), (x1, y0).
    """
    x_first, x_second = extent[0], extent[1]
    y_first, y_second = extent[2], extent[3]

    corners = [
        [x_first, y_first],
        [x_first, y_second],
        [x_second, y_second],
        [x_second, y_first],
    ]
    return Polygon(corners)

In [12]:
def extract_house(df, file, image_name,
                  output_dir='/Users/apancham002/Documents/Projecten/Hackathon/full_images_unique/',
                  max_black_fraction=0.2):
    """Clip each footprint in ``df`` out of a raster and save the usable crops.

    Parameters
    ----------
    df : GeoDataFrame
        Frame whose ``'geometry'`` column holds the footprints to clip.
    file : str
        Path of the raster to clip from.
    image_name : str
        File name of the raster; ``image_name[19:-4]`` is used as the stem of
        the output file names.
    output_dir : str, optional
        Directory the crops are written to. Defaults to the location that
        was previously hard-coded.
    max_black_fraction : float, optional
        A crop is only written when its fraction of zero-valued samples is
        strictly below this value.

    Returns
    -------
    list of float
        The zero-valued fraction of every footprint, in the order of ``df``
        (including footprints whose crop was not written).
    """
    black_list = []
    graphcount = 0
    name_stem = image_name[19:-4]

    # Open the raster once for all footprints instead of once per footprint.
    with rasterio.open(file) as src:
        for geom in df['geometry']:
            # mask() expects an iterable of geometries. A plain list works
            # with every Shapely version, whereas a MultiPolygon is no longer
            # iterable in Shapely 2.x.
            out_image, out_transform = mask(src, [geom], crop=True)

            # Fraction of zero-valued samples over all bands. Using .size
            # keeps this correct for any band count, not only 3.
            percentage_black = np.count_nonzero(out_image == 0) / out_image.size
            black_list.append(percentage_black)

            if percentage_black < max_black_fraction:
                out_meta = src.meta.copy()
                out_meta.update({"driver": "GTiff",
                                 "height": out_image.shape[1],
                                 "width": out_image.shape[2],
                                 "transform": out_transform})

                # Running number of written crops; part of the file name.
                graphcount = graphcount + 1
                # NOTE(review): the crop is written with the GTiff driver
                # although the name ends in ".jpg". Left unchanged so the
                # output file names stay the same; confirm this is intended.
                output_path = os.path.join(
                    output_dir,
                    name_stem + '_' + str(graphcount) + '_' + "masked.jpg")
                with rasterio.open(output_path, "w", **out_meta) as dest:
                    dest.write(out_image)
    return black_list

In [13]:
# Per-image label tables, concatenated in a later cell.
list_df = []
# OBJECTIDs already assigned to an earlier image, so each building is used once.
unique_building_list = []

for ind, image_name in enumerate(image_list):
    print("{}: {}".format(ind, image_name))
    # get abs image path
    abs_image_path = os.path.join(image_path, image_name)

    # open file; gdal.Open may return None instead of raising, so fail
    # early with a readable message.
    image = gdal.Open(abs_image_path)
    if image is None:
        raise RuntimeError("GDAL could not open {}".format(abs_image_path))

    # get image extent and the matching bounding box
    extent = image_extent(image)
    bounding_box = create_bounding_box(extent)

    # first filter: buildings whose centroid lies inside the image extent
    in_image = df_training.geometry.centroid.within(bounding_box)
    # second filter: buildings not yet assigned to an earlier image
    is_new = ~df_training.OBJECTID.isin(unique_building_list)
    df_overlap = df_training[in_image & is_new]

    unique_building_list.extend(df_overlap.OBJECTID.values)
    print(len(unique_building_list))

    # Despite the name, these are the zero-valued fractions returned by
    # extract_house (one per building).
    damage_list = extract_house(df_overlap, abs_image_path, image_name)

    # assign() returns a new frame, which avoids writing a column onto a
    # filtered slice (SettingWithCopyWarning).
    df_overlap = df_overlap.assign(damage_list=damage_list)

    # Keep only buildings below the 0.2 threshold (the same one
    # extract_house uses by default when writing crops) and number them
    # from 0 in row order.
    df_csv = (
        df_overlap[df_overlap.damage_list < 0.2]
        .reset_index(drop=True)
        .reset_index()
        .rename(columns={'index': 'number', '_damage': 'tags'})
    )

    # Rebuild the output file stem written by extract_house (1-based counter).
    image_stem = image_name[19:-4]
    df_csv['image_name'] = image_stem + '_' + (df_csv.number + 1).astype(str) + '_' + 'masked'

    list_df.append(df_csv[['image_name', 'tags']])

0: georeference_after%2FRescUAV_17917_Pt.Blanche_modified.tif
32
1: georeference_after%2FRescUAV_24917_Ebenezer_modified.tif
1169
2: georeference_after%2FRescUAV_29917_Almond_modified.tif
1178
3: georeference_after%2FRescUAV_29917_Almond_modified (1).tif
1178
4: georeference_after%2FRescUAV_29917_ColeW_modified.tif
1211
5: georeference_after%2FRescUAV_23917_ColeHill_modified.tif
1462
6: georeference_after%2FRescUAV_13917_PhilNE_modified.tif
1965
7: georeference_after%2FRescUAV_17917_Middle_modified.tif
2283
8: georeference_after%2FRescUAV_21017_BillyFolly_modified.tif
2440
9: georeference_after%2FRescUAV_16917_Pt.Blanche3_modified.tif
2606
10: georeference_after%2FRescUAV_21017_Beacon_modified.tif
2606
11: georeference_after%2FRescUAV_20917_PhilipsBurgE_modified.tif
3148
12: georeference_after%2FRescUAV_11917_Lowlands_modified.tif
3174
13: georeference_after%2FRescUAV_26917_StPeters_modified.tif
3794
14: georeference_after%2FRescUAV_21917_Illidge_modified.tif
4005
15: georeference_afte

In [14]:
# Stack the per-image label tables into one frame. No ignore_index is passed,
# so the per-image row indices are kept as they are.
df_overview = pd.concat(list_df)

In [None]:
# DutchCul overlaps with:
# - Cul
# - Colehill
# - Cayhill

# testfile1 is Middle, which overlaps with:
# - MtWilliam

In [15]:
# Name fragments of the images whose rows are dropped from the overview.
overlapping_images = [
    r'2FRescUAV_23917_Cul_modified',
    r'2FRescUAV_22917_DutchCul_modified',
    r'2FRescUAV_23917_ColeHill_modified',
    r'2FRescUAV_28917_CayHill_modified',
    r'2FRescUAV_17917_Middle_modified',
    r'2FRescUAV_31017_MtWilliam_modified',
]

# OR together one str.contains() mask per fragment.
image_names = df_overview.image_name
is_overlapping = image_names.str.contains(overlapping_images[0])
for pattern in overlapping_images[1:]:
    is_overlapping = is_overlapping | image_names.str.contains(pattern)

# Keep only the rows that match none of the fragments.
df_overview_filtered = df_overview[~is_overlapping]


In [16]:
# Destination of the label table.
labels_csv_path = r'/Users/apancham002/Documents/Projecten/Hackathon/full_images_unique/unique_label.csv'

# Drop rows tagged "unknown", then write the rest without the index column.
df_labelled = df_overview_filtered.query('tags!="unknown"')
df_labelled.to_csv(labels_csv_path, index=False)