In [None]:
import geopandas as gpd
import numpy as np 
import pandas
import rasterio
import matplotlib.pyplot as plt
from rasterio.plot import show
from rasterio.mask import mask
import os
import json
from shapely.geometry import box, Polygon
import logging

class Crop_tif_varsize():
    """
    Crop a TIF image into square tiles whose side length is configurable.

    In: tif image to be cropped, the shapefile holding the rutor (grid squares),
        and the side length (dims) of the tiles to generate.
    Returns: crop_rutor() / crop_negatives() write one cropped tif per tile into
        destination_path and return a dict mapping crop name -> label.

    The raster stays open for the lifetime of the instance. Call close() when
    done, or use the instance as a context manager.

    NOTE(review): img_rutor is currently the *full* grid of tiles as a GeoSeries
    without labels (see reshape_rutor), so crop_rutor() cannot read PALS from it
    and crop_negatives() has no real "positive" set to exclude. Both need the
    planned filtering step before this class produces a usable training set.
    """

    def __init__(self, img_name_code, img_path, rutor_path, destination_path, dims, logger):
        """
        Open the raster, read the rutor shapefile and pre-compute the tile grid.

        img_name_code:    prefix used in the names of the written crops
        img_path:         path of the TIF to crop
        rutor_path:       path of the vector file with the rutor
        destination_path: directory the cropped TIFs are written to
        dims:             side length of the generated tiles, in CRS units
        logger:           logging.Logger used for diagnostics (may be None)
        """

        self.img_name_code = img_name_code
        self.dimensions = dims
        self.destination_path = destination_path
        self.logger = logger
        self.img_path = img_path
        self.rutor_path = rutor_path

        if self.logger is not None:
            self.logger.debug("Tile side length (dims): %s", self.dimensions)

        self.img = rasterio.open(img_path)
        try:
            self.rutor = gpd.read_file(rutor_path)
            self.filtered_rutor = self.filter_rutor()
            self.img_rutor = self.reshape_rutor() # this will at some point have as input the filtered rutor.
        except Exception:
            # do not leak the open dataset when construction fails half-way
            self.img.close()
            raise

    def close(self):
        """Close the raster dataset opened in __init__."""
        self.img.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False  # never swallow exceptions raised inside the with-block

    def filter_rutor(self):
        """Return the rutor selected by the bounding box of the current TIF."""

        minx, miny, maxx, maxy = self.img.bounds
        # coordinate-based selection with the raster's own bounds
        img_rutor = self.rutor.cx[minx:maxx, miny:maxy]
        return img_rutor

    def reshape_rutor(self):
        """Return the grid of dims x dims tiles generated over the TIF extent."""

        new_dim_rutor = self.generate_geoseries(self.img.bounds, self.img.crs, self.dimensions)
        return new_dim_rutor

    def _write_crop(self, polygon, crop_name):
        """Crop the raster to `polygon` and write it as <crop_name>.tif in destination_path."""

        cropped_data, cropped_transform = mask(self.img, [polygon], crop=True)

        # The crop has its own size and georeferencing; cropped_data.shape[1] and
        # shape[2] are used as height and width of the output.
        cropped_meta = self.img.meta.copy()
        cropped_meta.update({"driver": "GTiff",
                             "height": cropped_data.shape[1],
                             "width": cropped_data.shape[2],
                             "transform": cropped_transform})

        output_path = os.path.join(self.destination_path, f"{crop_name}.tif")
        with rasterio.open(output_path, "w", **cropped_meta) as dest:
            dest.write(cropped_data)

    def crop_rutor(self):

        """
        Crop TIF according to the polygons containing palsa.

        Returns a dict mapping "<img_name_code>_crop_<idx>" to the PALS value of
        the corresponding polygon.

        Raises AttributeError when img_rutor carries no PALS column.
        """

        # NOTE(review): reshape_rutor() currently returns a plain GeoSeries, which
        # has no PALS column. Fail with an explicit message instead of an opaque one.
        if not hasattr(self.img_rutor, "PALS"):
            raise AttributeError(
                "img_rutor has no 'PALS' column; crop_rutor needs polygons with a palsa percentage"
            )

        cropped_tifs_percentages = {}
        for idx, percentage, polygon in zip(self.img_rutor.index, self.img_rutor.PALS, self.img_rutor.geometry):
            crop_name = f"{self.img_name_code}_crop_{idx}" # CHANGE THIS NAMING?
            self._write_crop(polygon, crop_name)

            # the percentage is the label of this crop
            cropped_tifs_percentages[crop_name] = percentage

        return cropped_tifs_percentages

    @staticmethod
    def _grid_boxes(bounds, dims):
        """
        Return (left, bottom, right, top) tuples of all dims x dims squares that
        fit completely inside `bounds`, row by row starting at the lower-left corner.
        """

        # Derive the tile count from the actual extent instead of assuming a
        # 5000-unit raster. Rounding absorbs floating-point noise in the bounds
        # so that e.g. 4999.9999999 still yields 50 tiles of 100.
        segments_x = int(round(bounds.right - bounds.left, 6) // dims)
        segments_y = int(round(bounds.top - bounds.bottom, 6) // dims)

        boxes = []
        for i in range(segments_y):
            for j in range(segments_x):
                left = bounds.left + j * dims
                bottom = bounds.bottom + i * dims
                boxes.append((left, bottom, left + dims, bottom + dims))
        return boxes

    def generate_geoseries(self, bounds, crs, dims):

        """
        Generates all dims x dims polygons present in a TIF.
        Enables the negative sampling from the image.

        bounds: object with left/bottom/right/top attributes (e.g. self.img.bounds)
        crs:    coordinate reference system assigned to the result
        dims:   side length of the squares, in the units of `bounds`

        Returns a GeoSeries with one square polygon per grid cell.
        """

        polygons = [
            Polygon([(right, bottom), (left, bottom), (left, top), (right, top), (right, bottom)])
            for left, bottom, right, top in self._grid_boxes(bounds, dims)
        ]

        all_rutor = gpd.GeoSeries(polygons, crs=crs)
        return all_rutor

    def crop_negatives(self, random_state=None):

        """
        Generates negative samples. Equal amount of negative as positive samples are
        taken from each image such that the final dataset is 50/50 positive and negative.

            1) split the whole TIF into 100x100m polygons.
            2) filter out the areas containing palsa (positive samples)
            3) randomly sample as many negative samples as positive samples from that image
            4) crop the TIF according to the sampled areas and write locally

        random_state: optional seed forwarded to the sampling step to make the
            selection reproducible; None keeps the selection random.

        Returns a dict mapping "<img_name_code>_neg_crop_<idx>" to the label 0.
        """

        # NOTE(review): the grid is still generated with a fixed side length of
        # 100 while positives use self.dimensions - confirm whether negatives
        # should follow self.dimensions as well.
        all_rutor = self.generate_geoseries(self.img.bounds, self.img.crs, dims = 100)

        # drop the squares that are also present in img_rutor
        positives_mask = ~all_rutor.isin(self.img_rutor.geometry)
        all_negatives = all_rutor[positives_mask]

        # randomly sample
        sample_size = int(len(self.img_rutor)) # based on number of positive samples
        if sample_size <= len(all_negatives): # default case
            negative_samples = all_negatives.sample(n=sample_size, random_state=random_state)
        else:
            self.logger.info('Exception occurred! Number of positive samples > 1/2 image. Training set now contains fewer negative than positive samples.')
            negative_samples = all_negatives

        cropped_tifs_percentages = {}
        for idx, polygon in enumerate(negative_samples.geometry):
            crop_name = f"{self.img_name_code}_neg_crop_{idx}" # CHANGE THIS NAMING?
            self._write_crop(polygon, crop_name)

            # negative samples are labelled with 0
            cropped_tifs_percentages[crop_name] = 0

        return cropped_tifs_percentages
                

In [7]:
#############
## imports ##
#############

# libraries 
import geopandas as gpd
import numpy as np 
import pandas as pd
import rasterio
import matplotlib.pyplot as plt
from rasterio.plot import show
from rasterio.mask import mask
import os
import json
import logging

# functions 
from functions import filter_imgs, Crop_tif

##################
## setup logger ##
##################

logger = logging.getLogger('my_logger')
logger.setLevel(logging.DEBUG)

# logging.getLogger returns the same logger object for the same name, so
# re-running this cell would stack another console handler on top of the
# existing one and every message would be printed once per handler.
# Attach the handler only when none is installed yet.
if not logger.handlers:
    ch = logging.StreamHandler() # create console handler
    ch.setLevel(logging.DEBUG) # set level to debug
    formatter = logging.Formatter("%(asctime)s - %(message)s \n", "%Y-%m-%d %H:%M:%S") # create formatter
    ch.setFormatter(formatter) # add formatter to ch
    logger.addHandler(ch) # add ch to logger

# logger.info('Imports successful')

##################
## load configs ##
##################

# configs.json is read from the current working directory
config_path = os.path.join(os.getcwd(), 'configs.json')
with open(config_path, 'r') as config_file:
    configs = json.load(config_file)

# image settings
config_img = configs.get('image_info', {})
dims = config_img.get('meters_per_axis') # value of the 'meters_per_axis' setting

# paths (a missing section or key yields None instead of raising)
config_paths = configs.get('paths', {})
original_tif_dir = config_paths.get('original_tif_dir') # directory with all tifs
save_crops_dir = config_paths.get('save_crops_dir') # directory the crops are saved to
palsa_shapefile_path = config_paths.get('palsa_shapefile_path') # shapefile path

# logger.info('Configurations were loaded')

##########
## code ##
##########

# logger.info('Starting to sample relevant TIF paths...')

# tif file names which contain palsa (returns a list of filenames to be cropped)
palsa_tifs = filter_imgs(original_tif_dir)

# logger.info(f'{len(palsa_tifs)} TIF paths have been loaded!')
# logger.info('Starting to generate training samples from TIFs..')

labels = {}

print(dims, type(dims))

# build the cropper for each TIF
for idx, img_name in enumerate(palsa_tifs):
    img_path = os.path.join(original_tif_dir, img_name)
    img_name_code = img_name.partition('.')[0]  # file name up to the first dot
    cropping = Crop_tif(img_name_code, img_path, palsa_shapefile_path, save_crops_dir, dims, logger)

    # only the first TIF is handled while the cropping steps below are disabled
    break

#     positive_labels = cropping.crop_rutor()
#     negative_labels = cropping.crop_negatives()
#     all_labels = positive_labels | negative_labels
#     labels = labels | all_labels
#     # logger.info(f'Generated training samples from image {idx+1}/{len(palsa_tifs)}')

# label_df = pd.DataFrame.from_dict(labels, orient='index', columns = ['palsa_percentage'])
# label_df.to_csv(os.path.join(save_crops_dir, "palsa_labels.csv"))


100 <class 'int'>
100


In [8]:
# keep the cropper's two polygon collections under short names for inspection
orig_rutor, all_rutor = cropping.filtered_rutor, cropping.img_rutor

In [9]:
import geopandas as gpd

# Wrap all_rutor in a GeoDataFrame with a running 'name' column so that it can
# be spatially joined with orig_rutor.
d = {'name': list(range(len(all_rutor)))}
df = pd.DataFrame(d)
all_rutor_df = gpd.GeoDataFrame(d, geometry=all_rutor, crs=all_rutor.crs)

# Inner spatial join: one row per pair of polygons from all_rutor_df and
# orig_rutor that match under sjoin's default predicate.
joined_df = gpd.sjoin(all_rutor_df, orig_rutor, how='inner')

# Index values of the all_rutor_df polygons that matched at least one polygon
# of orig_rutor ...
covering_polygons_index = joined_df.index.unique()

# ... and the matching rows themselves
result_df = all_rutor_df.loc[covering_polygons_index]

In [11]:
# number of rows in the first (at most five) rows of the join result
len(joined_df.iloc[:5])

5

In [6]:
def test(x):
    if x == 1:
        return 2
    
    print('hello')
    return 3

test(1)

2