In [4]:
#Code for fixing "broken cells". Creates a new grid, places a center point in each cell and then assigns the values for that cell from the cell under the center point
#IMPORTANT - THE BROKEN CELL FILE SHOULD HAVE FULL CELLS ON THE EDGES 

import geopandas as gpd
from shapely.geometry import Point, Polygon
import numpy as np
import pandas as pd  
import time

#load the original shapefile
path = #define path here
original_grid = gpd.read_file(path + "test_broken_cells.shp")


#create a new regular grid
minx, miny, maxx, maxy = original_grid.total_bounds

cell_size = 15.7894999999553  #identify the cell size

x_coords = np.arange(minx, maxx, cell_size)
y_coords = np.arange(miny, maxy, cell_size)


grid_cells = []
for x in x_coords:
    for y in y_coords:
        grid_cells.append(Polygon([(x, y), (x + cell_size, y),
                                   (x + cell_size, y + cell_size), (x, y + cell_size)]))

new_grid = gpd.GeoDataFrame({'geometry': grid_cells}, crs=original_grid.crs)
print(f"Created {len(new_grid)} grid cells.")

#assign the same columns as the original grid
for column in original_grid.columns:
    if column != 'geometry':
        new_grid[column] = None

#assign attributes based on center points

new_grid['center'] = new_grid.geometry.centroid

start_time = time.time()
for index, row in new_grid.iterrows():
    center_point = row['center']
    for _, original_row in original_grid.iterrows():
        if original_row.geometry.contains(center_point):
            for column in original_grid.columns:
                if column != 'geometry':  
                    value = original_row[column]

                    #convert numeric columns if needed
                    if pd.api.types.is_numeric_dtype(original_grid[column]):
                        new_grid.at[index, column] = pd.to_numeric(value, errors='coerce')
                    else:
                        new_grid.at[index, column] = value
            break  
    if index % 100 == 0:  #print progress every 100 iterations
        print(f"Processed {index}/{len(new_grid)} cells...")

elapsed_time = time.time() - start_time
print(f"Attribute assignment completed in {elapsed_time:.2f} seconds.")

#drop the temporary center column
new_grid.drop(columns='center', inplace=True)

#ensure numeric data types for all numeric columns
for column in new_grid.columns:
    if column != 'geometry' and pd.api.types.is_numeric_dtype(original_grid[column]):
        new_grid[column] = pd.to_numeric(new_grid[column], errors='coerce')

#recompute shape metrics

new_grid['Shape_Leng'] = new_grid.geometry.length
new_grid['Shape_Area'] = new_grid.geometry.area

#drop rows where any column has Null values
new_grid = new_grid.dropna()

#convert 'OBJECTID' (cell id) and 'Bestands_I' (stand id) from float to integer
new_grid["OBJECTID"] = new_grid["OBJECTID"].astype(int)
new_grid["Bestands_I"] = new_grid["Bestands_I"].astype(int)

#save the new shapefile

new_grid.to_file(path + "test_full_07_30.shp")
print("Shapefile saved")


Created 3762 grid cells.
Processed 0/3762 cells...
Processed 100/3762 cells...
Processed 200/3762 cells...
Processed 300/3762 cells...
Processed 400/3762 cells...
Processed 500/3762 cells...
Processed 600/3762 cells...
Processed 700/3762 cells...
Processed 800/3762 cells...
Processed 900/3762 cells...
Processed 1000/3762 cells...
Processed 1100/3762 cells...
Processed 1200/3762 cells...
Processed 1300/3762 cells...
Processed 1400/3762 cells...
Processed 1500/3762 cells...
Processed 1600/3762 cells...
Processed 1700/3762 cells...
Processed 1800/3762 cells...
Processed 1900/3762 cells...
Processed 2000/3762 cells...
Processed 2100/3762 cells...
Processed 2200/3762 cells...
Processed 2300/3762 cells...
Processed 2400/3762 cells...
Processed 2500/3762 cells...
Processed 2600/3762 cells...
Processed 2700/3762 cells...
Processed 2800/3762 cells...
Processed 2900/3762 cells...
Processed 3000/3762 cells...
Processed 3100/3762 cells...
Processed 3200/3762 cells...
Processed 3300/3762 cells...
P