In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
from shapely import wkt
import matplotlib.pyplot as plt
import rasterio
from rasterio.features import rasterize
import torch
import time

In [None]:
# TODO: parallelize / GDAL Python binding via direct CLI call / raster of 24 x bbox height, stepping only to the right and then splitting? / polygon-based


# Bounding box und Geometrie für Sentinel-Abfrage

In [None]:
# Field polygons (WKT geometry) with their LNF code.
df_raw = pd.read_csv('bern_landkult.csv')

# `usecols` only selects columns; the resulting order follows the file, not the
# list. Rename by name so 'level3' and the code column can never be swapped.
labels = pd.read_csv(
    '../Level_hierarchy.csv', usecols=['level3', 'LNF_code']
).rename(columns={'LNF_code': 'LNF_CODE'})

In [None]:
# Attach the level-3 label to every field via its LNF code and keep only the
# fields that actually received a label.
df_landkult = (
    df_raw
    .merge(labels, how='left', on='LNF_CODE')
    .dropna(subset=['level3'])
)

In [None]:
# Parse the WKT strings into shapely geometries on a derived frame, leaving
# df_landkult['geometry'] untouched so that this cell can be re-run safely.
gdf = gpd.GeoDataFrame(
    df_landkult.assign(geometry=df_landkult['geometry'].apply(wkt.loads)),
    geometry='geometry',
    # Authority string instead of the deprecated {'init': 'epsg:2056'} dict.
    crs='EPSG:2056',
)
# Reproject to EPSG:32632 (WGS 84 / UTM zone 32N).
gdf_32632 = gdf.to_crs(32632)
gdf_32632.head()

In [None]:
#gdf_32632.plot()

In [None]:
# from sentinelhub import (
#     CRS,
#     BBox,
#     bbox_to_dimensions
# )

Sentinel stellt Funktionen bereit, die mithilfe des Parameters "resolution" die Pixelgrösse festlegen. Damit können wir einen durch 240 Pixel (= 2.4 km) teilbaren Ausschnitt wählen, den wir jeweils mit einem Request bei Sentinel abfragen können.
Für den Kanton Bern werden wir ein Mosaik aus 23 x 41 Elementen haben.

In [None]:
# Lower-left corner of the mosaic in EPSG:32632 coordinates.
x_start = 361630.
y_start = 5140066.
# 23 x 41 elements of 2400 CRS units each.
x_end = x_start + 23*2400
y_end = y_start + 41*2400

# (width, height) of the mosaic in pixels at the given resolution. Computed
# directly because the sentinelhub import is commented out in this notebook,
# so the former bbox_to_dimensions(BBox(...)) call raised a NameError.
resolution = 10
mosaic_size_px = (
    round((x_end - x_start) / resolution),
    round((y_end - y_start) / resolution),
)
mosaic_size_px

In [None]:
# Regular grid of 230 x 410 square boxes with an edge length of `step`,
# starting at (x_start, y_start).
x_start = 361630.
y_start = 5140066.
step = 240
x_end = x_start + 230*step
y_end = y_start + 410*step
x_coordinate = list(np.arange(x_start, x_end, step))
y_coordinate = list(np.arange(y_start, y_end, step))

# One record per box, x varying slowest. 'area' is the summed Shape_Area of all
# fields selected by the box window, divided by 10000
# (presumably m^2 -> ha; verify against the data source).
area_box = [
    {
        'x1': x_left,
        'y1': y_bottom,
        'x2': x_left + step,
        'y2': y_bottom + step,
        'area': gdf_32632.cx[x_left:x_left + step, y_bottom:y_bottom + step].Shape_Area.sum() / 10000,
    }
    for x_left in x_coordinate
    for y_bottom in y_coordinate
]
area_boxes = pd.DataFrame(area_box)

In [None]:
# One marker per grid box at its (x1, y1) corner, sized by the box 'area'.
fig, ax = plt.subplots()
ax.scatter(x='x1', y='y1', s='area', data=area_boxes, alpha=0.5, c='g')

In [None]:
# Turn zeros into NaN and report how many boxes have no 'area'.
area_boxes = area_boxes.replace(0, np.nan)
print(area_boxes['area'].isna().sum())

In [None]:
# Keep only boxes with an 'area' value, persist the full table, then reduce the
# in-memory frame to the lower-left corner columns.
bboxes = area_boxes.dropna(subset=['area'])
bboxes.to_csv('../raw_data/BernCrop/bboxes_sentinel_240x240.csv')
bboxes = bboxes.drop(columns=['x2', 'y2', 'area'])

### Bounding-Box für ETH

In [None]:
# Show the overall extent of the grid as (x_start, x_end, y_start, y_end).
x_start,x_end, y_start, y_end

In [None]:
# Plot every field selected by the overall extent window.
eth_extent = gdf_32632.cx[x_start:x_end, y_start:y_end]
eth_extent.plot()

# Rasterize GIS-Data
## Label-Ebene
Die Polygonzüge der einzelnen Landwirtschaftsfelder haben jeweils einen LNF_Code hinterlegt. In einem ersten Schritt wird jedem Polygonzug das Label der 3. Hierarchiestufe (level3) zugeordnet. Die Originaldaten des Kantons Bern werden in das erforderliche Koordinatensystem WGS 84 / UTM zone 32N umgewandelt.
In einem zweiten Schritt werden die Polygon-Labels auf Rasterebene umgewandelt, so dass jedes Pixel (10 x 10 m) ein Label hat. Das Ergebnis ist eine Liste von numpy-Arrays, bei der jedes Array eine Kachel von 24 x 24 Pixeln (240 x 240 m) der Sentinel-Abfrage darstellt.

In [None]:
# Geometry together with its level-3 label.
gdf_pair = gdf_32632.loc[:, ['geometry', 'level3']]

In [None]:
# Plain list of (geometry, level3) tuples.
test_gdf = list(gdf_pair.itertuples(index=False, name=None))


In [None]:
# Reload the box table exported earlier in this notebook (including x2, y2 and
# area), keeping the original grid index. The path uses forward slashes so it
# works on every platform and matches the file name written by the export cell.
bboxes = pd.read_csv('../raw_data/BernCrop/bboxes_sentinel_240x240.csv', index_col=[0])

In [None]:
from shapely.geometry import Polygon

# Three sample boxes picked by grid index for a quick visual check. An explicit
# copy is taken so the GeoDataFrame below is not built on a slice of `bboxes`.
data = bboxes[bboxes.index.isin([10, 409, 410])].copy()

# One rectangle per box, built from its corner coordinates.
geometry = [
    Polygon([(row.x1, row.y1), (row.x2, row.y1), (row.x2, row.y2), (row.x1, row.y2)])
    for row in data.itertuples(index=False)
]

# Stored under its own name so the field GeoDataFrame `gdf` is not overwritten.
# The box coordinates are in EPSG:32632.
gdf_bbox_sample = gpd.GeoDataFrame(data, geometry=geometry, crs='EPSG:32632')

# Plot the sample boxes
ax = gdf_bbox_sample.plot(edgecolor='blue', facecolor='green', alpha=0.4)
ax.set_title('Polygons Plot on Map')
ax.set_xlabel('X-axis')
ax.set_ylabel('Y-axis')
plt.show()


In [None]:
import time
import rasterio
from rasterio.features import rasterize

def raster(coord, geo_df, field_df, tile_px=24, n_tiles=410, resolution=10):
    """
    Rasterize label and field polygons for one vertical strip of bounding boxes.

    The strip is ``tile_px`` pixels wide and ``tile_px * n_tiles`` pixels tall.
    Its lower-left corner is the minimum ``x1`` / ``y1`` found in ``coord``.
    Rows run from north to south, so ``result.reshape(-1, tile_px, tile_px)``
    yields one tile per grid cell, ordered from the northern-most to the
    southern-most cell.

    NOTE(review): the strip is anchored at the smallest ``y1`` present in
    ``coord``. If the southern-most box of a column was filtered out upstream,
    the strip starts further north - confirm that this is acceptable.

    coord: DataFrame with x and y coordinates of bounding_boxes ('x1', 'y1')
    geo_df: list of tuple with geometry (Polygon) and value pair (pixel raster)
    field_df: list of tuple with geometry (Polygon) and value pair (field raster)
    tile_px: edge length of one bounding box in pixels
    n_tiles: number of boxes stacked vertically in the strip
    resolution: pixel size in CRS units

    Returns the tuple (result_pixel, result_field); both arrays have the shape
    (tile_px * n_tiles, tile_px).
    """
    # rasterio expects out_shape as (rows, cols) = (height, width).
    out_shape = (tile_px * n_tiles, tile_px)

    west = coord['x1'].min()
    south = coord['y1'].min()
    # A north-up transform is anchored at the upper-left corner, so the origin
    # is the top edge of the strip and not its lowest y coordinate.
    north = south + out_shape[0] * resolution
    transform = rasterio.transform.from_origin(west, north, resolution, resolution)

    with rasterio.Env():
        start_time = time.time()
        result_pixel = rasterize(geo_df, out_shape=out_shape, transform=transform)
        result_field = rasterize(field_df, out_shape=out_shape, transform=transform)
        print(f"End Time: {time.time() - start_time}")

    return result_pixel, result_field



## Feld-Ebene

In [None]:
# Geometry, running row id ('Unnamed: 0') and level-3 label per field. The
# explicit copy makes later column assignments operate on an independent frame
# instead of a slice of gdf_32632.
gdf_pair_field = gdf_32632[['geometry', 'Unnamed: 0', 'level3']].copy()

In [None]:
# Positional rename: the second column ('Unnamed: 0') becomes 'gt_instance';
# the other two names stay as they are.
gdf_pair_field.columns=['geometry', 'gt_instance', 'level3']


In [None]:
# Field instance id = running row id + 1,000,000. It is derived from the
# source column rather than from gt_instance itself, so re-running this cell
# does not add the offset a second time, and no value is written into a slice.
gdf_pair_field = gdf_pair_field.assign(gt_instance=gdf_32632['Unnamed: 0'] + 1000000)
gdf_pair_field.head()

Falls wir die Zuordnung Feld - Label nochmals brauchen, exportieren wir diese als CSV. Dies gäbe uns die Möglichkeit, einen stratified Fold zu machen.

In [None]:
# Export the gt_instance -> level3 mapping without the geometry column.
field_labels = gdf_pair_field.drop(columns=['geometry'])
field_labels.to_csv(r'gt_instance-level3.csv')

In [None]:
# Only geometry and gt_instance are needed from here on.
gdf_pair_field = gdf_pair_field.drop(columns=['level3'])

In [None]:
# Plain list of (geometry, gt_instance) tuples.
pair_gdf = list(gdf_pair_field.itertuples(index=False, name=None))

In [None]:
def _split_into_tiles(raster_arr, tile_px=24):
    """Cut a 2-D raster into spatially contiguous (tile_px, tile_px) tiles.

    Works for any raster whose height and width are multiples of ``tile_px``.
    Tiles are returned row-major (top row of tiles first, left to right). A
    plain ``reshape(-1, tile_px, tile_px)`` is only equivalent when the raster
    is exactly one tile wide.
    """
    rows, cols = raster_arr.shape
    return (
        raster_arr
        .reshape(rows // tile_px, tile_px, cols // tile_px, tile_px)
        .swapaxes(1, 2)
        .reshape(-1, tile_px, tile_px)
    )


N_SLICES = 2           # number of grid columns processed in this run
BOXES_PER_SLICE = 410  # boxes per grid column (consecutive index values)

# loop over slices
result_pixel_list = []
result_field_list = []

for slice_idx in range(N_SLICES):
    start_index = slice_idx * BOXES_PER_SLICE
    end_index = (slice_idx + 1) * BOXES_PER_SLICE
    in_slice = (bboxes.index >= start_index) & (bboxes.index < end_index)
    bboxes_slice = bboxes[in_slice]
    result_pixel, result_field = raster(bboxes_slice, test_gdf, pair_gdf)
    result_pixel_list.append(_split_into_tiles(result_pixel))
    result_field_list.append(_split_into_tiles(result_field))
    


In [None]:
# Shape of the stacked pixel-label tiles.
np.shape(result_pixel_list)

In [None]:
# Shape after flattening all slices into one list of 24 x 24 tiles.
np.reshape(result_pixel_list, (-1, 24, 24)).shape

In [None]:
# Shape of the stacked field-instance tiles.
np.shape(result_field_list)

In [None]:
# Distinct values present in the first slice of the pixel raster.
first_slice_labels = np.unique(result_pixel_list[0])
first_slice_labels

In [None]:
# Rasterize once more, this time passing the complete box table.
result_pixel, result_field = raster(coord=bboxes, geo_df=test_gdf, field_df=pair_gdf)


In [None]:
# Show tile 3 of slice 1, first from the field raster and then from the pixel raster.
for tiles in (result_field_list, result_pixel_list):
    plt.imshow(tiles[1][3])
    plt.show()

In [None]:
# Merge all slices into a single array of 24 x 24 tiles per raster type.
# Note that both names change from list to ndarray here.
result_pixel_list = np.reshape(result_pixel_list, (-1, 24, 24))
result_field_list = np.reshape(result_field_list, (-1, 24, 24))

In [None]:
# Cast the field raster to NumPy's default integer type.
result_field_list = result_field_list.astype(np.int_)

In [None]:
# Display the field-instance array after the integer cast.
result_field_list

In [None]:
# Convert both tile arrays to torch tensors and report their sizes.
# NOTE(review): torch.Tensor(...) builds a tensor of torch's default dtype, so
# the integer cast applied to the field array is presumably not preserved in
# tensor_field - confirm that downstream code expects this.
tensor_pixel = torch.Tensor(result_pixel_list)
print(tensor_pixel.shape)
tensor_field = torch.Tensor(result_field_list)
print(tensor_field.shape)

In [None]:
# Persist the label and field tensors.
for tensor, target_path in (
    (tensor_pixel, '../raw_data/BernCrop/tensor_label_24x24.pt'),
    (tensor_field, '../raw_data/BernCrop/tensor_field_24x24.pt'),
):
    torch.save(tensor, target_path)