In [1]:
import geopandas as gpd
import pandas as pd
import numpy as np
import rasterio
from rasterio.features import geometry_mask, rasterize
from rasterio.transform import from_origin
from rasterio.mask import mask

In [None]:


# --- Inputs / outputs -------------------------------------------------------
# Station CSV to rasterize, destination GeoTIFF, and the name of the column
# whose values are burned into the raster.
csv_filepath = 'og_csv/averaged_data_2021_Q3.csv'
combined_raster_path = 'test_out/out.tiff'
target_col = 'target'

df = pd.read_csv(csv_filepath)

# The 'lon'/'lat' columns are coordinates in degrees, so they need a geographic
# CRS. (The previous value, EPSG:32633, is a projected UTM zone in metres.)
# They are declared in the same CRS as the border and the output raster because
# no reprojection happens anywhere below.
# NOTE(review): confirm the datum of the CSV; if it is WGS84 (EPSG:4326) the
# offset is presumably negligible at the 0.01-degree pixel size used later.
gdf = gpd.GeoDataFrame(
    df,
    geometry=gpd.points_from_xy(df['lon'], df['lat']),
    crs="EPSG:4269",
)

# Turn every point into a disc. The distance is in coordinate units, i.e.
# 0.05 degrees (very roughly 5 km north-south), not an exact 5 km.
gdf['geometry'] = gdf['geometry'].buffer(0.05)

# Burn value = measurement + 1.
# NOTE(review): the +1 presumably keeps real readings distinct from the raster
# fill value 0 and from the border value 1 used further down -- confirm.
gdf[target_col] = gdf['Evapotranspiration (mm)'] + 1

# Load the state outline and force its CRS to EPSG:4269. allow_override=True
# replaces any CRS stored with the shapefile without reprojecting, which is
# what assigning to `.crs` did before.
california_border = gpd.read_file("California.shp").set_crs(
    "EPSG:4269", allow_override=True
)

# --- Output grid ------------------------------------------------------------
# The grid covers the bounding box of the border layer, with square pixels of
# `pixel_size` coordinate units and its origin at the top-left corner.
xmin, ymin, xmax, ymax = california_border.total_bounds
pixel_size = 0.01  # Adjust as needed
# NOTE(review): int() truncates, so up to one pixel of the extent along the
# east and south edges can fall outside the grid.
width = int((xmax - xmin) / pixel_size)
height = int((ymax - ymin) / pixel_size)
transform = from_origin(xmin, ymax, pixel_size, pixel_size)

# --- Layer 1: buffered stations ---------------------------------------------
# (geometry, value) pairs for rasterize(). The variable name mentions AQI, but
# the burned value is whatever `target_col` holds.
shapes_with_aqi = list(zip(gdf['geometry'], gdf[target_col]))

buffered_raster = rasterize(
    shapes_with_aqi,
    out_shape=(height, width),
    transform=transform,
    fill=0,
    dtype=rasterio.float32,
)

# --- Layer 2: state footprint -----------------------------------------------
# invert=True makes the mask True for pixels inside the border geometry.
california_raster_mask = geometry_mask(
    california_border['geometry'],
    out_shape=(height, width),
    transform=transform,
    invert=True,
)
# 1 inside the border, 0 elsewhere.
california_raster = california_raster_mask.astype(np.uint8)

# --- Combine ----------------------------------------------------------------
# Per-pixel maximum of the two layers: a station value wins over the border
# value 1 wherever it is larger.
combined_raster = np.maximum(buffered_raster, california_raster)


# --- Mask to California and write the GeoTIFF --------------------------------
# Pixels outside the border polygon(s) are set to NoData before writing.
# `california_raster_mask` (built above with geometry_mask on this same grid)
# is True inside the border, so its negation selects everything to blank out.
#
# This replaces the earlier write -> reopen in 'r+' -> rasterio.mask.mask()
# round trip, which had two problems:
#   * mask() returns filled pixel data, not a boolean mask, so comparing it
#     with False really tested "pixel value == 0";
#   * the NoData value was only put into an unused metadata copy and never
#     stored in the file.
nodata_value = -1

# Work on a copy so `combined_raster` keeps the unmasked values.
masked_raster = combined_raster.copy()
masked_raster[~california_raster_mask] = nodata_value

with rasterio.open(
    combined_raster_path, 'w',
    driver='GTiff',
    height=masked_raster.shape[0],
    width=masked_raster.shape[1],
    count=1,
    dtype=rasterio.float32,
    crs="EPSG:4269",
    nodata=nodata_value,  # recorded in the file so readers can mask it out
    transform=transform,
) as dst:
    dst.write(masked_raster, 1)

In [17]:
# Report the grid geometry of the source PFAS raster: pixel size, extent and
# pixel dimensions. Only metadata is read; no pixel data is loaded.
with rasterio.open('HUC8_CA_PFAS_GTruth_Summa.tif') as src:
    resolution = src.res
    bounds = src.bounds
    width, height = src.width, src.height

# Left/right are reported as longitudes, bottom/top as latitudes.
min_lon, max_lon = bounds.left, bounds.right
min_lat, max_lat = bounds.bottom, bounds.top

print(f"  Resolution: {resolution}")
print(f"  Bounding Box: ({min_lon}, {max_lon}, {min_lat}, {max_lat})")
print(f"  Pixel Dimensions: {width} x {height}")

  Resolution: (0.0010006858070884998, 0.001000000149001847)
  Bounding Box: (-124.34911344788206, -114.13211135750848, 32.534282317412945, 42.004283728460436)
  Pixel Dimensions: 10210 x 9470


In [18]:
import rasterio
from rasterio.enums import Resampling

# Resample the PFAS raster onto a fixed 10210 x 9460 pixel grid and save the
# result as a new GeoTIFF.
large_data = 'HUC8_CA_PFAS_GTruth_Summa2.tiff'

with rasterio.open('HUC8_CA_PFAS_GTruth_Summa.tif') as src:
    # Target size in pixels. These are fixed numbers, not derived from the
    # source. NOTE(review): presumably chosen to match the 10210 x 9460 grid
    # of the Lead raster inspected elsewhere in this notebook -- confirm.
    new_width = 10210
    new_height = 9460

    # Old-to-new pixel count ratio per axis. Scaling the affine transform by it
    # changes the pixel size while keeping the same origin.
    x_ratio = src.width / new_width
    y_ratio = src.height / new_height
    new_transform = src.transform * src.transform.scale(x_ratio, y_ratio)

    # Read every band directly at the target shape; bilinear interpolation is
    # applied while decimating/expanding.
    data = src.read(
        out_shape=(src.count, new_height, new_width),
        resampling=Resampling.bilinear,
    )

    # Start from the source profile and override only what changed.
    profile = src.profile
    profile.update(
        height=new_height,
        width=new_width,
        transform=new_transform,
        driver='GTiff',
    )

# The array and profile are already in memory, so the source can be closed
# before the output is written.
with rasterio.open(large_data, 'w', **profile) as dst:
    dst.write(data)


In [None]:
import rasterio
from rasterio.enums import Resampling

# Upsample a raster by a factor of 10 along both axes (bilinear) and save it.
#
# NOTE(review): the output path below is the same file that the previous
# resampling cell reads as its source, so running this cell overwrites it.
# NOTE(review): the input name ends in '.tif', whereas the previous resampling
# cell writes 'HUC8_CA_PFAS_GTruth_Summa2.tiff' -- confirm which file is meant.
# NOTE(review): the whole upsampled array is held in memory; for an input of
# about 10210 x 9460 pixels that is on the order of 10^10 values per band.
large_data = 'HUC8_CA_PFAS_GTruth_Summa.tif'

with rasterio.open('HUC8_CA_PFAS_GTruth_Summa2.tif') as src:
    # Ten times as many pixels along each axis.
    new_width = src.width * 10
    new_height = src.height * 10

    # Old-to-new pixel count ratio per axis. Scaling the affine transform by it
    # shrinks the pixel size while keeping the same origin.
    x_ratio = src.width / new_width
    y_ratio = src.height / new_height
    new_transform = src.transform * src.transform.scale(x_ratio, y_ratio)

    # Read every band directly at the enlarged shape with bilinear resampling.
    data = src.read(
        out_shape=(src.count, new_height, new_width),
        resampling=Resampling.bilinear,
    )

    # Start from the source profile and override only what changed.
    profile = src.profile
    profile.update(
        height=new_height,
        width=new_width,
        transform=new_transform,
        driver='GTiff',
    )

# The array and profile are already in memory, so the source can be closed
# before the output is written.
with rasterio.open(large_data, 'w', **profile) as dst:
    dst.write(data)


In [11]:
# Report the grid geometry of one of the Lead rasters: pixel size, extent and
# pixel dimensions. Only metadata is read; no pixel data is loaded.
with rasterio.open('large_tiff_data2/2016_averaged_Lead_data_Q1.tiff') as src:
    resolution = src.res
    bounds = src.bounds
    width, height = src.width, src.height

# Left/right are reported as longitudes, bottom/top as latitudes.
min_lon, max_lon = bounds.left, bounds.right
min_lat, max_lat = bounds.bottom, bounds.top

print(f"  Resolution: {resolution}")
print(f"  Bounding Box: ({min_lon}, {max_lon}, {min_lat}, {max_lat})")
print(f"  Pixel Dimensions: {width} x {height}")

  Resolution: (0.001, 0.001)
  Bounding Box: (-124.34811344799999, -114.13811344799998, 32.54367419300005, 42.00367419300005)
  Pixel Dimensions: 10210 x 9460


In [16]:
# Report the grid geometry of the 'Summa3' PFAS raster: pixel size, extent and
# pixel dimensions. Only metadata is read; no pixel data is loaded.
# NOTE(review): no visible cell writes a file with this name -- confirm where
# it comes from.
with rasterio.open('HUC8_CA_PFAS_GTruth_Summa3.tiff') as src:
    resolution = src.res
    bounds = src.bounds
    width, height = src.width, src.height

# Left/right are reported as longitudes, bottom/top as latitudes.
min_lon, max_lon = bounds.left, bounds.right
min_lat, max_lat = bounds.bottom, bounds.top

print(f"  Resolution: {resolution}")
print(f"  Bounding Box: ({min_lon}, {max_lon}, {min_lat}, {max_lat})")
print(f"  Pixel Dimensions: {width} x {height}")

  Resolution: (0.0010006858070884998, 0.0010010572316117854)
  Bounding Box: (-124.34911344788206, -114.13211135750848, 32.534282317412945, 42.004283728460436)
  Pixel Dimensions: 10210 x 9460
