In [6]:
import pandas as pd
import geopandas
from shapely.geometry import Point
import rasterio
from rasterio.mask import mask
import numpy as np
import os
import glob
import re

In [None]:
def approximate_co2_rice_linkage(csv_path, raster_path, output_csv_path,
                                 buffer_radius_meters,
                                 rice_proportion_threshold,
                                 lon_col='longitude', lat_col='latitude',
                                 co2_points_crs_epsg=4326,
                                 quality_flag_col=None,
                                 valid_quality_flags=None):
    """Keep the CO2 points whose surrounding buffer is sufficiently covered by rice.

    The points are read from ``csv_path``, reprojected to the CRS of the rice
    raster, and buffered. For every buffer the share of valid raster pixels
    whose value equals 1 (rice) is computed. Rows whose share is at least
    ``rice_proportion_threshold`` are written to ``output_csv_path`` together
    with two extra columns: ``rice_proportion_in_buffer`` and
    ``is_rice_influenced``.

    Args:
        csv_path: Input CSV containing the CO2 points.
        raster_path: Rice map raster; band 1 is read and pixels equal to 1
            are counted as rice.
        output_csv_path: Destination CSV for the rows flagged as
            rice-influenced (written even when no row qualifies).
        buffer_radius_meters: Buffer radius, applied after reprojection and
            therefore expressed in the raster CRS's units (meters only if
            that CRS is a metric projected CRS).
        rice_proportion_threshold: Minimum rice share (0-1) for a point to be
            flagged.
        lon_col: Name of the X / longitude column.
        lat_col: Name of the Y / latitude column.
        co2_points_crs_epsg: EPSG code of the input coordinates.
        quality_flag_col: Optional column used to pre-filter rows.
        valid_quality_flags: Values of ``quality_flag_col`` to keep; the
            filter is applied only when both arguments are given.

    Returns:
        None. The result is written to ``output_csv_path``.
    """
    co2_df = pd.read_csv(csv_path)

    # Optional pre-filter on a quality-flag column.
    if quality_flag_col and valid_quality_flags is not None:
        co2_df = co2_df[co2_df[quality_flag_col].isin(valid_quality_flags)].copy()

    with rasterio.open(raster_path) as src_rice_map:
        rice_map_crs = src_rice_map.crs
        rice_map_nodata = src_rice_map.nodata
        print(f"  CRS bản đồ lúa: {rice_map_crs}")
        print(f"  NoData bản đồ lúa: {rice_map_nodata}")

        geometry = [Point(xy) for xy in zip(co2_df[lon_col], co2_df[lat_col])]
        co2_gdf = geopandas.GeoDataFrame(co2_df, geometry=geometry, crs=f"EPSG:{co2_points_crs_epsg}")
        if co2_gdf.crs != rice_map_crs:
            print(f"  Chiếu lại điểm CO2 từ {co2_gdf.crs} sang {rice_map_crs}...")
            co2_gdf = co2_gdf.to_crs(rice_map_crs)

        # Buffers are kept in a separate series so no helper column has to be
        # added to (and later dropped from) the output frame.
        buffers = co2_gdf.geometry.buffer(buffer_radius_meters)

        rice_proportions = []
        is_rice_influenced = []

        for buffer_geom in buffers:
            try:
                # filled=False returns a masked array in which both the pixels
                # outside the buffer and the raster's own nodata pixels are
                # masked. This avoids a sentinel fill value, which is wrong
                # when the raster has no nodata (outside pixels were counted
                # as valid), may not fit the raster dtype, and cannot be
                # compared with != when nodata is NaN.
                out_image, _ = mask(dataset=src_rice_map,
                                    shapes=[buffer_geom],
                                    crop=True,
                                    filled=False)
            except ValueError:
                # With crop=True rasterio raises ValueError when the shape does
                # not overlap the raster; treat such a point like a buffer with
                # no valid pixels instead of aborting the whole file.
                rice_proportions.append(0.0)
                is_rice_influenced.append(False)
                continue

            # Band 1, unmasked pixels only (inside the buffer and not nodata).
            valid_pixels = out_image[0].compressed()

            if valid_pixels.size == 0:
                rice_proportions.append(0.0)
                is_rice_influenced.append(False)
                continue

            # Count rice pixels (value == 1).
            num_rice_pixels = np.count_nonzero(valid_pixels == 1)
            proportion = num_rice_pixels / valid_pixels.size
            rice_proportions.append(proportion)
            is_rice_influenced.append(proportion >= rice_proportion_threshold)

        # Explicit dtypes keep the columns well-typed even for an empty input,
        # so the boolean row selection below always works.
        co2_gdf['rice_proportion_in_buffer'] = np.asarray(rice_proportions, dtype=float)
        co2_gdf['is_rice_influenced'] = np.asarray(is_rice_influenced, dtype=bool)
        output_df = pd.DataFrame(co2_gdf.drop(columns=['geometry']))

    influenced_count = int(output_df['is_rice_influenced'].sum())
    print(f"Số điểm CO2 được xác định là ảnh hưởng bởi lúa (tỷ lệ >= {rice_proportion_threshold*100}%): {influenced_count}")
    filtered_output_df = output_df[output_df['is_rice_influenced']]
    filtered_output_df.to_csv(output_csv_path, index=False, float_format='%.6f')

In [None]:
def process_all_csv_files(data_folder=os.path.join("Output", "2020", "raw_data"),
                          filter_in_rice_folder=os.path.join("Output", "2020", "filter_in_rice"),
                          raster_rice_map_projected=os.path.join("Data", "rice_map_2020_250m_UTM48N_mode.tif"),
                          buffer_radius=1000,
                          rice_threshold=0.3):
    """Run approximate_co2_rice_linkage on every dated raw-data CSV in a folder.

    Files named ``raw_data_<NN-NN-NN>.csv`` inside ``data_folder`` are
    processed in sorted order; the result for each is written to
    ``filter_in_rice_folder`` as ``data_in_rice_buffer_<NN-NN-NN>.csv``.
    Files matching ``raw_data_*.csv`` without that date pattern are reported
    and skipped.

    Args:
        data_folder: Folder holding the ``raw_data_*.csv`` inputs.
        filter_in_rice_folder: Output folder; created if it does not exist.
        raster_rice_map_projected: Path of the rice map raster passed to
            approximate_co2_rice_linkage.
        buffer_radius: Buffer radius passed as ``buffer_radius_meters``.
        rice_threshold: Value passed as ``rice_proportion_threshold``.

    Returns:
        List of the output CSV paths that were written (empty when nothing
        was processed).
    """
    # glob returns files in arbitrary order; sort for a reproducible run.
    csv_files_to_process = sorted(glob.glob(os.path.join(data_folder, "raw_data_*.csv")))
    if not csv_files_to_process:
        print(f"Không tìm thấy file CSV nào trong thư mục: {data_folder}")
        return []

    # to_csv does not create missing directories, so make sure the target exists.
    os.makedirs(filter_in_rice_folder, exist_ok=True)

    date_pattern = re.compile(r"raw_data_(\d{2}-\d{2}-\d{2})\.csv")
    written_paths = []

    for csv_file_path in csv_files_to_process:
        file_name = os.path.basename(csv_file_path)
        match = date_pattern.search(file_name)
        if not match:
            print(f"Không thể trích xuất ngày từ tên file: {file_name}. Bỏ qua file này.")
            continue

        date_str = match.group(1)
        output_csv_filename = f"data_in_rice_buffer_{date_str}.csv"
        output_csv_full_path = os.path.join(filter_in_rice_folder, output_csv_filename)

        approximate_co2_rice_linkage(
            csv_path=csv_file_path,
            raster_path=raster_rice_map_projected,
            output_csv_path=output_csv_full_path,
            buffer_radius_meters=buffer_radius,
            rice_proportion_threshold=rice_threshold,
            lon_col='longitude',
            lat_col='latitude',
            co2_points_crs_epsg=4326,
            quality_flag_col=None,
            valid_quality_flags=None
        )
        written_paths.append(output_csv_full_path)

    return written_paths

# Run the batch only when this code executes as the main module, not on import.
if __name__ == "__main__":
    process_all_csv_files()

In [None]:
import os
import pandas as pd
# Merge every filtered CSV found in folder_path into one DataFrame (merged_df).
# NOTE(review): this is a hard-coded absolute path for 2021, while
# process_all_csv_files writes to a relative Output\2020 folder - confirm intended.
folder_path = "D:\\NCKH - Estimation GHG\\Output\\2021\\filter_in_rice"
# os.listdir returns names in arbitrary order; sort so the row order of
# merged_df is reproducible between runs.
csv_files = sorted(f for f in os.listdir(folder_path) if f.endswith('.csv'))
df_list = []
for file in csv_files:
    file_path = os.path.join(folder_path, file)
    df = pd.read_csv(file_path)
    df_list.append(df)
# pd.concat raises ValueError on an empty list, so fall back to an empty frame
# when the folder contains no CSV files.
merged_df = pd.concat(df_list, ignore_index=True) if df_list else pd.DataFrame()
print(f"Đã hợp nhất {len(df_list)} file CSV thành một DataFrame với {len(merged_df)} dòng.")