In [1]:
import geopandas as gpd 
import pandas as pd 
from shapely.geometry import Polygon

In [2]:
# Function to process district data
def process_district_data(file_path):
    """Reshape a long-format district CSV into one row per crop boundary.

    The CSV is read with ``pd.read_csv``; its 'Unnamed: 0' column is used as
    the identifier of a crop boundary. The 'crop' and 'yield' values are spread
    into one column per season (``crop_<season>``, ``yield_<season>``) and
    joined back onto the per-boundary attribute columns.

    Parameters
    ----------
    file_path : str or path-like
        Location of the CSV. It must contain 'Unnamed: 0', 'season', 'crop',
        'yield' and every attribute column listed in ``carried_cols``.

    Returns
    -------
    pandas.DataFrame
        One row per identifier: the identifier, the attribute columns (taken
        from the first row seen for that identifier), then the per-season
        crop columns (text) and yield columns (numeric).
    """
    id_col = 'Unnamed: 0'
    carried_cols = [
        'district', 'palika', 'shape', 'shape_area', 'sowing_date_mon',
        'sowing_date_summ', 'sowing_date_winter', '_geolocation',
        'harvest_date_mon', 'harvest_date_summ', 'harvest_date_winter',
        'remarks',
    ]

    def _join_as_text(values):
        # Several rows for the same (identifier, season) collapse into one
        # comma-separated string.
        return ', '.join(values.dropna().astype(str))

    records = pd.read_csv(file_path)

    # Missing yields are replaced by 0 and the column is turned into text so
    # that it can go through the same string-joining aggregation as 'crop'.
    # NOTE(review): this makes an unreported yield indistinguishable from a
    # reported yield of 0 in the result - confirm that is intended.
    records['yield'] = records['yield'].fillna(0).astype(str)

    wide = records.pivot_table(
        values=['crop', 'yield'],
        index=id_col,
        columns='season',
        aggfunc=_join_as_text,
    )

    # Collapse the (value, season) column pairs into flat "<value>_<season>" names.
    wide.columns = ["{}_{}".format(*pair) for pair in wide.columns]
    wide = wide.reset_index()

    # One attribute row per identifier (first occurrence wins).
    per_boundary = records[[id_col, *carried_cols]].drop_duplicates(subset=[id_col])
    final_df = per_boundary.merge(wide, on=id_col)

    # Yields go back to numbers; anything that is not a single number
    # (e.g. a joined "1.0, 3.0") becomes NaN.
    for name in [col for col in final_df.columns if col.startswith('yield')]:
        final_df[name] = pd.to_numeric(final_df[name], errors='coerce')

    return final_df

In [3]:
# Function to create and filter GeoDataFrame
def create_and_filter_geodataframe(final_df, min_area, min_distance, district_name):
    """Build polygons from the 'shape' column, export them, then thin them out.

    Steps, in order:
      1. Parse every 'shape' string into a shapely ``Polygon`` and wrap the
         frame in a GeoDataFrame declared as EPSG:4326.
      2. Reproject to EPSG:32644 (UTM zone 44N).
      3. Print the polygon count and write all polygons to
         ``./Output/<district_name lower-cased>_poly_unfiltered.shp``.
      4. Keep rows whose 'shape_area' column is strictly greater than
         ``min_area``.
      5. Drop every polygon that lies closer than ``min_distance`` to any
         polygon that comes *after* it in row order.

    Parameters
    ----------
    final_df : pandas.DataFrame
        Must contain a 'shape' column of ';'-separated vertices, each vertex
        being whitespace-separated numbers whose first two fields are read as
        latitude then longitude, and a 'shape_area' column.
    min_area : float
        Threshold compared against the 'shape_area' column (not against the
        computed geometry area).
        NOTE(review): assumes 'shape_area' is in the same unit as this
        threshold - confirm against the data source.
    min_distance : float
        Minimum allowed distance between polygons, in the units of the
        projected CRS.
    district_name : str
        Used in the printed message and in the unfiltered shapefile name.

    Returns
    -------
    geopandas.GeoDataFrame
        The surviving rows (original index labels, columns and CRS kept).
        Empty, but still a valid GeoDataFrame, when nothing survives.
    """
    def parse_polygon_coordinates(geo_shape):
        """Turn 'lat lon[ ...];lat lon[ ...];...' into a Polygon in (lon, lat) order."""
        vertices = []
        for vertex in geo_shape.split(';'):
            fields = vertex.split()
            if not fields:
                # Tolerate a trailing or doubled ';' instead of failing on an
                # empty vertex.
                continue
            vertices.append((float(fields[1]), float(fields[0])))
        return Polygon(vertices)

    # Convert the DataFrame to a GeoDataFrame
    geodf = gpd.GeoDataFrame(
        final_df,
        geometry=final_df['shape'].apply(parse_polygon_coordinates),
        crs='EPSG:4326'  # Assuming the initial CRS is WGS84 (longitude, latitude)
    )

    # Reproject the GeoDataFrame to UTM zone 44N (EPSG:32644)
    geodf_utm = geodf.to_crs('EPSG:32644')

    # Print the number of unfiltered polygons
    print(f"Number of unfiltered polygons in {district_name}: {len(geodf_utm)}")

    # Export the unfiltered polygons as a shapefile with a dynamic name
    unfiltered_filename = f'./Output/{district_name.lower()}_poly_unfiltered.shp'
    geodf_utm.to_file(unfiltered_filename)

    # Filter polygons with area greater than the minimum area
    geodf_filtered_area = geodf_utm[geodf_utm['shape_area'] > min_area]

    def filter_polygons(data, min_distance):
        """Keep the rows of `data` that have no later row closer than `min_distance`."""
        geometries = list(data.geometry)
        total = len(geometries)
        kept_positions = []
        # Work by position rather than by index label: the area filter above
        # leaves gaps in the index, and label arithmetic (label + 1) is only
        # valid for a sorted, unique integer index.
        for pos, geom in enumerate(geometries):
            has_close_successor = any(
                geom.distance(geometries[later]) < min_distance
                for later in range(pos + 1, total)
            )
            if not has_close_successor:
                kept_positions.append(pos)

        # Selecting rows from the original frame keeps dtypes, the geometry
        # column and the CRS, and also works when no polygon survives.
        return data.iloc[kept_positions]

    # Filter polygons
    filtered_data = filter_polygons(geodf_filtered_area, min_distance)

    return filtered_data


In [4]:
# Define parameters
min_area = 400.0      # threshold passed on as `min_area` (compared with 'shape_area')
min_distance = 40.0   # threshold passed on as `min_distance` (polygon spacing)


def process_and_export_district(district_name, min_area, min_distance):
    """Run the whole pipeline for one district and export the filtered polygons.

    Reads ``./Output/<district_name>_expanded.csv``, builds and filters the
    polygons, prints how many survived and writes them to
    ``./Output/<district_name>_poly_filtered.shp``.

    Parameters
    ----------
    district_name : str
        Lower-case district name as used in the file names.
    min_area, min_distance : float
        Passed unchanged to ``create_and_filter_geodataframe``.

    Returns
    -------
    tuple
        ``(final_df, filtered)``: the table returned by
        ``process_district_data`` and the filtered GeoDataFrame.
    """
    final_df = process_district_data(f'./Output/{district_name}_expanded.csv')
    filtered = create_and_filter_geodataframe(final_df, min_area, min_distance, district_name)
    print(f"Number of filtered polygons in {district_name.capitalize()} district : {len(filtered)}")
    filtered.to_file(f'./Output/{district_name}_poly_filtered.shp')
    return final_df, filtered


# Process Bardiya data
bardiya_final, filtered_bardiya = process_and_export_district('bardiya', min_area, min_distance)

# Process Kailali data
kailali_final, filtered_kailali = process_and_export_district('kailali', min_area, min_distance)

# Process Kanchanpur data
kanchanpur_final, filtered_kanchanpur = process_and_export_district('kanchanpur', min_area, min_distance)


Number of unfiltered polygons in bardiya: 508


  geodf_utm.to_file(unfiltered_filename)


Number of filtered polygons in baridya district : 492


  filtered_bardiya.to_file('./Output/bardiya_poly_filtered.shp')


Number of unfiltered polygons in kailali: 605


  geodf_utm.to_file(unfiltered_filename)


Number of filtered polygons in Kailali district : 560


  filtered_kailali.to_file('./Output/kailali_poly_filtered.shp')


Number of unfiltered polygons in kanchanpur: 712


  geodf_utm.to_file(unfiltered_filename)


Number of filtered polygons in Kanchanpur district : 664


  filtered_kanchanpur.to_file('./Output/kanchanpur_poly_filtered.shp')


In [6]:
# Inspect the column names of the filtered Bardiya GeoDataFrame.
filtered_bardiya.columns

Index(['Unnamed: 0', 'district', 'palika', 'shape', 'shape_area',
       'sowing_date_mon', 'sowing_date_summ', 'sowing_date_winter',
       '_geolocation', 'harvest_date_mon', 'harvest_date_summ',
       'harvest_date_winter', 'remarks', 'crop_monsoon', 'crop_summer',
       'crop_winter', 'yield_monsoon', 'yield_summer', 'yield_winter',
       'geometry'],
      dtype='object')

In [5]:
# import pandas as pd
# import matplotlib.pyplot as plt

# def count_unique_crops_by_palika(final_df, palika_col='palika'):
#     crop_columns = ['crop_monsoon', 'crop_summer', 'crop_winter']
    
#     for col in crop_columns:
#         print(f"\nUnique crops in {col} by {palika_col}:")
        
#         # Group by palika and apply the counting operation for each group
#         grouped = final_df.groupby(palika_col)[col].apply(lambda x: x.dropna().str.split(', ').sum())
        
#         for palika, crops in grouped.items():
#             # Count the occurrences of each crop
#             if crops:  # Ensure crops list is not empty
#                 unique_crops = pd.Series(crops).value_counts()
#                 print(f"\nPalika: {palika}")
#                 print(unique_crops)
                
#                 # # Generate and display the histogram
#                 # plt.figure(figsize=(10, 6))
#                 # unique_crops.plot(kind='bar')
#                 # plt.title(f"Histogram of Unique Crops in {col} for {palika}")
#                 # plt.xlabel("Crops")
#                 # plt.ylabel("Count")
#                 # plt.xticks(rotation=45, ha='right')
#                 # plt.tight_layout()
#                 # plt.show()

# # Process Bardiya data
# print('Crop count for Bardiya by Palika')
# count_unique_crops_by_palika(filtered_bardiya)

# # Process Kailali data
# print('Crop count for Kailali by Palika')
# count_unique_crops_by_palika(filtered_kailali)


# # Process Kanchanpur data
# print('Crop count for Kanchanpur by Palika')
# count_unique_crops_by_palika(filtered_kanchanpur)


Crop count for Bardiya by Palika

Unique crops in crop_monsoon by palika:

Palika: Barbardiya
rice          222
ground_nut      8
grassland       7
maize           4
fallow          2
vegetables      2
soyabean        1
shrub_tree      1
Name: count, dtype: int64

Palika: Madhuban
rice          210
soyabean       13
grassland      12
shrub_tree      5
millet          2
fallow          1
vegetables      1
maize           1
Name: count, dtype: int64

Unique crops in crop_summer by palika:

Palika: Barbardiya
fallow        137
maize          72
vegetables     15
black_gram      9
grassland       7
rice            5
pulses          1
shrub_tree      1
Name: count, dtype: int64

Palika: Madhuban
fallow        126
maize          65
vegetables     30
grassland      12
shrub_tree      5
rice            5
black_gram      2
Name: count, dtype: int64

Unique crops in crop_winter by palika:

Palika: Barbardiya
mustard       56
wheat         46
lentil        37
vegetables    33
potato        29
pea

In [11]:
import pandas as pd
import matplotlib.pyplot as plt

def count_unique_crops_and_avg_yields_by_palika(final_df, palika_col='palika'):
    """Count crops and average their yields per palika and season, then export.

    For every (season, palika) pair the crop column is split on ', ' into
    individual crop names. Each name is counted, and its average yield is
    taken over the rows whose crop list contains exactly that name.

    Side effects (all under ``./Output/``):
      * ``cropcount_<crop column>_<palika>.png`` - bar chart of crop counts;
      * ``avg_yields_<crop column>_<palika>.png`` - bar chart of average
        yields, written only when at least one average is available;
      * ``crop_info_<palika>.csv`` - columns Palika, Season, Crop, Count,
        Avg_Yield, one file per palika.

    Parameters
    ----------
    final_df : pandas.DataFrame
        Must contain ``palika_col`` plus the columns 'crop_monsoon',
        'crop_summer', 'crop_winter', 'yield_monsoon', 'yield_summer' and
        'yield_winter'.
    palika_col : str, default 'palika'
        Column to group by.

    Returns
    -------
    None
    """
    crop_columns = ['crop_monsoon', 'crop_summer', 'crop_winter']
    yield_columns = ['yield_monsoon', 'yield_summer', 'yield_winter']

    def _save_bar_chart(series, title, ylabel, out_path):
        """Draw `series` as a bar chart, save it to `out_path` and close the figure."""
        plt.figure(figsize=(10, 6))
        series.plot(kind='bar')
        plt.title(title)
        plt.xlabel("Crops")
        plt.ylabel(ylabel)
        plt.xticks(rotation=45, ha='right')
        plt.tight_layout()
        plt.savefig(out_path)
        # Close right away: one figure is created per palika and season.
        plt.close()

    all_crops_data = []

    for crop_col, yield_col in zip(crop_columns, yield_columns):
        for palika, group in final_df.groupby(palika_col):
            rows = group[group[crop_col].notna()]
            if rows.empty:
                # No crop recorded for this palika in this season.
                continue

            # One list of crop names per row, flattened in a single pass.
            crop_lists = rows[crop_col].str.split(', ')
            crops = [name for names in crop_lists for name in names]
            unique_crops = pd.Series(crops).value_counts()

            # Calculate average yields
            avg_yields = {}
            for crop in unique_crops.index:
                # Exact membership in the split list: a substring/regex match
                # would also pick up rows of other crops whose name merely
                # contains this one, and would misread regex metacharacters.
                has_crop = crop_lists.map(lambda names, wanted=crop: wanted in names)
                yield_values = rows.loc[has_crop.to_numpy(), yield_col].dropna().astype(float)
                avg_yields[crop] = yield_values.mean() if not yield_values.empty else None

            for crop, count in unique_crops.items():
                all_crops_data.append([palika, crop_col, crop, count, avg_yields[crop]])

            # Bar chart of crop counts
            _save_bar_chart(
                unique_crops,
                f"Histogram of Unique Crops in {crop_col} for {palika}",
                "Count",
                f'./Output/cropcount_{crop_col}_{palika}.png',
            )

            # Bar chart of average yields (crops without any yield are left out)
            avg_yields_series = pd.Series(avg_yields).dropna()
            if not avg_yields_series.empty:
                _save_bar_chart(
                    avg_yields_series,
                    f"Histogram of Average Yields in {crop_col} for {palika}",
                    "Average Yield",
                    f'./Output/avg_yields_{crop_col}_{palika}.png',
                )

    # Export crop data to CSV
    crop_df = pd.DataFrame(all_crops_data, columns=['Palika', 'Season', 'Crop', 'Count', 'Avg_Yield'])
    for palika in crop_df['Palika'].unique():
        crop_df[crop_df['Palika'] == palika].to_csv(f'./Output/crop_info_{palika}.csv', index=False)

# Run the crop / average-yield export for each district in turn
# (Bardiya, then Kailali, then Kanchanpur).
for district_label, district_polygons in (
    ('Bardiya', filtered_bardiya),
    ('Kailali', filtered_kailali),
    ('Kanchanpur', filtered_kanchanpur),
):
    print(f'Crop and average yield for {district_label} by Palika')
    count_unique_crops_and_avg_yields_by_palika(district_polygons)





Crop and average yield for Bardiya by Palika
Crop and average yield for Kailali by Palika
Crop and average yield for Kanchanpur by Palika
