In [None]:
import matplotlib.pyplot as plt

def plot_class_distribution_with_percentage(class_distribution, title="Class Distribution with Percentages", save_path=None, min_label_percentage=0.25):
    """
    Plot the class distribution as a bar chart with percentages annotated on each bar.

    Each bar's height is the class count. A bar is labelled with its share of
    the total (one decimal place) only when that share is at least
    ``min_label_percentage``, so very small classes stay unlabelled.

    Args:
        class_distribution (dict): A dictionary mapping each class to its count.
        title (str): The title of the plot.
        save_path (str): Optional path to save the plot. When given, the figure
            is written there and ``plt.show()`` is not called.
        min_label_percentage (float): Smallest percentage (0-100) that still
            gets a text label on its bar.

    Returns:
        None
    """
    classes = list(class_distribution.keys())
    counts = list(class_distribution.values())

    total_samples = sum(counts)  # Total samples in the dataset
    if total_samples > 0:
        percentages = [count / total_samples * 100 for count in counts]
    else:
        # All counts are zero (or there are no classes): avoid dividing by zero
        # and simply draw the bars without percentage labels.
        percentages = [0.0] * len(counts)

    fig, ax = plt.subplots(figsize=(12, 6))
    bars = ax.bar(classes, counts, color='skyblue')

    # Annotate each sufficiently large bar with its percentage. The label is
    # offset in points (screen units) rather than data units so it sits just
    # above the bar regardless of the magnitude of the counts.
    for bar, percentage in zip(bars, percentages):
        if percentage >= min_label_percentage:
            ax.annotate(
                f'{percentage:.1f}%',
                xy=(bar.get_x() + bar.get_width() / 2, bar.get_height()),
                xytext=(0, 3),
                textcoords='offset points',
                ha='center',
                va='bottom',
                fontsize=10,
            )

    ax.set_xlabel("Class", fontsize=14)
    ax.set_ylabel("Count", fontsize=14)
    ax.set_title(title, fontsize=16)
    ax.set_xticks(classes)  # One tick per class
    ax.tick_params(axis='x', labelrotation=90)  # Rotate x-axis labels for better readability
    ax.grid(axis='y', linestyle='--', alpha=0.7)
    fig.tight_layout()  # Keep the rotated tick labels inside the figure

    if save_path:
        fig.savefig(save_path)
        print(f"Plot saved to {save_path}")
    else:
        plt.show()

In [19]:
import pandas as pd
import pprint

# Read the dataset's metadata table from disk.
with open('data/land_cover_representation/metadata.csv') as metadata_file:
    metadata = pd.read_csv(metadata_file)

# Distinct land-cover names found in the metadata.
classes = metadata['land_cover'].unique()
# Number of rows per land-cover name, then the same information as a plain dict.
class_distribution = metadata['land_cover'].value_counts()
classes_dict = class_distribution.to_dict()

pprint.pp(classes_dict)

{'Grapes': 90484,
 'Almonds': 49792,
 'Grassland/Pasture': 42771,
 'Fallow/Idle Cropland': 18897,
 'Developed/Med Intensity': 16955,
 'Developed/Open Space': 15231,
 'Alfalfa': 12274,
 'Developed/Low Intensity': 10359,
 'Pistachios': 6334,
 'Tomatoes': 4443,
 'Developed/High Intensity': 4273,
 'Winter Wheat': 3485,
 'Walnuts': 3299,
 'Oranges': 3280,
 'Cotton': 2754,
 'Dbl Crop WinWht/Corn': 2697,
 'Triticale': 2527,
 'Open Water': 2320,
 'Corn': 1557,
 'Onions': 1017,
 'Barren': 598,
 'Garlic': 592,
 'Oats': 553,
 'Barley': 489,
 'Dbl Crop WinWht/Sorghum': 410,
 'Woody Wetlands': 307,
 'Shrubland': 296,
 'Carrots': 219,
 'Pomegranates': 207,
 'Sorghum': 206,
 'Safflower': 194,
 'Dbl Crop Oats/Corn': 161,
 'Citrus': 141,
 'Cherries': 134,
 'Dry Beans': 121,
 'Other Hay/Non Alfalfa': 90,
 'Plums': 76,
 'Olives': 75,
 'Lettuce': 62,
 'Peaches': 60,
 'Evergreen Forest': 58,
 'Other Tree Crops': 54,
 'Durum Wheat': 49,
 'Dbl Crop Barley/Corn': 19,
 'Herbaceous Wetlands': 16,
 'Deciduous Fo

In [14]:
# Distinct land-cover names found in the metadata.
class_names = metadata['land_cover'].unique()

# Spot-check: list the land-cover name(s) of the rows whose label id `y` is 29.
test1 = metadata.loc[metadata['y'] == 29]
print(test1['land_cover'].unique())

['Walnuts']


In [18]:
# Build the label-id -> land-cover-name lookup from the (y, land_cover) pairs
# that actually occur together in a row. Zipping two independently computed
# unique() arrays only lines up when the two columns are strictly one-to-one;
# pairing row-wise does not rely on that coincidence.
label_pairs = metadata[['y', 'land_cover']].drop_duplicates()
# Fail loudly if one label id is attached to more than one name, since a dict
# could not represent that and would silently keep only one of them.
assert label_pairs['y'].is_unique, "each label id in 'y' must map to exactly one land_cover name"
class_doc = label_pairs.set_index('y')['land_cover'].sort_index().to_dict()

pprint.pp(class_doc)

{0: 'Corn',
 1: 'Cotton',
 2: 'Rice',
 3: 'Sorghum',
 4: 'Sweet Corn',
 5: 'Barley',
 6: 'Durum Wheat',
 7: 'Spring Wheat',
 8: 'Winter Wheat',
 9: 'Rye',
 10: 'Oats',
 11: 'Safflower',
 12: 'Alfalfa',
 13: 'Other Hay/Non Alfalfa',
 14: 'Dry Beans',
 15: 'Other Crops',
 16: 'Watermelons',
 17: 'Onions',
 18: 'Peas',
 19: 'Tomatoes',
 20: 'Sod/Grass Seed',
 21: 'Fallow/Idle Cropland',
 22: 'Cherries',
 23: 'Peaches',
 24: 'Grapes',
 25: 'Other Tree Crops',
 26: 'Citrus',
 27: 'Pecans',
 28: 'Almonds',
 29: 'Walnuts',
 30: 'Pears',
 31: 'Open Water',
 32: 'Developed/Open Space',
 33: 'Developed/Low Intensity',
 34: 'Developed/Med Intensity',
 35: 'Developed/High Intensity',
 36: 'Barren',
 37: 'Deciduous Forest',
 38: 'Evergreen Forest',
 39: 'Shrubland',
 40: 'Grassland/Pasture',
 41: 'Woody Wetlands',
 42: 'Herbaceous Wetlands',
 43: 'Pistachios',
 44: 'Triticale',
 45: 'Carrots',
 46: 'Asparagus',
 47: 'Garlic',
 48: 'Cantaloupes',
 49: 'Olives',
 50: 'Oranges',
 51: 'Honeydew Melons'