In [7]:
# Required Libraries
import pandas as pd
import numpy as np

# Load Dataset
def load_data(file_path):
    """Loads the agricultural dataset from a CSV file."""
    try:
        data = pd.read_csv(file_path)
        print("Data loaded successfully.")
        return data
    except Exception as e:
        print(f"Error loading data: {e}")
        return None

# Clean Data
def clean_data(data):
    """Performs basic data cleaning."""
    # Convert columns to appropriate data types if necessary
    numeric_columns = data.columns[5:]  # Assuming first 5 columns are non-numeric
    data[numeric_columns] = data[numeric_columns].apply(pd.to_numeric, errors='coerce')
    
    # Handle missing values (optional: here filling with -1 to denote missingness)
    data.fillna(-1, inplace=True)
    
    print("Data cleaning completed.")
    return data

# Calculate Average Yield by Crop
def calculate_avg_yield(data, crop_columns):
    """Calculates average yield for specified crops."""
    yield_data = {}
    for crop in crop_columns:
        avg_yield = data[crop].mean()
        yield_data[crop] = avg_yield
        print(f"Average yield for {crop}: {avg_yield}")
    return yield_data

# Analyze Crop Production by Year
def production_by_year(data, crop_column, year_column="Year"):
    """Calculates total production by year for a given crop."""
    yearly_production = data.groupby(year_column)[crop_column].sum()
    print(f"Total production by year for {crop_column}:\n{yearly_production}")
    return yearly_production

# Main Function
def main():
    # Define file path
    file_path = "ICRISAT-District Level Data.csv"
    
    # Load the dataset
    data = load_data(file_path)
    if data is None:
        return
    
    # Clean the data
    data = clean_data(data)
    
    # Select specific columns for yield calculation (adjust column names based on your dataset)
    crop_yield_columns = [
        "RICE YIELD (Kg per ha)", 
        "WHEAT YIELD (Kg per ha)", 
        "SORGHUM YIELD (Kg per ha)"
    ]
    avg_yield = calculate_avg_yield(data, crop_yield_columns)
    
    # Calculate production by year for a specific crop
    crop_column = "RICE PRODUCTION (1000 tons)"
    production_data = production_by_year(data, crop_column)
    
    # Optional: Further analysis or visualization can be added here

if __name__ == "__main__":
    main()


Data loaded successfully.
Data cleaning completed.
Average yield for RICE YIELD (Kg per ha): 1486.9247838473925
Average yield for WHEAT YIELD (Kg per ha): 1492.4198587885544
Average yield for SORGHUM YIELD (Kg per ha): 586.0935612535613
Total production by year for RICE PRODUCTION (1000 tons):
Year
1966     29133.23
1967     35056.64
1968     34701.52
1969     35836.01
1970     39324.12
1971     39946.79
1972     37925.93
1973     42026.94
1974     37690.51
1975     46144.36
1976     40226.45
1977     50439.41
1978     50868.02
1979     40560.64
1980     52796.25
1981     52486.37
1982     45081.34
1983     57766.50
1984     56451.83
1985     62289.85
1986     58592.01
1987     54890.34
1988     68923.09
1989     73530.86
1990     71613.37
1991     71858.31
1992     70205.91
1993     77179.69
1994     79273.91
1995     72615.41
1996     78120.14
1997     80504.09
1998     82394.23
1999     86527.07
2000     83476.59
2001     91556.12
2002     69917.86
2003     84971.15
2004     80951.6