<a href="https://colab.research.google.com/github/logeshm2006/Harvest-Nexus/blob/ML/HarvestNexus.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

def _select_single_row(frame, column, value):
    """Return the first row of *frame* where *column* equals *value*.

    The filter column itself is dropped from the returned Series so that only
    the remaining columns are left.

    Raises:
        KeyError: If *column* is not present in *frame*.
        IndexError: If no row matches *value*.
    """
    matches = frame[frame[column] == value]
    if matches.empty:
        # Same exception type that a bare `.iloc[0]` on an empty frame raises,
        # but with a message that says which lookup failed.
        raise IndexError(
            f"No row found where {column!r} == {value!r}; "
            "cannot build the 2017-18 feature vector."
        )
    return matches.drop(columns=[column]).iloc[0]


def prepare_data_for_cnn():
    """
    Load, clean, and combine the 2017-18 data into a single sample.

    Reads six CSV files by relative path (``rainfall_data.csv``,
    ``temperature_humidity_data.csv``, ``land_utilization_data.csv``,
    ``irrigated_area_data.csv``, ``major_crops_data.csv`` and
    ``fertilizer_data.csv``), reduces each one to a single Series, and
    concatenates them into one feature row. The feature row and the target
    value are printed and returned.

    Returns:
        tuple: ``(X, y)`` where ``X`` is a one-row ``pandas.DataFrame`` holding
        the combined features and ``y`` is the rice ``'Total_Yield (Kg/ha)'``
        value.

    Raises:
        KeyError: If an expected column is missing from one of the tables.
        IndexError: If the Rice, 2017-18, 2017 or Total row is missing.
    """
    print("Step 1: Loading and Preprocessing Data...")

    # Load the data files that actually feed the feature vector.
    rainfall_data = pd.read_csv('rainfall_data.csv')
    temp_humidity_data = pd.read_csv('temperature_humidity_data.csv')
    land_utilization_data = pd.read_csv('land_utilization_data.csv')
    irrigated_area_data = pd.read_csv('irrigated_area_data.csv')
    major_crops_data = pd.read_csv('major_crops_data.csv')
    fertilizer_data = pd.read_csv('fertilizer_data.csv')

    # -------------------------------------------------------------------------
    # --- Data Cleaning and Aggregation for 2017-18 ---
    # -------------------------------------------------------------------------

    # NOTE(review): the major crops, irrigated area, land utilization and
    # temperature/humidity tables are not filtered by year here; this assumes
    # each of them only contains 2017-18 (or 2017) data -- confirm.

    # 1. Major Crops Data for Rice
    rice_data = _select_single_row(major_crops_data, 'Crops', 'Rice')

    # 2. Fertilizer Consumption (YEAR is compared as the string '2017-18')
    fertilizer_2017 = _select_single_row(fertilizer_data, 'YEAR', '2017-18')

    # 3. Rainfall Data (Year is compared as the integer 2017)
    rainfall_2017 = _select_single_row(rainfall_data, 'Year', 2017)

    # 4. Total Irrigated Area (the row labelled 'Total' in the Crops column)
    total_irrigated = _select_single_row(irrigated_area_data, 'Crops', 'Total')

    # 5. Land Utilization: column-wise sum over all district rows
    land_utilization_2017 = land_utilization_data.drop(columns=['DISTRICT']).sum()

    # 6. Temperature and Humidity: column-wise mean over all station rows
    temp_humidity_2017 = temp_humidity_data.drop(columns=['STATIONS/DISTRICTS']).mean()

    # -------------------------------------------------------------------------
    # --- Combining all data into a single feature vector (X) ---
    # -------------------------------------------------------------------------

    # Concatenate all the data series end-to-end into one long Series.
    combined_features = pd.concat([
        rice_data,
        fertilizer_2017,
        rainfall_2017,
        total_irrigated,
        land_utilization_2017,
        temp_humidity_2017,
    ])

    # Create the feature matrix X with a single sample (one row).
    X = pd.DataFrame([combined_features.values], columns=combined_features.index)

    # The target variable (Rice Yield Rate).
    # NOTE(review): this column, and the rice production/area columns it is
    # derived from, are also part of X. That is target leakage if X is ever
    # used to train a model -- decide which rice columns belong in X.
    y = rice_data['Total_Yield (Kg/ha)']

    print("--- Final Combined Data for 2017-18 (Features): ---")
    print(X.to_string())
    print("\n--- Target Value (Rice Yield): ---")
    print(y)

    # Hand the prepared sample back so callers can actually use it.
    return X, y

# Script entry point: run the data preparation only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    prepare_data_for_cnn()

Step 1: Loading and Preprocessing Data...
--- Final Combined Data for 2017-18 (Features): ---
   Kharif_Area (lakh ha)  Kharif_Production (lakh MT/bales)  Kharif_Yield (Kg/ha)  Rabi_Area (lakh ha)  Rabi_Production (lakh MT/bales)  Rabi_Yield (Kg/ha)  Total_Area (lakh ha)  Total_Production (lakh MT/bales)  Total_Yield (Kg/ha)  NITROGENOUS (Lakh MT)  PHOSPHATE (Lakh MT)  POTASH (Lakh MT)  TOTAL CONSUMPTION (Lakh MT)  JAN   FEB  MAR   APR    MAY   JUNE   JULY    AUG    SEP   OCT  NOV  DEC   TOTAL  Kharif_Total Area (lakh ha)  Kharif_Area Irrigated (lakh ha)  Kharif_% of Total Area  Rabi_Total Area (lakh ha)  Rabi_Area Irrigated (lakh ha)  Rabi_% of Total Area  Total_Total Area (lakh ha)  Total_Area Irrigated (lakh ha)  Total_% of Total Area  GEOGRAPHICAL AREA  FOREST AREA  MISC. TREE & GROVES  PERMANENT PASTURE  CULTURABLE WASTE  LAND PUT TO NON AGRIL USE  BARREN & UNCULTURABLE LAND  CURRENT FALLOW  OTHER FALLOW  NET AREA SOWN  JULY_Temp_Max  JULY_Temp_Min  JULY_RH_830  JULY_RH_1730  AUGU