In [1]:
%cd /content/drive/MyDrive/Agriculture App/agriculture-predictor-planner

/content/drive/MyDrive/Agriculture App/agriculture-predictor-planner


In [2]:
import pandas as pd
import numpy as np

In [3]:
#Loading the dataframe
crop_data = pd.read_csv('data/merged/crop_soil_weather_merged.csv')

In [4]:
#Converting data types so that district code, year, and state code are not treated as numeric during calculations
crop_data = crop_data.astype({'Dist Code':'category', 'Year':'int64', 'State Code':'category', 'State Name':'category', 'Dist Name':'category'})


In [None]:
# Filter out all the crop yield columns
yield_columns = [col for col in crop_data.columns if "YIELD (Kg per ha)" in col]

#Removing the outlier
yield_columns.remove('SUGARCANE YIELD (Kg per ha)')


In [None]:
print(len(crop_data['State Name'].unique()))
print(crop_data['State Name'].unique())

In [23]:
# Reshape the dataframe from wide to long format where each row corresponds to a specific district, year, and crop yield
df_long = crop_data.melt(id_vars=['Dist Name', 'Year'], value_vars=yield_columns, var_name='Crop', value_name='Yield')


In [24]:
print(df_long.head(28))

   Dist Name  Year                    Crop   Yield
0       Durg  1990  RICE YIELD (Kg per ha)  1210.0
1       Durg  1991  RICE YIELD (Kg per ha)  1293.0
2       Durg  1992  RICE YIELD (Kg per ha)  1291.0
3       Durg  1993  RICE YIELD (Kg per ha)  1387.0
4       Durg  1994  RICE YIELD (Kg per ha)  1399.0
5       Durg  1995  RICE YIELD (Kg per ha)  1507.0
6       Durg  1996  RICE YIELD (Kg per ha)  1486.0
7       Durg  1997  RICE YIELD (Kg per ha)  1265.0
8       Durg  1998  RICE YIELD (Kg per ha)   859.0
9       Durg  1999  RICE YIELD (Kg per ha)  1314.0
10      Durg  2000  RICE YIELD (Kg per ha)   515.0
11      Durg  2001  RICE YIELD (Kg per ha)  1385.0
12      Durg  2002  RICE YIELD (Kg per ha)   539.0
13      Durg  2003  RICE YIELD (Kg per ha)  1618.0
14      Durg  2004  RICE YIELD (Kg per ha)  1409.0
15      Durg  2005  RICE YIELD (Kg per ha)  1409.0
16      Durg  2006  RICE YIELD (Kg per ha)  1645.0
17      Durg  2007  RICE YIELD (Kg per ha)  1571.0
18      Durg  2008  RICE YIELD 

In [25]:
# Compute the average yield for each District and Crop pair
df_avg = df_long.groupby(['Dist Name', 'Crop'])['Yield'].mean().reset_index()

# For each district, get the top 5 crops based on the average yield
top5_per_district = df_avg.groupby('Dist Name', group_keys=False).apply(
        lambda group: group.nlargest(5, 'Yield'))

print(top5_per_district.head(20))



  df_avg = df_long.groupby(['Dist Name', 'Crop'])['Yield'].mean().reset_index()
  top5_per_district = df_avg.groupby('Dist Name', group_keys=False).apply(


     Dist Name                              Crop        Yield
7     Adilabad           MAIZE YIELD (Kg per ha)  2911.973077
14    Adilabad            RICE YIELD (Kg per ha)  2137.807692
5     Adilabad       GROUNDNUT YIELD (Kg per ha)  1412.269231
6     Adilabad  KHARIF SORGHUM YIELD (Kg per ha)  1293.531923
20    Adilabad           WHEAT YIELD (Kg per ha)  1040.357692
41        Agra           WHEAT YIELD (Kg per ha)  3202.596154
21        Agra          BARLEY YIELD (Kg per ha)  2807.131538
35        Agra            RICE YIELD (Kg per ha)  2058.576923
28        Agra           MAIZE YIELD (Kg per ha)  1998.928462
31        Agra    PEARL MILLET YIELD (Kg per ha)  1533.461538
56   Ahmedabad            RICE YIELD (Kg per ha)  1808.923077
62   Ahmedabad           WHEAT YIELD (Kg per ha)  1702.066154
43   Ahmedabad          CASTOR YIELD (Kg per ha)  1642.600000
51   Ahmedabad        OILSEEDS YIELD (Kg per ha)  1365.328846
49   Ahmedabad           MAIZE YIELD (Kg per ha)  1310.275385
70  Ahme

  top5_per_district = df_avg.groupby('Dist Name', group_keys=False).apply(


In [26]:
# Resetting the index to a sequential format
top5_per_district.reset_index(drop=True, inplace=True)
print(top5_per_district.head(10))

  Dist Name                              Crop        Yield
0  Adilabad           MAIZE YIELD (Kg per ha)  2911.973077
1  Adilabad            RICE YIELD (Kg per ha)  2137.807692
2  Adilabad       GROUNDNUT YIELD (Kg per ha)  1412.269231
3  Adilabad  KHARIF SORGHUM YIELD (Kg per ha)  1293.531923
4  Adilabad           WHEAT YIELD (Kg per ha)  1040.357692
5      Agra           WHEAT YIELD (Kg per ha)  3202.596154
6      Agra          BARLEY YIELD (Kg per ha)  2807.131538
7      Agra            RICE YIELD (Kg per ha)  2058.576923
8      Agra           MAIZE YIELD (Kg per ha)  1998.928462
9      Agra    PEARL MILLET YIELD (Kg per ha)  1533.461538


In [None]:
#Saving the dataframe
file_save_path = 'data/final/top5crops_per_district.csv'
top5_per_district.to_csv(file_save_path, index=False)
print(f"Top five crops per district data saved to {file_save_path}")
