In [None]:
%cd /content/drive/MyDrive/Agriculture App/agriculture-predictor-planner

In [17]:
import pandas as pd

In [18]:
#Loading the dataframe
crop_data = pd.read_csv('data/merged/crop_soil_weather_merged.csv')

In [19]:
#Converting data types so that district code, year, and state code are not treated as numeric during calculations
crop_data = crop_data.astype({'Dist Code':'category', 'Year':'int64', 'State Code':'category', 'State Name':'category', 'Dist Name':'category'})


In [24]:
# Filter out all the crop yield columns
yield_columns = [col for col in crop_data.columns if "YIELD (Kg per ha)" in col]


In [28]:
# Reshape the dataframe from wide to long format where each row corresponds to a specific district, year, and crop yield
df_long = crop_data.melt(id_vars=['Dist Name', 'Year'], value_vars=yield_columns, var_name='Crop', value_name='Yield')


In [33]:
print(df_long.head(28))

   Dist Name  Year                    Crop   Yield
0       Durg  1990  RICE YIELD (Kg per ha)  1210.0
1       Durg  1991  RICE YIELD (Kg per ha)  1293.0
2       Durg  1992  RICE YIELD (Kg per ha)  1291.0
3       Durg  1993  RICE YIELD (Kg per ha)  1387.0
4       Durg  1994  RICE YIELD (Kg per ha)  1399.0
5       Durg  1995  RICE YIELD (Kg per ha)  1507.0
6       Durg  1996  RICE YIELD (Kg per ha)  1486.0
7       Durg  1997  RICE YIELD (Kg per ha)  1265.0
8       Durg  1998  RICE YIELD (Kg per ha)   859.0
9       Durg  1999  RICE YIELD (Kg per ha)  1314.0
10      Durg  2000  RICE YIELD (Kg per ha)   515.0
11      Durg  2001  RICE YIELD (Kg per ha)  1385.0
12      Durg  2002  RICE YIELD (Kg per ha)   539.0
13      Durg  2003  RICE YIELD (Kg per ha)  1618.0
14      Durg  2004  RICE YIELD (Kg per ha)  1409.0
15      Durg  2005  RICE YIELD (Kg per ha)  1409.0
16      Durg  2006  RICE YIELD (Kg per ha)  1645.0
17      Durg  2007  RICE YIELD (Kg per ha)  1571.0
18      Durg  2008  RICE YIELD 

In [None]:
# Compute the average yield for each District and Crop pair
df_avg = df_long.groupby(['Dist Name', 'Crop'])['Yield'].mean().reset_index()

# For each district, get the top 5 crops based on the average yield
top5_per_district = df_avg.groupby('Dist Name', group_keys=False).apply(
        lambda group: group.nlargest(5, 'Yield'))

print(top5_per_district.head(20))



In [42]:
# Optionally, aggregate the top crop names into a list per district
top5_df = top5_per_district.groupby('Dist Name')['Crop'].apply(list).reset_index()
top5_df.columns = ['District', 'Top_5_Crops']
print(top5_df.head(20))


      District                                        Top_5_Crops
0     Adilabad  [SUGARCANE YIELD (Kg per ha), MAIZE YIELD (Kg ...
1         Agra  [SUGARCANE YIELD (Kg per ha), WHEAT YIELD (Kg ...
2    Ahmedabad  [SUGARCANE YIELD (Kg per ha), RICE YIELD (Kg p...
3   Ahmednagar  [SUGARCANE YIELD (Kg per ha), MAIZE YIELD (Kg ...
4        Ajmer  [SUGARCANE YIELD (Kg per ha), WHEAT YIELD (Kg ...
5        Akola  [SUGARCANE YIELD (Kg per ha), MAIZE YIELD (Kg ...
6    Alappuzha  [RICE YIELD (Kg per ha), SUGARCANE YIELD (Kg p...
7      Aligarh  [SUGARCANE YIELD (Kg per ha), WHEAT YIELD (Kg ...
8    Allahabad  [SUGARCANE YIELD (Kg per ha), WHEAT YIELD (Kg ...
9      Almorah  [MAIZE YIELD (Kg per ha), RICE YIELD (Kg per h...
10       Alwar  [SUGARCANE YIELD (Kg per ha), WHEAT YIELD (Kg ...
11   Amarawati  [SUGARCANE YIELD (Kg per ha), MAIZE YIELD (Kg ...
12      Ambala  [SUGARCANE YIELD (Kg per ha), WHEAT YIELD (Kg ...
13      Amreli  [SUGARCANE YIELD (Kg per ha), WHEAT YIELD (Kg ...
14    Amri

  top5_df = top5_per_district.groupby('Dist Name')['Crop'].apply(list).reset_index()
