In [1]:
# Change the working directory to the project folder; later cells read and
# write files through paths relative to it (data/final/...).
%cd /content/drive/MyDrive/Agriculture App/agriculture-predictor-planner

/content/drive/MyDrive/Agriculture App/agriculture-predictor-planner


In [28]:
import pandas as pd
import calendar

In [49]:
# Load the crop-produce table; the path is relative to the current working directory.
df = pd.read_csv("data/final/crop_produce_data.csv")

In [50]:
# Any column whose label contains one of these season words is removed.
pattern = r'Winter|Summer|Rainy|Autumn'
cols_to_drop = df.columns[df.columns.str.contains(pattern, regex=True)]

# Drop the seasonal columns, then spell out the abbreviated month names in the
# wind-speed headers so they share one naming scheme with the full-name headers.
df = (
    df
    .drop(columns=cols_to_drop)
    .rename(columns={
        'JAN WINDSPEED (Meter per second)': 'JANUARY WINDSPEED (Meter per second)',
        'FEB WINDSPEED (Meter per second)': 'FEBRUARY WINDSPEED (Meter per second)',
        'AUG WINDSPEED (Meter per second)': 'AUGUST WINDSPEED (Meter per second)',
        'SEPT WINDSPEED (Meter per second)': 'SEPTEMBER WINDSPEED (Meter per second)',
        'OCT WINDSPEED (Meter per second)': 'OCTOBER WINDSPEED (Meter per second)',
        'NOV WINDSPEED (Meter per second)': 'NOVEMBER WINDSPEED (Meter per second)',
        'DEC WINDSPEED (Meter per second)': 'DECEMBER WINDSPEED (Meter per second)',
    })
)




In [51]:
# Load the crop -> sowing-month lookup table (path relative to the working directory).
sowing_df = pd.read_csv("data/final/crop_sowing_month.csv")

In [52]:
# 2) Melt all the per‑crop yield columns into long form
yield_cols = [c for c in df.columns if c.endswith("YIELD (Kg per ha)")]
# e.g. "RICE YIELD (Kg per ha)" → crop="RICE"
yield_long = ((df.melt(
        id_vars=["Dist Name","Year"],
        value_vars=yield_cols,
        var_name="crop_full",
        value_name="yield_kg_per_ha"))
        .assign(crop=lambda d: d["crop_full"].str.replace(r" YIELD \(Kg per ha\)", "", regex=True))
        .drop(columns="crop_full"))

In [53]:
# Preview the first 50 rows of the long-form yield table.
print(yield_long.head(50))

   Dist Name  Year  yield_kg_per_ha  crop
0       Durg  1990           1210.0  RICE
1       Durg  1991           1293.0  RICE
2       Durg  1992           1291.0  RICE
3       Durg  1993           1387.0  RICE
4       Durg  1994           1399.0  RICE
5       Durg  1995           1507.0  RICE
6       Durg  1996           1486.0  RICE
7       Durg  1997           1265.0  RICE
8       Durg  1998            859.0  RICE
9       Durg  1999           1314.0  RICE
10      Durg  2000            515.0  RICE
11      Durg  2001           1385.0  RICE
12      Durg  2002            539.0  RICE
13      Durg  2003           1618.0  RICE
14      Durg  2004           1409.0  RICE
15      Durg  2005           1409.0  RICE
16      Durg  2006           1645.0  RICE
17      Durg  2007           1571.0  RICE
18      Durg  2008            906.0  RICE
19      Durg  2009            788.0  RICE
20      Durg  2010           1775.0  RICE
21      Durg  2011           1922.0  RICE
22      Durg  2012           2282.

In [None]:
# 4) Build a map of full names & abbreviations to month numbers
# `full_map` keeps the exact full names ("January" -> 1, ...).
full_map = {name: num for num, name in enumerate(calendar.month_name) if name}

# Lower-cased lookup used by month_to_num: full names, the standard
# abbreviations ("jan", "feb", ...), and the "sept" spelling that appears in
# this dataset's own column headers.
_month_lookup = {name.lower(): num for name, num in full_map.items()}
_month_lookup.update(
    {abbr.lower(): num for num, abbr in enumerate(calendar.month_abbr) if abbr}
)
_month_lookup.setdefault("sept", 9)

def month_to_num(m):
    """Convert a month name to its number (1-12).

    Matching ignores letter case and surrounding whitespace, and accepts both
    full names ("June") and abbreviations ("Jun", "SEPT").

    Parameters
    ----------
    m : str
        Month name or abbreviation.

    Returns
    -------
    int
        Month number, 1 for January through 12 for December.

    Raises
    ------
    ValueError
        If `m` is not a string or does not name a known month.
    """
    # Non-string values (e.g. NaN from a missing cell) fall through to the error.
    if isinstance(m, str):
        key = m.strip().lower()
        if key in _month_lookup:
            return _month_lookup[key]
    raise ValueError(f"Unknown month: {m}")

# 5) + 6) Add the numeric `sowing_month` column derived from 'Sowing Month',
#         then drop the original text column.
sowing_df = (
    sowing_df
    .assign(sowing_month=sowing_df["Sowing Month"].map(month_to_num))
    .drop(columns="Sowing Month")
)

print(sowing_df)


In [None]:
# Lower-case the crop column label so it matches the `crop` key used by the yield table.
sowing_df = sowing_df.rename({"Crop": "crop"}, axis="columns")
print(sowing_df)

In [56]:
# Show the first 10 rows of the sowing-month lookup.
sowing_df.head(10)

Unnamed: 0,crop,sowing_month
0,RICE,6
1,RICE,7
2,RICE,11
3,RICE,12
4,RICE,1
5,RICE,2
6,RICE,5
7,RICE,8
8,PEARL MILLET,7
9,CHICKPEA,10


In [57]:
# Attach the sowing-month lookup to every yield row.
# `sowing_df` is expected to carry `crop` (exact crop name) and `sowing_month` (1-12), e.g.
#   sowing_df = pd.DataFrame({
#       "crop": ["RICE", "WHEAT", ...],
#       "sowing_month": [6, 11, ...],
#   })
# A crop listed under several sowing months yields one output row per month,
# and the left join keeps yield rows whose crop has no lookup entry.
master = pd.merge(yield_long, sowing_df, how="left", on="crop")


In [58]:
# Preview the first 10 rows of the yield table joined with sowing months.
print(master.head(10))

  Dist Name  Year  yield_kg_per_ha  crop  sowing_month
0      Durg  1990           1210.0  RICE           6.0
1      Durg  1990           1210.0  RICE           7.0
2      Durg  1990           1210.0  RICE          11.0
3      Durg  1990           1210.0  RICE          12.0
4      Durg  1990           1210.0  RICE           1.0
5      Durg  1990           1210.0  RICE           2.0
6      Durg  1990           1210.0  RICE           5.0
7      Durg  1990           1210.0  RICE           8.0
8      Durg  1991           1293.0  RICE           6.0
9      Durg  1991           1293.0  RICE           7.0


In [59]:
# Collect the monthly column labels of each weather variable by header suffix.
# The suffix spellings are kept exactly as they are written in this block.
tmax_cols, tmin_cols, precip_cols, wind_cols = (
    [label for label in df if label.endswith(suffix)]
    for suffix in (
        "MAXIMUM TEMPERATURE (Centigrate)",
        "MINIMUM TEMPERATURE (Centigrate)",
        "PERCIPITATION (Millimeters)",
        "WINDSPEED (Meter per second)",
    )
)


In [60]:
# Print the four column-name lists to check what each suffix filter matched.
print(tmax_cols)
print(tmin_cols)
print(precip_cols)
print(wind_cols)

['JANUARY MAXIMUM TEMPERATURE (Centigrate)', 'FEBRUARY MAXIMUM TEMPERATURE (Centigrate)', 'MARCH MAXIMUM TEMPERATURE (Centigrate)', 'APRIL MAXIMUM TEMPERATURE (Centigrate)', 'MAY MAXIMUM TEMPERATURE (Centigrate)', 'JUNE MAXIMUM TEMPERATURE (Centigrate)', 'JULY MAXIMUM TEMPERATURE (Centigrate)', 'AUGUST MAXIMUM TEMPERATURE (Centigrate)', 'SEPTEMBER MAXIMUM TEMPERATURE (Centigrate)', 'OCTOBER MAXIMUM TEMPERATURE (Centigrate)', 'NOVEMBER MAXIMUM TEMPERATURE (Centigrate)', 'DECEMBER MAXIMUM TEMPERATURE (Centigrate)']
['JANUARY MINIMUM TEMPERATURE (Centigrate)', 'FEBRUARY MINIMUM TEMPERATURE (Centigrate)', 'MARCH MINIMUM TEMPERATURE (Centigrate)', 'APRIL MINIMUM TEMPERATURE (Centigrate)', 'MAY MINIMUM TEMPERATURE (Centigrate)', 'JUNE MINIMUM TEMPERATURE (Centigrate)', 'JULY MINIMUM TEMPERATURE (Centigrate)', 'AUGUST MINIMUM TEMPERATURE (Centigrate)', 'SEPTEMBER MINIMUM TEMPERATURE (Centigrate)', 'OCTOBER MINIMUM TEMPERATURE (Centigrate)', 'NOVEMBER MINIMUM TEMPERATURE (Centigrate)', 'DECEMB

In [None]:
# Print the wide crop/weather table for inspection.
print(df)

In [65]:
# 5) Melt each weather block into long form keyed by (Dist Name, Year, month_number)
def melt_weather(var_cols, value_name, month_parser, frame=None):
    """Reshape one block of monthly weather columns from wide to long form.

    Parameters
    ----------
    var_cols : sequence of str
        Labels of the columns to melt (one per month).
    value_name : str
        Name of the output column holding the melted values.
    month_parser : callable
        Maps a column label to its month value; called once per label in
        `var_cols`.
    frame : pandas.DataFrame, optional
        Table to read from. Must contain "Dist Name", "Year" and every label
        in `var_cols`. Defaults to the notebook-level `df`.

    Returns
    -------
    pandas.DataFrame
        Columns "Dist Name", "Year", `value_name`, "month"; one row per
        input row and melted column.
    """
    source = df if frame is None else frame
    # Only len(var_cols) distinct labels exist, so parse each one once instead
    # of calling the Python-level parser for every melted row.
    month_by_label = {label: month_parser(label) for label in var_cols}
    wide = source[["Dist Name", "Year"] + list(var_cols)]
    long = (
        wide.melt(id_vars=["Dist Name", "Year"], var_name="month_str", value_name=value_name)
        .assign(month=lambda d: d["month_str"].map(month_by_label))
        .drop(columns="month_str")
    )
    return long

# parsers for each block:
def month_name_parser(s):
    """Return the month number named by the first word of a column label.

    e.g. "JANUARY MAXIMUM..." -> "JANUARY" -> "January" -> 1.
    Raises ValueError when the first word is not a month name.
    """
    leading_word = s.split()[0]
    return list(calendar.month_name).index(leading_word.title())


# Melt each weather variable into its own long table; every block uses the
# same month-name parser.
tmax_long, tmin_long, precip_long, wind_long = (
    melt_weather(block_cols, out_name, month_name_parser)
    for block_cols, out_name in (
        (tmax_cols, "tmax"),
        (tmin_cols, "tmin"),
        (precip_cols, "precip"),
        (wind_cols, "wind"),
    )
)


In [62]:
# Print the four long-form weather tables (one value column each, plus month).
print(tmax_long)
print(tmin_long)
print(precip_long)
print(wind_long)

      Dist Name  Year       tmax  month
0          Durg  1990  28.830000      1
1          Durg  1991  27.490000      1
2          Durg  1992  26.959999      1
3          Durg  1993  28.250000      1
4          Durg  1994  28.559999      1
...         ...   ...        ...    ...
94087    Ranchi  2011  25.340000     12
94088    Ranchi  2012  25.120001     12
94089    Ranchi  2013  23.219999     12
94090    Ranchi  2014  22.760000     12
94091    Ranchi  2015  23.860001     12

[94092 rows x 4 columns]
      Dist Name  Year   tmin  month
0          Durg  1990  14.22      1
1          Durg  1991  13.80      1
2          Durg  1992  13.05      1
3          Durg  1993  14.02      1
4          Durg  1994  14.10      1
...         ...   ...    ...    ...
94087    Ranchi  2011  10.12     12
94088    Ranchi  2012   9.79     12
94089    Ranchi  2013  10.24     12
94090    Ranchi  2014   9.93     12
94091    Ranchi  2015  10.25     12

[94092 rows x 4 columns]
      Dist Name  Year  precip  month

In [64]:
# Preview the first 50 rows of the long-form max-temperature table and its (rows, columns) shape.
print(tmax_long.head(50))
print(tmax_long.shape)


   Dist Name  Year       tmax  month
0       Durg  1990  28.830000      1
1       Durg  1991  27.490000      1
2       Durg  1992  26.959999      1
3       Durg  1993  28.250000      1
4       Durg  1994  28.559999      1
5       Durg  1995  26.110001      1
6       Durg  1996  28.670000      1
7       Durg  1997  26.770000      1
8       Durg  1998  27.820000      1
9       Durg  1999  27.020000      1
10      Durg  2000  28.820000      1
11      Durg  2001  28.059999      1
12      Durg  2002  27.790001      1
13      Durg  2003  28.150000      1
14      Durg  2004  27.250000      1
15      Durg  2005  27.799999      1
16      Durg  2006  27.889999      1
17      Durg  2007  28.320000      1
18      Durg  2008  28.400000      1
19      Durg  2009  30.230000      1
20      Durg  2010  27.000000      1
21      Durg  2011  26.820000      1
22      Durg  2012  27.139999      1
23      Durg  2013  28.190001      1
24      Durg  2014  32.270000      1
25      Durg  2015  26.840000      1
2

In [66]:
# 6) Combine the four long weather tables into one, joining step by step on
#    (Dist Name, Year, month) with pandas' default inner join.
weather_long = pd.merge(tmax_long, tmin_long, on=["Dist Name", "Year", "month"])
weather_long = pd.merge(weather_long, precip_long, on=["Dist Name", "Year", "month"])
weather_long = pd.merge(weather_long, wind_long, on=["Dist Name", "Year", "month"])


In [67]:
# Preview the first 50 rows of the combined weather table.
print(weather_long.head(50))

   Dist Name  Year       tmax  month       tmin     precip      wind
0       Durg  1990  28.830000      1  14.220000   0.950000  1.094413
1       Durg  1991  27.490000      1  13.800000   8.560000  1.094413
2       Durg  1992  26.959999      1  13.050000   2.600000  1.094413
3       Durg  1993  28.250000      1  14.020000   0.270000  1.094413
4       Durg  1994  28.559999      1  14.100000   8.000000  1.094413
5       Durg  1995  26.110001      1  11.960000  25.110001  1.094413
6       Durg  1996  28.670000      1  14.600000   6.760000  1.094413
7       Durg  1997  26.770000      1  12.200000   8.740000  1.094413
8       Durg  1998  27.820000      1  14.290000   4.350000  1.094413
9       Durg  1999  27.020000      1  12.140000   0.210000  1.094413
10      Durg  2000  28.820000      1  14.050000   0.030000  0.808000
11      Durg  2001  28.059999      1  13.700000   2.500000  0.732000
12      Durg  2002  27.790001      1  13.430000  15.530000  0.782000
13      Durg  2003  28.150000     

In [68]:
# Preview the first 50 rows of `master`.
print(master.head(50))

   Dist Name  Year  yield_kg_per_ha  crop  sowing_month
0       Durg  1990           1210.0  RICE           6.0
1       Durg  1990           1210.0  RICE           7.0
2       Durg  1990           1210.0  RICE          11.0
3       Durg  1990           1210.0  RICE          12.0
4       Durg  1990           1210.0  RICE           1.0
5       Durg  1990           1210.0  RICE           2.0
6       Durg  1990           1210.0  RICE           5.0
7       Durg  1990           1210.0  RICE           8.0
8       Durg  1991           1293.0  RICE           6.0
9       Durg  1991           1293.0  RICE           7.0
10      Durg  1991           1293.0  RICE          11.0
11      Durg  1991           1293.0  RICE          12.0
12      Durg  1991           1293.0  RICE           1.0
13      Durg  1991           1293.0  RICE           2.0
14      Durg  1991           1293.0  RICE           5.0
15      Durg  1991           1293.0  RICE           8.0
16      Durg  1992           1291.0  RICE       

In [69]:
# Attach the weather of each row's sowing month: match on district and year,
# with `sowing_month` on the left and `month` on the right.
master = pd.merge(
    master,
    weather_long,
    how="left",
    left_on=["Dist Name", "Year", "sowing_month"],
    right_on=["Dist Name", "Year", "month"],
).drop(columns="month")  # `month` duplicates `sowing_month` after the join

In [73]:
# Preview the first 50 rows of `master` and its (rows, columns) shape.
# NOTE(review): the recorded output shows the columns district/year/yield/month,
# which only exist after the rename cell that follows; in a top-to-bottom run
# this preview would show the pre-rename labels.
print(master.head(50))
print(master.shape)

   district  year   yield  crop  month       tmax       tmin      precip  \
0      Durg  1990  1210.0  RICE    6.0  35.290001  24.889999  302.850006   
1      Durg  1990  1210.0  RICE    7.0  29.309999  23.240000  432.079987   
2      Durg  1990  1210.0  RICE   11.0  29.660000  16.510000    0.790000   
3      Durg  1990  1210.0  RICE   12.0  28.150000  13.530000   11.960000   
4      Durg  1990  1210.0  RICE    1.0  28.830000  14.220000    0.950000   
5      Durg  1990  1210.0  RICE    2.0  30.700001  16.680000   33.630001   
6      Durg  1990  1210.0  RICE    5.0  39.660000  26.450001   57.950001   
7      Durg  1990  1210.0  RICE    8.0  30.110001  24.110001  456.390015   
8      Durg  1991  1293.0  RICE    6.0  36.270000  25.709999  187.100006   
9      Durg  1991  1293.0  RICE    7.0  30.840000  24.620001  445.809998   
10     Durg  1991  1293.0  RICE   11.0  28.620001  15.210000    3.040000   
11     Durg  1991  1293.0  RICE   12.0  27.180000  12.720000    4.820000   
12     Durg 

In [72]:
# Give the master table its final short column labels.
master = master.rename(
    {
        "Dist Name": "district",
        "Year": "year",
        "yield_kg_per_ha": "yield",
        "sowing_month": "month",
    },
    axis="columns",
)


In [74]:
# Save the master table as CSV (row index omitted) and report where it was written.
# The path is relative to the current working directory.
file_save_path = 'data/final/master_crop.csv'
master.to_csv(file_save_path, index=False)
print(f"Master file data saved to {file_save_path}")


Master file data saved to data/final/master_crop.csv
