In [23]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

In [92]:
# Load the data from CSV using the Pandas library
df = pd.read_csv('./crop_yield.csv')
df.head()
numerical_cols = ["Rainfall_mm","Temperature_Celsius","Days_to_Harvest"]
X=df[numerical_cols]
y=df["Yield_tons_per_hectare"]


In [104]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_normalized = scaler.fit_transform(X)
X_normalized = pd.DataFrame(X_normalized, columns=numerical_cols)
print(X_normalized.head())
df.update(X_normalized)
df.head()

   Rainfall_mm  Temperature_Celsius  Days_to_Harvest
0     1.335747             0.023821         0.674477
1     1.703634            -1.312747         1.368028
2    -1.546977             0.317020         0.057988
3     1.681287            -1.504137         1.599212
4     0.694233             0.569997         0.212110


Unnamed: 0,Region,Soil_Type,Crop,Rainfall_mm,Temperature_Celsius,Fertilizer_Used,Irrigation_Used,Weather_Condition,Days_to_Harvest,Yield_tons_per_hectare
0,West,Sandy,Cotton,1.335747,0.023821,False,True,Cloudy,0.674477,6.555816
1,South,Clay,Rice,1.703634,-1.312747,True,True,Rainy,1.368028,8.527341
2,North,Loam,Barley,-1.546977,0.31702,False,False,Sunny,0.057988,1.127443
3,North,Sandy,Soybean,1.681287,-1.504137,False,True,Rainy,1.599212,6.517573
4,South,Silt,Wheat,0.694233,0.569997,True,True,Cloudy,0.21211,7.248251


In [108]:
categorical_columns = df.select_dtypes(include=['object']).columns.tolist()
encoder = OneHotEncoder(sparse_output=False)
one_hot_encoded = encoder.fit_transform(df[categorical_columns])
one_hot_df = pd.DataFrame(one_hot_encoded, columns=encoder.get_feature_names_out(categorical_columns))
df_encoded = pd.concat([df, one_hot_df], axis=1)
df_encoded = df_encoded.drop(categorical_columns, axis=1)
df_encoded.head()
    

Unnamed: 0,Rainfall_mm,Temperature_Celsius,Fertilizer_Used,Irrigation_Used,Days_to_Harvest,Yield_tons_per_hectare,Region_East,Region_North,Region_South,Region_West,...,Soil_Type_Silt,Crop_Barley,Crop_Cotton,Crop_Maize,Crop_Rice,Crop_Soybean,Crop_Wheat,Weather_Condition_Cloudy,Weather_Condition_Rainy,Weather_Condition_Sunny
0,1.335747,0.023821,False,True,0.674477,6.555816,0.0,0.0,0.0,1.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
1,1.703634,-1.312747,True,True,1.368028,8.527341,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
2,-1.546977,0.31702,False,False,0.057988,1.127443,0.0,1.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,1.681287,-1.504137,False,True,1.599212,6.517573,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
4,0.694233,0.569997,True,True,0.21211,7.248251,0.0,0.0,1.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0


In [110]:
crops = ["Crop_Barley", "Crop_Cotton", "Crop_Maize", "Crop_Rice", "Crop_Soybean", "Crop_Wheat"]
barley_df = df_encoded[df_encoded["Crop_Barley"] == 1.0]
barley_df = barley_df.drop(crops, axis=1)
cotton_df = df_encoded[df_encoded["Crop_Cotton"] == 1.0]
cotton_df = cotton_df.drop(crops, axis=1)
maize_df = df_encoded[df_encoded["Crop_Maize"] == 1.0]
maize_df = maize_df.drop(crops, axis=1)
rice_df = df_encoded[df_encoded["Crop_Rice"] == 1.0]
rice_df = rice_df.drop(crops, axis=1)
soybean_df = df_encoded[df_encoded["Crop_Soybean"] == 1.0]
soybean_df = soybean_df.drop(crops, axis=1)
wheat_df = df_encoded[df_encoded["Crop_Wheat"] == 1.0]
wheat_df = wheat_df.drop(crops, axis=1)

crop_df = {"Barley":barley_df, "Cotton":cotton_df, "Maize":maize_df, "Rice":rice_df, "Soybean":soybean_df, "Wheat":wheat_df}
for key in crop_df.keys():
    print(crop_df[key])

        Rainfall_mm  Temperature_Celsius  Fertilizer_Used  Irrigation_Used  \
2         -1.546977             0.317020            False            False   
13         1.459073             0.300853            False             True   
14         0.276298            -0.091653             True            False   
18         1.644315            -1.371196            False             True   
21        -1.408661             0.931080             True            False   
...             ...                  ...              ...              ...   
999977     1.304376             0.403407             True            False   
999981     0.850605            -1.715847            False            False   
999988    -0.598684            -0.729247            False             True   
999989    -1.398043            -0.357133            False            False   
999996     1.473957             1.683526             True            False   

        Days_to_Harvest  Yield_tons_per_hectare  Region_East  R

BARLEY

In [113]:
barley_X = crop_df["Barley"][['Rainfall_mm', 
        'Temperature_Celsius', 
        'Fertilizer_Used', 
        'Irrigation_Used', 
        'Days_to_Harvest',
        'Region_East',
        'Region_West',
        'Region_North',
        'Region_South',
        'Soil_Type_Clay',
        'Soil_Type_Sandy',
        'Soil_Type_Loam',
        'Soil_Type_Silt',
        'Soil_Type_Peaty',
        'Soil_Type_Chalky',
        'Weather_Condition_Cloudy',
        'Weather_Condition_Rainy',
        'Weather_Condition_Sunny',
         ]]
barley_y = crop_df["Barley"][["Yield_tons_per_hectare"]]
barley_linreg = LinearRegression()
barley_linreg.fit(barley_X, barley_y)
print(barley_linreg.intercept_)
print(barley_linreg.coef_)


[-2.26279944e+12]
[[ 1.29784167e+00  1.44514730e-01  1.50108117e+00  1.19704739e+00
   2.33790363e-03 -4.72989265e+11 -4.72989265e+11 -4.72989265e+11
  -4.72989265e+11 -7.95320158e+10 -7.95320158e+10 -7.95320158e+10
  -7.95320158e+10 -7.95320158e+10 -7.95320158e+10  2.81532072e+12
   2.81532072e+12  2.81532072e+12]]


COTTON

In [116]:
cotton_X = crop_df["Cotton"][['Rainfall_mm', 
        'Temperature_Celsius', 
        'Fertilizer_Used', 
        'Irrigation_Used', 
        'Days_to_Harvest',
        'Region_East',
        'Region_West',
        'Region_North',
        'Region_South',
        'Soil_Type_Clay',
        'Soil_Type_Sandy',
        'Soil_Type_Loam',
        'Soil_Type_Silt',
        'Soil_Type_Peaty',
        'Soil_Type_Chalky',
        'Weather_Condition_Cloudy',
        'Weather_Condition_Rainy',
        'Weather_Condition_Sunny',
         ]]
cotton_y = crop_df["Cotton"][["Yield_tons_per_hectare"]]
cotton_linreg = LinearRegression()
cotton_linreg.fit(cotton_X, cotton_y)
print(cotton_linreg.intercept_)
print(cotton_linreg.coef_)


[7.73351255e+12]
[[ 1.29623954e+00  1.45820013e-01  1.49610635e+00  1.19996484e+00
  -1.43069677e-03 -6.72984546e+12 -6.72984546e+12 -6.72984546e+12
  -6.72984546e+12 -2.61351993e+11 -2.61351993e+11 -2.61351993e+11
  -2.61351993e+11 -2.61351993e+11 -2.61351993e+11 -7.42315094e+11
  -7.42315094e+11 -7.42315094e+11]]


MAIZE

In [119]:
maize_X = crop_df["Maize"][['Rainfall_mm', 
        'Temperature_Celsius', 
        'Fertilizer_Used', 
        'Irrigation_Used', 
        'Days_to_Harvest',
        'Region_East',
        'Region_West',
        'Region_North',
        'Region_South',
        'Soil_Type_Clay',
        'Soil_Type_Sandy',
        'Soil_Type_Loam',
        'Soil_Type_Silt',
        'Soil_Type_Peaty',
        'Soil_Type_Chalky',
        'Weather_Condition_Cloudy',
        'Weather_Condition_Rainy',
        'Weather_Condition_Sunny',
         ]]
maize_y = crop_df["Maize"][["Yield_tons_per_hectare"]]
maize_linreg = LinearRegression()
maize_linreg.fit(maize_X, maize_y)
print(maize_linreg.intercept_)
print(maize_linreg.coef_)


[9.81235115e+11]
[[ 1.29829978e+00  1.43878531e-01  1.49833108e+00  1.20140461e+00
  -2.55901683e-04 -1.19895450e+12 -1.19895450e+12 -1.19895450e+12
  -1.19895450e+12 -1.04863673e+12 -1.04863673e+12 -1.04863673e+12
  -1.04863673e+12 -1.04863673e+12 -1.04863673e+12  1.26635611e+12
   1.26635611e+12  1.26635611e+12]]


RICE

In [122]:
rice_X = crop_df["Rice"][['Rainfall_mm', 
        'Temperature_Celsius', 
        'Fertilizer_Used', 
        'Irrigation_Used', 
        'Days_to_Harvest',
        'Region_East',
        'Region_West',
        'Region_North',
        'Region_South',
        'Soil_Type_Clay',
        'Soil_Type_Sandy',
        'Soil_Type_Loam',
        'Soil_Type_Silt',
        'Soil_Type_Peaty',
        'Soil_Type_Chalky',
        'Weather_Condition_Cloudy',
        'Weather_Condition_Rainy',
        'Weather_Condition_Sunny',
         ]]
rice_y = crop_df["Rice"][["Yield_tons_per_hectare"]]
rice_linreg = LinearRegression()
rice_linreg.fit(rice_X, rice_y)
print(rice_linreg.intercept_)
print(rice_linreg.coef_)

[-7.77440561e+10]
[[ 1.29909247e+00  1.41402318e-01  1.49905921e+00  1.20248914e+00
   1.54361117e-03 -2.23967806e+11 -2.23967806e+11 -2.23967806e+11
  -2.23967806e+11  9.24117348e+11  9.24117348e+11  9.24117348e+11
   9.24117348e+11  9.24117348e+11  9.24117348e+11 -6.22405485e+11
  -6.22405485e+11 -6.22405485e+11]]


SOYBEAN

In [125]:
soybean_X = crop_df["Soybean"][['Rainfall_mm', 
        'Temperature_Celsius', 
        'Fertilizer_Used', 
        'Irrigation_Used', 
        'Days_to_Harvest',
        'Region_East',
        'Region_West',
        'Region_North',
        'Region_South',
        'Soil_Type_Clay',
        'Soil_Type_Sandy',
        'Soil_Type_Loam',
        'Soil_Type_Silt',
        'Soil_Type_Peaty',
        'Soil_Type_Chalky',
        'Weather_Condition_Cloudy',
        'Weather_Condition_Rainy',
        'Weather_Condition_Sunny',
         ]]
soybean_y = crop_df["Soybean"][["Yield_tons_per_hectare"]]
soybean_linreg = LinearRegression()
soybean_linreg.fit(soybean_X, soybean_y)
print(soybean_linreg.intercept_)
print(soybean_linreg.coef_)

[1.17594848e+11]
[[ 1.29937049e+00  1.43888662e-01  1.50408681e+00  1.20255980e+00
   1.87995977e-03  7.80365575e+11  7.80365575e+11  7.80365575e+11
   7.80365575e+11  1.45902110e+12  1.45902110e+12  1.45902110e+12
   1.45902110e+12  1.45902110e+12  1.45902110e+12 -2.35698152e+12
  -2.35698152e+12 -2.35698152e+12]]


WHEAT

In [128]:
wheat_X = crop_df["Wheat"][['Rainfall_mm', 
        'Temperature_Celsius', 
        'Fertilizer_Used', 
        'Irrigation_Used', 
        'Days_to_Harvest',
        'Region_East',
        'Region_West',
        'Region_North',
        'Region_South',
        'Soil_Type_Clay',
        'Soil_Type_Sandy',
        'Soil_Type_Loam',
        'Soil_Type_Silt',
        'Soil_Type_Peaty',
        'Soil_Type_Chalky',
        'Weather_Condition_Cloudy',
        'Weather_Condition_Rainy',
        'Weather_Condition_Sunny',
         ]]
wheat_y = crop_df["Wheat"][["Yield_tons_per_hectare"]]
wheat_linreg = LinearRegression()
wheat_linreg.fit(wheat_X, wheat_y)
print(wheat_linreg.intercept_)
print(wheat_linreg.coef_)

[-3.15968667e+12]
[[ 1.29959202e+00  1.44077218e-01  1.50172638e+00  1.19495721e+00
   3.98371881e-04  5.49410579e+11  5.49410579e+11  5.49410579e+11
   5.49410579e+11 -8.36949763e+11 -8.36949763e+11 -8.36949763e+11
  -8.36949763e+11 -8.36949763e+11 -8.36949763e+11  3.44722585e+12
   3.44722585e+12  3.44722585e+12]]
