In [17]:
import warnings
warnings.filterwarnings("ignore")
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report, roc_curve, roc_auc_score, r2_score
from sklearn.svm import SVR

In [18]:
data = pd.DataFrame(pd.read_csv('datasets/climate_change_impact_on_agriculture_2024.csv'))

In [19]:
data.head()

Unnamed: 0,Year,Country,Region,Crop_Type,Average_Temperature_C,Total_Precipitation_mm,CO2_Emissions_MT,Crop_Yield_MT_per_HA,Extreme_Weather_Events,Irrigation_Access_%,Pesticide_Use_KG_per_HA,Fertilizer_Use_KG_per_HA,Soil_Health_Index,Adaptation_Strategies,Economic_Impact_Million_USD
0,2001,India,West Bengal,Corn,1.55,447.06,15.22,1.737,8,14.54,10.08,14.78,83.25,Water Management,808.13
1,2024,China,North,Corn,3.23,2913.57,29.82,1.737,8,11.05,33.06,23.25,54.02,Crop Rotation,616.22
2,2001,France,Ile-de-France,Wheat,21.11,1301.74,25.75,1.719,5,84.42,27.41,65.53,67.78,Water Management,796.96
3,2001,Canada,Prairies,Coffee,27.85,1154.36,13.91,3.89,5,94.06,14.38,87.58,91.39,No Adaptation,790.32
4,1998,India,Tamil Nadu,Sugarcane,2.19,1627.48,11.81,1.08,9,95.75,44.35,88.08,49.61,Crop Rotation,401.72


In [20]:
data.tail()

Unnamed: 0,Year,Country,Region,Crop_Type,Average_Temperature_C,Total_Precipitation_mm,CO2_Emissions_MT,Crop_Yield_MT_per_HA,Extreme_Weather_Events,Irrigation_Access_%,Pesticide_Use_KG_per_HA,Fertilizer_Use_KG_per_HA,Soil_Health_Index,Adaptation_Strategies,Economic_Impact_Million_USD
9995,2022,France,Nouvelle-Aquitaine,Cotton,30.48,685.93,17.64,3.033,9,27.56,41.96,10.95,43.41,No Adaptation,1483.06
9996,1999,Australia,Queensland,Soybeans,9.53,2560.38,10.68,2.56,4,77.02,5.45,82.32,59.39,No Adaptation,829.61
9997,2000,Argentina,Patagonia,Coffee,31.92,357.76,26.01,1.161,10,78.53,11.94,26.0,41.46,Water Management,155.99
9998,1996,Brazil,Southeast,Soybeans,13.95,1549.52,17.31,3.348,2,42.65,44.71,25.07,75.1,Crop Rotation,1613.9
9999,2015,China,South,Corn,11.78,1676.25,5.34,3.71,5,46.41,48.28,98.27,59.38,Water Management,453.14


In [21]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 15 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   Year                         10000 non-null  int64  
 1   Country                      10000 non-null  object 
 2   Region                       10000 non-null  object 
 3   Crop_Type                    10000 non-null  object 
 4   Average_Temperature_C        10000 non-null  float64
 5   Total_Precipitation_mm       10000 non-null  float64
 6   CO2_Emissions_MT             10000 non-null  float64
 7   Crop_Yield_MT_per_HA         10000 non-null  float64
 8   Extreme_Weather_Events       10000 non-null  int64  
 9   Irrigation_Access_%          10000 non-null  float64
 10  Pesticide_Use_KG_per_HA      10000 non-null  float64
 11  Fertilizer_Use_KG_per_HA     10000 non-null  float64
 12  Soil_Health_Index            10000 non-null  float64
 13  Adaptation_Strate

In [22]:
data.boxplot()

<Axes: >

In [23]:
data.isnull().sum()

Year                           0
Country                        0
Region                         0
Crop_Type                      0
Average_Temperature_C          0
Total_Precipitation_mm         0
CO2_Emissions_MT               0
Crop_Yield_MT_per_HA           0
Extreme_Weather_Events         0
Irrigation_Access_%            0
Pesticide_Use_KG_per_HA        0
Fertilizer_Use_KG_per_HA       0
Soil_Health_Index              0
Adaptation_Strategies          0
Economic_Impact_Million_USD    0
dtype: int64

In [24]:
data.describe() 

Unnamed: 0,Year,Average_Temperature_C,Total_Precipitation_mm,CO2_Emissions_MT,Crop_Yield_MT_per_HA,Extreme_Weather_Events,Irrigation_Access_%,Pesticide_Use_KG_per_HA,Fertilizer_Use_KG_per_HA,Soil_Health_Index,Economic_Impact_Million_USD
count,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0
mean,2007.0887,15.241299,1611.663834,15.246608,2.240017,4.9809,55.248332,24.955735,49.973708,64.901278,674.269658
std,10.084245,11.466955,805.016815,8.589423,0.998342,3.165808,25.988305,14.490962,28.711027,20.195882,414.591431
min,1990.0,-4.99,200.15,0.5,0.45,0.0,10.01,0.0,0.01,30.0,47.84
25%,1999.0,5.43,925.6975,7.76,1.449,2.0,32.6775,12.5275,25.39,47.235,350.545
50%,2007.0,15.175,1611.16,15.2,2.17,5.0,55.175,24.93,49.635,64.65,583.92
75%,2016.0,25.34,2306.9975,22.82,2.93,8.0,77.5825,37.47,74.825,82.4725,917.505
max,2024.0,35.0,2999.67,30.0,5.0,10.0,99.99,49.99,99.99,100.0,2346.47


In [25]:
# identify the categorical variables
categorical = data.select_dtypes(include = ['object'])
categorical.head()

Unnamed: 0,Country,Region,Crop_Type,Adaptation_Strategies
0,India,West Bengal,Corn,Water Management
1,China,North,Corn,Crop Rotation
2,France,Ile-de-France,Wheat,Water Management
3,Canada,Prairies,Coffee,No Adaptation
4,India,Tamil Nadu,Sugarcane,Crop Rotation


In [26]:
# convert the categorical variables into numerical variables using label encoding
from sklearn.preprocessing import LabelEncoder
labelencoder = LabelEncoder()

for column in categorical:
    data[column] = labelencoder.fit_transform(data[column])
    print(f'Column {column} has been encoded.')
    print(data[column].value_counts())
    print('------------------------------------')
    print()


Column Country has been encoded.
Country
9    1032
1    1032
4    1031
7    1029
6    1025
3     984
0     984
5     978
8     961
2     944
Name: count, dtype: int64
------------------------------------

Column Region has been encoded.
Region
24    754
11    752
8     524
1     466
20    288
29    283
7     276
2     273
26    270
15    269
32    267
10    264
30    261
25    260
31    257
3     254
22    253
6     253
14    252
17    249
28    249
12    247
23    246
18    243
0     242
19    236
4     236
9     235
21    230
13    227
16    224
5     221
33    220
27    219
Name: count, dtype: int64
------------------------------------

Column Crop_Type has been encoded.
Crop_Type
9    1047
3    1044
8    1036
2    1022
5    1022
7     995
4     979
6     958
0     952
1     945
Name: count, dtype: int64
------------------------------------

Column Adaptation_Strategies has been encoded.
Adaptation_Strategies
4    2049
2    2024
1    1995
3    1975
0    1957
Name: count, dtype: int6

In [27]:
# allocate x and y coordinates
x = data.drop('Economic_Impact_Million_USD', axis = 1)
y = data['Economic_Impact_Million_USD']

In [28]:
# split the data into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 42)

In [29]:
svr = SVR(kernel='linear', C=1.0, epsilon=0.1)
svr.fit(x_train, y_train)

In [30]:
# model score
print("Model score: ", svr.score(x_test, y_test))

Model score:  0.5453937718679585


In [31]:
from sklearn.tree import DecisionTreeRegressor
rgt = DecisionTreeRegressor(max_depth=3)
rgt.fit(x_train, y_train)

In [32]:
y_pred = rgt.predict(x_test)

# get model score
score = rgt.score(x_test, y_test)
score

0.5429435625631038