In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import plot_tree
import matplotlib.pyplot as plt
from joblib import dump

In [2]:
df = pd.read_csv('keyur_dataset_converted.csv')
display(df)

Unnamed: 0,Region,Population_Density,Avg_Temperature,Sunshine_Hours,Soil_Type,Agricultural_Practices,Crop,Quantity_Produced (in million kg),Market_Price (per kg)
0,Alberta,6.4,-2.5°C,2300,Chernozem,Mixed farming,Canola,7.2,$0.35
1,Alberta,6.4,-2.5°C,2300,Chernozem,Mixed farming,Wheat,12.5,$0.20
2,Alberta,6.4,-2.5°C,2300,Chernozem,Mixed farming,Barley,8.3,$0.25
3,Alberta,6.4,-2.5°C,2300,Chernozem,Mixed farming,Potatoes,2.1,$0.50
4,Alberta,6.4,-2.5°C,2300,Chernozem,Mixed farming,Peas,3.8,$0.40
5,British Columbia,5.0,12.5°C,1800,Podzol,Fruit farming,Blueberries,0.9,$1.50
6,British Columbia,5.0,12.5°C,1800,Podzol,Fruit farming,Apples,1.5,$0.80
7,British Columbia,5.0,12.5°C,1800,Podzol,Fruit farming,Cherries,0.6,$2.00
8,British Columbia,5.0,12.5°C,1800,Podzol,Fruit farming,Grapes,0.8,$1.20
9,British Columbia,5.0,12.5°C,1800,Podzol,Fruit farming,Peaches,0.4,$1.80


In [3]:
le_region = LabelEncoder()
le_soil = LabelEncoder()
le_crop = LabelEncoder()
le_agricultural_practices = LabelEncoder()

In [4]:
df['Region'] = le_region.fit_transform(df['Region'])
df['Soil_Type'] = le_soil.fit_transform(df['Soil_Type'])
df['Crop'] = le_crop.fit_transform(df['Crop'])
df['Agricultural_Practices'] = le_agricultural_practices.fit_transform(df['Agricultural_Practices'])


In [12]:
# Accessing the encoded values and their corresponding classes
encoded_classes_region = le_region.classes_
print("Encoded Classes:", encoded_classes_region)

# Printing encoded values for each class
for cls, encoded_value in zip(encoded_classes_region, range(len(encoded_classes_region))):
    print(f"Class: {cls} - Encoded Value: {encoded_value}")

Encoded Classes: ['Alberta' 'British Columbia' 'Manitoba' 'New Brunswick'
 'Newfoundland and Labrador' 'Nova Scotia' 'Ontario'
 'Prince Edward Island' 'Quebec' 'Saskatchewan']
Class: Alberta - Encoded Value: 0
Class: British Columbia - Encoded Value: 1
Class: Manitoba - Encoded Value: 2
Class: New Brunswick - Encoded Value: 3
Class: Newfoundland and Labrador - Encoded Value: 4
Class: Nova Scotia - Encoded Value: 5
Class: Ontario - Encoded Value: 6
Class: Prince Edward Island - Encoded Value: 7
Class: Quebec - Encoded Value: 8
Class: Saskatchewan - Encoded Value: 9


In [23]:
# Accessing the encoded values and their corresponding classes
encoded_classes_soil = le_soil.classes_
print("Encoded Classes:", encoded_classes_soil)

# Printing encoded values for each class
for cls, encoded_value in zip(encoded_classes_soil, range(len(encoded_classes_soil))):
    print(f"Class: {cls} - Encoded Value: {encoded_value}")

Encoded Classes: ['Chernozem' 'Luvisol' 'Podzol']
Class: Chernozem - Encoded Value: 0
Class: Luvisol - Encoded Value: 1
Class: Podzol - Encoded Value: 2


In [21]:
# Accessing the encoded values and their corresponding classes
encoded_classes_Crop = le_agricultural_practices.classes_
print("Encoded Classes:", encoded_classes_Crop)

# Printing encoded values for each class
for cls, encoded_value in zip(encoded_classes_Crop, range(len(encoded_classes_Crop))):
    print(f"Class: {cls} - Encoded Value: {encoded_value}")

Encoded Classes: ['Dairy farming' 'Fishing' 'Fruit farming' 'Grain farming' 'Mixed farming'
 'Potato farming']
Class: Dairy farming - Encoded Value: 0
Class: Fishing - Encoded Value: 1
Class: Fruit farming - Encoded Value: 2
Class: Grain farming - Encoded Value: 3
Class: Mixed farming - Encoded Value: 4
Class: Potato farming - Encoded Value: 5


In [14]:
# Accessing the encoded values and their corresponding classes
encoded_classes_Agricultural_Practices = le_agricultural_practices.classes_
print("Encoded Classes:", encoded_classes_Agricultural_Practices)

# Printing encoded values for each class
for cls, encoded_value in zip(encoded_classes_Agricultural_Practices, range(len(encoded_classes_Agricultural_Practices))):
    print(f"Class: {cls} - Encoded Value: {encoded_value}")

Encoded Classes: ['Dairy farming' 'Fishing' 'Fruit farming' 'Grain farming' 'Mixed farming'
 'Potato farming']
Class: Dairy farming - Encoded Value: 0
Class: Fishing - Encoded Value: 1
Class: Fruit farming - Encoded Value: 2
Class: Grain farming - Encoded Value: 3
Class: Mixed farming - Encoded Value: 4
Class: Potato farming - Encoded Value: 5


In [5]:
X = df[['Region', 'Soil_Type', 'Agricultural_Practices']]
y = df['Crop']

In [6]:
# Train a decision tree classifier
dt = DecisionTreeClassifier()
dt.fit(X, y)

In [9]:
def predict_crop(region, soil_type, agricultural_practices):
    # Encode the region, soil_type, and agricultural_practices
    region_encoded = le_region.transform([region])[0]
    soil_type_encoded = le_soil.transform([soil_type])[0]
    agricultural_practices_encoded = le_agricultural_practices.transform([agricultural_practices])[0]
    
    # Predict the crop
    crop_encoded = dt.predict([[region_encoded, soil_type_encoded, agricultural_practices_encoded]])[0]
    
    # Decode the predicted crop
    predicted_crop = le_crop.inverse_transform([crop_encoded])[0]
    
    return predicted_crop

In [19]:
region = 'Manitoba'
soil_type = 'Podzol'
agricultural_practices = 'Fruit farming'
predicted_crop = predict_crop(region, soil_type, agricultural_practices)
print(f"The predicted crop for region '{region}', soil type '{soil_type}', and agricultural practices '{agricultural_practices}' is: {predicted_crop}")


The predicted crop for region 'Manitoba', soil type 'Podzol', and agricultural practices 'Fruit farming' is: Apples




In [24]:
dump(dt, '../saved_model/model.joblib')

['../saved_model/model.joblib']