# In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.tree import DecisionTreeRegressor
from tkinter import *
from tkinter import messagebox

# Load the yield table, remove the 'Unnamed: 0' column (presumably an index
# written out by an earlier export -- TODO confirm) and drop repeated rows.
df = (
    pd.read_csv('yield_df.csv')
    .drop(columns='Unnamed: 0')
    .drop_duplicates()
)

def isStr(obj):
    """Return True when *obj* cannot be converted with ``float()``.

    Args:
        obj: Any value; typically a cell from a DataFrame column.

    Returns:
        ``False`` if ``float(obj)`` succeeds, ``True`` if the conversion is
        rejected (wrong type, unparsable text, or an int too large for a float).
    """
    try:
        float(obj)
    # Only the errors float() raises for unconvertible input are treated as
    # "not a number"; KeyboardInterrupt/SystemExit must not be swallowed.
    except (TypeError, ValueError, OverflowError):
        return True
    return False

# --- Clean the rainfall column ---------------------------------------------
# Rows whose rainfall entry isStr() flags as non-numeric are removed, after
# which the column is stored as float64.
to_drop = df[df['average_rain_fall_mm_per_year'].apply(isStr)].index
df = df.drop(index=to_drop)
df['average_rain_fall_mm_per_year'] = df['average_rain_fall_mm_per_year'].astype(np.float64)

# --- Arrange columns: six features first, the target last -------------------
col = [
    'Year',
    'average_rain_fall_mm_per_year',
    'pesticides_tonnes',
    'avg_temp',
    'Area',
    'Item',
    'hg/ha_yield',
]
df = df[col]
X = df[col[:-1]]
y = df[col[-1]]

# --- 80/20 shuffled split with a fixed seed ---------------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X, y, shuffle=True, random_state=0, train_size=0.8
)

# --- Preprocessing -----------------------------------------------------------
# Positions 0-3 (Year, rainfall, pesticides, avg_temp) are standardised and
# positions 4-5 (Area, Item) are one-hot encoded with the first level dropped.
# Every feature column is claimed by one of the two transformers, so the
# 'passthrough' remainder has no columns left to forward.
ohe = OneHotEncoder(drop='first')
scale = StandardScaler()

preprocesser = ColumnTransformer(
    [
        ('StandardScale', scale, [0, 1, 2, 3]),
        ('OHE', ohe, [4, 5]),
    ],
    remainder='passthrough',
)

# Fit on the training rows only; the test rows reuse the fitted transformers.
# X_test_dummy is not used again in this section.
X_train_dummy = preprocesser.fit_transform(X_train)
X_test_dummy = preprocesser.transform(X_test)

# --- Model -------------------------------------------------------------------
# fit() returns the estimator itself, so construction and training are chained.
dtr = DecisionTreeRegressor().fit(X_train_dummy, y_train)

def prediction(Year, average_rain_fall_mm_per_year, pesticides_tonnes, avg_temp, Area, Item):
    """Predict the yield for a single observation.

    The six arguments are packed into one row, in the same order as the
    training feature columns, transformed with the module-level
    ``preprocesser`` and passed to the module-level ``dtr`` model.

    Returns:
        The first row of the model output after reshaping it to ``(1, -1)``.
    """
    row = [Year, average_rain_fall_mm_per_year, pesticides_tonnes, avg_temp, Area, Item]
    # dtype=object keeps the numbers and the two text values side by side
    # instead of letting NumPy coerce the whole row to strings.
    sample = np.array([row], dtype=object)
    encoded = preprocesser.transform(sample)
    raw_output = dtr.predict(encoded)
    return raw_output.reshape(1, -1)[0]

def predict_yield():
    """Button callback: read the form, run the model and show the result.

    Reads the six module-level Entry widgets, converts the numeric fields,
    calls ``prediction`` and writes the outcome to ``result_label``. Any
    ``ValueError`` raised while parsing the fields or while predicting is
    reported through an error dialog and the label is left unchanged.
    """
    try:
        Year = int(year_entry.get())
        average_rain_fall_mm_per_year = float(rainfall_entry.get())
        pesticides_tonnes = float(pesticides_entry.get())
        avg_temp = float(temp_entry.get())
        # Strip stray leading/trailing whitespace so it does not alter the
        # text values handed to the encoder.
        Area = area_entry.get().strip()
        Item = crop_entry.get().strip()

        result = prediction(Year, average_rain_fall_mm_per_year, pesticides_tonnes, avg_temp, Area, Item)
        # prediction() hands back a one-element array; unwrap it so the label
        # shows a plain number rather than the array's bracketed repr.
        value = float(np.ravel(result)[0])
    except ValueError:
        messagebox.showerror("Error", "Please enter valid input values.")
        return

    result_label.config(text=f"Predicted Yield: {value}")

def _labelled_entry(parent, row, caption):
    """Grid a caption Label (column 0) and an Entry (column 1) on *row*; return both."""
    label = Label(parent, text=caption)
    label.grid(row=row, column=0)
    entry = Entry(parent)
    entry.grid(row=row, column=1)
    return label, entry


# Main window.
root = Tk()
root.title("Crop Yield Prediction")

# One form row per model input, top to bottom. The *_entry names are read by
# predict_yield(), so they stay at module level.
year_label, year_entry = _labelled_entry(root, 0, "Year:")
rainfall_label, rainfall_entry = _labelled_entry(root, 1, "Rainfall (mm/year):")
pesticides_label, pesticides_entry = _labelled_entry(root, 2, "Pesticides (tonnes):")
temp_label, temp_entry = _labelled_entry(root, 3, "Average Temperature:")
area_label, area_entry = _labelled_entry(root, 4, "Area:")
crop_label, crop_entry = _labelled_entry(root, 5, "Crop:")

# Button that triggers the prediction, spanning both columns.
predict_button = Button(root, command=predict_yield, text="Predict Yield")
predict_button.grid(row=6, column=0, columnspan=2)

# Initially empty label that predict_yield() fills with the result.
result_label = Label(root, text="")
result_label.grid(row=7, column=0, columnspan=2)

# Hand control to Tk's event loop.
root.mainloop()


