# In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import mean_squared_error
from sklearn.compose import ColumnTransformer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, LSTM, Dense, Dropout
from sklearn.tree import DecisionTreeRegressor
import pickle

# Load the dataset, discard the 'Unnamed: 0' column and remove duplicate rows.
df = (
    pd.read_csv('C:/Users/abdul/OneDrive/Desktop/ByteWise_ML/Project/yield_df.csv')
    .drop(columns=['Unnamed: 0'])
    .drop_duplicates()
)

# Remove non-numeric rows from 'average_rain_fall_mm_per_year'
def isstr(obj):
    """Return True when *obj* cannot be converted to a float.

    Used as a row filter: values for which ``float(obj)`` succeeds are
    considered numeric (result ``False``); anything else is considered
    non-numeric (result ``True``).

    Only the conversion failures raised by ``float`` itself for unsuitable
    input (``ValueError`` for unparsable text, ``TypeError`` for unsupported
    types such as ``None``) are treated as "not numeric". Other exceptions,
    e.g. ``KeyboardInterrupt``, are deliberately allowed to propagate instead
    of being swallowed.
    """
    try:
        float(obj)
    except (TypeError, ValueError):
        return True
    return False

# Flag every row whose rainfall value fails the float conversion in isstr,
# collect the index labels of those rows, and drop them from the frame.
rainfall_is_text = df['average_rain_fall_mm_per_year'].apply(isstr)
to_drop = df.loc[rainfall_is_text].index
df = df.drop(index=to_drop)

# Summing 'hg/ha_yield' per country and crop
# One groupby pass per key column replaces a full-frame boolean scan for every
# unique value. reindex() restores the order returned by unique(), and
# fill_value=0 keeps the previous result (an empty sum) for any key the
# groupby leaves out, such as a missing value.
# Each result is a plain list aligned with `country` / `crops`.
country = df['Area'].unique()
yield_per_country = (
    df.groupby('Area')['hg/ha_yield']
    .sum()
    .reindex(country, fill_value=0)
    .tolist()
)

crops = df['Item'].unique()
yield_per_item = (
    df.groupby('Item')['hg/ha_yield']
    .sum()
    .reindex(crops, fill_value=0)
    .tolist()
)

# Keep only the columns used for modelling; 'hg/ha_yield' is the target.
col = [
    'Year',
    'average_rain_fall_mm_per_year',
    'pesticides_tonnes',
    'avg_temp',
    'Area',
    'Item',
    'hg/ha_yield',
]
df = df[col]

# Separate the target from the predictors.
y = df['hg/ha_yield']
X = df.drop(columns='hg/ha_yield')

# Hold out 20% of the rows as a test set, using a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Preprocessing: one-hot encode the categorical columns (dense output, first
# category dropped) and standardize the numeric columns.
ohe = OneHotEncoder(sparse_output=False, drop='first')
scaler = StandardScaler()

# Columns are addressed by position in X: 4 and 5 are 'Area' and 'Item',
# 0-3 are the numeric features.
preprocessor = ColumnTransformer(
    remainder='passthrough',
    transformers=[
        ('oneHotEncoder', ohe, [4, 5]),
        ('standardization', scaler, [0, 1, 2, 3]),
    ],
)

# Fit on the training split only, then apply the fitted transform to the test
# split; both results are materialised as NumPy arrays for the later reshape.
X_train_preprocessed = np.array(preprocessor.fit_transform(X_train))
X_test_preprocessed = np.array(preprocessor.transform(X_test))

# CNN-LSTM model
# Every preprocessed feature vector is fed to the network as a sequence of
# length n_features with a single channel.
cnn_lstm_model = Sequential()
cnn_lstm_model.add(Conv1D(64, 2, activation='relu', input_shape=(X_train_preprocessed.shape[1], 1)))
cnn_lstm_model.add(MaxPooling1D(2))
cnn_lstm_model.add(LSTM(50, return_sequences=True))
cnn_lstm_model.add(LSTM(50))
cnn_lstm_model.add(Dropout(0.3))
cnn_lstm_model.add(Dense(1, activation='linear'))

# The target is treated as a regression quantity elsewhere in this script
# (DecisionTreeRegressor, mean_squared_error) and the output layer is linear,
# so train with a regression loss. binary_crossentropy is a classification
# loss and is not a valid objective for this target.
# NOTE: the single 'accuracy' metric is kept only because the evaluation code
# later in this file unpacks exactly (loss, metric) from evaluate() and labels
# the second value "Accuracy"; it is not an informative score for regression.
cnn_lstm_model.compile(optimizer='adam', loss='mean_squared_error', metrics=['accuracy'])

# Reshape input for CNN-LSTM: append a trailing channel axis of size 1
X_train_preprocessed_reshaped = np.expand_dims(X_train_preprocessed, axis=-1)
X_test_preprocessed_reshaped = np.expand_dims(X_test_preprocessed, axis=-1)

# Train the CNN-LSTM model with 5 epochs
cnn_lstm_model.fit(
    X_train_preprocessed_reshaped,
    y_train,
    epochs=5,
    batch_size=16,
    validation_data=(X_test_preprocessed_reshaped, y_test),
)

# RNN model
# A single LSTM layer over the same (n_features, 1) input, followed by a
# linear output unit.
rnn_model = Sequential()
rnn_model.add(LSTM(100, activation='relu', input_shape=(X_train_preprocessed.shape[1], 1)))
rnn_model.add(Dense(1, activation='linear'))

# The target is treated as a regression quantity elsewhere in this script
# (DecisionTreeRegressor, mean_squared_error) and the output layer is linear,
# so train with a regression loss. binary_crossentropy is a classification
# loss and is not a valid objective for this target.
# NOTE: the single 'accuracy' metric is kept only because the evaluation code
# later in this file unpacks exactly (loss, metric) from evaluate() and labels
# the second value "Accuracy"; it is not an informative score for regression.
rnn_model.compile(optimizer='adam', loss='mean_squared_error', metrics=['accuracy'])

# Train the RNN model with 5 epochs
rnn_model.fit(
    X_train_preprocessed_reshaped,
    y_train,
    epochs=5,
    batch_size=16,
    validation_data=(X_test_preprocessed_reshaped, y_test),
)

# Decision Tree Regressor model
# random_state is fixed (same seed as the train/test split) so that the fitted
# tree, its test error and the pickled model are reproducible between runs.
dtr_model = DecisionTreeRegressor(random_state=42)
dtr_model.fit(X_train_preprocessed, y_train)

# Test-set mean squared error of the tree's predictions
y_pred_dtr = dtr_model.predict(X_test_preprocessed)
dtr_mse = mean_squared_error(y_test, y_pred_dtr)

def prediction(Year, average_rain_fall_mm_per_year, pesticides_tonnes, avg_temp, Area, Item):
    """Predict the yield for a single observation with the decision tree.

    The six inputs are assembled into a one-row DataFrame, passed through the
    module-level ``preprocessor`` and then through the module-level
    ``dtr_model``.

    Args:
        Year: Year of the observation.
        average_rain_fall_mm_per_year: Rainfall feature value.
        pesticides_tonnes: Pesticide feature value.
        avg_temp: Temperature feature value.
        Area: Category value for the 'Area' column.
        Item: Category value for the 'Item' column.

    Returns:
        The model output reshaped to a 2-D array with a single row.
    """
    # A DataFrame keeps the numeric values numeric and the categories as
    # strings. Packing mixed values into one NumPy array would coerce every
    # entry to a string and drop the column names. The column order must
    # match the feature frame the preprocessor was fitted on.
    features = pd.DataFrame(
        {
            'Year': [Year],
            'average_rain_fall_mm_per_year': [average_rain_fall_mm_per_year],
            'pesticides_tonnes': [pesticides_tonnes],
            'avg_temp': [avg_temp],
            'Area': [Area],
            'Item': [Item],
        }
    )

    # Apply the fitted preprocessing pipeline (OneHotEncoding + Standardization)
    transformed_feature = preprocessor.transform(features)

    # Predict with the trained Decision Tree Regressor; keep the 2-D shape
    # callers already receive.
    predicted = dtr_model.predict(transformed_feature).reshape(1, -1)

    return predicted

# Sample observation used to exercise the prediction function
Year, average_rain_fall_mm_per_year = 2000, 59.0
pesticides_tonnes, avg_temp = 3024.11, 26.55
Area, Item = "Saudi Arabia", "Sorghum"

# Run the prediction for the sample observation and show the result
result = prediction(
    Year=Year,
    average_rain_fall_mm_per_year=average_rain_fall_mm_per_year,
    pesticides_tonnes=pesticides_tonnes,
    avg_temp=avg_temp,
    Area=Area,
    Item=Item,
)
print("Predicted Yield:", result)



# Evaluate models
# The networks predict the same regression target as the decision tree, so
# they are reported with the same score: mean squared error on the test split,
# computed directly from their predictions. Classification accuracy is not a
# meaningful score here and is no longer printed.


def _test_mse(model):
    """Return the test-set mean squared error of a fitted Keras model."""
    predictions = model.predict(X_test_preprocessed_reshaped, verbose=0).ravel()
    # Plain NumPy arithmetic: non-finite predictions yield a NaN score instead
    # of raising and aborting the rest of the script.
    return float(np.mean((np.asarray(y_test, dtype=float) - predictions) ** 2))


# The compiled loss/metric pair is still collected so the existing variable
# names stay available; only the loss is reported.
cnn_lstm_loss, cnn_lstm_accuracy = cnn_lstm_model.evaluate(X_test_preprocessed_reshaped, y_test, verbose=0)
rnn_loss, rnn_accuracy = rnn_model.evaluate(X_test_preprocessed_reshaped, y_test, verbose=0)

cnn_lstm_mse = _test_mse(cnn_lstm_model)
rnn_mse = _test_mse(rnn_model)

print(f'CNN-LSTM Mean Squared Error: {cnn_lstm_mse}, Loss: {cnn_lstm_loss}')
print(f'RNN Mean Squared Error: {rnn_mse}, Loss: {rnn_loss}')
print(f'Decision Tree Regressor Mean Squared Error: {dtr_mse}')

# Persist the three trained models and the fitted preprocessor with pickle,
# one file per object, written in the order listed.
# NOTE(review): Keras documents model.save() as its native serialization path;
# pickling Keras models may not work on every TensorFlow version - confirm.
artifacts = {
    'cnn_lstm_model.pkl': cnn_lstm_model,
    'rnn_model.pkl': rnn_model,
    'dtr_model.pkl': dtr_model,
    'preprocessor.pkl': preprocessor,
}

for filename, artifact in artifacts.items():
    with open(filename, 'wb') as file:
        pickle.dump(artifact, file)
