In [2]:
import os
import pickle

import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Load the raw dataset from disk.
file_path = 'data/Rice.csv'  # Update this path
data = pd.read_csv(file_path)

# Keep only the two feature columns and the target column, and discard every
# row that is missing a value in any of them.
# The explicit .copy() makes `data` an independent frame: the column
# assignment further down would otherwise write into a slice of the loaded
# frame, which raises SettingWithCopyWarning on pandas versions that do not
# use copy-on-write.
columns = ['tmax_tmax', 'rain_mm', 'rice_name']
data = data[columns].dropna(subset=columns).copy()

# Fail early with a clear message instead of a less obvious error later on
# when nothing is left to train on.
if data.empty:
    raise ValueError(
        f"No complete rows left in {file_path!r} after dropping missing "
        f"values in columns {columns}"
    )

# Replace the rice_name labels with integer class ids. The fitted encoder is
# kept so the ids can be mapped back to names via inverse_transform.
label_encoder = LabelEncoder()
data['rice_name'] = label_encoder.fit_transform(data['rice_name'])

# Features: the two numeric columns; target: the encoded rice_name.
X = data[['tmax_tmax', 'rain_mm']]
y = data['rice_name']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a 100-tree random forest on the training portion. fit() returns the
# estimator itself, so `model` is the fitted classifier.
model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

# Score the held-out rows: fraction of exactly matching class predictions.
# NOTE(review): the recorded run of this cell printed "Accuracy: 0.0". One
# possible cause is that the test labels never occur in the training split
# (e.g. rice_name being nearly unique per row) -- inspect
# y.value_counts() to confirm before relying on this model.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy: {accuracy}")

# Save the model and label encoder.
# open(..., 'wb') does not create missing parent directories, so make sure
# 'models/' exists first; otherwise the write fails with FileNotFoundError
# after training has already completed.
os.makedirs('models', exist_ok=True)

# The classifier and the encoder are stored as separate pickles; the encoder
# is needed to turn predicted class ids back into rice names.
# NOTE: pickle files can execute arbitrary code when loaded -- only load
# these artifacts from a trusted location.
with open('models/rice_model.pkl', 'wb') as file:
    pickle.dump(model, file)

with open('models/rice_label_encoder.pkl', 'wb') as file:
    pickle.dump(label_encoder, file)


Accuracy: 0.0
