In [1]:
import pandas as pd

# Load the dataset
data = pd.read_csv('arecanut_price.csv')

# Ensure 'Price Date' is in datetime format
data['Price Date'] = pd.to_datetime(data['Price Date'], errors='coerce')

# Drop rows with missing values
data = data.dropna(subset=['Price Date', 'Modal Price (Rs./Quintal)', 'Variety'])

# Extract features for training
data['Year'] = data['Price Date'].dt.year
data['Month'] = data['Price Date'].dt.month

# One-hot encode the 'Variety' column
data_encoded = pd.get_dummies(data, columns=['Variety'], drop_first=False)

# Select relevant columns for training
features = ['Year', 'Month'] + [col for col in data_encoded.columns if 'Variety_' in col]
target = 'Modal Price (Rs./Quintal)'
X = data_encoded[features]
y = data_encoded[target]

print("Training features:")
print(X.head())


  data['Price Date'] = pd.to_datetime(data['Price Date'], errors='coerce')


Training features:
   Year  Month  Variety_Bette  Variety_Bilegotu  Variety_Chali  Variety_Churu  \
0  2011      1          False             False          False          False   
1  2012     11          False             False          False          False   
2  2017      2          False             False          False          False   
3  2020     11          False             False          False          False   
4  2014      2          False             False          False          False   

   Variety_Cqca  Variety_EDI  Variety_Gorabalu  Variety_Kempugotu  ...  \
0         False        False             False              False  ...   
1         False        False             False              False  ...   
2         False        False             False              False  ...   
3         False        False             False              False  ...   
4         False        False             False              False  ...   

   Variety_Other  Variety_Pudi  Variety_Pylone  V

In [7]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate the model
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse:.2f}")


Mean Squared Error: 3642530.33


In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor

# Define the model
model = RandomForestRegressor(random_state=42)

# Define hyperparameter grid
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'max_features': ['auto', 'sqrt', 'log2']
}

# Perform GridSearchCV
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, n_jobs=-1, verbose=2)
grid_search.fit(X_train, y_train)

# Best parameters and model
print("Best hyperparameters:", grid_search.best_params_)
best_model = grid_search.best_estimator_

# Evaluate the tuned model
y_pred = best_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"Tuned Mean Squared Error: {mse:.2f}")


Fitting 5 folds for each of 324 candidates, totalling 1620 fits


In [None]:
pip install xgboost


In [None]:
//Other
import xgboost as xgb
from sklearn.metrics import mean_squared_error

# Define the model
xg_model = xgb.XGBRegressor(objective='reg:squarederror', n_estimators=200, learning_rate=0.05)

# Train the model
xg_model.fit(X_train, y_train)

# Make predictions
y_pred_xgb = xg_model.predict(X_test)

# Evaluate the model
mse_xgb = mean_squared_error(y_test, y_pred_xgb)
print(f"XGBoost Mean Squared Error: {mse_xgb:.2f}")


In [None]:
//Other
from sklearn.preprocessing import StandardScaler

# Scale the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the scaled data
X_train_scaled, X_test_scaled, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Train your model with the scaled data
model.fit(X_train_scaled, y_train)

# Evaluate on scaled test data
y_pred_scaled = model.predict(X_test_scaled)
mse_scaled = mean_squared_error(y_test, y_pred_scaled)
print(f"Mean Squared Error after scaling: {mse_scaled:.2f}")


In [4]:
# User input for variety and prediction date
selected_variety = input(f"Select a variety from {data['Variety'].unique().tolist()}: ")
year = int(input("Enter the year for prediction (e.g., 2025): "))
month = int(input("Enter the month for prediction (1-12): "))

# Create feature vector for prediction
input_features = pd.DataFrame([[year, month]], columns=['Year', 'Month'])

# Add variety columns with default 0 values
for col in [col for col in X.columns if 'Variety_' in col]:
    input_features[col] = 0

# Set the selected variety to 1 in the feature vector
variety_column = f"Variety_{selected_variety}"
if variety_column in input_features.columns:
    input_features[variety_column] = 1
else:
    print(f"Error: Selected variety '{selected_variety}' is not in the training data.")
    exit()

# Predict the price
predicted_price = model.predict(input_features)[0]
print(f"Predicted price for {selected_variety} in {year}-{month}: Rs. {predicted_price:.2f} per quintal.")


Select a variety from ['api', 'Bette', 'Chali', 'EDI', 'Gorabalu', 'Kempugotu', 'Other', 'Pudi', 'Rashi', 'Red', 'Sippegotu', 'Bilegotu', 'White', 'Cqca', 'Pylone', 'Ripe', 'New Variety', 'Saraku', 'Churu', 'Kole']:  Rashi
Enter the year for prediction (e.g., 2025):  2024
Enter the month for prediction (1-12):  11


Predicted price for Rashi in 2024-11: Rs. 47902.81 per quintal.


In [6]:
# User input for variety and prediction date
selected_variety = input(f"Select a variety from {data['Variety'].unique().tolist()}: ")
year = int(input("Enter the year for prediction (e.g., 2025): "))
month = int(input("Enter the month for prediction (1-12): "))

# Create feature vector for prediction
input_features = pd.DataFrame([[year, month]], columns=['Year', 'Month'])

# Add variety columns with default 0 values
for col in [col for col in X.columns if 'Variety_' in col]:
    input_features[col] = 0

# Set the selected variety to 1 in the feature vector
variety_column = f"Variety_{selected_variety}"
if variety_column in input_features.columns:
    input_features[variety_column] = 1
else:
    print(f"Error: Selected variety '{selected_variety}' is not in the training data.")
    exit()

# Predict the price
predicted_price = model.predict(input_features)[0]
print(f"Predicted price for {selected_variety} in {year}-{month}: Rs. {predicted_price:.2f} per quintal.")


Select a variety from ['api', 'Bette', 'Chali', 'EDI', 'Gorabalu', 'Kempugotu', 'Other', 'Pudi', 'Rashi', 'Red', 'Sippegotu', 'Bilegotu', 'White', 'Cqca', 'Pylone', 'Ripe', 'New Variety', 'Saraku', 'Churu', 'Kole']:  bette
Enter the year for prediction (e.g., 2025):  2024
Enter the month for prediction (1-12):  11


Error: Selected variety 'bette' is not in the training data.
Predicted price for bette in 2024-11: Rs. 29898.67 per quintal.
