In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the crop-price records used for training and evaluation.
df = pd.read_csv('dataset.csv')

# Split data into features (X) and target variable (y)
X = df.drop(columns=['Modal Price'])  # All columns except 'Modal Price' are features
y = df['Modal Price']

print(X.shape)

# Define the columns to be one-hot encoded
categorical_cols = ['State','District','Market','Commodity','Variety','Grade','Weekday']

# Create a column transformer
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(), categorical_cols)
    ],
    remainder='passthrough'  # Pass through any other columns
)

# Apply the preprocessor to the feature columns (X)
X_encoded = preprocessor.fit_transform(X)

print(X_encoded.shape)

# Hold out 20% of the rows for evaluation; the seed is fixed for repeatable splits.
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y, test_size = 0.2, random_state = 0)

# Scale by standard deviation only (no centering).
# NOTE(review): with_mean=False is presumably needed because the encoded
# matrix is sparse -- confirm against the OneHotEncoder output type.
sc = StandardScaler(with_mean=False)
X_train = sc.fit_transform(X_train)  # statistics are learned from the training rows only
X_test = sc.transform(X_test)

from sklearn.ensemble import RandomForestRegressor
regressor = RandomForestRegressor(n_estimators = 10, random_state = 0)
regressor.fit(X_train, y_train)

# Show predictions next to the held-out targets as a two-column array.
y_pred = regressor.predict(X_test)
np.set_printoptions(precision=2)
y_test_array = y_test.values.reshape(-1, 1)
comparison = np.concatenate((y_pred.reshape(-1, 1), y_test_array), axis=1)
print("Predicted vs. Actual Values:")
print(comparison)

# Keep and print the score: a bare expression in the middle of a cell is
# evaluated and then silently discarded, so the metric was never shown.
from sklearn.metrics import r2_score
r2 = r2_score(y_test, y_pred)
print("R-squared:", r2)

# Collect a single record to price from the user.
user_state = input("Enter the state: ")
user_district = input("Enter the district: ")
user_market = input("Enter the market: ")
user_commodity = input("Enter the commodity: ")
user_variety = input("Enter the variety: ")
user_grade = input("Enter the grade: ")
user_weekday = input("Enter the Weekday: ")
# Prices are converted to float up front so the passthrough columns receive
# numeric values instead of raw strings; float() raises ValueError on
# non-numeric text, which surfaces bad input immediately.
user_minp = float(input("Enter the min price: "))
user_maxp = float(input("Enter the max price: "))

# State,District,Market,Commodity,Variety,Grade,Weekday,Min Price,Max Price,Modal Price
# Gujarat,Amreli,Damnagar,Bhindi(Ladies Finger),Bhindi,FAQ,Thursday,4100,4500,4350

# One-row frame whose column names match the feature columns used for training.
user_input = pd.DataFrame({
    'State': [user_state],
    'District': [user_district],
    'Market': [user_market],
    'Commodity': [user_commodity],
    'Variety': [user_variety],
    'Grade': [user_grade],
    'Weekday': [user_weekday],
    'Min Price': [user_minp],
    'Max Price': [user_maxp]
})

# Save the scaler during training
from joblib import dump, load
dump(sc, 'standard_scaler.joblib')

# Load the scaler for user input
sc = load('standard_scaler.joblib')

# Encode first, then scale: the scaler was fitted on the encoded matrix, so it
# must be applied to the preprocessor's output. Scaling before encoding
# referenced a variable that did not exist yet (NameError on a fresh kernel),
# and the later re-assignment would have dropped the scaling anyway.
user_input_preprocessed = preprocessor.transform(user_input)
user_input_preprocessed = sc.transform(user_input_preprocessed)
print(user_input_preprocessed)

predicted_price = regressor.predict(user_input_preprocessed)
print(predicted_price)

print("Predicted Crop Price:", predicted_price[0])

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
from joblib import dump, load  # Import joblib for saving and loading the scaler

# Read the crop-price table from disk
df = pd.read_csv('dataset.csv')

# 'Modal Price' is the prediction target; every remaining column is a feature
y = df['Modal Price']
X = df.drop(columns=['Modal Price'])

print(X.shape)

# Columns that are one-hot encoded before modelling
categorical_cols = ['State','District','Market','Commodity','Variety','Grade','Weekday']

# One-hot encode the categorical columns and forward all other columns unchanged
one_hot_step = ('cat', OneHotEncoder(), categorical_cols)
preprocessor = ColumnTransformer(transformers=[one_hot_step], remainder='passthrough')

# Fit the encoder on the feature frame and produce the model matrix
X_encoded = preprocessor.fit_transform(X)

print(X_encoded.shape)

# 80/20 train/test split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X_encoded,
    y,
    test_size=0.2,
    random_state=0,
)

# Variance-only scaling: fit on the training rows, then apply to both splits
sc = StandardScaler(with_mean=False)
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

# Ten-tree random forest with a fixed seed, fitted on the training split
regressor = RandomForestRegressor(n_estimators=10, random_state=0).fit(X_train, y_train)

# Put predictions and held-out targets side by side as two columns
y_pred = regressor.predict(X_test)
np.set_printoptions(precision=2)
y_test_array = y_test.to_numpy().reshape(-1, 1)
comparison = np.hstack([y_pred.reshape(-1, 1), y_test_array])
print("Predicted vs. Actual Values:")
print(comparison)

# Report the coefficient of determination on the held-out split
r2 = r2_score(y_test, y_pred)
print("R-squared:", r2)

# Write the fitted scaler to disk, then read it back under the same name
scaler_path = 'standard_scaler.joblib'
dump(sc, scaler_path)
sc = load(scaler_path)



(23093, 9)
(23093, 2417)
Predicted vs. Actual Values:
[[ 2018.5  2015. ]
 [ 1200.   1000. ]
 [ 5000.   5000. ]
 ...
 [ 3789.   3820. ]
 [ 3000.   3000. ]
 [10253.  10250. ]]
R-squared: 0.9553284206670105


In [None]:
# Dataset columns: State,District,Market,Commodity,Variety,Grade,Weekday,Min Price,Max Price,Modal Price
# Example row:     Gujarat,Amreli,Damnagar,Bhindi(Ladies Finger),Bhindi,FAQ,Thursday,4100,4500,4350

In [5]:

# User input: one record to price, entered interactively.
user_state = input("Enter the state: ")
user_district = input("Enter the district: ")
user_market = input("Enter the market: ")
user_commodity = input("Enter the commodity: ")
user_variety = input("Enter the variety: ")
user_grade = input("Enter the grade: ")
user_weekday = input("Enter the Weekday: ")
# input() returns str; convert the prices so the passthrough columns carry
# numeric values rather than relying on implicit coercion downstream.
# float() raises ValueError if the text is not a number.
user_minp = float(input("Enter the min price: "))
user_maxp = float(input("Enter the max price: "))

# Create a one-row DataFrame whose columns match the training feature names
user_input = pd.DataFrame({
    'State': [user_state],
    'District': [user_district],
    'Market': [user_market],
    'Commodity': [user_commodity],
    'Variety': [user_variety],
    'Grade': [user_grade],
    'Weekday': [user_weekday],
    'Min Price': [user_minp],
    'Max Price': [user_maxp]
})

# Preprocess user input and scale it (same order as training: encode, then scale)
user_input_preprocessed = preprocessor.transform(user_input)
user_input_preprocessed = sc.transform(user_input_preprocessed)

# Predict the crop price for user input
predicted_price = regressor.predict(user_input_preprocessed)

print("Predicted Crop Price:", predicted_price[0])

# State,District,Market,Commodity,Variety,Grade,Weekday,Min Price,Max Price,Modal Price
# Gujarat,Amreli,Damnagar,Bhindi(Ladies Finger),Bhindi,FAQ,Thursday,4100,4500,4350

Enter the state: Gujarat
Enter the district: Amreli
Enter the market: Damnagar
Enter the commodity: Bhindi(Ladies Finger)
Enter the variety: Bhindi
Enter the grade: FAQ
Enter the Weekday: Thursday
Enter the min price: 4100
Enter the max price: 4500
Predicted Crop Price: 4330.0
