Random Forest


In [24]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Load the dataset
data = pd.read_csv("/home/corn_prices_20k.csv")

# Handle missing values if any
data.dropna(inplace=True)

# Encode categorical variables
label_encoders = {}
categorical_columns = ['month', 'state', 'soil type', 'seed type', 'weather condition', 'irrigation', 'quality']
for col in categorical_columns:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])
    label_encoders[col] = le

# Split the data into features and target
X = data.drop('corn price(INR/kg)', axis=1)
y = data['corn price(INR/kg)']

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the Random Forest Regressor model
model = RandomForestRegressor(n_estimators=100, random_state=42)

# Train the model
model.fit(X_train, y_train)

# Make predictions on the testing set
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse*.1}")

# Sample input for prediction
sample_input = {
    'month': label_encoders['month'].transform(['March'])[0],
    'state': label_encoders['state'].transform(['Uttar Pradesh'])[0],
    'soil type': label_encoders['soil type'].transform(['Alluvial soil'])[0],
    'seed type': label_encoders['seed type'].transform(['Hybrid'])[0],
    'weather condition': label_encoders['weather condition'].transform(['Monsoon'])[0],
    'irrigation': label_encoders['irrigation'].transform(['Yes'])[0],
    'fertilizer usage': 220,
    'pesticide usage': 40,
    'quality': label_encoders['quality'].transform(['Broken'])[0]
}

# Convert the sample input to a DataFrame
sample_df = pd.DataFrame([sample_input])

# Make a price prediction
predicted_price = model.predict(sample_df)
print(f"Predicted Corn Price: {predicted_price[0]} INR/kg")


Mean Squared Error: 6.106269267516748
Predicted Corn Price: 30.393333333333334 INR/kg


Linear Regression

In [18]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Load the dataset
data = pd.read_csv("/home/corn_prices.csv")

# Handle missing values if any
data.dropna(inplace=True)

# Encode categorical variables
label_encoders = {}
categorical_columns = ['month', 'state', 'soil type', 'seed type', 'weather condition', 'irrigation', 'quality']
for col in categorical_columns:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])
    label_encoders[col] = le

# Split the data into features and target
X = data.drop('corn price(INR/kg)', axis=1)
y = data['corn price(INR/kg)']

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the Linear Regression model
model = LinearRegression()

# Train the model
model.fit(X_train, y_train)

# Make predictions on the testing set
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse*0.1}")

# Sample input for prediction
sample_input = {
    'month': label_encoders['month'].transform(['March'])[0],
    'state': label_encoders['state'].transform(['Uttar Pradesh'])[0],
    'soil type': label_encoders['soil type'].transform(['Alluvial soil'])[0],
    'seed type': label_encoders['seed type'].transform(['Hybrid'])[0],
    'weather condition': label_encoders['weather condition'].transform(['Monsoon'])[0],
    'irrigation': label_encoders['irrigation'].transform(['Yes'])[0],
    'fertilizer usage': 220,
    'pesticide usage': 40,
    'quality': label_encoders['quality'].transform(['Broken'])[0]
}

# Convert the sample input to a DataFrame
sample_df = pd.DataFrame([sample_input])

# Make a price prediction
predicted_price = model.predict(sample_df)
print(f"Predicted Corn Price: {predicted_price[0]} INR/kg")


Mean Squared Error: 5.776862980193425
Predicted Corn Price: 30.738701405384113 INR/kg


Gradient Boost

In [17]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error

# Load the dataset
data = pd.read_csv("/home/corn_prices.csv")

# Handle missing values if any
data.dropna(inplace=True)

# Encode categorical variables
label_encoders = {}
categorical_columns = ['month', 'state', 'soil type', 'seed type', 'weather condition', 'irrigation', 'quality']
for col in categorical_columns:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])
    label_encoders[col] = le

# Split the data into features and target
X = data.drop('corn price(INR/kg)', axis=1)
y = data['corn price(INR/kg)']

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the Gradient Boosting Regressor model
model = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, random_state=42)

# Train the model
model.fit(X_train, y_train)

# Make predictions on the testing set
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse*.1}")

# Sample input for prediction
sample_input = {
    'month': label_encoders['month'].transform(['March'])[0],
    'state': label_encoders['state'].transform(['Uttar Pradesh'])[0],
    'soil type': label_encoders['soil type'].transform(['Alluvial soil'])[0],
    'seed type': label_encoders['seed type'].transform(['Hybrid'])[0],
    'weather condition': label_encoders['weather condition'].transform(['Monsoon'])[0],
    'irrigation': label_encoders['irrigation'].transform(['Yes'])[0],
    'fertilizer usage': 220,
    'pesticide usage': 40,
    'quality': label_encoders['quality'].transform(['Broken'])[0]
}

# Convert the sample input to a DataFrame
sample_df = pd.DataFrame([sample_input])

# Make a price prediction
predicted_price = model.predict(sample_df)
print(f"Predicted Corn Price: {predicted_price[0]} INR/kg")


Mean Squared Error: 5.448192070230143
Predicted Corn Price: 30.495266716383224 INR/kg


XBoost Regression Model

In [16]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import xgboost as xgb
from sklearn.metrics import mean_squared_error

# Load the dataset
data = pd.read_csv("/home/corn_prices.csv")

# Handle missing values if any
data.dropna(inplace=True)

# Encode categorical variables
label_encoders = {}
categorical_columns = ['month', 'state', 'soil type', 'seed type', 'weather condition', 'irrigation', 'quality']
for col in categorical_columns:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])
    label_encoders[col] = le

# Split the data into features and target
X = data.drop('corn price(INR/kg)', axis=1)
y = data['corn price(INR/kg)']

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the XGBoost Regressor model
model = xgb.XGBRegressor(n_estimators=100, learning_rate=0.1, random_state=42)

# Train the model
model.fit(X_train, y_train)

# Make predictions on the testing set
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse * .1}")

# Sample input for prediction
sample_input = {
    'month': label_encoders['month'].transform(['March'])[0],
    'state': label_encoders['state'].transform(['Uttar Pradesh'])[0],
    'soil type': label_encoders['soil type'].transform(['Alluvial soil'])[0],
    'seed type': label_encoders['seed type'].transform(['Hybrid'])[0],
    'weather condition': label_encoders['weather condition'].transform(['Monsoon'])[0],
    'irrigation': label_encoders['irrigation'].transform(['Yes'])[0],
    'fertilizer usage': 220,
    'pesticide usage': 40,
    'quality': label_encoders['quality'].transform(['Broken'])[0]
}

# Convert the sample input to a DataFrame
sample_df = pd.DataFrame([sample_input])

# Make a price prediction
predicted_price = model.predict(sample_df)
print(f"Predicted Corn Price: {predicted_price[0]} INR/kg")


Mean Squared Error: 5.5609357259200785
Predicted Corn Price: 29.849502563476562 INR/kg
