In [1]:
import pandas as pd
import numpy as np

# Seed NumPy's global generator so the synthetic table is identical on every run
np.random.seed(42)

# Pulse varieties a row can be labelled with
pulses = ['Lentils', 'Chickpeas', 'Black Beans', 'Pigeon Peas', 'Green Peas', 'Kidney Beans']

# Number of synthetic rows to generate
num_rows = 10000

# Every column is drawn independently of the others (the price does not depend
# on any feature). The draws must stay in this exact order because each call
# advances the shared global generator.
pulse_type_column = np.random.choice(pulses, size=num_rows)
price_column = np.random.randint(30, 100, size=num_rows)
temperature_column = np.round(np.random.uniform(20, 35, size=num_rows), 2)
rainfall_column = np.round(np.random.uniform(0, 25, size=num_rows), 2)
supply_column = np.random.randint(20, 80, size=num_rows)
demand_column = np.random.randint(20, 80, size=num_rows)

# Column name -> generated values, in the order the columns appear in the CSV
data_pulses = {
    'Pulse Type': pulse_type_column,
    'Price (INR/kg)': price_column,
    'Temperature (°C)': temperature_column,
    'Rainfall (mm)': rainfall_column,
    'Supply (tons)': supply_column,
    'Demand (tons)': demand_column,
}

# Assemble the table and write it to the working directory without the index
df_pulses = pd.DataFrame(data_pulses)
df_pulses.to_csv('pulses_price_prediction_10000.csv', index=False)


In [18]:
# Load the synthetic dataset from the same working-directory-relative path the
# generation cell writes it to, instead of a hardcoded absolute '/content/' path
# that only exists on one particular host.
dataset = pd.read_csv('pulses_price_prediction_10000.csv')

In [4]:
!pip install catboost

Collecting catboost
  Downloading catboost-1.2.7-cp310-cp310-manylinux2014_x86_64.whl.metadata (1.2 kB)
Downloading catboost-1.2.7-cp310-cp310-manylinux2014_x86_64.whl (98.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.7/98.7 MB[0m [31m9.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: catboost
Successfully installed catboost-1.2.7


In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.svm import LinearSVR, SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor


In [19]:
# Preview the first five rows of the freshly loaded table
dataset.head(n=5)

Unnamed: 0,Pulse Type,Price (INR/kg),Temperature (°C),Rainfall (mm),Supply (tons),Demand (tons)
0,Pigeon Peas,31,29.0,8.71,22,51
1,Green Peas,72,28.56,24.06,69,72
2,Black Beans,55,20.58,0.21,35,74
3,Green Peas,67,31.26,7.63,79,44
4,Green Peas,71,34.24,15.23,53,40


In [20]:
# Count how many rows each pulse variety has
dataset.value_counts(subset='Pulse Type')

Unnamed: 0_level_0,count
Pulse Type,Unnamed: 1_level_1
Chickpeas,1692
Green Peas,1689
Pigeon Peas,1672
Lentils,1665
Kidney Beans,1657
Black Beans,1625


In [21]:
# Integer code assigned to each pulse label
pulse_type_codes = {
    'Chickpeas': 0,
    'Green Peas': 1,
    'Pigeon Peas': 2,
    'Lentils': 3,
    'Kidney Beans': 4,
    'Black Beans': 5,
}

# Assign the encoded column back to the frame. Calling replace(..., inplace=True)
# on the `dataset['Pulse Type']` selection is a chained in-place update: it
# raises a FutureWarning in pandas 2.x and leaves `dataset` untouched once
# Copy-on-Write is active.
# Values that are not label keys (e.g. codes produced by an earlier run of this
# cell) pass through unchanged, so re-running the cell is harmless; the final
# astype fails loudly if an unknown label is still present.
dataset['Pulse Type'] = (
    dataset['Pulse Type']
    .map(lambda label: pulse_type_codes.get(label, label))
    .astype('int64')
)

In [22]:
# Preview the first five rows again to check the encoded 'Pulse Type' column
dataset.head(n=5)

Unnamed: 0,Pulse Type,Price (INR/kg),Temperature (°C),Rainfall (mm),Supply (tons),Demand (tons)
0,2,31,29.0,8.71,22,51
1,1,72,28.56,24.06,69,72
2,5,55,20.58,0.21,35,74
3,1,67,31.26,7.63,79,44
4,1,71,34.24,15.23,53,40


In [24]:
# Features: every column except the price; target: the price column itself
X = dataset.drop(columns=['Price (INR/kg)'])
Y = dataset['Price (INR/kg)']

In [25]:
# Preview the first five rows of the feature matrix
X.head(n=5)

Unnamed: 0,Pulse Type,Temperature (°C),Rainfall (mm),Supply (tons),Demand (tons)
0,2,29.0,8.71,22,51
1,1,28.56,24.06,69,72
2,5,20.58,0.21,35,74
3,1,31.26,7.63,79,44
4,1,34.24,15.23,53,40


In [26]:
# Preview the first five values of the target
Y.head(n=5)

Unnamed: 0,Price (INR/kg)
0,31
1,72
2,55
3,67
4,71


In [27]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
)

In [28]:
# Show the shapes of the full, training and test feature matrices on one line
print(f"{X.shape} {X_train.shape} {X_test.shape}")

(10000, 5) (8000, 5) (2000, 5)


In [29]:
# Candidate regressors keyed by a display name. The names are left-padded to a
# common width so the printed report lines up in columns.
# Every estimator that accepts a seed is given a fixed one so the comparison
# produces the same scores on each run, matching the seeded train/test split.
models = {
    "                     Linear Regression": LinearRegression(),
    " Linear Regression (L2 Regularization)": Ridge(),
    " Linear Regression (L1 Regularization)": Lasso(),
    "                   K-Nearest Neighbors": KNeighborsRegressor(),
    "                        Neural Network": MLPRegressor(random_state=42),
    "Support Vector Machine (Linear Kernel)": LinearSVR(random_state=42),
    "   Support Vector Machine (RBF Kernel)": SVR(),
    "                         Decision Tree": DecisionTreeRegressor(random_state=42),
    "                         Random Forest": RandomForestRegressor(random_state=42),
    "                     Gradient Boosting": GradientBoostingRegressor(random_state=42),
    "                               XGBoost": XGBRegressor(random_state=42),
    "                              LightGBM": LGBMRegressor(random_state=42),
    "                              CatBoost": CatBoostRegressor(verbose=0, random_seed=42),
}

# Fit each candidate on the training split, reporting progress as we go
for name, model in models.items():
    model.fit(X_train, Y_train)
    print(name + " trained.")

                     Linear Regression trained.
 Linear Regression (L2 Regularization) trained.
 Linear Regression (L1 Regularization) trained.
                   K-Nearest Neighbors trained.




                        Neural Network trained.




Support Vector Machine (Linear Kernel) trained.
   Support Vector Machine (RBF Kernel) trained.
                         Decision Tree trained.
                         Random Forest trained.
                     Gradient Boosting trained.
                               XGBoost trained.
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001007 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 638
[LightGBM] [Info] Number of data points in the train set: 8000, number of used features: 5
[LightGBM] [Info] Start training from score 64.531625
                              LightGBM trained.
                              CatBoost trained.


In [30]:
# Report each fitted model's coefficient of determination on the held-out split
for label, estimator in models.items():
    test_r2 = estimator.score(X_test, Y_test)
    print(f"{label} R^2 Score: {test_r2:.5f}")

                     Linear Regression R^2 Score: -0.00407
 Linear Regression (L2 Regularization) R^2 Score: -0.00407
 Linear Regression (L1 Regularization) R^2 Score: -0.00323
                   K-Nearest Neighbors R^2 Score: -0.21113
                        Neural Network R^2 Score: -0.01886
Support Vector Machine (Linear Kernel) R^2 Score: -0.05568
   Support Vector Machine (RBF Kernel) R^2 Score: -0.00696
                         Decision Tree R^2 Score: -1.10811
                         Random Forest R^2 Score: -0.08422
                     Gradient Boosting R^2 Score: -0.01790
                               XGBoost R^2 Score: -0.20182
                              LightGBM R^2 Score: -0.04689
                              CatBoost R^2 Score: -0.07120


In [31]:
# Stand-alone linear regression model that is evaluated and saved below
model1 = LinearRegression()

In [32]:
# Fit the linear model on the training split (the cell displays the fitted estimator)
model1.fit(X=X_train, y=Y_train)

In [33]:
# Evaluate the fitted linear model on the data it was trained on
training_data_prediction = model1.predict(X_train)
score1 = metrics.r2_score(Y_train, training_data_prediction)
score2 = metrics.mean_absolute_error(Y_train, training_data_prediction)

# R^2 is a goodness-of-fit score (higher is better), not an error, so it is
# labelled the same way as in the model-comparison report.
print("R^2 Score: ", score1)
print("Mean Absolute Error: ", score2)

R square error:  0.0013893607348636206
Mean Absolute Error:  17.60477947421864


In [34]:
# Evaluate the fitted linear model on the held-out test split
testing_data_prediction = model1.predict(X_test)
score3 = metrics.r2_score(Y_test, testing_data_prediction)
score4 = metrics.mean_absolute_error(Y_test, testing_data_prediction)

# R^2 is a goodness-of-fit score (higher is better), not an error, so it is
# labelled the same way as in the model-comparison report.
print("R^2 Score: ", score3)
print("Mean Absolute Error: ", score4)

R square error:  -0.00406637596902093
Mean Absolute Error:  17.540577764322812


In [35]:
import pickle

# Persist the fitted linear model. The context manager guarantees the file
# handle is flushed and closed even if pickling raises, which the bare
# open(...) passed straight to pickle.dump did not.
with open('/content/Pulses_prices.pkl', 'wb') as model_file:
    pickle.dump(model1, model_file)