In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM

In [7]:
df = pd.read_csv('/content/onion_12_24_clean.csv')
# Convert 'Price Date' to datetime
df['Price Date'] = pd.to_datetime(df['Price Date'], errors='coerce')
df.dropna(subset=['Price Date'], inplace=True)

# Extract Year, Month, Day
df['Year'] = df['Price Date'].dt.year
df['Month'] = df['Price Date'].dt.month
df['Day'] = df['Price Date'].dt.day
# sample year
df = df[df['Year'] == 2018]

# Reset the index for convenience
df.reset_index(drop=True, inplace=True)

In [8]:
# Calculate average price
df['Average Price'] = (df['Min Price (Rs./Quintal)'] + df['Max Price (Rs./Quintal)'] + df['Modal Price (Rs./Quintal)']) / 3

# Categorize the average price into 'Low', 'Mid', 'High'
df['Price Category'] = pd.cut(df['Average Price'], bins=[0, 1000, 2000, float('inf')], labels=['Low', 'Mid', 'High'])

In [9]:
df.head()

Unnamed: 0,District Name,Commodity,Min Price (Rs./Quintal),Max Price (Rs./Quintal),Modal Price (Rs./Quintal),Price Date,Year,Month,Day,Average Price,Price Category
0,Fazilka,Onion,550.0,950.0,750.0,2018-04-13,2018,4,13,750.0,Low
1,Fazilka,Onion,600.0,950.0,800.0,2018-05-22,2018,5,22,783.333333,Low
2,Fazilka,Onion,700.0,1250.0,1050.0,2018-09-13,2018,9,13,1000.0,Low
3,Fazilka,Onion,750.0,1100.0,900.0,2018-10-01,2018,10,1,916.666667,Low
4,Fazilka,Onion,750.0,1100.0,900.0,2018-09-26,2018,9,26,916.666667,Low


In [10]:
# Extra: LSTM Model
def create_lstm_model(input_shape):
    model = Sequential()
    model.add(LSTM(50, return_sequences=True, input_shape=input_shape))
    model.add(LSTM(50, return_sequences=False))
    model.add(Dense(25))
    model.add(Dense(1, activation='linear'))
    model.compile(optimizer='adam', loss='mse')
    return model

# Reshape data for LSTM
X_lstm = df[['Average Price']].values
scaler_lstm = StandardScaler()
X_lstm = scaler_lstm.fit_transform(X_lstm)

# Create dataset for LSTM
def create_dataset(X, time_step=1):
    Xs, ys = [], []
    for i in range(len(X) - time_step - 1):
        Xs.append(X[i:(i + time_step), 0])
        ys.append(X[i + time_step, 0])
    return np.array(Xs), np.array(ys)

time_step = 10
X_lstm, y_lstm = create_dataset(X_lstm, time_step)
X_lstm = X_lstm.reshape(X_lstm.shape[0], X_lstm.shape[1], 1)

# Split data
train_size = int(len(X_lstm) * 0.7)
X_train_lstm, X_test_lstm = X_lstm[:train_size], X_lstm[train_size:]
y_train_lstm, y_test_lstm = y_lstm[:train_size], y_lstm[train_size:]


In [11]:
# Train LSTM Model
lstm_model = create_lstm_model((X_train_lstm.shape[1], 1))
lstm_model.fit(X_train_lstm, y_train_lstm, epochs=10, batch_size=1, verbose=1)
lstm_predictions = lstm_model.predict(X_test_lstm)

  super().__init__(**kwargs)


Epoch 1/10
[1m65680/65680[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m503s[0m 8ms/step - loss: 0.5187
Epoch 2/10
[1m65680/65680[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m494s[0m 8ms/step - loss: 0.4748
Epoch 3/10
[1m65680/65680[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m485s[0m 7ms/step - loss: 0.4671
Epoch 4/10
[1m65680/65680[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m493s[0m 7ms/step - loss: 0.4549
Epoch 5/10
[1m65680/65680[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m499s[0m 7ms/step - loss: 0.4700
Epoch 6/10
[1m65680/65680[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m465s[0m 7ms/step - loss: 0.4543
Epoch 7/10
[1m65680/65680[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m463s[0m 7ms/step - loss: 0.4435
Epoch 8/10
[1m65680/65680[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m512s[0m 7ms/step - loss: 0.4545
Epoch 9/10
[1m65680/65680[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m505s[0m 7ms/step - loss: 0.4532
Epoch 10/10
[1m65680/65680[0m [32m

In [14]:
from sklearn.metrics import mean_squared_error, r2_score

# Evaluate regression metrics
mse = mean_squared_error(y_test_lstm, lstm_predictions)
r2 = r2_score(y_test_lstm, lstm_predictions)

print("Mean Squared Error (MSE):", mse)
print("R-squared (R2):", r2)

Mean Squared Error (MSE): 0.4663810411443935
R-squared (R2): 0.5256075319070306


In [15]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Threshold predictions to binary
threshold = 0.5
binary_predictions = (lstm_predictions > threshold).astype(int)

# Ensure y_test_lstm is binary
y_test_lstm_binary = (y_test_lstm > threshold).astype(int)

# Calculate classification metrics
accuracy = accuracy_score(y_test_lstm_binary, binary_predictions)
conf_matrix = confusion_matrix(y_test_lstm_binary, binary_predictions)
class_report = classification_report(y_test_lstm_binary, binary_predictions)

print("Accuracy:", accuracy)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", class_report)


Accuracy: 0.8819851504493943
Confusion Matrix:
 [[20539  1853]
 [ 1469  4288]]
Classification Report:
               precision    recall  f1-score   support

           0       0.93      0.92      0.93     22392
           1       0.70      0.74      0.72      5757

    accuracy                           0.88     28149
   macro avg       0.82      0.83      0.82     28149
weighted avg       0.89      0.88      0.88     28149



In [16]:
print("Accuracy: {:.2f}%".format(accuracy * 100))

Accuracy: 88.20%
