In [13]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers
from sklearn.preprocessing import MinMaxScaler

In [14]:
df= pd.read_csv(r'C:\Users\Ayush\OneDrive\Desktop\AI-search-algo\ai-project\datasets\Crop_recommendation.csv')
print(df.head())  # Print the first few rows of the DataFrame to inspect its structure
print(df.columns)  # Print all column names to verify if 'label' is present

    N   P   K  temperature   humidity        ph    rainfall label
0  90  42  43    20.879744  82.002744  6.502985  202.935536  rice
1  85  58  41    21.770462  80.319644  7.038096  226.655537  rice
2  60  55  44    23.004459  82.320763  7.840207  263.964248  rice
3  74  35  40    26.491096  80.158363  6.980401  242.864034  rice
4  78  42  42    20.130175  81.604873  7.628473  262.717340  rice
Index(['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall', 'label'], dtype='object')


In [15]:
# Scale numerical features to a range between 0 and 1
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(df[['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']])

# Combine scaled features and one-hot encode the 'label' column
processed_data = pd.concat([pd.DataFrame(scaled_data, columns=['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']), 
                            pd.get_dummies(df['label'])], axis=1)

print(processed_data.shape)

(2200, 29)


In [16]:
processed_data.head()

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,apple,banana,blackgram,...,mango,mothbeans,mungbean,muskmelon,orange,papaya,pigeonpeas,pomegranate,rice,watermelon
0,0.642857,0.264286,0.19,0.345886,0.790267,0.466264,0.656458,False,False,False,...,False,False,False,False,False,False,False,False,True,False
1,0.607143,0.378571,0.18,0.371445,0.770633,0.54948,0.741675,False,False,False,...,False,False,False,False,False,False,False,False,True,False
2,0.428571,0.357143,0.195,0.406854,0.793977,0.674219,0.87571,False,False,False,...,False,False,False,False,False,False,False,False,True,False
3,0.528571,0.214286,0.175,0.506901,0.768751,0.540508,0.799905,False,False,False,...,False,False,False,False,False,False,False,False,True,False
4,0.557143,0.264286,0.185,0.324378,0.785626,0.641291,0.871231,False,False,False,...,False,False,False,False,False,False,False,False,True,False


In [17]:
class CustomEnvironment:
    def __init__(self, data):
        self.data = data
        self.num_actions = len(data.columns) - 7
        self.state_shape = data.shape[1] - self.num_actions

    def reset(self):
        self.current_step = 0
        self.state = self.data.iloc[self.current_step, :-self.num_actions].values
        return self.state

    def step(self, action):
        self.current_step += 1
        if self.current_step < len(self.data) and self.num_actions < len(self.data.columns):
            next_state = self.data.iloc[self.current_step, :-self.num_actions].values
        else:
            next_state = None
        reward = 0  # Define your reward logic based on the action and state
        done = False  # Define your terminal condition
        return next_state, reward, done

In [18]:
# Create an instance of your custom environment
env = CustomEnvironment(processed_data)

# Define a more complex deep RL model using TensorFlow/Keras
def create_model(input_shape, num_actions):
    inputs = layers.Input(shape=(input_shape,))
    x = layers.Dense(256, activation='relu')(inputs)
    x = layers.Dense(512, activation='relu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.3)(x)
    x = layers.Dense(256, activation='relu')(x)
    x = layers.Dense(128, activation='relu')(x)
    outputs = layers.Dense(num_actions, activation='softmax')(x)  # Softmax for action probabilities
    return models.Model(inputs=inputs, outputs=outputs)


In [19]:
# Initialize model and optimizer
input_shape = env.state_shape
num_actions = env.num_actions
model = create_model(input_shape, num_actions)
optimizer = optimizers.Adam(learning_rate=0.001)

# Define other training parameters
num_episodes = 1
batch_size = 32
epsilon = 1.0
epsilon_decay = 0.999
min_epsilon = 0.01
gamma = 0.99
buffer_size = 10000
replay_buffer = []


In [20]:
from tensorflow.python.client import device_lib

print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 13653953045966682597
xla_global_id: -1
]


In [21]:
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        print(e)


In [22]:
for episode in range(num_episodes):
    state = env.reset()
    episode_reward = 0

    while True:
        # Epsilon-greedy policy for exploration
        if np.random.rand() < epsilon:
            action = np.random.randint(num_actions)
        else:
            q_values = model.predict(state.reshape(1, -1))
            action = np.argmax(q_values)

        next_state, reward, done = env.step(action)
        episode_reward += reward

        # Store experience in replay buffer
        replay_buffer.append((state, action, reward, next_state, done))
        if len(replay_buffer) > buffer_size:
            replay_buffer.pop(0)  # Remove oldest experience if buffer size exceeded

        # Sample mini-batch from replay buffer if buffer size is larger than batch size
        if len(replay_buffer) >= batch_size:
            batch_indices = np.random.choice(len(replay_buffer), batch_size, replace=False)
            batch = [replay_buffer[idx] for idx in batch_indices]

            # Check if any batch item has None in next_state
            if any(item[3] is None for item in batch):
                # Skip this batch and continue with the next iteration
                continue

            # Separate elements of batch into separate lists for easier processing
            states_list, action_list, reward_list, next_states_list, done_list = zip(*batch)

            # Convert lists to numpy arrays
            states_batch = np.array(states_list)
            action_batch = np.array(action_list)
            reward_batch = np.array(reward_list)
            next_states_batch = np.array(next_states_list)
            done_batch = np.array(done_list)

            reward_batch = tf.constant(reward_batch, dtype=tf.float32)
            done_batch = tf.constant(done_batch, dtype=tf.float32)

            # Calculate target Q-values using Bellman equation
            target_q_values = reward_batch + (1 - done_batch) * gamma * tf.reduce_max(
                model.predict(next_states_batch.astype(np.float32)), axis=1
            )

            # Compute loss and perform gradient descent
            with tf.GradientTape() as tape:
                predicted_q_values = model(states_batch)
                predicted_q_values_actions = tf.reduce_sum(
                    predicted_q_values * tf.one_hot(action_batch, num_actions), axis=1
                )
                loss = tf.keras.losses.mean_squared_error(target_q_values, predicted_q_values_actions)
            gradients = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        state = next_state

        if done:
            break

    # Decay epsilon for epsilon-greedy policy
    epsilon = max(min_epsilon, epsilon * epsilon_decay)
    print(f"Episode: {episode + 1}, Episode Reward: {episode_reward}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14

In [None]:
new_environment_params = np.array([70, 40, 35, 25, 70, 6.5, 200])  # Example input

# Scale the new environmental parameters using the same scaler used for training data
scaled_new_params = scaler.transform(new_environment_params.reshape(1, -1))

# Predict action probabilities using the trained model
predicted_action_probs = model.predict(scaled_new_params)

# Get the index of the recommended crop based on the highest predicted action probability
recommended_crop_index = np.argmax(predicted_action_probs)
recommended_crop = processed_data.columns[-(len(processed_data.columns) - 7):][recommended_crop_index]

print("Recommended Crop:", recommended_crop)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
Recommended Crop: mango


