In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Read the deep-sea sightings table from the CSV file in the working directory
df = pd.read_csv("deepsea.csv")

# Print a heading followed by the first rows as a quick sanity check
print("Dataset Overview:", df.head(), sep="\n")

# Define the RL environment
class MigrationEnvironment:
    """Toy migration environment whose state is a (temperature, salinity) pair.

    The agent moves one unit at a time along either axis, and the reward is
    the negative L1 distance between the current state and a target
    (temperature, salinity) pair.

    Args:
        temperature_range: (low, high) bounds for temperature.
        salinity_range: (low, high) bounds for salinity.
        target_temperature: Temperature at which the reward is maximal.
        target_salinity: Salinity at which the reward is maximal.
        max_steps: Optional episode horizon. When ``None`` (the default),
            ``step`` never reports ``done=True`` and the caller must bound
            the episode length itself.
    """

    def __init__(self, temperature_range, salinity_range,
                 target_temperature=15, target_salinity=33, max_steps=None):
        self.temperature_range = temperature_range
        self.salinity_range = salinity_range
        self.target_temperature = target_temperature
        self.target_salinity = target_salinity
        self.max_steps = max_steps
        self.steps_taken = 0
        self.state = None
        self.reset()

    def reset(self):
        """Start a new episode from a uniformly random state and return it."""
        self.steps_taken = 0
        self.state = (
            np.random.uniform(self.temperature_range[0], self.temperature_range[1]),
            np.random.uniform(self.salinity_range[0], self.salinity_range[1])
        )
        return self.state

    def step(self, action):
        """Apply ``action`` and return ``(state, reward, done)``.

        Actions:
            0: move to colder water (temperature - 1)
            1: move to warmer water (temperature + 1)
            2: move to higher salinity (salinity + 1)
            3: move to lower salinity (salinity - 1)

        Moves are clipped at the configured range bounds. Any other action
        value leaves the state unchanged.
        """
        temperature, salinity = self.state

        if action == 0:
            temperature = max(temperature - 1, self.temperature_range[0])
        elif action == 1:
            temperature = min(temperature + 1, self.temperature_range[1])
        elif action == 2:
            salinity = min(salinity + 1, self.salinity_range[1])
        elif action == 3:
            salinity = max(salinity - 1, self.salinity_range[0])

        self.state = (temperature, salinity)
        self.steps_taken += 1

        # Reward is the negative L1 distance to the preferred conditions,
        # so it peaks at 0 exactly on the target.
        reward = (-abs(temperature - self.target_temperature)
                  - abs(salinity - self.target_salinity))

        # Without a horizon the episode never ends (original behavior).
        done = self.max_steps is not None and self.steps_taken >= self.max_steps
        return self.state, reward, done

# Define the Q-Learning agent
class QLearningAgent:
    """Tabular Q-learning agent over a discretized (temperature, salinity) grid.

    Args:
        env: Environment exposing ``temperature_range`` and ``salinity_range``
            as (low, high) pairs.
        learning_rate: Step size used in the Q-value update.
        discount_factor: Weight given to the best next-state Q-value.
        epsilon: Probability of picking a random action in ``choose_action``.
        n_bins: Number of grid cells per state dimension.
        n_actions: Number of discrete actions.
    """

    def __init__(self, env, learning_rate=0.1, discount_factor=0.9, epsilon=0.1,
                 n_bins=100, n_actions=4):
        self.env = env
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.epsilon = epsilon
        self.n_bins = n_bins
        self.n_actions = n_actions

        # Q-table indexed by [temperature bin, salinity bin, action]
        self.q_table = np.zeros((n_bins, n_bins, n_actions))

    def _to_index(self, value, bounds):
        """Map ``value`` within ``bounds`` to a grid index in [0, n_bins - 1]."""
        low, high = bounds
        span = high - low
        if span == 0:
            # Degenerate range: every value falls in the single usable bin.
            return 0
        index = int((value - low) / span * (self.n_bins - 1))
        # Clamp so out-of-range states cannot wrap around via negative
        # indexing or run past the end of the table.
        return min(max(index, 0), self.n_bins - 1)

    def discretize_state(self, state):
        """Convert a continuous (temperature, salinity) state to grid indices."""
        temperature, salinity = state
        temp_index = self._to_index(temperature, self.env.temperature_range)
        salinity_index = self._to_index(salinity, self.env.salinity_range)
        return temp_index, salinity_index

    def choose_action(self, state):
        """Pick an action with an epsilon-greedy policy.

        With probability ``epsilon`` a uniformly random action is returned;
        otherwise the action with the highest Q-value for ``state`` is chosen.
        """
        if np.random.uniform(0, 1) < self.epsilon:
            return np.random.choice(self.n_actions)  # Random action
        temp_index, salinity_index = self.discretize_state(state)
        return np.argmax(self.q_table[temp_index, salinity_index])

    def update_q_table(self, state, action, reward, next_state):
        """Apply one Q-learning update for the observed transition."""
        temp_index, salinity_index = self.discretize_state(state)
        next_temp_index, next_salinity_index = self.discretize_state(next_state)

        current_q = self.q_table[temp_index, salinity_index, action]
        max_future_q = np.max(self.q_table[next_temp_index, next_salinity_index])
        # Move the current estimate toward reward + discounted best next value.
        td_target = reward + self.discount_factor * max_future_q
        new_q = current_q + self.learning_rate * (td_target - current_q)

        self.q_table[temp_index, salinity_index, action] = new_q

# Initialize environment and agent
env = MigrationEnvironment(temperature_range=(0, 30), salinity_range=(30, 35))
agent = QLearningAgent(env)

# env.step() never reports done=True here, so a bare `while not done` loop
# would spin forever. Every episode is therefore capped at a fixed number
# of steps.
max_steps_per_episode = 100


def run_episode(env, agent, max_steps, learn):
    """Run one episode of at most ``max_steps`` steps and return its total reward.

    When ``learn`` is true the agent's Q-table is updated after every step;
    otherwise the agent only acts.
    """
    state = env.reset()
    total_reward = 0

    for _ in range(max_steps):
        action = agent.choose_action(state)
        next_state, reward, done = env.step(action)
        if learn:
            agent.update_q_table(state, action, reward, next_state)
        state = next_state
        total_reward += reward
        if done:
            break

    return total_reward


# Train the agent
episodes = 1000
rewards = []

for episode in range(episodes):
    rewards.append(run_episode(env, agent, max_steps_per_episode, learn=True))

# Plot training progress
plt.plot(rewards)
plt.xlabel("Episode")
plt.ylabel("Total Reward")
plt.title("Q-Learning Training Progress")
plt.show()

# Evaluate the agent (no Q-table updates during evaluation)
test_episodes = 10
test_rewards = []

for episode in range(test_episodes):
    test_rewards.append(run_episode(env, agent, max_steps_per_episode, learn=False))

print("Average Test Reward:", np.mean(test_rewards))

Dataset Overview:
           Species Name Migration Type  Water Temperature (°C)  \
0         Doomsday Fish       Atypical                      10   
1            Anglerfish        Typical                       5   
2  Southern Right Whale       Atypical                      15   
3   Olive Ridley Turtle       Atypical                      25   
4    Blackspot Seabream       Seasonal                      12   

   Salinity (ppt)        Sighting Location Sighting Date    IUCN Status  
0              35        Offshore Tasmania    2023-02-15     Not Listed  
1              32       Deep Sea, Atlantic    2022-08-20     Not Listed  
2              33  Adventure Bay, Tasmania    2023-06-01     Endangered  
3              30      Unexpected Location    2023-09-10     Vulnerable  
4              34    Seamount, NE Atlantic    2022-04-01  Least Concern  
