In [3]:
from load_data import load_json_data

# Load the annotation records from the JSON results file.
# NOTE(review): 1000 and 1520 look like a start/end range of records to load —
# confirm against load_json_data's signature.
data = load_json_data("data/result.json", 1000, 1520)
# Display the last loaded element as a sanity check. In the recorded output it is
# a list of dicts carrying 'filename' and 'sentence1'..'sentence3' keys.
data[-1]

[{'filename': '03577_0',
  'sentence1': 'a big blue building has been built in the middle of the scene ',
  'sentence2': 'the bareland has been replaced by a blue building and a big blue building has been built ',
  'sentence3': 'two big blue buildings have been replaced by a blue building '},
 {'filename': '03577_1',
  'sentence1': 'a small blue building has been constructed on the bareland at the corner of the scene ',
  'sentence2': 'many trees have appeared in many parts of the scene ',
  'sentence3': 'many green trees have been constructed on the bareland and the grassland '},
 {'filename': '03577_2',
  'sentence1': 'a big building has been constructed in many parts of the scene ',
  'sentence2': 'a blue building has been constructed in the green area ',
  'sentence3': 'a big building has been constructed in the green area '},
 {'filename': '03577_3',
  'sentence1': 'two blue buildings have been built in many parts of the scene ',
  'sentence2': 'two blue buildings have been built

In [None]:
# Path: RL_policy.ipynb
# Quick inspection of the extracted features for a single sample id ("02243").
# NOTE(review): the structure of the value returned by load_extracted_features
# is not visible from this notebook — confirm in load_data.
from load_data import load_extracted_features
features = load_extracted_features("02243")
# Show the first entry of the loaded features.
features[0]

#### Prepare Training data

In [None]:
from load_data import load_extracted_features
# Collects one training sample (features, sentences, slider values) per usable record.
training_data = []

for item in data:
    # Each element of `data` is indexable; only its first entry is used here.
    # NOTE(review): the recorded output of data[-1] shows several entries per
    # element ('03577_0', '03577_1', ...) — confirm that ignoring the rest is intended.
    item = item[0]
    # Skip records that carry no slider values.
    # NOTE(review): only 'slider1' is checked; 'slider2'/'slider3' are assumed to
    # be present whenever 'slider1' is — verify against the data.
    if "slider1" not in item:
        continue
    # The filename prefix before the first underscore is the id handed to the feature loader.
    file_name = item['filename'].split("_")[0]
    print(file_name)
    features = load_extracted_features(file_name)
    # The three sentences and the three slider values are stored in matching order.
    training_data.append({
        "features": features,
        "sentences": [item["sentence1"], item["sentence2"], item["sentence3"]],
        "slider_values": [item["slider1"], item["slider2"], item["slider3"]]
    })
# Display the first prepared sample.
training_data[0]

#### Q-Learning Class
* Define a tabular Q-learning model with epsilon-greedy action selection and a one-step temporal-difference update

In [4]:
import numpy as np
import random
import tqdm
import matplotlib.pyplot as plt

class QLearningModel:
    """Tabular Q-learning agent with epsilon-greedy action selection.

    The agent keeps a ``(state_size, action_size)`` table of action values,
    initialised to zero, and refines it with one-step temporal-difference
    updates.
    """

    def __init__(self, state_size, action_size, learning_rate=0.1, discount_rate=0.9, exploration_rate=0.5):
        """Store the hyper-parameters and allocate a zero-filled Q-table.

        Args:
            state_size: Number of rows (states) in the Q-table.
            action_size: Number of columns (actions) in the Q-table.
            learning_rate: Step size applied to each TD error.
            discount_rate: Weight given to the best next-state value.
            exploration_rate: Probability threshold for picking a random action.
        """
        self.state_size, self.action_size = state_size, action_size
        self.learning_rate = learning_rate
        self.discount_rate = discount_rate
        self.exploration_rate = exploration_rate
        self.q_table = np.zeros(shape=(state_size, action_size))

    def choose_action(self, state):
        """Pick an action for ``state``: random with probability ``exploration_rate``, else greedy.

        Returns:
            The index of the chosen action.
        """
        explore = random.uniform(0, 1) < self.exploration_rate
        if not explore:
            # Exploit: the action with the highest current estimate.
            return np.argmax(self.q_table[state])
        # Explore: uniformly random action index (upper bound is inclusive).
        return random.randint(0, self.action_size - 1)

    def update_q_table(self, state, action, reward, next_state):
        """Apply one Q-learning update for the transition (state, action, reward, next_state)."""
        # Value of the greedy action in the successor state.
        best_next_value = np.max(self.q_table[next_state])
        td_target = reward + self.discount_rate * best_next_value

        current_row = self.q_table[state]
        td_error = td_target - current_row[action]
        current_row[action] += self.learning_rate * td_error

In [5]:
def evaluate_performance(model, training_data):
    """Return the average reward of the greedy policy stored in the model's Q-table.

    For each evaluated state the action with the highest Q-value is selected and
    the matching sample's slider value for that action is counted as the reward.

    Args:
        model: Object exposing ``state_size`` and a ``q_table`` indexable by state.
        training_data: Sequence of dicts whose ``'slider_values'`` entry is
            indexable by action.

    Returns:
        The mean reward over the evaluated states, or ``0.0`` when there is
        nothing to evaluate.
    """
    # State i is scored against training_data[i], so only states backed by a
    # sample can be evaluated. Without this bound a Q-table larger than the
    # data set raises IndexError.
    num_states = min(model.state_size, len(training_data))
    if num_states <= 0:
        return 0.0

    total_reward = 0
    for state in range(num_states):
        best_action = np.argmax(model.q_table[state])
        total_reward += training_data[state]['slider_values'][best_action]
    return total_reward / num_states

def visualize_performance(performance_history):
    """Draw the average-reward curve on the current axes and show the figure.

    Args:
        performance_history: Sequence of average-reward values to plot, in order.
    """
    axes = plt.gca()
    axes.plot(performance_history)
    axes.set_xlabel("Epoch")
    axes.set_ylabel("Average reward")
    plt.show()

def train_model(model, training_data, epochs):
    """Train the Q-learning model on the samples and record the average reward per epoch.

    Each sample index is mapped to a state (modulo ``model.state_size``); the
    reward for the chosen action is the sample's slider value for that action.

    Args:
        model: Agent exposing ``state_size``, ``choose_action`` and ``update_q_table``.
        training_data: Sequence of dicts with a ``'slider_values'`` entry indexable by action.
        epochs: Number of passes over ``training_data``.

    Returns:
        List with the value returned by ``evaluate_performance`` after each epoch.
    """
    performance_history = []
    num_samples = len(training_data)

    for epoch in tqdm.tqdm(range(epochs), desc="Training"):
        for i, sample in enumerate(training_data):
            state = i % model.state_size
            action = model.choose_action(state)
            reward = sample['slider_values'][action]
            # The last sample has no successor, so it transitions to itself.
            next_state = (i + 1) % model.state_size if i + 1 < num_samples else state
            model.update_q_table(state, action, reward, next_state)

        average_reward = evaluate_performance(model, training_data)
        print("Epoch: ", epoch)
        # average_reward is a number; calling it raised TypeError after the first epoch.
        print("Average reward: ", average_reward)
        performance_history.append(average_reward)

    return performance_history


# Requires the "Prepare Training data" cell to have been run first: the recorded
# NameError below comes from `training_data` not existing in the kernel.
# training_data is a list of samples, so the first sample must be selected before
# reading its 'features' (indexing the list with a string key raises TypeError).
# NOTE(review): train_model uses the sample index as the state, so
# len(training_data) may be the intended state_size — confirm.
state_size = len(training_data[0]['features'][0]) * len(training_data[0]['features'])
action_size = len(training_data[0]['sentences'])
learning_rate = 0.1
discount_factor = 0.9
exploration_rate = 0.5
epochs = 100

# The hyper-parameters are consumed by the model; train_model only takes
# (model, training_data, epochs).
model = QLearningModel(state_size, action_size, learning_rate, discount_factor, exploration_rate)
performance_history = train_model(model, training_data, epochs)
visualize_performance(performance_history)

NameError: name 'training_data' is not defined