In [1]:
from load_data import load_json_data

# Source file plus the two numeric bounds handed to the loader
# (presumably a start/end range over the records -- confirm against load_data).
RESULT_JSON = "data/result.json"
RESULT_BOUNDS = (1000, 1520)

data = load_json_data(RESULT_JSON, *RESULT_BOUNDS)

# Peek at the final entry to sanity-check the loaded structure.
data[-1]

[{'filename': '03577_0',
  'sentence1': 'a big blue building has been built in the middle of the scene ',
  'sentence2': 'the bareland has been replaced by a blue building and a big blue building has been built ',
  'sentence3': 'two big blue buildings have been replaced by a blue building '},
 {'filename': '03577_1',
  'sentence1': 'a small blue building has been constructed on the bareland at the corner of the scene ',
  'sentence2': 'many trees have appeared in many parts of the scene ',
  'sentence3': 'many green trees have been constructed on the bareland and the grassland '},
 {'filename': '03577_2',
  'sentence1': 'a big building has been constructed in many parts of the scene ',
  'sentence2': 'a blue building has been constructed in the green area ',
  'sentence3': 'a big building has been constructed in the green area '},
 {'filename': '03577_3',
  'sentence1': 'two blue buildings have been built in many parts of the scene ',
  'sentence2': 'two blue buildings have been built

In [11]:
from load_data import load_extracted_features

# The loader returns one feature vector per image pair with trailing singleton
# axes, e.g. (2084, 2048, 1, 1). Collapse everything after the first axis
# instead of hard-coding the sample count, so this cell keeps working when the
# number of image pairs changes.
features_im1 = load_extracted_features("im1")
features_im1 = features_im1.reshape(features_im1.shape[0], -1)

features_im2 = load_extracted_features("im2")
features_im2 = features_im2.reshape(features_im2.shape[0], -1)

# Row i of both arrays is later concatenated into one sample, so the two
# feature stacks must line up exactly.
assert features_im1.shape == features_im2.shape, (
    f"im1/im2 feature shapes differ: {features_im1.shape} vs {features_im2.shape}"
)

features_im1.shape, features_im2.shape

((2084, 2048), (2084, 2048))

In [None]:
import numpy as np
import torch
import tqdm
from transformers import BertTokenizer, BertModel

def bert_encode(sentences, bert_model, tokenizer, device, batch_size=32):
    """Encode sentences into fixed-size vectors using the model's [CLS] output.

    Args:
        sentences: Sequence of strings to encode.
        bert_model: Model called as ``bert_model(input_ids, attention_mask=...)``
            whose first output is the per-token hidden states.
        tokenizer: Tokenizer exposing ``batch_encode_plus``.
        device: Torch device the model and the inputs are moved to.
        batch_size: Number of sentences encoded per forward pass (>= 1).

    Returns:
        ``np.ndarray`` with one row per sentence, in input order. An empty
        ``sentences`` sequence yields an empty array.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        # A negative step would make the loop below silently produce nothing.
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    bert_model.to(device)
    bert_model.eval()  # Run the model in evaluation mode

    # Process all sentences batch by batch
    all_sentence_embeddings = []
    for i in tqdm.tqdm(range(0, len(sentences), batch_size), desc="BERT Encoding"):
        batch_sentences = sentences[i:i + batch_size]
        encoded = tokenizer.batch_encode_plus(
            batch_sentences,
            add_special_tokens=True,
            max_length=512,
            # Pad only up to the longest sentence in the batch. Padding every
            # batch to 512 tokens makes each forward pass far more expensive
            # for short captions; padded positions are masked out by
            # attention_mask, so the [CLS] vector is unaffected apart from
            # floating-point noise.
            padding='longest',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt'
        )

        input_ids = encoded['input_ids'].to(device)
        attention_mask = encoded['attention_mask'].to(device)

        with torch.no_grad():
            outputs = bert_model(input_ids, attention_mask=attention_mask)

        # Position 0 of every sequence is the [CLS] token; use it as the
        # sentence-level representation.
        cls_embeddings = outputs[0][:, 0, :]
        all_sentence_embeddings.extend(cls_embeddings.cpu().numpy())

    return np.array(all_sentence_embeddings)

def collect_all_sentences(data):
    """Flatten every sentence in ``data`` into one list, preserving order.

    Two layouts are accepted for each element of ``data``:

    * a dict with a ``'sentences'`` list (the layout the original code read), or
    * a list of record dicts keyed ``'sentence1'``, ``'sentence2'``, ... -- the
      layout shown by ``data[-1]`` in the loading cell. Sentences are emitted
      record by record, in numeric key order, which matches the
      record-then-slider order used when the slider values are gathered.

    Args:
        data: Iterable of data points in either layout described above.

    Returns:
        Tuple ``(all_sentences, sentence_lengths)`` where ``sentence_lengths``
        holds the number of sentences contributed by each record (or by each
        data point in the ``'sentences'`` layout).

    Raises:
        KeyError: If a record has neither ``'sentences'`` nor any
            ``'sentenceN'`` key.
    """
    prefix = 'sentence'

    def _numbered_keys(record):
        # Keys such as 'sentence1', 'sentence2', ... sorted by their number.
        hits = [
            key for key in record
            if isinstance(key, str)
            and key.startswith(prefix)
            and key[len(prefix):].isdigit()
        ]
        return sorted(hits, key=lambda key: int(key[len(prefix):]))

    all_sentences = []
    sentence_lengths = []
    for data_point in data:
        # A bare dict is treated as a single record; otherwise iterate records.
        records = [data_point] if isinstance(data_point, dict) else data_point
        for record in records:
            if 'sentences' in record:
                sentences = list(record['sentences'])
            else:
                keys = _numbered_keys(record)
                if not keys:
                    raise KeyError('sentences')
                sentences = [record[key] for key in keys]
            all_sentences.extend(sentences)
            sentence_lengths.append(len(sentences))
    return all_sentences, sentence_lengths


# Load BERT model
bert_model = BertModel.from_pretrained('bert-base-uncased')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Collect all sentences
all_sentences, sentence_lengths = collect_all_sentences(data)
sentence_lengths = np.array(sentence_lengths)

# Encode sentences
sentence_embeddings = bert_encode(all_sentences, bert_model, tokenizer, device, batch_size=64)

# Every sentence must have produced exactly one embedding row; later cells
# pair rows with slider values by position.
assert sentence_embeddings.shape[0] == len(all_sentences), (
    f"expected {len(all_sentences)} embeddings, got {sentence_embeddings.shape[0]}"
)

# Free GPU memory. empty_cache() only returns cached blocks that are no longer
# in use, so the model has to leave the GPU first or its weights stay resident.
if torch.cuda.is_available():
    bert_model.to("cpu")
    torch.cuda.empty_cache()


sentence_embeddings.shape

#### Prepare Training Data

In [4]:
import numpy as np
from sklearn.model_selection import train_test_split

# Layout of `data` relied on below: every data point is indexable by 0..3 and
# each of those records carries the keys "slider1".."slider3".
RECORDS_PER_DATA_POINT = 4
SLIDERS_PER_RECORD = 3

combined_labels = []  # never filled in the visible cells; kept in case other cells reference it

# One label per sentence, in data-point -> record -> slider order.
slider_values = np.array([
    data_point[record_idx]["slider" + str(slider_idx)]
    for data_point in data
    for record_idx in range(RECORDS_PER_DATA_POINT)
    for slider_idx in range(1, SLIDERS_PER_RECORD + 1)
])

# Expected shapes for the current dataset
# features_im1.shape = (2084, 2048)
# features_im2.shape = (2084, 2048)
# sentence_embeddings.shape = (6252, 768)
# slider_values.shape = (6252,)

n_image_pairs = features_im1.shape[0]
n_sentences = sentence_embeddings.shape[0]
assert features_im2.shape[0] == n_image_pairs, "im1/im2 feature counts differ"
# Floor division alone would silently drop trailing sentences and misalign
# features with labels, so insist on an exact multiple.
assert n_image_pairs > 0 and n_sentences % n_image_pairs == 0, (
    f"{n_sentences} sentence embeddings cannot be split evenly over {n_image_pairs} image pairs"
)

# The number of sentences per image pair
sentences_per_image_pair = n_sentences // n_image_pairs

# Combining features: repeat each image pair's features once per sentence so
# that row i * sentences_per_image_pair + j pairs image i with its j-th sentence.
combined_features = np.concatenate(
    [
        np.repeat(features_im1, sentences_per_image_pair, axis=0),
        np.repeat(features_im2, sentences_per_image_pair, axis=0),
        sentence_embeddings,
    ],
    axis=1,
)

# Check if the combined features align with the slider values
assert combined_features.shape[0] == slider_values.shape[0]

# Shuffle and split the data
# NOTE(review): the split is per sentence, so sentences belonging to the same
# image pair can land in both train and test and share their image features.
# Consider a group-aware split if test metrics must reflect unseen images.
X_train, X_test, y_train, y_test = train_test_split(combined_features, slider_values, test_size=0.2, random_state=42, shuffle=True)

combined_features.shape

{'features': [array([-3.895567  , -0.7710196 , -1.379711  , ..., -4.317973  ,
         -0.24954835, -0.5057722 ], dtype=float32),
  array([ 0.29719004, -2.1620078 , -4.0901904 , ..., -1.0636894 ,
         -0.65524375,  2.9486067 ], dtype=float32),
  array([-4.715664 , -2.0460691, -1.6050384, ..., -6.1682596, -1.5817506,
         -1.0122457], dtype=float32),
  array([-0.8934293,  1.7406154,  2.1094537, ..., -6.358255 , -2.441966 ,
          3.5570562], dtype=float32)],
 'sentences': ['two buildings have been constructed on the bareland in the middle of the scene ',
  'a huge building has been constructed on the green area at the corner of the scene ',
  'two buildings have been constructed on the green area in the middle '],
 'slider_values': [4, 2, 3]}

#### Scaling The Data

In [None]:
# Standardise the features

from sklearn.preprocessing import StandardScaler

# Statistics are estimated from the training split only and then applied
# unchanged to the test split.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

X_test[0]

In [None]:
import tensorflow as tf
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Report whether TensorFlow sees a GPU (an empty device name means it does not)
gpu_name = tf.test.gpu_device_name()
print(f"Default GPU Device: {gpu_name}" if gpu_name else "Please install GPU version of TF")

#### Train The Model

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import regularizers
from tensorflow.keras.layers import BatchNormalization

# Width of one combined sample (im1 features + im2 features + sentence
# embedding). Read it from the data instead of hard-coding it so the network
# keeps matching the inputs if any feature extractor changes.
n_features = X_train.shape[1]

model = Sequential()

# Input layer with L2 regularization
model.add(Dense(4864, input_dim=n_features, activation="relu", kernel_regularizer=regularizers.l2(0.01)))
model.add(BatchNormalization())

# Hidden layer with L2 regularization
model.add(Dense(1024, activation="relu", kernel_regularizer=regularizers.l2(0.01)))
model.add(BatchNormalization())
model.add(Dropout(0.5))

# Another hidden layer with L2 regularization
model.add(Dense(512, activation="relu", kernel_regularizer=regularizers.l2(0.01)))
model.add(BatchNormalization())
model.add(Dropout(0.5))

# Output layer: a single linear unit, since the slider value is regressed
model.add(Dense(1, activation=None))

# Compile model with a custom learning rate
optimizer = Adam(learning_rate=0.001)
model.compile(loss='mse', optimizer=optimizer, metrics=['mean_squared_error'])

# Callbacks
# restore_best_weights=True: without it the in-memory model ends training with
# the weights from `patience` epochs after the best validation loss, and that
# is the model later cells call predict() on. The checkpoint file alone does
# not help because nothing reloads it.
early_stop = EarlyStopping(monitor='val_loss', patience=30, restore_best_weights=True)
model_checkpoint = ModelCheckpoint('best_model.h5', monitor='val_loss', save_best_only=True)

# Summary
model.summary()

# Fit model
model.fit(
    X_train, y_train,
    epochs=1000,
    batch_size=128,
    validation_split=0.2,  # Use either validation_split or validation_data
    callbacks=[early_stop, model_checkpoint]
)

#### Visualize Sample And Get Rewards

In [52]:
import pandas as pd
import matplotlib.pyplot as plt

# Training history as a DataFrame: one row per epoch, one column per metric
losses_df = pd.DataFrame(model.history.history)

# Two stacked panels sharing the epoch axis: total loss on top, MSE below
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(10, 10), sharex=True)

panels = [
    # (axis, history column, short metric name, extra line styling)
    (axes[0], 'loss', 'Loss', {}),
    (axes[1], 'mean_squared_error', 'MSE', {'linestyle': '--'}),
]
for ax, column, short_name, line_style in panels:
    ax.plot(losses_df[column], label=f'Training {short_name}', **line_style)
    ax.plot(losses_df['val_' + column], label=f'Validation {short_name}', **line_style)
    ax.set_title(f'Training and Validation {short_name}')
    ax.set_ylabel(short_name)
    ax.legend()
    ax.set_yscale('log')

# Only the bottom panel needs the shared x-axis label
axes[1].set_xlabel('Epoch')

# Show the plot
plt.show()


Cümleler ve Modelin Tahmin Ettiği Ödüller:
a blue roofed building has been constructed in the middle of the scene  - Tahmin Edilen Ödül: 0.00
the white roofed building has changed its color of the scene  - Tahmin Edilen Ödül: 0.00
The white roofed building has changed to the blue building in the middle of the scene  - Tahmin Edilen Ödül: 0.00

Cümleler ve Gerçek Ödüller:
a blue roofed building has been constructed in the middle of the scene  - Gerçek Ödül: -5.00
the white roofed building has changed its color of the scene  - Gerçek Ödül: -5.00
The white roofed building has changed to the blue building in the middle of the scene  - Gerçek Ödül: -5.00


#### Get Sample Predictions

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Predict slider values for the held-out samples
y_pred = model.predict(X_test)

# Report the error metrics on the test split
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
print("Mean Squared Error:", mse)
print("Mean Absolute Error:", mae)

# Overlay actual and predicted values for the first N test samples
N = 100  # Number of samples to visualize
comparison_fig, comparison_ax = plt.subplots(figsize=(15, 6))
comparison_ax.plot(y_test[:N], label='Actual')
comparison_ax.plot(y_pred[:N], label='Predicted')
comparison_ax.set_title('Comparison of Actual and Predicted Values')
comparison_ax.set_xlabel('Sample')
comparison_ax.set_ylabel('Slider Value')
comparison_ax.legend()
plt.show()