In [2]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from scipy.stats import pearsonr
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.linear_model import LinearRegression

# Load the train and test prediction tables (one CSV each).
train_df = pd.read_csv('../dataset/intensity_result/train/gpt_35_clip_train.csv')
test_df = pd.read_csv('../dataset/intensity_result/test/gpt_35_4o_clip_test.csv')

# From each table pull, in order: the 'roberta_clip_svo_intensity' column, the
# 'gpt_35_intensity' column and the 'intensity' column used as the reference.
train_ml_predictions, train_gpt_predictions, train_true_values = (
    train_df[column].values
    for column in ('roberta_clip_svo_intensity', 'gpt_35_intensity', 'intensity')
)
test_ml_predictions, test_gpt_predictions, test_true_values = (
    test_df[column].values
    for column in ('roberta_clip_svo_intensity', 'gpt_35_intensity', 'intensity')
)

# Simple averaging: both prediction sources contribute equally.
combined_predictions_avg = 0.5 * (test_ml_predictions + test_gpt_predictions)

# Weighted averaging: fixed 0.6 / 0.4 blend in favour of the ML column.
# NOTE(review): the weights are hard-coded here; confirm how they were chosen.
weight_ml = 0.6
weight_gpt = 0.4
combined_predictions_weighted = test_ml_predictions * weight_ml + test_gpt_predictions * weight_gpt

# Evaluate the combined predictions
def evaluate(predictions, true_values):
    """Score a prediction vector against the reference values.

    Args:
        predictions: Predicted values (assumed 1-D, aligned with ``true_values``).
        true_values: Reference values (assumed 1-D).

    Returns:
        Tuple ``(rmse, pearson_corr, cosine_sim)``: the square root of the mean
        squared error, the Pearson correlation coefficient (its p-value is
        discarded), and the cosine similarity between the two vectors.
    """
    mse = mean_squared_error(true_values, predictions)
    correlation = pearsonr(true_values, predictions)[0]
    # cosine_similarity operates on 2-D inputs, so each vector is wrapped as a
    # single row and the lone entry of the resulting matrix is read back out.
    similarity = cosine_similarity([true_values], [predictions])
    return np.sqrt(mse), correlation, similarity[0, 0]

# Score the equal-weight blend against the test targets.
rmse_avg, pearson_avg, cosine_avg = evaluate(
    predictions=combined_predictions_avg,
    true_values=test_true_values,
)
print(f"Simple Averaging - RMSE: {rmse_avg}, Pearson Correlation: {pearson_avg}, Cosine Similarity: {cosine_avg}")

# Score the fixed-weight blend against the test targets.
rmse_weighted, pearson_weighted, cosine_weighted = evaluate(
    predictions=combined_predictions_weighted,
    true_values=test_true_values,
)
print(f"Weighted Averaging - RMSE: {rmse_weighted}, Pearson Correlation: {pearson_weighted}, Cosine Similarity: {cosine_weighted}")

# Stacking: the two prediction columns become a two-feature matrix
# (one row per sample) on which a linear meta-model is fitted.
train_stacked_features = np.stack((train_ml_predictions, train_gpt_predictions)).T
test_stacked_features = np.stack((test_ml_predictions, test_gpt_predictions)).T

# NOTE(review): the meta-model is fitted on the train-split predictions and scored on
# the test split; if the train-split columns are in-sample predictions of the base
# models, the learned combination may not transfer -- confirm.
stacked_model = LinearRegression()
stacked_model.fit(train_stacked_features, train_true_values)
stacked_predictions = stacked_model.predict(test_stacked_features)

# Score the stacked predictions against the test targets.
rmse_stacked, pearson_stacked, cosine_stacked = evaluate(
    predictions=stacked_predictions,
    true_values=test_true_values,
)
print(f"Stacking - RMSE: {rmse_stacked}, Pearson Correlation: {pearson_stacked}, Cosine Similarity: {cosine_stacked}")


Simple Averaging - RMSE: 1.1880936666268944, Pearson Correlation: 0.8257735650476966, Cosine Similarity: 0.9804678875334012
Weighted Averaging - RMSE: 1.1647936594511867, Pearson Correlation: 0.8317521333545543, Cosine Similarity: 0.9810829804157318
Stacking - RMSE: 1.4007660110094655, Pearson Correlation: 0.7432816012684108, Cosine Similarity: 0.9723462594527532


In [6]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from scipy.stats import pearsonr
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.linear_model import LinearRegression

# Load the train and test prediction tables (one CSV each).
# NOTE(review): the train file name only mentions gpt_35; confirm that it really
# carries a 'gpt_4o_intensity' column holding GPT-4o predictions.
train_df = pd.read_csv('../dataset/intensity_result/train/gpt_35_clip_train.csv')
test_df = pd.read_csv('../dataset/intensity_result/test/gpt_35_4o_clip_test.csv')

# From each table pull, in order: the 'roberta_clip_svo_intensity' column, the
# 'gpt_4o_intensity' column and the 'intensity' column used as the reference.
train_ml_predictions, train_gpt_predictions, train_true_values = [
    train_df[column].values
    for column in ('roberta_clip_svo_intensity', 'gpt_4o_intensity', 'intensity')
]
test_ml_predictions, test_gpt_predictions, test_true_values = [
    test_df[column].values
    for column in ('roberta_clip_svo_intensity', 'gpt_4o_intensity', 'intensity')
]

# Simple averaging: both prediction sources contribute equally.
combined_predictions_avg = 0.5 * (test_ml_predictions + test_gpt_predictions)

# Weighted averaging: fixed 0.75 / 0.25 blend in favour of the ML column.
# NOTE(review): the weights are hard-coded here; confirm how they were chosen.
weight_ml = 0.75
weight_gpt = 0.25
combined_predictions_weighted = test_ml_predictions * weight_ml + test_gpt_predictions * weight_gpt

# Evaluate the combined predictions
def evaluate(predictions, true_values):
    """Compute three agreement metrics between predictions and reference values.

    Args:
        predictions: Predicted values (assumed 1-D, aligned with ``true_values``).
        true_values: Reference values (assumed 1-D).

    Returns:
        Tuple ``(rmse, pearson_corr, cosine_sim)``: the square root of the mean
        squared error, the Pearson correlation coefficient (its p-value is
        discarded), and the cosine similarity between the two vectors.
    """
    mse = mean_squared_error(true_values, predictions)
    correlation = pearsonr(true_values, predictions)[0]
    # Each vector is wrapped as a single row because cosine_similarity expects
    # 2-D inputs; the only entry of the resulting matrix is the score.
    similarity = cosine_similarity([true_values], [predictions])
    return np.sqrt(mse), correlation, similarity[0, 0]

# Score the equal-weight blend against the test targets.
rmse_avg, pearson_avg, cosine_avg = evaluate(
    predictions=combined_predictions_avg,
    true_values=test_true_values,
)
print(f"Simple Averaging - RMSE: {rmse_avg}, Pearson Correlation: {pearson_avg}, Cosine Similarity: {cosine_avg}")

# Score the fixed-weight blend against the test targets.
rmse_weighted, pearson_weighted, cosine_weighted = evaluate(
    predictions=combined_predictions_weighted,
    true_values=test_true_values,
)
print(f"Weighted Averaging - RMSE: {rmse_weighted}, Pearson Correlation: {pearson_weighted}, Cosine Similarity: {cosine_weighted}")

# Stacking: the two prediction columns become a two-feature matrix
# (one row per sample) on which a linear meta-model is fitted.
train_stacked_features = np.stack((train_ml_predictions, train_gpt_predictions)).T
test_stacked_features = np.stack((test_ml_predictions, test_gpt_predictions)).T

# NOTE(review): the meta-model is fitted on the train-split predictions and scored on
# the test split; if the two splits' prediction columns are not comparable, the learned
# combination will not transfer -- confirm.
stacked_model = LinearRegression()
stacked_model.fit(train_stacked_features, train_true_values)
stacked_predictions = stacked_model.predict(test_stacked_features)

# Score the stacked predictions against the test targets.
rmse_stacked, pearson_stacked, cosine_stacked = evaluate(
    predictions=stacked_predictions,
    true_values=test_true_values,
)
print(f"Stacking - RMSE: {rmse_stacked}, Pearson Correlation: {pearson_stacked}, Cosine Similarity: {cosine_stacked}")


Simple Averaging - RMSE: 1.7301432460515898, Pearson Correlation: 0.6004497165120009, Cosine Similarity: 0.958125210782388
Weighted Averaging - RMSE: 1.3325807955110172, Pearson Correlation: 0.7767175136689977, Cosine Similarity: 0.9755813225410042
Stacking - RMSE: 2.072475292310843, Pearson Correlation: 0.17624184922624248, Cosine Similarity: 0.9389865888365081


In [5]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from scipy.stats import pearsonr
from sklearn.metrics.pairwise import cosine_similarity
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

# Load the train and test prediction tables (one CSV each).
train_df = pd.read_csv('../dataset/intensity_result/train/gpt_35_clip_train.csv')
test_df = pd.read_csv('../dataset/intensity_result/test/gpt_35_4o_clip_test.csv')

# From each table pull, in order: the 'clip_intensity' column, the
# 'gpt_35_intensity' column and the 'intensity' column used as the reference.
train_ml_predictions, train_gpt_predictions, train_true_values = (
    train_df[column].values
    for column in ('clip_intensity', 'gpt_35_intensity', 'intensity')
)
test_ml_predictions, test_gpt_predictions, test_true_values = (
    test_df[column].values
    for column in ('clip_intensity', 'gpt_35_intensity', 'intensity')
)

# Stacking with Dropout: the two prediction columns become a two-feature
# matrix with one row per sample.
train_stacked_features = np.stack((train_ml_predictions, train_gpt_predictions)).T
test_stacked_features = np.stack((test_ml_predictions, test_gpt_predictions)).T

# Define the model with dropout layers
def create_model(input_dim=None, dropout_rate=0.6):
    """Build and compile a small feed-forward regressor used as the stacking model.

    Architecture: Dense(64, relu) -> Dropout -> Dense(32, relu) -> Dropout -> Dense(1),
    compiled with the Adam optimizer and a mean-squared-error loss.

    Args:
        input_dim: Number of input features. When ``None`` (the default) it is read
            from the module-level ``train_stacked_features`` array, which is what
            the original zero-argument call relied on.
        dropout_rate: Rate passed to both Dropout layers. Defaults to 0.6, the value
            the code has always used (earlier inline comments said "20%", which did
            not match the code).

    Returns:
        The compiled Keras ``Sequential`` model (untrained).
    """
    if input_dim is None:
        # Backward-compatible fallback: infer the feature count from the global
        # training matrix so that ``create_model()`` keeps working unchanged.
        input_dim = train_stacked_features.shape[1]

    model = Sequential()
    model.add(Dense(64, activation='relu', input_dim=input_dim))
    model.add(Dropout(dropout_rate))  # same rate after each hidden layer
    model.add(Dense(32, activation='relu'))
    model.add(Dropout(dropout_rate))
    model.add(Dense(1))  # Output layer: one unit, no activation
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

# Build the dropout network and fit it on the stacked train features
# (100 epochs, mini-batches of 32, progress output silenced).
stacked_model = create_model()
stacked_model.fit(
    x=train_stacked_features,
    y=train_true_values,
    batch_size=32,
    epochs=100,
    verbose=0,
)

# Predict on the stacked test features and collapse the output to a flat vector.
stacked_predictions = stacked_model.predict(x=test_stacked_features).flatten()

# Evaluate the combined predictions
def evaluate(predictions, true_values):
    """Score a prediction vector against the reference values.

    Args:
        predictions: Predicted values (assumed 1-D, aligned with ``true_values``).
        true_values: Reference values (assumed 1-D).

    Returns:
        Tuple ``(rmse, pearson_corr, cosine_sim)``: the square root of the mean
        squared error, the Pearson correlation coefficient (its p-value is
        discarded), and the cosine similarity between the two vectors.
    """
    mse = mean_squared_error(true_values, predictions)
    correlation = pearsonr(true_values, predictions)[0]
    # cosine_similarity operates on 2-D inputs, so each vector is wrapped as a
    # single row and the lone entry of the resulting matrix is read back out.
    similarity = cosine_similarity([true_values], [predictions])
    return np.sqrt(mse), correlation, similarity[0, 0]

# Score the dropout network's predictions against the test targets.
rmse_stacked, pearson_stacked, cosine_stacked = evaluate(
    predictions=stacked_predictions,
    true_values=test_true_values,
)
print(f"Stacking with Dropout - RMSE: {rmse_stacked}, Pearson Correlation: {pearson_stacked}, Cosine Similarity: {cosine_stacked}")


Stacking with Dropout - RMSE: 1.4325723129431034, Pearson Correlation: 0.7380077071672558, Cosine Similarity: 0.9713283490500861


In [4]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from scipy.stats import pearsonr
from sklearn.metrics.pairwise import cosine_similarity
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.models import Model

# Load the train and test prediction tables (one CSV each).
train_df = pd.read_csv('../dataset/intensity_result/train/gpt_35_clip_train.csv')
test_df = pd.read_csv('../dataset/intensity_result/test/gpt_35_4o_clip_test.csv')

# From each table pull, in order: the 'clip_intensity' column, the
# 'gpt_35_intensity' column and the 'intensity' column used as the reference.
train_ml_predictions, train_gpt_predictions, train_true_values = [
    train_df[column].values
    for column in ('clip_intensity', 'gpt_35_intensity', 'intensity')
]
test_ml_predictions, test_gpt_predictions, test_true_values = [
    test_df[column].values
    for column in ('clip_intensity', 'gpt_35_intensity', 'intensity')
]

# Stacking with Dropout: the two prediction columns become a two-feature
# matrix with one row per sample.
train_stacked_features = np.stack((train_ml_predictions, train_gpt_predictions)).T
test_stacked_features = np.stack((test_ml_predictions, test_gpt_predictions)).T

# Define the model with dropout layers
def create_pointer_generation_model():
    """Build and compile the functional-API network used as the stacking model.

    Despite the name, the network built here is a plain feed-forward stack:
    Dense(64, relu) -> Dropout(0.4) -> Dense(32, relu) -> Dropout(0.4) -> Dense(1).
    The input width is read from the module-level ``train_stacked_features`` array.

    Returns:
        The Keras ``Model`` compiled with the Adam optimizer and a
        mean-squared-error loss (untrained).
    """
    n_features = train_stacked_features.shape[1]
    inputs = Input(shape=(n_features,))

    # Two hidden blocks, each a ReLU Dense layer followed by dropout for regularization.
    hidden = inputs
    for width in (64, 32):
        hidden = Dense(width, activation='relu')(hidden)
        hidden = Dropout(0.4)(hidden)

    prediction = Dense(1)(hidden)  # Output layer: one unit, no activation
    network = Model(inputs=inputs, outputs=prediction)
    network.compile(optimizer='adam', loss='mean_squared_error')
    return network

# Build the network and fit it on the stacked train features
# (100 epochs, mini-batches of 32, progress output silenced).
pointer_model = create_pointer_generation_model()
pointer_model.fit(
    x=train_stacked_features,
    y=train_true_values,
    batch_size=32,
    epochs=100,
    verbose=0,
)

# Predict on the stacked test features and collapse the output to a flat vector.
stacked_predictions = pointer_model.predict(x=test_stacked_features).flatten()




In [5]:
# Evaluate the combined predictions
def evaluate(predictions, true_values):
    """Compute three agreement metrics between predictions and reference values.

    Args:
        predictions: Predicted values (assumed 1-D, aligned with ``true_values``).
        true_values: Reference values (assumed 1-D).

    Returns:
        Tuple ``(rmse, pearson_corr, cosine_sim)``: the square root of the mean
        squared error, the Pearson correlation coefficient (its p-value is
        discarded), and the cosine similarity between the two vectors.
    """
    mse = mean_squared_error(true_values, predictions)
    correlation = pearsonr(true_values, predictions)[0]
    # Each vector is wrapped as a single row because cosine_similarity expects
    # 2-D inputs; the only entry of the resulting matrix is the score.
    similarity = cosine_similarity([true_values], [predictions])
    return np.sqrt(mse), correlation, similarity[0, 0]

In [6]:

# Score the network's predictions against the test targets.
rmse_stacked, pearson_stacked, cosine_stacked = evaluate(
    predictions=stacked_predictions,
    true_values=test_true_values,
)
print(f"Pointer Generation with Dropout - RMSE: {rmse_stacked}, Pearson Correlation: {pearson_stacked}, Cosine Similarity: {cosine_stacked}")

Pointer Generation with Dropout - RMSE: 1.4014293683654484, Pearson Correlation: 0.7420871971590375, Cosine Similarity: 0.9722956621511978
