In [14]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from scipy.stats import pearsonr
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.linear_model import LinearRegression

# Load the merged dataframe
merged_df = pd.read_csv('merged_output.csv')

# Extract the necessary columns
ml_predictions = merged_df['Predicted_Intensity'].values
gpt_predictions = merged_df['gpt_intensity'].values
true_values = merged_df['intensity'].values


In [15]:
# Simple Averaging
combined_predictions_avg = (ml_predictions + gpt_predictions) / 2

# Weighted Averaging
weight_ml = 0.7
weight_gpt = 0.3
combined_predictions_weighted = weight_ml * ml_predictions + weight_gpt * gpt_predictions

# Evaluate the combined predictions
def evaluate(predictions, true_values):
    rmse = np.sqrt(mean_squared_error(true_values, predictions))
    pearson_corr, _ = pearsonr(true_values, predictions)
    cosine_sim = cosine_similarity([true_values], [predictions])[0, 0]
    return rmse, pearson_corr, cosine_sim

# Evaluate simple averaging
rmse_avg, pearson_avg, cosine_avg = evaluate(combined_predictions_avg, true_values)
print(f"Simple Averaging - RMSE: {rmse_avg}, Pearson Correlation: {pearson_avg}, Cosine Similarity: {cosine_avg}")

# Evaluate weighted averaging
rmse_weighted, pearson_weighted, cosine_weighted = evaluate(combined_predictions_weighted, true_values)
print(f"Weighted Averaging - RMSE: {rmse_weighted}, Pearson Correlation: {pearson_weighted}, Cosine Similarity: {cosine_weighted}")

# Stacking
stacked_features = np.vstack((ml_predictions, gpt_predictions)).T
stacked_model = LinearRegression().fit(stacked_features, true_values)
stacked_predictions = stacked_model.predict(stacked_features)

# Evaluate stacking
rmse_stacked, pearson_stacked, cosine_stacked = evaluate(stacked_predictions, true_values)
print(f"Stacking - RMSE: {rmse_stacked}, Pearson Correlation: {pearson_stacked}, Cosine Similarity: {cosine_stacked}")


Simple Averaging - RMSE: 0.9092646204400056, Pearson Correlation: 0.9002543655210911, Cosine Similarity: 0.9881170705343353
Weighted Averaging - RMSE: 0.9331484406194158, Pearson Correlation: 0.8943887774887066, Cosine Similarity: 0.987452865681709
Stacking - RMSE: 0.9064992969610338, Pearson Correlation: 0.9002769898783781, Cosine Similarity: 0.988139605860322


In [3]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
import numpy as np



In [4]:

# Read the CSV file
file_path = 'merged_output.csv'  # Replace with your file path
data = pd.read_csv(file_path)

# Extract the necessary columns
outputgpt = data['gpt_intensity'].values
outputroberta = data['Original_Predicted_Intensity'].values
groundtruth = data['intensity'].values

# Normalize the inputs
scaler = StandardScaler()
outputgpt = scaler.fit_transform(outputgpt.reshape(-1, 1))
outputroberta = scaler.fit_transform(outputroberta.reshape(-1, 1))
groundtruth = scaler.fit_transform(groundtruth.reshape(-1, 1))

class CombineModel(nn.Module):
    def __init__(self):
        super(CombineModel, self).__init__()
        self.fc1 = nn.Linear(1, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)
        self.dropout = nn.Dropout(0.2)
        self.w = nn.Parameter(torch.randn(1, requires_grad=True))
        
    def forward(self, outputgpt, outputroberta):
        sigmoid_w = torch.sigmoid(self.w)
        outputinterm = sigmoid_w * outputgpt + (1 - sigmoid_w) * outputroberta
        outputinterm = torch.relu(self.fc1(outputinterm))
        outputinterm = self.dropout(outputinterm)
        outputinterm = torch.relu(self.fc2(outputinterm))
        outputinterm = self.dropout(outputinterm)
        outputinterm = self.fc3(outputinterm)
        return outputinterm


In [5]:
# Prepare data for training
outputgpt_tensor = torch.tensor(outputgpt, dtype=torch.float32)
outputroberta_tensor = torch.tensor(outputroberta, dtype=torch.float32)
groundtruth_tensor = torch.tensor(groundtruth, dtype=torch.float32)

# Instantiate the model, define the loss function and the optimizer
model = CombineModel()
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training the model
num_epochs = 1000
for epoch in range(num_epochs):
    model.train()
    
    # Zero the parameter gradients
    optimizer.zero_grad()
    
    # Forward pass
    outputs = model(outputgpt_tensor, outputroberta_tensor)
    
    # Compute the loss
    loss = criterion(outputs, groundtruth_tensor)
    
    # Backward pass and optimize
    loss.backward()
    optimizer.step()
    
    if (epoch+1) % 100 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# Evaluate the model
model.eval()
with torch.no_grad():
    predicted = model(outputgpt_tensor, outputroberta_tensor).numpy()

# Inverse transform the predicted values to their original scale
predicted = scaler.inverse_transform(predicted)
groundtruth = scaler.inverse_transform(groundtruth)

# Compute RMSE
rmse = np.sqrt(mean_squared_error(groundtruth, predicted))
print(f'RMSE: {rmse:.4f}')

  from .autonotebook import tqdm as notebook_tqdm


Epoch [100/1000], Loss: 0.2197
Epoch [200/1000], Loss: 0.2111
Epoch [300/1000], Loss: 0.2095
Epoch [400/1000], Loss: 0.2065
Epoch [500/1000], Loss: 0.2049
Epoch [600/1000], Loss: 0.2045
Epoch [700/1000], Loss: 0.2036
Epoch [800/1000], Loss: 0.2023
Epoch [900/1000], Loss: 0.2029
Epoch [1000/1000], Loss: 0.2008
RMSE: 0.8943
