# In [13]:
import pandas as pd
import numpy as np
from transformers import RobertaTokenizer, RobertaModel
import torch
import torch.nn as nn
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# Read the review dataset from the working directory.
# The rest of the script reads these columns: 'service_name', 'review',
# 'cost', 'latency', 'availability', 'scalability'.
df = pd.read_csv("cloud_services_reviews.csv")

# Load the pretrained RoBERTa tokenizer and encoder (same checkpoint for
# both); the tokenizer is created first, then the model.
tokenizer, roberta_model = (
    RobertaTokenizer.from_pretrained('roberta-base'),
    RobertaModel.from_pretrained('roberta-base'),
)

# Function to extract embeddings
def get_roberta_embedding(text):
    """Encode ``text`` with RoBERTa and return the first-token embedding.

    The text is tokenized (truncated to at most 64 tokens), passed through
    the module-level ``roberta_model`` without gradient tracking, and the
    hidden state at sequence position 0 is returned.

    Args:
        text: Input accepted by the module-level ``tokenizer``.

    Returns:
        A NumPy array holding the last-layer hidden state of the first
        token, with size-1 dimensions squeezed away.
    """
    encoded = tokenizer(
        text,
        return_tensors='pt',
        truncation=True,
        padding=True,
        max_length=64,
    )

    # Inference only, so no autograd graph is needed.
    with torch.no_grad():
        model_output = roberta_model(**encoded)

    # Position 0 is the leading token, used here as a [CLS]-style summary.
    first_token_state = model_output.last_hidden_state[:, 0, :]
    return first_token_state.squeeze().numpy()

# Embed every review; each cell of 'embedding' holds one NumPy vector.
print("Extracting RoBERTa embeddings...")
df['embedding'] = df['review'].apply(get_roberta_embedding)

# Rescale the numeric criteria in place with min-max scaling.
# NOTE(review): the scaler is fitted on all rows, i.e. before the
# train/test split below.
criteria_cols = ['cost', 'latency', 'availability', 'scalability']
scaler = MinMaxScaler()
df[criteria_cols] = scaler.fit_transform(df[criteria_cols])

# Feature matrix = text embedding columns followed by the scaled criteria.
X_text = np.stack(df['embedding'].to_list())
X_numeric = df[criteria_cols].to_numpy()
X_combined = np.concatenate((X_text, X_numeric), axis=1)

# Multi-output regression target.
# NOTE(review): 'availability' and 'scalability' are also part of
# criteria_cols, so the targets are present in the features as well --
# confirm this is intended.
y = df[['availability', 'scalability']]

# Hold out 20% of the rows with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X_combined,
    y.to_numpy(),
    test_size=0.2,
    random_state=42,
)

# Define simple Deep Neural Network
class CloudRankNN(nn.Module):
    """Fully connected regressor: input -> 256 -> 128 -> output.

    Two hidden linear layers, each followed by a ReLU, and a final linear
    layer with no activation.

    Args:
        input_size: Number of input features per sample.
        output_size: Number of values predicted per sample.
    """

    def __init__(self, input_size, output_size):
        super().__init__()
        # Sub-modules are registered in forward order.
        self.fc1 = nn.Linear(input_size, 256)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(256, 128)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(128, output_size)

    def forward(self, x):
        """Run ``x`` through every layer in order and return the output."""
        pipeline = (self.fc1, self.relu1, self.fc2, self.relu2, self.fc3)
        activations = x
        for layer in pipeline:
            activations = layer(activations)
        return activations

# Build the network sized to the data, with an MSE objective and Adam.
model = CloudRankNN(X_combined.shape[1], y.shape[1])
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.MSELoss()

# The training split as float32 tensors.
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)

# Full-batch training: one optimizer step per epoch over all training rows.
print("Training deep learning model...")
for epoch in range(30):
    optimizer.zero_grad()
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    loss.backward()
    optimizer.step()
    # Report progress every fifth epoch (0, 5, ..., 25).
    if not epoch % 5:
        print(f"Epoch [{epoch}/30], Loss: {loss.item():.4f}")

# Score every row of the dataset (training and held-out rows alike).
X_all_tensor = torch.tensor(X_combined, dtype=torch.float32)
with torch.no_grad():
    predicted_scores = model(X_all_tensor).numpy()

# Collapse the model outputs into one extra criterion: their per-row mean.
df['predicted_score'] = predicted_scores.mean(axis=1)

# ------- MCDA using TOPSIS ----------
def topsis(matrix, weights, impacts):
    """Score alternatives with TOPSIS (closeness to the ideal solution).

    Each row of ``matrix`` is an alternative and each column a criterion.
    Columns are vector-normalised, multiplied by ``weights``, and every row
    is scored by its distance to the ideal-worst point relative to the sum
    of its distances to the ideal-best and ideal-worst points.

    Args:
        matrix: 2-D array-like of shape (n_alternatives, n_criteria).
        weights: Per-criterion weights, broadcastable against the columns.
        impacts: Direction of the criteria. Either one value applied to all
            columns or one value per column. ``'+'`` or a positive number
            marks a benefit criterion (higher is better); any other value
            (e.g. ``'-'``, ``-1``, ``0``) marks a cost criterion.

    Returns:
        1-D ``numpy.ndarray`` with one score per alternative in [0, 1];
        higher means closer to the ideal solution. Alternatives that are
        at distance zero from both ideal points (e.g. a single row, or all
        rows identical) get the neutral score 0.5.

    Raises:
        ValueError: If ``matrix`` is not 2-D, or ``impacts`` has a length
            other than 1 or the number of criteria.
    """
    matrix = np.asarray(matrix, dtype=float)
    if matrix.ndim != 2:
        raise ValueError("matrix must be 2-D (alternatives x criteria)")
    weights = np.asarray(weights, dtype=float)
    n_alternatives, n_criteria = matrix.shape

    # Resolve the direction of every criterion individually; a single
    # scalar condition cannot express mixed benefit/cost columns.
    flags = np.atleast_1d(np.asarray(impacts, dtype=object)).ravel()
    is_benefit = np.array(
        [flag == '+' if isinstance(flag, str) else flag > 0 for flag in flags],
        dtype=bool,
    )
    if is_benefit.size == 1:
        is_benefit = np.repeat(is_benefit, n_criteria)
    elif is_benefit.size != n_criteria:
        raise ValueError(
            f"impacts has {is_benefit.size} entries but matrix has "
            f"{n_criteria} criteria"
        )

    if n_alternatives == 0:
        return np.empty(0, dtype=float)

    # Vector normalisation; an all-zero column keeps its zeros instead of
    # turning into NaN through 0/0.
    col_norms = np.sqrt((matrix ** 2).sum(axis=0))
    col_norms[col_norms == 0] = 1.0
    weighted_matrix = matrix / col_norms * weights

    col_max = weighted_matrix.max(axis=0)
    col_min = weighted_matrix.min(axis=0)
    # Benefit columns: best is the maximum. Cost columns: best is the minimum.
    ideal_best = np.where(is_benefit, col_max, col_min)
    ideal_worst = np.where(is_benefit, col_min, col_max)

    dist_best = np.sqrt(((weighted_matrix - ideal_best) ** 2).sum(axis=1))
    dist_worst = np.sqrt(((weighted_matrix - ideal_worst) ** 2).sum(axis=1))

    # Avoid 0/0 when an alternative coincides with both ideal points.
    total = dist_best + dist_worst
    score = np.full(total.shape, 0.5)
    np.divide(dist_worst, total, out=score, where=total > 0)
    return score

# Decision matrix: the four scaled criteria plus the model's predicted score.
criteria_matrix = df[
    ['cost', 'latency', 'availability', 'scalability', 'predicted_score']
].to_numpy()

# Equal weight of 0.2 per criterion (customizable).
weights = np.full(5, 0.2)

# '-' marks criteria where lower is better (cost, latency); '+' the rest.
impacts = np.array(['-', '-', '+', '+', '+'])

# Numeric form of the directions: +1 for '+', -1 otherwise. This mask,
# not the string array, is what gets passed to topsis().
impacts_mask = np.where(impacts == '+', 1, -1)
topsis_scores = topsis(criteria_matrix, weights, impacts_mask)

# Store the score and order the services from best to worst.
df['rank_score'] = topsis_scores
df = df.sort_values(by='rank_score', ascending=False)

# Show the ten highest-ranked services.
print("\nTop Cloud Services based on Hybrid RoBERTa + MCDA Ranking:")
print(df[['service_name', 'rank_score']].head(10))


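# Notebook cell output (does not match the code above, which contains no `\U`
# escape; presumably from an earlier version of the cell whose file path
# contained a backslash followed by "U"):
# SyntaxError: (unicode error) 'unicodeescape' codec can't decode bytes in position 2-3: truncated \UXXXXXXXX escape (ipython-input-13-982031627.py, line 11)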