In [None]:
import joblib
from sklearn.metrics import accuracy_score, f1_score
from scipy.special import softmax
import numpy as np
from sklearn.ensemble import RandomForestClassifier

## Simple Average-Fusion

In [None]:
# Pre-trained XGBoost baselines, one per modality.
# NOTE: joblib.load unpickles its input, so only load artifacts from a trusted source.
rnaseq_model, microRNA_model = (
    joblib.load('baseline_model_outputs/rnaseq/XGBClassifier/trained_model.pkl'),
    joblib.load('baseline_model_outputs/microRNA/XGBClassifier/trained_model.pkl'),
)

# RNA-seq modality: feature matrices first, then the matching labels
X_train_RNAseq, X_test_RNAseq = (
    joblib.load('baseline_model_outputs/rnaseq/X_train.pkl'),
    joblib.load('baseline_model_outputs/rnaseq/X_test.pkl'),
)
y_train_RNAseq, y_test_RNAseq = (
    joblib.load('baseline_model_outputs/rnaseq/y_train.pkl'),
    joblib.load('baseline_model_outputs/rnaseq/y_test.pkl'),
)

# microRNA modality: feature matrices first, then the matching labels
X_train_microRNA, X_test_microRNA = (
    joblib.load('baseline_model_outputs/microRNA/X_train.pkl'),
    joblib.load('baseline_model_outputs/microRNA/X_test.pkl'),
)
y_train_microRNA, y_test_microRNA = (
    joblib.load('baseline_model_outputs/microRNA/y_train.pkl'),
    joblib.load('baseline_model_outputs/microRNA/y_test.pkl'),
)

In [None]:
def _score_from_probabilities(y_true, class_probs):
    """Turn a class-probability matrix into labels and score them.

    Returns a tuple ``(predicted_labels, accuracy, macro_f1)`` where the
    predicted label of each row is the column index of its largest probability.
    """
    predicted = np.argmax(class_probs, axis=1)
    return (
        predicted,
        accuracy_score(y_true, predicted),
        f1_score(y_true, predicted, average='macro'),
    )


# Class probabilities from each XGB model on its own modality's test set
xgb_probs_microRNA = microRNA_model.predict_proba(X_test_microRNA)
xgb_probs_RNAseq = rnaseq_model.predict_proba(X_test_RNAseq)

# There is a single probability source per dataset here, so the "average"
# is simply the XGB output itself.
avg_probs_microRNA, avg_probs_RNAseq = xgb_probs_microRNA, xgb_probs_RNAseq

# Predicted class plus accuracy / macro-F1, computed per dataset
avg_predictions_microRNA, accuracy_microRNA, f1_microRNA = _score_from_probabilities(
    y_test_microRNA, avg_probs_microRNA
)
avg_predictions_RNAseq, accuracy_RNAseq, f1_RNAseq = _score_from_probabilities(
    y_test_RNAseq, avg_probs_RNAseq
)

for metric_label, metric_value in (
    ("Micro RNA Accuracy:", accuracy_microRNA),
    ("Micro RNA Macro F1-score:", f1_microRNA),
    ("RNAseq Accuracy:", accuracy_RNAseq),
    ("RNAseq Macro F1-score:", f1_RNAseq),
):
    print(metric_label, metric_value)

Micro RNA Accuracy: 0.8863636363636364
Micro RNA Macro F1-score: 0.8873285663022834
RNAseq Accuracy: 0.9913793103448276
RNAseq Macro F1-score: 0.9846808510638297


## Majority Voting

In [None]:
# Hard-label votes from each modality's model.
preds_RNAseq = rnaseq_model.predict(X_test_RNAseq)
preds_microRNA = microRNA_model.predict(X_test_microRNA)

# Per-sample confidence of each vote (probability of the most likely class).
# With only two voters a disagreement is a tie, so confidence is the tie-breaker.
confidence_RNAseq = rnaseq_model.predict_proba(X_test_RNAseq).max(axis=1)
confidence_microRNA = microRNA_model.predict_proba(X_test_microRNA).max(axis=1)

# NOTE(review): votes are paired by row position, which assumes row i of both
# test sets is the same sample -- TODO confirm (the two sets may differ in length).
n_shared = min(len(preds_RNAseq), len(preds_microRNA))

# Previously both branches of the agreement check appended the RNAseq vote, so
# the microRNA model never influenced the result. Now: keep the agreed label,
# and on disagreement take the more confident model (RNAseq wins exact ties).
take_microRNA = (preds_RNAseq[:n_shared] != preds_microRNA[:n_shared]) & (
    confidence_microRNA[:n_shared] > confidence_RNAseq[:n_shared]
)
voted_shared = np.where(take_microRNA, preds_microRNA[:n_shared], preds_RNAseq[:n_shared])

# Rows beyond the shared range have a single voter: keep that model's prediction.
longer_preds = preds_RNAseq if len(preds_RNAseq) >= len(preds_microRNA) else preds_microRNA
final_predictions = np.concatenate([voted_shared, longer_preds[n_shared:]])

# Score against whichever label vector matches the number of fused predictions.
y_test_combined = y_test_RNAseq if len(y_test_RNAseq) == len(final_predictions) else y_test_microRNA

accuracy = accuracy_score(y_test_combined, final_predictions)
f1 = f1_score(y_test_combined, final_predictions, average='macro')

print("Majority Voting Accuracy:", accuracy)
print("Majority Voting Macro F1-score:", f1)


Majority Voting Accuracy: 0.9913793103448276
Majority Voting Macro F1-score: 0.9846808510638297


## Rank Fusion

In [None]:
# Class probabilities from each modality's model
probs_RNAseq = rnaseq_model.predict_proba(X_test_RNAseq)
probs_microRNA = microRNA_model.predict_proba(X_test_microRNA)

# Double argsort along the class axis turns each row of probabilities into
# per-class ranks (0 = least likely class, larger = more likely).
ranks_RNAseq = np.argsort(np.argsort(probs_RNAseq))
ranks_microRNA = np.argsort(np.argsort(probs_microRNA))

# zip stops at the shorter of the two rank arrays, so rows are only fused
# where both modalities have a prediction at that position.
avg_ranks = np.array([
    (row_RNAseq + row_microRNA) / 2.0
    for row_RNAseq, row_microRNA in zip(ranks_RNAseq, ranks_microRNA)
])

# The class with the highest mean rank becomes the fused prediction
final_predictions_rank_fusion = np.argmax(avg_ranks, axis=1)

# Labels are taken from the RNAseq test set, cut to the number of fused rows
n_fused = len(final_predictions_rank_fusion)
y_test_combined = y_test_RNAseq[:n_fused]

accuracy_rank_fusion = accuracy_score(y_test_combined, final_predictions_rank_fusion)
f1_rank_fusion = f1_score(y_test_combined, final_predictions_rank_fusion, average='macro')

print("Rank Fusion Accuracy:", accuracy_rank_fusion)
print("Rank Fusion Macro F1-score:", f1_rank_fusion)

Rank Fusion Accuracy: 0.5681818181818182
Rank Fusion Macro F1-score: 0.539533584694875


## Meta-Classifier Fusion

### Training Phase

In [None]:
# Base-model class probabilities on the training data become the meta-features.
# NOTE(review): these look like in-sample probabilities (the loaded models were
# presumably fit on these same training sets -- TODO confirm). If so, out-of-fold
# predictions would give the meta-classifier less optimistic inputs.
train_probs_RNAseq = rnaseq_model.predict_proba(X_train_RNAseq)
train_probs_microRNA = microRNA_model.predict_proba(X_train_microRNA)

# Truncate both modalities to the same number of rows so they can be stacked.
# NOTE(review): this pairs rows by position and assumes row i is the same
# sample in both training sets -- TODO confirm.
min_length = min(len(train_probs_RNAseq), len(train_probs_microRNA))

train_probs_RNAseq = train_probs_RNAseq[:min_length]
train_probs_microRNA = train_probs_microRNA[:min_length]

# use the probabilities as features for the meta-classifier
X_train_meta = np.hstack((train_probs_RNAseq, train_probs_microRNA))
y_train_meta = y_train_RNAseq[:min_length]

# Fixed seed: an unseeded random forest yields different trees (and possibly
# different scores) on every run, making the reported results irreproducible.
meta_classifier = RandomForestClassifier(random_state=42)
meta_classifier.fit(X_train_meta, y_train_meta)


### Testing Phase

In [None]:
# Base-model class probabilities on each modality's test set
test_probs_RNAseq = rnaseq_model.predict_proba(X_test_RNAseq)
test_probs_microRNA = microRNA_model.predict_proba(X_test_microRNA)

# Keep only as many rows as the shorter test set provides
min_length_test = min(test_probs_RNAseq.shape[0], test_probs_microRNA.shape[0])
test_probs_RNAseq, test_probs_microRNA = (
    test_probs_RNAseq[:min_length_test],
    test_probs_microRNA[:min_length_test],
)

# Meta-features: both probability matrices side by side (RNAseq columns first)
X_test_meta = np.concatenate((test_probs_RNAseq, test_probs_microRNA), axis=1)

final_predictions_meta = meta_classifier.predict(X_test_meta)

# Ground truth comes from the RNAseq labels, truncated to the same length
y_test_combined = y_test_RNAseq[:min_length_test]

accuracy_meta = accuracy_score(y_test_combined, final_predictions_meta)
f1_meta = f1_score(y_test_combined, final_predictions_meta, average='macro')

for metric_label, metric_value in (
    ("Meta-classifier Fusion Accuracy:", accuracy_meta),
    ("Meta-classifier Fusion Macro F1-score:", f1_meta),
):
    print(metric_label, metric_value)


Meta-classifier Fusion Accuracy: 1.0
Meta-classifier Fusion Macro F1-score: 1.0


Some improvements should be made soon. I have to look over what I have done again.