## Multi-Label Classification Using Neural Network

In [None]:
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
import ast


In [None]:
# Dataset class definition

class TensorData(Dataset):
    """Dataset that holds features and labels in memory as float32 tensors.

    Indexing yields the ``(x, y)`` pair stored at that position, and the
    dataset length is the size of the first dimension of ``x``.
    """

    def __init__(self, x, y):
        # Convert both inputs to float32 tensors once, up front.
        self.x, self.y = (
            torch.tensor(data, dtype=torch.float32) for data in (x, y)
        )
        self.length = self.x.size(0)

    def __len__(self):
        return self.length

    def __getitem__(self, index):
        return (self.x[index], self.y[index])

In [None]:
# Neural network for multilabel classification

class MultiLabelClassifyNN(nn.Module):
    """Fully connected network with two ReLU hidden layers and a sigmoid output.

    Every output unit goes through its own sigmoid, so each label gets an
    independent value in [0, 1].

    Args:
        input_dim: Size of each input feature vector. Defaults to 3072.
        hidden1_dim: Width of the first hidden layer. Defaults to 1024.
        hidden2_dim: Width of the second hidden layer. Defaults to 512.
        num_labels: Number of output labels. Defaults to 50.

    Calling the constructor without arguments builds exactly the
    3072 -> 1024 -> 512 -> 50 network this notebook trains.
    """

    def __init__(self, input_dim=3072, hidden1_dim=1024, hidden2_dim=512, num_labels=50):
        super(MultiLabelClassifyNN, self).__init__()
        # Attribute names (input / h1 / output) are kept so state_dict keys stay stable.
        self.input = nn.Linear(input_dim, hidden1_dim)
        self.h1 = nn.Linear(hidden1_dim, hidden2_dim)
        self.output = nn.Linear(hidden2_dim, num_labels)

    def forward(self, x):
        """Return per-label sigmoid outputs for a batch of feature vectors."""
        x = F.relu(self.input(x))
        x = F.relu(self.h1(x))
        # Sigmoid (not softmax): labels are scored independently of each other.
        x = torch.sigmoid(self.output(x))
        return x


In [None]:
def train_MultiLabelClassifyNN(epochs, train_dataloader, dev_dataloader):
    """Train a MultiLabelClassifyNN with SGD and early stopping on the dev loss.

    After every epoch the model is scored on ``dev_dataloader`` with
    ``evaluate_model``. Whenever the dev loss improves, a copy of the weights
    is saved. Training stops once the dev loss has failed to improve for
    ``patience`` consecutive epochs, or after ``epochs`` epochs.

    Args:
        epochs: Maximum number of passes over ``train_dataloader``.
        train_dataloader: Iterable of ``(x_batch, y_batch)`` pairs used for
            the gradient updates. Must support ``len()``.
        dev_dataloader: Iterable handed to ``evaluate_model`` after each epoch.

    Returns:
        The model with the weights from the lowest-dev-loss epoch restored,
        or ``None`` if no epoch ever improved on the initial ``inf`` loss
        (for example when ``epochs`` is 0).
    """
    best_loss = float('inf')
    best_state = None
    patience = 7  # consecutive non-improving epochs tolerated before stopping
    no_improvement = 0

    model = MultiLabelClassifyNN()
    criterion = nn.BCELoss()
    optimizer = optim.SGD(model.parameters(), lr=0.01)

    for epoch in range(epochs):
        model.train()
        train_loss = 0

        for x_batch, y_batch in train_dataloader:
            y_pred = model(x_batch)
            loss = criterion(y_pred, y_batch)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            train_loss += loss.item()

        dev_loss, accuracy_dev = evaluate_model(model, dev_dataloader, criterion)

        if dev_loss < best_loss:
            best_loss = dev_loss
            # Clone the tensors: keeping only a reference to `model` (or to its
            # live state_dict) would follow every later optimizer step, so the
            # "best" model would silently become the last one.
            best_state = {name: tensor.clone() for name, tensor in model.state_dict().items()}
            no_improvement = 0
        else:
            no_improvement += 1
            if no_improvement >= patience:
                print(f"Early stopping at epoch {epoch + 1} due to no improvement in validation loss.\n")
                break

        print(f"Epoch [{epoch+1}/{epochs}] | Train Loss: {train_loss/len(train_dataloader):.4f} | Dev Loss: {dev_loss:.4f} | Dev Accuracy: {accuracy_dev:.2f}%")

    if best_state is None:
        return None

    # Roll the weights back to the best epoch before handing the model out.
    model.load_state_dict(best_state)
    return model

In [None]:
# Evaluation function

def evaluate_model(model, dataloader, criterion):
    """Score ``model`` on ``dataloader`` without tracking gradients.

    Args:
        model: Module to evaluate; it is switched to eval mode.
        dataloader: Iterable of ``(x_batch, y_batch)`` pairs supporting ``len()``.
        criterion: Callable ``criterion(outputs, y_batch)`` returning a loss
            with an ``.item()`` method.

    Returns:
        ``(average_loss, accuracy)`` where ``average_loss`` is the mean of the
        per-batch losses and ``accuracy`` is the percentage of individual label
        entries whose 0.5-thresholded output equals the target.
    """
    model.eval()
    loss_sum = 0
    n_correct = 0
    n_entries = 0

    with torch.no_grad():
        for inputs, labels in dataloader:
            scores = model(inputs)
            loss_sum += criterion(scores, labels).item()

            # Element-wise comparison: every label entry counts separately.
            hard_labels = (scores >= 0.5).float()
            n_correct += (hard_labels == labels).float().sum().item()
            n_entries += labels.numel()

    accuracy = (100 * n_correct) / n_entries
    average_loss = loss_sum / len(dataloader)
    return average_loss, accuracy

In [None]:
# Load CSV data for processing using dataloader

def load_data_from_csv(file_path):
    """Read embeddings and the 50 topic columns from a CSV file.

    The ``openai_embedding`` column is expected to hold Python-literal strings,
    which are parsed with ``ast.literal_eval``. The target columns are the
    zero-padded names ``'00'`` through ``'49'``.

    Args:
        file_path: Path (or anything ``pd.read_csv`` accepts) of the CSV file.

    Returns:
        ``(embeddings, targets)``: a numpy array built from the parsed
        embeddings and the values of the 50 target columns. The targets are
        also printed.
    """
    frame = pd.read_csv(file_path)

    # Each cell is a string literal of the embedding; parse it back to a list.
    embeddings = np.array([ast.literal_eval(raw) for raw in frame['openai_embedding']])

    target_columns = [f"{topic:02d}" for topic in range(50)]
    targets = frame[target_columns].values
    print(targets)

    return embeddings, targets


In [None]:
# Function to make predictions on a dataset
def predict(model, dataloader):
    """Run ``model`` over every batch and return the stacked outputs.

    The model is put in eval mode and gradients are disabled. The second
    element of each ``(x_batch, _)`` pair is ignored.

    Returns:
        One tensor with the per-batch outputs concatenated along dim 0.
    """
    model.eval()
    with torch.no_grad():
        batch_outputs = [model(features) for features, _ in dataloader]
    return torch.cat(batch_outputs, dim=0)

# Function to map binary predictions to topic indices
def get_predicted_topics(predictions):
    """Return, for each row, the positions whose value equals 1.0.

    Args:
        predictions: Iterable of rows of binary (0/1) values.

    Returns:
        A list with one list of indices per row; a row without any 1.0
        yields an empty list.
    """
    return [
        [position for position, flag in enumerate(row) if flag == 1.0]
        for row in predictions
    ]


In [None]:
# Extracting data
# Paths of the train / validation CSV files.
train_file_path = '/content/frc_train_data.csv'
val_file_path = '/content/frc_val_data.csv'

# Each call returns (embeddings, targets) and also prints the target matrix.
train_embeddings, train_targets = load_data_from_csv(train_file_path)
val_embeddings, val_targets = load_data_from_csv(val_file_path)

# Convert data using dataloader
# Training batches are reshuffled every epoch.
train_dataset = TensorData(train_embeddings, train_targets)
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Validation batches keep the file order (no shuffling).
val_dataset = TensorData(val_embeddings, val_targets)
val_dataloader = DataLoader(val_dataset, batch_size=32, shuffle=False)


[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 1 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 1]
 [0 0 0 ... 0 0 0]]
[[0 0 1 ... 0 0 0]
 [0 0 1 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 1 ... 0 0 0]
 [0 0 1 ... 0 0 0]]


In [None]:
# Sanity check: show the (embedding, target) tensor pair stored at index 23.
train_dataset[23]

(tensor([ 0.0500, -0.0448, -0.0137,  ...,  0.0047,  0.0063,  0.0201]),
 tensor([0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.,
         1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]))

In [None]:
# Train and find best model
# Up to 100 epochs; the validation dataloader drives early stopping inside
# train_MultiLabelClassifyNN.

best_model = train_MultiLabelClassifyNN(100, train_dataloader, val_dataloader)

Epoch [1/100] | Train Loss: 0.6917 | Dev Loss: 0.6887 | Dev Accuracy: 56.99%
Epoch [2/100] | Train Loss: 0.6857 | Dev Loss: 0.6828 | Dev Accuracy: 76.91%
Epoch [3/100] | Train Loss: 0.6798 | Dev Loss: 0.6768 | Dev Accuracy: 90.03%
Epoch [4/100] | Train Loss: 0.6738 | Dev Loss: 0.6709 | Dev Accuracy: 93.98%
Epoch [5/100] | Train Loss: 0.6679 | Dev Loss: 0.6649 | Dev Accuracy: 94.00%
Epoch [6/100] | Train Loss: 0.6619 | Dev Loss: 0.6589 | Dev Accuracy: 94.00%
Epoch [7/100] | Train Loss: 0.6558 | Dev Loss: 0.6527 | Dev Accuracy: 94.00%
Epoch [8/100] | Train Loss: 0.6495 | Dev Loss: 0.6463 | Dev Accuracy: 94.00%
Epoch [9/100] | Train Loss: 0.6430 | Dev Loss: 0.6396 | Dev Accuracy: 94.00%
Epoch [10/100] | Train Loss: 0.6362 | Dev Loss: 0.6327 | Dev Accuracy: 94.00%
Epoch [11/100] | Train Loss: 0.6291 | Dev Loss: 0.6254 | Dev Accuracy: 94.00%
Epoch [12/100] | Train Loss: 0.6215 | Dev Loss: 0.6176 | Dev Accuracy: 94.00%
Epoch [13/100] | Train Loss: 0.6135 | Dev Loss: 0.6092 | Dev Accuracy: 94

In [None]:
# Load test data
test_file_path = '/content/frc_test_data.csv'
test_embeddings, test_targets = load_data_from_csv(test_file_path)

# Test batches keep the file order so predictions line up with test_targets.
test_dataset = TensorData(test_embeddings, test_targets)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False)

[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 1]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 1 ... 0 0 0]
 [0 0 0 ... 0 0 0]]


In [None]:
# Get predictions and convert to binary
# `predictions` holds the model's sigmoid outputs for every test row.
predictions = predict(best_model, test_dataloader)
print(predictions)

# Threshold at 0.5 (the same cut-off evaluate_model uses) to get 0./1. labels.
predicted_topics = (predictions >= 0.5).float()
print(predicted_topics)

tensor([[0.0059, 0.0668, 0.1762,  ..., 0.0103, 0.0104, 0.1097],
        [0.0073, 0.0736, 0.1851,  ..., 0.0124, 0.0125, 0.1180],
        [0.0083, 0.0781, 0.1902,  ..., 0.0139, 0.0140, 0.1231],
        ...,
        [0.0071, 0.0729, 0.1836,  ..., 0.0121, 0.0122, 0.1167],
        [0.0063, 0.0690, 0.1794,  ..., 0.0110, 0.0110, 0.1126],
        [0.0079, 0.0769, 0.1889,  ..., 0.0134, 0.0135, 0.1215]])
tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])


In [None]:
# Raw per-label outputs for test sample 123.
predictions[123]

tensor([0.0062, 0.0683, 0.1784, 0.1527, 0.1217, 0.1208, 0.0505, 0.0088, 0.1208,
        0.0558, 0.0115, 0.0395, 0.0604, 0.0182, 0.0065, 0.0643, 0.0205, 0.0062,
        0.0372, 0.0201, 0.0164, 0.0065, 0.0101, 0.2057, 0.0067, 0.0081, 0.1698,
        0.0170, 0.0189, 0.0578, 0.0902, 0.0308, 0.0091, 0.0377, 0.0727, 0.0938,
        0.0337, 0.0692, 0.1022, 0.1006, 0.0535, 0.0403, 0.0919, 0.0674, 0.0493,
        0.0265, 0.0623, 0.0108, 0.0108, 0.1114])

In [None]:
# The three highest-scoring labels (values and indices) for test sample 123.
torch.topk(predictions[123], 3)

torch.return_types.topk(
values=tensor([0.2057, 0.1784, 0.1698]),
indices=tensor([23,  2, 26]))

In [None]:
# Thresholded (0./1.) prediction row for test sample 123.
predicted_topics[123]

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0.])

In [None]:
# Ground-truth 0/1 label row for test sample 123.
test_targets[123]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0])

In [None]:
# Indices of the three largest entries of the 0/1 target row for sample 123.
# NOTE(review): this lists exactly the positive labels only when the row has
# exactly three ones; otherwise some returned indices point at zeros or some
# positives are missing.
torch.topk(torch.from_numpy(test_targets[123]), 3)

torch.return_types.topk(
values=tensor([1, 1, 1]),
indices=tensor([26, 30, 34]))

In [None]:
# Get predicted topics
# Convert each thresholded row into the list of label indices equal to 1.0.
predicted_topics_indices = get_predicted_topics(predicted_topics)

# Show the first 10 samples (numbered from 1 in the output).
for i, topics in enumerate(predicted_topics_indices[:10]):
    print(f"Sample {i + 1} Predicted Topics: {topics}")

Sample 1 Predicted Topics: []
Sample 2 Predicted Topics: []
Sample 3 Predicted Topics: []
Sample 4 Predicted Topics: []
Sample 5 Predicted Topics: []
Sample 6 Predicted Topics: []
Sample 7 Predicted Topics: []
Sample 8 Predicted Topics: []
Sample 9 Predicted Topics: []
Sample 10 Predicted Topics: []


In [None]:
# Comparing predicted and true topics
# Note: the "True Topics" side prints the raw 0/1 target row, while the
# "Predicted Topics" side prints a list of label indices.
for i in range(20):
    print(f"Sample {i + 1} True Topics: {test_targets[i]}, Predicted Topics: {predicted_topics_indices[i]}")


Sample 1 True Topics: [0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0], Predicted Topics: []
Sample 2 True Topics: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
 0 0 1 0 0 0 0 0 0 0 0 0 0], Predicted Topics: []
Sample 3 True Topics: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 1 0 0 0 0 0 0 0 0 0 1], Predicted Topics: []
Sample 4 True Topics: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
 0 0 0 1 0 0 0 0 0 0 0 0 1], Predicted Topics: []
Sample 5 True Topics: [0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
 0 0 0 0 0 0 0 0 0 0 0 0 0], Predicted Topics: []
Sample 6 True Topics: [0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0], Predicted Topics: []
Sample 7 True Topics: [0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0
 1 0 0 0 0 0 0 0 0 0 

In [None]:
# Top-3 scores and label indices for each of the first 10 test samples.
for i in range(10):
  print(torch.topk(predictions[i],3))

torch.return_types.topk(
values=tensor([0.2035, 0.1762, 0.1680]),
indices=tensor([23,  2, 26]))
torch.return_types.topk(
values=tensor([0.2127, 0.1851, 0.1766]),
indices=tensor([23,  2, 26]))
torch.return_types.topk(
values=tensor([0.2176, 0.1902, 0.1823]),
indices=tensor([23,  2, 26]))
torch.return_types.topk(
values=tensor([0.2087, 0.1815, 0.1731]),
indices=tensor([23,  2, 26]))
torch.return_types.topk(
values=tensor([0.2078, 0.1802, 0.1724]),
indices=tensor([23,  2, 26]))
torch.return_types.topk(
values=tensor([0.2071, 0.1797, 0.1712]),
indices=tensor([23,  2, 26]))
torch.return_types.topk(
values=tensor([0.2106, 0.1825, 0.1746]),
indices=tensor([23,  2, 26]))
torch.return_types.topk(
values=tensor([0.2099, 0.1828, 0.1743]),
indices=tensor([23,  2, 26]))
torch.return_types.topk(
values=tensor([0.2034, 0.1760, 0.1675]),
indices=tensor([23,  2, 26]))
torch.return_types.topk(
values=tensor([0.2117, 0.1840, 0.1761]),
indices=tensor([23,  2, 26]))


In [None]:
# Function to map top 3 predictions to topic indices
def get_top_3_predicted_topics(predictions):
    """Return the indices of the three largest scores of every row.

    Args:
        predictions: Iterable of 1-D score tensors (e.g. a 2-D tensor).

    Returns:
        A list with one index tensor per row, as produced by
        ``torch.topk(row, 3).indices``.
    """
    return [torch.topk(scores, 3).indices for scores in predictions]


In [None]:
# Top-3 label indices for every test sample, taken from the raw model outputs.
predicted_top_3_topics = get_top_3_predicted_topics(predictions)

In [None]:
# Comparing predicted and true topics
# The true topics are the indices of the non-zero target entries. Using
# torch.topk(..., 3) on a 0/1 row would pad the list with indices of zeros when
# a sample has fewer than three labels and drop labels when it has more.
# The range is capped so a test set with fewer than 20 rows does not raise.
for i in range(min(20, len(test_targets))):
    print(f"Sample {i + 1} True Topics: {np.flatnonzero(test_targets[i]).tolist()}, "
          f"Predicted Topics: {predicted_top_3_topics[i].tolist()}")


Sample 1 True Topics:     tensor([23,  8,  5]),     Predicted Topics: tensor([23,  2, 26])
Sample 2 True Topics:     tensor([39, 16, 29]),     Predicted Topics: tensor([23,  2, 26])
Sample 3 True Topics:     tensor([49, 18, 39]),     Predicted Topics: tensor([23,  2, 26])
Sample 4 True Topics:     tensor([49, 29, 40]),     Predicted Topics: tensor([23,  2, 26])
Sample 5 True Topics:     tensor([35,  8,  2]),     Predicted Topics: tensor([23,  2, 26])
Sample 6 True Topics:     tensor([23, 30, 12]),     Predicted Topics: tensor([23,  2, 26])
Sample 7 True Topics:     tensor([23,  5, 37]),     Predicted Topics: tensor([23,  2, 26])
Sample 8 True Topics:     tensor([35,  8,  2]),     Predicted Topics: tensor([23,  2, 26])
Sample 9 True Topics:     tensor([26, 34, 15]),     Predicted Topics: tensor([23,  2, 26])
Sample 10 True Topics:     tensor([ 8, 39, 38]),     Predicted Topics: tensor([23,  2, 26])
Sample 11 True Topics:     tensor([26, 42, 15]),     Predicted Topics: tensor([23,  2, 26

## Logistic Regression for Multi-Label Classification

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.multioutput import MultiOutputClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score

In [None]:
def load_data_with_two_labels(file_path):
    """Read embeddings and a 45-column subset of the topic targets from a CSV.

    Same parsing as the 50-column loader, but the target columns leave out
    topics ``'00'``, ``'14'``, ``'17'``, ``'21'`` and ``'24'``.
    NOTE(review): the reason these five topics are excluded is not visible
    here (the function name suggests a label-count criterion) -- confirm.

    Args:
        file_path: Path (or anything ``pd.read_csv`` accepts) of the CSV file.

    Returns:
        ``(embeddings, targets)``: a numpy array built from the parsed
        ``openai_embedding`` strings and the values of the 45 kept columns.
    """
    frame = pd.read_csv(file_path)

    # Each cell is a string literal of the embedding; parse it back to a list.
    embeddings = np.array([ast.literal_eval(raw) for raw in frame['openai_embedding']])

    skipped = ('00', '14', '17', '21', '24')
    all_columns = (f"{topic:02d}" for topic in range(50))
    target_columns = [name for name in all_columns if name not in skipped]
    targets = frame[target_columns].values

    return embeddings, targets

In [None]:
# Same train / test CSV files as the neural-network section (paths re-declared here).
train_file_path = '/content/frc_train_data.csv'
test_file_path = '/content/frc_test_data.csv'

# Embeddings plus the 45-column target subset for the logistic-regression models.
train_emb, train_targ = load_data_with_two_labels(train_file_path)
test_emb, test_targ = load_data_with_two_labels(test_file_path)

In [None]:
# Logistic regression model
# Base estimator; max_iter is raised to 1000 iterations.
log_reg = LogisticRegression(max_iter=1000)

# MultiOutputClassifier for multi-label classification
# Wraps the base estimator so it is fitted once per target column.
multi_target_log_reg = MultiOutputClassifier(log_reg, n_jobs=-1)


In [None]:
# Fit the model
multi_target_log_reg.fit(train_emb, train_targ)

# Predict on test set
# Hard 0/1 predictions, one column per target column of test_targ.
y_pred = multi_target_log_reg.predict(test_emb)

In [None]:
# Display the hard 0/1 predictions of the logistic-regression models.
y_pred

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 1, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [None]:
# Evaluate the model
# NOTE(review): for multilabel input, scikit-learn documents accuracy_score as
# subset accuracy (a sample counts only if every label matches), which would
# explain the 0.00 below -- confirm against the sklearn docs.
accuracy = accuracy_score(test_targ, y_pred)
# Micro-averaged F1: computed over all (sample, label) decisions pooled together.
f1 = f1_score(test_targ, y_pred, average='micro')

print(f"Accuracy: {accuracy:.2f}")
print(f"F1 Score: {f1:.2f}")

Accuracy: 0.00
F1 Score: 0.44


In [None]:
# Fixing topic numbers

# Zero-padded names of all 50 topic columns: '00' .. '49'.
original_labels = [f"{topic:02d}" for topic in range(50)]

# The 45 columns the logistic-regression models were trained on
# (every original label except '00', '14', '17', '21' and '24').
extracted_labels = [label for label in original_labels
                    if label not in ('00', '14', '17', '21', '24')]

# Position in extracted_labels -> position of the same label in original_labels.
ext_to_orig_index = {position: original_labels.index(label)
                     for position, label in enumerate(extracted_labels)}

print(ext_to_orig_index)

{0: 1, 1: 2, 2: 3, 3: 4, 4: 5, 5: 6, 6: 7, 7: 8, 8: 9, 9: 10, 10: 11, 11: 12, 12: 13, 13: 15, 14: 16, 15: 18, 16: 19, 17: 20, 18: 22, 19: 23, 20: 25, 21: 26, 22: 27, 23: 28, 24: 29, 25: 30, 26: 31, 27: 32, 28: 33, 29: 34, 30: 35, 31: 36, 32: 37, 33: 38, 34: 39, 35: 40, 36: 41, 37: 42, 38: 43, 39: 44, 40: 45, 41: 46, 42: 47, 43: 48, 44: 49}


In [None]:
# Get top 3 predicted topics
def get_top_3_topics(probs, k=3):
    """Return the column indices of the ``k`` largest values in every row.

    Args:
        probs: 2-D array-like of scores, one row per sample.
        k: Number of indices to keep per row. Defaults to 3. Values larger
            than the number of columns return every column.

    Returns:
        Integer array of shape ``(n_rows, min(k, n_columns))``. Within a row
        the indices are ordered by ascending score, so the best one is last.

    Raises:
        ValueError: If ``k`` is negative.
    """
    if k < 0:
        raise ValueError(f"k must be non-negative, got {k}")

    order = np.argsort(probs, axis=1)
    # Slice from an explicit start: `[:, -k:]` would return *all* columns for
    # k == 0 because -0 == 0.
    start = max(order.shape[1] - k, 0)
    top3_indices = order[:, start:]
    return top3_indices

In [None]:
# Predict probabilities
y_prob = multi_target_log_reg.predict_proba(test_emb)
y_prob_matrix = np.hstack([prob[:, 1].reshape(-1, 1) for prob in y_prob])

# Get top 3 predicted topics
top_3_predicted_topics = get_top_3_topics(y_prob_matrix, k=3)

In [None]:
# Function to map predicted indices to original labels
def map_to_original_labels(indices, mapping):
    """Translate every index in ``indices`` through ``mapping``.

    Args:
        indices: Iterable of rows, each an iterable of keys of ``mapping``.
        mapping: Lookup table from predicted index to original label index.

    Returns:
        A list of lists with the same layout as ``indices``, holding the
        mapped values. A key missing from ``mapping`` raises ``KeyError``.
    """
    mapped_rows = []
    for row in indices:
        mapped_row = []
        for idx in row:
            mapped_row.append(mapping[idx])
        mapped_rows.append(mapped_row)
    return mapped_rows


In [None]:
# Mapping topics
# Convert the 45-column positions back to positions in the original 50-label list.
top_3_predicted_topics_mapped = map_to_original_labels(top_3_predicted_topics, ext_to_orig_index)


In [None]:
# Print true vs. top-3 predicted topics (original 50-label numbering) for the
# first samples. The per-sample names are deliberately distinct from the
# `predicted_topics` tensor built in the neural-network section: rebinding that
# global to a list here would break re-running the earlier cells that use it.
# The range is capped so a test set with fewer than 20 rows does not raise.
for i in range(min(20, len(test_targ))):
    sample_true_topics = [ext_to_orig_index[j] for j in np.where(test_targ[i] == 1)[0].tolist()]
    sample_predicted_topics = top_3_predicted_topics_mapped[i]
    print(f"Sample {i + 1} - True Topics: {sample_true_topics}, Predicted Topics: {sample_predicted_topics}")

Sample 1 - True Topics: [5, 8, 23], Predicted Topics: [5, 26, 8]
Sample 2 - True Topics: [16, 29, 39], Predicted Topics: [40, 16, 39]
Sample 3 - True Topics: [18, 39, 49], Predicted Topics: [49, 38, 18]
Sample 4 - True Topics: [29, 40, 49], Predicted Topics: [39, 40, 49]
Sample 5 - True Topics: [2, 8, 35], Predicted Topics: [9, 8, 5]
Sample 6 - True Topics: [12, 23, 30], Predicted Topics: [30, 12, 23]
Sample 7 - True Topics: [5, 23, 37], Predicted Topics: [5, 23, 35]
Sample 8 - True Topics: [2, 8, 35], Predicted Topics: [9, 8, 35]
Sample 9 - True Topics: [15, 26, 34], Predicted Topics: [49, 26, 3]
Sample 10 - True Topics: [8, 38, 39], Predicted Topics: [5, 35, 8]
Sample 11 - True Topics: [15, 26, 42], Predicted Topics: [2, 26, 4]
Sample 12 - True Topics: [2, 3, 26], Predicted Topics: [26, 46, 3]
Sample 13 - True Topics: [29, 35, 49], Predicted Topics: [39, 26, 29]
Sample 14 - True Topics: [2, 26, 34], Predicted Topics: [34, 23, 26]
Sample 15 - True Topics: [6, 8, 23], Predicted Topics:

In [None]:
# Measuring correctly identified topics

def correctly_identified_topics(true_topics, predicted_topics):
    """Count how many predicted topics match the true topics, sample by sample.

    For every sample the true and predicted topics are compared as sets, so
    order and duplicates within a sample do not matter.

    Args:
        true_topics: Sequence with one collection of true topic ids per sample.
        predicted_topics: Sequence with one collection of predicted topic ids
            per sample; must have at least ``len(true_topics)`` entries.

    Returns:
        ``(avg_correct, total_correct)``: the mean number of correctly
        predicted topics per sample and the total over all samples. Empty
        input returns ``(0.0, 0)`` instead of dividing by zero.
    """
    num_samples = len(true_topics)
    if num_samples == 0:
        return 0.0, 0

    total_correct = sum(
        len(set(true_topics[i]) & set(predicted_topics[i]))
        for i in range(num_samples)
    )
    avg_correct = total_correct / num_samples

    return avg_correct, total_correct

In [None]:
# For every test sample, collect the original-numbering indices of its
# positive labels (columns of test_targ equal to 1).
all_true_topics = []

for i in range(len(test_targ)):
   all_true_topics.append([original_labels.index(extracted_labels[j]) for j in np.where(test_targ[i] == 1)[0].tolist()])

In [None]:
# Overlap between the true topics and the top-3 predictions over the whole test set.
avg_correct_topics, total_correct = correctly_identified_topics(all_true_topics, top_3_predicted_topics_mapped)

# Fraction of correct topics out of 3 per sample.
# NOTE(review): the denominator assumes every sample has exactly three true
# topics -- confirm against the data.
total_avg_correct = (total_correct)/(len(test_targ)*3)
print(f"Average Correctly Identified Topics per Sample: {avg_correct_topics:.2f}")
print(f"Total Average Correctly Identified Topics: {total_avg_correct:.2f}")


Average Correctly Identified Topics per Sample: 1.80
Total Average Correctly Identified Topics: 0.60
