In [1]:
import sys

# Put the parent directory on the import path (presumably where the `mmr`
# package and `cubert_wrapper` live -- verify against the repo layout).
# The membership check keeps the cell idempotent: re-running it no longer
# stacks duplicate '..' entries onto sys.path.
if '..' not in sys.path:
    sys.path.append('..')

In [6]:
import os
from pathlib import Path
from itertools import islice
import csv

import numpy as np
import torch
from torch import nn
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, roc_auc_score

import mmr.ds_loading
import mmr.vectorization
import cubert_wrapper

In [62]:
# Root directory holding the datasets and their precalculated vectors.
# Set the MMR_DATA_ROOT environment variable to run the notebook on another
# machine; when it is unset the original location is used.
data_root = Path(os.environ.get('MMR_DATA_ROOT', '/home/maxkvant/data/mmr/'))

# Test dataset together with its precalculated class / method vectors.
test_classes_vecs_path = data_root / 'mmr_vecs_np'
test_methods_vecs_path = data_root / 'mmr_vecs_wm'
# NOTE(review): the meaning of the two positional `True` flags is defined by
# mmr.ds_loading.MMRDataset and is not visible from this notebook.
test_ds = mmr.ds_loading.MMRDataset(data_root / 'MoveMethodDataset', test_methods_vecs_path, test_classes_vecs_path, 
                                    torch.FloatTensor, torch.FloatTensor, True, True, precalculated=True)

# Training dataset together with its precalculated class / method vectors.
train_classes_vecs_path = data_root / 'mmr_tr_dsv'
train_methods_vecs_path = data_root / 'mmr_tr_dsvwm_v2'
train_ds = mmr.ds_loading.MMRDataset(data_root / 'mmr_tr_ds', train_methods_vecs_path, train_classes_vecs_path, 
                                     torch.FloatTensor, torch.FloatTensor, True, True, precalculated=True)

In [63]:
# The first field of every dataset record is the project name.
c_tr = set(record[0] for record in train_ds)
c_te = set(record[0] for record in test_ds)
# Projects that occur in both datasets; they are kept out of training so the
# test dataset stays unseen at project level.
tr_exclude_projects = c_tr.intersection(c_te)
tr_exclude_projects

{'actor-platform',
 'atlas',
 'buck',
 'crate',
 'deeplearning4j',
 'drools',
 'hbase',
 'hive',
 'jenkins',
 'jstorm',
 'pinpoint',
 'pmd'}

In [7]:
# Split at project level so that no project contributes to both training and
# validation. The candidates are sorted first because the iteration order of
# a set of strings changes between interpreter runs, and a fixed random_state
# then makes the split reproducible after a kernel restart.
proj_train, proj_val = train_test_split(sorted(c_tr - tr_exclude_projects), random_state=42)

In [66]:
def ds_to_xy(ds, val_projects, exclude_projects = {}):
    """Split dataset records into train / validation features, targets and method names.

    Every record of `ds` is unpacked as six fields:
    (project, method name, <ignored>, method vector, class vector, target).

    Args:
        ds: iterable of six-field records.
        val_projects: container of project names routed to the validation split.
        exclude_projects: container of project names dropped entirely
            (unless they are also listed in `val_projects`, which is checked first).

    Returns:
        (x_train, x_val, y_train, y_val, mn_train, mn_val) where each `x_*`
        entry is a (method vector, class vector) pair, each `y_*` entry the
        target and each `mn_*` entry the method name, in dataset order.
    """
    train_part = ([], [], [])
    val_part = ([], [], [])
    for record in ds:
        project, method_name, _, method_vec, class_vec, target = record
        if project in val_projects:
            features, targets, names = val_part
        elif project in exclude_projects:
            continue
        else:
            features, targets, names = train_part
        features.append((method_vec, class_vec))
        targets.append(target)
        names.append(method_name)
    x_train, y_train, mn_train = train_part
    x_val, y_val, mn_val = val_part
    return x_train, x_val, y_train, y_val, mn_train, mn_val


# Training dataset: held-out projects form the validation split, and projects
# shared with the test dataset are dropped.
x_train, x_val, y_train, y_val, mn_train, mn_val = ds_to_xy(
    train_ds, val_projects=proj_val, exclude_projects=tr_exclude_projects
)
# Test dataset: nothing is routed to validation, so only the "train" slots are populated.
x_test, _, y_test, _, mn_test, _ = ds_to_xy(test_ds, val_projects={})

In [67]:
# Each item is ((method vector, class vector), target); samples are served one
# at a time (batch_size=1) and in their stored order.
train_pairs = list(zip(x_train, y_train))
val_pairs = list(zip(x_val, y_val))
dl_train = torch.utils.data.DataLoader(train_pairs, batch_size=1)
dl_val = torch.utils.data.DataLoader(val_pairs, batch_size=1)

In [12]:
class RNNClassifier(nn.Module):
    """Binary classifier that encodes a method and a class with two separate LSTMs.

    Both inputs are sequences of 1024-dimensional embeddings laid out as
    (batch, sequence, 1024) (`batch_first=True`). The final cell state of
    every LSTM layer is flattened per sample, the method and class encodings
    are concatenated and passed through an MLP that ends in a sigmoid.

    Args:
        hidden_dim: hidden size of both LSTMs.
        n_channels: number of stacked LSTM layers (passed as `num_layers`).
    """

    def __init__(self, hidden_dim, n_channels):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.n_channels = n_channels
        self.method_rnn = nn.LSTM(1024, hidden_dim, n_channels, batch_first=True)
        self.class_rnn = nn.LSTM(1024, hidden_dim, n_channels, batch_first=True)
        self.linear_clf = nn.Sequential(
            nn.Linear(hidden_dim * n_channels * 2, 512),
            nn.PReLU(),
            nn.Linear(512, 512),
            nn.PReLU(),
            nn.Linear(512, 1),
            nn.Sigmoid()
        )

    def _flatten_state(self, state):
        """Turn an LSTM state of shape (layers, batch, hidden) into (batch, layers * hidden)."""
        if state.dim() == 2:
            # Unbatched input produces a (layers, hidden) state; treat it as batch size 1.
            state = state.unsqueeze(1)
        # nn.LSTM returns its states layer-major even with batch_first=True,
        # so the batch axis has to be moved to the front before flattening.
        # A plain reshape would interleave different samples when batch > 1.
        # For batch size 1 the result equals the plain reshape.
        return state.permute(1, 0, 2).reshape(state.shape[1], self.hidden_dim * self.n_channels)

    def forward(self, x):
        """Return the sigmoid score, shape (batch, 1), for a (method, class) embedding pair."""
        method_line_embs, class_line_embs = x
        _, (_, method_last_c) = self.method_rnn(method_line_embs)
        _, (_, class_last_c) = self.class_rnn(class_line_embs)
        rnn_cs = torch.cat((self._flatten_state(method_last_c),
                            self._flatten_state(class_last_c)), dim=1)
        return self.linear_clf(rnn_cs)

In [68]:
class CNNClassifier(nn.Module):
    """Binary classifier built on 1-D convolutions over method and class embeddings.

    Both inputs are expected as (batch, sequence, 1024). Each goes through its
    own stack of two length-preserving Conv1d + Tanh layers, is averaged over
    the sequence axis, and the two 1024-dimensional summaries are concatenated
    and fed to an MLP that ends in a sigmoid.
    """

    def __init__(self):
        super().__init__()
        self.method_convs = self._make_conv_stack()
        self.class_convs = self._make_conv_stack()

        # 2048 -> 1024 -> 512 -> 512 with PReLU in between, then a sigmoid output unit.
        widths = (2048, 1024, 512, 512)
        head = []
        for fan_in, fan_out in zip(widths, widths[1:]):
            head.append(nn.Linear(fan_in, fan_out))
            head.append(nn.PReLU())
        head.append(nn.Linear(512, 1))
        head.append(nn.Sigmoid())
        self.linear_clf = nn.Sequential(*head)

    @staticmethod
    def _make_conv_stack():
        """Build two Conv1d(1024 -> 1024, kernel 5, padding 2) layers, each followed by Tanh."""
        layers = []
        for _ in range(2):
            layers.append(nn.Conv1d(1024, 1024, 5, padding=2))
            layers.append(nn.Tanh())
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return the sigmoid score, shape (batch, 1), for a (method, class) embedding pair."""
        method_line_embs, class_line_embs = x
        # Conv1d wants channels first: (batch, seq, 1024) -> (batch, 1024, seq).
        method_channels_first = method_line_embs.permute(0, 2, 1)
        class_channels_first = class_line_embs.permute(0, 2, 1)
        method_summary = self.method_convs(method_channels_first).mean(dim=2)
        class_summary = self.class_convs(class_channels_first).mean(dim=2)
        conved = torch.cat((method_summary, class_summary), dim=1)
        return self.linear_clf(conved)

In [14]:
class AttnClassifier(nn.Module):
    """Binary classifier that pools method and class embeddings with learned attention.

    Both inputs are expected as (batch, sequence, 1024). A kernel-size-1
    convolution assigns one score to every sequence position; the scores are
    softmax-normalised along the sequence and used to form a weighted sum of
    the embeddings. The two 1024-dimensional summaries are concatenated and
    fed to an MLP that ends in a sigmoid.
    """

    def __init__(self):
        super().__init__()
        # One scalar attention score per position for each of the two inputs.
        self.method_convs = nn.Sequential(nn.Conv1d(1024, 1, 1))
        self.class_convs = nn.Sequential(nn.Conv1d(1024, 1, 1))
        self.linear_clf = nn.Sequential(
            nn.Linear(2048, 1024), nn.PReLU(),
            nn.Linear(1024, 512), nn.PReLU(),
            nn.Linear(512, 512), nn.PReLU(),
            nn.Linear(512, 1), nn.Sigmoid(),
        )

    @staticmethod
    def _attend(scorer, line_embs):
        """Attention-pool (batch, seq, 1024) embeddings into (batch, 1024)."""
        channels_first = line_embs.permute(0, 2, 1)
        weights = torch.softmax(scorer(channels_first), dim=2)
        return torch.sum(weights * channels_first, dim=2)

    def forward(self, x):
        """Return the sigmoid score, shape (batch, 1), for a (method, class) embedding pair."""
        method_line_embs, class_line_embs = x
        method_rep = self._attend(self.method_convs, method_line_embs)
        class_rep = self._attend(self.class_convs, class_line_embs)
        return self.linear_clf(torch.cat((method_rep, class_rep), dim=1))

In [20]:
def train(model, n_epochs, dl_train, dl_val, lr, wd, save_path):
    """Train a sigmoid-output classifier with Adam + BCE and checkpoint the best validation F1.

    After every epoch the model is evaluated on `dl_val`; whenever the F1
    score at a 0.5 threshold strictly improves, `model.state_dict()` is
    written to `save_path`. Per-epoch losses, F1 and ROC AUC are printed.

    Args:
        model: module mapping a tuple of input tensors to probabilities of
            shape (batch, 1). Batches are moved to the device its parameters
            live on.
        n_epochs: number of passes over `dl_train`.
        dl_train: iterable of (x, y) batches, where x is a sequence of tensors
            and y a tensor of targets of shape (batch,).
        dl_val: validation batches in the same format.
        lr: Adam learning rate.
        wd: Adam weight decay.
        save_path: destination for the best checkpoint.
    """
    loss_function = nn.BCELoss()
    opt = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=wd)
    # Follow the model instead of hard-coding CUDA, so the loop also runs on CPU.
    device = next(model.parameters()).device
    best_score = -1
    
    for epoch in range(n_epochs):
        train_losses, val_losses, val_tgts, val_preds = [], [], [], []

        model.train()
        for x, y in tqdm(dl_train):
            x = tuple(i.to(device) for i in x)
            y = y.to(device).unsqueeze(1).float()
            opt.zero_grad()
            pred = model(x)
            loss = loss_function(pred, y)
            loss.backward()
            opt.step()
            # .item() stores a plain float rather than keeping a device tensor per step.
            train_losses.append(loss.item())

        model.eval()
        with torch.no_grad():
            for x, y in tqdm(dl_val):
                x = tuple(i.to(device) for i in x)
                pred = model(x).cpu()
                y = y.cpu().unsqueeze(1).float()
                val_losses.append(loss_function(pred, y).item())
                # Keep every sample of the batch, not only element [0, 0],
                # so the metrics stay correct for batch sizes above 1.
                val_tgts.append(y.reshape(-1))
                val_preds.append(pred.reshape(-1))
        train_loss = torch.tensor(train_losses).mean()
        val_loss = torch.tensor(val_losses).mean()
        val_tgts, val_preds = torch.cat(val_tgts), torch.cat(val_preds)
        # Share of positive predictions vs. share of positive targets.
        print((val_preds > .5).float().mean(), val_tgts.float().mean())
        val_f1 = f1_score(val_tgts, val_preds > .5)
        val_roc = roc_auc_score(val_tgts, val_preds)
        if val_f1 > best_score:
            best_score = val_f1
            torch.save(model.state_dict(), save_path)
        print(f'Epoch {epoch} loss: train {train_loss:.4f} val {val_loss:.4f} f1: {val_f1:.4f}, ROC AUC: {val_roc:.4f}')

In [None]:
# Train the LSTM classifier. `train` requires a checkpoint path as its last
# argument; without it the call raises TypeError.
rnn_model = RNNClassifier(1024, 2).cuda()
train(rnn_model, 20, dl_train, dl_val, 1e-4, 1e-5, 'seq_rnn_wm.pt')

In [74]:
# Train the convolutional classifier; the best-F1 weights go to 'seq_cnn_wm.pt'.
cnn_model = CNNClassifier().cuda()
train(cnn_model, n_epochs=40, dl_train=dl_train, dl_val=dl_val,
      lr=1e-5, wd=1e-6, save_path='seq_cnn_wm.pt')

100%|██████████| 23679/23679 [03:26<00:00, 114.52it/s]
100%|██████████| 7459/7459 [00:22<00:00, 330.90it/s]


tensor(0.2465) tensor(0.5446)


  0%|          | 10/23679 [00:00<04:02, 97.41it/s]

Epoch 0 loss: train 0.4471 val 0.7125 f1: 0.5785, ROC AUC: 0.8342


100%|██████████| 23679/23679 [03:30<00:00, 112.61it/s]
100%|██████████| 7459/7459 [00:22<00:00, 330.29it/s]


tensor(0.2700) tensor(0.5446)


  0%|          | 11/23679 [00:00<03:52, 101.62it/s]

Epoch 1 loss: train 0.3874 val 0.6821 f1: 0.6185, ROC AUC: 0.8515


100%|██████████| 23679/23679 [04:03<00:00, 97.16it/s] 
100%|██████████| 7459/7459 [00:29<00:00, 255.42it/s]
  0%|          | 11/23679 [00:00<03:38, 108.24it/s]

tensor(0.2612) tensor(0.5446)
Epoch 2 loss: train 0.3666 val 0.7364 f1: 0.6116, ROC AUC: 0.8539


100%|██████████| 23679/23679 [04:47<00:00, 82.33it/s] 
100%|██████████| 7459/7459 [00:22<00:00, 336.45it/s]


tensor(0.2676) tensor(0.5446)


  0%|          | 10/23679 [00:00<03:57, 99.77it/s]

Epoch 3 loss: train 0.3530 val 0.7351 f1: 0.6273, ROC AUC: 0.8560


100%|██████████| 23679/23679 [04:59<00:00, 78.98it/s] 
100%|██████████| 7459/7459 [00:23<00:00, 323.57it/s]


tensor(0.2962) tensor(0.5446)


  0%|          | 10/23679 [00:00<04:14, 92.97it/s]

Epoch 4 loss: train 0.3420 val 0.6868 f1: 0.6605, ROC AUC: 0.8597


100%|██████████| 23679/23679 [06:15<00:00, 63.04it/s] 
100%|██████████| 7459/7459 [00:22<00:00, 331.27it/s]


tensor(0.3454) tensor(0.5446)


  0%|          | 9/23679 [00:00<04:25, 89.10it/s]

Epoch 5 loss: train 0.3321 val 0.5975 f1: 0.7099, ROC AUC: 0.8673


100%|██████████| 23679/23679 [05:11<00:00, 76.03it/s] 
100%|██████████| 7459/7459 [00:23<00:00, 323.08it/s]


tensor(0.3484) tensor(0.5446)


  0%|          | 10/23679 [00:00<04:11, 94.01it/s]

Epoch 6 loss: train 0.3238 val 0.5964 f1: 0.7137, ROC AUC: 0.8689


100%|██████████| 23679/23679 [03:59<00:00, 99.05it/s] 
100%|██████████| 7459/7459 [01:16<00:00, 97.61it/s] 


tensor(0.3645) tensor(0.5446)


  0%|          | 4/23679 [00:00<11:33, 34.14it/s]

Epoch 7 loss: train 0.3155 val 0.5601 f1: 0.7323, ROC AUC: 0.8743


100%|██████████| 23679/23679 [06:35<00:00, 59.83it/s] 
100%|██████████| 7459/7459 [01:16<00:00, 97.33it/s] 


tensor(0.3680) tensor(0.5446)


  0%|          | 4/23679 [00:00<12:30, 31.57it/s]

Epoch 8 loss: train 0.3089 val 0.5525 f1: 0.7381, ROC AUC: 0.8772


100%|██████████| 23679/23679 [07:43<00:00, 51.06it/s] 
100%|██████████| 7459/7459 [00:22<00:00, 336.50it/s]


tensor(0.3740) tensor(0.5446)


  0%|          | 11/23679 [00:00<03:52, 101.91it/s]

Epoch 9 loss: train 0.3028 val 0.5402 f1: 0.7469, ROC AUC: 0.8809


100%|██████████| 23679/23679 [03:23<00:00, 116.23it/s]
100%|██████████| 7459/7459 [00:22<00:00, 336.14it/s]


tensor(0.3755) tensor(0.5446)


  0%|          | 11/23679 [00:00<03:45, 104.86it/s]

Epoch 10 loss: train 0.2976 val 0.5323 f1: 0.7487, ROC AUC: 0.8837


100%|██████████| 23679/23679 [03:24<00:00, 116.05it/s]
100%|██████████| 7459/7459 [00:23<00:00, 322.18it/s]


tensor(0.3787) tensor(0.5446)


  0%|          | 11/23679 [00:00<03:50, 102.70it/s]

Epoch 11 loss: train 0.2927 val 0.5230 f1: 0.7516, ROC AUC: 0.8863


100%|██████████| 23679/23679 [03:23<00:00, 116.32it/s]
100%|██████████| 7459/7459 [00:22<00:00, 333.68it/s]


tensor(0.3852) tensor(0.5446)


  0%|          | 11/23679 [00:00<03:48, 103.75it/s]

Epoch 12 loss: train 0.2878 val 0.5104 f1: 0.7576, ROC AUC: 0.8890


100%|██████████| 23679/23679 [03:23<00:00, 116.39it/s]
100%|██████████| 7459/7459 [00:22<00:00, 333.74it/s]


tensor(0.3907) tensor(0.5446)


  0%|          | 11/23679 [00:00<03:48, 103.45it/s]

Epoch 13 loss: train 0.2822 val 0.5022 f1: 0.7652, ROC AUC: 0.8913


100%|██████████| 23679/23679 [03:27<00:00, 114.06it/s]
100%|██████████| 7459/7459 [00:22<00:00, 329.28it/s]


tensor(0.3962) tensor(0.5446)


  0%|          | 9/23679 [00:00<04:24, 89.60it/s]

Epoch 14 loss: train 0.2766 val 0.4918 f1: 0.7716, ROC AUC: 0.8942


100%|██████████| 23679/23679 [03:28<00:00, 113.70it/s]
100%|██████████| 7459/7459 [00:22<00:00, 337.17it/s]


tensor(0.4021) tensor(0.5446)


  0%|          | 11/23679 [00:00<03:48, 103.43it/s]

Epoch 15 loss: train 0.2710 val 0.4858 f1: 0.7744, ROC AUC: 0.8960


100%|██████████| 23679/23679 [03:26<00:00, 114.94it/s]
100%|██████████| 7459/7459 [00:22<00:00, 327.04it/s]


tensor(0.4058) tensor(0.5446)


  0%|          | 10/23679 [00:00<04:04, 96.70it/s]

Epoch 16 loss: train 0.2652 val 0.4871 f1: 0.7787, ROC AUC: 0.8971


100%|██████████| 23679/23679 [03:28<00:00, 113.74it/s]
100%|██████████| 7459/7459 [00:22<00:00, 336.34it/s]


tensor(0.4078) tensor(0.5446)


  0%|          | 11/23679 [00:00<03:51, 102.17it/s]

Epoch 17 loss: train 0.2596 val 0.4905 f1: 0.7804, ROC AUC: 0.8977


100%|██████████| 23679/23679 [09:13<00:00, 42.76it/s]
100%|██████████| 7459/7459 [01:16<00:00, 97.44it/s] 


tensor(0.4097) tensor(0.5446)


  0%|          | 4/23679 [00:00<11:46, 33.49it/s]

Epoch 18 loss: train 0.2545 val 0.4933 f1: 0.7825, ROC AUC: 0.8979


100%|██████████| 23679/23679 [11:03<00:00, 35.66it/s]
100%|██████████| 7459/7459 [01:16<00:00, 97.59it/s] 


tensor(0.4131) tensor(0.5446)


  0%|          | 4/23679 [00:00<11:43, 33.64it/s]

Epoch 19 loss: train 0.2479 val 0.4947 f1: 0.7859, ROC AUC: 0.8979


100%|██████████| 23679/23679 [08:55<00:00, 44.19it/s] 
100%|██████████| 7459/7459 [00:22<00:00, 333.93it/s]
  0%|          | 11/23679 [00:00<03:44, 105.44it/s]

tensor(0.4112) tensor(0.5446)
Epoch 20 loss: train 0.2416 val 0.4957 f1: 0.7827, ROC AUC: 0.8977


100%|██████████| 23679/23679 [04:07<00:00, 95.86it/s] 
100%|██████████| 7459/7459 [00:22<00:00, 336.56it/s]
  0%|          | 11/23679 [00:00<03:42, 106.49it/s]

tensor(0.4106) tensor(0.5446)
Epoch 21 loss: train 0.2356 val 0.4998 f1: 0.7812, ROC AUC: 0.8977


100%|██████████| 23679/23679 [03:23<00:00, 116.31it/s]
100%|██████████| 7459/7459 [00:22<00:00, 336.40it/s]
  0%|          | 11/23679 [00:00<03:44, 105.42it/s]

tensor(0.4151) tensor(0.5446)
Epoch 22 loss: train 0.2289 val 0.5004 f1: 0.7840, ROC AUC: 0.8979


100%|██████████| 23679/23679 [03:24<00:00, 116.00it/s]
100%|██████████| 7459/7459 [00:22<00:00, 331.67it/s]
  0%|          | 11/23679 [00:00<03:38, 108.46it/s]

tensor(0.4149) tensor(0.5446)
Epoch 23 loss: train 0.2215 val 0.5062 f1: 0.7850, ROC AUC: 0.8982


100%|██████████| 23679/23679 [03:24<00:00, 115.86it/s]
100%|██████████| 7459/7459 [00:22<00:00, 336.33it/s]
  0%|          | 12/23679 [00:00<03:31, 112.02it/s]

tensor(0.4123) tensor(0.5446)
Epoch 24 loss: train 0.2146 val 0.5268 f1: 0.7852, ROC AUC: 0.8980


100%|██████████| 23679/23679 [03:24<00:00, 116.02it/s]
100%|██████████| 7459/7459 [00:22<00:00, 334.78it/s]
  0%|          | 11/23679 [00:00<03:37, 108.93it/s]

tensor(0.4117) tensor(0.5446)
Epoch 25 loss: train 0.2081 val 0.5482 f1: 0.7845, ROC AUC: 0.8980


100%|██████████| 23679/23679 [03:22<00:00, 116.90it/s]
100%|██████████| 7459/7459 [00:22<00:00, 335.21it/s]


tensor(0.4143) tensor(0.5446)


  0%|          | 10/23679 [00:00<03:58, 99.04it/s]

Epoch 26 loss: train 0.2015 val 0.5450 f1: 0.7872, ROC AUC: 0.8981


100%|██████████| 23679/23679 [03:24<00:00, 116.00it/s]
100%|██████████| 7459/7459 [00:22<00:00, 335.85it/s]


tensor(0.4152) tensor(0.5446)


  0%|          | 11/23679 [00:00<03:49, 103.23it/s]

Epoch 27 loss: train 0.1947 val 0.5516 f1: 0.7884, ROC AUC: 0.8975


100%|██████████| 23679/23679 [03:23<00:00, 116.37it/s]
100%|██████████| 7459/7459 [00:22<00:00, 336.15it/s]


tensor(0.4239) tensor(0.5446)


  0%|          | 11/23679 [00:00<03:51, 102.21it/s]

Epoch 28 loss: train 0.1888 val 0.5527 f1: 0.7968, ROC AUC: 0.8964


100%|██████████| 23679/23679 [03:23<00:00, 116.16it/s]
100%|██████████| 7459/7459 [00:22<00:00, 336.20it/s]
  0%|          | 9/23679 [00:00<04:25, 89.25it/s]

tensor(0.4254) tensor(0.5446)
Epoch 29 loss: train 0.1829 val 0.5690 f1: 0.7950, ROC AUC: 0.8938


100%|██████████| 23679/23679 [03:29<00:00, 113.19it/s]
100%|██████████| 7459/7459 [00:22<00:00, 332.96it/s]


tensor(0.4330) tensor(0.5446)


  0%|          | 10/23679 [00:00<03:56, 99.88it/s]

Epoch 30 loss: train 0.1771 val 0.5696 f1: 0.8003, ROC AUC: 0.8936


100%|██████████| 23679/23679 [03:26<00:00, 114.40it/s]
100%|██████████| 7459/7459 [00:22<00:00, 334.00it/s]
  0%|          | 11/23679 [00:00<03:44, 105.33it/s]

tensor(0.4253) tensor(0.5446)
Epoch 31 loss: train 0.1712 val 0.5813 f1: 0.7943, ROC AUC: 0.8930


100%|██████████| 23679/23679 [03:27<00:00, 114.21it/s]
100%|██████████| 7459/7459 [00:22<00:00, 328.88it/s]


tensor(0.4364) tensor(0.5446)


  0%|          | 10/23679 [00:00<03:58, 99.17it/s]

Epoch 32 loss: train 0.1660 val 0.5801 f1: 0.8009, ROC AUC: 0.8934


100%|██████████| 23679/23679 [03:26<00:00, 114.46it/s]
100%|██████████| 7459/7459 [00:22<00:00, 330.39it/s]


tensor(0.4535) tensor(0.5446)


  0%|          | 10/23679 [00:00<04:00, 98.37it/s]

Epoch 33 loss: train 0.1586 val 0.5974 f1: 0.8110, ROC AUC: 0.8942


100%|██████████| 23679/23679 [03:27<00:00, 114.26it/s]
100%|██████████| 7459/7459 [00:22<00:00, 334.55it/s]


tensor(0.4733) tensor(0.5446)


  0%|          | 10/23679 [00:00<03:57, 99.75it/s]

Epoch 34 loss: train 0.1546 val 0.5923 f1: 0.8232, ROC AUC: 0.8939


100%|██████████| 23679/23679 [03:27<00:00, 114.12it/s]
100%|██████████| 7459/7459 [00:22<00:00, 334.61it/s]
  0%|          | 11/23679 [00:00<03:43, 105.81it/s]

tensor(0.4645) tensor(0.5446)
Epoch 35 loss: train 0.1484 val 0.6235 f1: 0.8179, ROC AUC: 0.8940


100%|██████████| 23679/23679 [03:26<00:00, 114.59it/s]
100%|██████████| 7459/7459 [00:22<00:00, 334.51it/s]


tensor(0.4782) tensor(0.5446)


  0%|          | 10/23679 [00:00<04:05, 96.27it/s]

Epoch 36 loss: train 0.1431 val 0.5851 f1: 0.8234, ROC AUC: 0.8932


100%|██████████| 23679/23679 [03:26<00:00, 114.60it/s]
100%|██████████| 7459/7459 [00:22<00:00, 329.64it/s]


tensor(0.4930) tensor(0.5446)


  0%|          | 11/23679 [00:00<03:50, 102.73it/s]

Epoch 37 loss: train 0.1401 val 0.6742 f1: 0.8275, ROC AUC: 0.8934


100%|██████████| 23679/23679 [03:26<00:00, 114.62it/s]
100%|██████████| 7459/7459 [00:22<00:00, 333.93it/s]


tensor(0.4985) tensor(0.5446)


  0%|          | 11/23679 [00:00<03:49, 102.98it/s]

Epoch 38 loss: train 0.1367 val 0.7493 f1: 0.8288, ROC AUC: 0.8916


100%|██████████| 23679/23679 [03:26<00:00, 114.41it/s]
100%|██████████| 7459/7459 [00:22<00:00, 334.82it/s]


tensor(0.5021) tensor(0.5446)
Epoch 39 loss: train 0.1317 val 0.7183 f1: 0.8305, ROC AUC: 0.8915


In [None]:
# Train the attention-pooling classifier. `train` requires a checkpoint path
# as its last argument; without it the call raises TypeError.
attn_model = AttnClassifier().cuda()
train(attn_model, 20, dl_train, dl_val, 1e-4, 1e-4, 'seq_attn_wm.pt')

In [75]:
# Score the test dataset with the best CNN checkpoint, running on CPU.
model = CNNClassifier()
# map_location lets a checkpoint saved from a CUDA model load on a machine
# without a GPU; the weights end up on CPU either way.
model.load_state_dict(torch.load('seq_cnn_wm.pt', map_location='cpu'))
model.eval()
with torch.no_grad():
    # DataLoader defaults to batch_size=1, so every forward pass yields one
    # score. .item() turns it into a float, which makes `pred` a flat 1-D
    # array instead of an array assembled from (1, 1) tensors.
    pred = np.array([model(i).item() for i in torch.utils.data.DataLoader(x_test)])
print(roc_auc_score(y_test, pred), f1_score(y_test, pred > .5))

0.882351462971083 0.8094045095394105


In [35]:
from collections import defaultdict


def per_project_f1(mns, tgts, preds, projs):
    """Average, over projects, an F1 score for picking the true target of each method.

    The four arguments are parallel sequences with one entry per
    (method, candidate) pair. For every method that has both a true candidate
    and at least one false candidate:

    * it counts as a proposed refactoring when its best candidate score
      exceeds 0.5;
    * a proposed refactoring is a true positive when the true candidate
      scores strictly higher than every false candidate.

    Per project, precision = tp / proposals and recall = tp / methods.

    Args:
        mns: method names.
        tgts: truthy for the true candidate of a method, falsy otherwise.
        preds: predicted scores.
        projs: project names.

    Returns:
        Mean of the per-project F1 scores. A project with no true positives
        contributes 0.0, a project without any comparable method is skipped,
        and NaN is returned when no project can be scored.
    """
    m_true_scores = defaultdict(dict)
    m_false_scores = defaultdict(lambda: defaultdict(list))
    for mn, tgt, pred, proj in zip(mns, tgts, preds, projs):
        if tgt:
            m_true_scores[proj][mn] = pred
        else:
            m_false_scores[proj][mn].append(pred)

    f1s = []
    for proj, true_scores in m_true_scores.items():
        # .get() avoids inserting empty entries into the defaultdicts.
        false_scores = m_false_scores.get(proj, {})
        tp, n_refs, n_methods = 0, 0, 0
        for mn, mts in true_scores.items():
            competitors = false_scores.get(mn)
            if not competitors:
                continue
            best_false = max(competitors)
            if max(mts, best_false) > .5:
                n_refs += 1
                if mts > best_false:
                    tp += 1
            n_methods += 1
        if n_methods == 0:
            # Nothing to evaluate for this project; F1 is undefined, not zero.
            continue
        if tp == 0:
            # Precision and/or recall would divide by zero; F1 is 0 by convention.
            f1s.append(0.0)
            continue
        precision = tp / n_refs
        recall = tp / n_methods
        f1s.append(2 * precision * recall / (precision + recall))
    if not f1s:
        return float('nan')
    return np.mean(f1s)

In [37]:
# Project name (first field) of every test record, aligned with mn_test / y_test.
proj_test = [record[0] for record in test_ds]

In [76]:
# Per-project F1 of the CNN predictions on the test dataset.
per_project_f1(mns=mn_test, tgts=y_test, preds=pred, projs=proj_test)

0.8165950197672277