# model.py
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import CrossEntropyLoss
from transformers import BertModel, BertConfig

class FocalLoss(nn.Module):
    """Focal loss (Lin et al., 2017): down-weights well-classified examples by (1 - p_t)^gamma."""
    def __init__(self, alpha=0.25, gamma=5, logits=False, reduce=True):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.logits = logits
        self.reduce = reduce

    def forward(self, inputs, targets):
        if self.logits:
            # Binary path: per-element BCE so the focal term can weight each example.
            BCE_loss = F.binary_cross_entropy_with_logits(inputs, targets, reduction='none')
        else:
            # Multi-class path: standard (mean-reduced) cross-entropy.
            loss_fct = CrossEntropyLoss()
            BCE_loss = loss_fct(inputs, targets)
        pt = torch.exp(-BCE_loss)  # estimated probability of the true class
        F_loss = self.alpha * (1 - pt) ** self.gamma * BCE_loss
        if self.reduce:
            return torch.mean(F_loss)
        else:
            return F_loss

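# A minimal usage sketch for FocalLoss, assuming 2-class logits of shape
# (batch, 2) and integer class labels; tensors and values here are illustrative only.
if __name__ == "__main__":
    example_logits = torch.randn(4, 2)           # hypothetical raw logits
    example_labels = torch.tensor([0, 1, 1, 0])  # hypothetical gold labels
    focal = FocalLoss(alpha=0.25, gamma=5)
    print("focal loss:", focal(example_logits, example_labels).item())
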
class WeightedFocalLoss(nn.Module):
    """Weighted (alpha-balanced) version of focal loss for binary targets."""
    def __init__(self, alpha=.25, gamma=2):
        super(WeightedFocalLoss, self).__init__()
        # Register alpha as a buffer so it moves with the module under .to(device)/.cuda().
        self.register_buffer('alpha', torch.tensor([alpha, 1 - alpha]))
        self.gamma = gamma

    def forward(self, inputs, targets):
        BCE_loss = F.binary_cross_entropy_with_logits(inputs, targets, reduction='none')
        targets = targets.type(torch.long)
        # Select the class-specific alpha weight for each target.
        at = self.alpha.gather(0, targets.view(-1))
        pt = torch.exp(-BCE_loss)
        F_loss = at * (1 - pt) ** self.gamma * BCE_loss
        return F_loss.mean()

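# A minimal usage sketch for WeightedFocalLoss, assuming 1-D binary logits and
# float 0/1 targets of the same shape; the numbers are illustrative only.
if __name__ == "__main__":
    example_scores = torch.randn(4)                  # hypothetical per-example logits
    example_binary = torch.tensor([0., 1., 1., 0.])  # hypothetical binary targets
    weighted_focal = WeightedFocalLoss(alpha=0.25, gamma=2)
    print("weighted focal loss:", weighted_focal(example_scores, example_binary).item())
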
class BERT_Classification(nn.Module):
    """BERT encoder with a linear classification head over the pooled [CLS] output."""
    def __init__(self):
        super(BERT_Classification, self).__init__()
        config = BertConfig.from_pretrained("bert-base-uncased")
        # NOTE: BertModel(config) builds a randomly initialized encoder; swap in
        # the commented line to start from pretrained weights instead.
        #self.model = BertModel.from_pretrained("bert-base-uncased")
        self.model = BertModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, 2)
        print("BERT loaded!")

    def forward(self, input_ids, token_type_ids=None, attention_mask=None, labels=None,
                position_ids=None, head_mask=None):
        # Flatten any leading multiple-choice dimension down to (batch, seq_len).
        flat_input_ids = input_ids.view(-1, input_ids.size(-1))
        flat_attention_mask = attention_mask.view(-1, attention_mask.size(-1)) if attention_mask is not None else None
        outputs = self.model(input_ids=flat_input_ids, attention_mask=flat_attention_mask)
        pooled_output = outputs[1]  # pooled [CLS] representation
        pooled_output = self.dropout(pooled_output)
        # CrossEntropyLoss applies log-softmax internally, so the classifier's
        # raw logits are used directly (no softmax here).
        logits = self.classifier(pooled_output)
        outputs = (logits,) + outputs[2:]  # keep hidden states / attentions if present
        loss = None
        if labels is not None:
            loss_fct = CrossEntropyLoss()
            loss = loss_fct(logits, labels)
        return ((loss,) + outputs) if loss is not None else outputs  # (loss), logits, (hidden_states), (attentions)

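# A minimal forward-pass sketch for BERT_Classification, assuming the
# "bert-base-uncased" config can be fetched (network or local cache) and using
# random token ids in place of real tokenizer output.
if __name__ == "__main__":
    clf = BERT_Classification()
    clf_ids = torch.randint(0, 30522, (2, 16))          # 2 sequences of 16 token ids
    clf_mask = torch.ones(2, 16, dtype=torch.long)
    clf_gold = torch.tensor([0, 1])
    clf_loss, clf_logits = clf(clf_ids, attention_mask=clf_mask, labels=clf_gold)[:2]
    print("logits shape:", clf_logits.shape)  # expected: torch.Size([2, 2])
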
class BERT_MLP(nn.Module):
    """Frozen BERT encoder with a learned elementwise gate and a small MLP head."""
    def __init__(self):
        super(BERT_MLP, self).__init__()
        config = BertConfig.from_pretrained("bert-large-uncased")
        # Learned elementwise weights over the pooled output; nn.Parameter (rather
        # than the deprecated Variable) registers them with the module so they
        # train and move with .to(device).
        self.weights_add = nn.Parameter(torch.empty(config.hidden_size))
        # NOTE: BertModel(config) builds a randomly initialized encoder; swap in
        # the commented line to start from pretrained weights instead.
        #self.model = BertModel.from_pretrained("bert-large-uncased")
        self.model = BertModel(config)
        # Freeze the encoder: only the gate and the MLP head are trained.
        for param in self.model.parameters():
            param.requires_grad = False
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.oneLayer = nn.Linear(config.hidden_size, 128)
        self.classifier = nn.Linear(128, 2)
        nn.init.uniform_(self.weights_add, -2.0, 2.0)
        print("BERT loaded!")

    def forward(self, input_ids, token_type_ids=None, attention_mask=None, labels=None,
                position_ids=None, head_mask=None):
        # Flatten any leading multiple-choice dimension down to (batch, seq_len).
        flat_input_ids = input_ids.view(-1, input_ids.size(-1))
        flat_attention_mask = attention_mask.view(-1, attention_mask.size(-1)) if attention_mask is not None else None
        outputs = self.model(input_ids=flat_input_ids, attention_mask=flat_attention_mask)
        pooled_output = outputs[1]  # pooled [CLS] representation
        # Elementwise gate, tanh nonlinearity, then the bottleneck layer.
        mlp_output = self.oneLayer(torch.tanh(self.weights_add * pooled_output))
        mlp_output = self.dropout(mlp_output)
        logits = self.classifier(mlp_output)
        loss = None
        if labels is not None:
            # reduction='sum' replaces the deprecated size_average=False.
            loss_fct = CrossEntropyLoss(reduction='sum')
            loss = loss_fct(logits, labels)
        if loss is not None:
            # outputs[:] yields a plain tuple whether the encoder returned a tuple
            # or a ModelOutput, so the loss can be prepended.
            return logits, ((loss,) + outputs[:])
        else:
            return logits, outputs

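# A minimal forward-pass sketch for BERT_MLP. Note this instantiates a randomly
# initialized bert-large-sized encoder, which is memory-heavy; the shapes and
# ids below are illustrative only.
if __name__ == "__main__":
    mlp = BERT_MLP()
    mlp_ids = torch.randint(0, 30522, (2, 16))
    mlp_mask = torch.ones(2, 16, dtype=torch.long)
    mlp_logits, mlp_rest = mlp(mlp_ids, attention_mask=mlp_mask, labels=torch.tensor([0, 1]))
    print("logits shape:", mlp_logits.shape)  # expected: torch.Size([2, 2])
    print("summed loss:", mlp_rest[0].item())  # mlp_rest[0] is the (sum-reduced) loss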