In [2]:
import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn import metrics
import transformers
import torch
from torch.utils.data import Dataset, DataLoader 
from transformers import DistilBertTokenizer, DistilBertModel

In [2]:
# Load the raw training set from the project-relative dataset folder.
train_data = pd.read_csv(filepath_or_buffer='dataset/train.csv')

In [15]:
# Preview the frame as loaded; the recorded output below still contains the
# `id` column next to `comment_text` and the six 0/1 label columns.
train_data

Unnamed: 0,id,comment_text,toxic,severe_toxic,obscene,threat,insult,identity_hate
0,0000997932d777bf,Explanation\nWhy the edits made under my usern...,0,0,0,0,0,0
1,000103f0d9cfb60f,D'aww! He matches this background colour I'm s...,0,0,0,0,0,0
2,000113f07ec002fd,"Hey man, I'm really not trying to edit war. It...",0,0,0,0,0,0
3,0001b41b1c6bb37e,"""\nMore\nI can't make any real suggestions on ...",0,0,0,0,0,0
4,0001d958c54c6e35,"You, sir, are my hero. Any chance you remember...",0,0,0,0,0,0
...,...,...,...,...,...,...,...,...
159566,ffe987279560d7ff,""":::::And for the second time of asking, when ...",0,0,0,0,0,0
159567,ffea4adeee384e90,You should be ashamed of yourself \n\nThat is ...,0,0,0,0,0,0
159568,ffee36eab5c267c9,"Spitzer \n\nUmm, theres no actual article for ...",0,0,0,0,0,0
159569,fff125370e4aaaf3,And it looks like it was actually you who put ...,0,0,0,0,0,0


In [3]:
# Remove the `id` column, which is not used as text or as a label.
# Reassigning instead of using inplace=True, together with errors='ignore',
# keeps this cell safe to re-run: a second execution no longer raises
# KeyError because `id` has already been dropped.
train_data = train_data.drop(columns=['id'], errors='ignore')
train_data

Unnamed: 0,comment_text,toxic,severe_toxic,obscene,threat,insult,identity_hate
0,Explanation\nWhy the edits made under my usern...,0,0,0,0,0,0
1,D'aww! He matches this background colour I'm s...,0,0,0,0,0,0
2,"Hey man, I'm really not trying to edit war. It...",0,0,0,0,0,0
3,"""\nMore\nI can't make any real suggestions on ...",0,0,0,0,0,0
4,"You, sir, are my hero. Any chance you remember...",0,0,0,0,0,0
...,...,...,...,...,...,...,...
159566,""":::::And for the second time of asking, when ...",0,0,0,0,0,0
159567,You should be ashamed of yourself \n\nThat is ...,0,0,0,0,0,0
159568,"Spitzer \n\nUmm, theres no actual article for ...",0,0,0,0,0,0
159569,And it looks like it was actually you who put ...,0,0,0,0,0,0


In [19]:
# NOTE(review): stale cell. Its recorded output (execution count 19) shows a
# combined `labels` column with five values per row, which none of the visible
# cells above this point produce for `train_data`. Re-run it in order or
# remove it so the displayed state matches the code.
train_data

Unnamed: 0,comment_text,labels
0,Explanation\nWhy the edits made under my usern...,"[0, 0, 0, 0, 0]"
1,D'aww! He matches this background colour I'm s...,"[0, 0, 0, 0, 0]"
2,"Hey man, I'm really not trying to edit war. It...","[0, 0, 0, 0, 0]"
3,"""\nMore\nI can't make any real suggestions on ...","[0, 0, 0, 0, 0]"
4,"You, sir, are my hero. Any chance you remember...","[0, 0, 0, 0, 0]"
...,...,...
159566,""":::::And for the second time of asking, when ...","[0, 0, 0, 0, 0]"
159567,You should be ashamed of yourself \n\nThat is ...,"[0, 0, 0, 0, 0]"
159568,"Spitzer \n\nUmm, theres no actual article for ...","[0, 0, 0, 0, 0]"
159569,And it looks like it was actually you who put ...,"[0, 0, 0, 0, 0]"


In [4]:
# Normalise the comment text in one pass: lower-case it, turn non-breaking
# spaces into plain spaces, then split on whitespace and re-join with single
# spaces so newlines and repeated blanks collapse.
train_data["comment_text"] = (
    train_data["comment_text"]
    .str.lower()
    .str.replace("\xa0", " ", regex=False)
    .str.split()
    .str.join(" ")
)

In [5]:
# Preview after normalisation: comments are lower-cased and their whitespace
# (including the newlines visible in the raw preview) is collapsed.
train_data

Unnamed: 0,comment_text,toxic,severe_toxic,obscene,threat,insult,identity_hate
0,explanation why the edits made under my userna...,0,0,0,0,0,0
1,d'aww! he matches this background colour i'm s...,0,0,0,0,0,0
2,"hey man, i'm really not trying to edit war. it...",0,0,0,0,0,0
3,""" more i can't make any real suggestions on im...",0,0,0,0,0,0
4,"you, sir, are my hero. any chance you remember...",0,0,0,0,0,0
...,...,...,...,...,...,...,...
159566,""":::::and for the second time of asking, when ...",0,0,0,0,0,0
159567,you should be ashamed of yourself that is a ho...,0,0,0,0,0,0
159568,"spitzer umm, theres no actual article for pros...",0,0,0,0,0,0
159569,and it looks like it was actually you who put ...,0,0,0,0,0,0


In [6]:
# Persist the cleaned frame; index=False keeps the row index out of the file.
train_data.to_csv(path_or_buf='dataset/cleaned_train.csv', index=False)

In [10]:
# Round-trip check: reload the cleaned CSV that was just written.
# keep_default_na=False stops pandas from converting a comment whose whole
# text is e.g. "nan", "null", "n/a" or an empty string into a float NaN on
# load, so `comment_text` stays a column of strings.
testing = pd.read_csv('dataset/cleaned_train.csv', keep_default_na=False)
testing

Unnamed: 0,comment_text,toxic,severe_toxic,obscene,threat,insult,identity_hate
0,explanation why the edits made under my userna...,0,0,0,0,0,0
1,d'aww! he matches this background colour i'm s...,0,0,0,0,0,0
2,"hey man, i'm really not trying to edit war. it...",0,0,0,0,0,0
3,""" more i can't make any real suggestions on im...",0,0,0,0,0,0
4,"you, sir, are my hero. any chance you remember...",0,0,0,0,0,0
...,...,...,...,...,...,...,...
159566,""":::::and for the second time of asking, when ...",0,0,0,0,0,0
159567,you should be ashamed of yourself that is a ho...,0,0,0,0,0,0
159568,"spitzer umm, theres no actual article for pros...",0,0,0,0,0,0
159569,and it looks like it was actually you who put ...,0,0,0,0,0,0


In [11]:

# The six toxicity indicator columns, in the order they appear in the CSV.
# Naming them explicitly (instead of slicing by position) means the label
# vector cannot silently gain or lose a column if the frame layout changes.
LABEL_COLUMNS = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']

# Collapse the indicator columns into one list-valued `labels` column.
# The guard makes the cell idempotent: with positional slicing, a second run
# would have wrapped the existing `labels` column inside itself.
if 'labels' not in testing.columns:
    testing['labels'] = testing[LABEL_COLUMNS].values.tolist()
    testing.drop(columns=LABEL_COLUMNS, inplace=True)

In [12]:
# Preview the reshaped frame: `comment_text` plus one `labels` list per row
# (six values per list in the recorded output).
testing

Unnamed: 0,comment_text,labels
0,explanation why the edits made under my userna...,"[0, 0, 0, 0, 0, 0]"
1,d'aww! he matches this background colour i'm s...,"[0, 0, 0, 0, 0, 0]"
2,"hey man, i'm really not trying to edit war. it...","[0, 0, 0, 0, 0, 0]"
3,""" more i can't make any real suggestions on im...","[0, 0, 0, 0, 0, 0]"
4,"you, sir, are my hero. any chance you remember...","[0, 0, 0, 0, 0, 0]"
...,...,...
159566,""":::::and for the second time of asking, when ...","[0, 0, 0, 0, 0, 0]"
159567,you should be ashamed of yourself that is a ho...,"[0, 0, 0, 0, 0, 0]"
159568,"spitzer umm, theres no actual article for pros...","[0, 0, 0, 0, 0, 0]"
159569,and it looks like it was actually you who put ...,"[0, 0, 0, 0, 0, 0]"


In [4]:
# Sanity check of the accuracy computation on four hand-written logits.
outputs = torch.tensor([-1.324, 0.6, 0.7678, 0.1], dtype=torch.float32)
targets = torch.tensor([0, 1, 0, 0], dtype=torch.float32)

# Squash the logits to probabilities once, then round to hard 0/1 predictions.
sigmoid = outputs.sigmoid()
preds = sigmoid.round()

# Fraction of positions where the prediction matches the target.
output = (targets == preds).sum().item() / len(preds)
preds.shape


torch.Size([4])

0.5

In [1]:
# NOTE(review): this cell relies on `preds` from the accuracy cell above. The
# recorded NameError comes from running it first on a fresh kernel (execution
# count 1); run the cells in order, or drop this duplicate shape check.
preds.shape

NameError: name 'preds' is not defined