In [61]:
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
import torch.utils.data
from keras.preprocessing import text, sequence
from sklearn.metrics import roc_auc_score
from torch import nn, optim
from torch.autograd import Variable
from torchvision import datasets,transforms
from torch.utils.data import Dataset, DataLoader


from sklearn.model_selection import GridSearchCV

In [62]:
df_train = pd.read_csv('Dataset/train.csv')
df_test = pd.read_csv('Dataset/test.csv')

In [63]:
columnlist=df_train.columns

In [64]:
columnlist=columnlist[2:]

In [65]:
columnlist

Index(['toxic', 'severe_toxic', 'obscene', 'threat', 'insult',
       'identity_hate'],
      dtype='object')

In [66]:
sentence_train=df_train["comment_text"].fillna("CVxTz").values
sentence_test=df_test["comment_text"].fillna("CVxTz").values

In [67]:
tokenize=text.Tokenizer(num_words=200000)

In [68]:
tokenize.fit_on_texts(list(sentence_train))

In [69]:
word_index=tokenize.word_index

In [70]:
sentence_train1=tokenize.texts_to_sequences(sentence_train)

In [71]:
sentence_test1=tokenize.texts_to_sequences(sentence_test)

In [72]:
Input_train=sequence.pad_sequences(sentence_train1,20)

In [73]:
Input_test=sequence.pad_sequences(sentence_test1,20)

In [74]:
def Pretrained_embedding(emb_file):
    embedding_index={}
    with open(emb_file,encoding="utf8") as f:
        for line in f:
            values=line.split()
            word=values[0]
            vector=np.asarray(values[1:],dtype="float32")
            embedding_index[word]=vector
    return embedding_index

In [75]:
embedding_index= Pretrained_embedding("Glove/glove.6B.100d.txt")

In [76]:
def embedding(word_index,embedding_ind):
    embedding_matrix=np.zeros((len(word_index)+1,100))
    for word,i in word_index.items():
        embedding_vector=embedding_ind.get(word)
        if embedding_vector is not None:
            embedding_matrix[i]=embedding_vector
    return embedding_matrix

In [77]:
embedding_matrix=embedding(word_index,embedding_index)

In [78]:
class CustomDataset(Dataset):
    def __init__(self,X,Y=None,Transform=None):
        self.X=X
        self.Y=Y
        self.Transform=Transform
        if self.Transform is not None:
            self.X=self.Transform(self.X)
            if self.Y is not None:
                self.Y=self.Transform(self.Y)
        self.X=torch.from_numpy(self.X).long()
        if self.Y is not None:
            self.Y=Y.values
            self.Y=torch.from_numpy(self.Y).float()
    def __getitem__(self,idx):
        data=self.X[idx]
        if self.Y is not None:
            label=self.Y[idx]
            return data,label
        return data
    def __len__(self):
        return len(self.X)

In [79]:
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.embedding=nn.Embedding(num_embeddings=100000,embedding_dim=100)
        self.embedding.weight.data=torch.Tensor(embedding_matrix)
        self.lstm=nn.LSTM(100,50,1,batch_first=True,bidirectional=True)
        self.hidden=(Variable(torch.zeros(2,1,50)),Variable(torch.zeros(2,1,50)))
        self.maxpool=nn.MaxPool1d(20)
        self.fc=nn.Linear(100,1)
        self.sigmoid=nn.Sigmoid()
    def forward(self,x):
        x=self.embedding(x)
        x,self.hidden=self.lstm(x)
        x=self.maxpool(x)
        x = x.view(x.size(0), -1)
        x=self.fc(x)
        x=self.sigmoid(x)
        return x
        

In [104]:
y_test_list=[]

In [105]:
def train():
    
    

    for column in columnlist:
        Train_set=CustomDataset(Input_train,df_train[column])
        Test_set=CustomDataset(Input_test)
        Train=torch.utils.data.DataLoader(Train_set,batch_size=1000)
        Test=torch.utils.data.DataLoader(Test_set,batch_size=1000)
        model=Net()
        model.train()
        learnin1g_rate = 1e-4
        optimizer = optim.Adam(model.parameters(), lr=learnin1g_rate)
        for batch_idx, (data, target) in enumerate(Train):
            data, target = Variable(data), Variable(target)
            y_pred = model(data)
            loss = F.binary_cross_entropy(y_pred, target)
            print(batch_idx,loss)
            model.zero_grad()
            loss.backward()
            optimizer.step()
        preds=[]
        for data in Test:
            data = Variable(data, volatile=True)
            output = model(data)
            result=output.data
            result=(result>.1).float()
            preds.append(result.numpy())
        y_one_test=np.concatenate(preds,axis=0)
        y_test_list.append(y_one_test)
 

In [106]:
train()



0 tensor(0.7454, grad_fn=<BinaryCrossEntropyBackward>)
1 tensor(0.7453, grad_fn=<BinaryCrossEntropyBackward>)
2 tensor(0.7420, grad_fn=<BinaryCrossEntropyBackward>)
3 tensor(0.7415, grad_fn=<BinaryCrossEntropyBackward>)
4 tensor(0.7393, grad_fn=<BinaryCrossEntropyBackward>)
5 tensor(0.7365, grad_fn=<BinaryCrossEntropyBackward>)
6 tensor(0.7340, grad_fn=<BinaryCrossEntropyBackward>)
7 tensor(0.7330, grad_fn=<BinaryCrossEntropyBackward>)
8 tensor(0.7300, grad_fn=<BinaryCrossEntropyBackward>)
9 tensor(0.7303, grad_fn=<BinaryCrossEntropyBackward>)
10 tensor(0.7268, grad_fn=<BinaryCrossEntropyBackward>)
11 tensor(0.7266, grad_fn=<BinaryCrossEntropyBackward>)
12 tensor(0.7216, grad_fn=<BinaryCrossEntropyBackward>)
13 tensor(0.7205, grad_fn=<BinaryCrossEntropyBackward>)
14 tensor(0.7166, grad_fn=<BinaryCrossEntropyBackward>)
15 tensor(0.7159, grad_fn=<BinaryCrossEntropyBackward>)
16 tensor(0.7144, grad_fn=<BinaryCrossEntropyBackward>)
17 tensor(0.7104, grad_fn=<BinaryCrossEntropyBackward>)
18

146 tensor(0.4675, grad_fn=<BinaryCrossEntropyBackward>)
147 tensor(0.4488, grad_fn=<BinaryCrossEntropyBackward>)
148 tensor(0.4536, grad_fn=<BinaryCrossEntropyBackward>)
149 tensor(0.4603, grad_fn=<BinaryCrossEntropyBackward>)
150 tensor(0.4518, grad_fn=<BinaryCrossEntropyBackward>)
151 tensor(0.4472, grad_fn=<BinaryCrossEntropyBackward>)
152 tensor(0.4500, grad_fn=<BinaryCrossEntropyBackward>)
153 tensor(0.4409, grad_fn=<BinaryCrossEntropyBackward>)
154 tensor(0.4433, grad_fn=<BinaryCrossEntropyBackward>)
155 tensor(0.4445, grad_fn=<BinaryCrossEntropyBackward>)
156 tensor(0.4390, grad_fn=<BinaryCrossEntropyBackward>)
157 tensor(0.4549, grad_fn=<BinaryCrossEntropyBackward>)
158 tensor(0.4315, grad_fn=<BinaryCrossEntropyBackward>)




159 tensor(0.4253, grad_fn=<BinaryCrossEntropyBackward>)




0 tensor(0.6852, grad_fn=<BinaryCrossEntropyBackward>)
1 tensor(0.6833, grad_fn=<BinaryCrossEntropyBackward>)
2 tensor(0.6793, grad_fn=<BinaryCrossEntropyBackward>)
3 tensor(0.6782, grad_fn=<BinaryCrossEntropyBackward>)
4 tensor(0.6755, grad_fn=<BinaryCrossEntropyBackward>)
5 tensor(0.6737, grad_fn=<BinaryCrossEntropyBackward>)
6 tensor(0.6710, grad_fn=<BinaryCrossEntropyBackward>)
7 tensor(0.6674, grad_fn=<BinaryCrossEntropyBackward>)
8 tensor(0.6649, grad_fn=<BinaryCrossEntropyBackward>)
9 tensor(0.6628, grad_fn=<BinaryCrossEntropyBackward>)
10 tensor(0.6597, grad_fn=<BinaryCrossEntropyBackward>)
11 tensor(0.6563, grad_fn=<BinaryCrossEntropyBackward>)
12 tensor(0.6564, grad_fn=<BinaryCrossEntropyBackward>)
13 tensor(0.6535, grad_fn=<BinaryCrossEntropyBackward>)
14 tensor(0.6504, grad_fn=<BinaryCrossEntropyBackward>)
15 tensor(0.6481, grad_fn=<BinaryCrossEntropyBackward>)
16 tensor(0.6450, grad_fn=<BinaryCrossEntropyBackward>)
17 tensor(0.6438, grad_fn=<BinaryCrossEntropyBackward>)
18

146 tensor(0.2674, grad_fn=<BinaryCrossEntropyBackward>)
147 tensor(0.2656, grad_fn=<BinaryCrossEntropyBackward>)
148 tensor(0.2646, grad_fn=<BinaryCrossEntropyBackward>)
149 tensor(0.2562, grad_fn=<BinaryCrossEntropyBackward>)
150 tensor(0.2526, grad_fn=<BinaryCrossEntropyBackward>)
151 tensor(0.2551, grad_fn=<BinaryCrossEntropyBackward>)
152 tensor(0.2458, grad_fn=<BinaryCrossEntropyBackward>)
153 tensor(0.2469, grad_fn=<BinaryCrossEntropyBackward>)
154 tensor(0.2388, grad_fn=<BinaryCrossEntropyBackward>)
155 tensor(0.2458, grad_fn=<BinaryCrossEntropyBackward>)
156 tensor(0.2405, grad_fn=<BinaryCrossEntropyBackward>)
157 tensor(0.2476, grad_fn=<BinaryCrossEntropyBackward>)
158 tensor(0.2393, grad_fn=<BinaryCrossEntropyBackward>)
159 tensor(0.2400, grad_fn=<BinaryCrossEntropyBackward>)
0 tensor(0.8412, grad_fn=<BinaryCrossEntropyBackward>)
1 tensor(0.8382, grad_fn=<BinaryCrossEntropyBackward>)
2 tensor(0.8325, grad_fn=<BinaryCrossEntropyBackward>)
3 tensor(0.8350, grad_fn=<BinaryCross

132 tensor(0.5155, grad_fn=<BinaryCrossEntropyBackward>)
133 tensor(0.5164, grad_fn=<BinaryCrossEntropyBackward>)
134 tensor(0.5039, grad_fn=<BinaryCrossEntropyBackward>)
135 tensor(0.5048, grad_fn=<BinaryCrossEntropyBackward>)
136 tensor(0.5037, grad_fn=<BinaryCrossEntropyBackward>)
137 tensor(0.4928, grad_fn=<BinaryCrossEntropyBackward>)
138 tensor(0.4950, grad_fn=<BinaryCrossEntropyBackward>)
139 tensor(0.4942, grad_fn=<BinaryCrossEntropyBackward>)
140 tensor(0.4891, grad_fn=<BinaryCrossEntropyBackward>)
141 tensor(0.4942, grad_fn=<BinaryCrossEntropyBackward>)
142 tensor(0.4858, grad_fn=<BinaryCrossEntropyBackward>)
143 tensor(0.4855, grad_fn=<BinaryCrossEntropyBackward>)
144 tensor(0.4844, grad_fn=<BinaryCrossEntropyBackward>)
145 tensor(0.4742, grad_fn=<BinaryCrossEntropyBackward>)
146 tensor(0.4909, grad_fn=<BinaryCrossEntropyBackward>)
147 tensor(0.4778, grad_fn=<BinaryCrossEntropyBackward>)
148 tensor(0.4797, grad_fn=<BinaryCrossEntropyBackward>)
149 tensor(0.4737, grad_fn=<Bin

118 tensor(0.4674, grad_fn=<BinaryCrossEntropyBackward>)
119 tensor(0.4689, grad_fn=<BinaryCrossEntropyBackward>)
120 tensor(0.4620, grad_fn=<BinaryCrossEntropyBackward>)
121 tensor(0.4636, grad_fn=<BinaryCrossEntropyBackward>)
122 tensor(0.4587, grad_fn=<BinaryCrossEntropyBackward>)
123 tensor(0.4564, grad_fn=<BinaryCrossEntropyBackward>)
124 tensor(0.4560, grad_fn=<BinaryCrossEntropyBackward>)
125 tensor(0.4523, grad_fn=<BinaryCrossEntropyBackward>)
126 tensor(0.4487, grad_fn=<BinaryCrossEntropyBackward>)
127 tensor(0.4452, grad_fn=<BinaryCrossEntropyBackward>)
128 tensor(0.4430, grad_fn=<BinaryCrossEntropyBackward>)
129 tensor(0.4425, grad_fn=<BinaryCrossEntropyBackward>)
130 tensor(0.4396, grad_fn=<BinaryCrossEntropyBackward>)
131 tensor(0.4356, grad_fn=<BinaryCrossEntropyBackward>)
132 tensor(0.4313, grad_fn=<BinaryCrossEntropyBackward>)
133 tensor(0.4327, grad_fn=<BinaryCrossEntropyBackward>)
134 tensor(0.4265, grad_fn=<BinaryCrossEntropyBackward>)
135 tensor(0.4247, grad_fn=<Bin

104 tensor(0.4235, grad_fn=<BinaryCrossEntropyBackward>)
105 tensor(0.4208, grad_fn=<BinaryCrossEntropyBackward>)
106 tensor(0.4219, grad_fn=<BinaryCrossEntropyBackward>)
107 tensor(0.4163, grad_fn=<BinaryCrossEntropyBackward>)
108 tensor(0.4207, grad_fn=<BinaryCrossEntropyBackward>)
109 tensor(0.4155, grad_fn=<BinaryCrossEntropyBackward>)
110 tensor(0.4114, grad_fn=<BinaryCrossEntropyBackward>)
111 tensor(0.4113, grad_fn=<BinaryCrossEntropyBackward>)
112 tensor(0.4088, grad_fn=<BinaryCrossEntropyBackward>)
113 tensor(0.4130, grad_fn=<BinaryCrossEntropyBackward>)
114 tensor(0.4068, grad_fn=<BinaryCrossEntropyBackward>)
115 tensor(0.3906, grad_fn=<BinaryCrossEntropyBackward>)
116 tensor(0.4065, grad_fn=<BinaryCrossEntropyBackward>)
117 tensor(0.3968, grad_fn=<BinaryCrossEntropyBackward>)
118 tensor(0.3925, grad_fn=<BinaryCrossEntropyBackward>)
119 tensor(0.3894, grad_fn=<BinaryCrossEntropyBackward>)
120 tensor(0.3893, grad_fn=<BinaryCrossEntropyBackward>)
121 tensor(0.3916, grad_fn=<Bin

90 tensor(0.4202, grad_fn=<BinaryCrossEntropyBackward>)
91 tensor(0.4208, grad_fn=<BinaryCrossEntropyBackward>)
92 tensor(0.4145, grad_fn=<BinaryCrossEntropyBackward>)
93 tensor(0.4130, grad_fn=<BinaryCrossEntropyBackward>)
94 tensor(0.4104, grad_fn=<BinaryCrossEntropyBackward>)
95 tensor(0.4102, grad_fn=<BinaryCrossEntropyBackward>)
96 tensor(0.4049, grad_fn=<BinaryCrossEntropyBackward>)
97 tensor(0.3998, grad_fn=<BinaryCrossEntropyBackward>)
98 tensor(0.4014, grad_fn=<BinaryCrossEntropyBackward>)
99 tensor(0.3997, grad_fn=<BinaryCrossEntropyBackward>)
100 tensor(0.3902, grad_fn=<BinaryCrossEntropyBackward>)
101 tensor(0.3909, grad_fn=<BinaryCrossEntropyBackward>)
102 tensor(0.3818, grad_fn=<BinaryCrossEntropyBackward>)
103 tensor(0.3834, grad_fn=<BinaryCrossEntropyBackward>)
104 tensor(0.3741, grad_fn=<BinaryCrossEntropyBackward>)
105 tensor(0.3702, grad_fn=<BinaryCrossEntropyBackward>)
106 tensor(0.3699, grad_fn=<BinaryCrossEntropyBackward>)
107 tensor(0.3653, grad_fn=<BinaryCrossEn

In [None]:
y_test=np.concatenate(y_test_list,axis=1)

In [108]:
sample_submission = pd.read_csv("Dataset/sample_submission.csv")
sample_submission[columnlist] = y_test
sample_submission.to_csv("submission.csv", index=False)

In [107]:
len(y_test)

153164