In [0]:
!pip3 install pydrive
!pip3 install keras
!wget https://www.dropbox.com/s/scj10j1z2cz9aah/data.zip
!unzip data.zip
!ls

In [6]:
import numpy as np
import csv
from nltk.tokenize import TweetTokenizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn import preprocessing
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.metrics import f1_score

def tcv2array(path):
    """Read tab separated values, # is for comments and dont be load it"""
    a = []
    with open(path) as tsvfile:
        reader = csv.reader(tsvfile, delimiter='\t')
        for row in reader:
            if row:
                if row[0][0]:
                    a.append(row)
    return a


aux_x = tcv2array('AggressiveDetection_train.txt')
y = np.loadtxt('AggressiveDetection_train_solution.txt')

aux_x[0]

['‼ En el sitio también se atendió a la madre del joven en moto, quien se desvaneció al ver lo ocurrido']

In [5]:
"""Solo correr esto para obtener resultados reales"""
# Tweet Tokenize
tknzr = TweetTokenizer()

x = []
for i in aux_x:
     x.append(' '.join(tknzr.tokenize(i[0])))

### Transform data for model
le = preprocessing.LabelEncoder()
y = le.fit_transform(y)

cv = TfidfVectorizer()
x = cv.fit_transform(x)

from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=1992)

print(x_train.shape)
print(y_train.shape)

print(x_test.shape)
print(y_test.shape)

"""SVM"""
from sklearn.svm import LinearSVC
from sklearn.metrics import confusion_matrix, classification_report

svm = LinearSVC()
svm.fit(x_train, y_train)  

y_pred = svm.predict(x_test)
print("______________Validation Confusion Matrix______________")
print(confusion_matrix(y_test, y_pred))
print("")
print("___________________Validation Report___________________")
print(classification_report(y_test, y_pred))

print(f1_score(y_test, y_pred, average='micro'))
print(f1_score(y_test, y_pred, average='macro'))
print(f1_score(y_test, y_pred, average='weighted'))



"""Neural Network"""
from keras import backend as K
from keras.models import Sequential 
from keras.layers import Dense, Activation, Dropout
from keras.layers.normalization import BatchNormalization as BN
from keras.layers import GaussianNoise as GN
from keras.optimizers import Adam, RMSprop
from keras.callbacks import LearningRateScheduler
from keras.callbacks import ModelCheckpoint

x_train = x_train.toarray()
x_test = x_test.toarray()

x_train = x_train.reshape(x_train.shape[0], x_train.shape[1])
x_test = x_test.reshape(x_test.shape[0], x_train.shape[1])
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')

num_classes = 1
batch_size = 32
epochs = 100
learnRate = 0.001

# Learning rate annealing
def step_decay(epoch):
    if epoch/epochs<0.3:
        lrate = learnRate
    elif epoch/epochs<=0.5:
        lrate = learnRate/2
    elif epoch/epochs<=0.70:
        lrate = learnRate/10
    else:
        lrate = learnRate/100
    return lrate

#Loss function for macro_fm
def macro_fm(y_true, y_pred, beta=1.0):
    beta2 = beta**2.0
    top = K.sum(y_true * y_pred, axis=0)
    bot = beta2 * K.sum(y_true, axis=0) + K.sum(y_pred, axis=0)
    return -(1.0 + beta2) * K.mean(top/bot)
  
#Define model architecture
model = Sequential()
model.add( Dense( 2048, activation='relu', input_shape=(x_train.shape[1],) ) )
model.add(BN())
model.add(GN(0.3))
model.add(Activation('relu'))

model.add(Dense(1024))
model.add(BN())
model.add(GN(0.3))
model.add(Activation('relu'))

model.add(Dense(512))
model.add(BN())
model.add(GN(0.3))
model.add(Activation('relu'))

model.add(Dense(256))
model.add(BN())
model.add(GN(0.3))
model.add(Activation('relu'))

model.add(Dense(num_classes, activation='sigmoid'))
model.summary()

checkpoint_path = "Wehigts.hdf5"
checkpointer = ModelCheckpoint(filepath=checkpoint_path,
                               monitor='val_loss', verbose=1,
                               save_best_only=True, mode='min')


loss = macro_fm#'binary_crossentropy'

adam = Adam(lr=learnRate, beta_1=0.9, beta_2=0.999,
            epsilon=None, decay=1e-6, amsgrad=False)

rms = RMSprop(lr=learnRate, rho=0.9, epsilon=None, decay=0.0)

lrate = LearningRateScheduler(step_decay)

model.compile(loss=loss, 
            optimizer=adam, 
            metrics=['accuracy']) 

history = model.fit(x_train, y_train, 
            batch_size=batch_size,
            epochs=epochs,
            verbose=1,
            validation_data=(x_test, y_test),
            callbacks=[checkpointer])

#Load best model
model.load_weights(checkpoint_path)

score = model.evaluate(x_test, y_test, verbose=0)

print('Test loss:', score[0])
print('Test accuracy:', score[1])


y_pred = model.predict(x_test, batch_size=1)
y_pred = np.where(y_pred > 0.5, 1, 0)
print("______________Validation Confusion Matrix______________")
print(confusion_matrix(y_test, y_pred))
print("")
print("___________________Validation Report___________________")
print(classification_report(y_test, y_pred))

print(f1_score(y_test, y_pred, average='micro'))
print(f1_score(y_test, y_pred, average='macro'))
print(f1_score(y_test, y_pred, average='weighted'))


(6160, 15970)
(6160,)
(1540, 15970)
(1540,)
______________Validation Confusion Matrix______________
[[883 102]
 [202 353]]

___________________Validation Report___________________
             precision    recall  f1-score   support

          0       0.81      0.90      0.85       985
          1       0.78      0.64      0.70       555

avg / total       0.80      0.80      0.80      1540

0.8025974025974028
0.7760749988042284
0.797593175466615
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 2048)              32708608  
_________________________________________________________________
batch_normalization_1 (Batch (None, 2048)              8192      
_________________________________________________________________
gaussian_noise_1 (GaussianNo (None, 2048)              0         
_________________________________________________________________
activation_1 (Activat


Epoch 00001: val_loss improved from inf to -0.66566, saving model to Wehigts.hdf5
Epoch 2/100

Epoch 00002: val_loss did not improve from -0.66566
Epoch 3/100


Epoch 00003: val_loss improved from -0.66566 to -0.68042, saving model to Wehigts.hdf5
Epoch 4/100

Epoch 00004: val_loss did not improve from -0.68042
Epoch 5/100


Epoch 00005: val_loss did not improve from -0.68042
Epoch 6/100

Epoch 00006: val_loss did not improve from -0.68042
Epoch 7/100


Epoch 00007: val_loss did not improve from -0.68042
Epoch 8/100

Epoch 00008: val_loss did not improve from -0.68042
Epoch 9/100


Epoch 00009: val_loss did not improve from -0.68042
Epoch 10/100

Epoch 00010: val_loss improved from -0.68042 to -0.68093, saving model to Wehigts.hdf5
Epoch 11/100


Epoch 00011: val_loss did not improve from -0.68093
Epoch 12/100

Epoch 00012: val_loss did not improve from -0.68093
Epoch 13/100


Epoch 00013: val_loss did not improve from -0.68093
Epoch 14/100

Epoch 00014: val_loss did not improve from -0.68093
Epoch 15/100


Epoch 00015: val_loss did not improve from -0.68093
Epoch 16/100

Epoch 00016: val_loss did not improve from -0.68093
Epoch 17/100


Epoch 00017: val_loss did not improve from -0.68093
Epoch 18/100

Epoch 00018: val_loss did not improve from -0.68093
Epoch 19/100


Epoch 00019: val_loss did not improve from -0.68093
Epoch 20/100

Epoch 00020: val_loss did not improve from -0.68093
Epoch 21/100


Epoch 00021: val_loss did not improve from -0.68093
Epoch 22/100

Epoch 00022: val_loss improved from -0.68093 to -0.68184, saving model to Wehigts.hdf5
Epoch 23/100


Epoch 00023: val_loss did not improve from -0.68184
Epoch 24/100

Epoch 00024: val_loss did not improve from -0.68184
Epoch 25/100


Epoch 00025: val_loss did not improve from -0.68184
Epoch 26/100

Epoch 00026: val_loss did not improve from -0.68184
Epoch 27/100


Epoch 00027: val_loss did not improve from -0.68184
Epoch 28/100

Epoch 00028: val_loss did not improve from -0.68184
Epoch 29/100


Epoch 00029: val_loss did not improve from -0.68184
Epoch 30/100

Epoch 00030: val_loss did not improve from -0.68184
Epoch 31/100


Epoch 00031: val_loss did not improve from -0.68184
Epoch 32/100

Epoch 00032: val_loss did not improve from -0.68184
Epoch 33/100


Epoch 00033: val_loss did not improve from -0.68184
Epoch 34/100

Epoch 00034: val_loss did not improve from -0.68184
Epoch 35/100


Epoch 00035: val_loss did not improve from -0.68184
Epoch 36/100

Epoch 00036: val_loss did not improve from -0.68184
Epoch 37/100


Epoch 00037: val_loss did not improve from -0.68184
Epoch 38/100

Epoch 00038: val_loss did not improve from -0.68184
Epoch 39/100


Epoch 00039: val_loss did not improve from -0.68184
Epoch 40/100

Epoch 00040: val_loss did not improve from -0.68184
Epoch 41/100


Epoch 00041: val_loss did not improve from -0.68184
Epoch 42/100

Epoch 00042: val_loss did not improve from -0.68184
Epoch 43/100


Epoch 00043: val_loss did not improve from -0.68184
Epoch 44/100

Epoch 00044: val_loss did not improve from -0.68184
Epoch 45/100


Epoch 00045: val_loss did not improve from -0.68184
Epoch 46/100

Epoch 00046: val_loss improved from -0.68184 to -0.68708, saving model to Wehigts.hdf5
Epoch 47/100


Epoch 00047: val_loss did not improve from -0.68708
Epoch 48/100

Epoch 00048: val_loss did not improve from -0.68708
Epoch 49/100


Epoch 00049: val_loss did not improve from -0.68708
Epoch 50/100

Epoch 00050: val_loss did not improve from -0.68708
Epoch 51/100


Epoch 00051: val_loss did not improve from -0.68708
Epoch 52/100

Epoch 00052: val_loss did not improve from -0.68708
Epoch 53/100


Epoch 00053: val_loss did not improve from -0.68708
Epoch 54/100

Epoch 00054: val_loss did not improve from -0.68708
Epoch 55/100


Epoch 00055: val_loss did not improve from -0.68708
Epoch 56/100

Epoch 00056: val_loss did not improve from -0.68708
Epoch 57/100


Epoch 00057: val_loss did not improve from -0.68708
Epoch 58/100

Epoch 00058: val_loss did not improve from -0.68708
Epoch 59/100


Epoch 00059: val_loss did not improve from -0.68708
Epoch 60/100

Epoch 00060: val_loss did not improve from -0.68708
Epoch 61/100


Epoch 00061: val_loss did not improve from -0.68708
Epoch 62/100

Epoch 00062: val_loss did not improve from -0.68708
Epoch 63/100


Epoch 00063: val_loss did not improve from -0.68708
Epoch 64/100

Epoch 00064: val_loss did not improve from -0.68708
Epoch 65/100


Epoch 00065: val_loss did not improve from -0.68708
Epoch 66/100

Epoch 00066: val_loss did not improve from -0.68708
Epoch 67/100


Epoch 00067: val_loss did not improve from -0.68708
Epoch 68/100

Epoch 00068: val_loss did not improve from -0.68708
Epoch 69/100


Epoch 00069: val_loss did not improve from -0.68708
Epoch 70/100

Epoch 00070: val_loss did not improve from -0.68708
Epoch 71/100


Epoch 00071: val_loss did not improve from -0.68708
Epoch 72/100

Epoch 00072: val_loss did not improve from -0.68708
Epoch 73/100


Epoch 00073: val_loss did not improve from -0.68708
Epoch 74/100

Epoch 00074: val_loss did not improve from -0.68708
Epoch 75/100


Epoch 00075: val_loss did not improve from -0.68708
Epoch 76/100

Epoch 00076: val_loss did not improve from -0.68708
Epoch 77/100


Epoch 00077: val_loss did not improve from -0.68708
Epoch 78/100

Epoch 00078: val_loss did not improve from -0.68708
Epoch 79/100


Epoch 00079: val_loss did not improve from -0.68708
Epoch 80/100

Epoch 00080: val_loss did not improve from -0.68708
Epoch 81/100


Epoch 00081: val_loss did not improve from -0.68708
Epoch 82/100

Epoch 00082: val_loss did not improve from -0.68708
Epoch 83/100


Epoch 00083: val_loss did not improve from -0.68708
Epoch 84/100

Epoch 00084: val_loss did not improve from -0.68708
Epoch 85/100


Epoch 00085: val_loss did not improve from -0.68708
Epoch 86/100

Epoch 00086: val_loss did not improve from -0.68708
Epoch 87/100


Epoch 00087: val_loss did not improve from -0.68708
Epoch 88/100

Epoch 00088: val_loss did not improve from -0.68708
Epoch 89/100


Epoch 00089: val_loss did not improve from -0.68708
Epoch 90/100

Epoch 00090: val_loss did not improve from -0.68708
Epoch 91/100


Epoch 00091: val_loss did not improve from -0.68708
Epoch 92/100

Epoch 00092: val_loss did not improve from -0.68708
Epoch 93/100


Epoch 00093: val_loss did not improve from -0.68708
Epoch 94/100

Epoch 00094: val_loss did not improve from -0.68708
Epoch 95/100


Epoch 00095: val_loss improved from -0.68708 to -0.69247, saving model to Wehigts.hdf5
Epoch 96/100

Epoch 00096: val_loss did not improve from -0.69247
Epoch 97/100


Epoch 00097: val_loss did not improve from -0.69247
Epoch 98/100

Epoch 00098: val_loss did not improve from -0.69247
Epoch 99/100


Epoch 00099: val_loss did not improve from -0.69247
Epoch 100/100

Epoch 00100: val_loss did not improve from -0.69247
Test loss: -0.6924745138589438
Test accuracy: 0.7928571428571428
______________Validation Confusion Matrix______________
[[845 140]
 [179 376]]

___________________Validation Report___________________
             precision    recall  f1-score   support

          0       0.83      0.86      0.84       985
          1       0.73      0.68      0.70       555

avg / total       0.79      0.79      0.79      1540

0.7928571428571428
0.7716810301356314
0.7910962294325394


In [7]:
#Antes de tokenizado

#Conteo de mayusculas

conteo_mayusculas = []
for i in aux_x:
  
  aux = i[0].replace("@USUARIO","").replace(" ","")
  total_character = len([w for w in aux if w.isalpha()])
  
  if total_character: 
    conteo_mayusculas.append( len([w for w in aux if w.isupper()])/total_character )
  else:
    conteo_mayusculas.append(0)
conteo_mayusculas = np.array(conteo_mayusculas)
#simbolos ¡!¿?
#insultos
"""
::insultos::

Hijo de puta, hijo puta, hjo puta, hijo de la gran puta, puta madre, chinga tu madre, hijo de perra, hijo de la chingada, hijo de tu pinche madre, hijo de la rechingada, hijo de tu puta madre, hijo de tu reputisima madre
-> hdp

pura mierda -> pura_mierda
(opcional: chingo, puta) su/tu madre ->  su_madre

mi madre -> mi_madre

después de tokenizar

::Insuto en Hashtag::

si en #_ aparece JOTO, PERRO, PUTO, PUTA, CHINGA, VERGA, PUTIZA, CULERO, CABRON, PENDEJO, FELON, EMPUTADO, MARICON, GRINGO
"""

for aux in aux_x:
  tempStr = aux[0].lower().replace("‼","! ! ").replace("!","! ").replace("¡","¡ ").replace("?","? ").replace("¿","¿ ")
  #insultos - hijo de puta
  tempStr = tempStr.replace("hijo de puta","hdp").replace("hijo puta","hdp").replace("hijo de la gran puta","hdp")
  tempStr = tempStr.replace("chinga tu madre","hdp").replace("hijo de perra","hdp").replace("hijo de la chingada","hdp")
  tempStr = tempStr.replace("hijo de tu pinche madre","hdp").replace("hijo de la rechingada","hdp").replace("hijo de tu puta madre","hdp")
  tempStr = tempStr.replace("hijo de tu reputa madre","hdp").replace("hijo de tu reputisima madre","hdp")
  tempStr = tempStr.replace("hijo de su pinche madre","hdp").replace("hijo de su puta madre","hdp").replace("hijo de su pinche madre","hdp")
  tempStr = tempStr.replace("hijo de su reputa madre","hdp").replace("hijo de su reputisima madre","hdp")
  
  tempStr = tempStr.replace("hija de puta","hdp").replace("hija puta","hdp").replace("hija de la gran puta","hdp")
  tempStr = tempStr.replace("hija de perra","hdp").replace("hija de la chingada","hdp")
  tempStr = tempStr.replace("hija de tu pinche madre","hdp").replace("hija de la rechingada","hdp").replace("hija de tu puta madre","hdp")
  tempStr = tempStr.replace("hija de tu reputa madre","hdp").replace("hija de tu reputisima madre","hdp")
  tempStr = tempStr.replace("hija de su pinche madre","hdp").replace("hija de su puta madre","hdp").replace("hija de su pinche madre","hdp")
  tempStr = tempStr.replace("hija de su reputa madre","hdp").replace("hija de su reputisima madre","hdp")
  
  tempStr = tempStr.replace("tu madre","tu_madre").replace("su madre","su_madre")
  tempStr = tempStr.replace("pura mierda","pura_mierda")
  
  aux[0] = tempStr
aux_x[0]


['!  !   en el sitio también se atendió a la madre del joven en moto, quien se desvaneció al ver lo ocurrido']

In [8]:
#Después de tokenizado
from nltk.tokenize import TweetTokenizer
tknzr = TweetTokenizer()

x_tok = []
for i in aux_x:
     x_tok.append(tknzr.tokenize(i[0]))

#Control de risas

#aggresive hash check
aggresive_hash = []

for s in x_tok:
    print(s)
    aggresive_hash.append(0)
    i = 0
    while i < len(s): # s = token_x
        #Hash remove JOTO, PERRO, PUTO, PUTA, CHINGA, VERGA, PUTIZA, CULERO, CABRON, PENDEJO, FELON, EMPUTADO, MARICON, GRINGO counts as aggresive
        aggresive_words = ["joto","perro","puto","puta","chinga","verga","putiza","culero","cabron","pendejo","felon","emputado","maricon","gringo"]
        if '#' in s[i]:
            if [agr for agr in aggresive_words if agr in s[i]]:
                aggresive_hash[-1] += 1
            s.pop(i)
        #i-=1
        else:
            #risas
            j_s = s[i].count('j')
            h_s = s[i].count('h')
            a_s = s[i].count('a')+s[i].count('s')
            e_s = s[i].count('e')
            i_s = s[i].count('i')
            o_s = s[i].count('o')
            u_s = s[i].count('u')
            #no considera risa algo como aaaahh, ha, haaa, ah etc, minimo un intercalado de grado 2
            laugh = False
            if h_s and not j_s:
                laugh = True
                auxCount = 0
                for j in range(1,len(s[i])):
                    if not s[i][j] == s[i][j-1]:
                        auxCount += 1
                if auxCount == 1:
                    laugh = False
            if j_s or laugh: #solo si hay js o hs, si no sería aaaa, eeee, iii y eso no es risa
                j_s+=h_s
                if len(s[i]) == j_s+a_s:
                    s.pop(i)
                    for j in range(max(j_s,a_s)):
                        s.insert(i,'ja')
                        i+=1
                elif len(s[i]) == j_s+e_s:
                    s.pop(i)
                    for j in range(max(j_s,e_s)):
                        s.insert(i,'je')
                        i+=1
                elif len(s[i]) == j_s+i_s:
                    s.pop(i)
                    for j in range(max(j_s,i_s)):
                        s.insert(i,'ji')
                        i+=1
                elif len(s[i]) == j_s+o_s:
                    s.pop(i)
                    for j in range(max(j_s,o_s)):
                        s.insert(i,'jo')
                        i+=1
                elif len(s[i]) == j_s+u_s:
                    s.pop(i)
                    for j in range(max(j_s,u_s)):
                        s.insert(i,'ju')
                        i+=1
                else: #se encuentra la estructura pero no se considera risa
                    i+=1
            else: #si no encuentra nada, avanza a la siguiente palabra
                i+=1
    print(s)
        
        
print("Check!")

['!', '!', 'en', 'el', 'sitio', 'también', 'se', 'atendió', 'a', 'la', 'madre', 'del', 'joven', 'en', 'moto', ',', 'quien', 'se', 'desvaneció', 'al', 'ver', 'lo', 'ocurrido']
['!', '!', 'en', 'el', 'sitio', 'también', 'se', 'atendió', 'a', 'la', 'madre', 'del', 'joven', 'en', 'moto', ',', 'quien', 'se', 'desvaneció', 'al', 'ver', 'lo', 'ocurrido']
['!', 'es', 'verdad', '!', '!', 'luis', 'le', 'manda', 'un', 'saludo', 'a', 'una', 'chica', 'luchona', 'y', 'luis', 'la', 'admira', '!', '!', 'y', 'le', 'doy', 'toda', 'la', 'razón', '!', '!', 'también', 'me', 'sorprendio', '!']
['!', 'es', 'verdad', '!', '!', 'luis', 'le', 'manda', 'un', 'saludo', 'a', 'una', 'chica', 'luchona', 'y', 'luis', 'la', 'admira', '!', '!', 'y', 'le', 'doy', 'toda', 'la', 'razón', '!', '!', 'también', 'me', 'sorprendio', '!']
['#méxico', 'es', 'un', '#país', 'de', 'gente', 'luchona', ',', 'sabemos', 'caminar', 'con', 'frente', 'en', 'alto', 'y', 'hoy', 'vamos', 'por', 'la', 'ruta', 'correcta', '#cao', '@usuario']
[


['frida', 'guerra', 'entrevista', 'a', 'sacrisanta', 'mosso', ',', 'madre', 'de', 'karen', 'y', 'erick', ',', 'quienes', 'fueron', 'asesinados', 'en', 'ecatepec', '.']
['gasta', 'más', 'en', 'publicidad', 'el', 'gobierno', ',', 'seis', 'veces', 'más', 'que', 'lo', 'que', 'le', 'dan', 'a', 'partidos', '!', ',', 'uta', 'madre', 'ya', 'me', 'hicieron', 'enojar', '!']
['gasta', 'más', 'en', 'publicidad', 'el', 'gobierno', ',', 'seis', 'veces', 'más', 'que', 'lo', 'que', 'le', 'dan', 'a', 'partidos', '!', ',', 'uta', 'madre', 'ya', 'me', 'hicieron', 'enojar', '!']
['gente', ',', 'cuando', 'estén', 'en', 'la', 'calle', ',', 'en', 'los', 'andenes', 'del', 'metro', ',', 'o', 'subiendose', 'a', 'un', 'camión', 'tengan', 'tantita', 'madre', 'y', 'no', 'se', 'detengan', 'a', 'mandar', 'msjs', '!', '!', '😠', '😠']
['gente', ',', 'cuando', 'estén', 'en', 'la', 'calle', ',', 'en', 'los', 'andenes', 'del', 'metro', ',', 'o', 'subiendose', 'a', 'un', 'camión', 'tengan', 'tantita', 'madre', 'y', 'no', 


['@usuario', 'luchona', 'diria', 'yo', 'jajajajaja']
['@usuario', 'luchona', 'diria', 'yo', 'ja', 'ja', 'ja', 'ja', 'ja']
['@usuario', 'luchona', 'y', 'con', 'la', 'discografia', 'de', 'la', 'diva', 'jenny', 'rivera']
['@usuario', 'luchona', 'y', 'con', 'la', 'discografia', 'de', 'la', 'diva', 'jenny', 'rivera']
['@usuario', 'machorra', 'te', 'mereces', 'mas', 'por', 'cerda', '🐷', '🐖', '🐽']
['@usuario', 'machorra', 'te', 'mereces', 'mas', 'por', 'cerda', '🐷', '🐖', '🐽']
['@usuario', 'madre', 'mía', '!', 'por', 'qué', 'solo', 'antojas', '?', 'yo', 'quiero', 'macho', 'y', 'tú', 'dando', 'esos', 'espectáculos']
['@usuario', 'madre', 'mía', '!', 'por', 'qué', 'solo', 'antojas', '?', 'yo', 'quiero', 'macho', 'y', 'tú', 'dando', 'esos', 'espectáculos']
['@usuario', 'madre', 'mía', ',', 'los', 'tatuajes', '!', 'que', 'lindo', 'diseño']
['@usuario', 'madre', 'mía', ',', 'los', 'tatuajes', '!', 'que', 'lindo', 'diseño']
['@usuario', 'madre', 'mía', ',', 'no', 'recordaba', 'la', 'leche', 'de', '

['su', 'puta', 'madre', 'también', 'estaba', 'fuera', 'de', 'lugar', ':)']
['su', 'puta', 'madre', ',', 'se', 'nos', 'viene', 'la', 'noche', '.']
['su', 'puta', 'madre', ',', 'se', 'nos', 'viene', 'la', 'noche', '.']
['súbele', 'a', 'esa', 'madre', 'wey', '😍', '🔥', '@usuario', 'las', 'chicas', 'lindas', 'es', 'un', 'rolon', '❤', '️', '❤', '️']
['súbele', 'a', 'esa', 'madre', 'wey', '😍', '🔥', '@usuario', 'las', 'chicas', 'lindas', 'es', 'un', 'rolon', '❤', '️', '❤', '️']
['suelo', 'enamorarme', 'en', 'los', 'últimos', 'meses', 'del', 'año', 'para', 'que', 'en', 'los', 'primeros', 'me', 'rompan', 'mi', 'madre', '.', 'como', 'que', 'ya', 'me', 'estoy', 'tardando', '.']
['suelo', 'enamorarme', 'en', 'los', 'últimos', 'meses', 'del', 'año', 'para', 'que', 'en', 'los', 'primeros', 'me', 'rompan', 'mi', 'madre', '.', 'como', 'que', 'ya', 'me', 'estoy', 'tardando', '.']
['sueños', 'de', 'jovenes', 'a', 'punto', 'de', 'romperse', 'por', 'la', 'negligencia', 'de', 'un', 'presidente', 'que', 'no'


['putas', 'perras', 'hipócritas', 'hambreadas', 'y', 'conveniencieras', 'hay', 'que', 'ir', 's', 'violarlas', 'y', 'matarlas', 'por', 'hijos', 'de', 'su', 'puta', 'perra', 'madre', ':', 'n']
['premio', 'nobel', 'de', 'literatura', 'a', 'tus', 'putas', 'palabras', 'de', 'desprecio', '.']
['premio', 'nobel', 'de', 'literatura', 'a', 'tus', 'putas', 'palabras', 'de', 'desprecio', '.']
['esta', 'mujer', 'es', 'como', 'las', 'putas', 'que', 'se', 'toman', 'una', 'foto', 'en', 'bola', 'y', 'la', 'publican', 'con', 'una', 'frase', 'motivacional', '.', 'sólo', 'que', 'las', 'putas', 'trabajan', 'y', 'está', 'roba']
['esta', 'mujer', 'es', 'como', 'las', 'putas', 'que', 'se', 'toman', 'una', 'foto', 'en', 'bola', 'y', 'la', 'publican', 'con', 'una', 'frase', 'motivacional', '.', 'sólo', 'que', 'las', 'putas', 'trabajan', 'y', 'está', 'roba']
['me', 'cagan', 'esas', 'morras', 'que', 'nadamas', 'te', 'hablan', 'cuando', 'se', 'pelean', 'o', 'cortan', 'con', 'su', 'novio', 'y', 'te', 'dicen', '"'


['"', 'no', 'me', 'importa', 'lo', 'que', 'digan', 'esos', 'putos', 'periodistas', 'la', 'puta', 'que', 'los', 'pario', 'oh', 'oh', 'oh', 'hay', 'que', 'alentar', 'a', 'la', 'selección', '"']
['vivo', 'a', 'la', 'expectativa', 'de', 'que', 'la', 'vida', 'les', 'cobre', 'sus', 'acciones', 'de', 'mierda', 'a', 'los', 'putos', 'rateros', '.']
['vivo', 'a', 'la', 'expectativa', 'de', 'que', 'la', 'vida', 'les', 'cobre', 'sus', 'acciones', 'de', 'mierda', 'a', 'los', 'putos', 'rateros', '.']
['voy', 'a', 'definir', 'lo', 'que', 'pienso', 'sobre', 'los', 'que', 'critican', 'a', 'messi', 'y', 'le', 'tiran', '💩', ':', 'la', 'cdtm', 'hijos', 'de', 'los', '7897', 'putos', 'sistemas', 'solares', 'que', 'los', 'parió', '.']
['voy', 'a', 'definir', 'lo', 'que', 'pienso', 'sobre', 'los', 'que', 'critican', 'a', 'messi', 'y', 'le', 'tiran', '💩', ':', 'la', 'cdtm', 'hijos', 'de', 'los', '7897', 'putos', 'sistemas', 'solares', 'que', 'los', 'parió', '.']
['habría', 'preferido', 'ver', 'tanto', 'tatuaj

['cuando', 'descubres', 'que', 'tienes', 'dos', 'putas', 'en', 'instagram', ':/']
['cuando', 'descubres', 'que', 'tienes', 'dos', 'putas', 'en', 'instagram', ':/']
['que', 'todas', 'las', 'putas', 'barras', 'desaparezcan', 'pinches', 'viciosos', 'de', 'mierda', 'todos', 'he', 'le', 'voy', 'al', 'amérca']
['que', 'todas', 'las', 'putas', 'barras', 'desaparezcan', 'pinches', 'viciosos', 'de', 'mierda', 'todos', 'he', 'le', 'voy', 'al', 'amérca']
['siempre', 'hay', 'putas', 'en', 'negación', ',', 'que', 'a', 'pesar', 'de', 'que', 'siempre', 'se', 'les', 'observa', 'con', 'distintos', 'hombres', ',', 'argumentan', 'no', 'ser', 'putas', 'solo', 'muy', '“', 'sociables', '”']
['siempre', 'hay', 'putas', 'en', 'negación', ',', 'que', 'a', 'pesar', 'de', 'que', 'siempre', 'se', 'les', 'observa', 'con', 'distintos', 'hombres', ',', 'argumentan', 'no', 'ser', 'putas', 'solo', 'muy', '“', 'sociables', '”']
['ya', 'quiero', 'tener', 'novia', 'para', 'poder', 'coger', 'con', 'sus', 'amigas', 'putas'

['es', 'bueno', 'saber', 'que', 'todavía', 'siguen', 'vivos', 'hijos', 'de', 'sus', 'putas', 'madres']
['mi', 'mamá', 'regalo', 'dos', 'jeans', 'que', 'uso', 'que', 'putas', 'madres', 'le', 'pasa', 'por', 'la', 'cabeza', '😩', '😩', '😩']
['mi', 'mamá', 'regalo', 'dos', 'jeans', 'que', 'uso', 'que', 'putas', 'madres', 'le', 'pasa', 'por', 'la', 'cabeza', '😩', '😩', '😩']
['hijos', 'de', 'puta', 'miserables', 'ojalá', 'os', 'quemen', 'las', 'putas', 'oficinas', 'hoy', 'simios']
['hijos', 'de', 'puta', 'miserables', 'ojalá', 'os', 'quemen', 'las', 'putas', 'oficinas', 'hoy', 'simios']
['hijos', 'de', 'de', 'las', 'mil', 'putas', 'no', 'tener', 'que', 'hacer', 'póngase', 'a', 'trabajar', 'suena', 'raro', 'pero', 'a', 'trabajar', 'a', 'trabajar', 'ociosos']
['hijos', 'de', 'de', 'las', 'mil', 'putas', 'no', 'tener', 'que', 'hacer', 'póngase', 'a', 'trabajar', 'suena', 'raro', 'pero', 'a', 'trabajar', 'a', 'trabajar', 'ociosos']
['festival', 'del', 'pan', 'de', 'muerto', ',', 'festival', 'de', '

['llevo', '3', 'putos', 'días', 'sin', 'dormir', ',', 'no', 'he', 'tenido', 'tiempo', 'ni', 'para', 'masturbarme', 'y', 'ahora', 'resulta', 'que', 'hasta', '6', 'diferentes', 'jales', 'cargo', '.', 'chale', '...']
['a', 'todos', 'los', 'que', 'tengan', 'vecinos', 'como', 'los', 'míos', 'que', 'ven', 'que', 'sigue', 'fallando', 'el', 'agua', 'y', 'se', 'ponen', 'a', 'lavar', 'les', 'deseo', 'todo', 'el', 'pinche', 'mal', 'por', 'putos', '.']
['a', 'todos', 'los', 'que', 'tengan', 'vecinos', 'como', 'los', 'míos', 'que', 'ven', 'que', 'sigue', 'fallando', 'el', 'agua', 'y', 'se', 'ponen', 'a', 'lavar', 'les', 'deseo', 'todo', 'el', 'pinche', 'mal', 'por', 'putos', '.']
['si', 'que', 'los', 'chilenos', 'se', 'vayan', 'a', 'la', 'boa', 'así', 'como', 'holanda', 'por', 'putos', '!']
['si', 'que', 'los', 'chilenos', 'se', 'vayan', 'a', 'la', 'boa', 'así', 'como', 'holanda', 'por', 'putos', '!']
['a', 'ver', ',', 'putos', ',', 'yo', 'me', 'woa', 'poner', 'a', 'hablar', 'de', 'sus', 'vergas', 


['asi', 'es', 'de', 'la', 'unica', 'forma', 'de', 'tomar', 'a', 'una', 'mujer', 'y', 'volverla', 'loca', '❤']
['así', 'mi', 'mañana', 'porque', 'loca', 'de', 'animales', 'y', 'tener', '3', 'es', 'bien', 'fácil', 'decía', '🎈', '😞']
['así', 'mi', 'mañana', 'porque', 'loca', 'de', 'animales', 'y', 'tener', '3', 'es', 'bien', 'fácil', 'decía', '🎈', '😞']
['aspe', ',', 'blanco', ',', 'del', 'olmo', ',', 'zague', 'jamás', 'lo', 'hicieron', 'esos', 'si', 'tenían', 'huevos', 'no', 'como', 'el', 'joto', 'del', 'chicharo', '!', '!', '!']
['aspe', ',', 'blanco', ',', 'del', 'olmo', ',', 'zague', 'jamás', 'lo', 'hicieron', 'esos', 'si', 'tenían', 'huevos', 'no', 'como', 'el', 'joto', 'del', 'chicharo', '!', '!', '!']
['aunque', 'seas', 'bien', 'maricon', 'y', 'no', 'respondas', '...', 'recuerdalo', 'tooooodaqa', 'tu', 'vida', '...', 'no', 'son', 'nada', 'sin', 'santand', '…']
['aunque', 'seas', 'bien', 'maricon', 'y', 'no', 'respondas', '...', 'recuerdalo', 'tooooodaqa', 'tu', 'vida', '...', 'no',


['me', 'gustas', 'un', 'chingooo', '!', '!', 'jejeje', 'linda', 'noche', 'hermosa', 'sigue', 'asi', 'de', 'bella', 'de', 'loca', 'y', 'de', 'fabulosa', 'siempre', 'besos', 'bonita']
['me', 'gustas', 'un', 'chingooo', '!', '!', 'je', 'je', 'je', 'linda', 'noche', 'hermosa', 'sigue', 'asi', 'de', 'bella', 'de', 'loca', 'y', 'de', 'fabulosa', 'siempre', 'besos', 'bonita']
['me', 'haces', 'la', 'más', 'feliz', ',', 'grite', 'como', 'loca', ':(', '(', 'aún', 'que', 'no', 'lo', 'veas', 'fav', 'si', 'rix', 'qué', 'lindo', 'eres', 'al', 'tomarte', 'el', 'tiempo', 'de', 'se', '…']
['me', 'haces', 'la', 'más', 'feliz', ',', 'grite', 'como', 'loca', ':(', '(', 'aún', 'que', 'no', 'lo', 'veas', 'fav', 'si', 'rix', 'qué', 'lindo', 'eres', 'al', 'tomarte', 'el', 'tiempo', 'de', 'se', '…']
['me', 'hiciste', 'explotar', 'ahorita', 'porque', 'julio', 'dice', 'que', 'milo', 'y', 'el', 'son', 'esposos', 'y', 'se', 'anda', 'haciendo', 'el', 'joto', 'con', 'el', 'aaahh']
['me', 'hiciste', 'explotar', 'aho


['superman', ',', 'superman', ',', 'ya', 'tu', 'chica', 'esta', 'muy', 'loca', 'por', 'meterse', 'tanta', 'coca', '🔥', '🎶', '🎶']
['supongo', 'que', 'yo', 'seré', 'una', 'loca', 'para', 'ellas', ',', 'alguien', 'que', 'no', 'quiere', 'cumplir', 'con', 'su', 'papel', 'de', 'mujer', 'buena', '.']
['supongo', 'que', 'yo', 'seré', 'una', 'loca', 'para', 'ellas', ',', 'alguien', 'que', 'no', 'quiere', 'cumplir', 'con', 'su', 'papel', 'de', 'mujer', 'buena', '.']
['talvez', 'pienses', 'que', 'estoy', 'loca', 'y', 'deserebrada', 'pero', 'no', 'savia', 'que', 'eres', 'm', 'villalpando', '😊', '😊']
['talvez', 'pienses', 'que', 'estoy', 'loca', 'y', 'deserebrada', 'pero', 'no', 'savia', 'que', 'eres', 'm', 'villalpando', '😊', '😊']
['también', 'de', 'escuchar', 'dope', 'y', 'llorar', 'como', 'la', 'gran', 'marica', 'que', 'soy']
['también', 'de', 'escuchar', 'dope', 'y', 'llorar', 'como', 'la', 'gran', 'marica', 'que', 'soy']
['tan', 'perdida', 'como', 'alicia', ',', 'tan', 'loca', 'como', 'el', '


['tu', 'novio', 'el', 'marica', 'que', 'comparte', 'comida', 'rica', 'y', 'saludable', 'como', 'son', 'la', 'tripitas', 'y', 'comenta', '“', 'qui', 'asco', '”', ',', 'pendejo', '.']
['tu', 'novio', 'el', 'marica', 'que', 'comparte', 'comida', 'rica', 'y', 'saludable', 'como', 'son', 'la', 'tripitas', 'y', 'comenta', '“', 'qui', 'asco', '”', ',', 'pendejo', '.']
['no', 'es', 'insulto', ',', 'eso', 'es', 'lo', 'que', 'es', 'serrano', ',', 'una', 'enano', 'esperpento', 'y', 'marica', 'porque', 'no', 'enfrenta']
['no', 'es', 'insulto', ',', 'eso', 'es', 'lo', 'que', 'es', 'serrano', ',', 'una', 'enano', 'esperpento', 'y', 'marica', 'porque', 'no', 'enfrenta']
['tu', 'novio', 'el', 'marica', 'que', 'toma', 'cerveza', 'con', 'popote', 'y', 'además', 'le', 'pone', 'limón', 'para', 'que', 'sepa', '“', 'menos', 'amarga', '”', '.']
['tu', 'novio', 'el', 'marica', 'que', 'toma', 'cerveza', 'con', 'popote', 'y', 'además', 'le', 'pone', 'limón', 'para', 'que', 'sepa', '“', 'menos', 'amarga', '”', 

['para', 'ti', 'solo', 'tengo', 'un', '"', 'vete', 'a', 'la', 'verga', '"', 'y', 'es', 'eterno', '.']
['para', 'ti', 'solo', 'tengo', 'un', '"', 'vete', 'a', 'la', 'verga', '"', 'y', 'es', 'eterno', '.']
['debe', 'sentirse', 'verga', 'que', 'jefes', 'pagen', 'un', 'vergal', 'dinero', 'para', 'hacerte', 'pendejo', 'en', 'clase', '.', '@usuario']
['debe', 'sentirse', 'verga', 'que', 'jefes', 'pagen', 'un', 'vergal', 'dinero', 'para', 'hacerte', 'pendejo', 'en', 'clase', '.', '@usuario']
['que', 'bueno', 'que', 'se', 'amen', ',', 'pero', 'aquí', 'nos', 'vale', 'verga', ',', 'amigos', '.', '¡', 'vayan', 'a', 'escribir', 'sus', 'cursilerías', 'pendejas', 'a', 'whatsapp', '!']
['que', 'bueno', 'que', 'se', 'amen', ',', 'pero', 'aquí', 'nos', 'vale', 'verga', ',', 'amigos', '.', '¡', 'vayan', 'a', 'escribir', 'sus', 'cursilerías', 'pendejas', 'a', 'whatsapp', '!']
['que', 'verga', 'con', 'esos', 'hombres', 'que', 'ocupan', 'la', 'opinión', 'de', 'su', 'mamá', 'hasta', 'para', 'ir', 'a', 'caga


['estoy', 'hasta', 'la', 'puta', 'madre', 'de', 'los', 'videos', 'del', 'puto', 'de', 'victor', 'gonzalez']
['estoy', 'hasta', 'la', 'puta', 'madre', 'de', 'los', 'videos', 'del', 'puto', 'de', 'victor', 'gonzalez']
['@usuario', 'vete', 'alv', 'arriba', 'las', 'chivas', 'puto', ':', "'", 'v']
['@usuario', 'vete', 'alv', 'arriba', 'las', 'chivas', 'puto', ':', "'", 'v']
['chingas', 'a', 'tu_madre', 'cada', 'vez', 'que', 'te', 'palpite', 'el', 'corazón']
['chingas', 'a', 'tu_madre', 'cada', 'vez', 'que', 'te', 'palpite', 'el', 'corazón']
['wey', ',', 'en', 'todos', 'lados', 'está', 'de', 'la', 'verga', '...', 'pero', 'sí', ',', 'pinche', '#establodeméxico', '😪', '😪', '😪']
['wey', ',', 'en', 'todos', 'lados', 'está', 'de', 'la', 'verga', '...', 'pero', 'sí', ',', 'pinche', '😪', '😪', '😪']
['si', 'te', 'vas', 'a', 'ofender', 'por', 'algún', 'tweet', 'mío', ',', 'solo', 'te', 'voy', 'a', 'recomendar', 'una', 'cosa', ',', 'lárgate', 'a', 'la', 'verga', 'de', 'twitter', 'por', 'favor', '.']
[

['buena', 'tarde', '!', '!', '!', '!', '!', '@usuario', '@usuario', 'q', 'opinan', 'de', 'este', 'hdp', 'de', '@usuario', 'q', 'se', 'hace', 'pasar', 'por', 'el', 'inmortal', '?']
['buena', 'tarde', '!', '!', '!', '!', '!', '@usuario', '@usuario', 'q', 'opinan', 'de', 'este', 'hdp', 'de', '@usuario', 'q', 'se', 'hace', 'pasar', 'por', 'el', 'inmortal', '?']
['como', 'me', 'caga', 'que', '@usuario', 'tenga', 'contrato', 'con', 'televisa', '😡', '😡', '😡', 'y', 'ahora', 'por', 'donde', 'vergas', 'lo', 'voy', 'a', 'ver', '?', 'pinche', 'cable', 'ya', 'no', 'tiene', 'tdn', '.', 'hdp', '.']
['como', 'me', 'caga', 'que', '@usuario', 'tenga', 'contrato', 'con', 'televisa', '😡', '😡', '😡', 'y', 'ahora', 'por', 'donde', 'vergas', 'lo', 'voy', 'a', 'ver', '?', 'pinche', 'cable', 'ya', 'no', 'tiene', 'tdn', '.', 'hdp', '.']
['el', 'día', 'que', 'tenga', 'mucho', 'dinero', 'le', 'pagaré', 'el', 'sueldo', 'a', 'todos', 'los', 'agentes', 'de', 'tránsito', '…', 'con', 'tal', 'de', 'no', 'volverlos', 'a'


['la', 'hormona', 'loca', 'ataca', 'y', 'estas', 'ganas', 'de', 'llorar', 'inexplicables', 'me', 'están', 'controlando', '🙃']
['la', 'hormona', 'loca', 'ataca', 'y', 'estas', 'ganas', 'de', 'llorar', 'inexplicables', 'me', 'están', 'controlando', '🙃']
['a', 'un', 'hombre', 'le', 'gusta', 'una', 'mujer', 'fiestera', ',', 'loca', 'y', 'bebedora', ',', 'siempre', 'y', 'cuando', ',', 'esa', 'no', 'sea', 'su', 'mujer', '.']
['a', 'un', 'hombre', 'le', 'gusta', 'una', 'mujer', 'fiestera', ',', 'loca', 'y', 'bebedora', ',', 'siempre', 'y', 'cuando', ',', 'esa', 'no', 'sea', 'su', 'mujer', '.']
['no', 'creía', 'que', 'estaba', 'tan', 'loca', ',', 'solo', 'recuerdo', 'que', 'me', 'enoje', 'mucho', ',', 'le', 'rompí', 'su', 'celular', '.']
['no', 'creía', 'que', 'estaba', 'tan', 'loca', ',', 'solo', 'recuerdo', 'que', 'me', 'enoje', 'mucho', ',', 'le', 'rompí', 'su', 'celular', '.']
['ay', 'no', ',', 'ya', 'llorar', 'en', 'un', 'video', 'por', 'los', 'haters', 'es', 'estar', 'loca', '😹', '😹', '


['a', 'ver', ',', 'no', 'le', 'entiendo', 'ni', 'una', 'puta', 'verga', 'a', 'la', 'tarea', '😭', '😭', '😭']
['a', 'ver', ',', 'no', 'le', 'entiendo', 'ni', 'una', 'puta', 'verga', 'a', 'la', 'tarea', '😭', '😭', '😭']
['...', 'di', 'un', 'chingo', 'de', 'vuelta', ',', 'llegué', 'tardísimo', 'y', 'al', 'final', 'de', 'cuentas', 'no', 'me', 'dejaron', 'pasar', ',', 'valiendo', 'verga', 'el', 'trabajo', 'por', 'el', 'que', 'llegué', 'tarde', '.']
['...', 'di', 'un', 'chingo', 'de', 'vuelta', ',', 'llegué', 'tardísimo', 'y', 'al', 'final', 'de', 'cuentas', 'no', 'me', 'dejaron', 'pasar', ',', 'valiendo', 'verga', 'el', 'trabajo', 'por', 'el', 'que', 'llegué', 'tarde', '.']
['ni', 'machismo', ',', 'ni', 'feminismo', ',', 'mejor', 'regrésenos', 'a', 'rbd', 'y', 'váyanse', 'a', 'la', 'verga', '.']
['ni', 'machismo', ',', 'ni', 'feminismo', ',', 'mejor', 'regrésenos', 'a', 'rbd', 'y', 'váyanse', 'a', 'la', 'verga', '.']
['estoy', 'leyendo', 'las', 'respuestas', 'a', 'esto', 'y', ',', 'verga', ','

['en', 'ocasiones', ',', 'me', 'salgo', 'con', 'la', 'mía', 'espectacularmente', ',', 'en', 'otras', ',', 'me', 'lleva', 'la', 'verga', ',', 'pero', 'siempre', 'estoy', 'en', 'el', 'límite', '.', '😎']
['que', 'la', 'pachanga', 'vuelva', 'y', 'que', 'los', 'antros', 'vuelvan', 'y', 'que', 'la', 'fiesta', 'vuelva', 'y', 'que', 'les', 'valga', 'verga']
['que', 'la', 'pachanga', 'vuelva', 'y', 'que', 'los', 'antros', 'vuelvan', 'y', 'que', 'la', 'fiesta', 'vuelva', 'y', 'que', 'les', 'valga', 'verga']
['quienes', 'se', 'ponen', 'de', 'delicados', 'con', 'la', 'palabra', '"', 'verga', '"', 'nunca', 'sabrán', 'la', 'paz', 'mental', 'que', 'da', 'decirle', 'a', 'alguien', 'que', 'te', 'tiene', 'harta', 'lo', 'siguiente', ':']
['quienes', 'se', 'ponen', 'de', 'delicados', 'con', 'la', 'palabra', '"', 'verga', '"', 'nunca', 'sabrán', 'la', 'paz', 'mental', 'que', 'da', 'decirle', 'a', 'alguien', 'que', 'te', 'tiene', 'harta', 'lo', 'siguiente', ':']
['¿', 'les', 'ha', 'pasado', 'que', 'ven', 'u


['hoy', 'si', 'logre', 'despertarme', 'temprano', 'me', 'encanta', 'no', 'valer', 'verga', 'por', 'primera', 'vez', 'en', 'mi', 'vida']
['como', 'le', 'encanta', 'la', 'verga', 'ayudenme', 'con', 'sus', 'rt', '@usuario', '@usuario', '@usuario', '@parejacachoond5']
['como', 'le', 'encanta', 'la', 'verga', 'ayudenme', 'con', 'sus', 'rt', '@usuario', '@usuario', '@usuario', '@parejacachoond5']
['que', 'himno', 'tan', 'más', 'verga', '#mexicandesmotherpalmundial']
['que', 'himno', 'tan', 'más', 'verga']
['lo', 'siento', 'por', 'decir', 'muchas', 'groserías', 'pero', 'me', 'vale', 'verga']
['lo', 'siento', 'por', 'decir', 'muchas', 'groserías', 'pero', 'me', 'vale', 'verga']
['me', 'caga', 'vivir', 'hasta', 'la', 'quinta', 'verga', 'y', 'que', 'no', 'haya', 'servicio', 'a', 'domicilio', 'hasta', 'mi', 'hogar']
['me', 'caga', 'vivir', 'hasta', 'la', 'quinta', 'verga', 'y', 'que', 'no', 'haya', 'servicio', 'a', 'domicilio', 'hasta', 'mi', 'hogar']
['desde', 'acá', 'arriba', 'se', 'puede', 'v


['échenle', 'chingazos', 'pra', 'dar', 'el', '100', 'ya', 'que', 'este', 'fin', 'de', 'semana', 'nada', 'más', 'fueron', 'a', 'dar', 'las', 'nalgas']
['no', 'se', 'xq', 'todo', 'el', 'mundo', 'esta', 'mamando', 'con', 'los', '280', 'caracteres', 'otra', 'vez']
['no', 'se', 'xq', 'todo', 'el', 'mundo', 'esta', 'mamando', 'con', 'los', '280', 'caracteres', 'otra', 'vez']
['ni', 'a', 'mi', ',', 'she', 'belongs', 'to', 'justin', 'tbh', 'pero', 'abel', 'se', 'apendejó', 'y', 'dejó', 'a', 'mi', 'bella', 'por', 'ella', ',', 'se', 'lo', 'merece']
['ni', 'a', 'mi', ',', 'she', 'belongs', 'to', 'justin', 'tbh', 'pero', 'abel', 'se', 'apendejó', 'y', 'dejó', 'a', 'mi', 'bella', 'por', 'ella', ',', 'se', 'lo', 'merece']
['elliot', 'se', 'mamo', 'con', 'el', 'documental', ',', 'está', 'bien', 'verga', '.', '💕']
['elliot', 'se', 'mamo', 'con', 'el', 'documental', ',', 'está', 'bien', 'verga', '.', '💕']
['throwback', 'al', 'parcial', 'pasado', 'de', 'física', 'donde', 'no', 'estudié', 'ni', 'madres'

In [0]:
#print(x_tok[2282])
x = []
for i in x_tok:
     x.append(' '.join(i))


"""Transform data for model"""
le = preprocessing.LabelEncoder()
y = le.fit_transform(y)

cv = TfidfVectorizer()
x = cv.fit_transform(x)
x = x.toarray()
x = np.insert(x, -1, conteo_mayusculas, axis =1)

print(x.shape)

from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=1992)

from sklearn import decomposition
pca = decomposition.PCA(n_components=0.99, svd_solver='full')
pca.fit(x_train)
x_train = pca.transform(x_train)
x_test = pca.transform(x_test)

print(x_train.shape)
print(y_train.shape)

print(x_test.shape)
print(y_test.shape)

(7700, 15210)


In [8]:
"""Neural Network"""
from keras import backend as K
from keras.models import Sequential 
from keras.layers import Dense, Activation, Dropout
from keras.layers.normalization import BatchNormalization as BN
from keras.layers import GaussianNoise as GN
from keras.optimizers import Adam, RMSprop
from keras.callbacks import LearningRateScheduler
from keras.callbacks import ModelCheckpoint

x_train = x_train.reshape(x_train.shape[0], x_train.shape[1])
x_test = x_test.reshape(x_test.shape[0], x_train.shape[1])
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')

num_classes = 1
batch_size = 32
epochs = 100
learnRate = 0.001

# Learning rate annealing
def step_decay(epoch):
    if epoch/epochs<0.3:
        lrate = learnRate
    elif epoch/epochs<=0.5:
        lrate = learnRate/2
    elif epoch/epochs<=0.70:
        lrate = learnRate/10
    else:
        lrate = learnRate/100
    return lrate

#Loss function for macro_fm
def macro_fm(y_true, y_pred, beta=1.0):
    beta2 = beta**2.0
    top = K.sum(y_true * y_pred, axis=0)
    bot = beta2 * K.sum(y_true, axis=0) + K.sum(y_pred, axis=0)
    return -(1.0 + beta2) * K.mean(top/bot)
  
#Define model architecture
model = Sequential()
model.add( Dense( 2048, activation='relu', input_shape=(x_train.shape[1],) ) )
model.add(BN())
model.add(GN(0.3))
model.add(Activation('relu'))

model.add(Dense(1024))
model.add(BN())
model.add(GN(0.3))
model.add(Activation('relu'))

model.add(Dense(512))
model.add(BN())
model.add(GN(0.3))
model.add(Activation('relu'))

model.add(Dense(256))
model.add(BN())
model.add(GN(0.3))
model.add(Activation('relu'))

model.add(Dense(num_classes, activation='sigmoid'))
model.summary()

checkpoint_path = "Wehigts.hdf5"
checkpointer = ModelCheckpoint(filepath=checkpoint_path,
                               monitor='val_loss', verbose=1,
                               save_best_only=True, mode='min')


loss = macro_fm#'binary_crossentropy'

adam = Adam(lr=learnRate, beta_1=0.9, beta_2=0.999,
            epsilon=None, decay=1e-6, amsgrad=False)

rms = RMSprop(lr=learnRate, rho=0.9, epsilon=None, decay=0.0)

lrate = LearningRateScheduler(step_decay)

model.compile(loss=loss, 
            optimizer=adam, 
            metrics=['accuracy']) 

history = model.fit(x_train, y_train, 
            batch_size=batch_size,
            epochs=epochs,
            verbose=1,
            validation_data=(x_test, y_test),
            callbacks=[checkpointer])

#Load best model
model.load_weights(checkpoint_path)

score = model.evaluate(x_test, y_test, verbose=0)

print('Test loss:', score[0])
print('Test accuracy:', score[1])


y_pred = model.predict(x_test, batch_size=1)
y_pred = np.where(y_pred > 0.5, 1, 0)
print("______________Validation Confusion Matrix______________")
print(confusion_matrix(y_test, y_pred))
print("")
print("___________________Validation Report___________________")
print(classification_report(y_test, y_pred))

print(f1_score(y_test, y_pred, average='micro'))
print(f1_score(y_test, y_pred, average='macro'))
print(f1_score(y_test, y_pred, average='weighted'))

Using TensorFlow backend.


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 2048)              10858496  
_________________________________________________________________
batch_normalization_1 (Batch (None, 2048)              8192      
_________________________________________________________________
gaussian_noise_1 (GaussianNo (None, 2048)              0         
_________________________________________________________________
activation_1 (Activation)    (None, 2048)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 1024)              2098176   
_________________________________________________________________
batch_normalization_2 (Batch (None, 1024)              4096      
_________________________________________________________________
gaussian_noise_2 (GaussianNo (None, 1024)              0         
__________


Epoch 00001: val_loss improved from inf to -0.65726, saving model to Wehigts.hdf5
Epoch 2/100

Epoch 00002: val_loss did not improve from -0.65726
Epoch 3/100

Epoch 00003: val_loss improved from -0.65726 to -0.66127, saving model to Wehigts.hdf5
Epoch 4/100
1312/6160 [=====>........................] - ETA: 4s - loss: -0.8431 - acc: 0.8910


Epoch 00004: val_loss did not improve from -0.66127
Epoch 5/100

Epoch 00005: val_loss did not improve from -0.66127
Epoch 6/100

Epoch 00006: val_loss improved from -0.66127 to -0.66241, saving model to Wehigts.hdf5
Epoch 7/100


Epoch 00007: val_loss did not improve from -0.66241
Epoch 8/100

Epoch 00008: val_loss did not improve from -0.66241
Epoch 9/100

Epoch 00009: val_loss did not improve from -0.66241
Epoch 10/100


Epoch 00010: val_loss did not improve from -0.66241
Epoch 11/100

Epoch 00011: val_loss did not improve from -0.66241
Epoch 12/100

Epoch 00012: val_loss did not improve from -0.66241
Epoch 13/100


Epoch 00013: val_loss did not improve from -0.66241
Epoch 14/100

Epoch 00014: val_loss did not improve from -0.66241
Epoch 15/100

Epoch 00015: val_loss did not improve from -0.66241
Epoch 16/100


Epoch 00016: val_loss did not improve from -0.66241
Epoch 17/100

Epoch 00017: val_loss did not improve from -0.66241
Epoch 18/100

Epoch 00018: val_loss did not improve from -0.66241
Epoch 19/100


Epoch 00019: val_loss did not improve from -0.66241
Epoch 20/100

Epoch 00020: val_loss did not improve from -0.66241
Epoch 21/100

Epoch 00021: val_loss did not improve from -0.66241
Epoch 22/100


Epoch 00022: val_loss did not improve from -0.66241
Epoch 23/100

Epoch 00023: val_loss did not improve from -0.66241
Epoch 24/100

Epoch 00024: val_loss did not improve from -0.66241
Epoch 25/100


Epoch 00025: val_loss did not improve from -0.66241
Epoch 26/100

Epoch 00026: val_loss did not improve from -0.66241
Epoch 27/100

Epoch 00027: val_loss did not improve from -0.66241
Epoch 28/100


Epoch 00028: val_loss did not improve from -0.66241
Epoch 29/100

Epoch 00029: val_loss did not improve from -0.66241
Epoch 30/100

Epoch 00030: val_loss did not improve from -0.66241
Epoch 31/100


Epoch 00031: val_loss did not improve from -0.66241
Epoch 32/100

Epoch 00032: val_loss did not improve from -0.66241
Epoch 33/100

Epoch 00033: val_loss did not improve from -0.66241
Epoch 34/100


Epoch 00034: val_loss did not improve from -0.66241
Epoch 35/100

Epoch 00035: val_loss did not improve from -0.66241
Epoch 36/100

Epoch 00036: val_loss did not improve from -0.66241
Epoch 37/100


Epoch 00037: val_loss did not improve from -0.66241
Epoch 38/100

Epoch 00038: val_loss did not improve from -0.66241
Epoch 39/100

Epoch 00039: val_loss did not improve from -0.66241
Epoch 40/100


Epoch 00040: val_loss did not improve from -0.66241
Epoch 41/100

Epoch 00041: val_loss did not improve from -0.66241
Epoch 42/100

Epoch 00042: val_loss did not improve from -0.66241
Epoch 43/100


Epoch 00043: val_loss did not improve from -0.66241
Epoch 44/100

Epoch 00044: val_loss did not improve from -0.66241
Epoch 45/100

Epoch 00045: val_loss did not improve from -0.66241
Epoch 46/100


Epoch 00046: val_loss did not improve from -0.66241
Epoch 47/100

Epoch 00047: val_loss did not improve from -0.66241
Epoch 48/100

Epoch 00048: val_loss did not improve from -0.66241
Epoch 49/100


Epoch 00049: val_loss did not improve from -0.66241
Epoch 50/100

Epoch 00050: val_loss did not improve from -0.66241
Epoch 51/100

Epoch 00051: val_loss did not improve from -0.66241
Epoch 52/100


Epoch 00052: val_loss did not improve from -0.66241
Epoch 53/100

Epoch 00053: val_loss did not improve from -0.66241
Epoch 54/100

Epoch 00054: val_loss did not improve from -0.66241
Epoch 55/100


Epoch 00055: val_loss did not improve from -0.66241
Epoch 56/100

Epoch 00056: val_loss did not improve from -0.66241
Epoch 57/100

Epoch 00057: val_loss did not improve from -0.66241
Epoch 58/100


Epoch 00058: val_loss did not improve from -0.66241
Epoch 59/100

Epoch 00059: val_loss did not improve from -0.66241
Epoch 60/100

Epoch 00060: val_loss did not improve from -0.66241
Epoch 61/100


Epoch 00061: val_loss did not improve from -0.66241
Epoch 62/100

Epoch 00062: val_loss improved from -0.66241 to -0.66248, saving model to Wehigts.hdf5
Epoch 63/100

Epoch 00063: val_loss did not improve from -0.66248
Epoch 64/100


Epoch 00064: val_loss did not improve from -0.66248
Epoch 65/100

Epoch 00065: val_loss did not improve from -0.66248
Epoch 66/100

Epoch 00066: val_loss did not improve from -0.66248
Epoch 67/100


Epoch 00067: val_loss did not improve from -0.66248
Epoch 68/100

Epoch 00068: val_loss did not improve from -0.66248
Epoch 69/100

Epoch 00069: val_loss did not improve from -0.66248
Epoch 70/100


Epoch 00070: val_loss did not improve from -0.66248
Epoch 71/100

Epoch 00071: val_loss did not improve from -0.66248
Epoch 72/100

Epoch 00072: val_loss did not improve from -0.66248
Epoch 73/100


Epoch 00073: val_loss did not improve from -0.66248
Epoch 74/100

Epoch 00074: val_loss did not improve from -0.66248
Epoch 75/100

Epoch 00075: val_loss did not improve from -0.66248
Epoch 76/100


Epoch 00076: val_loss did not improve from -0.66248
Epoch 77/100

Epoch 00077: val_loss did not improve from -0.66248
Epoch 78/100

Epoch 00078: val_loss did not improve from -0.66248
Epoch 79/100


Epoch 00079: val_loss did not improve from -0.66248
Epoch 80/100

Epoch 00080: val_loss did not improve from -0.66248
Epoch 81/100

Epoch 00081: val_loss did not improve from -0.66248
Epoch 82/100


Epoch 00082: val_loss did not improve from -0.66248
Epoch 83/100

Epoch 00083: val_loss did not improve from -0.66248
Epoch 84/100

Epoch 00084: val_loss did not improve from -0.66248
Epoch 85/100


Epoch 00085: val_loss did not improve from -0.66248
Epoch 86/100

Epoch 00086: val_loss did not improve from -0.66248
Epoch 87/100

Epoch 00087: val_loss did not improve from -0.66248
Epoch 88/100


Epoch 00088: val_loss did not improve from -0.66248
Epoch 89/100

Epoch 00089: val_loss did not improve from -0.66248
Epoch 90/100

Epoch 00090: val_loss did not improve from -0.66248
Epoch 91/100


Epoch 00091: val_loss did not improve from -0.66248
Epoch 92/100

Epoch 00092: val_loss did not improve from -0.66248
Epoch 93/100

Epoch 00093: val_loss did not improve from -0.66248
Epoch 94/100


Epoch 00094: val_loss did not improve from -0.66248
Epoch 95/100

Epoch 00095: val_loss did not improve from -0.66248
Epoch 96/100

Epoch 00096: val_loss did not improve from -0.66248
Epoch 97/100


Epoch 00097: val_loss did not improve from -0.66248
Epoch 98/100

Epoch 00098: val_loss did not improve from -0.66248
Epoch 99/100

Epoch 00099: val_loss did not improve from -0.66248
Epoch 100/100


Epoch 00100: val_loss did not improve from -0.66248
Test loss: -0.6624847393531303
Test accuracy: 0.7785714285714286
______________Validation Confusion Matrix______________
[[850 135]
 [206 349]]

___________________Validation Report___________________
             precision    recall  f1-score   support

          0       0.80      0.86      0.83       985
          1       0.72      0.63      0.67       555

avg / total       0.77      0.78      0.77      1540

0.7785714285714285
0.7523624221269556
0.7748571781571421


In [7]:
"""SVM"""
from sklearn.svm import LinearSVC
from sklearn.metrics import confusion_matrix, classification_report

svm = LinearSVC()
svm.fit(x_train, y_train)  

y_pred = svm.predict(x_test)
print("______________Validation Confusion Matrix______________")
print(confusion_matrix(y_test, y_pred))
print("")
print("___________________Validation Report___________________")
print(classification_report(y_test, y_pred))

print(f1_score(y_test, y_pred, average='micro'))
print(f1_score(y_test, y_pred, average='macro'))
print(f1_score(y_test, y_pred, average='weighted'))

______________Validation Confusion Matrix______________
[[882 103]
 [202 353]]

___________________Validation Report___________________
             precision    recall  f1-score   support

          0       0.81      0.90      0.85       985
          1       0.77      0.64      0.70       555

avg / total       0.80      0.80      0.80      1540

0.801948051948052
0.7754521433874553
0.7969894603648779


In [0]:
"""DecisionTreeClassifier"""
from sklearn import tree

clf = tree.DecisionTreeClassifier()
clf = clf.fit(x_train, y_train)

y_pred = clf.predict(x_test)
print("______________Validation Confusion Matrix______________")
print(confusion_matrix(y_test, y_pred))
print("")
print("___________________Validation Report___________________")
print(classification_report(y_test, y_pred))

print(f1_score(y_test, y_pred, average='micro'))
print(f1_score(y_test, y_pred, average='macro'))
print(f1_score(y_test, y_pred, average='weighted'))

In [0]:
"""Naive Bayes Gaussian"""
from sklearn.naive_bayes import GaussianNB
gnb = GaussianNB()

gnb = gnb.fit(x_train.toarray(), y_train)

y_pred = gnb.predict(x_test.toarray())
print("______________Validation Confusion Matrix______________")
print(confusion_matrix(y_test, y_pred))
print("")
print("___________________Validation Report___________________")
print(classification_report(y_test, y_pred))

print(f1_score(y_test, y_pred, average='micro'))
print(f1_score(y_test, y_pred, average='macro'))
print(f1_score(y_test, y_pred, average='weighted'))

In [0]:
"""KNeighbors"""
from sklearn.neighbors import KNeighborsClassifier
neigh = KNeighborsClassifier(n_neighbors=10)

neigh = neigh.fit(x_train.toarray(), y_train)

y_pred = neigh.predict(x_test.toarray())
print("______________Validation Confusion Matrix______________")
print(confusion_matrix(y_test, y_pred))
print("")
print("___________________Validation Report___________________")
print(classification_report(y_test, y_pred))

print(f1_score(y_test, y_pred, average='micro'))
print(f1_score(y_test, y_pred, average='macro'))
print(f1_score(y_test, y_pred, average='weighted'))