# Transformers library
We will take advantage of custom transformers. Because they follow the scikit-learn transformer interface, they are easy to use in pipelines for both training and testing. We can select any subset of transformations and vary them depending on the model being used.

In [1]:
# Local
from ipynb.fs.full.data_loader import load_train_test_all_cols_data

# Math and data stuff
import pandas as pd
import numpy as np

# Sklearn
from sklearn.base import BaseEstimator, TransformerMixin

# Language stuff
from pymagnitude import Magnitude, MagnitudeUtils
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
nltk.download("stopwords", quiet=True)

# Other
import html

---

In [2]:
train_X, test_X, train_y, test_y = load_train_test_all_cols_data()

## DenseTransformer

In [3]:
class DenseTransformer(TransformerMixin, BaseEstimator):
    """
    Makes sparse arrays dense.

    Meant to sit between a step that emits a scipy sparse matrix and a step
    that only accepts dense input. Inheriting from `BaseEstimator` provides
    `get_params`/`set_params`, which pipelines need in order to clone the step.
    """
    def fit(self, X, y=None, **fit_params):
        """Nothing is learned; returns the transformer itself."""
        return self

    def transform(self, X, y=None, **fit_params):
        """
        Returns `X` as a dense `numpy.ndarray`.

        `toarray()` is used rather than `todense()`: the latter produces a
        `numpy.matrix`, which recent scikit-learn estimators refuse as input.
        Input without a `toarray` method (already dense) is passed through
        `np.asarray` instead of raising.
        """
        if hasattr(X, "toarray"):
            return X.toarray()
        return np.asarray(X)

---

## FeatureSelector

In [37]:
class FeatureSelector(BaseEstimator, TransformerMixin):
    """
    Selects a subset of features based on column names or data type.
    If both `columns` and `dtype_include` are given, union of selected columns is returned.

    Parameters
    ----------
    columns : list of column labels or a single string label, optional
        Columns selected by name. `None` (the default) selects none by name.
    dtype_include : optional
        Forwarded to `DataFrame.select_dtypes(include=...)`; every matching
        column is added to the selection.

    The returned frame lists the explicitly named columns first (in the order
    given), followed by the dtype-selected ones in the order they appear in `X`.
    """
    def __init__(self, columns=None, dtype_include=None):
        # `None` instead of a mutable `[]` default: a list default is shared by
        # every instance created without an explicit `columns` argument.
        self.columns = columns
        self.dtype_include = dtype_include

    def fit(self, X, y=None):
        """Nothing is learned; returns the transformer itself."""
        return self

    def transform(self, X, y=None):
        """Returns a new DataFrame holding only the selected columns."""
        # Work on a local list so the constructor parameter is never mutated;
        # otherwise every call would append the dtype columns again.
        if self.columns is None:
            selected = []
        elif isinstance(self.columns, str):
            selected = [self.columns]
        else:
            selected = list(self.columns)

        if self.dtype_include is not None:
            selected.extend(X.select_dtypes(include=self.dtype_include).columns)

        # dict.fromkeys removes duplicates while keeping first-seen order; a
        # plain list is used for indexing because pandas does not accept sets.
        selected = list(dict.fromkeys(selected))
        return X[selected].copy()

# DEMO 1
transformed = FeatureSelector(["AUTHOR", "DATE"]).transform(train_X)
transformed.head()

Unnamed: 0,AUTHOR,DATE
1447,Sonny Carter,2015-05-22 11:46:35.988
1846,Lizzy Molly,2013-09-09 17:34:07.052
1304,Warcorpse666,2015-05-26 02:27:43.254
402,Santeri Saariokari,2014-09-03 16:32:59.000
652,Quinho Divulgaçoes,2014-11-06 19:50:16.000


In [38]:
# DEMO 2
transformed = FeatureSelector(dtype_include="string").transform(train_X)
transformed.head()

Unnamed: 0,COMMENT_ID,INTERPRET,AUTHOR,CONTENT
1447,z13wxtdpeznid12et23ogtd4zoyvzbnoz04,eminem,Sonny Carter,I love this song sooooooooooooooo much﻿
1846,_2viQ_Qnc6-adCzTDLAhqNVQ5hFYcjPyPI5m7pHY4BY,shakira,Lizzy Molly,PLEASE CHECK OUT MY VIDEO CALLED &quot;WE LOVE...
1304,z13uutbriumnuj3rq04ccbvqlwjuj1srhyk0k,eminem,Warcorpse666,sorry but eminmem is a worthless wife beating ...
402,z121gbuy2unhc5m4n04cf3kyslqhepeqgvo0k,katy,Santeri Saariokari,"Hey guys go to check my video name ""growtopia ..."
652,z133hdqrqpukup0lp22chhoaztrhvxov5,katy,Quinho Divulgaçoes,me segue ha https://www.facebook.com/marcos.s...


---

## WordEmbeddings

In [6]:
class WordEmbeddingsSeries(BaseEstimator, TransformerMixin):
    """
    ! Operates on a Series (one sentence per entry), not on a DataFrame !
    Each entry is tokenized with nltk's `word_tokenize`; the resulting token
    lists are handed to `vectors.query` and its result is returned as is
    (one embedding vector per token).
    """
    def __init__(self, vectors):
        super().__init__()
        # embedding model; only its `query` method is used
        self.vectors = vectors

    def fit(self, X, y=None):
        # stateless step - nothing to learn
        return self

    def transform(self, X, y=None):
        sentences = X.copy()
        token_lists = list(map(word_tokenize, sentences))
        embedded = self.vectors.query(token_lists)
        return embedded
    
# DEMO
vectors = Magnitude(MagnitudeUtils.download_model('fasttext/medium/wiki-news-300d-1M'), pad_to_length=30)
transformed = WordEmbeddingsSeries(vectors).transform(train_X["CONTENT"][:20])
transformed.shape

(20, 30, 300)

---

## WordEmbeddingsDF

In [7]:
class WordEmbeddingsDF(BaseEstimator, TransformerMixin):
    """
    For each column (given in constructor or all columns if not specified),
    computes word embeddings of the values in the column, split with nltk.word_tokenize.
    The mean of these word embeddings over the token axis is then computed,
    giving one vector per row and column.
    These vectors are appended to the dataframe as new columns named
    `0_EMBEDDED`, `1_EMBEDDED`, ... (numbered consecutively across all embedded columns).

    Parameters
    ----------
    columns : iterable of column labels, optional
        Columns to embed. `None` (the default) embeds every column of the
        frame passed to `transform`.
    vectors : optional
        Embedding model exposing `query(list_of_token_lists)`. When omitted,
        the module-level variable `vectors` is used, which keeps older calls
        such as `WordEmbeddingsDF()` working.
    """
    def __init__(self, columns=None, vectors=None):
        super().__init__()
        self.columns = columns
        self.vectors = vectors

    def fit(self, X, y=None):
        """Nothing is learned; returns the transformer itself."""
        return self

    def _embedding_model(self):
        """Returns the model given to the constructor or, failing that, the global `vectors`."""
        if self.vectors is not None:
            return self.vectors
        try:
            # Legacy fallback: the model used to be read from a global variable.
            return vectors
        except NameError:
            raise ValueError(
                "No embedding model available: pass `vectors=` to WordEmbeddingsDF "
                "or define a module-level `vectors`."
            ) from None

    def transform(self, X, y=None):
        """Returns a copy of `X` with the mean-embedding columns appended."""
        X = X.copy()
        # Encode all columns by default. Resolved per call, so the constructor
        # parameter keeps the value the user passed.
        columns = X.columns if self.columns is None else self.columns
        model = self._embedding_model()

        blocks = []
        for col in columns:
            tokenized = [word_tokenize(line) for line in X[col]]
            # average over the token axis -> one vector per row
            blocks.append(np.mean(model.query(tokenized), axis=1))

        if not blocks:
            return X

        stacked = np.hstack(blocks)
        # The width comes from the model output instead of a hard-coded 300.
        names = [f"{i}_EMBEDDED" for i in range(stacked.shape[1])]
        embedded = pd.DataFrame(stacked, index=X.index, columns=names)

        # Appending everything at once avoids hundreds of single-column inserts.
        # Existing columns with the same names are replaced, as before.
        X = X.drop(columns=[name for name in names if name in X.columns])
        return pd.concat([X, embedded], axis=1)
    
transformed = WordEmbeddingsDF().transform(train_X[["AUTHOR", "CONTENT"]])
transformed.shape

(1249, 602)

---

## ExplorativeTransformer

In [8]:
class ExplorativeTransformer(BaseEstimator, TransformerMixin):
    """
    Adds hand-crafted spam indicators to the frame.

    `fit(X, y)` learns from labelled data (label 1 = spam, 0 = ham):
      * `not_unique_authors` - AUTHOR values that occur more than once in `X`,
      * `duplicated_and_spammers` / `duplicated_and_not_spammers` - number of
        rows written by such authors with label 1 / label 0,
      * `spam_dict` - word -> number of occurrences in rows with label 1,
      * `suspicious_words_list` - words frequent in spam rows that are neither
        English stop words nor frequent in ham rows.

    `transform(X)` returns a copy of `X` with four extra columns:
      * `HAS_LINK` - 2 if CONTENT contains "http" or "//", else 0,
      * `NOT_UNIQUE_AUTHOR` - a weight for authors in `not_unique_authors`, else 0,
      * `NULL_IN_DATE_TIME` - 2 if DATE is missing, else 0,
      * `SUSPICIOUS_WORDS_COUNT` - sum of `spam_dict[word] // 100` over the
        distinct suspicious words present in the lower-cased CONTENT.

    Requires the columns AUTHOR, CONTENT and DATE.
    """
    # A word must occur MORE often than these counts to be considered frequent.
    # NOTE(review): an earlier description of this step gave the thresholds the
    # other way round (ham > 15, spam > 35). The values below are the ones the
    # code has always applied - confirm which pair was intended.
    SPAM_MIN_COUNT = 15
    HAM_MIN_COUNT = 35

    def __init__(self):
        super().__init__()
        self.duplicated_and_spammers = 0
        self.duplicated_and_not_spammers = 0
        self.not_unique_authors = set()

    @staticmethod
    def _aligned_labels(X, y):
        """Returns the labels as an array ordered like the rows of `X`."""
        if isinstance(y, pd.Series):
            if y.index.equals(X.index):
                return y.to_numpy()
            # Series labels are matched to rows through the index.
            return y.reindex(X.index).to_numpy()
        # Arrays and lists carry no index, so they are matched by position.
        return np.asarray(y)

    def fit(self, X, y=None):
        """
        Learns repeated authors and suspicious words from `X` and labels `y`.

        Raises
        ------
        ValueError
            If `y` is not given; the statistics cannot be computed without labels.
        """
        if y is None:
            raise ValueError("ExplorativeTransformer.fit requires the labels `y`.")
        labels = self._aligned_labels(X, y)

        # All learned state is assigned (not incremented), so fitting again
        # starts from scratch instead of adding to the previous counts.
        authors = X["AUTHOR"]
        repeated = authors.duplicated(keep=False).to_numpy()
        self.not_unique_authors = set(authors[repeated])

        # Are all duplicated authors spammers? Count both outcomes.
        self.duplicated_and_spammers = int((repeated & (labels == 1)).sum())
        self.duplicated_and_not_spammers = int((repeated & (labels == 0)).sum())

        stop = set(stopwords.words('english'))
        self.spam_dict = self.get_suspicious_words(X, y, 1)
        ham_dict = self.get_suspicious_words(X, y, 0)

        # The word -> count mappings are filtered directly. Inverting them to
        # count -> word would keep only one word per distinct count.
        frequent_ham = {
            word for word, count in ham_dict.items()
            if count > self.HAM_MIN_COUNT and word not in stop
        }
        # Keep frequent spam words, minus everything that is also frequent in ham.
        self.suspicious_words_list = [
            word for word, count in self.spam_dict.items()
            if count > self.SPAM_MIN_COUNT
            and word not in stop
            and word not in frequent_ham
        ]

        return self

    def transform(self, X, y=None):
        """Returns a copy of `X` with the four indicator columns added."""
        X = X.copy()
        content = X["CONTENT"]

        has_link = (
            content.str.contains("http", regex=False, na=False)
            | content.str.contains("//", regex=False, na=False)
        )
        X["HAS_LINK"] = np.where(has_link.astype(bool), 2, 0)

        # Ratio of ham rows to spam rows among repeated authors. The divisor is
        # clamped to 1 so that data without repeated spammers does not raise
        # ZeroDivisionError.
        weight = self.duplicated_and_not_spammers // max(self.duplicated_and_spammers, 1)
        # `isin` tests each author individually.
        X["NOT_UNIQUE_AUTHOR"] = np.where(
            X["AUTHOR"].isin(list(self.not_unique_authors)), weight, 0
        )

        X["NULL_IN_DATE_TIME"] = np.where(X["DATE"].isna(), 2, 0)

        # Score of every suspicious word, computed once instead of once per row.
        word_scores = {
            word: self.spam_dict[word] // 100 for word in self.suspicious_words_list
        }

        def suspicious_score(text):
            # Missing content has no words and therefore scores 0.
            if not isinstance(text, str):
                return 0
            # Each suspicious word counts once per comment, however often it appears.
            present = set(text.lower().split())
            return sum(score for word, score in word_scores.items() if word in present)

        X["SUSPICIOUS_WORDS_COUNT"] = [suspicious_score(text) for text in content]

        return X

    def get_suspicious_words(self, X, y, num):
        """
        Counts words in the CONTENT of all rows whose label equals `num`.

        CONTENT is lower-cased and split on whitespace. Returns a dict mapping
        each word to its number of occurrences.
        """
        labels = self._aligned_labels(X, y)
        words = X.loc[labels == num, "CONTENT"].str.lower().str.split().explode()
        return words.value_counts().to_dict()

# DEMO
transformed = ExplorativeTransformer().fit_transform(train_X, train_y)
transformed.head()

Unnamed: 0,COMMENT_ID,AUTHOR,DATE,CONTENT,INTERPRET,HAS_LINK,NOT_UNIQUE_AUTHOR,NULL_IN_DATE_TIME,SUSPICIOUS_WORDS_COUNT
1447,z13wxtdpeznid12et23ogtd4zoyvzbnoz04,Sonny Carter,2015-05-22 11:46:35.988,I love this song sooooooooooooooo much﻿,eminem,0,0,0,0
1846,_2viQ_Qnc6-adCzTDLAhqNVQ5hFYcjPyPI5m7pHY4BY,Lizzy Molly,2013-09-09 17:34:07.052,PLEASE CHECK OUT MY VIDEO CALLED &quot;WE LOVE...,shakira,0,0,0,3
1304,z13uutbriumnuj3rq04ccbvqlwjuj1srhyk0k,Warcorpse666,2015-05-26 02:27:43.254,sorry but eminmem is a worthless wife beating ...,eminem,0,0,0,0
402,z121gbuy2unhc5m4n04cf3kyslqhepeqgvo0k,Santeri Saariokari,2014-09-03 16:32:59.000,"Hey guys go to check my video name ""growtopia ...",katy,0,0,0,3
652,z133hdqrqpukup0lp22chhoaztrhvxov5,Quinho Divulgaçoes,2014-11-06 19:50:16.000,me segue ha https://www.facebook.com/marcos.s...,katy,2,0,0,0


---

## AddMissingDateColumn

In [9]:
class AddMissingDateColumn(BaseEstimator, TransformerMixin):
    """
    Appends the boolean column DATE_MISSING, which is True wherever DATE has no value.
    The model should then understand that True most likely means spam.
    """
    def __init__(self):
        super().__init__()

    def fit(self, X, y=None):
        # stateless step - nothing to learn
        return self

    def transform(self, X, y=None):
        # `assign` builds a new frame, so the original dataset is not modified
        date_is_missing = X.DATE.isna()
        return X.assign(DATE_MISSING=date_is_missing)
    
# DEMO
transformed = AddMissingDateColumn().transform(train_X)
transformed[1244:1246]

Unnamed: 0,COMMENT_ID,AUTHOR,DATE,CONTENT,INTERPRET,DATE_MISSING
1865,_2viQ_Qnc6_FlLJN0izQaKVQNe6LGDmPZMmkVDjjymE,Neeru bala,2013-09-05 23:07:09.056,Hi.. Everyone.. If anyone after real online wo...,shakira,False
1427,LneaDw26bFsltJodWnZAafXscqrATBuKDM8-8lA4TQE,miamiscraziest,NaT,LADIES!!! -----&gt;&gt; If you have a broken h...,eminem,True


---

## AddLongCommentColumn

In [10]:
class AddLongCommentColumn(BaseEstimator, TransformerMixin):
    """
    Adds the boolean column LONG_COMMENT, True if CONTENT is at least
    `min_length` characters long.
    The model should then understand that True most likely means spam.

    Parameters
    ----------
    min_length : int, default 50
        Minimum number of characters for a comment to count as long.
    """
    def __init__(self, min_length=50):
        super().__init__()
        self.min_length = min_length

    def fit(self, X, y=None):
        """Nothing is learned; returns the transformer itself."""
        return self

    def transform(self, X, y=None):
        """Returns a copy of `X` with the LONG_COMMENT column added."""
        X = X.copy()
        is_long = X.CONTENT.str.len() >= self.min_length
        # Missing content has no length; make sure it ends up as plain False
        # so the column is strictly boolean.
        X["LONG_COMMENT"] = is_long.fillna(False).astype(bool)
        return X

# DEMO
transformed = AddLongCommentColumn().transform(train_X)
transformed[:2]

Unnamed: 0,COMMENT_ID,AUTHOR,DATE,CONTENT,INTERPRET,LONG_COMMENT
1447,z13wxtdpeznid12et23ogtd4zoyvzbnoz04,Sonny Carter,2015-05-22 11:46:35.988,I love this song sooooooooooooooo much﻿,eminem,False
1846,_2viQ_Qnc6-adCzTDLAhqNVQ5hFYcjPyPI5m7pHY4BY,Lizzy Molly,2013-09-09 17:34:07.052,PLEASE CHECK OUT MY VIDEO CALLED &quot;WE LOVE...,shakira,True


---

## AddContainsCheckColumn

In [11]:
class AddContainsCheckColumn(BaseEstimator, TransformerMixin):
    """
    Adds the boolean column CONTAINS_CHECK, True if CONTENT contains 'check'.
    The model should then understand that True most likely means spam.

    The match is case-sensitive ("CHECK" does not count), so lower-case the
    text beforehand if that is not wanted.
    """
    def __init__(self):
        super().__init__()

    def fit(self, X, y=None):
        """Nothing is learned; returns the transformer itself."""
        return self

    def transform(self, X, y=None):
        """Returns a copy of `X` with the CONTAINS_CHECK column added."""
        X = X.copy()
        # regex=False: "check" is a plain substring, not a pattern.
        # na=False: missing content counts as "does not contain", which keeps
        # the column strictly boolean instead of mixing in NaN.
        found = X.CONTENT.str.contains("check", regex=False, na=False)
        X["CONTAINS_CHECK"] = found.astype(bool)
        return X

# DEMO
transformed = AddContainsCheckColumn().transform(train_X)
transformed[2:4]

Unnamed: 0,COMMENT_ID,AUTHOR,DATE,CONTENT,INTERPRET,CONTAINS_CHECK
1304,z13uutbriumnuj3rq04ccbvqlwjuj1srhyk0k,Warcorpse666,2015-05-26 02:27:43.254,sorry but eminmem is a worthless wife beating ...,eminem,False
402,z121gbuy2unhc5m4n04cf3kyslqhepeqgvo0k,Santeri Saariokari,2014-09-03 16:32:59.000,"Hey guys go to check my video name ""growtopia ...",katy,True


---

## AddMultipleCommentsColumn

In [12]:
class AddMultipleCommentsColumn(BaseEstimator, TransformerMixin):
    """
    Adds the boolean column MULTIPLE_COMMENTS, True if the author posted multiple comments.
    The model should then understand that True most likely means spam.

    Looks also into data it has been fit on: a comment in the transformed
    frame and a different comment by the same author in the fitted frame
    together count as multiple comments.
    """
    def __init__(self):
        super().__init__()

    def fit(self, X, y=None):
        """Remembers a copy of `X` so later calls to `transform` can consult it."""
        self.prev_X = X.copy()
        return self

    def transform(self, X, y=None):
        """Returns a copy of `X` with the MULTIPLE_COMMENTS column added."""
        X = X.copy()
        # Pool the new rows with the fitted ones. Fully identical rows are the
        # same comment seen twice (e.g. when transforming the training data
        # itself) and must not make an author look like a repeat poster.
        # Index labels are discarded; they play no role in the comparison.
        pooled = pd.concat([X, self.prev_X], ignore_index=True).drop_duplicates()
        is_repeat = pooled.duplicated(subset=["AUTHOR"], keep=False)
        repeat_authors = pooled.loc[is_repeat, "AUTHOR"].unique()

        # Flag rows by author membership rather than by slicing the pooled
        # result. Slicing breaks when `X` contains repeated rows or shares
        # index labels with the fitted frame.
        X["MULTIPLE_COMMENTS"] = X["AUTHOR"].isin(repeat_authors).to_numpy()
        return X
    
# DEMO
mollys = train_X[train_X.AUTHOR == "Lizzy Molly"]
admc = AddMultipleCommentsColumn()
admc.fit_transform(mollys[:1])

Unnamed: 0,COMMENT_ID,AUTHOR,DATE,CONTENT,INTERPRET,MULTIPLE_COMMENTS
1846,_2viQ_Qnc6-adCzTDLAhqNVQ5hFYcjPyPI5m7pHY4BY,Lizzy Molly,2013-09-09 17:34:07.052,PLEASE CHECK OUT MY VIDEO CALLED &quot;WE LOVE...,shakira,False


In [13]:
transformed = admc.transform(mollys[1:2])
transformed[transformed.AUTHOR == "Lizzy Molly"]

Unnamed: 0,COMMENT_ID,AUTHOR,DATE,CONTENT,INTERPRET,MULTIPLE_COMMENTS
1472,LneaDw26bFuhuiZ8uX6C-qYLIsOFj9BIWtKWtCz870c,Lizzy Molly,NaT,PLEASE CHECK OUT MY VIDEO CALLED &quot;WE LOVE...,eminem,True


---

## AddMultipleCommentsSameVideoColumn

In [14]:
class AddMultipleCommentsSameVideoColumn(BaseEstimator, TransformerMixin):
    """
    Appends the boolean column MULTIPLE_COMMENTS_SAME_VIDEO. It is True for every
    row whose (AUTHOR, INTERPRET) pair occurs more than once in the transformed frame,
    i.e. the author posted multiple comments for the same video.
    The model should then understand that True most likely means spam.
    """
    def __init__(self):
        super().__init__()

    def fit(self, X, y=None):
        # stateless step - nothing to learn
        return self

    def transform(self, X, y=None):
        key_columns = ["AUTHOR", "INTERPRET"]
        # keep=False marks every member of a duplicated group, not only the later ones
        repeated = X.duplicated(subset=key_columns, keep=False)
        return X.assign(MULTIPLE_COMMENTS_SAME_VIDEO=repeated)
    
# DEMO    
transformed = AddMultipleCommentsSameVideoColumn().transform(train_X)
transformed[transformed.AUTHOR == "Lizzy Molly"]

Unnamed: 0,COMMENT_ID,AUTHOR,DATE,CONTENT,INTERPRET,MULTIPLE_COMMENTS_SAME_VIDEO
1846,_2viQ_Qnc6-adCzTDLAhqNVQ5hFYcjPyPI5m7pHY4BY,Lizzy Molly,2013-09-09 17:34:07.052,PLEASE CHECK OUT MY VIDEO CALLED &quot;WE LOVE...,shakira,False
1472,LneaDw26bFuhuiZ8uX6C-qYLIsOFj9BIWtKWtCz870c,Lizzy Molly,NaT,PLEASE CHECK OUT MY VIDEO CALLED &quot;WE LOVE...,eminem,False


---

## AddContainsHttpColumn

In [15]:
class AddContainsHttpColumn(BaseEstimator, TransformerMixin):
    """
    Adds the boolean column CONTAINS_HTTP, True if CONTENT contains a link
    (detected by the substring "http").
    The model should then understand that True most likely means spam.
    """
    def __init__(self):
        super().__init__()

    def fit(self, X, y=None):
        """Nothing is learned; returns the transformer itself."""
        return self

    def transform(self, X, y=None):
        """Returns a copy of `X` with the CONTAINS_HTTP column added."""
        X = X.copy()
        # regex=False: "http" is a plain substring, not a pattern.
        # na=False: missing content counts as "no link", which keeps the
        # column strictly boolean instead of mixing in NaN.
        found = X.CONTENT.str.contains("http", regex=False, na=False)
        X["CONTAINS_HTTP"] = found.astype(bool)
        return X
    
# DEMO    
transformed = AddContainsHttpColumn().transform(train_X)
transformed[3:5]

Unnamed: 0,COMMENT_ID,AUTHOR,DATE,CONTENT,INTERPRET,CONTAINS_HTTP
402,z121gbuy2unhc5m4n04cf3kyslqhepeqgvo0k,Santeri Saariokari,2014-09-03 16:32:59,"Hey guys go to check my video name ""growtopia ...",katy,False
652,z133hdqrqpukup0lp22chhoaztrhvxov5,Quinho Divulgaçoes,2014-11-06 19:50:16,me segue ha https://www.facebook.com/marcos.s...,katy,True


---

## AddTimeColumn

In [16]:
class AddTimeColumn(BaseEstimator, TransformerMixin):
    """
    Appends the column TIME holding the time-of-day part of DATE.
    Hypothesis: spam is posted at night or, on the contrary, during main working hours. Let the model decide...
    This probably won't work, because the dates are most likely relative to the time zone where they were collected.
    """
    def __init__(self):
        super().__init__()

    def fit(self, X, y=None):
        # stateless step - nothing to learn
        return self

    def transform(self, X, y=None):
        # `assign` builds a new frame, leaving the caller's frame untouched
        time_of_day = X.DATE.dt.time
        return X.assign(TIME=time_of_day)
    
# DEMO    
transformed = AddTimeColumn().transform(train_X)
transformed[0:1]

Unnamed: 0,COMMENT_ID,AUTHOR,DATE,CONTENT,INTERPRET,TIME
1447,z13wxtdpeznid12et23ogtd4zoyvzbnoz04,Sonny Carter,2015-05-22 11:46:35.988,I love this song sooooooooooooooo much﻿,eminem,11:46:35.988000


---

## HtmlUnescaper

In [17]:
class HtmlUnescaper(BaseEstimator, TransformerMixin):
    """
    Replaces HTML character references in CONTENT by the characters they stand for.
    For example, `&amp;` is an escaped ampersand and becomes `&`.
    """
    def __init__(self):
        super().__init__()

    def fit(self, X, y=None):
        """Nothing is learned; returns the transformer itself."""
        return self

    @staticmethod
    def _unescape(value):
        """Unescapes strings; any other value (e.g. a missing one) is returned unchanged."""
        # html.unescape only works on str and raises for NaN/None.
        return html.unescape(value) if isinstance(value, str) else value

    def transform(self, X, y=None):
        """Returns a copy of `X` with CONTENT unescaped."""
        X = X.copy()
        X["CONTENT"] = X["CONTENT"].apply(self._unescape)
        return X
    
# DEMO    
print("Before: ", train_X.CONTENT[700])
transformed = HtmlUnescaper().transform(train_X)
print("After: ", transformed.CONTENT[700])

Before:  <a href="http://www.youtube.com/watch?v=KQ6zr6kCPj8&amp;t=2m19s">2:19</a> best part﻿
After:  <a href="http://www.youtube.com/watch?v=KQ6zr6kCPj8&t=2m19s">2:19</a> best part﻿


---

## BOMRemover

In [18]:
class BOMRemover(BaseEstimator, TransformerMixin):
    """
    Strips every Byte Order Mark character (U+FEFF) from the CONTENT column.
    """
    def __init__(self):
        super().__init__()

    def fit(self, X, y=None):
        # stateless step - nothing to learn
        return self

    def transform(self, X, y=None):
        # plain substring replacement, no regular expression involved
        without_bom = X["CONTENT"].str.replace("\ufeff", "", regex=False)
        return X.assign(CONTENT=without_bom)

# DEMO
"Before: " + train_X.CONTENT.loc[700]

'Before: <a href="http://www.youtube.com/watch?v=KQ6zr6kCPj8&amp;t=2m19s">2:19</a> best part\ufeff'

In [19]:
transformed = BOMRemover().transform(train_X)
"After: " + transformed.CONTENT.loc[700]

'After: <a href="http://www.youtube.com/watch?v=KQ6zr6kCPj8&amp;t=2m19s">2:19</a> best part'

---

## AnchorTransformer

In [20]:
class AnchorTransformer(BaseEstimator, TransformerMixin):
    """
    Transforms all anchor tags into one keyword.
    The model will figure out that the presence of this keyword probably means spam.

    Every `<a ...>...</a>` element in CONTENT (or a lone opening `<a ...>` tag
    without a closing tag) is replaced by the word "anchortag".
    """
    # (?is)        - case-insensitive, and "." also matches line breaks
    # <a\b[^>]*>   - an opening anchor tag; \b keeps other tags starting with
    #                "a" from matching
    # (?:.*?</a>)? - lazily up to the nearest closing tag, if there is one.
    #                A greedy ".+>" would run on to the last ">" of the comment
    #                and swallow ordinary text between or after links.
    ANCHOR_PATTERN = r"(?is)<a\b[^>]*>(?:.*?</a>)?"

    def __init__(self):
        super().__init__()

    def fit(self, X, y=None):
        """Nothing is learned; returns the transformer itself."""
        return self

    def transform(self, X, y=None):
        """Returns a copy of `X` with anchors in CONTENT replaced by "anchortag"."""
        X = X.copy()
        X["CONTENT"] = X["CONTENT"].str.replace(self.ANCHOR_PATTERN, "anchortag", regex=True)
        return X
    
# DEMO
print("Before: " + train_X.CONTENT.loc[700])
transformed = AnchorTransformer().transform(train_X)
print("After: " + transformed.CONTENT.loc[700])

Before: <a href="http://www.youtube.com/watch?v=KQ6zr6kCPj8&amp;t=2m19s">2:19</a> best part﻿
After: anchortag best part﻿


---

## AddContainsAnchorTagColumn

In [25]:
class AddContainsAnchorTagColumn(BaseEstimator, TransformerMixin):
    """
    Adds new column CONTAINS_ANCHOR_TAG which is True when CONTENT contains an <a> tag.
    The model should then understand that True most likely means spam.
    Removes the link from CONTENT as well.
    """
    # An opening anchor tag; \b keeps other tags starting with "a" from matching.
    OPENING_TAG_PATTERN = r"(?i)<a\b[^>]*>"
    # The whole anchor element: opening tag, then lazily up to the nearest
    # closing tag if there is one. A greedy ".+>" would run on to the last ">"
    # of the comment and delete ordinary text between or after links.
    ANCHOR_PATTERN = r"(?is)<a\b[^>]*>(?:.*?</a>)?"

    def __init__(self):
        super().__init__()

    def fit(self, X, y=None):
        """Nothing is learned; returns the transformer itself."""
        return self

    def transform(self, X, y=None):
        """Returns a copy of `X` with the flag column added and anchors removed from CONTENT."""
        X = X.copy()
        # The flag is computed before the anchors are removed.
        # na=False: missing content counts as "no anchor", so the column stays boolean.
        has_anchor = X["CONTENT"].str.contains(self.OPENING_TAG_PATTERN, regex=True, na=False)
        X["CONTAINS_ANCHOR_TAG"] = has_anchor.astype(bool)
        X["CONTENT"] = X["CONTENT"].str.replace(self.ANCHOR_PATTERN, "", regex=True)
        return X
    
# DEMO
print("Before: " + train_X.CONTENT.loc[700])
transformed = AddContainsAnchorTagColumn().transform(train_X)
transformed.loc[700]

Before: <a href="http://www.youtube.com/watch?v=KQ6zr6kCPj8&amp;t=2m19s">2:19</a> best part﻿


COMMENT_ID             z13uwn2heqndtr5g304ccv5j5kqqzxjadmc0k
AUTHOR                                          Corey Wilson
DATE                              2015-05-28 21:39:52.376000
CONTENT                                           best part﻿
INTERPRET                                              lmfao
CONTAINS_ANCHOR_TAG                                     True
Name: 700, dtype: object

---

## Lower

In [26]:
class Lower(BaseEstimator, TransformerMixin):
    """
    Converts the CONTENT column to lowercase.
    """
    def __init__(self):
        super().__init__()

    def fit(self, X, y=None):
        # stateless step - nothing to learn
        return self

    def transform(self, X, y=None):
        # `assign` builds a new frame, leaving the caller's frame untouched
        lowered = X["CONTENT"].str.lower()
        return X.assign(CONTENT=lowered)
    
# DEMO
print("Before: " + train_X.CONTENT.loc[192][:40])
transformed = Lower().transform(train_X)
print("After: " + transformed.CONTENT.loc[192][:40])

Before: People, here is a new network like FB...
After: people, here is a new network like fb...


---

## UrlTransformer

In [23]:
class UrlTransformer(BaseEstimator, TransformerMixin):
    """
    Transforms all urls into one keyword.
    The model will figure out that the presence of this keyword probably means spam.

    A whitespace-delimited token of CONTENT is replaced by "urllink" when it
    contains "http://" or "https://", "www.", ".com", or "watch?".
    """
    # Each alternative spans the whole token around its marker (\S* on both sides).
    # The first two alternatives also catch links on domains other than .com.
    URL_PATTERN = r"\S*https?://\S*|\S*www\.\S*|\S*\.com\S*|\S*watch\?\S*"

    def __init__(self):
        super().__init__()

    def fit(self, X, y=None):
        """Nothing is learned; returns the transformer itself."""
        return self

    def transform(self, X, y=None):
        """Returns a copy of `X` with urls in CONTENT replaced by "urllink"."""
        X = X.copy()
        X["CONTENT"] = X["CONTENT"].str.replace(self.URL_PATTERN, "urllink", regex=True)
        return X
    
# DEMO
print("Before: " + train_X.CONTENT.loc[1573])
print("Before: " + train_X.CONTENT.loc[14])
transformed = UrlTransformer().transform(train_X)
print("After: " + transformed.CONTENT.loc[1573])
print("After: " + transformed.CONTENT.loc[14])

Before: subscribe to my channel  /watch?v=NxK32i0HkDs
Before: please like :D https://premium.easypromosapp.com/voteme/19924/616375350﻿
After: subscribe to my channel  urllink
After: please like :D urllink


---

## Debugger

In [24]:
class Debugger(BaseEstimator, TransformerMixin):
    """
    Pass-through step for inspecting a pipeline: prints the head of the
    dataframe it receives and hands the dataframe on unchanged.
    """
    def __init__(self):
        super().__init__()

    def fit(self, X, y=None):
        # stateless step - nothing to learn
        return self

    def transform(self, X, y=None):
        preview = X.head()
        print(preview)
        return X
    
# DEMO
transformed = Debugger().transform(train_X)

                                       COMMENT_ID              AUTHOR  \
1447          z13wxtdpeznid12et23ogtd4zoyvzbnoz04        Sonny Carter   
1846  _2viQ_Qnc6-adCzTDLAhqNVQ5hFYcjPyPI5m7pHY4BY         Lizzy Molly   
1304        z13uutbriumnuj3rq04ccbvqlwjuj1srhyk0k        Warcorpse666   
402         z121gbuy2unhc5m4n04cf3kyslqhepeqgvo0k  Santeri Saariokari   
652             z133hdqrqpukup0lp22chhoaztrhvxov5  Quinho Divulgaçoes   

                        DATE  \
1447 2015-05-22 11:46:35.988   
1846 2013-09-09 17:34:07.052   
1304 2015-05-26 02:27:43.254   
402  2014-09-03 16:32:59.000   
652  2014-11-06 19:50:16.000   

                                                CONTENT INTERPRET  
1447            I love this song sooooooooooooooo much﻿    eminem  
1846  PLEASE CHECK OUT MY VIDEO CALLED &quot;WE LOVE...   shakira  
1304  sorry but eminmem is a worthless wife beating ...    eminem  
402   Hey guys go to check my video name "growtopia ...      katy  
652   me segue ha  https://w

---