# 1. Import Libraries

* Import the required libraries.

In [1]:
#pip install Unidecode

In [2]:
#pip install --user -U nltk

In [3]:
#pip install beautifulsoup4

In [4]:
import re
import os
import math
import nltk
import pickle
import warnings
import unidecode
import numpy as np
import pandas as pd

from tqdm.notebook import tqdm
from bs4 import BeautifulSoup

In [5]:
# Show every column when a DataFrame is rendered (no column truncation).
pd.set_option('display.max_columns', None)

# Silence all warnings so that they do not clutter the cell outputs.
warnings.filterwarnings('ignore')

----

# 2. Data Pre-processing

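* We will pre-process the comment text by performing the operations listed below:<br>
&emsp; 1. Remove HTML tags.<br>
&emsp; 2. Remove accented characters.<br>
&emsp; 3. Convert to lowercase.<br>
&emsp; 4. Remove IP addresses, hyperlinks and numbers.<br>
&emsp; 5. Replace emoticons with the corresponding words.<br>
&emsp; 6. Remove special characters.<br>
&emsp; 7. Replace sentence end markers with special tokens.<br>
&emsp; 8. Decontraction.<br>
* We will create a utility function for each of these data-preprocessing operations.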

## 2.1. Remove HTML Tags

* HTML tags do not add much value towards understanding and analyzing texts.

In [6]:
def removeHTMLTags(text):
    '''
    Strip the HTML markup from a given text and keep only its textual content.
    
    Parameter:
    ---------
    text: str
        Text (possibly containing HTML markup) to be cleaned.
        
    Returns:
    -------
    str
        The whitespace-stripped text fragments that remain, joined by single spaces.
    '''
    
    # Reference: 'Remove html tags using BeautifulSoup' - https://www.geeksforgeeks.org/remove-all-style-scripts-and-html-tags-using-beautifulsoup/
    
    # Parse the markup with Python's built-in html parser
    parsedHtml = BeautifulSoup(text, 'html.parser')
    
    # <style> and <script> elements carry CSS / JavaScript rather than readable text,
    # so they are dropped together with their content before the text is extracted.
    for unwantedTag in parsedHtml.find_all(['style', 'script']):
        unwantedTag.decompose()
    
    # Collect the remaining text fragments and glue them together with single spaces
    textFragments = list(parsedHtml.stripped_strings)
    
    return ' '.join(textFragments)

#### Example - Removal of html tags

In [7]:
# Sample HTML document: the <style> and <script> elements should vanish completely,
# while the text of <title>, <h1> and <h2> should survive.
html = '''
        <html>
            <head>
                <title>Case Study</title>
                <style>.class { font-color: blue; }</style>
                <script>function(){ console.log('This is a JavaScript Code'); }</script>
            </head>
            <body>
                <h1>Jigsaw Unintended Bias</h1>
                <h2>Toxicity Classification</h2>
            </body>
        </html>
        '''

# Call the function 'removeHTMLTags()' to remove the html tags from the html content.
# As the last expression of the cell, the returned string is shown as the cell output.
removeHTMLTags(html)

'Case Study Jigsaw Unintended Bias Toxicity Classification'

----

## 2.2. Remove Accented Characters

* We may receive some accented characters/letters in comments. E.g., résumé, tête-à-tête, etc.
* The most common accents are the acute (é), grave (è), circumflex (â, î or ô), tilde (ñ), umlaut and dieresis (ü or ï – the same symbol is used for two different purposes), and cedilla (ç). Accent marks (also referred to as diacritics or diacriticals) usually appear above a character. [Reference](https://www.fonts.com/content/learning/fontology/level-3/signs-and-symbols/accents)
* We need to ensure that we convert and standardize such characters to ASCII characters.

In [8]:
def removeAccentedChars(text):
    '''
    Convert the accented characters of a given text to their plain ASCII counterparts.
    
    Parameter:
    ---------
    text: str
        Text whose accented characters have to be converted.
        
    Returns:
    -------
    str
        The text as returned by 'unidecode.unidecode()'.
    '''
    
    # Reference: "remove accented characters python" - https://www.geeksforgeeks.org/how-to-remove-string-accents-using-python-3/
    
    # Delegate the conversion to the unidecode package
    asciiText = unidecode.unidecode(text)
    
    return asciiText

#### Example - Removal of accented characters

In [9]:
# Sample string mixing accented letters with a non-letter symbol (the degree sign).
accentedStr = 'Sómě Áccěntěd těxt: orčpžsíáýd stävänger hell°'

# Call the function 'removeAccentedChars()' on the sample. As the last expression of the cell,
# the returned string is shown as the cell output.
removeAccentedChars(accentedStr)

'Some Accented text: orcpzsiayd stavanger helldeg'

----

## 2.3. Convert to Lowercase

* Convert the comment text to lower case before doing further preprocessing.
* Texts in lowercase help in the process of preprocessing and in later stages in NLP.
* Converting the text to lowercase is pretty easy.

In [10]:
def lowercase(text):
    '''
    Return a lowercase copy of a given text.
    
    Parameter:
    ---------
    text: str
        Text to be converted.
        
    Returns:
    -------
    str
        The result of calling 'lower()' on the text.
    '''
    
    loweredText = text.lower()
    
    return loweredText

#### Example - Converting a text to lowercase

In [11]:
# Sample string containing a few words with uppercase letters.
upStr = 'Converting a TEXT to its Lowercase is very simple.'

# Call the function 'lowercase()' on the sample; the returned string is shown as the cell output.
lowercase(upStr)

'converting a text to its lowercase is very simple.'

----

## 2.4. Remove IP Address, Hyperlinks and Number

* Remove any IP Address, Hyperlinks and numbers from the comment text as they won't add any value to perform the toxicity classification.

In [12]:
def removeIPLinkNum(text, ipAddress=True, hyperlink=False, numbers=True):
    '''
    Delete IP addresses, hyperlinks and digits from a given text, depending on the flags.
    
    Parameter:
    ---------
    text: str
        Text to be cleaned.
    ipAddress: boolean
        When True, dotted-quad IP addresses are deleted.
    hyperlink: boolean
        When True, hyperlinks are deleted (switched off by default).
    numbers: boolean
        When True, every remaining digit is deleted.
        
    Returns:
    -------
    str
        The cleaned text, with every run of two or more spaces collapsed into one space.
    '''
    
    # References:
    #   'Remove IP Address Python' - https://www.geeksforgeeks.org/extract-ip-address-from-file-using-python/#:~:text=The%20regular%20expression%20for%20valid,%5C.)%7B
    #   'Regex for hperlinks Python' - https://www.geeksforgeeks.org/python-check-url-string/
    
    # (switch, pattern) pairs in the order in which they are applied:
    # IP addresses first, then hyperlinks, then the individual digits.
    removalSteps = (
        (ipAddress, r'((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)'),
        (hyperlink, r"(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'\".,<>?«»“”‘’]))"),
        (numbers, r'[0-9]'),
    )
    
    for enabled, pattern in removalSteps:
        
        if enabled == True:
            
            # Every match is replaced by the empty string
            text = re.sub(pattern, '', text)
    
    # The deletions may leave consecutive spaces behind; squeeze them into a single space.
    return re.sub(r'[ ][ ]+', ' ', text)

#### Example - Removal of IP Address, Hyperlinks and numbers

In [13]:
%%time
# Sample text containing an IP address, a hyperlink and a few numbers.
ipLinkNum = 'Ping 192.128.10.10 to connect to the site https://www.infinity.com/ and get the numbers between 1 and 1000'

# Call the 'removeIPLinkNum()' to remove the IP Address, Hyperlinks and numbers from the text.
# Hyperlink removal is off by default in the function signature, so it is enabled explicitly here.
removeIPLinkNum(ipLinkNum, hyperlink=True)

CPU times: user 911 µs, sys: 772 µs, total: 1.68 ms
Wall time: 1.69 ms


'Ping to connect to the site and get the numbers between and '

----

## 2.5. Replace Emoticons with the corresponding words

* Replace the emoticons with the corresponding words like ":-(" by "sad". 

In [14]:
# Dictionary containing the emoticons (keys) and their corresponding words (values).
# Reference: https://pc.net/emoticons/
#
# Notes:
#   * Every emoticon appears exactly once. The source list contained the same key several times
#     (e.g. ':-(' as 'frown', 'frowning' and 'sad'); in a dict literal only the LAST value of a
#     repeated key takes effect, so the shadowed entries were dead and have been dropped.
#   * The order of the keys is the order the original literal produced, because code that
#     iterates over this dict applies the replacements in insertion order.
#   * The backslash of the 'dancing' emoticon is written as '\\' (a bare '\:' is an invalid
#     escape sequence); the key itself is still the four characters \ : D /.
emoticonsDict = {
    '(.V.)': 'alien',
    'O:-)': 'angel',
    'X-(': 'angry',
    '~:0': 'baby',
    ':-D': 'big grin',
    ':-#': 'braces',
    '=^.^=': 'cat',
    '*<:o)': 'clown',
    'O.o': 'confused',
    'B-)': 'cool',
    ':_(': 'crying',
    '\\:D/': 'dancing',
    '*-*': 'dazed',
    ':o3': 'dog',
    '#-o': 'doh',
    ':*)': 'drunk',
    '//_^': 'emo',
    '>:)': 'evil grin',
    '<><': 'fish',
    ':(': 'sad',
    ':-(': 'sad',
    '=P': 'frustrated',
    ':-P': 'frustrated',
    '8-)': 'glasses',
    '$_$': 'greedy',
    ':->': 'grin',
    ':-)': 'happy',
    ':)': 'happy',
    '=)': 'happy',
    '#': 'hashtag',
    '<3': 'love',
    '{}': 'hug',
    ':-|': 'indifferent',
    'X-p': 'joking',
    ':*': 'kiss',
    ':-*': 'kiss',
    ':-)*': 'kiss',
    '(-}{-)': 'kissing',
    'XD': 'laughing',
    '=D': 'very happy',
    ')-:': 'left-handed sad face',
    '(-:': 'Left-handed smiley face',
    '=/': 'mad',
    ':-)(-:': 'married',
    '@': 'mention',
    '<:3)~': 'mouse',
    '~,~': 'napping',
    ':-B': 'nerd',
    '^_^': 'overjoyed',
    '<l:0': 'partying',
    ':-/': 'perplexed',
    '=8)': 'Pig',
    '@~)~~~~': 'rose',
    '=(': 'sad',
    ':S': 'sarcastic',
    ':-@': 'screaming',
    '=O': 'shocked',
    ':-o': 'shocked',
    ':-Q': 'smoking',
    ':>': 'smug',
    ':P': 'sticking tongue out',
    ':o': 'surprised',
    ':-J': 'tongue in cheek',
    ':-&': 'tongue tied',
    '=-O': 'uh-oh',
    ':-E': 'vampire',
    ';-)': 'winking',
    ';)': 'winking',
    '|-O': 'yawn',
    '8-#': 'zombie'
}

In [15]:
# Replace Emoticons with correponding words
def replaceEmoticons(text):
    '''
    Function to replace the emoticons of a given text with their corresponding words.
    
    The words are looked up in 'emoticonsDict'. A description made of several words is joined with
    underscores so that it stays a single token (e.g. 'big grin' -> 'big_grin'), and each
    replacement is padded with a space on both sides.
    
    Parameter:
    ---------
    text: str
        Text in which the emoticons have to be replaced.
        
    Returns:
    -------
    str
        The text with the emoticons replaced and every run of two or more spaces collapsed into one.
    '''
    
    # Try the longest emoticons first. Otherwise a short emoticon that is part of a longer one
    # (':-)' inside ':-)*', '@' inside '@~)~~~~', '#' inside '8-#') is replaced first and the
    # longer emoticon can never match. 'sorted()' is stable, so emoticons of the same length are
    # still processed in dictionary order.
    for emoticon in sorted(emoticonsDict, key=len, reverse=True):
        
        word = "_".join(emoticonsDict[emoticon].split())
        
        text = text.replace(emoticon, ' ' + word + ' ')
    
    # Remove the extra space if any. Done once after all replacements: no emoticon contains a
    # space, so squeezing the spaces inside the loop could not influence any later match.
    text = re.sub(r'[ ][ ]+', ' ', text)
    
    return text

#### Example - Replace emoticons with words

In [16]:
%%time
# Sample text containing two emoticons, ':-)' and ':-('.
text = 'In Joy, people are :-) and in despair, people are :-('

# Call the 'replaceEmoticons()' to replace emoticons by their corresponding words.
# The returned string is shown as the cell output.
replaceEmoticons(text)

CPU times: user 152 µs, sys: 0 ns, total: 152 µs
Wall time: 156 µs


'In Joy, people are happy and in despair, people are sad '

----

## 2.6. Remove Special Characters

* Remove special characters except the below four characters:
## <font color="blue">**'** **.** **?** **!**</font>

In [17]:
def removeSpecialChars(text, removeAll=False):
    '''
    Delete the special characters from a given text.
    
    Parameter:
    ---------
    text: str
        Text to be cleaned.
    removeAll: boolean
        True  - keep only the letters A-Z, a-z and the space.
        False - additionally keep the four characters ' . ? !
        
    Returns:
    -------
    str
        The cleaned text, with every run of two or more spaces collapsed into one space.
    '''
    
    # Pick the character class describing everything that has to be deleted
    unwantedChars = r'[^A-Za-z ]+' if removeAll == True else r'[^A-Za-z\'.?! ]+'
    
    cleanedText = re.sub(unwantedChars, '', text)
    
    # Deleting characters can leave consecutive spaces behind; squeeze them into one.
    return re.sub(r'[ ][ ]+', ' ', cleanedText)

#### Example - Removal of special characters

In [18]:
%%time
# Sample text sprinkled with special characters (# ! ' % & @ . ?).
text = 'This # is ! a \' sentence % with & some speci@al characters. Is\'nt it?'

# Call the 'removeSpecialChars()' to remove the special characters from a text.
# 'removeAll' is left at its default (False), so the characters ' . ? ! are kept.
removeSpecialChars(text)

CPU times: user 98 µs, sys: 83 µs, total: 181 µs
Wall time: 186 µs


"This is ! a ' sentence with some special characters. Is'nt it?"

----

## 2.7. Replace Sentence end markers with special tokens

* Replacing the sentence end markers with special tokens.
* This is done to not lose information about these marks at the stage of transformation of the text into a word embedding.
* This can be omitted while using BERT.
* The following end markers will be replaced:<br>
&emsp; i. ! $\longrightarrow$ exclmrk.<br>
&emsp; ii. ? $\longrightarrow$ qstmrk.<br>
&emsp; iii. . $\longrightarrow$ eosmkr.<br>

In [19]:
def replaceSpecialTokens(text, isBERTUsed=False):
    '''
    Swap the sentence end markers ! ? . of a given text for the word tokens
    'exclmrk', 'qstmrk' and 'eosmkr'.
    
    Parameter:
    ---------
    text: str
        Text in which the sentence end markers have to be replaced.
    isBERTUsed: boolean
        When this flag is not False the text is returned untouched (the step is skipped for BERT).
        
    Returns:
    -------
    str
        The text with the markers replaced and every run of two or more spaces collapsed into
        one, or the unchanged text when the step is skipped.
    '''
    
    # (marker pattern, replacement token) pairs, in the order in which they are applied.
    # Each token is padded with spaces so that it becomes a word of its own.
    markerTokens = (
        (r'[!]', ' exclmrk '),
        (r'[?]', ' qstmrk '),
        (r'[.]', ' eosmkr '),
    )
    
    if (isBERTUsed == False):
        
        for markerPattern, token in markerTokens:
            
            text = re.sub(markerPattern, token, text)
        
        # The padding may create consecutive spaces; squeeze them into a single space.
        text = re.sub(r'[ ][ ]+', ' ', text)
    
    return text

#### Example - Replacing special tokens

In [20]:
%%time
# Sample text containing all three sentence end markers (! ? .).
text = 'This is amazing! Is\'nt it? We should do more such stuffs.'

# Call the 'replaceSpecialTokens' to replace special tokens with words.
# 'isBERTUsed' is left at its default (False), so the replacement is applied.
replaceSpecialTokens(text)

CPU times: user 117 µs, sys: 99 µs, total: 216 µs
Wall time: 222 µs


"This is amazing exclmrk Is'nt it qstmrk We should do more such stuffs eosmkr "

----

## 2.8. Decontraction

* A contraction, or short form, is an abbreviated form of a word or group of words, from which one or more letters have been left out and replaced by an apostrophe. [Reference](https://typely.com/blogs/entry/15-how-to-use-english-contractions-correctly-with-word-list-and-examples/). They are very common in English sentences.
* For example:<br>
&emsp; i. I am $\longrightarrow$ I'm.<br>
&emsp; ii. He is $\longrightarrow$ He's.<br>
&emsp; iii. It is $\longrightarrow$ It's.<br>
&emsp; iv. We will $\longrightarrow$ We'll.<br>
* We need to de-contract (the opposite of contraction) such words to their original form, to help with text standardization.


* Here we will use a dictionary (contraction map) containing the contracted form in its keys and their corresponding expanded form in its values.

In [21]:
# Contraction map: contracted form (key) -> expanded form (value).
# Reference- https://github.com/dipanjanS/practical-machine-learning-with-python/blob/master/bonus%20content/nlp%20proven%20approach/contractions.py
# Note: the first-person contractions are listed both capitalised ('I\'m') and in lowercase ('i\'m').
contractionMap = {
    'ain\'t': 'is not',
    'aren\'t': 'are not',
    'can\'t': 'cannot',
    'can\'t\'ve': 'cannot have',
    '\'cause': 'because',
    'could\'ve': 'could have',
    'couldn\'t': 'could not',
    'couldn\'t\'ve': 'could not have',
    'didn\'t': 'did not',
    'doesn\'t': 'does not',
    'don\'t': 'do not',
    'hadn\'t': 'had not',
    'hadn\'t\'ve': 'had not have',
    'hasn\'t': 'has not',
    'haven\'t': 'have not',
    'he\'d': 'he would',
    'he\'d\'ve': 'he would have',
    'he\'ll': 'he will',
    'he\'ll\'ve': 'he will have',
    'he\'s': 'he is',
    'how\'d': 'how did',
    'how\'d\'y': 'how do you',
    'how\'ll': 'how will',
    'how\'s': 'how is',
    'I\'d': 'I would',
    'I\'d\'ve': 'I would have',
    'I\'ll': 'I will',
    'I\'ll\'ve': 'I will have',
    'I\'m': 'I am',
    'I\'ve': 'I have',
    'i\'d': 'i would',
    'i\'d\'ve': 'i would have',
    'i\'ll': 'i will',
    'i\'ll\'ve': 'i will have',
    'i\'m': 'i am',
    'i\'ve': 'i have',
    'isn\'t': 'is not',
    'it\'d': 'it would',
    'it\'d\'ve': 'it would have',
    'it\'ll': 'it will',
    'it\'ll\'ve': 'it will have',
    'it\'s': 'it is',
    'let\'s': 'let us',
    'ma\'am': 'madam',
    'mayn\'t': 'may not',
    'might\'ve': 'might have',
    'mightn\'t': 'might not',
    'mightn\'t\'ve': 'might not have',
    'must\'ve': 'must have',
    'mustn\'t': 'must not',
    'mustn\'t\'ve': 'must not have',
    'needn\'t': 'need not',
    'needn\'t\'ve': 'need not have',
    'o\'clock': 'of the clock',
    'oughtn\'t': 'ought not',
    'oughtn\'t\'ve': 'ought not have',
    'shan\'t': 'shall not',
    'sha\'n\'t': 'shall not',
    'shan\'t\'ve': 'shall not have',
    'she\'d': 'she would',
    'she\'d\'ve': 'she would have',
    'she\'ll': 'she will',
    'she\'ll\'ve': 'she will have',
    'she\'s': 'she is',
    'should\'ve': 'should have',
    'shouldn\'t': 'should not',
    'shouldn\'t\'ve': 'should not have',
    'so\'ve': 'so have',
    'so\'s': 'so as',
    'that\'d': 'that would',
    'that\'d\'ve': 'that would have',
    'that\'s': 'that is',
    'there\'d': 'there would',
    'there\'d\'ve': 'there would have',
    'there\'s': 'there is',
    'they\'d': 'they would',
    'they\'d\'ve': 'they would have',
    'they\'ll': 'they will',
    'they\'ll\'ve': 'they will have',
    'they\'re': 'they are',
    'they\'ve': 'they have',
    'to\'ve': 'to have',
    'wasn\'t': 'was not',
    'we\'d': 'we would',
    'we\'d\'ve': 'we would have',
    'we\'ll': 'we will',
    'we\'ll\'ve': 'we will have',
    'we\'re': 'we are',
    'we\'ve': 'we have',
    'weren\'t': 'were not',
    'what\'ll': 'what will',
    'what\'ll\'ve': 'what will have',
    'what\'re': 'what are',
    'what\'s': 'what is',
    'what\'ve': 'what have',
    'when\'s': 'when is',
    'when\'ve': 'when have',
    'where\'d': 'where did',
    'where\'s': 'where is',
    'where\'ve': 'where have',
    'who\'ll': 'who will',
    'who\'ll\'ve': 'who will have',
    'who\'s': 'who is',
    'who\'ve': 'who have',
    'why\'s': 'why is',
    'why\'ve': 'why have',
    'will\'ve': 'will have',
    'won\'t': 'will not',
    'won\'t\'ve': 'will not have',
    'would\'ve': 'would have',
    'wouldn\'t': 'would not',
    'wouldn\'t\'ve': 'would not have',
    'y\'all': 'you all',
    'y\'all\'d': 'you all would',
    'y\'all\'d\'ve': 'you all would have',
    'y\'all\'re': 'you all are',
    'y\'all\'ve': 'you all have',
    'you\'d': 'you would',
    'you\'d\'ve': 'you would have',
    'you\'ll': 'you will',
    'you\'ll\'ve': 'you will have',
    'you\'re': 'you are',
    'you\'ve': 'you have'
}

In [22]:
def decontract(text, isBERTUsed=False):
    '''
    Function to decontract a given text.
    
    The text is converted to lowercase first and then every contraction listed in
    'contractionMap' is replaced by its expanded form. A contraction is only replaced when it
    stands on its own, i.e. when it is not glued to a letter, digit or underscore on either side.
    
    Parameter:
    ---------
    text: str
        Text to be decontracted.
    isBERTUsed: boolean
        Boolean flag to indicate if BERT is used in the modelling, then do not apply this pre-processing.
        
    Returns:
    -------
    str
        The lowercased, decontracted text, or the unchanged text when the step is skipped.
    '''
    
    if isBERTUsed==False:
        
        # Convert to lowercase once, before the loop (the result does not change between iterations).
        # Keys of 'contractionMap' that contain uppercase letters can therefore never match here.
        text = lowercase(text)
        
        # Longest contractions first, so that a compound such as "couldn't've" is expanded as a
        # whole ('could not have') instead of being broken up by its shorter prefix "couldn't".
        for word in sorted(contractionMap, key=len, reverse=True):
            
            expanded = contractionMap[word]
            
            # re.escape(): the key is matched literally, not interpreted as a regular expression.
            # (?<!\w) / (?!\w): do not match inside a longer word - without them "toilet's" would
            # become "toilet us" (via "let's") and "show's" would become "show is" (via "how's").
            pattern = r'(?<!\w)' + re.escape(word) + r'(?!\w)'
            
            # A callable replacement inserts the expanded form verbatim (no backslash processing).
            text = re.sub(pattern, lambda _match: expanded, text)
        
    return text

#### Example - Decontracting a text

In [23]:
%%time
# Sample text containing two contracted words ("You've" and "won't").
contractText = "You've got the potential they won't believe."

# Call the 'decontract()' function to decontract a given text.
# The function lowercases the text before expanding, so the displayed result is all lowercase.
decontract(contractText)

CPU times: user 4.54 ms, sys: 447 µs, total: 4.99 ms
Wall time: 5 ms


'you have got the potential they will not believe.'

----

## 2.9. Common Function

* Include all the above data-preprocessing functions inside a single function that can be used to perform the preprocessing on the data.

In [24]:
def preprocess(text, html=True, accent=True, lower=True, ipLinkNum=True, emoticon=True, specialChar=True, 
               specialToken=True, decontraction=True, isBERTUsed=False, removeAllSpecialChar=False, hyperlink=False):
    '''
    Run the complete data-preprocessing pipeline on a given text.
    
    The individual steps are applied in a fixed order (HTML tags, accented characters, lowercase,
    IP address / hyperlink / number removal, emoticons, special characters, sentence end markers,
    decontraction). Each step can be switched off through its flag.
    
    Parameters:
    ----------
    text: str
        Text to be pre-processed.
    html: boolean
        Whether to remove the html tags.
    accent: boolean
        Whether to remove the accented characters.
    lower: boolean
        Whether to convert the text to lowercase.
    ipLinkNum: boolean
        Whether to run the IP Address / hyperlink / number removal step.
    emoticon: boolean
        Whether to replace the emoticons with their corresponding words.
    specialChar: boolean
        Whether to remove the special characters.
    specialToken: boolean
        Whether to replace the sentence end markers with their word tokens.
    decontraction: boolean
        Whether to expand the contractions.
    isBERTUsed: boolean
        Passed on to 'replaceSpecialTokens()' and 'decontract()', which skip their work when BERT is used.
    removeAllSpecialChar: boolean
        Passed on to 'removeSpecialChars()': remove all special characters, or all except ' . ? !
    hyperlink: boolean
        Passed on to 'removeIPLinkNum()': whether the hyperlinks are removed as well.
        
    Returns:
    -------
    str
        The pre-processed text.
    '''
    
    # Ordered (switch, step) pairs. Every step takes the current text and returns the new text;
    # steps that need extra arguments receive them from the enclosing call.
    pipeline = (
        (html, lambda current: removeHTMLTags(current)),
        (accent, lambda current: removeAccentedChars(current)),
        (lower, lambda current: lowercase(current)),
        (ipLinkNum, lambda current: removeIPLinkNum(current, hyperlink=hyperlink)),
        (emoticon, lambda current: replaceEmoticons(current)),
        (specialChar, lambda current: removeSpecialChars(current, removeAllSpecialChar)),
        (specialToken, lambda current: replaceSpecialTokens(current, isBERTUsed)),
        (decontraction, lambda current: decontract(current, isBERTUsed)),
    )
    
    for enabled, step in pipeline:
        
        if enabled == True:
            
            text = step(text)
    
    return text

#### Example - All data-preprocessing

In [25]:
%%time

# Sample text exercising every step: a hyperlink, a contraction, an IP address, an html tag,
# accented characters, a number, an emoticon and a few special characters.
# Note: the trailing backslashes continue the string literal on the next line, so the leading
# spaces of the continuation lines are part of the text.
text = 'The content is downloaded from http://www.wikipedia.com , which could\'ve also been downloaded from the IP Address\
        192.128.10.10. The content has some html content as well (<span>Some span content</span>). Few accent characters:\
        orčpžsíáýd stävänger hell. The content was downloaded in 2022. Isn\'t that a recent activity? :-) &*!@&'

# Call the 'preprocess()' function to pre-process the text.
# Hyperlink removal is off by default in the function signature, so it is enabled explicitly here.
preprocess(text, hyperlink=True)

CPU times: user 538 µs, sys: 448 µs, total: 986 µs
Wall time: 996 µs


'the content is downloaded from which could have also been downloaded from the ip address eosmkr the content has some html content as well some span content eosmkr few accent characters orcpzsiayd stavanger hell eosmkr the content was downloaded in eosmkr is not that a recent activity qstmrk happy exclmrk mention '

----

## 2.10. Apply the data pre-processing on the comment text

* Now, we will apply the data-preprocessing on the comment text of the training dataset.

In [27]:
%%time
if os.path.isfile('Data/preprocessed_train.csv'):
    
    # A pre-processed copy from a previous run exists: load it instead of pre-processing again.
    data = pd.read_csv('Data/preprocessed_train.csv')

else:
    
    # First run: start from the raw training data.
    data = pd.read_csv('Data/train.csv')
    
    # Preprocess the comment text and store the processed text in a new feature 'preprocessed_text1'
    data['preprocessed_text1'] = data['comment_text'].apply(preprocess)
    
    # Cache the result for the next run. The keyword value must be the Python literal 'False'
    # (a lowercase 'false' is an undefined name and raises a NameError); index=False keeps the
    # DataFrame index out of the file.
    data.to_csv('Data/preprocessed_train.csv', index=False)

CPU times: user 20.1 s, sys: 1.62 s, total: 21.8 s
Wall time: 21.8 s


----

# 3. Feature Engineering

* New features can be generated from the comment text, such as the no. of words, no. of unique words, no. of obscene words, etc.
* Based on correlation of no. of words and no. of unique words features, it was found that they are highly correlated to each other.
* Hence, we will create a new feature to capture the number of words in the comment text.

In [28]:
%%time
# Create one new feature, 'word_count': the number of whitespace-separated words in the
# original (not pre-processed) comment text.
data['word_count'] = data['comment_text'].apply(lambda x: len(x.split()))

CPU times: user 5.55 s, sys: 18.9 ms, total: 5.57 s
Wall time: 5.57 s


In [36]:
%%time
# Save the pre-processed data, now including the 'word_count' feature.
# This overwrites 'Data/preprocessed_train.csv', the same file the loading cell reads when it exists.
data.to_csv('Data/preprocessed_train.csv', index=False)

In [29]:
# Display the original comment text, pre-processed comment text and word count features.
# The bare expression is the cell output.
data[['comment_text', 'preprocessed_text1', 'word_count']]

Unnamed: 0,comment_text,preprocessed_text1,word_count
0,"This is so cool. It's like, 'would you want yo...",this is so cool eosmkr it is like 'would you w...,19
1,Thank you!! This would make my life a lot less...,thank you exclmrk exclmrk this would make my l...,22
2,This is such an urgent design problem; kudos t...,this is such an urgent design problem kudos to...,16
3,Is this something I'll be able to install on m...,is this something i will be able to install on...,17
4,haha you guys are a bunch of losers.,haha you guys are a bunch of losers eosmkr,8
...,...,...,...
1804869,"Maybe the tax on ""things"" would be collected w...",maybe the tax on things would be collected whe...,37
1804870,What do you call people who STILL think the di...,what do you call people who still think the di...,15
1804871,"thank you ,,,right or wrong,,, i am following ...",thank you right or wrong i am following your a...,10
1804872,Anyone who is quoted as having the following e...,anyone who is quoted as having the following e...,58


----

----