In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/text-reason/evaluation.xlsx
/kaggle/input/text-reason/train.xlsx


In [2]:
df = pd.read_excel("/kaggle/input/text-reason/train.xlsx")

In [3]:
df

Unnamed: 0,text,reason,label
0,this is an amazing app for online classes!but,good app for conducting online classes,1.0
1,very practical and easy to use,app is user-friendly,1.0
2,this app is very good for video conferencing.,good for video conferencing,1.0
3,i can not download this zoom app,unable to download zoom app,1.0
4,i am not able to download this app,want to download the app,1.0
...,...,...,...
2056,i am not getting jio tv properly.,unable to use with jio tv,1.0
2057,(learn from the netflix interface),netflix ui is better,1.0
2058,"hello,\ndisney+ must also be installed on chro...",unable to use disney on chromecast,1.0
2059,it is a shame that disney+ does not work on tv...,unable to use disney plus on tv,1.0


In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import re
from bs4 import BeautifulSoup

import warnings
warnings.filterwarnings('ignore')

In [5]:
def preprocess(q):
    
    q = str(q).lower().strip()
    
    # Replace certain special characters with their string equivalents
    q = q.replace('%', ' percent')
    q = q.replace('$', ' dollar ')
    q = q.replace('₹', ' rupee ')
    q = q.replace('€', ' euro ')
    q = q.replace('@', ' at ')
    
    # The pattern '[math]' appears around 900 times in the whole dataset.
    q = q.replace('[math]', '')
    
    # Replacing some numbers with string equivalents (not perfect, can be done better to account for more cases)
    q = q.replace(',000,000,000 ', 'b ')
    q = q.replace(',000,000 ', 'm ')
    q = q.replace(',000 ', 'k ')
    q = re.sub(r'([0-9]+)000000000', r'\1b', q)
    q = re.sub(r'([0-9]+)000000', r'\1m', q)
    q = re.sub(r'([0-9]+)000', r'\1k', q)
    
    # Decontracting words
    # https://en.wikipedia.org/wiki/Wikipedia%3aList_of_English_contractions
    # https://stackoverflow.com/a/19794953
    contractions = { 
    "ain't": "am not",
    "aren't": "are not",
    "can't": "can not",
    "can't've": "can not have",
    "'cause": "because",
    "could've": "could have",
    "couldn't": "could not",
    "couldn't've": "could not have",
    "didn't": "did not",
    "doesn't": "does not",
    "don't": "do not",
    "hadn't": "had not",
    "hadn't've": "had not have",
    "hasn't": "has not",
    "haven't": "have not",
    "he'd": "he would",
    "he'd've": "he would have",
    "he'll": "he will",
    "he'll've": "he will have",
    "he's": "he is",
    "how'd": "how did",
    "how'd'y": "how do you",
    "how'll": "how will",
    "how's": "how is",
    "i'd": "i would",
    "i'd've": "i would have",
    "i'll": "i will",
    "i'll've": "i will have",
    "i'm": "i am",
    "i've": "i have",
    "isn't": "is not",
    "it'd": "it would",
    "it'd've": "it would have",
    "it'll": "it will",
    "it'll've": "it will have",
    "it's": "it is",
    "let's": "let us",
    "ma'am": "madam",
    "mayn't": "may not",
    "might've": "might have",
    "mightn't": "might not",
    "mightn't've": "might not have",
    "must've": "must have",
    "mustn't": "must not",
    "mustn't've": "must not have",
    "needn't": "need not",
    "needn't've": "need not have",
    "o'clock": "of the clock",
    "oughtn't": "ought not",
    "oughtn't've": "ought not have",
    "shan't": "shall not",
    "sha'n't": "shall not",
    "shan't've": "shall not have",
    "she'd": "she would",
    "she'd've": "she would have",
    "she'll": "she will",
    "she'll've": "she will have",
    "she's": "she is",
    "should've": "should have",
    "shouldn't": "should not",
    "shouldn't've": "should not have",
    "so've": "so have",
    "so's": "so as",
    "that'd": "that would",
    "that'd've": "that would have",
    "that's": "that is",
    "there'd": "there would",
    "there'd've": "there would have",
    "there's": "there is",
    "they'd": "they would",
    "they'd've": "they would have",
    "they'll": "they will",
    "they'll've": "they will have",
    "they're": "they are",
    "they've": "they have",
    "to've": "to have",
    "wasn't": "was not",
    "we'd": "we would",
    "we'd've": "we would have",
    "we'll": "we will",
    "we'll've": "we will have",
    "we're": "we are",
    "we've": "we have",
    "weren't": "were not",
    "what'll": "what will",
    "what'll've": "what will have",
    "what're": "what are",
    "what's": "what is",
    "what've": "what have",
    "when's": "when is",
    "when've": "when have",
    "where'd": "where did",
    "where's": "where is",
    "where've": "where have",
    "who'll": "who will",
    "who'll've": "who will have",
    "who's": "who is",
    "who've": "who have",
    "why's": "why is",
    "why've": "why have",
    "will've": "will have",
    "won't": "will not",
    "won't've": "will not have",
    "would've": "would have",
    "wouldn't": "would not",
    "wouldn't've": "would not have",
    "y'all": "you all",
    "y'all'd": "you all would",
    "y'all'd've": "you all would have",
    "y'all're": "you all are",
    "y'all've": "you all have",
    "you'd": "you would",
    "you'd've": "you would have",
    "you'll": "you will",
    "you'll've": "you will have",
    "you're": "you are",
    "you've": "you have"
    }

    q_decontracted = []

    for word in q.split():
        if word in contractions:
            word = contractions[word]

        q_decontracted.append(word)

    q = ' '.join(q_decontracted)
    q = q.replace("'ve", " have")
    q = q.replace("n't", " not")
    q = q.replace("'re", " are")
    q = q.replace("'ll", " will")
    
    # Removing HTML tags
    q = BeautifulSoup(q)
    q = q.get_text()
    
    # Remove punctuations
    pattern = re.compile('\W')
    q = re.sub(pattern, ' ', q).strip()

    
    return q
    

In [6]:
df.reason = df.reason.apply(preprocess)

In [7]:
df.text = df.text.apply(preprocess)

In [8]:
! pip install nlpaug

Collecting nlpaug
  Downloading nlpaug-1.1.11-py3-none-any.whl (410 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m410.5/410.5 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting gdown>=4.0.0
  Downloading gdown-4.6.0-py3-none-any.whl (14 kB)
Installing collected packages: gdown, nlpaug
Successfully installed gdown-4.6.0 nlpaug-1.1.11
[0m

In [9]:
import nlpaug
import nlpaug.augmenter.word as naw

In [10]:
aug = naw.SynonymAug(aug_src='wordnet',aug_max=2)

[nltk_data] Downloading package wordnet to /usr/share/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /usr/share/nltk_data...


In [11]:
aug.augment("Misleading reviews. Worst coffee ever had, and sorely disappointing vibe.",n=2)

['Misleading reviews. Worst coffee ever so had, and painfully disappointing vibe.',
 'Misleading reviews. Worst coffee ever had, and sorely dissatisfactory vibe.']

In [12]:
aug2 = naw.AntonymAug()

In [13]:
aug2.augment("Worst coffee ever had, and sorely disappointing vibe.",)

['Worst coffee never refuse, and painlessly disappointing vibe.']

In [14]:
antonmy = []
y = []

for i in range(len(df)) :
    temp = []
    temp.append(df.text.iloc[i])
    temp.append(aug2.augment(df.reason.iloc[i])[0])
    antonmy.append(temp)
    y.append(0)
    
for i in range(len(df)) :
    temp = []
    temp.append(aug2.augment(df.text.iloc[i])[0])
    temp.append(df.reason.iloc[i])
    antonmy.append(temp)
    y.append(0)
    
for i in range(len(df)) :
    temp = []
    temp.append(aug.augment(df.text.iloc[i])[0])
    temp.append(df.reason.iloc[i])
    antonmy.append(temp)
    y.append(1)
    
antonmy = np.array(antonmy)
y = np.array(y)
print(antonmy.shape, y.shape)

(6183, 2) (6183,)


In [15]:
df.shape

(2061, 3)

In [16]:
df.columns

Index(['text', 'reason', 'label'], dtype='object')

In [17]:
new = pd.DataFrame(antonmy, columns=['text','reason'])
new['label'] = y
new

Unnamed: 0,text,reason,label
0,this is an amazing app for online classes but,evil app for conducting off line classes,0
1,very practical and easy to use,app differ user friendly,0
2,this app is very good for video conferencing,evil for video conferencing,0
3,i can not download this zoom app,able to upload zoom app,0
4,i am not able to download this app,want to upload the app,0
...,...,...,...
6178,i am non getting jio tv decent,unable to use with jio tv,1
6179,learn from the netflix interface,netflix ui is better,1
6180,hello walter elias disney must also be install...,unable to use disney on chromecast,1
6181,it is a shame that disney act not work on tv b...,unable to use disney plus on tv,1


In [18]:
temp = pd.concat([df, new], axis=0)
temp

Unnamed: 0,text,reason,label
0,this is an amazing app for online classes but,good app for conducting online classes,1.0
1,very practical and easy to use,app is user friendly,1.0
2,this app is very good for video conferencing,good for video conferencing,1.0
3,i can not download this zoom app,unable to download zoom app,1.0
4,i am not able to download this app,want to download the app,1.0
...,...,...,...
6178,i am non getting jio tv decent,unable to use with jio tv,1.0
6179,learn from the netflix interface,netflix ui is better,1.0
6180,hello walter elias disney must also be install...,unable to use disney on chromecast,1.0
6181,it is a shame that disney act not work on tv b...,unable to use disney plus on tv,1.0


In [19]:
temp = temp.sample(frac=1).reset_index(drop=True)
temp

Unnamed: 0,text,reason,label
0,this app keeps closing drink down,app keeps auto closing,1.0
1,i can not see the participant menu at the same...,want to view participant menu,1.0
2,why can not i sign up for the program,want to sign up for the program,0.0
3,my app server goes down when i try to join my ...,app server goes down when trying to join class,1.0
4,sound unmake not idle on xiomi 10,sound does not work on xiomi 10,0.0
...,...,...,...
8239,the meeting will end but in zoom you install t...,want to start zoom without app,0.0
8240,every time i want to close it i lack to uninst...,unable to load even after reinstalling the app,0.0
8241,i can not register this how work 0 points is d...,want to erase points,0.0
8242,the loading page disappear and it differ idle ...,unable to load page,0.0


In [20]:
temp.label.value_counts()

1.0    4122
0.0    4122
Name: label, dtype: int64

In [16]:
aug.augment("zoom is a very very bad app because there are")

['zoom is a very very bad app because at that place be']

# ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------