# Putting It All Together (Part 2)

The objective of this Python notebook is to demonstrate how unseen scam reports can be classified using the best deep learning model we have trained.

## Import the necessary libraries

In [11]:
import pandas as pd
import numpy as np
from keras.models import load_model
from keras.preprocessing.sequence import pad_sequences
import pickle
import re
import spacy
import nltk
from nltk.corpus import stopwords

In [12]:
# Load the spaCy English pipeline once at module level; lemmatise() below reads this global `nlp`.
nlp = spacy.load("en_core_web_sm")

## Define functions

In [13]:
def predict_label(model, text):
    """Classify one scam report and return the name of the predicted scam type.

    NOTE(review): this notebook defines `predict_label` a second time further
    down with a different signature; once that later cell runs it replaces this
    definition.

    The function reads two names from the notebook's global scope at call time:
    `tokenizer` (used for `texts_to_sequences`) and `maxlen` (the padded
    sequence length). `maxlen` is not assigned at module level in the cells
    shown in this notebook - confirm it exists before calling this version.

    Parameters
    ----------
    model : object exposing `predict(padded_sequences)`, returning one row of
        class probabilities per input sequence.
    text : str
        Raw report text; it is cleaned with `pre_process` before tokenisation.

    Returns
    -------
    str
        The label mapped to the highest-probability class index.

    Raises
    ------
    ValueError
        If the arg-max index has no entry in the label map below (the original
        loop left `predicted` unassigned in that case and failed with an
        UnboundLocalError).
    """
    # Class index -> label. Only indices 0-4 are known to this version.
    idx_to_label = {0: 'Home/Room Rental Scam',
                    1: 'Impersonation Scam',
                    2: 'Internet Love Scam',
                    3: 'Investment Scam',
                    4: 'Online Purchase Scam'}

    sequences = tokenizer.texts_to_sequences([pre_process(text)])
    padded = pad_sequences(sequences, maxlen=maxlen)
    pred_probabilities = model.predict(padded)

    # A single report is scored, so only the first row of probabilities is relevant.
    Y_pred = int(np.argmax(pred_probabilities[0]))

    if Y_pred not in idx_to_label:
        raise ValueError(
            "Predicted class index {} has no entry in the label mapping".format(Y_pred)
        )

    return idx_to_label[Y_pred]

In [14]:
def unabbreviate(text):
    """Expand known acronyms in `text` to their unabbreviated forms.

    The text is split with `nltk.word_tokenize`. A token is replaced when it
    equals an acronym exactly as written in the table below (upper case) or
    its all-lower-case form; mixed-case spellings such as 'Id' are left alone.
    Tokens are then re-joined with single spaces, and the space that this
    re-joining puts in front of '.' and ',' is removed again.

    Parameters
    ----------
    text : str
        Text to scan for acronyms.

    Returns
    -------
    str
        The re-joined text with matching acronyms expanded.
    """
    # Acronym -> unabbreviated form
    acronym_dict = {'ICA': 'immigration and checkpoints authority',
                    'ID': 'identity',
                    'DBS': 'dbs bank',
                    'FB': 'facebook',
                    'SG': 'singapore',
                    'UK': 'united kingdom',
                    'NRIC': 'identity number',
                    'IC': 'identity number',
                    'I/C': 'identity number',
                    'HQ': 'headquarters',
                    'MOM': 'ministry of manpower',
                    'POSB': 'posb bank',
                    'MOH': 'ministry of health',
                    'OCBC': 'ocbc bank',
                    'CMB': 'cmb bank',
                    'SPF': 'singapore police force',
                    'IRAS': 'inland revenue authority of singapore',
                    'UOB': 'uob bank',
                    'IG': 'instagram',
                    'HP': 'handphone',
                    'HK': 'hong kong',
                    'KL': 'kuala lumpur',
                    'PM': 'private message',
                    'MRT': 'mass rapid transit train',
                    'DOB': 'date of birth',
                    'ATM': 'automated teller machine',
                    'MAS': 'monetary authority of singapore',
                    'PRC': 'people republic of china',
                    'USS': 'universal studios singapore',
                    'MIA': 'missing in action',
                    'GST': 'goods and services tax',
                    'CIMB': 'cimb bank',
                    'HSBC': 'hsbc bank',
                    'MBS': 'marina_bay_sands',
                    'LTD': 'limited',
                    'ASAP': 'as soon as possible',
                    'IBAN': 'international bank account number',
                    'HR': 'human resource',
                    'AMK': 'ang mo kio',
                    'CID': 'criminal investigation department',
                    'PTE': 'private',
                    'OTP': 'one time password',
                    'WA': 'whatsapp',
                    'PC': 'personal computer',
                    'ACRA': 'accounting and corporate regulatory authority',
                    'CPF': 'central provident fund',
                    'ISD': 'internal security department',
                    'WP': 'work permit',
                    'OKC': 'okcupid',
                    'HDB': 'housing development board',
                    'NPC': 'neighbourhood police centre',
                    'MOP': 'member of public',
                    'MOPS': 'members of public',
                    'IMO': 'in my opinion',
                    'ISP': 'internet service provider',
                    'IMDA': 'infocomm media development authority',
                    'CB': 'circuit breaker',
                    'MINLAW': 'ministry of law',
                    'LMAO': 'laugh my ass off',
                    'AKA': 'also known as',
                    'BF': 'boyfriend',
                    'W/O': 'without',
                    'MOF': 'ministry of finance'}

    # Index each acronym under the two spellings that are accepted (as written
    # and all-lower-case) so every token needs one hash lookup instead of a
    # scan over the whole table. setdefault keeps the first entry on a clash,
    # matching the first-match-wins order of a sequential scan.
    lookup = {}
    for acronym, expansion in acronym_dict.items():
        lookup.setdefault(acronym, expansion)
        lookup.setdefault(acronym.lower(), expansion)

    # Tokenize the text and swap in the expansion wherever a token is a known acronym
    tokens = [lookup.get(token, token) for token in nltk.word_tokenize(text)]

    return ' '.join(tokens).replace(" .", ".").replace(" ,", ",")

In [15]:
def remove_url(text):
    """Replace every URL-like substring of `text` with the literal token 'url_link'.

    A substring counts as a URL when it starts (case-insensitively) with
    'http://' / 'https://', with 'www' plus up to three digits and a dot, or
    with a domain-like prefix ending in '/'. Trailing sentence punctuation is
    not swallowed into the match.

    Parameters
    ----------
    text : str
        Text that may contain links.

    Returns
    -------
    str
        The text with each matched link replaced by 'url_link'.
    """
    url_pattern = r"(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'\".,<>?«»“”‘’]))"

    return re.sub(url_pattern, "url_link", text)

In [16]:
def decontract(phrase):
    """Expand English contractions in `phrase` (e.g. "won't" -> "will not").

    Both the straight apostrophe (') and the curly apostrophe (’) are handled.
    Every "'s" / "’s" becomes " is", possessives included.
    Source: https://stackoverflow.com/questions/43018030/replace-apostrophe-short-words-in-python

    Parameters
    ----------
    phrase : str
        Text that may contain contracted words.

    Returns
    -------
    str
        The text with the contractions listed below expanded.
    """
    # Applied strictly top to bottom. Within each apostrophe style the specific
    # forms come before the general suffixes, so "won't" is rewritten before
    # the generic "n't" rule can see it.
    substitutions = (
        # Straight apostrophe - specific
        (r"won\'t", "will not"),
        (r"can\'t", "can not"),
        (r"let\'s", "let us"),
        # Straight apostrophe - general
        (r"n\'t", " not"),
        (r"\'re", " are"),
        (r"\'s", " is"),
        (r"\'d", " would"),
        (r"\'ll", " will"),
        (r"\'t", " not"),
        (r"\'ve", " have"),
        (r"\'m", " am"),
        # Curly apostrophe - specific
        (r"won\’t", "will not"),
        (r"can\’t", "can not"),
        (r"let\’s", "let us"),
        # Curly apostrophe - general
        (r"n\’t", " not"),
        (r"\’re", " are"),
        (r"\’s", " is"),
        (r"\’d", " would"),
        (r"\’ll", " will"),
        (r"\’t", " not"),
        (r"\’ve", " have"),
        (r"\’m", " am"),
    )

    for pattern, expansion in substitutions:
        phrase = re.sub(pattern, expansion, phrase)

    return phrase

In [17]:
def remove_punct(text):
    """Strip a fixed set of punctuation marks from `text`.

    If `text` contains none of the marks listed in `marks_to_strip`, it is
    returned untouched. Otherwise every listed mark is deleted and, in the same
    pass, each '/' becomes a space and every '"' is deleted.

    NOTE(review): because of that condition, '/' and '"' survive in a text that
    has no other listed mark (e.g. "a/b" stays "a/b" while "a/b!" becomes
    "a b"). This long-standing behaviour is kept as is.

    Parameters
    ----------
    text : str
        Text to clean.

    Returns
    -------
    str
        The text without the specified punctuation marks.
    """
    # Punctuation marks whose presence triggers the clean-up and which are removed
    marks_to_strip = "``-±!@#$%^&*()+?:;”“’<>"

    if not any(ch in marks_to_strip for ch in text):
        return text

    # One translation pass: drop the listed marks plus '`' and '"', turn '/' into a space
    table = {ord(ch): None for ch in marks_to_strip + '`' + '"'}
    table[ord('/')] = ' '

    return text.translate(table)

In [18]:
def correct_misspelled_words(text):
    """Replace known misspellings and informal short forms in `text`.

    The text is split with `nltk.word_tokenize`; a token is replaced only when
    it equals a key of the table below exactly (the comparison is
    case-sensitive). Tokens are then re-joined with single spaces, after which
    the space before '.' and ',' and the spaces just inside '<' / '>' are
    removed.

    Parameters
    ----------
    text : str
        Text to correct.

    Returns
    -------
    str
        The re-joined text with matching tokens replaced.
    """
    # Misspelled / short form -> corrected form (each key listed once)
    spellcheck_dict = {'acct': 'account',
                       'acc': 'account',
                       'a/c': 'account',
                       'blk': 'block',
                       'alot': 'a lot',
                       'abit': 'a bit',
                       'watsapp': 'whatsapp',
                       'whatapps': 'whatsapp',
                       'whatapp': 'whatsapp',
                       'wadsapp': 'whatsapp',
                       'watapps': 'whatsapp',
                       'whatsapps': 'whatsapp',
                       # NOTE(review): multi-word key; presumably it can never equal a single token - confirm
                       'whats app': 'whatsapp',
                       'whatsaap': 'whatsapp',
                       'whatsap': 'whatsapp',
                       'whattsapp': 'whatsapp',
                       'whattapp': 'whatsapp',
                       'whataspp': 'whatsapp',
                       'whastapp': 'whatsapp',
                       'whatsapphe': 'whatsapp',
                       'abt': 'about',
                       'recieved': 'received',
                       'recieve': 'receive',
                       'hv': 'have',
                       'amt': 'amount',
                       'mths': 'months',
                       'gf': 'girlfriend',
                       'msia': 'malaysia',
                       'tranfer': 'transfer',
                       'trans': 'transfer',
                       'trf': 'transfer',
                       'becareful': 'be careful',
                       'frm': 'from',
                       'msgs': 'messages',
                       'msg': 'message',
                       'plz': 'please',
                       'pls': 'please',
                       'harrass': 'harass',
                       'sintel': 'singtel',
                       'ard': 'around',
                       'wk': 'week',
                       'fyi': 'for your information',
                       'govt': 'government',
                       'gov': 'government',
                       'thru': 'through',
                       'assent': 'accent',
                       'dun': 'do not',
                       'nv': 'never',
                       'sing-tel': 'singtel',
                       'insta': 'instagram',
                       'sg': 'singapore',
                       'payapl': 'paypal',
                       'carousel': 'carousell',
                       'tix': 'tickets',
                       'mandrain': 'mandarin',
                       'admin': 'administrative',
                       'bz': 'busy',
                       'daugter': 'daughter',
                       'cos': 'because',
                       'bcos': 'because',
                       'I-banking': 'internet banking',
                       'intl': 'international',
                       'shoppe': 'shopee',
                       'tis': 'this',
                       'docs': 'documents',
                       'doc': 'document',
                       'ytd': 'yesterday',
                       'tmr': 'tomorrow',
                       'mon': 'monday',
                       'tue': 'tuesday',
                       'tues': 'tuesday',
                       'wed': 'wednesday',
                       'thu': 'thursday',
                       'thur': 'thursday',
                       'thurs': 'thursday',
                       'fri': 'friday',
                       'wikipeida': 'wikipedia',
                       'juz': 'just',
                       'impt': 'important',
                       'transger': 'transfer',
                       'suspicios': 'suspicious',
                       'suspicius': 'suspicious',
                       'suspicous': 'suspicious',
                       'suspecious': 'suspicious',
                       'suspision': 'suspicion',
                       'nvr': 'never',
                       'instagam': 'instagram',
                       'instagramm': 'instagram',
                       "s'pore": "singapore",
                       'polive': 'police',
                       'linkein': 'linkedin',
                       'messanger': 'messenger',
                       'scammmer': 'scammer',
                       'laywer': 'lawyer',
                       'dunno': 'do not know',
                       'tidner': 'tinder',
                       'rcvd': 'received',
                       'infomed': 'informed',
                       'informaing': 'informing',
                       'knowldge': 'knowledge'}

    # Tokenize the text and look each token up directly; unknown tokens pass through unchanged
    tokens = [spellcheck_dict.get(token, token) for token in nltk.word_tokenize(text)]

    return ' '.join(tokens).replace(' .', '.').replace(' ,', ',').replace('< ', '<').replace(' >', '>')

In [19]:
def remove_stopwords(text_string):
    """Return `text_string` with English stopwords removed.

    The text is split with `nltk.word_tokenize`, tokens found in NLTK's English
    stopword list are dropped, and the rest are re-joined with single spaces.
    Every ' .' and ' ,' in the joined string is then deleted, and the spaces
    just inside '<' / '>' are removed.

    Parameters
    ----------
    text_string : str
        Text to filter.

    Returns
    -------
    str
        The filtered text as a single string (not a list of tokens).
    """
    # Build the stopword set once per call. Inside the comprehension's condition
    # it would be reloaded and rebuilt for every single token.
    english_stopwords = set(stopwords.words('english'))

    word_list = [word for word in nltk.word_tokenize(text_string) if word not in english_stopwords]
    text = ' '.join(word_list).replace(' .', '').replace(' ,', '').replace('< ', '<').replace(' >', '>')

    return text

In [20]:
def lemmatise(text_string):
    """Return `text_string` with every token replaced by its lemma.

    The text is run through the module-level spaCy pipeline `nlp`; the lemma of
    each resulting token is joined with single spaces, and the spaces just
    inside '<' / '>' are removed.

    Parameters
    ----------
    text_string : str
        Text to lemmatise.

    Returns
    -------
    str
        Space-separated lemmas.
    """
    doc = nlp(text_string)
    lemmas = ' '.join(token.lemma_ for token in doc)

    return lemmas.replace('< ', '<').replace(' >', '>')

In [21]:
def pre_process(text):
    """Clean a raw text string, applying the following steps in order:

    1. Replace every non-ASCII character with a space
    2. Turn newlines into spaces and replace any URL link with 'url_link'
    3. Add a space after a comma, full-stop, question mark or exclamation mark
       that is directly followed by a non-space character
    4. Remove digits
    5. Expand contracted words
    6. Convert to lower case and strip leading/trailing white space
    7. Remove punctuation via remove_punct (full-stops and commas are kept)
    8. Replace acronyms with their unabbreviated forms
    9. Replace misspelled words with their correctly-spelled forms

    NOTE(review): step 1 turns curly apostrophes (’) into spaces before step 5
    runs, so only straight-apostrophe contractions are expanded for text that
    goes through this function; "don’t" becomes "don t". Reordering the steps
    would change the text the downstream tokenizer sees, so it is left as is.

    Parameters
    ----------
    text : str
        Raw text to clean.

    Returns
    -------
    str
        The cleaned, lower-cased text.
    """
    # 1. Replace every non-ASCII character with a space
    text = re.sub(r'[^\x00-\x7f]',r' ', text)

    # 2. Replace any URL link with 'url_link' (newlines become spaces first)
    text = remove_url(text.replace('\n', ' '))

    # 3. Add a space after comma, full-stop, question mark and exclamation mark
    text = re.sub(r'(?<=[.,?!])(?=[^\s])', r' ', text)

    # 4. Remove digits
    text = re.sub(r'\d+', '', text)

    # 5. Expand contractions
    text = decontract(text)

    # 6. Convert to lower case and strip leading/trailing white space.
    #    No curly "’s" can be left at this point (step 1 blanked every non-ASCII
    #    character), so no extra clean-up of it is needed here.
    text = text.lower().strip()

    # 7. Remove punctuations except full-stops and commas.
    text = remove_punct(text)

    # 8. Replace acronyms with their unabbreviated forms
    text = unabbreviate(text)

    # 9. Replace misspelled words with their correctly-spelled forms
    text = correct_misspelled_words(text)

    return text

In [42]:
def predict_label(model, text, tokenizer, label_to_idx, maxlen=66):
    """Classify one scam report, printing the result of every pipeline stage.

    Stages: pre_process -> remove_stopwords -> lemmatise ->
    tokenizer.texts_to_sequences -> pad_sequences -> model.predict -> arg-max
    -> label lookup.

    Parameters
    ----------
    model : object exposing `predict(padded_sequences)`, returning one row of
        class probabilities per input sequence.
    text : str
        Raw report text.
    tokenizer : object exposing `texts_to_sequences(list_of_str)`.
    label_to_idx : dict
        Despite its name, this maps a class index to the scam-type label.
    maxlen : int, default 66
        Length every sequence is padded to before prediction.

    Returns
    -------
    None
        The predicted label is printed, not returned.

    Raises
    ------
    ValueError
        If the arg-max index has no entry in `label_to_idx` (the original loop
        left `predicted` unassigned in that case and failed with an
        UnboundLocalError).
    """
    print("Input text:", text, "\n")

    text = pre_process(text)
    print("After text pre-processing:", text, "\n")

    text = remove_stopwords(text)
    print("After removing stopwords:", text, "\n")

    # The tokenizer is given a list of documents, hence the one-element list.
    text = [lemmatise(text)]
    print("After lemmatisation:", text, "\n")

    text = tokenizer.texts_to_sequences(text)
    print("After conversion to sequence:", text, "\n")

    text = pad_sequences(text, maxlen=maxlen)
    print("After padding sequences:", text, "\n")

    pred_probabilities = model.predict(text)
    print("Predicted probabilities:", np.round(pred_probabilities, 3), "\n")

    # A single report is scored, so only the first row of probabilities is relevant.
    Y_pred = int(np.argmax(pred_probabilities[0]))

    if Y_pred not in label_to_idx:
        raise ValueError(
            "Predicted class index {} has no entry in the label mapping".format(Y_pred)
        )
    predicted = label_to_idx[Y_pred]

    print("Predicted classification:", predicted)
    # The label is only printed; `return predicted` had been commented out,
    # presumably so the notebook cell does not echo it a second time.

## Load the model

In [23]:
# Load the selected model from disk; the demo cells below pass it to predict_label() as `model`
model = load_model("Models/Scam_Classifier_Augmented_Text/best-LSTM-fold-5.h5")

## Load the tokenizer

In [24]:
# Load the tokeniser; predict_label() calls its texts_to_sequences() on the cleaned text
# (presumably the tokeniser fitted during training - confirm it matches the loaded model).
# NOTE(review): pickle.load can execute arbitrary code from the file - only load trusted artefacts.
with open('Tokenizer/scam_classifier_augmented_text_tokenizer.pickle', 'rb') as handle:
    tokenizer = pickle.load(handle)    

## Load the dictionary for mapping of categories 

In [25]:
# Load the class-index -> scam-type mapping passed to predict_label() as `label_to_idx`.
# The `with` block closes the file handle as soon as the mapping has been read.
# NOTE(review): pickle.load can execute arbitrary code from the file - only load trusted artefacts.
with open("Data/scam_type_cat_mapping.pkl", "rb") as a_file:
    scam_type_cat_mapping = pickle.load(a_file)
scam_type_cat_mapping

{0: 'Credit-for-Sex Scam',
 1: 'Home/Room Rental Scam',
 2: 'Impersonation Scam',
 3: 'Internet Love Scam',
 4: 'Investment Scam',
 5: 'Online Purchase Scam'}

## Predict classification for a text

### Impersonation Scam

In [26]:
text = "I received a scam call. It was an automated voice from the Singapore High Court, stating that I have an outstanding summon. I was asked to pay it."

In [43]:
predict_label(model=model, text=text, tokenizer=tokenizer, label_to_idx=scam_type_cat_mapping)

Input text: I received a scam call. It was an automated voice from the Singapore High Court, stating that I have an outstanding summon. I was asked to pay it. 

After text pre-processing: i received a scam call. it was an automated voice from the singapore high court, stating that i have an outstanding summon. i was asked to pay it. 

After removing stopwords: received scam call automated voice singapore high court stating outstanding summon asked pay 

After lemmatisation: ['receive scam call automate voice singapore high court state outstanding summon ask pay'] 

After conversion to sequence: [[17, 13, 1, 168, 158, 18, 264, 418, 99, 849, 928, 2, 5]] 

After padding sequences: [[  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0  17
   13   1 168 158  18 264 418  99 849 928   2   5]] 

Predicted probabilities: [[0.003 0.00

In [44]:
text = "Someone with a thick Indian accent told me they are from Singtel, and that my internet connection was used for illegal activities overseas. they want to help me solve the problem, and ask if I am in front of my computer. I told them I wasn't and hung up the call."
text

"Someone with a thick Indian accent told me they are from Singtel, and that my internet connection was used for illegal activities overseas. they want to help me solve the problem, and ask if I am in front of my computer. I told them I wasn't and hung up the call."

In [45]:
predict_label(model=model, text=text, tokenizer=tokenizer, label_to_idx=scam_type_cat_mapping)

Input text: Someone with a thick Indian accent told me they are from Singtel, and that my internet connection was used for illegal activities overseas. they want to help me solve the problem, and ask if I am in front of my computer. I told them I wasn't and hung up the call. 

After text pre-processing: someone with a thick indian accent told me they are from singtel, and that my internet connection was used for illegal activities overseas. they want to help me solve the problem, and ask if i am in front of my computer. i told them i was not and hung up the call. 

After removing stopwords: someone thick indian accent told singtel internet connection used illegal activities overseas want help solve problem ask front computer told hung call 

After lemmatisation: ['someone thick indian accent tell singtel internet connection use illegal activity overseas want help solve problem ask front computer tell hang call'] 

After conversion to sequence: [[137, 2096, 480, 260, 7, 261, 204, 710, 2

In [46]:
text = "I received a phone call from a man claiming to be from singtel support on 10th august 2020 at 1.38pm. he told me that something is wrong with my internet connection and there are many people from overseas are using my internet. by the way, I cannot really understand him due to his strong indian accent. he sounded like an indian national. I felt that the call was very suspicious, hence, I asked him for his full name. He said Mike Ambray. he also give me his email address and some license number. i told him that i cannot trust him and i will call singtel to verify first. But he was very insistent that he could show me through the computer. i am curious how he is able to show me. i went to my computer and he instructed me to key windows and r keys together. then type cmd into the run app resulting in black window appearing. but i stopped there and said i will call singtel first. He became agitated and kept insisting that just one more step. i hang up the phone and call the police hot line and was told this is a scam call. the phone number that appeared on the caller id was a fake number. when i tried calling back, the voice message said that this number is not in use."
text

'I received a phone call from a man claiming to be from singtel support on 10th august 2020 at 1.38pm. he told me that something is wrong with my internet connection and there are many people from overseas are using my internet. by the way, I cannot really understand him due to his strong indian accent. he sounded like an indian national. I felt that the call was very suspicious, hence, I asked him for his full name. He said Mike Ambray. he also give me his email address and some license number. i told him that i cannot trust him and i will call singtel to verify first. But he was very insistent that he could show me through the computer. i am curious how he is able to show me. i went to my computer and he instructed me to key windows and r keys together. then type cmd into the run app resulting in black window appearing. but i stopped there and said i will call singtel first. He became agitated and kept insisting that just one more step. i hang up the phone and call the police hot lin

In [47]:
predict_label(model=model, text=text, tokenizer=tokenizer, label_to_idx=scam_type_cat_mapping)

Input text: I received a phone call from a man claiming to be from singtel support on 10th august 2020 at 1.38pm. he told me that something is wrong with my internet connection and there are many people from overseas are using my internet. by the way, I cannot really understand him due to his strong indian accent. he sounded like an indian national. I felt that the call was very suspicious, hence, I asked him for his full name. He said Mike Ambray. he also give me his email address and some license number. i told him that i cannot trust him and i will call singtel to verify first. But he was very insistent that he could show me through the computer. i am curious how he is able to show me. i went to my computer and he instructed me to key windows and r keys together. then type cmd into the run app resulting in black window appearing. but i stopped there and said i will call singtel first. He became agitated and kept insisting that just one more step. i hang up the phone and call the pol

In [48]:
text = "I've received a call from someone called 'Alex William' and he told me that he was from Singtel. He told me that my WiFi was compromised and required us to use the command prompt command and he asked me to enter some commands, When i told him i was busy, he flustered and requested to call me back at a later time. When I tried to ask for incident case ticket number, he was unable to provide me with one and hung up on me abruptly. Fortunately i didn't do as he requested me to do and nothing was lost, including any personal information."
text

"I've received a call from someone called 'Alex William' and he told me that he was from Singtel. He told me that my WiFi was compromised and required us to use the command prompt command and he asked me to enter some commands, When i told him i was busy, he flustered and requested to call me back at a later time. When I tried to ask for incident case ticket number, he was unable to provide me with one and hung up on me abruptly. Fortunately i didn't do as he requested me to do and nothing was lost, including any personal information."

In [49]:
predict_label(model=model, text=text, tokenizer=tokenizer, label_to_idx=scam_type_cat_mapping)

Input text: I've received a call from someone called 'Alex William' and he told me that he was from Singtel. He told me that my WiFi was compromised and required us to use the command prompt command and he asked me to enter some commands, When i told him i was busy, he flustered and requested to call me back at a later time. When I tried to ask for incident case ticket number, he was unable to provide me with one and hung up on me abruptly. Fortunately i didn't do as he requested me to do and nothing was lost, including any personal information. 

After text pre-processing: i have received a call from someone called 'alex william ' and he told me that he was from singtel. he told me that my wifi was compromised and required us to use the command prompt command and he asked me to enter some commands, when i told him i was busy, he flustered and requested to call me back at a later time. when i tried to ask for incident case ticket number, he was unable to provide me with one and hung up

### Internet Love Scam

In [50]:
text = "My story is similar to those shared in Feb and Mar 2020. Got to know Jason (claimed to be a freelance architect) from okc since Mar 2020 and had chatted with him daily for about 4 months.  He mentioned he is from moscow, adopted by russian couple and was there since 3 years old. mum passed away, dad at nursing home. he was very sweet, caring. sent me daily morning message and after known him for about a month, he asked for my email address and also sent me daily emails. The whole encounter felt really real until recently he got a call from Cyrus to inform him that the warehouse that he built COLLAPSED and he needed to fly over. He was told client requested compensation of USD 50K and his lawyer had helped him with 24K, left 26K.  Called me desperately and sounded super real that he needed help to raise money. I didn't give him any money as I suspected that he's a scammer. Blocked him off after that."
text

"My story is similar to those shared in Feb and Mar 2020. Got to know Jason (claimed to be a freelance architect) from okc since Mar 2020 and had chatted with him daily for about 4 months.  He mentioned he is from moscow, adopted by russian couple and was there since 3 years old. mum passed away, dad at nursing home. he was very sweet, caring. sent me daily morning message and after known him for about a month, he asked for my email address and also sent me daily emails. The whole encounter felt really real until recently he got a call from Cyrus to inform him that the warehouse that he built COLLAPSED and he needed to fly over. He was told client requested compensation of USD 50K and his lawyer had helped him with 24K, left 26K.  Called me desperately and sounded super real that he needed help to raise money. I didn't give him any money as I suspected that he's a scammer. Blocked him off after that."

In [51]:
predict_label(model=model, text=text, tokenizer=tokenizer, label_to_idx=scam_type_cat_mapping)

Input text: My story is similar to those shared in Feb and Mar 2020. Got to know Jason (claimed to be a freelance architect) from okc since Mar 2020 and had chatted with him daily for about 4 months.  He mentioned he is from moscow, adopted by russian couple and was there since 3 years old. mum passed away, dad at nursing home. he was very sweet, caring. sent me daily morning message and after known him for about a month, he asked for my email address and also sent me daily emails. The whole encounter felt really real until recently he got a call from Cyrus to inform him that the warehouse that he built COLLAPSED and he needed to fly over. He was told client requested compensation of USD 50K and his lawyer had helped him with 24K, left 26K.  Called me desperately and sounded super real that he needed help to raise money. I didn't give him any money as I suspected that he's a scammer. Blocked him off after that. 

After text pre-processing: my story is similar to those shared in feb and

### Credit-for-Sex Scam

In [52]:
text = "Saw the girl at michat, offered sexual services. Proceed to meet up point. Asked to take a photo of the surroundings. Proceed to ask for contact. Then said her friend would call to ask few security questions. Friend called and requested to buy 100 Google card as payment because it's the first date and the girl don't take cash for first timer. After photo of pin was send to the girl. Friend requested for 1000 for security deposit to ensure no connection with police. After 1000 card pin was sent. Again, asked for another 1000 assurance fee to ensure that I would not hurt the girl during transaction. Then finally asked for hotel management fee. The friend 'ah kun' said all will be refunded which is a lie. After I refused, he became angry and threaten to hunt me down with my phone number. Proceeded to block their number and left"
text

"Saw the girl at michat, offered sexual services. Proceed to meet up point. Asked to take a photo of the surroundings. Proceed to ask for contact. Then said her friend would call to ask few security questions. Friend called and requested to buy 100 Google card as payment because it's the first date and the girl don't take cash for first timer. After photo of pin was send to the girl. Friend requested for 1000 for security deposit to ensure no connection with police. After 1000 card pin was sent. Again, asked for another 1000 assurance fee to ensure that I would not hurt the girl during transaction. Then finally asked for hotel management fee. The friend 'ah kun' said all will be refunded which is a lie. After I refused, he became angry and threaten to hunt me down with my phone number. Proceeded to block their number and left"

In [54]:
predict_label(model=model, text=text, tokenizer=tokenizer, label_to_idx=scam_type_cat_mapping)

Input text: Saw the girl at michat, offered sexual services. Proceed to meet up point. Asked to take a photo of the surroundings. Proceed to ask for contact. Then said her friend would call to ask few security questions. Friend called and requested to buy 100 Google card as payment because it's the first date and the girl don't take cash for first timer. After photo of pin was send to the girl. Friend requested for 1000 for security deposit to ensure no connection with police. After 1000 card pin was sent. Again, asked for another 1000 assurance fee to ensure that I would not hurt the girl during transaction. Then finally asked for hotel management fee. The friend 'ah kun' said all will be refunded which is a lie. After I refused, he became angry and threaten to hunt me down with my phone number. Proceeded to block their number and left 

After text pre-processing: saw the girl at michat, offered sexual services. proceed to meet up point. asked to take a photo of the surroundings. proc

### Home/Room Rental Scam

In [55]:
text = "Hi I received a whats app from someone called Zu Lan. She wanted my email rather than phone. Sounded odd but I gave it. I received the following message which sounded odd so i looked online and found lots of similar stories. (but people have lost money) Zu Lan's email  ------------------------------------------------------------------------------------------------------------------ Thank you for replying to me. My name is Zhu Lan, 36 years old, a single and non-smoker lady. I am willing to rent the room. I am at sea at the moment as i work on a cruise ship as a Chef offshore of Philippines I will be living and working down there as my contract here is coming to an end that's why I am coming there. But due to the nature of my work of having a busy schedule, phone calls making and visiting websites are restricted, I only squeezed out time to check this advert and send you a message so I won't be able to come for the viewing myself. I will like you to kindly calculate the total cost covering my first six months rent together with a deposit and I will be staying for long as your room is available. Please kindly get back to me with some pictures of the room and your address for me to forward it to my mover agent to calculate the total cost of moving my stuff over there. I have attached my IC with this email for you to have a view of me. Looking forward to read back from you soon. --------------------------------------------------------------------------------------------------------------------"
text

"Hi I received a whats app from someone called Zu Lan. She wanted my email rather than phone. Sounded odd but I gave it. I received the following message which sounded odd so i looked online and found lots of similar stories. (but people have lost money) Zu Lan's email  ------------------------------------------------------------------------------------------------------------------ Thank you for replying to me. My name is Zhu Lan, 36 years old, a single and non-smoker lady. I am willing to rent the room. I am at sea at the moment as i work on a cruise ship as a Chef offshore of Philippines I will be living and working down there as my contract here is coming to an end that's why I am coming there. But due to the nature of my work of having a busy schedule, phone calls making and visiting websites are restricted, I only squeezed out time to check this advert and send you a message so I won't be able to come for the viewing myself. I will like you to kindly calculate the total cost cove

In [56]:
# Classify the report currently held in `text`; the cell output echoes the input text.
predict_label(
    model=model,
    text=text,
    tokenizer=tokenizer,
    label_to_idx=scam_type_cat_mapping,
)

Input text: Hi I received a whats app from someone called Zu Lan. She wanted my email rather than phone. Sounded odd but I gave it. I received the following message which sounded odd so i looked online and found lots of similar stories. (but people have lost money) Zu Lan's email  ------------------------------------------------------------------------------------------------------------------ Thank you for replying to me. My name is Zhu Lan, 36 years old, a single and non-smoker lady. I am willing to rent the room. I am at sea at the moment as i work on a cruise ship as a Chef offshore of Philippines I will be living and working down there as my contract here is coming to an end that's why I am coming there. But due to the nature of my work of having a busy schedule, phone calls making and visiting websites are restricted, I only squeezed out time to check this advert and send you a message so I won't be able to come for the viewing myself. I will like you to kindly calculate the tota

### Investment Scam

In [57]:
# First report in the "Investment Scam" section. Adjacent literals are joined by
# the parser, so the value matches the original single-line string exactly
# (including the double space after "talking.").
text = (
    "There was a girl on Wechat added me on around may. "
    "We chat a little she sweet talk a bit then we are into the topic of investment. "
    "I do a bit of investment and so i was interested about was she was talking.  "
    "She bring me into the topic of bit coin and Crypto Currency then i was a bit interested as the stuff she said was legit. "
    "Then she say her uncle was in a bank and send me screenshot of picture and official information and then she deleted. "
    "So then I was quite interested as she had inside info on the investment. "
    "So i went in and follow her steps of investment. "
    "Then we added on Whatsapp for more info. "
    "At first it was just 800 but then the website showed my coin was showing a very good growth. "
    "Due to greed i went to put in another $1000 sgd. "
    "The next month(June) another $2000 until on July 7, she started asking me to sell my stocks and invest with her. "
    "I felt a bit fishy as she is trying to force me to sell my stock where i do not want to. "
    "So in the end i did more research on the website and found out that the coin transferred in could not be taken out. "
    "i blocked her immediately on all social media platform and communication "
    "All in all, I learnt a lesson but i am just insecure about my info was given to them so I make a police report for a record."
)
# Bare expression: the notebook echoes the string as the cell output.
text

'There was a girl on Wechat added me on around may. We chat a little she sweet talk a bit then we are into the topic of investment. I do a bit of investment and so i was interested about was she was talking.  She bring me into the topic of bit coin and Crypto Currency then i was a bit interested as the stuff she said was legit. Then she say her uncle was in a bank and send me screenshot of picture and official information and then she deleted. So then I was quite interested as she had inside info on the investment. So i went in and follow her steps of investment. Then we added on Whatsapp for more info. At first it was just 800 but then the website showed my coin was showing a very good growth. Due to greed i went to put in another $1000 sgd. The next month(June) another $2000 until on July 7, she started asking me to sell my stocks and invest with her. I felt a bit fishy as she is trying to force me to sell my stock where i do not want to. So in the end i did more research on the webs

In [58]:
# Classify the first "Investment Scam" report held in `text`.
predict_label(
    model=model,
    text=text,
    tokenizer=tokenizer,
    label_to_idx=scam_type_cat_mapping,
)

Input text: There was a girl on Wechat added me on around may. We chat a little she sweet talk a bit then we are into the topic of investment. I do a bit of investment and so i was interested about was she was talking.  She bring me into the topic of bit coin and Crypto Currency then i was a bit interested as the stuff she said was legit. Then she say her uncle was in a bank and send me screenshot of picture and official information and then she deleted. So then I was quite interested as she had inside info on the investment. So i went in and follow her steps of investment. Then we added on Whatsapp for more info. At first it was just 800 but then the website showed my coin was showing a very good growth. Due to greed i went to put in another $1000 sgd. The next month(June) another $2000 until on July 7, she started asking me to sell my stocks and invest with her. I felt a bit fishy as she is trying to force me to sell my stock where i do not want to. So in the end i did more research 

In [59]:
# Second report in the "Investment Scam" section.
#
# The report contains a line break just before item "3)". A plain double-quoted
# literal cannot span physical source lines, so the text is written as adjacent
# literals inside parentheses (joined by the parser) and the line break is kept
# as an explicit "\n" escape.
# NOTE(review): the break is assumed to be a newline; confirm against the raw
# report if the exact separator character matters to pre-processing.
text = (
    "I was approached by a lady from china who goes by the name 'jiang lin' on wechat. "
    "She claims to be from a well-to-do family from beijing and has an uncle who is a financial expert. "
    "After a couple of text exchanges she introduced me to invest in a cryptocurrency known as obb, trading on a cryptocurrency online platform known as bftbit (www.bftbit.com). "
    "Bftbit turns out to be a fake trading platform and obb is a non-existent cryptocurrency. "
    "Once monies are deposited into bftbit it goes directly to the scammers, although on bftbit it reflects as deposited into your bftbit account and no matter how you try to withdraw the monies, the withdrawal status would remain as processing. "
    "In the beginning i was very skeptical and refused to believe her. "
    "But a moment of greed and too smart for my own good led me to foolishly believe in her and lost an unrecoverable sum of s$14,500. "
    "The modus operandi: "
    "1) she would first introduce that she had been into this obb investment for 3 years under the guidance of her financial expert uncle who guides her to invest at the relevant time on bftbit which most of the time would be profit-making as the uncle knows the market. "
    "2) she would ask you to invest in a small sum of us$500 first which would make profits after 2 rounds of investment on bftbit. "
    "She would also provide step-by-step guidance [i.e. buying of cryptocurrency on huobi (an authentic ans well-known cryptocurrency exchange incorporated in singapore), transferring of currency from huobi to bftbit, making trades in obb on bftbit and withdrawal and transferring of a small sum of currency from bftbit to huobi (this withdrawal is to make one believe that bftbit is an authentic trading platform but in fact the withdrawal is done by the scammers)]. "
    "From there, she would entice you to invest in us$10,000 as her uncle predicts that a big market trend is coming, thus earning more profit. \n"
    "3) she would then suggest to make a joint trade of usd$20,000 which would require us$30,000 as security deposit. "
    "In order to gain your trust, she would transfer us$40,000 into your bftbit account. "
    "Once you transfer your us$10,000 into the account it is gone forever. "
    "4) at the time when you (and her) are jointly trading on the obb big market trend, the trading system would stop for unknown reason and when you refresh the trading system you would realise that the joint trade suffers an 80% loss. "
    "From there, she would claim to be your fault and ask for refund of her us$40,000. "
    "5) a much serious concern is that in setting up your account with bftbit you need to furnish a copy of your nric which is at the scammers' disposal."
)
# Bare expression: the notebook echoes the string as the cell output.
text

"I was approached by a lady from china who goes by the name 'jiang lin' on wechat. She claims to be from a well-to-do family from beijing and has an uncle who is a financial expert. After a couple of text exchanges she introduced me to invest in a cryptocurrency known as obb, trading on a cryptocurrency online platform known as bftbit (www.bftbit.com). Bftbit turns out to be a fake trading platform and obb is a non-existent cryptocurrency. Once monies are deposited into bftbit it goes directly to the scammers, although on bftbit it reflects as deposited into your bftbit account and no matter how you try to withdraw the monies, the withdrawal status would remain as processing. In the beginning i was very skeptical and refused to believe her. But a moment of greed and too smart for my own good led me to foolishly believe in her and lost an unrecoverable sum of s$14,500. The modus operandi: 1) she would first introduce that she had been into this obb investment for 3 years under the guida

In [60]:
# Classify the second "Investment Scam" report held in `text`.
predict_label(
    model=model,
    text=text,
    tokenizer=tokenizer,
    label_to_idx=scam_type_cat_mapping,
)

Input text: I was approached by a lady from china who goes by the name 'jiang lin' on wechat. She claims to be from a well-to-do family from beijing and has an uncle who is a financial expert. After a couple of text exchanges she introduced me to invest in a cryptocurrency known as obb, trading on a cryptocurrency online platform known as bftbit (www.bftbit.com). Bftbit turns out to be a fake trading platform and obb is a non-existent cryptocurrency. Once monies are deposited into bftbit it goes directly to the scammers, although on bftbit it reflects as deposited into your bftbit account and no matter how you try to withdraw the monies, the withdrawal status would remain as processing. In the beginning i was very skeptical and refused to believe her. But a moment of greed and too smart for my own good led me to foolishly believe in her and lost an unrecoverable sum of s$14,500. The modus operandi: 1) she would first introduce that she had been into this obb investment for 3 years unde

### Online Purchase Scam

In [61]:
# Report for the "Online Purchase Scam" section. Adjacent literals are joined by
# the parser, so the value matches the original single-line string exactly.
text = (
    "Went online shopping in shopee on 20 june 2020 and found this sg seller. "
    "item was offered at $350 with no delivery fee required. "
    "I contact seller through whatsapp (83792917) and He counter offer at $300 and told us to make a direct bank transfer. "
    "i worried this is a scam so he offer a deposit of $200 instead, he even provide me with an NRIC (both side copy) to make me feel safe. "
    "coming to think of it now, the nric must have belong to some other victim as well. "
    "anyway, story is, i transfer the $200 to posb-??8032065 and was told to contact his boss for the item. "
    "through whatsapp (83022302) with the boss, he told me to make the remaining payment if we want to get the item cos his staff is new and not too sure of the procedure. "
    "once again, $100 was transfer, this time to another bank account (posb-??6470451). "
    "then we are told that we need to make another payment of $100 as delivery fee. "
    "only then, did i realise this is a scam... "
    "i confront the boss, as usual it is pointless, then i told him i wanted a refund... "
    "of cos there is no refund so my money is gone..."
    "I wish to raise awareness and hope it will help to prevent you from falling into similiar case or being cheated by the same scammer. "
    "There are 2 person in incident who cheated me of a total of $300."
)
# Bare expression: the notebook echoes the string as the cell output.
text

'Went online shopping in shopee on 20 june 2020 and found this sg seller. item was offered at $350 with no delivery fee required. I contact seller through whatsapp (83792917) and He counter offer at $300 and told us to make a direct bank transfer. i worried this is a scam so he offer a deposit of $200 instead, he even provide me with an NRIC (both side copy) to make me feel safe. coming to think of it now, the nric must have belong to some other victim as well. anyway, story is, i transfer the $200 to posb-??8032065 and was told to contact his boss for the item. through whatsapp (83022302) with the boss, he told me to make the remaining payment if we want to get the item cos his staff is new and not too sure of the procedure. once again, $100 was transfer, this time to another bank account (posb-??6470451). then we are told that we need to make another payment of $100 as delivery fee. only then, did i realise this is a scam... i confront the boss, as usual it is pointless, then i told 

In [62]:
# Classify the "Online Purchase Scam" report held in `text`.
predict_label(
    model=model,
    text=text,
    tokenizer=tokenizer,
    label_to_idx=scam_type_cat_mapping,
)

Input text: Went online shopping in shopee on 20 june 2020 and found this sg seller. item was offered at $350 with no delivery fee required. I contact seller through whatsapp (83792917) and He counter offer at $300 and told us to make a direct bank transfer. i worried this is a scam so he offer a deposit of $200 instead, he even provide me with an NRIC (both side copy) to make me feel safe. coming to think of it now, the nric must have belong to some other victim as well. anyway, story is, i transfer the $200 to posb-??8032065 and was told to contact his boss for the item. through whatsapp (83022302) with the boss, he told me to make the remaining payment if we want to get the item cos his staff is new and not too sure of the procedure. once again, $100 was transfer, this time to another bank account (posb-??6470451). then we are told that we need to make another payment of $100 as delivery fee. only then, did i realise this is a scam... i confront the boss, as usual it is pointless, t

### Loan Scam

In [63]:
# Report for the "Loan Scam" section. Adjacent literals are joined by the
# parser, so the value matches the original single-line string exactly
# (including the missing space in "'avi'.After").
text = (
    "Today, at 2.00pm, i received a call from +65 9749038?, which i clearly knew was a scam call. "
    "It was first an automated voice, which said, 'hello, good afternoon. "
    "We are a credit loan company that offer personal loan and micro loan. "
    "The application process will only take you a couple of minutes. "
    "After approval, the funds will transfer to your preferred account within 3 hours. "
    "Press 1 for more information. "
    "Your call will be diverted to our sales officer. "
    "Press 2 to repeat the message.' "
    "I press 1 and it was transferred to a lady with a filipino accent. "
    "She said the company's name was 'ac? capital' and asked me how much i wished to loan. "
    "I said i wanted to loan $2,000. "
    "Next, she asked what was my monthly salary and said that they can approve loan amounts within 2 to 2.5 times the monthly income. "
    "She said somebody from the sales department will contact me again today regarding my application. "
    "She said her name was 'avi'."
    "After that, I called the number back and not surprising that it was an invalid number."
)
# Bare expression: the notebook echoes the string as the cell output.
text

"Today, at 2.00pm, i received a call from +65 9749038?, which i clearly knew was a scam call. It was first an automated voice, which said, 'hello, good afternoon. We are a credit loan company that offer personal loan and micro loan. The application process will only take you a couple of minutes. After approval, the funds will transfer to your preferred account within 3 hours. Press 1 for more information. Your call will be diverted to our sales officer. Press 2 to repeat the message.' I press 1 and it was transferred to a lady with a filipino accent. She said the company's name was 'ac? capital' and asked me how much i wished to loan. I said i wanted to loan $2,000. Next, she asked what was my monthly salary and said that they can approve loan amounts within 2 to 2.5 times the monthly income. She said somebody from the sales department will contact me again today regarding my application. She said her name was 'avi'.After that, I called the number back and not surprising that it was an

In [64]:
# Classify the "Loan Scam" report held in `text`.
predict_label(
    model=model,
    text=text,
    tokenizer=tokenizer,
    label_to_idx=scam_type_cat_mapping,
)

Input text: Today, at 2.00pm, i received a call from +65 9749038?, which i clearly knew was a scam call. It was first an automated voice, which said, 'hello, good afternoon. We are a credit loan company that offer personal loan and micro loan. The application process will only take you a couple of minutes. After approval, the funds will transfer to your preferred account within 3 hours. Press 1 for more information. Your call will be diverted to our sales officer. Press 2 to repeat the message.' I press 1 and it was transferred to a lady with a filipino accent. She said the company's name was 'ac? capital' and asked me how much i wished to loan. I said i wanted to loan $2,000. Next, she asked what was my monthly salary and said that they can approve loan amounts within 2 to 2.5 times the monthly income. She said somebody from the sales department will contact me again today regarding my application. She said her name was 'avi'.After that, I called the number back and not surprising tha