## Import

In [None]:
import pickle
from keras.models import load_model
import numpy as np
from flask import Flask, request, jsonify, render_template
import pickle
from gensim.models.word2vec import Word2Vec
from keras.preprocessing.sequence import pad_sequences
import re 
import pyarabic.araby as ar
import unicodedata as ud

# Map the two-letter country codes used as class labels to display names.
# NOTE(review): the keys presumably have to match the labels emitted by the
# trained models (e.g. 'PL' for Palestine) - confirm before renaming any key.
country_name = {
    'IQ': 'Iraq',
    'SY': 'Syria',
    'LY': 'Libya',
    'EG': 'Egypt',
    'YE': 'Yemen',
    'OM': 'Oman',
    'BH': 'Bahrain',
    'KW': 'Kuwait',
    'SA': 'Saudi Arabia',
    'AE': 'United Arab Emirates',
    'QA': 'Qatar',
    'DZ': 'Algeria',
    'MA': 'Morocco',
    'TN': 'Tunisia',
    'SD': 'Sudan',  # was 'Sudan ' - the trailing space leaked into the page
    'JO': 'Jordan',
    'LB': 'Lebanon',
    'PL': 'Palestine',
}

## Cleaning function

In [None]:
# Patterns are compiled once at import time instead of on every cleaner() call.
_EMOJI_PATTERN = re.compile(
    "["
    "\U0001F600-\U0001F64F"  # emoticons
    "\U0001F300-\U0001F5FF"  # symbols & pictographs
    "\U0001F680-\U0001F6FF"  # transport & map symbols
    "\U0001F1E0-\U0001F1FF"  # flags (iOS)
    "\U00002500-\U00002BEF"  # box drawing ... misc symbols and arrows
    "\U00002702-\U000027B0"  # dingbats
    "\U000024C2-\U0001F251"  # very broad range; subsumes several entries here
    "\U0001f926-\U0001f937"
    "\U00010000-\U0010ffff"  # every supplementary-plane code point
    "\u2640-\u2642"
    "\u2600-\u2B55"
    "\u200d"  # zero-width joiner
    "\u23cf"
    "\u23e9"
    "\u231a"
    "\ufe0f"  # variation selector-16
    "\u3030"
    "]+",
    flags=re.UNICODE,
)
_DIGITS_PATTERN = re.compile(r"\d+")  # raw string: "\d" is an invalid str escape
_LATIN_PATTERN = re.compile("[a-zA-Z]")
_ALEF_PATTERN = re.compile("[إأآا]")
_REPEAT_PATTERN = re.compile(r'(.)\1+')
_SPACES_PATTERN = re.compile(r' +')

# Single-character substitutions, applied in one pass. No replacement value is
# itself a key, so one pass gives the same result as sequential substitutions.
_CHAR_TABLE = str.maketrans({
    '\u0621': '\u0627',
    '\u0649': '\u064a',
    '\u0629': '\u0647',
    '\u06af': '\u0643',
    '\u0686': '\u062c',
    '\n': ' ',
})


def cleaner(text: str) -> str:
    """Normalize a tweet before it is handed to the classifiers.

    Steps, in order: replace emoji/symbol runs, digit runs and Latin letters
    with spaces; map a fixed set of letter variants (and newlines) to their
    replacements; run the pyarabic tashkeel/tatweel/hamza/diacritics helpers;
    cap any run of a repeated character at two; replace Unicode punctuation
    with spaces; collapse runs of spaces into one.

    Args:
        text: Raw input text.

    Returns:
        The cleaned text. Leading/trailing single spaces are not stripped.
    """
    text = _EMOJI_PATTERN.sub(' ', text)
    text = _DIGITS_PATTERN.sub(' ', text)
    text = _LATIN_PATTERN.sub(' ', text)
    text = text.translate(_CHAR_TABLE)
    text = _ALEF_PATTERN.sub("ا", text)
    text = ar.strip_tashkeel(text)
    text = ar.strip_tatweel(text)
    text = ar.normalize_hamza(text)
    text = ar.strip_diacritics(text)
    text = _REPEAT_PATTERN.sub(r'\1\1', text)  # keep at most 2 repeats
    # Unicode general categories starting with 'P' are punctuation.
    text = ''.join(' ' if ud.category(ch).startswith('P') else ch for ch in text)
    return _SPACES_PATTERN.sub(' ', text)


## Deploy with Flask

In [None]:
flask_app = Flask(__name__)


def _load_pickle(path):
    """Unpickle and return the object stored at *path*, closing the file."""
    # NOTE(security): unpickling can execute arbitrary code; only load
    # artifacts that were produced by this project.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Load machine learning trained files
victorizer = _load_pickle('./Machine_learning_pickles/victorizer.sav')
transformer = _load_pickle('./Machine_learning_pickles/transformer.sav')
machine_model = _load_pickle('./Machine_learning_pickles/linearSVC.sav')

# Load deep learning trained files
deep_model = load_model('./Deep_model_pickles/deep_model.h5')
encoder = _load_pickle('./Deep_model_pickles/encoder.sav')
tokenizer = _load_pickle('./Deep_model_pickles/tokenizer.sav')

# Predict the country with the machine learning model
def get_machine_result(input_text):
    """Return the country name predicted by the pickled machine learning model.

    The text is cleaned first; if fewer than 3 characters remain, a
    validation message is returned instead of a prediction.
    """
    cleaned = cleaner(input_text)
    if len(cleaned) <= 2:
        return 'Tweet is too short or its not arabic, Please enter valid text'
    vectorized = victorizer.transform([cleaned])
    features = transformer.transform(vectorized)
    predicted_code = machine_model.predict(features)[0]
    return country_name[predicted_code]

# Predict the country with the deep learning model
def get_deep_result(input_text):
    """Return the country name predicted by the loaded Keras model.

    The text is cleaned first; if fewer than 3 characters remain, a
    validation message is returned instead of a prediction.
    """
    cleaned = cleaner(input_text)
    if len(cleaned) <= 2:
        return 'Tweet is too short or its not arabic, Please enter valid text'
    sequences = tokenizer.texts_to_sequences([cleaned])
    padded = pad_sequences(sequences, maxlen=300)
    scores = deep_model.predict(padded)
    # The index of the highest score selects the label from the encoder.
    label = encoder.classes_[np.argmax(scores)]
    return country_name[label]

# Serve the HTML template
@flask_app.route("/")
def Home():
    """Render index.html without passing any prediction values."""
    page = render_template("index.html")
    return page

@flask_app.route("/predict", methods = ["POST"])
def predict():
    """Classify the first submitted form value with both models.

    Returns:
        index.html rendered with the ``machine_learning`` and
        ``deep_learning`` template variables set to the two predictions
        (or to the validation message produced by the predictors).
    """
    # Fall back to an empty string when the form carries no fields: indexing
    # an empty list here used to raise IndexError (HTTP 500). The empty text
    # is expected to fail the predictors' minimum-length check instead.
    tweet = next((str(value) for value in request.form.values()), "")
    machine_result = get_machine_result(tweet)
    deep_result = get_deep_result(tweet)
    return render_template(
        "index.html",
        machine_learning=f"Machine Learning prediction: {machine_result}",
        deep_learning=f"Deep Learning prediction: {deep_result}",
    )

# Start the Flask server only when this file is executed directly (not when
# it is imported); debug mode is explicitly turned off.
if __name__ == "__main__":
    flask_app.run(debug=False)

In [1]:
# NOTE(review): looks like a leftover scratch notebook cell - it lowercases a
# title string and the result is not used by the app; the second string
# appears to be the cell's recorded output. Confirm and consider removing.
'BUILDING DIALECT CLASSIFIERS USING TWITTER DATA'.lower()

'building dialect classifiers using twitter data'