In [1]:
#IMPORT LIBRARIES
import pickle
import pandas as pd
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import punkt
from nltk.corpus.reader import wordnet
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer

In [2]:
#TRAINED MODEL
path_models = ""

# SVM
path_svm = path_models + 'svc_0.pickle'
with open(path_svm, 'rb') as data:
    svc_model = pickle.load(data)

In [3]:
#TF-IDF object
path_tfidf = "tfidf.pickle"
with open(path_tfidf, 'rb') as data:
    tfidf = pickle.load(data)

In [5]:
#LABEL MAPPING DICTIONARY
Label = {
    'Revelation': 0,
    'Belief': 1,
    'Knowledge': 2,
    "Ablution (Wudu')" : 3,
    'Bathing (Ghusl)' : 4,
    'Mestrual Periods' : 5,
    'Ablution with dust' : 6,
    'Prayer (Salat)' : 7,
    'Prayer Hall (Sutra)' : 8,
    'Times of the Prayer' : 9,
    'Call to Prayer (Adhaan)' : 10,
    'Characteristics of Prayer' : 11,
    'Friday Prayer' : 12,
    'Fear Prayer' : 13,
    'The Two Festivals (Eids)' : 14,
    'Witr Prayer' : 15,
    "Dua' for Rain (Istisqaa)" : 16,
    'Eclipses' : 17,
    "Prostration During Recital of Qur'an" : 18,
    "Shortening Prayers (At-Taqseer)" : 19   
}

In [11]:
# FEATURE ENGINEERING WORKFLOW
punctuation_signs = list("?:!.,;")
stop_words = list(stopwords.words('english'))

def create_features_from_text(text):
    
    # Dataframe creation
    lemmatized_text_list = []
    df = pd.DataFrame(columns=['Text'])
    df.loc[0] = text
    df['Text'] = df['Text'].str.replace("\r", " ")
    df['Text'] = df['Text'].str.replace("\n", " ")
    df['Text'] = df['Text'].str.replace("    ", " ")
    df['Text'] = df['Text'].str.replace('"', '')
    df['Text'] = df['Text'].str.lower()
    df['Text'] = df['Text']
    for punct_sign in punctuation_signs:
        df['Text'] = df['Text'].str.replace(punct_sign, '')
    df['Text'] = df['Text'].str.replace("'s", "")
    wordnet_lemmatizer = WordNetLemmatizer()
    lemmatized_list = []
    text = df.loc[0]['Text']
    text_words = text.split(" ")
    for word in text_words:
        lemmatized_list.append(wordnet_lemmatizer.lemmatize(word, pos="v"))
    lemmatized_text = " ".join(lemmatized_list)    
    lemmatized_text_list.append(lemmatized_text)
    df['Text'] = lemmatized_text_list
    df['Text'] = df['Text']
    for stop_word in stop_words:
        regex_stopword = r"\b" + stop_word + r"\b"
        df['Text'] = df['Text'].str.replace(regex_stopword, '')
    df = df['Text']
    
    # TF-IDF
    features = tfidf.transform(df).toarray()
    
    return features

In [7]:
# write a function that tells us the chapter category given the label:
def get_category_name(category_id):
    for category, id_ in Label.items():    
        if id_ == category_id:
            return category

In [16]:
# write a function that includes the whole process:
def predict_from_text(text):
    
    # Predict using the input model
    prediction_svc = svc_model.predict(create_features_from_text(text))[0]
    #prediction_svc_proba = svc_model.predict_proba(create_features_from_text(text))[0]
    
    # Return result
    category_svc = get_category_name(prediction_svc)
    
    print("The predicted category using the SVM model is %s." %(category_svc) )
    #print("The conditional probability is: %a" %(prediction_svc_proba.max()*100))

In [17]:
# TEXT PREDICTION
text = """

Narrated by 'Umar bin Al-Khattab:  I heard Allah's Apostle saying, "The reward of deeds depends upon the intentions and every person will get the reward according to what he has intended. So whoever emigrated for worldly benefits or for a woman to marry, his emigration was for what he emigrated for."

"""

In [18]:
predict_from_text(text)

The predicted category using the SVM model is Knowledge.


  df['Text'] = df['Text'].str.replace(punct_sign, '')
  df['Text'] = df['Text'].str.replace(regex_stopword, '')
