In [1]:
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
from huggingface_hub import hf_hub_download
import joblib

In [2]:
!pip install -q gradio

In [11]:
import gradio as gr

In [3]:
def LoadLatinDependencies(vectorizer_id: str, model_id: str,
                          repo_id: str = "MohamedAmineLayachi/North_Latin_Version"):
  """
    Download and deserialize the Latin-script detection vectorizer and model.

    Args:
      vectorizer_id: Filename of the serialized vectorizer inside the Hub repository.
      model_id: Filename of the serialized classifier inside the Hub repository.
      repo_id: Hugging Face Hub repository that holds both files. Defaults to the
        repository this notebook was originally written against.

    Returns:
      A ``(vectorizer, model)`` tuple holding the two deserialized objects.
  """
  def _load(filename: str):
    # SECURITY: joblib.load unpickles the downloaded file, which can execute
    # arbitrary code. Only point `repo_id` at repositories you trust.
    return joblib.load(hf_hub_download(repo_id=repo_id, filename=filename))

  # Tuple elements are evaluated left to right: vectorizer first, then the model.
  return _load(vectorizer_id), _load(model_id)

In [4]:
def LoadArabicDependencies(model_id: str, r_labels=False, only_pipeline=False):
  """
    Load the Arabic-script dialect classifier from the Hugging Face Hub.

    The tokenizer and sequence-classification model are both loaded from
    ``model_id``; the model is configured with the four country labels defined
    below and wrapped in a ``text-classification`` pipeline.

    Args:
      model_id: Hub identifier passed to both ``from_pretrained`` calls.
      r_labels: If truthy (and ``only_pipeline`` is falsy), also return the
        label metadata dictionary.
      only_pipeline: If truthy, return just the pipeline. Takes precedence
        over ``r_labels``.

    Returns:
      * ``pipe`` when ``only_pipeline`` is truthy;
      * ``(pipe, tokenizer, model, label_info)`` when ``r_labels`` is truthy,
        where ``label_info`` has the keys ``'labels'``, ``'label_id'`` and
        ``'id_label'``;
      * ``(pipe, tokenizer, model)`` otherwise.
  """
  labels = ['algeria','tunisia','morocco','egypt']
  id2label = dict(enumerate(labels))
  label2id = {label: idx for idx, label in id2label.items()}
  tokenizer = AutoTokenizer.from_pretrained(model_id)
  model = AutoModelForSequenceClassification.from_pretrained(model_id, num_labels=len(labels), id2label=id2label, label2id=label2id)
  pipe = pipeline('text-classification', model=model, tokenizer=tokenizer)

  # Flags are tested by truthiness rather than `is True` / `is False`, so a
  # truthy non-bool flag no longer falls through every branch and returns None.
  if only_pipeline:
    return pipe
  if r_labels:
    return pipe, tokenizer, model, {'labels': labels, 'label_id': label2id, 'id_label': id2label}
  return pipe, tokenizer, model

In [5]:
def LatinPrediction(text: str, vectorizer, model):
  """
   Classify a single text written in Latin characters.

   The text is wrapped in a one-element list, converted to features with
   ``vectorizer.transform`` and handed to ``model.predict``. Whatever
   ``predict`` returns is passed back unchanged, so callers index into it
   to get the label for ``text``.
  """
  documents = [text]
  return model.predict(vectorizer.transform(documents))

In [6]:
def ArabicPrediction(text: str, pipeline, show_score=False):
  """
   Classify a single text written in Arabic characters.

   ``pipeline`` is called once with ``text``. With ``show_score`` truthy the
   raw pipeline output is returned as-is; otherwise only the ``'label'`` entry
   of the first result is returned.
  """
  result = pipeline(text)
  if not show_score:
    return result[0]['label']
  return result

In [15]:
class DialectClassifier:
  """
    Wrapper object bundling every model used in this project.

    Construction loads the Latin-script vectorizer and classifier (stored as
    ``LatinVect`` and ``LatinModel``) and the Arabic-script pipeline (stored
    as ``ArabicPipe``).
  """
  def __init__(self):
    latin_vectorizer, latin_model = LoadLatinDependencies(
      "North_Latin_CountVectorizer.joblib",
      "North_Latin_MNB_Classifier.joblib",
    )
    self.LatinVect = latin_vectorizer
    self.LatinModel = latin_model
    self.ArabicPipe = LoadArabicDependencies(
      "Oelbourki/northafrica-arabizi-dialect-classifier",
      only_pipeline=True,
    )

  def predictArabic(self, text):
    """
     Return the label predicted for a text written in Arabic characters.
    """
    return ArabicPrediction(text, self.ArabicPipe)

  def predictLatin(self, text):
    """
      Return the label predicted for a text written in Latin characters.

      LatinPrediction returns one prediction per input document; only one
      document is submitted, so the first entry is returned.
    """
    return LatinPrediction(text, self.LatinVect, self.LatinModel)[0]

In [16]:
# Shared classifier instance used by `classify`. Constructing it loads the Latin
# vectorizer/model and the Arabic pipeline (see DialectClassifier.__init__).
DC = DialectClassifier()

In [19]:
def classify(text, alphabet):
  """
    Callback used by the Gradio interface.

    Args:
      text: The text to classify; may be ``None`` or blank.
      alphabet: ``'Latin'`` or ``'Arabic'``, selecting which model is used.

    Returns:
      The predicted label in title case, ``'Nothing to classify.'`` when
      ``text`` is ``None`` or contains only whitespace, or ``'Unsupported :('``
      when ``alphabet`` is anything other than the two supported values.
  """
  # Check for missing/blank input first: the original only reached the blank
  # check when `text` was None, where `.strip()` raised AttributeError.
  if text is None or text.strip() == '':
    return 'Nothing to classify.'
  if alphabet == 'Latin':
    return DC.predictLatin(text).title()
  if alphabet == 'Arabic':
    return DC.predictArabic(text).title()
  # An unknown or unselected alphabet previously fell through and returned None.
  return 'Unsupported :('

In [20]:
# Build and launch the Gradio demo: a Soft-themed Blocks container wrapping an
# Interface that routes the two inputs to `classify` and shows its text result.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Interface(
        fn=classify,
        # Two inputs, listed in the order of classify's parameters (text, alphabet).
        inputs=["text", gr.Dropdown(
            # These choices are the exact strings `classify` compares `alphabet` against.
            ["Latin", "Arabic"], label="Alphabet"
        )],
        outputs=["text"],
    )
    # NOTE(review): launch() is called while the Blocks context is still open;
    # Gradio's examples usually call it after the `with` block — confirm before moving it.
    demo.launch()

Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://a007ec9ea543f4d96e.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)
