# Custom HuggingFace NER

In [11]:
!pip install transformers seqeval[gpu]
!pip install flair

Collecting flair
  Downloading flair-0.12.2-py3-none-any.whl (373 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m373.1/373.1 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
Collecting segtok>=1.5.7 (from flair)
  Downloading segtok-1.5.11-py3-none-any.whl (24 kB)
Collecting mpld3==0.3 (from flair)
  Downloading mpld3-0.3.tar.gz (788 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m788.5/788.5 kB[0m [31m10.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting sqlitedict>=1.6.0 (from flair)
  Downloading sqlitedict-2.1.0.tar.gz (21 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting deprecated>=1.2.4 (from flair)
  Downloading Deprecated-1.2.14-py2.py3-none-any.whl (9.6 kB)
Collecting boto3 (from flair)
  Downloading boto3-1.28.67-py3-none-any.whl (135 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m135.8/135.8 kB[0m [31m8.8 MB/s[0m eta [36m0:00:00[0m
Collectin

In [5]:
from transformers import AutoTokenizer, BertForTokenClassification

# Checkpoint identifier shared by the tokenizer and the model below
# (presumably a Hugging Face Hub repo id — confirm).
model_name = "MehdiHosseiniMoghadam/skill-role-mapper"
# Both objects are loaded from the same checkpoint and kept as notebook-level
# names; `prediction` looks up `tokenizer` and `model` when it is called.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = BertForTokenClassification.from_pretrained(model_name)

In [8]:
def prediction(text):
  """Label every token of ``text`` with the token-classification model.

  Relies on the notebook-level ``tokenizer`` and ``model`` objects, which are
  looked up each time the function is called.

  Args:
    text: A single input string.

  Returns:
    A comma-separated string with one label name per position of the
    tokenized sequence, in order. The positions are whatever the tokenizer
    emits (presumably sub-word pieces plus special tokens — confirm), so the
    labels do not map one-to-one onto whitespace-separated words.
  """
  import torch  # local import so the function does not depend on another cell's imports

  # truncation=True keeps over-long inputs within the maximum length the
  # tokenizer reports instead of failing inside the model.
  encoding = tokenizer(text, return_tensors="pt", truncation=True)

  # Pure inference: disable autograd so no graph is built for the forward pass.
  with torch.no_grad():
    logits = model(**encoding).logits

  # [0] picks the single sequence of the batch. Unlike squeeze(), it still
  # yields a list when the sequence holds only one token.
  label_ids = logits.argmax(-1)[0].tolist()
  return ','.join(model.config.id2label[label_id] for label_id in label_ids)

In [9]:
# Sample résumé-style blurb used to exercise the tagger. The string starts and
# ends with a newline and keeps the double space between "MongoDB" and "HTML".
text = (
    "\n"
    "Searching for new opportunities as Junior Node.js JavaScript backend developer.\n"
    "Over 15 years of experience in different IT areas. Experience with: Node.js JavaScript MongoDB  HTML CSS Java Lotus Script websocket socket.io\n"
    "Docker babel Webpack MySQL JSON React and project management\n"
)

# Last expression of the cell, so the comma-separated labels are displayed.
prediction(text)

'O,O,O,O,O,O,O,In-SK,In-SK,In-SK,In-SK,In-SK,In-SK,O,O,O,In-SK,O,O,O,O,O,O,O,In-SK,O,O,O,O,In-SK,In-SK,In-SK,In-SK,In-SK,In-SK,In-SK,In-SK,In-SK,In-SK,In-SK,In-SK,In-SK,In-SK,O,In-SK,In-SK,O,O,In-SK,O,O,In-SK,In-SK,In-SK,O,In-SK,In-SK,O,In-SK,In-SK,In-SK,In-SK,In-SK,In-SK,In-SK,O,O,In-SK,O'

# Custom Flair NER

In [10]:
!gdown --i 1Pqh_QrByhiPsmBrRz6ZlciHIRLISN8Fe

Downloading...
From: https://drive.google.com/uc?id=1Pqh_QrByhiPsmBrRz6ZlciHIRLISN8Fe
To: /content/best-model (1).pt
100% 272M/272M [00:06<00:00, 39.2MB/s]


In [12]:
from flair.data import Sentence
from flair.models import SequenceTagger
# Load the trained Flair tagger (the weights file is available in the repo).
# It is bound to its own name so it does not replace the Hugging Face `model`
# that `prediction` looks up when called; reusing `model` here would break any
# later call to `prediction`.
# NOTE(review): SequenceTagger.load presumably deserialises the checkpoint via
# torch.load (pickle) — only load weight files from a trusted source.
flair_tagger = SequenceTagger.load('/content/best-model (1).pt')
# create example sentence
sentence = Sentence('Searching project management for new opportunities as Junior Node.js JavaScript backend developer. Over 15 years of experience in different IT areas. Experience with: Node.js JavaScript MongoDB  HTML CSS Java Lotus Script websocket socket.io Docker babel Webpack MySQL JSON React')
# predict the tags; the predictions are attached to `sentence` itself
flair_tagger.predict(sentence, verbose=True)
# Last expression of the cell: the sentence followed by its tagged tokens.
sentence.to_tagged_string()

Downloading (…)lve/main/config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

2023-10-19 22:49:24,718 SequenceTagger predicts: Dictionary with 4 tags: O, In-SK, <START>, <STOP>


Batch inference: 100%|██████████| 1/1 [00:00<00:00,  2.23it/s]


'Sentence[42]: "Searching project management for new opportunities as Junior Node.js JavaScript backend developer. Over 15 years of experience in different IT areas. Experience with: Node.js JavaScript MongoDB  HTML CSS Java Lotus Script websocket socket.io Docker babel Webpack MySQL JSON React" → ["project"/In-SK, "management"/In-SK, "Node.js"/In-SK, "JavaScript"/In-SK, "IT"/In-SK, "Node.js"/In-SK, "JavaScript"/In-SK, "MongoDB"/In-SK, "HTML"/In-SK, "CSS"/In-SK, "Java"/In-SK, "Lotus"/In-SK, "Docker"/In-SK, "MySQL"/In-SK, "JSON"/In-SK, "React"/In-SK]'