In [18]:
from transformers import AutoTokenizer, AutoModelForTokenClassification

tokenizer = AutoTokenizer.from_pretrained("tokenizer_date")
model = AutoModelForTokenClassification.from_pretrained("model_date")

In [19]:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("token-classification", model=model, tokenizer=tokenizer)

In [35]:
pipe("I have a meeting on 24th April at 3PM and 25th April at 4PM")

[{'entity': 'B-DATE',
  'score': 0.99277323,
  'index': 6,
  'word': '24th',
  'start': 20,
  'end': 24},
 {'entity': 'I-DATE',
  'score': 0.94976634,
  'index': 7,
  'word': 'April',
  'start': 25,
  'end': 30},
 {'entity': 'B-TIME',
  'score': 0.9579224,
  'index': 9,
  'word': '3',
  'start': 34,
  'end': 35},
 {'entity': 'I-TIME',
  'score': 0.9816777,
  'index': 10,
  'word': '##PM',
  'start': 35,
  'end': 37},
 {'entity': 'B-DATE',
  'score': 0.97738975,
  'index': 12,
  'word': '25th',
  'start': 42,
  'end': 46},
 {'entity': 'I-DATE',
  'score': 0.7439361,
  'index': 13,
  'word': 'April',
  'start': 47,
  'end': 52},
 {'entity': 'I-TIME',
  'score': 0.58697575,
  'index': 14,
  'word': 'at',
  'start': 53,
  'end': 55},
 {'entity': 'B-TIME',
  'score': 0.94118047,
  'index': 15,
  'word': '4',
  'start': 56,
  'end': 57},
 {'entity': 'I-TIME',
  'score': 0.97838575,
  'index': 16,
  'word': '##PM',
  'start': 57,
  'end': 59}]

In [30]:
entities_ = pipe("I have a meeting on monday")

date = None
time = None

for entity in entities:
    if entity['entity'] == 'B-DATE':
        date = entity['word']
    elif entity['entity'] == 'I-DATE':
        date += ' ' + entity['word']
    elif entity['entity'] == 'B-TIME':
        time = entity['word']
    elif entity['entity'] == 'I-TIME':
        time += entity['word'].replace('##', '')

result = {
    'date': date,
    'time': time
}

print(result)

{'date': 'Monday', 'time': '3pm'}


In [34]:
entities = pipe("I have a meeting on 24th April at 3PM and 25th April at 4PM")

date = []
time = []

for entity in entities:
    if entity['entity'] == 'B-DATE':
        word_date = entity['word']
    elif entity['entity'] == 'I-DATE':
        word_date += ' ' + entity['word']
        date.append(word_date)
    elif entity['entity'] == 'B-TIME':
        word_time = entity['word']
    elif entity['entity'] == 'I-TIME':
        word_time += entity['word'].replace('##', '')
        time.append(word_time)

result = {
    'date': date,
    'time': time
}

print(result)

{'date': ['24th April', '25th April'], 'time': ['3PM', '3PMat', '4PM']}


In [44]:
entities = pipe("Wedding on 2nd April at 4PM")

import datetime

def merge_entities(entities):
    merged_entities = {'date': [], 'time': []}
    current_date = []
    current_time = []
    
    for entity in entities:
        if entity['entity'].startswith('B-DATE'):
            if current_date:
                merged_entities['date'].append(''.join(current_date))
            current_date = [entity['word']]
        elif entity['entity'].startswith('I-DATE'):
            current_date.append(entity['word'])
        elif entity['entity'].startswith('B-TIME'):
            if current_time:
                merged_entities['time'].append(''.join(current_time).replace('##', ''))
            current_time = [entity['word'].replace('##', '')]
        elif entity['entity'].startswith('I-TIME'):
            current_time.append(entity['word'].replace('##', ''))
    
    if current_date:
        merged_entities['date'].append(''.join(current_date))
    if current_time:
        merged_entities['time'].append(''.join(current_time))
    
    
    return merged_entities

merged = merge_entities(entities)

print(merged)

{'date': ['2ndApril'], 'time': ['4PM']}
