# Loading datasets

In [1]:
from datasets import load_dataset

In [2]:
split = 'train'
cache_dir = './cache'

In [3]:
dialogue_data = load_dataset(
    "doc2dial",
    name="dialogue_domain",  # this is the name of the dataset for the second subtask, dialog generation
    split=split,
    ignore_verifications=True,
    cache_dir=cache_dir,
)


document_data = load_dataset(
    "doc2dial",
    name="document_domain",  # this is the name of the dataset for the second subtask, dialog generation
    split=split,
    ignore_verifications=True,
    cache_dir=cache_dir,
)

Reusing dataset doc2dial (./cache/doc2dial/dialogue_domain/1.0.1/cf6d3ed4e77cea477387dd51c171a021a09bd314cf3a2cb2a6431ca738c6c0ee)
Reusing dataset doc2dial (./cache/doc2dial/document_domain/1.0.1/cf6d3ed4e77cea477387dd51c171a021a09bd314cf3a2cb2a6431ca738c6c0ee)


In [4]:
rc_data = load_dataset(
    "doc2dial",
    name="doc2dial_rc",  # this is the name of the dataset for the second subtask, dialog generation
    split=split,
    ignore_verifications=True,
    cache_dir=cache_dir,
)

Reusing dataset doc2dial (./cache/doc2dial/doc2dial_rc/1.0.1/cf6d3ed4e77cea477387dd51c171a021a09bd314cf3a2cb2a6431ca738c6c0ee)


In [5]:
dialogue_test = load_dataset(
    "doc2dial",
    name="dialogue_domain",  # this is the name of the dataset for the second subtask, dialog generation
    split='validation',
    ignore_verifications=True,
    cache_dir=cache_dir,
)

Reusing dataset doc2dial (./cache/doc2dial/dialogue_domain/1.0.1/cf6d3ed4e77cea477387dd51c171a021a09bd314cf3a2cb2a6431ca738c6c0ee)


# Extracting user and agent utterances

In [8]:
print(dialogue_data['turns'][0])

user_utterances = []
agent_utterances = []



for sample in dialogue_data:
    turns = sample['turns']
    doc_user = []
    doc_agent = []
    #span = sample['span']
    for turn in turns:
        if turn['role'] == 'user':
            doc_user.append(turn['utterance'])
            
        elif turn['role'] == 'agent':
            doc_agent.append(turn['utterance'])
    user_utterances.append(doc_user)
    agent_utterances.append(doc_agent)
#print(span)
            
###



print(f'Number of user utterances: {len(user_utterances)}')
print(f'Number of agent utterances: {len(agent_utterances)}')

[{'da': 'query_condition', 'references': [{'label': 'precondition', 'sp_id': '4'}], 'role': 'user', 'turn_id': 1, 'utterance': 'Hello, I forgot o update my address, can you help me with that?'}, {'da': 'respond_solution', 'references': [{'label': 'solution', 'sp_id': '6'}, {'label': 'solution', 'sp_id': '7'}], 'role': 'agent', 'turn_id': 2, 'utterance': 'hi, you have to report any change of address to DMV within 10 days after moving. You should do this both for the address associated with your license and all the addresses associated with all your vehicles.'}, {'da': 'query_solution', 'references': [{'label': 'solution', 'sp_id': '56'}], 'role': 'user', 'turn_id': 3, 'utterance': 'Can I do my DMV transactions online?'}, {'da': 'respond_solution', 'references': [{'label': 'solution', 'sp_id': '56'}], 'role': 'agent', 'turn_id': 4, 'utterance': 'Yes, you can sign up for MyDMV for all the online transactions needed.'}, {'da': 'query_condition', 'references': [{'label': 'precondition', 'sp

In [9]:
document_data

Dataset({
    features: ['domain', 'doc_id', 'title', 'doc_text', 'spans', 'doc_html_ts', 'doc_html_raw'],
    num_rows: 3416
})

In [10]:
spans = []
spans_2 = []

for sample in document_data:
    spans_2 = sample['doc_text']
    doc_spans = spans_2.split('.')
    # for span in spans_2:
    #     s = span['text_sp']
    #     doc_spans.append(s)
    spans.append(doc_spans)
#print(s)
#print(spans[0])
#print(spans_2)

In [47]:
print(spans[0:2])

[["\n\nBenefits Planner: Survivors | Planning For Your Survivors \nAs you plan for the future , you'll want to think about what your family would need if you should die now", ' Social Security can help your family if you have earned enough Social Security credits through your work', ' You can earn up to four credits each year', ' In 2019 , for example , you earn one credit for each $1,360 of wages or self - employment income', ' When you have earned $5,440 , you have earned your four credits for the year', ' The number of credits needed to provide benefits for your survivors depends on your age when you die', ' No one needs more than 40 credits 10 years of work to be eligible for any Social Security benefit', ' But , the younger a person is , the fewer credits they must have for family members to receive survivors benefits', " Benefits can be paid to your children and your spouse who is caring for the children even if you don't have the required number of credits", ' They can get benef

In [40]:
print(spans_2)



Student Loan Repayment 
Before repayment begins , develop a plan that puts you on track to pay back your loan on time and in full. Understanding the details of repayment on your federal student loan can save you time and money. Find out what repayment plan options are available , when you must begin making payments , how to make your payment , how to pay your loan off faster , and what to do if you have trouble making payments. 

I need more information about my loan servicer. 
Find out who services your federal student loan. Try This Resource Federal Student Loans: Repaying Your Loans Provides information about federal student loan repayment plan options, finding loan history and loan servicers, and making payments. 

I need more information about the types of repayment plans available. 
Learn more about Repayment Plans. When You Must Begin Payments The Grace Period Making Payments Having Your Student Loan Forgiven REMEMBER : Your federal student loans can t be canceled or forgiven 

# Generating text using transformers

## 1. Text generation pipeline based on GPT2

In [11]:
from transformers import pipeline

In [12]:
generator = pipeline("text-generation")

No model was supplied, defaulted to gpt2 (https://huggingface.co/gpt2)


In [13]:
user_utterances[0:5]

[['Hello, I forgot o update my address, can you help me with that?',
  'Can I do my DMV transactions online?',
  'Thanks, and in case I forget to bring all of the documentation needed to the DMV office, what can I do?',
  'Ok, and can you tell me again where should I report my new address?',
  'Can you tell me more about Traffic points and their cost?'],
 ['Will you keep me informed about when my insurance is about to lapse?',
  'Is it needed that my address is always up to date with you guys?',
  'Can you tell me more about the traffic points and its cost?',
  "It's always a hassle to bring all the required documents to the DMV Office. Please tell me an alternative.",
  'In case I have a problem with my insurance and your inquiry letter does not resolve the problem, what would happen.'],
 ['I moved recently and forgot to update my address. Could this be a problem?',
  'Is it common to delay a transaction due to forgetting any prerequisite when going to the DMV office? what could be th

In [14]:
doc_user[0:10]

['how student loan payments are handled',
 'If I want to know more about this topic and make payments',
 'ok i understand and what things should i avoid?',
 "What if it's not up to me and I can't make payments?",
 'How can I make a payment by post?',
 'Can you forgive the student loan?']

### Example 1-1. Top 5 DMV Mistakes - Forgot to update the address (without dialogue history) - better score

In [None]:
from transformers import pipeline, set_seed
set_seed(42)

In [98]:
generator = pipeline('text-generation', model='gpt2')
generator("Hello, I forgot o update my address, can you help me with that?", max_length=75, num_return_sequences=5)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[{'generated_text': 'Hello, I forgot o update my address, can you help me with that?\n\nHudde\n\n\nOffline\n\n\nActivity: 564\n\nMerit: 500\n\n\nHero MemberActivity: 564Merit: 500 Re: [ANN][ICO] Bitmoner - Cryptowire - Buy Bitcoin - Bitcoin.com August 22, 2015, 09:27'},
 {'generated_text': 'Hello, I forgot o update my address, can you help me with that?\n\nMigraine: If it is a problem with sleep, check the address on your laptop\n\nMigraine: Go check the "Addresses" page\n\nMigraine: I know, I would\n\nMigraine: But they arent all real names\n'},
 {'generated_text': 'Hello, I forgot o update my address, can you help me with that? - Aufwohl\n\nBruxellius [ edit ]\n\nMember\n\n\nPosts : 1608\n\nJoin date : 2015-12-20\n\nAge : 21\n\n\nMemberPosts : 1608Join date : 2015-12-20Age : 21 Profile :'},
 {'generated_text': "Hello, I forgot o update my address, can you help me with that?\n\nI would like to be able to follow you on twitter.\n\nSincerely,\n\n\nThe Administrator\n\nUser Info: Huzzu4H

In [None]:
from datasets import load_metric
metric = load_metric("sacrebleu")
references = [["you must report a change of address to DMV within ten days of moving. That is the case for the address associated with your license, as well as all the addresses associated with each registered vehicle, which may differ."]]

In [33]:
predictions = ["you may ask. This should work. Now that you have the address, you do not have to re-connect using the web.\n\nThat will also help me to locate out the problem of my address since I do not know it\n\nEdit: You are missing a couple of fields"]
results = metric.compute(predictions=predictions, references=references)
results

{'score': 2.7027319057477674,
 'counts': [15, 2, 0, 0],
 'totals': [53, 52, 51, 50],
 'precisions': [28.30188679245283,
  3.8461538461538463,
  0.9803921568627451,
  0.5],
 'bp': 1.0,
 'sys_len': 53,
 'ref_len': 43}

In [34]:
predictions = ['I\'m sorry, I just told you your address." The reply wasn\'t what the address meant though. I just wanted to have the address to myself with which I could get a message with a short, text message. I decided to use Google "address lookup." To do this at the']
results = metric.compute(predictions=predictions, references=references)
results

{'score': 2.5035231196615144,
 'counts': [16, 2, 0, 0],
 'totals': [58, 57, 56, 55],
 'precisions': [27.586206896551722,
  3.508771929824561,
  0.8928571428571429,
  0.45454545454545453],
 'bp': 1.0,
 'sys_len': 58,
 'ref_len': 43}

In [35]:
predictions = ['I\'m using the new address from the "Registrar" page on the wiki!\n\nWell this is true, you will need a new password if you want to use the server.\n\nAlso if you use a proxy or you install a script and can\'t get the client server']
results = metric.compute(predictions=predictions, references=references)
results

{'score': 1.2704589353414029,
 'counts': [10, 0, 0, 0],
 'totals': [51, 50, 49, 48],
 'precisions': [19.607843137254903,
  1.0,
  0.5102040816326531,
  0.2604166666666667],
 'bp': 1.0,
 'sys_len': 51,
 'ref_len': 43}

In [36]:
predictions = ['I need to update my address so please help me with that?I have an issue,Please let me know, I cannot keep going back, what should I do next?:- I should be able to get the data I need, right?You can also help, let me know that?']
results = metric.compute(predictions=predictions, references=references)
results

{'score': 0.9460547950971884,
 'counts': [6, 0, 0, 0],
 'totals': [60, 59, 58, 57],
 'precisions': [10.0,
  0.847457627118644,
  0.43103448275862066,
  0.21929824561403508],
 'bp': 1.0,
 'sys_len': 60,
 'ref_len': 43}

In [135]:
generator("user:Hello, I forgot o update my address, can you help me with that?", max_length=75, num_return_sequences=5)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[{'generated_text': "user:Hello, I forgot o update my address, can you help me with that?\n\nuser:Hi, could you help me with that?\n\nuser: I will tell you. It's a question for you.\n\nuser:...\n\nuser: I told you to do\n\nuser: But I can't...\n\nuser: I"},
 {'generated_text': 'user:Hello, I forgot o update my address, can you help me with that? I have $10 worth of credit card and one more note? Just give me a few minutes. Thanks ^\n\nI want to send out a request of sorts, I know you can help me address the issue, why send this, just feel free to drop it me on Twitter'},
 {'generated_text': 'user:Hello, I forgot o update my address, can you help me with that?\n\n[11:37:06 PM] [Client thread/INFO] [The Elder Scrolls V: Skyrim]: Skins are in-game, right?\n\n\n[11:37:06 PM] [Client thread/INFO] [The Elder Scrolls V: Skyrim]:'},
 {'generated_text': "user:Hello, I forgot o update my address, can you help me with that?\n\nError\n\nIt means I sent you multiple emails. I'm not sure if you were

In [136]:
metric = load_metric("sacrebleu")
references = [["you must report a change of address to DMV within ten days of moving. That is the case for the address associated with your license, as well as all the addresses associated with each registered vehicle, which may differ."]]

In [137]:
predictions = ['user:Hi, could you help me with that?\n\nuser: I will tell you. It\'s a question for you.\n\nuser:...\n\nuser: I told you to do\n\nuser: But I can\'t...\n\nuser: I']
results = metric.compute(predictions=predictions, references=references)
results

{'score': 1.3072157844994778,
 'counts': [8, 0, 0, 0],
 'totals': [47, 46, 45, 44],
 'precisions': [17.02127659574468,
  1.0869565217391304,
  0.5555555555555556,
  0.2840909090909091],
 'bp': 1.0,
 'sys_len': 47,
 'ref_len': 43}

In [138]:
predictions = ['I have $10 worth of credit card and one more note? Just give me a few minutes. Thanks ^\n\nI want to send out a request of sorts, I know you can help me address the issue, why send this, just feel free to drop it me on Twitter']
results = metric.compute(predictions=predictions, references=references)
results

{'score': 1.1754281596509946,
 'counts': [10, 0, 0, 0],
 'totals': [55, 54, 53, 52],
 'precisions': [18.181818181818183,
  0.9259259259259259,
  0.4716981132075472,
  0.2403846153846154],
 'bp': 1.0,
 'sys_len': 55,
 'ref_len': 43}

In [139]:
predictions = ['[11:37:06 PM] [Client thread/INFO] [The Elder Scrolls V: Skyrim]: Skins are in-game, right?\n\n\n[11:37:06 PM] [Client thread/INFO] [The Elder Scrolls V: Skyrim]:']
results = metric.compute(predictions=predictions, references=references)
results

{'score': 0.7002773917100699,
 'counts': [1, 0, 0, 0],
 'totals': [52, 51, 50, 49],
 'precisions': [1.9230769230769231,
  0.9803921568627451,
  0.5,
  0.25510204081632654],
 'bp': 1.0,
 'sys_len': 52,
 'ref_len': 43}

In [143]:
predictions = ['Error\n\nIt means I sent you multiple emails. I\'m not sure if you were aware of this. Could you please explain.']
results = metric.compute(predictions=predictions, references=references)
results

{'score': 1.0081108203421676,
 'counts': [4, 0, 0, 0],
 'totals': [24, 23, 22, 21],
 'precisions': [16.666666666666668,
  2.1739130434782608,
  1.1363636363636365,
  0.5952380952380952],
 'bp': 0.453089017280169,
 'sys_len': 24,
 'ref_len': 43}

In [144]:
predictions = ['Thanks,\n\nJosé\n\nPossibly!\n\nThank you.\n\nHello! Please see the FAQ.\n\nThe addresses displayed below are actually a valid copy of my public key.\n\nThis is a very basic issue of having a unique identifier. As']
results = metric.compute(predictions=predictions, references=references)
results

{'score': 1.5164399783192966,
 'counts': [10, 0, 0, 0],
 'totals': [42, 41, 40, 39],
 'precisions': [23.80952380952381,
  1.2195121951219512,
  0.625,
  0.32051282051282054],
 'bp': 0.9764716866522433,
 'sys_len': 42,
 'ref_len': 43}

### Example 1-2. Top 5 DMV Mistakes - Forgot to update the address (with dialogue history)

In [87]:
from transformers import pipeline, set_seed
generator = pipeline('text-generation', model='gpt2')
set_seed(42)
generator("user:Can you tell me more about Traffic points and their cost? agent:Sure. Any change of address must be reported to the DMV, that's for the address associated with your license and any of your vehicles. user:Ok, and can you tell me again where should I report my new address? agent:This happens often with our customers so that's why our website and MyDMV are so useful for our customers. Just check if you can make your transaction online so you don't have to go to the DMV Office. user:Thanks, and in case I forget to bring all of the documentation needed to the DMV office, what can I do? agent:Yes, you can sign up for MyDMV for all the online transactions needed. user:Can I do my DMV transactions online? agent:hi, you have to report any change of address to DMV within 10 days after moving. You should do this both for the address associated with your license and all the addresses associated with all your vehicles. user:Hello, I forgot o update my address, can you help me with that?", 
          max_length=300, num_return_sequences=5)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[{'generated_text': "user:Can you tell me more about Traffic points and their cost? agent:Sure. Any change of address must be reported to the DMV, that's for the address associated with your license and any of your vehicles. user:Ok, and can you tell me again where should I report my new address? agent:This happens often with our customers so that's why our website and MyDMV are so useful for our customers. Just check if you can make your transaction online so you don't have to go to the DMV Office. user:Thanks, and in case I forget to bring all of the documentation needed to the DMV office, what can I do? agent:Yes, you can sign up for MyDMV for all the online transactions needed. user:Can I do my DMV transactions online? agent:hi, you have to report any change of address to DMV within 10 days after moving. You should do this both for the address associated with your license and all the addresses associated with all your vehicles. user:Hello, I forgot o update my address, can you help

In [99]:
metric = load_metric("sacrebleu")
references = [["agent:DMV maintains a point system to track dangerous drivers. Often , motorists convicted of a traffic ticket feel they have resolved all their motoring issues with the local court, but later learn that the Driver Responsibility Assessment DRA is a separate DMV charge based on the total points they accumulate."]]

In [100]:
predictions = ["agent:Ok, thanks man, now I just need to take my own ID and enter it into the MyDMV as well."]
results = metric.compute(predictions=predictions, references=references)
results

{'score': 1.241004033856688,
 'counts': [7, 1, 0, 0],
 'totals': [25, 24, 23, 22],
 'precisions': [28.0,
  4.166666666666667,
  2.1739130434782608,
  1.1363636363636365],
 'bp': 0.301194211912202,
 'sys_len': 25,
 'ref_len': 55}

In [101]:
predictions = ["agent:No problem, just need to go through each page and sign on the other side."]
results = metric.compute(predictions=predictions, references=references)
results

{'score': 0.9902321088268243,
 'counts': [7, 2, 0, 0],
 'totals': [19, 18, 17, 16],
 'precisions': [36.8421052631579,
  11.11111111111111,
  2.9411764705882355,
  1.5625],
 'bp': 0.15035789770837657,
 'sys_len': 19,
 'ref_len': 55}

In [102]:
predictions = ["agent:Yes, we can help with all the transactions required to make your vehicle and the vehicle registration online."]
results = metric.compute(predictions=predictions, references=references)
results

{'score': 1.1226387140064022,
 'counts': [9, 1, 0, 0],
 'totals': [22, 21, 20, 19],
 'precisions': [40.90909090909091, 4.761904761904762, 2.5, 1.3157894736842106],
 'bp': 0.22313016014842982,
 'sys_len': 22,
 'ref_len': 55}

### Example 2-1. Dial-in search accounts - Search records (without dialogue history)

In [84]:
generator("Can I know if anybody asked for my records?", 
          max_length=30, num_return_sequences=5)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[{'generated_text': "Can I know if anybody asked for my records? No. No, I don't care what those documents say or do. We all had one —"},
 {'generated_text': 'Can I know if anybody asked for my records? I really need such a person."\n\nKaepernick\'s team was already on the losing end of'},
 {'generated_text': "Can I know if anybody asked for my records? [Laughs] I don't think so. [Laughs] [Laughs] You never look, as"},
 {'generated_text': 'Can I know if anybody asked for my records?\n\n"I am sorry. I think it\'s been a long time, but I think I'},
 {'generated_text': "Can I know if anybody asked for my records? Not that I would be surprised if I did, but it's a different case. I'm curious"}]

In [118]:
metric = load_metric("sacrebleu")
references = [["Records of searches are public records."]]

In [119]:
predictions = ["No. No, I don't care what those documents say or do. We all had one —"]
results = metric.compute(predictions=predictions, references=references)
results

{'score': 2.0244462660665508,
 'counts': [1, 0, 0, 0],
 'totals': [19, 18, 17, 16],
 'precisions': [5.2631578947368425,
  2.7777777777777777,
  1.4705882352941178,
  0.78125],
 'bp': 1.0,
 'sys_len': 19,
 'ref_len': 7}

In [120]:
predictions = ["I really need such a person.\n\nKaepernick\'s team was already on the losing end of"]
results = metric.compute(predictions=predictions, references=references)
results

{'score': 2.908317710573757,
 'counts': [2, 0, 0, 0],
 'totals': [16, 15, 14, 13],
 'precisions': [12.5,
  3.3333333333333335,
  1.7857142857142858,
  0.9615384615384616],
 'bp': 1.0,
 'sys_len': 16,
 'ref_len': 7}

In [121]:
predictions = ["Not that I would be surprised if I did, but it's a different case. I'm curious"]
results = metric.compute(predictions=predictions, references=references)
results

{'score': 2.1476912089159055,
 'counts': [1, 0, 0, 0],
 'totals': [18, 17, 16, 15],
 'precisions': [5.555555555555555,
  2.9411764705882355,
  1.5625,
  0.8333333333333334],
 'bp': 1.0,
 'sys_len': 18,
 'ref_len': 7}

### with 'user:' at prompt - better score

In [103]:
generator("user:Can I know if anybody asked for my records?", 
          max_length=30, num_return_sequences=5)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[{'generated_text': 'user:Can I know if anybody asked for my records?\n\nMr Robot: I guess not.\n\nDr. Robot: Well, maybe'},
 {'generated_text': 'user:Can I know if anybody asked for my records?\n\n: If the answer is "yes", then the server must be able to see'},
 {'generated_text': "user:Can I know if anybody asked for my records?\n\nMARTIN: I think you'll get the full answer as soon as you"},
 {'generated_text': "user:Can I know if anybody asked for my records?\n\n\nReply 1: Yes, she's an authorized user at www.favicon"},
 {'generated_text': 'user:Can I know if anybody asked for my records?\n\nMr. Smith: There should be a record of my relationship with you.\n'}]

In [109]:
metric = load_metric("sacrebleu")
references = [["agent:Records of searches are public records."]]

In [110]:
predictions = ["Mr Robot: I guess not.\n\nDr. Robot: Well, maybe"]
results = metric.compute(predictions=predictions, references=references)
results

{'score': 3.377156414337854,
 'counts': [2, 0, 0, 0],
 'totals': [14, 13, 12, 11],
 'precisions': [14.285714285714286,
  3.8461538461538463,
  2.0833333333333335,
  1.1363636363636365],
 'bp': 1.0,
 'sys_len': 14,
 'ref_len': 9}

In [112]:
predictions = [": If the answer is 'yes', then the server must be able to see"]
results = metric.compute(predictions=predictions, references=references)
results

{'score': 2.627961710408444,
 'counts': [1, 0, 0, 0],
 'totals': [15, 14, 13, 12],
 'precisions': [6.666666666666667,
  3.5714285714285716,
  1.9230769230769231,
  1.0416666666666667],
 'bp': 1.0,
 'sys_len': 15,
 'ref_len': 9}

In [115]:
predictions = ["MARTIN: I think you'll get the full answer as soon as you"]
results = metric.compute(predictions=predictions, references=references)
results

{'score': 3.0890553181566975,
 'counts': [1, 0, 0, 0],
 'totals': [13, 12, 11, 10],
 'precisions': [7.6923076923076925,
  4.166666666666667,
  2.272727272727273,
  1.25],
 'bp': 1.0,
 'sys_len': 13,
 'ref_len': 9}

In [116]:
predictions = ["Reply 1: Yes, she's an authorized user at www.favicon"]
results = metric.compute(predictions=predictions, references=references)
results

{'score': 3.673526562988939,
 'counts': [2, 0, 0, 0],
 'totals': [13, 12, 11, 10],
 'precisions': [15.384615384615385,
  4.166666666666667,
  2.272727272727273,
  1.25],
 'bp': 1.0,
 'sys_len': 13,
 'ref_len': 9}

In [117]:
predictions = ["Mr. Smith: There should be a record of my relationship with you."]
results = metric.compute(predictions=predictions, references=references)
results

{'score': 3.4585921141027356,
 'counts': [3, 0, 0, 0],
 'totals': [15, 14, 13, 12],
 'precisions': [20.0,
  3.5714285714285716,
  1.9230769230769231,
  1.0416666666666667],
 'bp': 1.0,
 'sys_len': 15,
 'ref_len': 9}

### Example 2-2. Dial-in search accounts - Search records (with dialogue history)

In [65]:
generator("user:Just to check the address of contact is correct please tell me if this is the right one: NYSDMV DIAL - IN AccountsRoom 2316 Empire State PlazaAlbany , NY 12228 473 - 2137 agent:We can supply you with the name and business address of the Dial-In account holder and the time, date and search criteria they used for the records. user:What info can you give me about those that search for my DMV records? agent:First, you should update your browser to its newest version available. user:What should I do if your site says my browser is not compatible?. agent:ok our mail address for dial-in search account related problems is NYSDMV DIAL - IN AccountsRoom 2316 Empire State PlazaAlbany , NY 12228 473 - 2137 user:Sorry, it looks like my chat didn't show your message well, can you please how to tell me how to contact DMV about my dial-in search account. agent:Our contact address for that is NYSDMV DIAL - IN AccountsRoom 2316 Empire State PlazaAlbany , NY 12228 473 - 2137e - mail user:I need to contact DMV about my dial-in search account agent:You should visit https://encryption.ny.gov/ to check the compatibility of your website and if it says that is not compatible, please update your browser. user:it looks like my browser is not compatible with your website. agent:Yes, since the records of searches are public we'll inform any motorist if his records were searched. user:Can I know if anybody asked for my records?", 
          max_length=400, num_return_sequences=5)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[{'generated_text': "user:Just to check the address of contact is correct please tell me if this is the right one: NYSDMV DIAL - IN AccountsRoom 2316 Empire State PlazaAlbany , NY 12228 473 - 2137 agent:We can supply you with the name and business address of the Dial-In account holder and the time, date and search criteria they used for the records. user:What info can you give me about those that search for my DMV records? agent:First, you should update your browser to its newest version available. user:What should I do if your site says my browser is not compatible?. agent:ok our mail address for dial-in search account related problems is NYSDMV DIAL - IN AccountsRoom 2316 Empire State PlazaAlbany , NY 12228 473 - 2137 user:Sorry, it looks like my chat didn't show your message well, can you please how to tell me how to contact DMV about my dial-in search account. agent:Our contact address for that is NYSDMV DIAL - IN AccountsRoom 2316 Empire State PlazaAlbany , NY 12228 473 - 2137e - 

In [75]:
metric = load_metric("sacrebleu")
#print(metric)
predictions = ["I can ask you to forward the records if anyone asks the same address, or the same email address. agent:My computer is now functioning fine. It's a web browser, you may open, navigate, click, type, click the email or mail, if you are browsing the"]
references = [["agent:You can contact us at: NYSDMV DIAL - IN AccountsRoom 2316 Empire State PlazaAlbany, NY 12228 473 - 2137e - mail: [email protected] [5 ] "]]
results = metric.compute(predictions=predictions, references=references)
results

{'score': 0.9702695924258076,
 'counts': [5, 0, 0, 0],
 'totals': [56, 55, 54, 53],
 'precisions': [8.928571428571429,
  0.9090909090909091,
  0.46296296296296297,
  0.2358490566037736],
 'bp': 1.0,
 'sys_len': 56,
 'ref_len': 31}

In [67]:
predictions = ["agent:no, they will need to look for their contacts and we'll do it for them. user:How should I find people that request my information?. agent:look for someone that uses your website but do not use it for the same purpose with the phone number and"]
results = metric.compute(predictions=predictions, references=references)
results

{'score': 0.8699141688737136,
 'counts': [3, 0, 0, 0],
 'totals': [55, 54, 53, 52],
 'precisions': [5.454545454545454,
  0.9259259259259259,
  0.4716981132075472,
  0.2403846153846154],
 'bp': 1.0,
 'sys_len': 55,
 'ref_len': 31}

In [70]:
predictions = ["agent:Ok, thanks man, now I just need to check my status\n\nThank you."]
results = metric.compute(predictions=predictions, references=references)
results

{'score': 1.2404298611265816,
 'counts': [2, 0, 0, 0],
 'totals': [18, 17, 16, 15],
 'precisions': [11.11111111111111,
  2.9411764705882355,
  1.5625,
  0.8333333333333334],
 'bp': 0.4856717852477123,
 'sys_len': 18,
 'ref_len': 31}

In [77]:
predictions = ["agent:Yes, if you use our system of checks and balances on the other websites we are using this is your first step, if you use any other search providers, you need to contact us."]
results = metric.compute(predictions=predictions, references=references)
results

{'score': 2.185068324154416,
 'counts': [4, 1, 0, 0],
 'totals': [40, 39, 38, 37],
 'precisions': [10.0,
  2.5641025641025643,
  1.3157894736842106,
  0.6756756756756757],
 'bp': 1.0,
 'sys_len': 40,
 'ref_len': 31}

In [78]:
predictions = ["agent:Yes, we'll go to the DMV office to provide you with your name and the number of the person asking contact."]
results = metric.compute(predictions=predictions, references=references)
results

{'score': 1.5592982903541592,
 'counts': [3, 0, 0, 0],
 'totals': [25, 24, 23, 22],
 'precisions': [12.0,
  2.0833333333333335,
  1.0869565217391304,
  0.5681818181818182],
 'bp': 0.7866278610665535,
 'sys_len': 25,
 'ref_len': 31}

## 2. Transformer XLNet

### Example 1-1. Top 5 DMV Mistakes - Forgot to update the address (without dialogue history) - better score

In [39]:
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained("xlnet-base-cased")
tokenizer = AutoTokenizer.from_pretrained("xlnet-base-cased")

# Padding text helps XLNet with short prompts - proposed by Aman Rusia in https://github.com/rusiaaman/XLNet-gen#methodology
PADDING_TEXT = "Many DMV customers make easily avoidable mistakes that cause them significant problems, including encounters with law enforcement and impounded vehicles. Because we see customers make these mistakes over and over again , we are issuing this list of the top five DMV mistakes and how to avoid them. 1. Forgetting to Update Address By statute , you must report a change of address to DMV within ten days of moving. That is the case for the address associated with your license, as well as all the addresses associated with each registered vehicle, which may differ. It is not sufficient to only: write your new address on the back of your old license; tell the United States Postal Service; or inform the police officer writing you a ticket. If you fail to keep your address current , you will miss a suspension order and may be charged with operating an unregistered vehicle and/or aggravated unlicensed operation, both misdemeanors. This really happens , but the good news is this is a problem that is easily avoidable. Learn more about how to change the address on your license and registrations [1 ] 2. Leaving the State Without Notifying DMV States communicate with each other , so when you move to another state, be sure to tie up any loose ends regarding your New York State license or registration. That means resolving any unanswered tickets, suspensions or revocations, and surrendering your license plates to NYS when you get to your new home state. A license suspension or revocation here could mean that your new home state will not issue you a license there. Remember , it is important to notify DMV of your new address so that any possible mail correspondence can reach you. Also , turning in your plates is important to avoid an insurance lapse. 3. Letting Insurance Lapse Because we all pay indirectly for crashes involving uninsured motorists , New York State requires every motorist to maintain auto insurance every single day a vehicle is registered. DMV works with insurance companies to electronically monitor your insurance coverage , and we know when coverage is dropped for any reason. When that happens , we mail you an insurance inquiry letter to allow you to clear up the problem. We send 500,000 inquiry letters a year. If the inquiry letter does not resolve the problem , we must suspend the vehicle registration and , if it persists, your driver license!We suspend 300,000 registrations a year for failure to maintain insurance. If you fail to maintain an updated address with us , you won t learn that you have an insurance problem , and we will suspend your registration and license. Make sure you turn in your vehicle s license plates at DMV before you cancel your insurance policy. Insurance policies must be from a company licensed in New York State. Learn more about Insurances Lapes [2] and How to Surrender your Plates [3 ] 4. Understanding how Much Traffic Points Cost DMV maintains a point system to track dangerous drivers. Often , motorists convicted of a traffic ticket feel they have resolved all their motoring issues with the local court, but later learn that the Driver Responsibility Assessment DRA is a separate DMV charge based on the total points they accumulate. The $300 DRA fee can be paid in $100 annual installments over three years. Motorists who fail to maintain an updated address with DMV may resolve their tickets with the court, but never receive their DRA assessment because we do not have their new address on record. Failure to pay the DRA will result in a suspended license. Learn more about About the NYS Driver Point System [4] and how to Pay Driver Responsibility Assessment [5 ] 5. Not Bringing Proper Documentation to DMV Office About ten percent of customers visiting a DMV office do not bring what they need to complete their transaction, and have to come back a second time to finish their business. This can be as simple as not bringing sufficient funds to pay for a license renewal or not having the proof of auto insurance required to register a car. Better yet , don t visit a DMV office at all, and see if your transaction can be performed online, like an address change, registration renewal, license renewal, replacing a lost title, paying a DRA or scheduling a road test. Our award - winning website is recognized as one of the best in the nation. It has all the answers you need to efficiently perform any DMV transaction. Consider signing up for our MyDMV service, which offers even more benefits. Sign up or log into MyDMV [6 ]"

prompt = "Hello, I forgot o update my address, can you help me with that?"
inputs = tokenizer(PADDING_TEXT + prompt, add_special_tokens=False, return_tensors="pt")["input_ids"]

prompt_length = len(tokenizer.decode(inputs[0]))
outputs = model.generate(inputs, max_length=1250, do_sample=True, top_p=0.95, top_k=60)
generated = prompt + tokenizer.decode(outputs[0])[prompt_length + 1 :]

print(generated)

Hello, I forgot o update my address, can you help me with that?I am an auto insurer and want to work with your DMV. If you can t find a DMV office that will accept this change to your new address, then I will call you or write to the DMV Office. Once


In [51]:
from datasets import load_metric
metric = load_metric("sacrebleu")
#print(metric)
predictions = ["I am an auto insurer and want to work with your DMV. If you can t find a DMV office that will accept this change to your new address, then I will call you or write to the DMV Office. Once"]
references = [["you must report a change of address to DMV within ten days of moving. That is the case for the address associated with your license, as well as all the addresses associated with each registered vehicle, which may differ."]]
results = metric.compute(predictions=predictions, references=references)
results

{'score': 2.604859149269604,
 'counts': [12, 1, 0, 0],
 'totals': [44, 43, 42, 41],
 'precisions': [27.272727272727273,
  2.3255813953488373,
  1.1904761904761905,
  0.6097560975609756],
 'bp': 1.0,
 'sys_len': 44,
 'ref_len': 43}

### compare with 'user:' at the beginning of prompt - worse score

In [94]:
prompt = "user:Hello, I forgot o update my address, can you help me with that?"
inputs = tokenizer(PADDING_TEXT + prompt, add_special_tokens=False, return_tensors="pt")["input_ids"]

prompt_length = len(tokenizer.decode(inputs[0]))
outputs = model.generate(inputs, max_length=1195, do_sample=True, top_p=0.95, top_k=60)
generated = prompt + tokenizer.decode(outputs[0])[prompt_length + 1 :]

print(generated)

user:Hello, I forgot o update my address, can you help me with that?eop> Thank you for your patience in answering my questions. When you find yourself returning to this page, please let us know the correct address is found. Contact us at: Your Application [8 ] Refresh your password and return to your 'home page' when you return to the previous page. The link in the message will continue to appear as an


In [124]:
# max length 1195
predictions = ["Thank you for your patience in answering my questions. When you find yourself returning to this page, please let us know the correct address is found. Contact us at: Your Application [8 ] Refresh your password and return to your 'home page' when you return to the previous page."]
references = [["you must report a change of address to DMV within ten days of moving. That is the case for the address associated with your license, as well as all the addresses associated with each registered vehicle, which may differ."]]
results = metric.compute(predictions=predictions, references=references)
results

{'score': 1.2037720688339202,
 'counts': [11, 0, 0, 0],
 'totals': [55, 54, 53, 52],
 'precisions': [20.0,
  0.9259259259259259,
  0.4716981132075472,
  0.2403846153846154],
 'bp': 1.0,
 'sys_len': 55,
 'ref_len': 43}

In [95]:
# max length 1230 - better score
predictions = ["No. We can send you the address you want to update you to. We can also send you the URL or message that you want to look up the address or message. If not, please contact us at: Intelli-File [7) - mail: [8 ] Please see our Webmaster's Privacy Policy, for more information on how to use the information we collect from your accounts. DMV may also contact you if you have questions about your search."]
references = [["you must report a change of address to DMV within ten days of moving. That is the case for the address associated with your license, as well as all the addresses associated with each registered vehicle, which may differ."]]
results = metric.compute(predictions=predictions, references=references)
results

{'score': 1.352914939815383,
 'counts': [15, 1, 0, 0],
 'totals': [88, 87, 86, 85],
 'precisions': [17.045454545454547,
  1.1494252873563218,
  0.5813953488372093,
  0.29411764705882354],
 'bp': 1.0,
 'sys_len': 88,
 'ref_len': 43}

### Example 1-2. Top 5 DMV Mistakes - Forgot to update the address (with dialogue history)

In [125]:
PADDING_TEXT = "Many DMV customers make easily avoidable mistakes that cause them significant problems, including encounters with law enforcement and impounded vehicles. Because we see customers make these mistakes over and over again , we are issuing this list of the top five DMV mistakes and how to avoid them. 1. Forgetting to Update Address By statute , you must report a change of address to DMV within ten days of moving. That is the case for the address associated with your license, as well as all the addresses associated with each registered vehicle, which may differ. It is not sufficient to only: write your new address on the back of your old license; tell the United States Postal Service; or inform the police officer writing you a ticket. If you fail to keep your address current , you will miss a suspension order and may be charged with operating an unregistered vehicle and/or aggravated unlicensed operation, both misdemeanors. This really happens , but the good news is this is a problem that is easily avoidable. Learn more about how to change the address on your license and registrations [1 ] 2. Leaving the State Without Notifying DMV States communicate with each other , so when you move to another state, be sure to tie up any loose ends regarding your New York State license or registration. That means resolving any unanswered tickets, suspensions or revocations, and surrendering your license plates to NYS when you get to your new home state. A license suspension or revocation here could mean that your new home state will not issue you a license there. Remember , it is important to notify DMV of your new address so that any possible mail correspondence can reach you. Also , turning in your plates is important to avoid an insurance lapse. 3. Letting Insurance Lapse Because we all pay indirectly for crashes involving uninsured motorists , New York State requires every motorist to maintain auto insurance every single day a vehicle is registered. DMV works with insurance companies to electronically monitor your insurance coverage , and we know when coverage is dropped for any reason. When that happens , we mail you an insurance inquiry letter to allow you to clear up the problem. We send 500,000 inquiry letters a year. If the inquiry letter does not resolve the problem , we must suspend the vehicle registration and , if it persists, your driver license!We suspend 300,000 registrations a year for failure to maintain insurance. If you fail to maintain an updated address with us , you won t learn that you have an insurance problem , and we will suspend your registration and license. Make sure you turn in your vehicle s license plates at DMV before you cancel your insurance policy. Insurance policies must be from a company licensed in New York State. Learn more about Insurances Lapes [2] and How to Surrender your Plates [3 ] 4. Understanding how Much Traffic Points Cost DMV maintains a point system to track dangerous drivers. Often , motorists convicted of a traffic ticket feel they have resolved all their motoring issues with the local court, but later learn that the Driver Responsibility Assessment DRA is a separate DMV charge based on the total points they accumulate. The $300 DRA fee can be paid in $100 annual installments over three years. Motorists who fail to maintain an updated address with DMV may resolve their tickets with the court, but never receive their DRA assessment because we do not have their new address on record. Failure to pay the DRA will result in a suspended license. Learn more about About the NYS Driver Point System [4] and how to Pay Driver Responsibility Assessment [5 ] 5. Not Bringing Proper Documentation to DMV Office About ten percent of customers visiting a DMV office do not bring what they need to complete their transaction, and have to come back a second time to finish their business. This can be as simple as not bringing sufficient funds to pay for a license renewal or not having the proof of auto insurance required to register a car. Better yet , don t visit a DMV office at all, and see if your transaction can be performed online, like an address change, registration renewal, license renewal, replacing a lost title, paying a DRA or scheduling a road test. Our award - winning website is recognized as one of the best in the nation. It has all the answers you need to efficiently perform any DMV transaction. Consider signing up for our MyDMV service, which offers even more benefits. Sign up or log into MyDMV [6 ]"

prompt = "user:Can you tell me more about Traffic points and their cost? agent:Sure. Any change of address must be reported to the DMV, that's for the address associated with your license and any of your vehicles. user:Ok, and can you tell me again where should I report my new address? agent:This happens often with our customers so that's why our website and MyDMV are so useful for our customers. Just check if you can make your transaction online so you don't have to go to the DMV Office. user:Thanks, and in case I forget to bring all of the documentation needed to the DMV office, what can I do? agent:Yes, you can sign up for MyDMV for all the online transactions needed. user:Can I do my DMV transactions online? agent:hi, you have to report any change of address to DMV within 10 days after moving. You should do this both for the address associated with your license and all the addresses associated with all your vehicles. user:Hello, I forgot o update my address, can you help me with that?"
inputs = tokenizer(PADDING_TEXT + prompt, add_special_tokens=False, return_tensors="pt")["input_ids"]

prompt_length = len(tokenizer.decode(inputs[0]))
outputs = model.generate(inputs, max_length=1300, do_sample=True, top_p=0.95, top_k=60)
generated = prompt + tokenizer.decode(outputs[0])[prompt_length + 1 :]

print(generated)

user:Can you tell me more about Traffic points and their cost? agent:Sure. Any change of address must be reported to the DMV, that's for the address associated with your license and any of your vehicles. user:Ok, and can you tell me again where should I report my new address? agent:This happens often with our customers so that's why our website and MyDMV are so useful for our customers. Just check if you can make your transaction online so you don't have to go to the DMV Office. user:Thanks, and in case I forget to bring all of the documentation needed to the DMV office, what can I do? agent:Yes, you can sign up for MyDMV for all the online transactions needed. user:Can I do my DMV transactions online? agent:hi, you have to report any change of address to DMV within 10 days after moving. You should do this both for the address associated with your license and all the addresses associated with all your vehicles. user:Hello, I forgot o update my address, can you help me with that?agent: 

In [127]:
predictions = ["agent: Yes, because DMV always knows where the wrong address is, the car has been stopped by the licensee and you won t learn that you have changed your address at DMV or at the U.S. Customs or Border Protection Offices."]
references = [["DMV maintains a point system to track dangerous drivers. Often , motorists convicted of a traffic ticket feel they have resolved all their motoring issues with the local court, but later learn that the Driver Responsibility Assessment DRA is a separate DMV charge based on the total points they accumulate."]]
results = metric.compute(predictions=predictions, references=references)
results

{'score': 2.1885021248128274,
 'counts': [13, 1, 0, 0],
 'totals': [48, 47, 46, 45],
 'precisions': [27.083333333333332,
  2.127659574468085,
  1.0869565217391304,
  0.5555555555555556],
 'bp': 0.9010751057212905,
 'sys_len': 48,
 'ref_len': 53}

### Example 2-1. Dial-in search accounts - Search records (without dialogue history)

In [128]:
PADDING_TEXT = "Log On to DIAL - IN [1 ] What is a DIAL-IN search account and who can get one? If you regularly need to search DMV records for driver license, registration, vehicle title or insurance information , then DIAL - IN is the service you need. Our typical clients include attorneys, insurance and private investigators, process servers and building / site security personnel. See Drivers Privacy Protection Act DPPA [2] for more information about permissible uses of DMV records. How do I apply for an account? You should complete form Motor Vehicle Record Search Account Application PDF [3] MV-15D. The instructions are on the form. Send the form to : Data Services - New Search AccountNYS Department of Motor Vehicles6 Empire State PlazaAlbany, NY 12228 How will I know my application has been accepted? We will review your application. If you application is not complete , we will return your application with an explanation that will give you more instructions. If your application is complete , we will create an account for you. We will send you your account number and password. Your seven - digit account number will never change. You will enter this account number when you log in to the DIAL - IN system. Are there fees for using a DIAL-IN search account? According to the NYS Vehicle and Traffic Law , we must charge a fee to search DMV records. Private persons, commercial organizations, and not - for - profit organizations must pay a search fee. There is a fee of $7.00 for each search that you make with the DIAL - IN service. There are some organizations that are exempt from search fees. Any public organization, its officers, a volunteer fire company, a volunteer ambulance service, or a legal aid bureau or society or other private entity acting pursuant to section 722 of the New York State County Law, that makes a search for a public purpose, is exempt from the search fee. How do I pay DIAL-IN Search Account fees? Paying customers must include a deposit with the application. Your first deposit must be enough to pay for two months of searches. When we accept your application , your deposit is added to your new account balance. When you make a search , we subtract the search fee from your account balance. Payment by credit card is NOT available at this time. You can use your account to pay for the mail - order searches and the online searches. If you must pay a fee for the service , your account records will reflect your debits and your credits and show your account balance. For more information , please visit our Escrow Accounts [4] page. What business records must I keep to document the searches I perform? The business records you keep must exist prior to the search you perform and must establish the business purpose of the search. Common examples of business records include invoices, retainer agreements, employment applications, consent forms, accident reports, and surveillance reports and numerous other types of records. The Terms of Service of your Search Account require you to maintain these records and to provide them to DMV when requested. Will DMV contact me about the searches I perform using my DIAL-IN Search Account? Yes , DMV may ask you about a DIAL - IN search you performed. We inquire about searches to monitor compliance with the DIAL - IN Terms of Service . You must respond to the inquiry whether or not you have business records that support your search. Your response will be considered an incomplete reply if you do not give us the business records. You can complete an inquiry at any time if you send us the requested acceptable business records. If you performed the search on behalf of a client , you will need to show permissible use of your client, and your client agent relationship. If you personally did not perform the search we have inquired about , please check with the other users of your DIAL - IN account to see if one of them performed the search. If none of the known users performed the search , contact us immediately. If you do not respond to our inquiry about a DIAL - IN search, your search privileges will be suspended until you respond. Will you tell a motorist if I searched his records? Yes. Records of searches are public records. If a motorist asks the DMV if his records were searched , the DMV supplies him with the name and business address of the DIAL - IN account holder , and the time, date and search criteria used to search for the records. We do not disclose the DIAL - IN account numbers of the account holder that performed the search. How do I contact DMV about my Dial-in Search Account? You can contact us at: NYSDMV DIAL - IN AccountsRoom 2316 Empire State PlazaAlbany, NY 12228 473 - 2137e - mail: [email protected] [5 ] Note on browser compatibility We are upgrading our security protocols to better protect your data. To verify your browser is compatible to continue using any of the state's government websites , please visit https://encryption.ny.gov/ [6]. If your browser is not currently compatible , please update it to the newest version."

prompt = "user:Can I know if anybody asked for my records?"
inputs = tokenizer(PADDING_TEXT + prompt, add_special_tokens=False, return_tensors="pt")["input_ids"]

prompt_length = len(tokenizer.decode(inputs[0]))
outputs = model.generate(inputs, max_length=1200, do_sample=True, top_p=0.95, top_k=60)
generated = prompt + tokenizer.decode(outputs[0])[prompt_length + 1 :]

print(generated)

user:Can I know if anybody asked for my records?eop> Please contact us at : ( the search for information on DMV records may require an inquiry ).<eop> Contact an IRS clerk for a specific search request. We need the details that you provide. In order to confirm that you are eligible to submit records, we may need to obtain the information required to do so. We do not require data from your records. Information returned to your account will


In [129]:
predictions = ["Please contact us at : ( the search for information on DMV records may require an inquiry )."]
references = [["Records of searches are public records."]]
results = metric.compute(predictions=predictions, references=references)
results

{'score': 2.4074859035470344,
 'counts': [2, 0, 0, 0],
 'totals': [19, 18, 17, 16],
 'precisions': [10.526315789473685,
  2.7777777777777777,
  1.4705882352941178,
  0.78125],
 'bp': 1.0,
 'sys_len': 19,
 'ref_len': 7}

### compare without 'user:' at prompt - better score - consistent

In [130]:
prompt = "Can I know if anybody asked for my records?"
inputs = tokenizer(PADDING_TEXT + prompt, add_special_tokens=False, return_tensors="pt")["input_ids"]

prompt_length = len(tokenizer.decode(inputs[0]))
outputs = model.generate(inputs, max_length=1200, do_sample=True, top_p=0.95, top_k=60)
generated = prompt + tokenizer.decode(outputs[0])[prompt_length + 1 :]

print(generated)

Can I know if anybody asked for my records?No. You must notify the DMV at the time of the request. You are responsible for protecting your personal information. Information collected through your search has been used to make you identifiable and sensitive to our law enforcement and public safety agencies for data collection purposes. If you did not enter the name and business address of any of the employees who are requested by DMV, please respond to our inquiries and provide the business record to


In [131]:
predictions = ["No. You must notify the DMV at the time of the request."]
references = [["Records of searches are public records."]]
results = metric.compute(predictions=predictions, references=references)
results

{'score': 3.377156414337854,
 'counts': [2, 0, 0, 0],
 'totals': [14, 13, 12, 11],
 'precisions': [14.285714285714286,
  3.8461538461538463,
  2.0833333333333335,
  1.1363636363636365],
 'bp': 1.0,
 'sys_len': 14,
 'ref_len': 7}

### Example 2-2. Dial-in search accounts - Search records (with dialogue history) - similar score

In [133]:
PADDING_TEXT = "Log On to DIAL - IN [1 ] What is a DIAL-IN search account and who can get one? If you regularly need to search DMV records for driver license, registration, vehicle title or insurance information , then DIAL - IN is the service you need. Our typical clients include attorneys, insurance and private investigators, process servers and building / site security personnel. See Drivers Privacy Protection Act DPPA [2] for more information about permissible uses of DMV records. How do I apply for an account? You should complete form Motor Vehicle Record Search Account Application PDF [3] MV-15D. The instructions are on the form. Send the form to : Data Services - New Search AccountNYS Department of Motor Vehicles6 Empire State PlazaAlbany, NY 12228 How will I know my application has been accepted? We will review your application. If you application is not complete , we will return your application with an explanation that will give you more instructions. If your application is complete , we will create an account for you. We will send you your account number and password. Your seven - digit account number will never change. You will enter this account number when you log in to the DIAL - IN system. Are there fees for using a DIAL-IN search account? According to the NYS Vehicle and Traffic Law , we must charge a fee to search DMV records. Private persons, commercial organizations, and not - for - profit organizations must pay a search fee. There is a fee of $7.00 for each search that you make with the DIAL - IN service. There are some organizations that are exempt from search fees. Any public organization, its officers, a volunteer fire company, a volunteer ambulance service, or a legal aid bureau or society or other private entity acting pursuant to section 722 of the New York State County Law, that makes a search for a public purpose, is exempt from the search fee. How do I pay DIAL-IN Search Account fees? Paying customers must include a deposit with the application. Your first deposit must be enough to pay for two months of searches. When we accept your application , your deposit is added to your new account balance. When you make a search , we subtract the search fee from your account balance. Payment by credit card is NOT available at this time. You can use your account to pay for the mail - order searches and the online searches. If you must pay a fee for the service , your account records will reflect your debits and your credits and show your account balance. For more information , please visit our Escrow Accounts [4] page. What business records must I keep to document the searches I perform? The business records you keep must exist prior to the search you perform and must establish the business purpose of the search. Common examples of business records include invoices, retainer agreements, employment applications, consent forms, accident reports, and surveillance reports and numerous other types of records. The Terms of Service of your Search Account require you to maintain these records and to provide them to DMV when requested. Will DMV contact me about the searches I perform using my DIAL-IN Search Account? Yes , DMV may ask you about a DIAL - IN search you performed. We inquire about searches to monitor compliance with the DIAL - IN Terms of Service . You must respond to the inquiry whether or not you have business records that support your search. Your response will be considered an incomplete reply if you do not give us the business records. You can complete an inquiry at any time if you send us the requested acceptable business records. If you performed the search on behalf of a client , you will need to show permissible use of your client, and your client agent relationship. If you personally did not perform the search we have inquired about , please check with the other users of your DIAL - IN account to see if one of them performed the search. If none of the known users performed the search , contact us immediately. If you do not respond to our inquiry about a DIAL - IN search, your search privileges will be suspended until you respond. Will you tell a motorist if I searched his records? Yes. Records of searches are public records. If a motorist asks the DMV if his records were searched , the DMV supplies him with the name and business address of the DIAL - IN account holder , and the time, date and search criteria used to search for the records. We do not disclose the DIAL - IN account numbers of the account holder that performed the search. How do I contact DMV about my Dial-in Search Account? You can contact us at: NYSDMV DIAL - IN AccountsRoom 2316 Empire State PlazaAlbany, NY 12228 473 - 2137e - mail: [email protected] [5 ] Note on browser compatibility We are upgrading our security protocols to better protect your data. To verify your browser is compatible to continue using any of the state's government websites , please visit https://encryption.ny.gov/ [6]. If your browser is not currently compatible , please update it to the newest version."

prompt = "user:Just to check the address of contact is correct please tell me if this is the right one: NYSDMV DIAL - IN AccountsRoom 2316 Empire State PlazaAlbany , NY 12228 473 - 2137 agent:We can supply you with the name and business address of the Dial-In account holder and the time, date and search criteria they used for the records. user:What info can you give me about those that search for my DMV records? agent:First, you should update your browser to its newest version available. user:What should I do if your site says my browser is not compatible?. agent:ok our mail address for dial-in search account related problems is NYSDMV DIAL - IN AccountsRoom 2316 Empire State PlazaAlbany , NY 12228 473 - 2137 user:Sorry, it looks like my chat didn't show your message well, can you please how to tell me how to contact DMV about my dial-in search account. agent:Our contact address for that is NYSDMV DIAL - IN AccountsRoom 2316 Empire State PlazaAlbany , NY 12228 473 - 2137e - mail user:I need to contact DMV about my dial-in search account agent:You should visit https://encryption.ny.gov/ to check the compatibility of your website and if it says that is not compatible, please update your browser. user:it looks like my browser is not compatible with your website. agent:Yes, since the records of searches are public we'll inform any motorist if his records were searched. user:Can I know if anybody asked for my records?"
inputs = tokenizer(PADDING_TEXT + prompt, add_special_tokens=False, return_tensors="pt")["input_ids"]

prompt_length = len(tokenizer.decode(inputs[0]))
outputs = model.generate(inputs, max_length=1510, do_sample=True, top_p=0.95, top_k=60)
generated = prompt + tokenizer.decode(outputs[0])[prompt_length + 1 :]

print(generated)

user:Just to check the address of contact is correct please tell me if this is the right one: NYSDMV DIAL - IN AccountsRoom 2316 Empire State PlazaAlbany , NY 12228 473 - 2137 agent:We can supply you with the name and business address of the Dial-In account holder and the time, date and search criteria they used for the records. user:What info can you give me about those that search for my DMV records? agent:First, you should update your browser to its newest version available. user:What should I do if your site says my browser is not compatible?. agent:ok our mail address for dial-in search account related problems is NYSDMV DIAL - IN AccountsRoom 2316 Empire State PlazaAlbany , NY 12228 473 - 2137 user:Sorry, it looks like my chat didn't show your message well, can you please how to tell me how to contact DMV about my dial-in search account. agent:Our contact address for that is NYSDMV DIAL - IN AccountsRoom 2316 Empire State PlazaAlbany , NY 12228 473 - 2137e - mail user:I need to c

In [134]:
predictions = ["agent:No, if they ask for my records, you are not authorized to send my records to DMV. agent:We cannot find anyone who asks for the records."]
references = [["agent:You can contact us at: NYSDMV DIAL - IN AccountsRoom 2316 Empire State PlazaAlbany, NY 12228 473 - 2137e - mail: [email protected] [5 ]"]]
results = metric.compute(predictions=predictions, references=references)
results

{'score': 2.5889065173943995,
 'counts': [4, 1, 0, 0],
 'totals': [34, 33, 32, 31],
 'precisions': [11.764705882352942,
  3.0303030303030303,
  1.5625,
  0.8064516129032258],
 'bp': 1.0,
 'sys_len': 34,
 'ref_len': 33}