# Answering questions using RoBERTa

## Main solution using pre-made model

In [60]:
!pwd

/home/george/Documents/LeWagon/Transformers_Hugging_Face


In [None]:
"""Install requirements"""
# Install the transformers library from HuggingFace
!pip install transformers torch pytesseract
# You'll also need some extra tools that some of these models use under the hood
! pip install sentencepiece sacremoses

In [1]:
"""Import packages"""
from transformers import pipeline
from transformers.pipelines.pt_utils import KeyDataset
import pandas as pd

"""Import our question answering model"""
question_answerer = pipeline(model = 'deepset/roberta-base-squad2')

2023-12-01 10:33:38.059974: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-12-01 10:33:38.060008: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2023-12-01 10:33:38.097288: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-12-01 10:33:39.078537: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2023-12-01 10:33:39.078708: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: ca

In [34]:
answer_questions_with_confidence(content, question)

Unnamed: 0,confidence,question,answer
0,0.688135,Where is Spain?,Europe


In [33]:
"""Open a file as a possible source of context"""
# A context manager closes the file even if reading it raises an error
with open("/home/george/Documents/example_article.txt", "r") as file: # Example file
    content = file.read()
print(content)

question = ['Where is Spain?'] # Example question

This is just some text to use as an example.
It does not particularly say much that is very interesting or useful.
The purpose of the article is that I can confirm how to open and read articles and to see whether my model can answer questions based on it.
Animals: mouse, cat, horse, hippo, elephant, whale.
Spain is a country in Europe.



In [129]:
!pwd

/home/george/Documents/LeWagon/Transformers_Hugging_Face/quizachu/notebooks


In [127]:
def answer_questions_with_confidence(context = "You did not specify any content", questions = ["Did you mean to specify a question?"]):
    """Answer each question from the given context using the question_answerer model.

    Args:
        context: Text used as the source for answering the questions.
        questions: A list of question strings. A single string is also accepted
            and treated as one question.

    Returns:
        A dataframe with one row per question and the columns 'confidence'
        (the model's score), 'question' and 'answer' (newlines replaced by spaces).
        The three columns are present even when no questions are supplied.

    If context or questions are omitted, the placeholder defaults above are
    passed to the model unchanged.

    Side effect: sets pd.options.display.max_colwidth to 20000 so that long
    answers are not truncated when the dataframe is displayed.
    """

    # A bare string would otherwise be iterated one character at a time
    if isinstance(questions, str):
        questions = [questions]

    # One record per question: the question itself plus the model's answer and score
    questions_answers = []
    for q in questions:
        q_a = question_answerer(question=q, context=context)
        questions_answers.append({
            'confidence': q_a['score'],
            'question': q,
            'answer': q_a['answer'].replace('\n', ' '),
        })

    # Naming the columns explicitly keeps the schema stable for an empty list of questions
    questions_answers = pd.DataFrame(questions_answers, columns=['confidence', 'question', 'answer'])

    # Set a large maxcolwidth to allow for potentially long answers
    pd.options.display.max_colwidth = 20000

    return questions_answers

In [124]:
select_top_n_questions(bbc_article_text, bbc_cat_questions, 0.6, 4)

Here are your 4 questions


Unnamed: 0,original_question_number,confidence,question,answer
0,1,0.963533,Who produced the song?,Danny Kirsch
1,6,0.842697,Where was the video filmed?,Stevenage railway station
2,2,0.81919,What is the song called?,Meow
3,3,0.781142,Who gave the song its first play?,Justin Dealey


In [123]:
def select_top_n_questions(context, questions, c = 0.5, n = 5):
    """Return up to n answered questions whose confidence exceeds c, best first.

    Args:
        context: Text passed to answer_questions_with_confidence as the source.
        questions: Questions passed to answer_questions_with_confidence.
        c: Minimum confidence; only rows with confidence strictly greater than c are kept.
        n: Maximum number of rows to return.

    Returns:
        A dataframe sorted by descending confidence with a fresh index. The index
        each question had in the answered dataframe is kept in the
        'original_question_number' column.

    Also prints a short message saying whether n questions could be returned,
    and why not if they could not.
    """

    # Call answer_questions to get a df of questions and answers
    questions_answers = answer_questions_with_confidence(context, questions)

    if questions_answers.empty:
        # Nothing was answered, so there may be no 'confidence' column to filter on
        selected_questions = pd.DataFrame(
            columns=['original_question_number', 'confidence', 'question', 'answer'])
    else:
        # Filter for confidence, then keep the n best ordered by confidence
        conf_questions = questions_answers[questions_answers['confidence'] > c]
        selected_questions = (
            conf_questions
            .sort_values(by='confidence', ascending=False)
            .head(n)
            .reset_index()
            .rename(columns={'index': 'original_question_number'})
        )

    # Were enough questions generated?
    if len(questions_answers) < n:
        print(f"Only {len(questions_answers)} questions were generated")

    # Did enough questions meet the confidence requirement?
    # (this report is the same whether or not enough questions were generated)
    n_selected = len(selected_questions)
    if n_selected == 0:
        print("No questions met your required confidence level.")
    elif n_selected < n:
        verb = 'is' if n_selected == 1 else 'are'
        print("Not enough questions met your required confidence level,"
              f" but here {verb} the {n_selected} that did:")
    else:
        print(f"Here are your {n} questions")

    return selected_questions

In [139]:
answer_questions_with_confidence(result_text, ['When were the first microscopes developed?'])

Unnamed: 0,confidence,question,answer
0,0.439633,When were the first microscopes developed?,mid-17th century


In [128]:
answer_questions_with_confidence(bbc_article_text, bbc_cat_questions)

Unnamed: 0,confidence,question,answer
0,0.101562,Where will profit go?,RSPCA and Stevenage homelessness charity Feed Up Warm Up
1,0.963533,Who produced the song?,Danny Kirsch
2,0.81919,What is the song called?,Meow
3,0.781142,Who gave the song its first play?,Justin Dealey
4,0.682033,When will the song be released?,Wednesday
5,0.440879,Who wrote the song?,"Danny Kirsch, who wrote it with Joe Killington"
6,0.842697,Where was the video filmed?,Stevenage railway station
7,0.508643,How has nala been delighting commuters?,taking photos of her at Stevenage station
8,0.308242,Who's pictures went viral?,Nala
9,0.2376,All proceeds from the single will be what?,donated to the RSPCA and Stevenage homelessness charity Feed Up Warm Up


In [87]:
select_top_n_questions(underground_context, underground_questions, c = 0.1, n = 20)

12
Not enough questions met your required confidence level, but here are the 12 that did:


Unnamed: 0,original_question_number,confidence,question,answer
0,1,0.958134,In what year did the UERL begin to be built?,1902
1,12,0.781515,When did the UERL begin to run through the London Underground?,1902
2,8,0.573044,How many kilometers long was the London Underground Station?,80
3,13,0.507921,When did the London Underground begin?,19th century
4,11,0.502571,What railway line was built in the London Underground?,Metropolitan Railway
5,14,0.311805,What was the name of the first railway station in London?,Metropolitan Railway
6,6,0.29533,When did the London Underground first operate?,19th century
7,0,0.255145,In what year did the London Underground begin?,19th century
8,2,0.255145,In what year did the London Underground begin?,19th century
9,15,0.255145,In what year did the London Underground begin?,19th century


In [78]:
underground_context = """The history of the London Underground began in the 19th century with the construction of the Metropolitan Railway, the world's first underground railway. The Metropolitan Railway, which opened in 1863 using gas-lit wooden carriages hauled by steam locomotives, worked with the District Railway to complete London's Circle line in 1884. Both railways expanded, the Metropolitan eventually extending as far as Verney Junction in Buckinghamshire, more than 50 miles (80 km) from Baker Street and the centre of London. The first deep-level tube line, the City and South London Railway, opened in 1890 with electric trains. This was followed by the Waterloo & City Railway in 1898, the Central London Railway in 1900, and the Great Northern and City Railway in 1904. The Underground Electric Railways Company of London (UERL) was established in 1902 to fund the electrification of the District Railway and to complete and operate three tube lines, the Baker Street and Waterloo Railway, the Charing Cross, Euston and Hampstead Railway and the Great Northern, Piccadilly and Brompton Railway, which opened in 1906–07. By 1907 the District and Metropolitan Railways had electrified the underground sections of their lines.

Under a joint marketing agreement between most of the companies in the early years of the 20th century, UNDERGROUND signs appeared outside stations in central London. World War I delayed extensions of the Bakerloo and Central London Railways, and people used the tube stations as shelters during Zeppelin air raids by June 1915. After the war, government-backed financial guarantees were used to expand the network, and the tunnels of the City and South London and Charing Cross, Euston and Hampstead Railways were linked at Euston and Kennington, although the combined service was not named the Northern line until later. The Piccadilly line was extended north to Cockfosters and took over District line branches to Harrow (later Uxbridge) and Hounslow. In 1933, the underground railways and all London area tram and bus operators were merged into the London Passenger Transport Board (LPTB). The outlying branches of the Metropolitan were closed; various upgrades were planned. The Bakerloo line's extension to take over the Metropolitan's Stanmore branch, and extensions of the Central and Northern lines, formed part of the 1930s New Works Programme. The outbreak of World War II in 1939 halted or interrupted some of this work, and many tube stations were used as air-raid shelters. """

In [85]:
underground_questions = ["In what year did the London Underground begin?",
"In what year did the UERL begin to be built?",
"In what year did the London Underground begin?",
"What railway line was built in the 18th century?",
"In what year did the Thames & City Railway open?",
"How many kilometers long did it take to travel through London's Station?",
"When did the London Underground first operate?",
"In what year was the London Underground Station opened?",
"How many kilometers long was the London Underground Station?",
"In what year did the Thames & City Railway open?",
"What railway line was built in London in the 18th century?",
"What railway line was built in the London Underground?",
"When did the UERL begin to run through the London Underground?",
"When did the London Underground begin?",
"What was the name of the first railway station in London?",
"In what year did the London Underground begin?",
"In what year was the London Underground Station closed?",
"What railway line opened in 1880?",
"What railway line was built in London in 1880?",
"What railway line was built in the 18th century?"]

## Importing audio as input for questions or answers

In [None]:
"""Installs to analyse audio"""
!sudo apt install ffmpeg
!pip3 install datasets
!pip install SoundFile
!pip install librosa

In [None]:
"""Example audio to analyse"""
!mkdir data
!curl https://wagon-public-datasets.s3.amazonaws.com/deep_learning_datasets/harvard.wav > data/harvard.wav

In [None]:
"""Packages for audio"""
from scipy.io import wavfile
from IPython.display import Audio

In [None]:
"""Read the audio file and play it to verify"""
rate, audio = wavfile.read("data/harvard.wav")
Audio(audio.T, rate=rate)

In [None]:
"""Transcription of a downloaded wav file"""

from transformers import WhisperProcessor, WhisperForConditionalGeneration
import librosa  

# load model and processor
processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny")
model.config.forced_decoder_ids = None

# Whisper requires a sampling rate of 16000 so must convert this with librosa
audio, rate = librosa.load('data/harvard.wav', sr=16000)
input_features = processor(audio, sampling_rate=rate, return_tensors="pt").input_features

# generate token ids
predicted_ids = model.generate(input_features)
# decode token ids to text once, skipping the special tokens
# (a second decode that kept them was overwritten straight away, so it is not needed)
transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)


In [None]:
"""Transcription of a flac file from hugging face"""

from transformers import WhisperProcessor, WhisperForConditionalGeneration
from datasets import load_dataset

# load model and processor
processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny")
model.config.forced_decoder_ids = None

# load dummy dataset and read audio files
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
sample = ds[0]["audio"]
input_features = processor(sample["array"], sampling_rate=sample["sampling_rate"], return_tensors="pt").input_features

# generate token ids
predicted_ids = model.generate(input_features)
# decode token ids to text once, skipping the special tokens
# (a second decode that kept them was overwritten straight away, so it is not needed)
transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)


In [None]:
transcription

## 1.3 Processing visual input for questions or answers

### Final OCR extraction code

In [None]:
"""Installs for OCR (the Tesseract engine and its Python bindings)"""
!sudo apt install tesseract-ocr  
!sudo apt install libtesseract-dev
!pip install Pillow pytesseract

In [137]:
"""This is not answering questions. It simply performs OCR on images.
This would enable output from images to be put into the question answerer
This should work with images obtained from the snipping tool.
It does not recognise handwriting."""

import pytesseract
from PIL import Image

def ocr_document(image_path):
    """Run Tesseract OCR on an image file and return the recognised text.

    Args:
        image_path: Path of the image to read, passed straight to Pillow's Image.open.

    Returns:
        The text returned by pytesseract.image_to_string for that image.
    """
    # Open the image using the Pillow library; the context manager closes the
    # underlying file once OCR has finished instead of leaving the handle open
    with Image.open(image_path) as image:
        # Perform OCR using Tesseract
        text = pytesseract.image_to_string(image)

    return text

# Example usage
image_path = '/home/george/Downloads/microscope.jpg'
result_text = ocr_document(image_path)

print("OCR Result:")
print(result_text)


OCR Result:
B 1 Cell structure and

transport

 

1.1 The world of the microscope

Learning objectives

After this topic, you should know:

e@ how microscopy techniques have
developed over time

@ the differences in magnification and
resolution between a light microscope
and an electron microscope

@ how to calculate the magnification,
teal size, and image size of a specimen.

Ge wvrriewe

    

coarse focus ‘a lens

stage
fine focus,

slide

light

Figure 1 A light microscope

Living things are all made up of cells, but most cells are so small you can
only see them using a microscope. It is important to grasp the units used for
such tiny specimens before you start to look at them.

Using units
1 kilometre (km) = 1000 metres (m)

1m = 100centimetres (cm)
1cm= 10 millimetres (mm)
1mm = 1000 micrometres (um)

1 um = 1000 nanometres (nm) — so a nanometre is 0.000000 001
metres (or written in standard form as 1 x 10°°m).

 

The first light microscopes were developed in the mid-17th centur

In [118]:
answer_questions_with_confidence(bbc_article_text, ['What links Danny and Joe?'])

Unnamed: 0,confidence,question,answer
0,0.032588,What links Danny and Joe?,wrote it with Joe Killington


In [108]:
"""Here we use the question answering model to answer questions about the OCR text"""
answer_questions_with_confidence(result_text, ['What does this tool capture?'])

Unnamed: 0,confidence,question,answer
0,0.253979,What does this tool capture?,a screenshot of anything on your desktop


### These are previous attempts at various image processing

In [4]:
"""Example images for processing"""
"""Text"""
# Invoice
invoice = 'https://huggingface.co/spaces/impira/docquery/resolve/2359223c1837a7587402bda0f2643382a6eefeab/invoice.png'
# Simple poster
simple = 'https://www.11thhourracingteam.org/wp-content/uploads/11th-hour-racing-team-how-to-create-a-sustainability-policy-horizontal-3-1-1536x1056.png'
# Complex poster
complicated = 'https://cdn.greenmatch.co.uk/cdn-cgi/image/format=auto/2/2023/07/MAY23_4_02-Plastic-Waste_Global-Waste_2-1-663x1024.png'
# Microscopes text book page via web link
microscope = 'https://m.media-amazon.com/images/I/71Ts-QXYIhL._SL1500_.jpg'
# Magnification text book page downloaded to absolute file path
magnification = '/home/george/Downloads/magnification.jpg'

"""Handwriting"""
# Nice clear handwriting and cursive handwriting
clear = 'https://steemitimages.com/DQmcdbSGrnA9zeqWrYHD8EkNjvF9uxQCAeB7qnucUShpNDe/IMG_7345.PNG'
# Tricky handwriting
tricky = 'https://www.researchgate.net/profile/Neeta-Nain/publication/299666231/figure/fig1/AS:491693964304386@1494240384780/Example-image-of-a-general-handwritten-text-paragraph-from-IAM-dataset-4.png'
y5 = 'https://thelinksprimary.org.uk/wp-content/uploads/2023/10/Handwriting-Y6.png'

#### This is for reading images with text in, eg invoices or posters

In [3]:
"""First model - this answers questions about documents
- this works for very simple documents 
but struggles for anything which implies relationships (e.g. two text boxes that relate to one another)"""
ocr = pipeline(model = 'impira/layoutlm-invoices') #This struggles to find relationships between objects

Some weights of the model checkpoint at impira/layoutlm-invoices were not used when initializing LayoutLMForQuestionAnswering: ['token_classifier_head.weight', 'token_classifier_head.bias']
- This IS expected if you are initializing LayoutLMForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LayoutLMForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [13]:
"""Question-answer format"""
ocr(image='/home/george/Downloads/magnification.jpg',question="What does this page say?")

[{'score': 0.7278719544410706,
  'answer': 'Calculating the size of an object',
  'start': 0,
  'end': 5}]

#### This is for reading handwriting

In [None]:
"""This works well for single lines of handwriting but does not support multiple lines.
I need to split multiple line files into single lines."""

hw = pipeline(model = 'microsoft/trocr-base-handwritten')

In [59]:
"""This attempts to split images. It is the first time I gave up and got chatgpt to write code for me.
It does not work very well - it identifies words but does not link them correctly as lines."""

import cv2
import os
import pytesseract

def split_and_save_handwritten_lines(image_path, output_directory, min_width_threshold=300, min_height_threshold=20):
    """Crop wide contours out of an image and save each crop as its own PNG.

    Args:
        image_path: Path of the image to split.
        output_directory: Directory the crops are written to (created if missing).
        min_width_threshold: A contour's bounding box must be wider than this (pixels) to be kept.
        min_height_threshold: A contour's bounding box must be taller than this (pixels) to be kept.

    Returns:
        A list of the cropped images, in the order they were saved. The k-th
        item (counting from 1) is saved as 'line_k.png' in output_directory.

    Raises:
        FileNotFoundError: If OpenCV could not read image_path.
    """
    # Read the image using OpenCV
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports failure by returning None instead of raising
        raise FileNotFoundError(f"Could not read image: {image_path}")

    # Convert the image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Binarise with an inverted Otsu threshold (a single global threshold, not adaptive)
    _, binary_image = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

    # Find contours in the binary image
    contours, _ = cv2.findContours(binary_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Create the output directory if it doesn't exist
    os.makedirs(output_directory, exist_ok=True)

    # List to store individual line images
    line_images = []

    for contour in contours:
        # Get bounding box for each contour
        x, y, w, h = cv2.boundingRect(contour)

        # Filter out contours based on width and height
        if w > min_width_threshold and h > min_height_threshold:
            # Crop the original image to extract the line
            line_image = image[y:y+h, x:x+w]
            line_images.append(line_image)

            # Number files by position in the returned list (not by contour index)
            # so that item k of the result is always stored in line_k.png
            output_path = os.path.join(output_directory, f'line_{len(line_images)}.png')
            cv2.imwrite(output_path, line_image)

    return line_images

# Example usage
image_path = '/home/george/Downloads/Handwriting-Y4.png'
output_directory = '/home/george/Downloads/split_text'
lines = split_and_save_handwritten_lines(image_path, output_directory)

# Print the paths of saved line images
for i, line_image in enumerate(lines, start=1):
    print(f"Saved line {i} to {os.path.join(output_directory, f'line_{i}.png')}")


Saved line 1 to /home/george/Downloads/split_text/line_1.png


## Other things

In [None]:
"""These are possible ways to better process images"""
"""visual bert needs more configuring"""
# https://huggingface.co/daki97/visualbert_finetuned_easy_vqa
# https://huggingface.co/docs/transformers/model_doc/visual_bert#overview  (the "#overview" fragment is part of the URL)
# https://github.com/huggingface/transformers/blob/main/examples/research_projects/visual_bert/demo.ipynb
"""layout needs more configuring"""
# https://huggingface.co/docs/transformers/model_doc/layoutlmv3
"""should work for extracting printed text, but only works for single lines"""
# https://huggingface.co/microsoft/trocr-base-printed
"""suggestions on how to split into multiple lines"""
# https://github.com/microsoft/unilm/issues/628
# https://discuss.huggingface.co/t/trocr-fine-tuning/13293/3
"""vision encoder requires more configuration"""
# https://huggingface.co/docs/transformers/model_doc/vision-encoder-decoder
"""Generate LaTeX from images"""
# https://huggingface.co/Norm/nougat-latex-base

#### Scraping

In [None]:
"""Import packages"""
import requests
from bs4 import BeautifulSoup
import re

In [48]:
"""Scrape Wikipedia as a possible source of context"""

import requests
from bs4 import BeautifulSoup

def scrape_wikipedia_article(url, timeout=10):
    """Download a Wikipedia page and return the text of its main content area.

    Args:
        url: Address of the Wikipedia article.
        timeout: Seconds passed to requests.get as its timeout, so a server
            that never responds cannot hang the notebook.

    Returns:
        The text inside the element with id 'mw-content-text', with the text of
        nested elements separated by newlines. Returns None, after printing the
        reason, if the response status is not 200 or that element is missing.
    """
    # Send an HTTP request to the URL
    response = requests.get(url, timeout=timeout)

    # Check if the request was successful (status code 200)
    if response.status_code != 200:
        print(f"Failed to retrieve the page. Status code: {response.status_code}")
        return None

    # Parse the HTML content of the page
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find the main article text (adjust the selector based on the structure of the webpage)
    content_div = soup.find('div', {'id': 'mw-content-text'})
    if content_div is None:
        # soup.find returns None when nothing matches; report it instead of
        # failing with an AttributeError on .get_text
        print("Failed to find the article text on the page.")
        return None

    return content_div.get_text(separator='\n')

# Example usage
url = 'https://en.wikipedia.org/wiki/Ai_(chimpanzee)'
wikipedia_article_text = scrape_wikipedia_article(url)

if wikipedia_article_text:
    print(wikipedia_article_text)


Subject of the Ai project


Ai
Species
chimpanzee
Born
c.
 1976
 (age 
46–47)
Guinean Forests of West Africa
Offspring
Ayumu (chimpanzee)


Ai
 (born in 1976, estimated) is a female 
western chimpanzee
 (
Pan troglodytes verus
),
[1]
 currently living at the 
Primate Research Institute
 of 
Kyoto University
 (acronym KUPRI). She is the first subject of the 
Ai project
, a research program started in 1978 by Kiyoko Murofushi and 
Tetsuro Matsuzawa
 which is aimed at understanding chimpanzee 
cognition
 through computer interface experiments.
[2]






Biography
[
edit
]


Ai was born in 1976 (estimated), in the 
Guinean Forests of West Africa
.
[1]
 Born wild, Ai was soon taken into captivity and sold to KUPRI in 1977 by an animal trader (this type of sale became illegal in 1980 with 
Japan
's ratification of 
CITES
).
[1]
  She was the first subject of KUPRI’s chimpanzee project, which was intended to become Japan’s first ape-language study in the vein of earlier ape-language studies.


In [119]:
"""Scrape BBC as a possible source of context"""

import requests
from bs4 import BeautifulSoup

def scrape_bbc_article(url, timeout=10):
    """Download a BBC News article and return its body text.

    Args:
        url: Address of the BBC article page.
        timeout: Seconds passed to requests.get as its timeout, so a server
            that never responds cannot hang the notebook.

    Returns:
        The text of every div whose data-component attribute is 'text-block',
        joined with single spaces (an empty string if the page has none).
        Returns None, after printing the status code, if the response status
        is not 200.
    """
    # Send an HTTP request to the URL
    response = requests.get(url, timeout=timeout)

    # Check if the request was successful (status code 200)
    if response.status_code != 200:
        print(f"Failed to retrieve the page. Status code: {response.status_code}")
        return None

    # Parse the HTML content of the page
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find the main article text (adjust the selector based on the structure of the webpage)
    text_blocks = soup.find_all("div", {"data-component": "text-block"})
    return " ".join(block.text for block in text_blocks)

# Example usage - cat article
url = 'https://www.bbc.co.uk/news/uk-england-beds-bucks-herts-67407334'
# scrape_bbc_article returns None when the request fails, so only tidy up the
# newlines when some text actually came back
bbc_article_text = scrape_bbc_article(url)
if bbc_article_text:
    bbc_article_text = bbc_article_text.replace('\n', ' ')

# Example questions - cat article
bbc_cat_questions = [
    'Where will profit go?',
    'Who produced the song?',
    'What is the song called?',
    'Who gave the song its first play?',
    'When will the song be released?',
    'Who wrote the song?',
    'Where was the video filmed?',
    'How has nala been delighting commuters?',
    "Who's pictures went viral?",
    'All proceeds from the single will be what?',
    'What links Danny and Joe?',
]

if bbc_article_text:
    print(bbc_article_text)


A cat whose pictures went viral for regularly visiting a railway station is releasing a Christmas single. Four-year-old Nala has been delighting commuters who have been taking photos of her at Stevenage station. Owner Natasha Ambler revealed the cat was releasing a single called Meow and has been approached for a book deal. The ginger tabby has also recorded a video for the song due to be released this week, under the name Nala the Station Cat. It has been produced by Danny Kirsch, who wrote it with Joe Killington, while Nala is also co-credited as a songwriter, as well as a vocalist. Ms Ambler said "we want to spread the happiness that Stevenage has had, and she's had on socials to the world". The single is officially released on Wednesday and BBC Three Counties Radio's Justin Dealey gave the single an exclusive first play on Sunday. "I'm slightly lost for words," said the presenter after the song finished. Nala's owner replied: "So am I to be fair." The musical cat does not yet have 

#### Retraining model

In [133]:
from transformers import TFAutoModelForSequenceClassification

In [134]:
# deepset/roberta-base-squad2 is an extractive question-answering checkpoint, so load it
# with the question-answering auto class. The sequence-classification class would drop the
# trained QA head and attach a newly initialised classification head instead.
from transformers import TFAutoModelForQuestionAnswering

question_answerer_train = TFAutoModelForQuestionAnswering.from_pretrained('deepset/roberta-base-squad2', from_pt=True)

pytorch_model.bin:   0%|          | 0.00/496M [00:00<?, ?B/s]

2023-12-01 16:37:16.271633: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2023-12-01 16:37:16.292324: W tensorflow/stream_executor/cuda/cuda_driver.cc:263] failed call to cuInit: UNKNOWN ERROR (303)
2023-12-01 16:37:16.292539: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (george-ThinkPad-X220-Tablet): /proc/driver/nvidia/version does not exist
2023-12-01 16:37:17.115447: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 154414080 exceeds 10% of free system memory.
2023-12-01 16:37:17.564863: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 154414080 exceeds 10% of free system memory.
2023-12-01 16:37:17.660538: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 154414080 exceeds 10% of free system memory.
2023-12-01 16:

In [None]:
question_answerer_train.