ALBERT_Question_Answering.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1CieCJ89HyQV0CNH-agWFqHfTsNi1noQW

### Install NVIDIA

### Import libraries and initialize the model

In [1]:
import os
import torch
import time
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
from transformers import (
    AlbertConfig,
    AlbertForQuestionAnswering,
    AlbertTokenizer,
    squad_convert_examples_to_features
)

from transformers.data.processors.squad import SquadResult, SquadV2Processor, SquadExample

from transformers.data.metrics.squad_metrics import compute_predictions_logits


  from .autonotebook import tqdm as notebook_tqdm


## READER NOTE: Set `use_own_model` to True to load your own fine-tuned model, or leave it False to use the pretrained model from the Hugging Face repository.

In [2]:
# Switch between a locally stored checkpoint and the hosted pretrained one.
use_own_model = False

model_name_or_path = (
    "/content/model_output"
    if use_own_model
    else "ktrapeznikov/albert-xlarge-v2-squad-v2"
)

# NOTE(review): not referenced by any code visible in this file.
output_dir = ""

# Settings that run_prediction forwards to compute_predictions_logits.
n_best_size, max_answer_length = 1, 30
do_lower_case = True
null_score_diff_threshold = 0.0

def to_list(tensor):
    """Return the contents of ``tensor`` as plain Python values.

    The tensor is first detached from the autograd graph and moved to the
    CPU, then converted with ``Tensor.tolist()``.
    """
    host_copy = tensor.detach().cpu()
    return host_copy.tolist()

# Setup model
# The ALBERT classes are bound to generic names so the loading calls below do
# not mention a specific architecture.
config_class, model_class, tokenizer_class = (
    AlbertConfig, AlbertForQuestionAnswering, AlbertTokenizer)
config = config_class.from_pretrained(model_name_or_path)
# NOTE: AlbertTokenizer needs the `sentencepiece` package; without it this
# call raises ImportError.
# The shared do_lower_case setting is used instead of a second hard-coded
# literal so the tokenizer and compute_predictions_logits cannot drift apart.
tokenizer = tokenizer_class.from_pretrained(
    model_name_or_path, do_lower_case=do_lower_case)
model = model_class.from_pretrained(model_name_or_path, config=config)

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model.to(device)

# NOTE(review): not referenced by any code visible in this file.
processor = SquadV2Processor()

# Define functions
def run_prediction(question, context_text):
    """Answer one or more questions about a single context passage.

    Args:
        question: An iterable of question strings. A single string is also
            accepted and is treated as one question.
        context_text: The passage the answers are extracted from; every
            question is asked against this same text.

    Returns:
        Whatever ``compute_predictions_logits`` returns. The caller in this
        file treats it as a mapping and reads the answers from ``.values()``.

    Note:
        The file names ``predictions.json``, ``nbest_predictions.json`` and
        ``null_predictions.json`` are handed to ``compute_predictions_logits``
        as its output paths (relative to the current working directory).
    """
    # A bare string would otherwise be iterated character by character,
    # yielding one bogus single-letter "question" per character.
    if isinstance(question, str):
        question = [question]

    # Each question becomes one example whose qas_id is its position.
    examples = [
        SquadExample(
            qas_id=str(i),
            question_text=question_text,
            context_text=context_text,
            answer_text=None,
            start_position_character=None,
            title="Predict",
            is_impossible=False,
            answers=None,
        )
        for i, question_text in enumerate(question)
    ]

    features, dataset = squad_convert_examples_to_features(
        examples=examples,
        tokenizer=tokenizer,
        max_seq_length=384,
        doc_stride=128,
        max_query_length=64,
        is_training=False,
        return_dataset="pt",
        threads=1,
    )

    eval_sampler = SequentialSampler(dataset)
    eval_dataloader = DataLoader(dataset, sampler=eval_sampler, batch_size=10)

    all_results = []

    # Evaluation mode only needs to be selected once, not once per batch.
    model.eval()

    for batch in eval_dataloader:
        batch = tuple(t.to(device) for t in batch)

        with torch.no_grad():
            inputs = {
                "input_ids": batch[0],
                "attention_mask": batch[1],
                "token_type_ids": batch[2],
            }
            outputs = model(**inputs)

        # Read the logits by name instead of unpacking ``outputs.values()``:
        # positional unpacking breaks as soon as the model output carries
        # anything besides the two logit tensors (e.g. hidden states).
        batch_start_logits = to_list(outputs.start_logits)
        batch_end_logits = to_list(outputs.end_logits)

        # batch[3] holds, for each row, the index of its entry in ``features``.
        for i, example_index in enumerate(batch[3]):
            eval_feature = features[example_index.item()]
            unique_id = int(eval_feature.unique_id)
            all_results.append(
                SquadResult(
                    unique_id, batch_start_logits[i], batch_end_logits[i]
                )
            )

    output_prediction_file = "predictions.json"
    output_nbest_file = "nbest_predictions.json"
    output_null_log_odds_file = "null_predictions.json"

    predictions = compute_predictions_logits(
        examples,
        features,
        all_results,
        n_best_size,
        max_answer_length,
        do_lower_case,
        output_prediction_file,
        output_nbest_file,
        output_null_log_odds_file,
        False,  # verbose_logging
        True,  # version_2_with_negative
        null_score_diff_threshold,
        tokenizer,
    )

    return predictions


"""##Run predictions"""

# Sample context passage for the demo query; it is passed to run_prediction
# as ``context_text``. The wording (including typos) is input data and is
# intentionally left untouched.
full_text = """Miami-Dade County (the “County”), as represented by the Miami-Dade Aviation Department (“MDAD”),
manages its Capital Improvement
Program (CIP) and requires a Digital Platform that will be used as a Common Data Environment (CDE) to
house a Project Management
Information System (PMIS) with the ability to view Building Information Models (BIM). In addition, PMIS and
BIM support services, in the
form of staff, are required for the CIP and lifecycle asset management. The PIMIS and Support services will
be used by MDAD’s different
divisions such as: Planning, Facilities Development, Contracts, Program Controls, Facilities Management,
Terminal Operations, Air Side
Operations, Land Side Operations, Properties and Security, etc. This Digital Platform will act as a single
source of truth that provides
systems integration with MDAD infrastructure and personnel. The solution should leverage technologies to
enable cooperation between
stakeholders and streamline processes within the different divisions. The main goals for procuring this
intelligent software are to have an
environment where teams can collaborate within a consolidated platform while avoiding redundancy,
improving efficiency and
performance as well as automating processes. This new software procurement includes integration with
different MDAD databases such
as but not exclusive to Oracle financials, Primavera P6 and EAMS among others. Software will also be
used for asset management and
asset lifecycle after commissioning and project completion.
Miami-Dade County uses adobe sign to sign documents and prefers to maintain this software for
signatures. In addition, the County
uses SharePoint which gives us the ability to store many different file formats including PDF, Microsoft
Office documents, and image
files, including annotations or markups that need to be maintained as historical records.
2.2 Scope of Services
The Miami-Dade Aviation Department is looking to modernize and digitize its facilities management
operations by obtaining a solution to
meet and maintain the requirements of the digital transformation happening in our industry. The proposed
PMIS along with its interface
with BIM and other MDAD systems will provide stakeholders with access to all project documentation,
digital representation """
# Questions to ask about the passage held in full_text.
questions = ["what is scope of services"]

# Run method
predictions = run_prediction(questions, full_text)

# Print results: every predicted answer, not only the first, so questions
# added to the list above are not silently dropped.
for answer in predictions.values():
    print(answer)





ImportError: 
AlbertTokenizer requires the SentencePiece library but it was not found in your environment. Checkout the instructions on the
installation page of its repo: https://github.com/google/sentencepiece#installation and follow the ones
that match your environment. Please note that you may need to restart your runtime after installation.
