# Run only once to install the software

In [None]:
!pip install -U transformers
!pip install -U spacy
!pip install -U ludwig
!pip install -U tensorflow

!python -m spacy download en_core_web_sm

# Import libraries and load the spaCy model

In [4]:
import json
import logging

import pandas as pd
import spacy
from ludwig.api import LudwigModel
from ludwig.datasets import agnews
from spacy import displacy
from transformers import pipeline
# Load the spaCy pipeline "en_core_web_sm" (downloaded by the install cell);
# `nlp` is used further down for entity highlighting.
nlp = spacy.load("en_core_web_sm")

RuntimeError: ignored

In [None]:
def getFactText(path='cases-9.json'):
    """Extract the "circumstances of the case" paragraphs from a cases JSON file.

    The file must contain a list of cases. Each case has a "content" mapping
    whose values are lists of sections; each section has a "content" heading
    and, for the sections that are inspected, an "elements" list. Only
    paragraphs nested under the "THE FACTS" section and its
    "I.  THE CIRCUMSTANCES OF THE CASE" sub-section (the heading uses a double
    non-breaking space) are collected.

    Args:
        path: Location of the JSON file. Defaults to 'cases-9.json' in the
            current working directory, which is what the notebook uses.

    Returns:
        A dict with two lists:
          * "parts": every matching paragraph across all cases, in file order,
            with each double non-breaking space replaced by one regular space.
          * "fullText": one string per case, made of that case's paragraphs,
            each prefixed with a newline and a space. Cases with no matching
            paragraphs contribute an empty string.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the file is not valid JSON.
        KeyError: if a case or an inspected section lacks an expected key.
    """
    factsHeading = "THE FACTS"
    circumstancesHeading = "I.\xa0\xa0THE CIRCUMSTANCES OF THE CASE"

    # Parse the whole file first so the handle is closed before processing.
    with open(path, encoding='utf-8') as json_file:
        data = json.load(json_file)

    allFactTexts = {"parts": [], "fullText": []}
    for case in data:
        caseParts = []
        for sections in case["content"].values():
            for textBulk in sections:
                if textBulk["content"] != factsHeading:
                    continue
                for subText in textBulk["elements"]:
                    if subText["content"] != circumstancesHeading:
                        continue
                    for actualText in subText["elements"]:
                        # Only the *double* non-breaking space is normalised;
                        # single ones are left untouched, as before.
                        caseParts.append(actualText["content"].replace("\xa0\xa0", " "))
        allFactTexts["parts"].extend(caseParts)
        # Same layout as repeated `text + "\n " + paragraph`, built in one pass.
        allFactTexts["fullText"].append("".join("\n " + part for part in caseParts))
    return allFactTexts

In [None]:
# Extract every matching paragraph and the per-case texts from the JSON file.
facts = getFactText()

# Sample inputs for the demos below: a single paragraph (entry 13 of "parts")
# and one whole case (entry 3 of "fullText").
factToTestOn = facts["parts"][13]
longText = facts["fullText"][3]

# Echo both samples, one after the other, each ending with a newline.
print(factToTestOn, longText, sep="\n")

# Question answering

In [None]:
# Build a question-answering pipeline on top of the
# "distilbert-base-uncased-distilled-squad" checkpoint.
question_answerer = pipeline(
    task="question-answering",
    model="distilbert-base-uncased-distilled-squad",
)

# The sample facts paragraph picked earlier is the passage to be queried.
context = factToTestOn

In [None]:
# Put one question to the pipeline, using the sample paragraph as context.
result = question_answerer(
    question="When did the applicant arrive in Moscow?",
    context=context,
)

In [None]:
# Show the raw value returned by the question-answering pipeline.
print(result)

# Entity highlighting

In [None]:

# Run the spaCy pipeline over the sample paragraph, then render the result
# with displaCy's "ent" style.
doc = nlp(factToTestOn)
displacy.render(doc, style="ent")

# Summarisation

In [None]:
# NOTE(review): "facebook/bart-base" is kept unchanged, but it looks like a
# general-purpose base checkpoint rather than one fine-tuned for summarisation;
# confirm, and consider a summarisation checkpoint if the output is poor.
summarizer = pipeline("summarization", model="facebook/bart-base")

# A whole case text can be longer than the model's maximum input length.
# `truncation=True` makes the tokenizer clip over-long input instead of
# letting the call fail inside the model.
summary = summarizer(
    longText,
    max_length=40,
    min_length=30,
    do_sample=False,
    truncation=True,
)
print(summary)


# Supervised Machine Learning

In [None]:
# Load AG News already split, so that a held-out test set exists for the
# evaluation cell. With split=True the loader returns three DataFrames in the
# order (train, test, validation); the validation part is not used here.
train_df, test_df, _ = agnews.load(split=True)


# Ludwig model definition: one text input, one categorical output.
config = {
  "input_features": [
    {
      "name": "title",            # The name of the input column
      "type": "text",             # Data type of the input column
      "encoder": {                # The model architecture used to encode this column
            "type": "parallel_cnn"
       }
    }
  ],
  "output_features": [
    {
      "name": "class",            # The name of the column to predict
      "type": "category",         # Data type of the output column
    }
  ]
}

# Build the (still untrained) model; INFO-level logging shows training progress.
model = LudwigModel(config, logging_level=logging.INFO)


In [None]:
# Train the model on `train_df`, keeping the three values `model.train`
# returns: training statistics, the preprocessed data and the output directory.
train_stats, preprocessed_data, output_directory = model.train(dataset=train_df)

In [None]:
# Evaluate the trained model on `test_df` (expected to come from the
# dataset-loading cell), asking for per-example predictions and overall stats.
# Note: this rebinds `output_directory`, which the training cell also assigns.
test_stats, predictions, output_directory = model.evaluate(
  test_df,
  collect_predictions=True,
  collect_overall_stats=True
)

In [None]:
# Plotting helper, imported next to its use in this cell.
from ludwig.visualize import confusion_matrix

# Draw the confusion matrix for the 'class' output feature from the evaluation
# statistics. `test_stats` is wrapped in a list, alongside the one-element
# `top_n_classes` and `model_names` lists.
confusion_matrix(
  [test_stats],
  model.training_set_metadata,
  'class',
  top_n_classes=[5],
  model_names=[''],
  normalize=True,
)



In [None]:
# Three unseen headlines to classify. They go into a frame with a single
# "title" column, the same name as the model's input feature.
headlines = [
    "Google may spur cloud cybersecurity M&A with $5.4B Mandiant buy",
    "Europe struggles to meet mounting needs of Ukraine's fleeing millions",
    "How the pandemic housing market spurred buyer's remorse across America",
]
text_to_predict = pd.DataFrame({"title": headlines})

# `model.predict` hands back the predictions together with an output directory.
predictions, output_directory = model.predict(text_to_predict)