<a href="https://colab.research.google.com/github/pardau38/covid19/blob/master/Codiv_19.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## General info

[trello board](https://trello.com/b/ED3H13vT/covid-19-kaggle-kickoff)

forked [notebook](https://www.kaggle.com/davidmezzetti/cord-19-analysis-with-sentence-embeddings)

List of interesting notebook:

[Explored drugs being develloped](https://www.kaggle.com/maria17/cord-19-explore-drugs-being-developed)

## Kaggle dataset and word vectors downloading

Update your kaggle.json API key with the cell below, then launch the newt two.

It'll download data if needed (*eg*, your kernel has restarted)

In [0]:
# Upload your kaggle API token
from google.colab import files
# In kaggle.com : MyAccount -> Create New API Token, will download kaggle.json that you can upload here.
files.upload()

In [0]:
import os

if not os.path.isfile("kaggle.json") and not os.path.isdir(os.path.expanduser("~/.kaggle")):
  raise Exception("Please import your kaggle key first.")

if os.path.isfile("kaggle.json"):
  !mkdir -p ~/.kaggle
  !cp kaggle.json ~/.kaggle/
  !chmod 600 ~/.kaggle/kaggle.json
  !rm kaggle.json

if not os.path.isdir("kaggle_data"):
  # !kaggle datasets list | head
  !pip install -q kaggle
  !pip install -q kaggle-cli

  !kaggle datasets download -d allen-institute-for-ai/CORD-19-research-challenge
  !mkdir kaggle_data
  !unzip -qq CORD-19-research-challenge.zip -d kaggle_data
  !rm CORD-19-research-challenge.zip

if not os.path.isdir("glove_vectors"):
  !pip install -q kaggle
  !pip install -q kaggle-cli

  !kaggle datasets download -d rtatman/glove-global-vectors-for-word-representation
  !mkdir glove_vectors
  !unzip -qq glove-global-vectors-for-word-representation.zip -d glove_vectors
  !rm glove-global-vectors-for-word-representation.zip

In [0]:
# NLTK data for pre-processing
if not os.path.isdir("/root/nltk_data"):
  import nltk
  nltk.download('stopwords')
  nltk.download('punkt')

if "corpora" not in os.listdir("/root/nltk_data"):
  import nltk
  nltk.download('stopwords')
if "tokenizers" not in os.listdir("/root/nltk_data"):
  import nltk
  nltk.download('punkt')

# Python packages
try:
  from retry import retry
except ModuleNotFoundError:
  !pip install retry

try:
  import pathos
except ModuleNotFoundError:
  !pip install pathos

## Link your Google Drive

In [0]:
# Upload files to your google drive (SQLite file eg) and mount it
from google.colab import drive
drive.mount('/content/drive')

## Libs

### Imports

In [0]:
import os
import re
import json
import time
import tqdm
import time
import pickle
import sqlite3

import numpy as np
import pandas as pd
import multiprocessing as mp

from datetime import date
from retry import retry
from sklearn.metrics.pairwise import cosine_similarity
from dateutil import parser
from pathlib import Path
from typing import List, Dict, Any, Generator, Tuple, Union
from collections import OrderedDict, Counter, MutableMapping, Sequence
from pathos.multiprocessing import ProcessingPool as picklable_pool
from textblob import TextBlob

from nltk.corpus import stopwords
from nltk.stem.snowball import SnowballStemmer
from nltk.tokenize import sent_tokenize, RegexpTokenizer

### File processing

In [0]:
def read_file(file_path: str) -> Dict[str, Any]:
    """ Open JSON file and return dict() data """
    with open(file_path, "r") as handler:
        json_data = json.loads(handler.read(), object_pairs_hook=OrderedDict)
    return json_data


def get_body(json_data: Dict[str, Any]) -> str:
    """ Return body from json data """
    return " ".join([json_data["body_text"][index]["text"].strip() for index in range(len(json_data["body_text"]))])

### Language detection

In [0]:
def get_lang(text: str) -> str:
  """ Detects language of text : must contain minimum 3 characters """
  if len(text) >= 3:
    b = TextBlob(text)
    return b.detect_language()
  else:
    raise ValueError('Minimum of 3 characters needed !')

### Database utilities

In [0]:
def instanciate_sql_db(db_path: str = "articles_database.sqlite") -> None:
    """ Create an SQLite database """

    if os.path.isfile(db_path):
        os.remove(db_path)    
    database = sqlite3.connect(db_path)
    # Storing articles
    articles_table = {
        "paper_doi": "TEXT PRIMARY KEY",
        "date": "DATETIME",
        "body": "TEXT",
        "abstract": "TEXT",
        "title": "TEXT",
        "sha": "TEXT",
        "folder": "TEXT"
    }
    columns = ["{0} {1}".format(name, col_type) for name, col_type in articles_table.items()]
    command = "CREATE TABLE IF NOT EXISTS articles ({});".format(", ".join(columns))
    database.execute(command)
    # Storing sentences
    sentences_table = {
        "paper_doi": "TEXT",
        "section": "TEXT",
        "raw_sentence": "TEXT",
        "sentence": "TEXT",
        "vector": "TEXT"
    }
    columns = ["{0} {1}".format(name, col_type) for name, col_type in sentences_table.items()]
    command = "CREATE TABLE IF NOT EXISTS sentences ({});".format(", ".join(columns))
    database.execute(command)
    database.close()

def get_articles_to_insert(articles_df: pd.DataFrame) -> List[Any]:
    """ List comprehension get stuck, who knows why """
    articles = []
    for index, data in articles_df.iterrows():
        articles.append((index, data))
    return articles
  
@retry(sqlite3.OperationalError, tries=5, delay=2)
def insert_row(list_to_insert: List[Any], table_name: str = "articles", db_path: str = "articles_database.sqlite") -> None:
    """ Insert row of articles into the SQLite database """

    if table_name == "articles":
        command = "INSERT INTO articles(paper_doi, title, body, abstract, date, sha, folder) VALUES (?, ?, ?, ?, ?, ?, ?)"
    elif table_name == "sentences":
        command = "INSERT INTO sentences(paper_doi, section, raw_sentence, sentence, vector) VALUES (?, ?, ?, ?, ?)"
    else:
        raise Exception(f"Unknown table {table_name}")

    connection = sqlite3.connect(db_path)
    cursor = connection.cursor()
    cursor.execute(command, list_to_insert)  # This line will be retried if fails
    cursor.close()
    connection.commit()
    connection.close()

def insert_article(args: Any) -> None:
    """ Parse and insert a single article into the SQLite DB. args = [(index, df_line), db_path] """
    index = args[0][0]
    data = args[0][1]
    db_path = args[1]

    # Get body
    if data.has_full_text is True:
        json_file = os.path.join(os.sep, "kaggle", "input", "CORD-19-research-challenge", data.full_text_file, data.full_text_file, f"{data.sha}.json")
        try:
            json_data = read_file(json_file)
            body = get_body(json_data=json_data)
            folder = data.full_text_file
        except FileNotFoundError:
            body = None
            folder = None
    else:
        body = None
        folder = None

    try:
        date = parser.parse(data.publish_time)
    except Exception:  # Better to get no date than a string of whatever
        date = None
        
    raw_data = [
        data.doi,
        data.title,
        body,
        data.abstract,
        date,
        data.sha,
        folder
    ]
    insert_row(list_to_insert=raw_data, db_path=db_path)

def get_all_ids(db_path: str = "articles_database.sqlite") -> List[str]:
    """ Return all articles DOI stored in the article table """
    connection = sqlite3.connect(db_path)
    cursor = connection.cursor()
    cursor.execute("SELECT paper_doi FROM articles")
    ids = cursor.fetchall()
    cursor.close()
    connection.close()
    ids_cleaneds = [id_[0] for id_ in ids if len(id_) == 1]

    return ids_cleaneds

### Text pre-processing

In [0]:
def preprocess_text(text: str, stem_words: bool = True, remove_num: bool = True) -> Tuple[List[str], List[str]]:
    """ Pre-process extracted texts """

    word = RegexpTokenizer(r"\w+")
    stop_words = set(stopwords.words("english"))
    stemmer = SnowballStemmer("english")
    
    def filter_stopwords(sentence: List[str], stopwords: List[str] = stop_words) -> List[str]:
        """ Remove stopwords from a given list of words """
        return [word for word in sentence if word not in stopwords]
    
    def stem_words(sentence: List[str], stem_function: Any = stemmer) -> List[str]:
        """ Get words root for every member of an input list """
        return [stem_function.stem(word) for word in sentence]
    
    def remove_numeric_words(sentence: List[str]) -> List[str]:
        """ Remove number (items) from a list of words """
        letter_pattern = re.compile(r"[a-z]")
        return [word for word in sentence if letter_pattern.match(word)]   

    # Split paragraphs into sentences and keep them for nive output
    sentences_raw = sent_tokenize(text)
    # Lower
    sentences = [sentence.lower() for sentence in sentences_raw]
    # Split sentences into words and remove punctuation
    sentences = [word.tokenize(sentence) for sentence in sentences]
    # Remove stopwords
    sentences = [filter_stopwords(sentence) for sentence in sentences]
    if stem_words is True:
        # Stem words
        sentences = [stem_words(sentence) for sentence in sentences]
    if remove_num is True:
        sentences = [remove_numeric_words(sentence) for sentence in sentences]
    # Filter empty sentences and one-letters words
    sentences = [[word for word in sentence if len(word) > 1] for sentence in sentences if sentence != []]
    return sentences, sentences_raw

def pre_process_articles(args: List[Any]) -> None:
    """ Apply preprocessing to texts and store result into the SQLite DB """

    article_id: str = args[0]
    embedding_model = args[1]
    db_path: str = args[2]
    stem_words: bool = args[3]
    remove_num: bool = args[4]

    connection = sqlite3.connect(db_path)
    cursor = connection.cursor()
    cursor.execute("SELECT * FROM articles WHERE paper_doi = ?", [article_id])
    # Get dict {column: value}
    try:
      article = {[col for col in head if col is not None][0]: value for head, value in zip(cursor.description, cursor.fetchone())}
      cursor.close()
      connection.close()
    except TypeError:  # When the DB doest not return a result
      cursor.close()
      connection.close()
      return None
    
    for section in ["title", "abstract", "body"]:
        if article[section] is not None:
            pp_sentences, sentences_raw = preprocess_text(article[section], stem_words=stem_words, remove_num=remove_num)
            for pp_sentence, raw_sentence in zip(pp_sentences, sentences_raw):
              try:
                # paper, section, sentence, vector
                row_to_insert = [
                    article_id,
                    section,
                    raw_sentence,                                                                          # Raw sentence
                    json.dumps(pp_sentence),                                                               # Store list of tokens as loadable str
                    json.dumps([str(x) for x in embedding_model.compute_sentence_vector(pp_sentence)])     # Embeded vector
                ]
                try:
                  insert_row(list_to_insert=row_to_insert, table_name="sentences", db_path=db_path)
                except sqlite3.OperationalError:  # Even the retry() decorator failed
                  continue
              except TypeError:  # When all words are not in the model
                continue

### Embedding

In [0]:
class Embedding():

    def __init__(self, vectors_path: str = None, embeddings_dimension: int = 50, sentence_embedding_method: str = "mowe"):
        
        if vectors_path is None:
            self.vectors_path = os.path.join(os.sep, "kaggle", "input", "glove-global-vectors-for-word-representation", f"glove.6B.{embeddings_dimension}d.txt")
        else:
            self.vectors_path = vectors_path
        
        self.embeddings_dimension = embeddings_dimension
        self.sentence_embedding_method = sentence_embedding_method

    def build_vectors_dictionary(self) -> Dict[str, List[float]]:
        """ Load pre-trained vectors and build a dict """

        tic = time.time()    

        self.vectors = {}
        with open(self.vectors_path, "r") as handler:
            for line in handler.readlines():
                try:
                    # Prevent to keep useless words (otherwise pre-proc return nothing)
                    word, word_raw = preprocess_text(line.split()[0])
                    vector = [float(dimension) for dimension in line.split()[1:None]]
                    assert len(vector) == self.embeddings_dimension
                    self.vectors[word[0][0]] = vector
                except IndexError:  # When the preprocessing does not return a word (useless word)
                    continue     

        toc = time.time()
        print(f"Took {round((toc-tic) / 60, 2)} min to load {len(self.vectors.keys())} GloVe vectors (embedding dim: {self.embeddings_dimension}).")
        
    def compute_sentence_vector(self, sentence: List[str], sentence_embedding_method: str = "mowe") -> List[float]:
        """ Compute a SOWE/MOWE over all tokens composing a sentence. Word skipped if not in model. """
        words_vector = [self.vectors[word] if word in self.vectors.keys() else list(list(np.full([1, EMBEDDING_DIMENSION, ], np.nan))[0]) for word in sentence]
        if self.sentence_embedding_method == "mowe":
            sentence_embedding = np.nanmean(words_vector, axis=0)
        elif self.sentence_embedding_method == "sowe":
            sentence_embedding = np.nansum(words_vector, axis=0)
        else:
            raise Exception(f"No such sentence embedding method: {sentence_embedding_method}")
        return sentence_embedding


### Query matching

In [0]:
def vectorize_query(query: str) -> List[float]:
    """ Vectorize a sentence """
    pp_query, query_raw = preprocess_text(query, stem_words=False, remove_num=False)
    query_vector = embedding_model.compute_sentence_vector(pp_query[0])
    return query_vector

def get_sentences(db_path: str) -> List[Any]:
    """ Retrieve all sentences """
    connection = sqlite3.connect(db_path)
    cursor = connection.cursor()
    command = "SELECT * FROM sentences"
    cursor.execute(command)
    data = cursor.fetchall()
    cursor.close()
    connection.close()

    return data

def get_article(db_path: str, paper_doi) -> List[Any]:
    """ Retrieve article by paper_doi """
    connection = sqlite3.connect(db_path)
    cursor = connection.cursor()
    command = "SELECT * FROM articles WHERE paper_doi='%s'" % paper_doi
    cursor.execute(command)
    data = cursor.fetchall()
    cursor.close()
    connection.close()

    return data

def compute_cosine_distance(args: Any) -> float:
    """ Compute cosine distance between two embeded sentences """
    sentence_vector = args[1]
    query_vector = args[0]
    distance = 1 - cosine_similarity([query_vector], [sentence_vector])[0][0]

    return (distance, sentence_vector)

def query_db_for_sentence(db_path: str, vector: str):
    """ Get a full sentence from a vector """
    connection = sqlite3.connect(db_path)
    cursor = connection.cursor()
    command = "SELECT * FROM sentences WHERE vector='%s'" % vector
    cursor.execute(command)
    data = cursor.fetchall()
    cursor.close()
    connection.close()

    data = list(set(data))

    if len(data) > 1:
      print(f"ERROR: two sentences with vector {vector} have been found.")
      data = None

    return data

## Parameters

In [0]:
DB_VERSION = 3                      # V1: no embedded vector, V2 no raw sentence

PRE_PROC_STEM_WORDS = False         # Stem words to their root during pre-processing
PRE_PROC_REMOVE_NUM = True          # Remove numerical tokens during pre-processing

EMBEDDING_DIMENSION = 100           # The word embedding dimension to be used (sentence embedding dim as well).
SENTENCE_EMBEDDING_METHOD = "mowe"  # How to calculate sentence vector (SUM or MEAN of words embedding): mowe or sowe.

# Create a DB file name regarding these parameters
today = date.today().strftime("%d%m%Y")
DB_FILE_NAME = os.path.join(f"articles_database_v{DB_VERSION}_{today}_embedding_{EMBEDDING_DIMENSION}_remove_num_{str(PRE_PROC_REMOVE_NUM)}_stem_words_{str(PRE_PROC_STEM_WORDS)}.sqlite")
print(DB_FILE_NAME)

## Insert articles into sqlite DB¶


In [0]:
def create_db_and_load_articles(db_path: str = "articles_database.sqlite", load_file: bool = True) -> None:
    """ Load metadata.csv, try to get body texts and insert """

    if load_file is True:
      assert os.path.isfile(db_path)
      print(f"DB {db_path} will be used instead.")

    else:
      tic = time.time()

      # The metadata.csv file will be used to fetch available files
      metadata_path = os.path.join(os.sep, "content", "kaggle_data", "metadata.csv")
      metadata_df = pd.read_csv(metadata_path)
      # The DOI isn't unique, then let's keep the last version of a duplicated paper
      metadata_df.drop_duplicates(subset=["doi"], keep="last", inplace=True)
      # Load usefull information to be stored: id, title, body, abstract, date, sha, folder
      articles_to_be_inserted = [(article, DB_FILE_NAME) for article in get_articles_to_insert(metadata_df)]
      # Create a new SQLite DB file
      instanciate_sql_db(db_path=DB_FILE_NAME)
      # Parallelize articles insertion
      with mp.Pool(os.cpu_count()) as pool:
          pool.map(insert_article, articles_to_be_inserted)

      toc = time.time()
      print(f"Took {round((toc-tic) / 60, 2)} min to insert {len(articles_to_be_inserted)} articles (SQLite DB: {db_path}).")

In [0]:
# Change load_file to False if you want to create the DB for the first time
create_db_and_load_articles(DB_FILE_NAME, load_file=False)

## Load word embedding

In [0]:
embedding_model = Embedding(
    vectors_path=os.path.join(os.sep, "content", "glove_vectors", f"glove.6B.{EMBEDDING_DIMENSION}d.txt"),
    embeddings_dimension=EMBEDDING_DIMENSION,
    sentence_embedding_method=SENTENCE_EMBEDDING_METHOD
)
embedding_model.build_vectors_dictionary()

## Pre-process and vectorize texts

In [0]:
def pre_process_and_vectorize_texts(embedding_model: Embedding, db_path: str = "articles_database.sqlite", load_file: bool = True, stem_words: bool = False, remove_num : bool = False) -> None:
    """ Apply pre-processing to all loaded articles """

    if load_file is True:
      assert os.path.isfile(db_path)
      print(f"DB {db_path} will be used instead.")

    else:
      tic = time.time()

      # Get all previously inserted IDS as well as a pointer on embedding method
      ids = [(id_, embedding_model, db_path, stem_words, remove_num) for id_ in get_all_ids(db_path=db_path)]
      # For each title, abstract and body, pre-processed found data

      with picklable_pool(os.cpu_count()) as pool:
          pool.map(pre_process_articles, ids)

      toc = time.time()
      print(f"Took {round((toc-tic) / 60, 2)} min to pre-process {len(ids)} articles (SQLite DB: {db_path}).")

In [0]:
# Change load_file to False if you want to create the DB for the first time
pre_process_and_vectorize_texts(embedding_model, DB_FILE_NAME, load_file=False, stem_words=PRE_PROC_STEM_WORDS, remove_num=PRE_PROC_REMOVE_NUM)

## Query the DB

In [0]:
# TODO merge with DB/Query utilities
def get_db_sentences_vectors(db_path: str = "articles_database.sqlite"):
  """ Get sentences vectors to be matched with queries (stay in RAM, thus computed once)."""
  sentences = get_sentences(db_path)
  sentences_vectors = [[float(x) for x in json.loads(sentence_vector[4])] for sentence_vector in sentences]
  sentences_vectors = [vector for vector in sentences_vectors if np.nansum(vector) != 0]

  print(f"Queries will be matched versus {len(sentences_vectors)} vectors.")
  return sentences_vectors 

def get_query_distances_and_vectors(query: str, sentences_vectors):
  tic = time.time()

  # Vectorize it and format as arguments to be mapped by mp.Pool
  query_vector = list(vectorize_query(query))
  mapping_arguments = [(query_vector, sentence_vector) for sentence_vector in sentences_vectors]

  # Execute
  with mp.Pool(os.cpu_count()) as pool:
    distances_and_vectors = pool.map(compute_cosine_distance, mapping_arguments)
  toc = time.time()
  print(f"Took {round((toc-tic) / 60, 2)} min to process the query.")

  return distances_and_vectors

def get_k_closest_sentences(distances_and_vectors, k = 5) -> List:
  # Get results
  distances = [item[0] for item in distances_and_vectors]
  vectors = [item[1] for item in distances_and_vectors]

  # Find  k closest 
  closest_sentence_indexes = np.argpartition(np.array(distances), k)[:k]
  closest_vectors = [vectors[idx] for idx in closest_sentence_indexes]
  closest_vectors_str = [json.dumps([str(x) for x in vec]) for vec in closest_vectors]

  # Retrieve closest sentences
  closest_sentences = [query_db_for_sentence(vector=vec_str, db_path=DB_FILE_NAME) for vec_str in closest_vectors_str]

  return closest_sentences

In [0]:
sentences_vectors = get_db_sentences_vectors(DB_FILE_NAME)

Queries will be matched versus 344182 vectors.


In [0]:
# query = "chloroquine usage coronavirus treatment"
query = "Persistence of virus on surfaces of different materials (e,g., copper, stainless steel, plastic)."

distances_and_vectors = get_query_distances_and_vectors(query, sentences_vectors)

Took 1.41 min to process the query.


In [0]:
# K best results
k = 20
closest_sentences = get_k_closest_sentences(distances_and_vectors, k)
for sentence in closest_sentences:

  print("SENTENCE")
  print(f"\tSECTION:\t\t{sentence[0][1]}")
  print(f"\tSENTENCE:\t\t{sentence[0][2]}")
  print(f"\tVECTOR:\t\t\t{str(sentence[0][3])}")
  # article data 
  print("ARTICLE")
  article = get_article(DB_FILE_NAME, sentence[0][0])
  print(f"\tTITLE:\t\t\t{str(article[0][4])}")
  print(f"\tDOI:\t\t\t{sentence[0][0]}")
  print("\n")

SENTENCE
	SECTION:		abstract
	SENTENCE:		Therefore, the objective of this study was to evaluate the survival of PEDV in 9 different feed ingredients when exposed to 60, 70, 80, and 90 °C, as well as the survival on four different surfaces (galvanized steel, stainless steel, aluminum, and plastic).
	VECTOR:			["therefore", "objective", "study", "evaluate", "survival", "pedv", "different", "feed", "ingredients", "exposed", "well", "survival", "four", "different", "surfaces", "galvanized", "steel", "stainless", "steel", "aluminum", "plastic"]
ARTICLE
	TITLE:			Survival of porcine epidemic diarrhea virus (PEDV) in thermally treated feed ingredients and on surfaces
	DOI:			10.1186/s40813-017-0064-3


SENTENCE
	SECTION:		abstract
	SENTENCE:		After drying, HCoV-229E infectivity was still detectable after 3h on various surfaces (aluminum, sterile latex surgical gloves, sterile sponges) but HCoV-OC43 survived 1h or less.
	VECTOR:			["drying", "hcov", "infectivity", "still", "detectable", "vario

# Tests

In [0]:
connection = sqlite3.connect(DB_FILE_NAME)
cursor = connection.cursor()
cursor.execute("SELECT * FROM sentences")
res = cursor.fetchall()
cursor.close()
connection.close()

print(len(res))
print("\n".join(res[32]))

344482
10.1001/jama.2014.2116
title
Critically Ill Patients With Influenza A(H1N1)pdm09 Virus Infection in 2014
["critically", "ill", "patients", "influenza", "h1n1", "pdm09", "virus", "infection"]
["-0.03297171428571429", "-0.07034200000000003", "-0.13139299999999998", "0.23254414285714284", "-0.21820857142857145", "0.25680571428571425", "0.4409482857142857", "-0.28643142857142856", "0.3511464285714286", "0.02130357142857142", "-0.1704287142857143", "0.30054571428571425", "0.3011331428571428", "-0.14776571428571428", "0.14185214285714287", "0.30474285714285715", "-0.310925", "-0.14698807142857143", "0.2134452142857143", "-0.002152714285714254", "0.04621328571428572", "-0.07467785714285714", "-0.1030742857142857", "0.20810857142857145", "-0.016704285714285727", "0.17449864285714287", "0.3705238571428571", "0.13111942857142855", "-0.09994071428571427", "0.3934417142857143", "0.088722", "0.07798914285714284", "-0.025241000000000006", "0.17041", "0.1196057142857143", "-0.20598457142857146

In [0]:
sentence_1 = ["cool", "concert", "guitar"]
sentence_2 = ["super", "piano", "song"]
sentence_3 = ["boat", "drugs", "corona"]

distance_1_2 = compute_cosine_distance(
    embeding_model.compute_sentence_vector(sentence_1),
    embeding_model.compute_sentence_vector(sentence_2)
)

distance_1_3 = compute_cosine_distance(
    embeding_model.compute_sentence_vector(sentence_1),
    embeding_model.compute_sentence_vector(sentence_3)
)

print(f"Distance between: '{' '.join(sentence_1)}' and '{' '.join(sentence_2)}': {distance_1_2}")
print(f"Distance between: '{' '.join(sentence_1)}' and '{' '.join(sentence_3)}': {distance_1_3}")

NameError: ignored