In [1]:
import requests
import pandas as pd
from load_dotenv import load_dotenv
import random
from sqlalchemy import create_engine, text
from sqlalchemy.engine import Engine
import json

from news_nlp.config import paths

In [2]:
load_dotenv(paths.ENV_FILE)

True

In [3]:
def get_engine(user="news_nlp_user", password="news_nlp_password", host="localhost", port="5433", db_name="news_nlp") -> Engine:
    url = f"postgresql+psycopg2://{user}:{password}@{host}:{port}/{db_name}"
    engine = create_engine(url)
    return engine

In [4]:
def get_one_news_article(id_news: int):
    engine = get_engine()

    query = text("""
        SELECT title, content
        FROM news
        WHERE id_news = :id_news
    """)

    params = {"id_news": id_news}

    df = pd.read_sql(query, con=engine, params=params)

    return df

In [6]:
RANDOM_SEED = 31415
random.seed(RANDOM_SEED)

# ARTICLE_ID = "201111"
ARTICLE_ID = random.randint(1, 200_000)
ARTICLE_ID

154662

In [7]:
article = get_one_news_article(str(RANDOM_SEED))
title = article.loc[0, "title"]
content = article.loc[0, "content"]
print(f"title: {title}\ncontent: {content}")

title: The truth about the downfall of a star reporter at the New York Times
content: It was n't the utterance of the n-word that ended Donald McNeil 's career with The New York Times .
It was his incuriosity about maybe the most important story of our time , his comfort wrapped in a kind of blinkered ignorance , that likely did him in .
I 'm not talking about the pandemic , for which McNeil 's reporting has been rightly praised and may net him a Pulitzer .
I 'm talking about rapidly shifting racial demographics in the United States and what that means for our democracy , cultural understanding and a host of @ @ @ @ @ @ @ health .
That 's the way he comes across in his own four-part series of columns explaining his exit from the Times .
It does n't much matter that he also sounds ornery and a bit entitled to say whatever he wants because he 's been allowed to for such a long time .
He even admits a time in which he said something so nasty about his boss , and shared it with colleagues 

In [8]:
payload = {
    "title": f"{title}",
    "text": f"{content}"
}

# Endpoint /health

In [9]:
url_health = "http://localhost:8005/health"

In [10]:
resp_health = requests.get(url_health)
resp_health

<Response [200]>

In [11]:
resp_health.json()

{'status': 'ok',
 'topics_model_loaded': True,
 'ner_model_loaded': True,
 'active_topics_run_id': 6,
 'entity_types_supported': ['PERSON', 'ORG', 'GPE', 'LOC'],
 'error_message': None}

# Endpoint /v1/topics

In [12]:
url_topics = "http://localhost:8005/v1/topics"

In [13]:
resp_topics = requests.post(url_topics, json=payload)
resp_topics

<Response [200]>

In [14]:
resp_topics.json()

{'topics': {'id_run': 6,
  'id_topic': 11,
  'topic_name': 'White Supremacy',
  'top_terms': ['people',
   'black',
   'white',
   'media',
   'said',
   'america',
   'like',
   'trump',
   'political',
   'american',
   'right',
   'just',
   'social',
   'news',
   'world',
   'new',
   'president',
   'country',
   'anti',
   'americans',
   'racist',
   'left',
   'twitter',
   'rights',
   'biden',
   'history',
   'carlson',
   'racism',
   'government',
   'know',
   'think',
   'race',
   'time',
   'social media',
   'years',
   'women',
   'war',
   'way',
   'did',
   'book',
   'don',
   'party',
   'report',
   'big',
   'fox',
   'going',
   'violence',
   'life',
   'culture',
   'hate']}}

# Endpoint /v1/entities

In [15]:
url_entities = "http://localhost:8005/v1/entities"

In [16]:
resp_entities = requests.post(url_entities, json=payload)
resp_entities

<Response [200]>

In [17]:
resp_entities.json()

{'entities': [{'text': 'the New York Times',
   'label': 'ORG',
   'start_char': 51,
   'end_char': 69},
  {'text': "Donald McNeil 's",
   'label': 'PERSON',
   'start_char': 121,
   'end_char': 137},
  {'text': 'The New York Times',
   'label': 'ORG',
   'start_char': 150,
   'end_char': 168},
  {'text': 'McNeil', 'label': 'ORG', 'start_char': 373, 'end_char': 379},
  {'text': 'the United States',
   'label': 'GPE',
   'start_char': 506,
   'end_char': 523},
  {'text': 'Times', 'label': 'ORG', 'start_char': 724, 'end_char': 729},
  {'text': 'McNeil', 'label': 'PERSON', 'start_char': 1143, 'end_char': 1149},
  {'text': 'McNeil', 'label': 'PERSON', 'start_char': 1392, 'end_char': 1398},
  {'text': 'McNeil', 'label': 'PERSON', 'start_char': 2143, 'end_char': 2149},
  {'text': 'Times', 'label': 'ORG', 'start_char': 2262, 'end_char': 2267},
  {'text': 'McNeil', 'label': 'ORG', 'start_char': 2472, 'end_char': 2478},
  {'text': 'Times', 'label': 'ORG', 'start_char': 2772, 'end_char': 2777},


# Endpoint /v1/analyze

In [18]:
url_analyze = "http://localhost:8005/v1/analyze"

In [19]:
resp_analyze = requests.post(url_analyze, json=payload)
resp_analyze

<Response [200]>

In [20]:
resp_analyze.json()

{'topics': {'id_run': 6,
  'id_topic': 11,
  'topic_name': 'White Supremacy',
  'top_terms': ['people',
   'black',
   'white',
   'media',
   'said',
   'america',
   'like',
   'trump',
   'political',
   'american',
   'right',
   'just',
   'social',
   'news',
   'world',
   'new',
   'president',
   'country',
   'anti',
   'americans',
   'racist',
   'left',
   'twitter',
   'rights',
   'biden',
   'history',
   'carlson',
   'racism',
   'government',
   'know',
   'think',
   'race',
   'time',
   'social media',
   'years',
   'women',
   'war',
   'way',
   'did',
   'book',
   'don',
   'party',
   'report',
   'big',
   'fox',
   'going',
   'violence',
   'life',
   'culture',
   'hate']},
 'entities': [{'text': 'the New York Times',
   'label': 'ORG',
   'start_char': 51,
   'end_char': 69},
  {'text': "Donald McNeil 's",
   'label': 'PERSON',
   'start_char': 121,
   'end_char': 137},
  {'text': 'The New York Times',
   'label': 'ORG',
   'start_char': 150,
   'end_ch