# Demo - News to open sanctions

* Read a news article from a URL
* Prompt the LLM to extract persons, organisations and relationships
* Get the output as JSON
* Use GraphCypherQAChain to translate the JSON to Cypher and store the extracted nodes and relationships in Neo4j

### Sources:
* https://learn.deeplearning.ai/langchain
* https://python.langchain.com/docs/get_started/quickstart

### Requirements

In [1]:
# Uncomment to install the notebook's dependencies.
# NOTE(review): graphdatascience and watermark are used by later cells but are not in this list — confirm they are installed.
#!pip install newsapi-python langchain openai langchain-openai neo4j python-dotenv langchainhub langchain-community --quiet

In [2]:
# Record the installed versions of the listed packages alongside the notebook output
%load_ext watermark
%watermark -p langchain,langchainhub,langchain_community

langchain          : 0.1.5
langchainhub       : 0.1.14
langchain_community: 0.0.17



### Imports

In [3]:
import html
import os
from datetime import datetime
from pathlib import Path
from string import Template

import neo4j
import newsapi
import pandas as pd
from dotenv import load_dotenv, find_dotenv, dotenv_values
from graphdatascience import GraphDataScience
from IPython.core.display import HTML
from IPython.display import Image

from langchain import hub
from langchain.agents import AgentExecutor, create_react_agent
from langchain.chains import GraphCypherQAChain
from langchain.chains import LLMChain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.conversation.memory import ConversationBufferMemory
from langchain.output_parsers.json import SimpleJsonOutputParser
from langchain.prompts import PromptTemplate
from langchain.tools import Tool
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.graphs import Neo4jGraph
from langchain_openai import ChatOpenAI

### Settings

In [4]:
# Project layout: the project root is the parent of the current working directory
project_path = Path.cwd().parent
data_path, model_path, output_path = (
    project_path / folder for folder in ("data", "models", "output")
)

# OpenAI chat model to use
llm_model = "gpt-4"

# Load environment settings from the project's dotenv file
load_dotenv(dotenv_path="../.env.opensanctions")

# Neo4j connection settings (each is None when the variable is not set)
neo4j_url = os.environ.get("NEO4J_URL")
neo4j_database = os.environ.get("NEO4J_DATABASE")
neo4j_user = os.environ.get("NEO4J_USER")
neo4j_pass = os.environ.get("NEO4J_PASS")

# API keys (each is None when the variable is not set)
openai_api_key = os.environ.get("OPENAI_API_KEY")
news_api_key = os.environ.get("NEWS_API_KEY")

### 1. Read news article

* Requires a developer API key, which can be created at https://newsapi.org

In [5]:
# Create the NewsAPI client, authenticating with the key read from the environment
api = newsapi.NewsApiClient(api_key=news_api_key)

### Explore available sources

In [6]:
# Option sets exposed by the newsapi client library, paired with their printed labels
const_sets = (
    ("newsapi.const.categories:", newsapi.const.categories),
    ("newsapi.const.languages:", newsapi.const.languages),
    ("newsapi.const.countries:", newsapi.const.countries),
)

for label, values in const_sets:
    print(label, values)

newsapi.const.categories: {'technology', 'entertainment', 'sports', 'health', 'general', 'science', 'business'}
newsapi.const.languages: {'sv', 'de', 'es', 'se', 'zh', 'en-US', 'en', 'ud', 'it', 'no', 'cn', 'ru', 'fr', 'ar', 'pt', 'nl', 'he'}
newsapi.const.countries: {'de', 'tr', 'be', 'at', 'pl', 'rs', 'se', 've', 'br', 'ng', 'cu', 'it', 'is', 'no', 'ro', 'mx', 'ie', 'kr', 'ru', 'us', 'za', 'hu', 'jp', 'eg', 'ma', 'lv', 'sa', 'au', 'my', 'il', 'ch', 'pk', 'gb', 'es', 'si', 'id', 'gr', 'tw', 'lt', 'zh', 'co', 'in', 'nz', 'cn', 'sk', 'hk', 'sg', 'ca', 'bg', 'ua', 'ae', 'th', 'fr', 'ar', 'pt', 'nl', 'cz', 'ph'}


In [7]:
# Ask the NewsAPI sources endpoint for the sources it offers
dict_sources = api.get_sources()

# Tabulate the source records found under the 'sources' key
df_sources = pd.DataFrame(data=dict_sources["sources"])
df_sources

Unnamed: 0,id,name,description,url,category,language,country
0,abc-news,ABC News,"Your trusted source for breaking news, analysi...",https://abcnews.go.com,general,en,us
1,abc-news-au,ABC News (AU),"Australia's most trusted source of local, nati...",https://www.abc.net.au/news,general,en,au
2,aftenposten,Aftenposten,Norges ledende nettavis med alltid oppdaterte ...,https://www.aftenposten.no,general,no,no
3,al-jazeera-english,Al Jazeera English,"News, analysis from the Middle East and worldw...",https://www.aljazeera.com,general,en,us
4,ansa,ANSA.it,"Agenzia ANSA: ultime notizie, foto, video e ap...",https://www.ansa.it,general,it,it
...,...,...,...,...,...,...,...
123,wired,Wired,"Wired is a monthly American magazine, publishe...",https://www.wired.com,technology,en,us
124,wired-de,Wired.de,Wired reports on how emerging technologies aff...,https://www.wired.de,technology,de,de
125,wirtschafts-woche,Wirtschafts Woche,Das Online-Portal des führenden Wirtschaftsmag...,http://www.wiwo.de,business,de,de
126,xinhua-net,Xinhua Net,"中国主要重点新闻网站,依托新华社遍布全球的采编网络,记者遍布世界100多个国家和地区,地方频...",http://xinhuanet.com/,general,zh,zh


In [8]:
# Select sources whose description mentions "world news" (case-insensitive).
# regex=False: the phrase is a literal, not a pattern.
# na=False: a source without a description counts as "no match" instead of
# putting NaN into the boolean mask.
cond_worldnews = df_sources.description.str.contains(
    "world news", case=False, regex=False, na=False
)

# select dutch sources (by language or by country)
cond_dutch = (df_sources.language == 'nl') | (df_sources.country == 'nl')

# filter sources; head() keeps at most the first five matches
df_selected_sources = df_sources[cond_worldnews | cond_dutch].head()

# as list
list_selected_sources = df_selected_sources.id.tolist()

# as comma-separated string (required for the api)
selected_sources = ",".join(list_selected_sources)

display(df_selected_sources)
print("selected sources (list): ", list_selected_sources)
print("selected sources (str): ", selected_sources)

Unnamed: 0,id,name,description,url,category,language,country
1,abc-news-au,ABC News (AU),"Australia's most trusted source of local, nati...",https://www.abc.net.au/news,general,en,au
78,nbc-news,NBC News,"Breaking news, videos, and the latest top stor...",http://www.nbcnews.com,general,en,us
96,rtl-nieuws,RTL Nieuws,Volg het nieuws terwijl het gebeurt. RTL Nieuw...,https://www.rtlnieuws.nl/,general,nl,nl
120,time,Time,Breaking news and analysis from TIME.com. Poli...,http://time.com,general,en,us


selected sources (list):  ['abc-news-au', 'nbc-news', 'rtl-nieuws', 'time']
selected sources (str):  abc-news-au,nbc-news,rtl-nieuws,time


### Get top headlines for selected sources

`get_top_headlines(q=None, qintitle=None, sources=None, language='en', country=None, category=None, page_size=None, page=None)`

In [9]:
# Fetch the current top headlines, restricted to the selected sources.
# NOTE(review): the client's signature lists language='en' as the default —
# confirm that articles from the Dutch sources are still returned.
results = api.get_top_headlines(sources=selected_sources)

# Tabulate the article records found under the 'articles' key
df_articles = pd.DataFrame(data=results["articles"])

In [10]:
# Show the raw article records before rendering them
display(df_articles.head())

# template (HTML) for one article. This is a plain string, not an f-string:
# the $placeholders are filled in by string.Template.substitute below.
article_template = Template("""
    <h3><a href="$url">$title</a></h3>
    <b>$source_name</b><br>
    <i>Published at $published_date by $author</i> - $description
    <img src="$urlToImage" width="300">
    <hr>""")

# display articles
for idx, row in df_articles.head().iterrows():
    # work on a plain dict so the row yielded by iterrows() is not modified
    fields = row.to_dict()

    # format date
    fields['published_date'] = datetime.strptime(fields['publishedAt'], "%Y-%m-%dT%H:%M:%SZ")

    # add source name as separate key
    fields['source_name'] = fields['source']['name']

    # The article text comes from an external API, so escape every value
    # (including quotes) before inserting it into the HTML markup.
    safe_fields = {key: html.escape(str(value)) for key, value in fields.items()}

    # display as HTML
    display(HTML(article_template.substitute(safe_fields)))

Unnamed: 0,source,author,title,description,url,urlToImage,publishedAt,content
0,"{'id': 'abc-news-au', 'name': 'ABC News (AU)'}",Caroline Horn,Heritage-listed Milang Lakeside Butter Factory...,"Thousands of donated hours, countless sausage ...",https://www.abc.net.au/news/2024-02-10/milang-...,https://live-production.wcms.abc-cdn.net.au/52...,2024-02-10T05:59:41Z,The end is finally in sight for a small town's...
1,"{'id': 'abc-news-au', 'name': 'ABC News (AU)'}",Gavin McGrath,Nature Glenelg Trust works to clear blue gum p...,"Most of the 1,000 different eucalyptus tree va...",https://www.abc.net.au/news/2024-02-10/nature-...,https://live-production.wcms.abc-cdn.net.au/7a...,2024-02-10T03:05:37Z,Blue gums degraded Victoria's native wetlands....
2,"{'id': 'abc-news-au', 'name': 'ABC News (AU)'}",Declan Bowring,Platypus habitat in Penrith threatened by Sydn...,A court has ordered Sydney Water to repair ero...,https://www.abc.net.au/news/2024-02-10/conserv...,https://live-production.wcms.abc-cdn.net.au/32...,2024-02-10T02:49:59Z,<ul><li>In short: Conservationists are worried...
3,"{'id': 'abc-news-au', 'name': 'ABC News (AU)'}",Myles Houlbrook-Walk,Aspiring pro-baseballer contracted to Chicago ...,"In spring in the northern hemisphere, Blake Wh...",https://www.abc.net.au/news/2024-02-10/blake-w...,https://live-production.wcms.abc-cdn.net.au/96...,2024-02-10T01:54:52Z,Blake Whitney is a long way from home or at l...
4,"{'id': 'abc-news-au', 'name': 'ABC News (AU)'}",Aaron Kelly,Football players from Papua New Guinea chase t...,They grew up playing footy in cow paddocks at ...,https://www.abc.net.au/news/2024-02-10/papua-n...,https://live-production.wcms.abc-cdn.net.au/7e...,2024-02-10T01:21:57Z,<ul><li>In short: Four footballers from Papua ...
