# Scan for Articles
<p> This script will use NewsAPI to scan for articles based on keywords
<p> Then it will apply the Flair sentiment model and add the results to a dataframe

## Initial Imports

In [75]:
# Initial Imports

import streamlit as st
import os
from newsapi import NewsApiClient
from datetime import date, timedelta
import pandas as pd
from fpdf import FPDF
news_api_key = os.getenv("news_api_key")
import flair
from flair.data import Sentence
import requests

## Function to scan for articles based on keywords

In [76]:
def scan_for_articles(keyword):
    newsapi = NewsApiClient(api_key=news_api_key)
    relevant_articles = newsapi.get_everything(q=keyword,
                                        language='en', 
                                        sort_by='relevancy', 
                                        page_size=100)
    return relevant_articles


## Pass in keywords into the scan function and return a dataframe

In [82]:

all_relevant_articles = pd.DataFrame(columns = ["source", "author",	"title", "description",	"url","urlToImage", "publishedAt", "content", "keyword","article_sentiment", "article_confidence"])

keywords = ['Helping', 'Forgiveness']

for word in keywords:
    relevant_articles = scan_for_articles(word)
    df = pd.DataFrame(relevant_articles['articles'])
    df["keyword"] = word
    all_relevant_articles = pd.concat([all_relevant_articles, df],ignore_index=True)

# Clean the data in the source column
#     
all_relevant_articles["source"] = all_relevant_articles["source"].apply(lambda x: x['name'])


# Apply Sentiment model

In [89]:
article_sentiment_model = flair.models.TextClassifier.load('en-sentiment')

# Initialize lists

article_sentiment = []
article_confidence = []


# Run Sentiment analysis on collected news sentences


for sentence in all_relevant_articles["description"]:
        if sentence.strip() == "":
                article_confidence.append("")
                article_sentiment.append("")
                
        else:
                sample = flair.data.Sentence(sentence)
                article_sentiment_model.predict(sample)
                article_sentiment.append(sample.labels[0].value)
                article_confidence.append(sample.labels[0].score)

# Add Results to Dataframe

all_relevant_articles['sentiment'] = article_sentiment
all_relevant_articles['confidence'] = article_confidence

2022-02-12 14:54:30,213 loading file C:\Users\Airma\.flair\models\sentiment-en-mix-distillbert_4.pt


In [105]:
all_relevant_articles.head(1)

Unnamed: 0,source,author,title,description,url,urlToImage,publishedAt,content,keyword,sentiment,confidence
0,New York Times,Annie Karni,"Clyburn Pushes Childs for Supreme Court, Testi...",The highest-ranking Black member of Congress i...,https://www.nytimes.com/2022/02/03/us/politics...,https://static01.nyt.com/images/2022/02/03/us/...,2022-02-03T21:58:17Z,I decided that Joe Biden needed to do somethin...,Helping,POSITIVE,0.992109


In [111]:
def make_clickable(url, name):
    return '<a href="{}" rel="noopener noreferrer" target="_blank">{}</a>'.format(url,name)

all_relevant_articles['url_clickable'] = all_relevant_articles.apply(lambda x: make_clickable(x['url'], x['description']), axis=1)




In [113]:
all_relevant_articles.head()


Unnamed: 0,source,author,title,description,url,urlToImage,publishedAt,content,keyword,sentiment,confidence,url_clickable
0,New York Times,Annie Karni,"Clyburn Pushes Childs for Supreme Court, Testi...",The highest-ranking Black member of Congress i...,"<a href=""https://www.nytimes.com/2022/02/03/us...",https://static01.nyt.com/images/2022/02/03/us/...,2022-02-03T21:58:17Z,I decided that Joe Biden needed to do somethin...,Helping,POSITIVE,0.992109,"<a href=""<a href=""https://www.nytimes.com/2022..."
1,Gizmodo.com,Justin Carter,M. Night Shyamalan Thanks Blade Runner 2049 fo...,Guardians of the Galaxy is typically credited ...,"<a href=""https://gizmodo.com/m-night-shyamalan...",https://i.kinja-img.com/gawker-media/image/upl...,2022-01-29T17:45:00Z,Guardians of the Galaxyis typically credited w...,Helping,POSITIVE,0.99732,"<a href=""<a href=""https://gizmodo.com/m-night-..."
2,New York Times,Andy Newman,"How $1,000 a Month in Guaranteed Income Is Hel...","A new, privately funded program offers familie...","<a href=""https://www.nytimes.com/2022/01/18/ny...",https://static01.nyt.com/images/2022/01/17/nyr...,2022-01-18T16:15:31Z,The Bridge Project sprang from Nido de Esperan...,Helping,POSITIVE,0.928812,"<a href=""<a href=""https://www.nytimes.com/2022..."
3,New York Times,Stanley Reed,What Happens if Russia Cuts Off Europe’s Natur...,Europe is a huge customer of Russia’s fossil f...,"<a href=""https://www.nytimes.com/2022/01/25/bu...",https://static01.nyt.com/images/2022/01/25/bus...,2022-01-25T22:24:32Z,But while storage levels remain low and prices...,Helping,POSITIVE,0.996027,"<a href=""<a href=""https://www.nytimes.com/2022..."
4,New York Times,,‘I Asked a Woman on the Platform if She Had An...,"Helping hands in the subway, a tug of war on t...","<a href=""https://www.nytimes.com/2022/01/23/ny...",https://static01.nyt.com/images/2022/01/23/nyr...,2022-01-23T08:00:07Z,Another woman called out to me.\r\nI overheard...,Helping,POSITIVE,0.981947,"<a href=""<a href=""https://www.nytimes.com/2022..."
