In [None]:
#@title Cloning the github repository
# Fetch the project sources into ./newsjam.
!git clone https://github.com/pie3636/newsjam.git
# Move the repository's top-level entries into the current working directory.
# The shell glob `*` does not match dot-files, so hidden entries (e.g. .git)
# stay behind in ./newsjam.
# NOTE(review): presumably this is what makes the `summ`, `data` and `classif`
# packages importable by the later cells -- confirm against the repo layout.
!mv newsjam/* .

In [None]:
#@title Imports (run once)
!pip install selenium --upgrade;
!apt install chromium-chromedriver;
!cp /usr/lib/chromium-browser/chromedriver /usr/bin
!pip install tweepy --upgrade;
!pip install python-dotenv --upgrade;
!python -m pip install -r requirements.txt;
!python -m spacy download fr_core_news_sm;

In [None]:
#@title More imports and Twitter authentication
import nltk
nltk.download('stopwords')

import os
from dotenv import load_dotenv
from summ.lsa import LSASummarizer
from summ.bert_embed import BertEmbeddingsSummarizer as Bert
from data import scraper_functions
from classif import log_reg_classifier as lrc
import tweepy
import numpy as np
# Build every summarizer once; the pipeline cell picks one of these by number.
LSA = LSASummarizer()
flau = Bert('flaubert/flaubert_large_cased')
camem = Bert('camembert/camembert-large')

# Twitter credentials are read from environment variables. They are loaded from
# /content/API.env when that file exists; variables already present in the
# environment are picked up by os.getenv as well.
load_dotenv('/content/API.env')
consumer_key = os.getenv('consumer_key')
consumer_secret = os.getenv('consumer_secret')
bearer = os.getenv('bearer')
access_key = os.getenv('access_key')
access_key_secret = os.getenv('access_key_secret')

# Fail fast with an actionable message instead of letting the pipeline scrape
# and summarise articles only to be rejected by the API when posting.
# Only the four user-context (OAuth 1.0a) values are required here; `bearer`
# is passed through as-is.
_missing_credentials = [
  name
  for name, value in (
    ('consumer_key', consumer_key),
    ('consumer_secret', consumer_secret),
    ('access_key', access_key),
    ('access_key_secret', access_key_secret),
  )
  if not value
]
if _missing_credentials:
  raise RuntimeError(
    "Missing Twitter API credentials: {}. Provide them in /content/API.env "
    "or as environment variables.".format(", ".join(_missing_credentials))
  )

client = tweepy.Client(
  bearer_token=bearer,
  consumer_key=consumer_key,
  consumer_secret=consumer_secret,
  access_token=access_key,
  access_token_secret=access_key_secret,
)

In [2]:
#@title Pipeline code
# Interactive pipeline: scrape articles, keep the ones the classifier accepts,
# summarise them with the chosen summarizer and post each summary as a tweet,
# followed by a reply containing the source URL.

# Scraped texts that are empty or not longer than this many characters are
# treated as failed scrapes and dropped.
MIN_ARTICLE_CHARS = 300

chosen_summ = int(input("Select your summarizer: LSA (1), FlauBERT (2), or CamemBERT (3) "))
num_articles = int(input("How many articles should be selected? "))
url_list = scraper_functions.actu_autoscraper('https://actu.fr/societe/coronavirus', url_amount=num_articles)

# Any number other than 1-3 falls back to LSA.
summariser = {1: LSA, 2: flau, 3: camem}.get(chosen_summ, LSA)

# summ_list[i] and summ_url[i] always describe the same article. Filtering is
# done here, before appending, so the two lists cannot drift out of step (a
# later filter on the texts alone would pair summaries with the wrong URLs).
summ_list = []
summ_url = []
for article_url in url_list:
  text, url = scraper_functions.actu_scraper(article_url)
  # sometimes the scraper messes up; skip empty and very short article texts
  if not text or len(text) <= MIN_ARTICLE_CHARS:
    continue
  lrc_results = lrc.classifier(text)
  # Explicit comparison kept on purpose: the classifier's return type is not
  # visible from this notebook, so only a value equal to False rejects.
  if lrc_results == False:
    continue
  summ_list.append(text)
  summ_url.append(url)

num_of_tweets = int(input("{} articles are appropriate for posting. How many should be tweeted? ".format(len(summ_list))))
# Never try to tweet more articles than were actually kept.
num_of_tweets = min(num_of_tweets, len(summ_list))

for y in range(num_of_tweets):
  try:
    summary = summariser.get_summary(summ_list[y])
    summary = summary[0]
    print("\nSummary of article {}:".format(y+1),summary)
    tweet_response = client.create_tweet(text=summary)
    # reply to the posted tweet immediately with the original URL
    client.create_tweet(text=summ_url[y], in_reply_to_tweet_id=tweet_response.data['id'])
    print("\nArticle {} was successfully posted!".format(y+1))
  except tweepy.errors.Forbidden:
    print("\nArticle {} was unable to be posted. It was likely posted already.".format(y+1))
    continue

Select your summarizer: LSA (1), FlauBERT (2), or CamemBERT (3) 1
How many articles should be selected? 10
10 articles are appropriate for posting. How many should be tweeted? 4

Summary of article 1: Une seconde journée est prévue sur le même lieu à Sablé, lundi 17 janvier de 10 h à 16 h 30.
Deux lignes de dépistages étaient installées de 10 h à 16 h 30.  

Article 1 was successfully posted!

Summary of article 2: des Pays de La Loire faisait le point sur la vaccination en Sarthe, ce jeudi 13 janvier 2022.  
90% des plus de 65 ans y sont éligibles.  

Article 2 was successfully posted!

Summary of article 3: Covid-19 de moins de 48 heures avant de se présenter à l’aéroport », explique Cédric Gandini.
C’est ici qu’Anne
La gérante de l’agence de voyages

Article 3 was successfully posted!

Summary of article 4: -André
À lire aussi Saint
-de-l’Eure : la ferme photovoltaïque, c’est pour 2022 !
-André
À lire aussi Saint
-de-l’Eure (Eure).
-André

Article 4 was successfully posted!
