Ce notebook a été créé pour les requêtes via les API.

# Importation des modules

In [1]:
import pandas as pd
import numpy as np
import requests
import searchtweets
from datetime import *

# Récupération des tweets via l'API de Twitter

In [2]:
# Load the Twitter search credentials stored under the "search_tweets_api"
# key of the YAML file. env_overwrite=False is passed so that, presumably,
# environment variables do not replace the values read from the file.
premium_search_args = searchtweets.load_credentials(
    filename="twitter_keys_idrissa.yaml",
    yaml_key="search_tweets_api",
    env_overwrite=False,
)

# Format de date_debut et date_fin : '2021-3-1'
# OR moderna OR astrazeneca OR janssen

def tweets_entre_2dates(vaccin, date_debut, date_fin):
    """Query the Twitter search API for tweets about ``vaccin`` in a date range.

    The search rule wraps ``vaccin`` in parentheses and appends the
    ``lang:fr`` and ``place_country:FR`` operators. Both the page size and
    the overall result cap are set to 100.

    Args:
        vaccin: Search expression inserted between the parentheses of the rule.
        date_debut: Value forwarded as ``from_date`` (e.g. ``'2021-3-1'``).
        date_fin: Value forwarded as ``to_date`` (e.g. ``'2021-3-8'``).

    Returns:
        A list holding every item produced by the result stream.
    """
    filtres = " lang:fr place_country:FR"
    requete = "(" + vaccin + ")" + filtres

    payload = searchtweets.gen_rule_payload(
        requete,
        results_per_call=100,
        from_date=date_debut,
        to_date=date_fin,
    )
    flux = searchtweets.ResultStream(
        rule_payload=payload,
        max_results=100,
        **premium_search_args,
    )

    resultats = list(flux.stream())
    return resultats

def csv_tweets_entre2dates(vaccin, date_debut, date_fin):
    """Fetch the tweets about ``vaccin`` for a date window and save them as CSV.

    The tweets come from ``tweets_entre_2dates``. One row is written per
    tweet with the columns ``id``, ``date``, ``screenName``, ``texte``,
    ``type``, ``nbre_like``, ``nbre_retweet`` and ``type_vaccin``. The file
    is written to
    ``~/work/projetpy/csv_tweets/tweets_<vaccin>_<date_debut>_a_<date_fin>.csv``.

    Args:
        vaccin: Search expression; also stored in the ``type_vaccin`` column
            and used in the file name.
        date_debut: Start of the window, as a string (used in the file name).
        date_fin: End of the window, as a string (used in the file name).

    Returns:
        None. The result is the CSV file written to disk.
    """
    fields = ['id', "date", 'screenName', "texte", "type", "nbre_like", "nbre_retweet", "type_vaccin"]

    clean_tweets = [
        [
            tweet.id,
            # NOTE(review): fromtimestamp() without a tz yields a naive
            # datetime in the machine's local timezone -- confirm this is
            # the intended reference for the "date" column.
            datetime.fromtimestamp(tweet.created_at_seconds),
            tweet.screen_name,
            tweet.all_text,
            tweet.tweet_type,
            tweet.favorite_count,
            tweet.retweet_count,
            vaccin,
        ]
        for tweet in tweets_entre_2dates(vaccin, date_debut, date_fin)
    ]

    # Give the column names to the constructor instead of assigning
    # ``.columns`` afterwards: with no tweets the frame would have zero
    # columns and the assignment would raise a length-mismatch ValueError.
    # This way an empty window produces a header-only CSV.
    tweets_df = pd.DataFrame(clean_tweets, columns=fields)
    tweets_df.to_csv(
        '~/work/projetpy/csv_tweets/tweets_' + vaccin + '_' + date_debut + '_a_' + date_fin + '.csv',
        index=False,
        header=True,
    )

In [8]:
def ajout_7_jrs(dte):
    """Return the date that falls seven days after ``dte``.

    Args:
        dte: Date string parsed with the ``%Y-%m-%d`` format.

    Returns:
        The shifted date, formatted with ``%Y-%m-%d``.
    """
    fmt = "%Y-%m-%d"
    decalee = datetime.strptime(dte, fmt) + timedelta(weeks=1)
    return decalee.strftime(fmt)

def ajout_20_jrs(dte):
    """Return the date that falls twenty days after ``dte``.

    Args:
        dte: Date string parsed with the ``%Y-%m-%d`` format.

    Returns:
        The shifted date, formatted with ``%Y-%m-%d``.
    """
    fmt = "%Y-%m-%d"
    decalee = datetime.strptime(dte, fmt) + timedelta(days=20)
    return decalee.strftime(fmt)

In [9]:
def import_pfizer_tweets(date_debut, date_fin):
    """Export the "pfizer" tweets between two dates as one CSV per 7-day window.

    The range ``[date_debut, date_fin]`` is cut into consecutive 7-day
    windows, each exported with ``csv_tweets_entre2dates``. The last window
    is shortened so that it ends on ``date_fin``; the whole range is
    therefore covered even when its length is not a multiple of 7 days.

    Args:
        date_debut: Start date, parseable with ``%Y-%m-%d``.
        date_fin: End date, parseable with ``%Y-%m-%d``.

    Returns:
        None. Nothing is exported when ``date_debut`` is not before
        ``date_fin``.
    """
    fmt = "%Y-%m-%d"
    fin = datetime.strptime(date_fin, fmt)

    date_suivante = ajout_7_jrs(date_debut)
    while datetime.strptime(date_suivante, fmt) < fin:
        csv_tweets_entre2dates("pfizer", date_debut, date_suivante)
        date_debut = date_suivante
        date_suivante = ajout_7_jrs(date_suivante)

    # The loop stops as soon as the next boundary reaches date_fin, so the
    # span from the last boundary up to date_fin still has to be exported.
    if datetime.strptime(date_debut, fmt) < fin:
        csv_tweets_entre2dates("pfizer", date_debut, date_fin)

In [10]:
# Export the pfizer tweets from 2021-6-23 towards 2021-12-1, one CSV per window.
import_pfizer_tweets(date_debut="2021-6-23", date_fin="2021-12-1")

In [12]:
# Export the pfizer tweets for the single window 2021-11-24 -> 2021-12-1.
csv_tweets_entre2dates(vaccin="pfizer", date_debut="2021-11-24", date_fin="2021-12-1")

In [20]:
def import_tweets(vaccin, date_debut, date_fin):
    """Export the tweets about ``vaccin`` between two dates, one CSV per 20-day window.

    The range ``[date_debut, date_fin]`` is cut into consecutive 20-day
    windows, each exported with ``csv_tweets_entre2dates``. The last window
    is shortened so that it ends on ``date_fin``; the whole range is
    therefore covered even when its length is not a multiple of 20 days.

    Args:
        vaccin: Search expression forwarded to ``csv_tweets_entre2dates``.
        date_debut: Start date, parseable with ``%Y-%m-%d``.
        date_fin: End date, parseable with ``%Y-%m-%d``.

    Returns:
        None. Nothing is exported when ``date_debut`` is not before
        ``date_fin``.
    """
    fmt = "%Y-%m-%d"
    fin = datetime.strptime(date_fin, fmt)

    date_suivante = ajout_20_jrs(date_debut)
    while datetime.strptime(date_suivante, fmt) < fin:
        csv_tweets_entre2dates(vaccin, date_debut, date_suivante)
        date_debut = date_suivante
        date_suivante = ajout_20_jrs(date_suivante)

    # The loop stops as soon as the next boundary reaches date_fin, so the
    # span from the last boundary up to date_fin still has to be exported.
    if datetime.strptime(date_debut, fmt) < fin:
        csv_tweets_entre2dates(vaccin, date_debut, date_fin)

In [13]:
# Export the moderna tweets for the window 2021-09-13 -> 2021-09-30.
csv_tweets_entre2dates(vaccin="moderna", date_debut="2021-09-13", date_fin="2021-09-30")

In [14]:
# Export the moderna tweets for the window 2021-11-13 -> 2021-12-01.
csv_tweets_entre2dates(vaccin="moderna", date_debut="2021-11-13", date_fin="2021-12-01")

In [16]:
# Export the moderna tweets for the window 2021-06-09 -> 2021-07-05.
csv_tweets_entre2dates(vaccin="moderna", date_debut="2021-06-09", date_fin="2021-07-05")

In [17]:
# Export the moderna tweets for the window 2021-06-01 -> 2021-06-09.
csv_tweets_entre2dates(vaccin="moderna", date_debut="2021-06-01", date_fin="2021-06-09")

In [21]:
# Disabled call kept for reference:
# import_tweets("janssen", "2021-6-1", "2021-12-1")
# Export the astrazeneca tweets from 2021-06-27 towards 2021-12-01, one CSV per window.
import_tweets(vaccin="astrazeneca", date_debut="2021-06-27", date_fin="2021-12-01")

In [22]:
# Export the astrazeneca tweets for the single window 2021-11-14 -> 2021-12-01.
csv_tweets_entre2dates(vaccin="astrazeneca", date_debut="2021-11-14", date_fin="2021-12-01")

# Unification des tweets en un DataFrame