In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import requests
import os
import tweepy
import configparser  # Library to hide cridentials
import json
from timeit import default_timer as timer

In [2]:
# IPython magic: render matplotlib figures inline, inside the notebook output
%matplotlib inline

In [3]:
# Load the enhanced Twitter archive CSV into a DataFrame
df_twitter_archive = pd.read_csv(filepath_or_buffer='twitter-archive-enhanced.csv')

# Show the (rows, columns) shape as the cell output
df_twitter_archive.shape

(2356, 17)

In [4]:
# Configure pandas for this notebook
# Raise the default limit on the number of columns shown when a frame is displayed.
pd.options.display.max_columns = 21
# Turn off the chained-assignment warning.
pd.set_option('mode.chained_assignment', None)

####  Téléchargement programmatique des données

Grâce à la bibliothèque <b>requests</b>, nous allons télécharger et enregistrer le fichier <b>image-predictions.tsv</b>, qui contient certaines informations telles que les URL des photos des chiens.

In [5]:
# Download image-predictions.tsv and save it in the current working directory
url = 'https://d17h27t6h515a5.cloudfront.net/topher/2017/August/599fd2ad_image-predictions/image-predictions.tsv'

# A timeout keeps the cell from hanging forever if the server never answers.
response = requests.get(url, timeout=30)

# Stop on an HTTP error (404, 503, ...) instead of writing the error page
# to disk as if it were the dataset.
response.raise_for_status()

# The local file name is the last path component of the URL.
with open(url.split('/')[-1], mode='wb') as file:
    file.write(response.content)

####  Récupération de données supplémentaires de l’API Twitter
Maintenant nous devons récupérer les données supplémentaires : le nombre de retweets de chaque tweet et le nombre de favoris (« j’aime ») en utilisant l’API Twitter. Pour y arriver, nous allons procéder de la manière suivante :
<ul>
    <li>Configurer twitter API</li>
    <li>Récupérer la liste de tous les identifiants des tweets (tweet_id) contenus dans <b>df_twitter_archive</b></li>
    <li>Utiliser l'API pour récupérer ces données supplémentaires</li>
    <li>Si un téléchargement réussit, stocker ces données sous forme de dictionnaire dans la variable <b>tweets_list</b></li>
    <li>Si un téléchargement échoue, enregistrer son tweet_id dans <b>error_tweets_list</b></li>
</ul>


In [6]:
# Twitter API configuration
# The credentials are read from config.ini rather than written in the notebook.
config = configparser.RawConfigParser()

# read() returns the list of files it parsed and silently skips a missing one,
# so fail here with a clear message instead of an opaque KeyError below.
if not config.read('config.ini'):
    raise FileNotFoundError(
        "config.ini was not found or could not be read; "
        "it must define a [TWITTER] section with the API credentials"
    )

twitter_credentials = config['TWITTER']

api_key = twitter_credentials['api_key']
api_key_secret = twitter_credentials['api_key_secret']

access_token = twitter_credentials['access_token']
access_token_secret = twitter_credentials['access_token_secret']

bearer_token = twitter_credentials['bearer_token']

In [7]:
# Twitter API Authentication (OAuth 1.0a user context)
auth = tweepy.OAuthHandler(api_key, api_key_secret)
auth.set_access_token(access_token, access_token_secret)

# Create the API instance.
# wait_on_rate_limit=True makes tweepy sleep until the rate-limit window resets
# instead of raising, so a long loop over many tweet ids does not turn
# rate-limit responses into failed downloads.
api = tweepy.API(auth, wait_on_rate_limit=True)

In [8]:
# Collect every tweet_id of df_twitter_archive into a plain Python list
tweet_id_list = df_twitter_archive['tweet_id'].tolist()

# Preview: the first 10 ids of the list
first_ten_ids = tweet_id_list[:10]
print(first_ten_ids)


[892420643555336193, 892177421306343426, 891815181378084864, 891689557279858688, 891327558926688256, 891087950875897856, 890971913173991426, 890729181411237888, 890609185150312448, 890240255349198849]


In [None]:
# Get data from twitter API
# For every id in tweet_id_list, request the tweet and keep only its retweet
# and favorite counts. Successes go to tweets_list (one dict per tweet),
# failures go to error_tweets_list (the tweet_id only).
tweets_list = []
error_tweets_list = []
tweets_count = 0  # stays 0 when tweet_id_list is empty

start = timer()
for tweets_count, tweet_id in enumerate(tweet_id_list, start=1):
    print('{0} : {1} fecthing data...'.format(tweets_count, tweet_id))

    try:
        tweet_data = api.get_status(tweet_id, tweet_mode="extended")
        # Build a NEW dict for each tweet. Reusing a single dict across
        # iterations would make every entry of tweets_list point to the same
        # object, i.e. all rows would end up holding the last tweet's values.
        tweet = {
            'tweet_id': str(tweet_id),
            'retweet_count': tweet_data._json['retweet_count'],
            'favorite_count': tweet_data._json['favorite_count'],
        }
    except Exception as error:
        # Catch Exception rather than using a bare except, so that interrupting
        # the kernel (KeyboardInterrupt) still stops the loop. The reason is
        # printed so failures can be told apart.
        error_tweets_list.append(tweet_id)
        print('failed')
        print('reason : {0}'.format(error))
    else:
        tweets_list.append(tweet)
        print('done')

    print('_______________________________________________________________')
end = timer()

# Elapsed time in seconds, then a summary of the run
print(end - start)
print('Total of successes : ', len(tweets_list))
print('Total of failures :', len(error_tweets_list))
print(error_tweets_list)


1 : 892420643555336193 fecthing data...
done
_______________________________________________________________
2 : 892177421306343426 fecthing data...
done
_______________________________________________________________
3 : 891815181378084864 fecthing data...
failed
_______________________________________________________________
4 : 891689557279858688 fecthing data...
done
_______________________________________________________________
5 : 891327558926688256 fecthing data...
done
_______________________________________________________________
6 : 891087950875897856 fecthing data...
failed
_______________________________________________________________
7 : 890971913173991426 fecthing data...
done
_______________________________________________________________
8 : 890729181411237888 fecthing data...
failed
_______________________________________________________________
9 : 890609185150312448 fecthing data...
done
_______________________________________________________________
10 : 89024025

failed
_______________________________________________________________
76 : 878281511006478336 fecthing data...
done
_______________________________________________________________
77 : 878057613040115712 fecthing data...
done
_______________________________________________________________
78 : 877736472329191424 fecthing data...
done
_______________________________________________________________
79 : 877611172832227328 fecthing data...
failed
_______________________________________________________________
80 : 877556246731214848 fecthing data...
done
_______________________________________________________________
81 : 877316821321428993 fecthing data...
done
_______________________________________________________________
82 : 877201837425926144 fecthing data...
done
_______________________________________________________________
83 : 876838120628539392 fecthing data...
failed
_______________________________________________________________
84 : 876537666061221889 fecthing data...
done

In [None]:
# Save the list of dictionaries (tweets_list) as a .txt file,
# one JSON object per line (JSON Lines layout)
with open('tweet_json.txt', 'w', encoding='utf-8') as file:
    for tweet in tweets_list:
        # json.dump writes to the file object. json.dumps only returns a
        # string and does not accept a file as a positional argument.
        json.dump(tweet, file)
        file.write('\n')

In [None]:
# Read the tweet_json.txt file
# The file holds one JSON object per line, so it is parsed with
# read_json(lines=True) rather than as space-separated CSV.
# tweet_id was written as a string; the explicit dtype keeps it a string
# instead of letting pandas infer a numeric type.
df_tweet_json = pd.read_json('tweet_json.txt', lines=True, dtype={'tweet_id': str})
df_tweet_json.head()

In [None]:
# tweet_json.txt contains one JSON document per line, so json.load() on the
# whole file fails with "Extra data". Decode each non-empty line instead.
with open("tweet_json.txt", encoding="utf-8") as f:
    json_data = [json.loads(line) for line in f if line.strip()]

# Print a short sample rather than the whole list
print(json_data[:5])

### Références
Vidéo (YouTube) sur l'utilisation de l'API Twitter :
https://www.youtube.com/watch?v=Lu1nskBkPJU

Reading and Writing JSON to a File in Python :
https://stackabuse.com/reading-and-writing-json-to-a-file-in-python/