# Tweepy & Gephi
In this tutorial we will use Tweepy to collect tweets from the Twitter API, then use Gephi to visualise the links between Twitter users, where a link is created whenever one user mentions another (which includes retweets) in a tweet.

## Getting data from Twitter
If you don't have tweepy installed on your machine, uncomment and run the following command:

In [126]:
#!pip install --upgrade tweepy

In [120]:
#Import the required libraries 
#Standard library
#For storing the tweets in json
import json
import os
import sys

#Third-party
import matplotlib.pyplot as plt
import pandas as pd
import tweepy

#For streaming tweets
from tweepy import Stream
from tweepy.streaming import StreamListener

### Setting up the Twitter API
We get the API keys by registering an application at apps.twitter.com.

In [2]:
#Twitter keys
# Credentials are read from environment variables instead of being written
# into the notebook: anything saved in a shared file must be treated as
# leaked, so keys that were ever committed here should be regenerated.
# A missing variable raises KeyError naming the variable that needs to be set.
consumerKey = os.environ['TWITTER_CONSUMER_KEY']
consumerSecret = os.environ['TWITTER_CONSUMER_SECRET']
access_token = os.environ['TWITTER_ACCESS_TOKEN']
access_token_secret = os.environ['TWITTER_ACCESS_TOKEN_SECRET']

In [42]:
#Use tweepy.OAuthHandler to create an authentication handler from the consumer
#key and secret, then attach the access token pair to it
auth = tweepy.OAuthHandler(consumer_key=consumerKey,consumer_secret=consumerSecret)
auth.set_access_token(access_token, access_token_secret)

#Connect to the Twitter API using the authentication
api = tweepy.API(auth)

### Creating a twitter stream listener

In [110]:
class StdOutListener(StreamListener):
    """Stream listener that stores raw payloads until a limit is reached.

    Every payload received is appended, as a string, to the module-level list
    ``tweets_data``; the stream is disconnected once ``num_tweet_limit``
    payloads have been received. Both names are looked up at the moment data
    arrives, so they must be defined before the stream is started.
    """

    def __init__(self, api=None):
        # Forward `api` to the base class instead of silently dropping it.
        super(StdOutListener, self).__init__(api)
        self.num_tweets = 0

    def on_data(self, data):
        """Store one raw payload; return False to disconnect at the limit."""
        # The payload is kept undecoded: parsing happens later, so a malformed
        # message cannot abort the stream from inside this callback.
        self.num_tweets += 1
        tweets_data.append(str(data))
        return self.num_tweets < num_tweet_limit

    def on_error(self, status_code):
        """Disconnect on HTTP 420; otherwise report the status code."""
        if status_code == 420:
            #returning False in on_error disconnects the stream
            return False
        # A single pre-formatted string prints identically on Python 2 and 3.
        print('Error on status %s' % (status_code,))

    def on_limit(self, status):
        """Report a limit notice."""
        print('Limit threshold exceeded %s' % (status,))

    def on_timeout(self, status=None):
        """Report a timeout.

        `status` is optional because the tweepy 3.x base-class hook is
        declared without an extra argument (assumption to confirm against the
        installed tweepy version); callers passing one still work.
        """
        print('Stream disconnected; continuing...')

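Streaming of tweets will start when you run this code cell, and the cell will block until the requested number of tweets has been collected. Pass `async=True` to `stream.filter` if you want it to run in the background instead (in tweepy 3.7 and later the keyword is `is_async`, because `async` is a reserved word from Python 3.7 onwards).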

In [112]:
# State shared with StdOutListener.on_data: the list it appends raw payloads
# to, and the number of payloads after which it disconnects. Resetting the
# list here makes the cell safe to re-run.
tweets_data = []
num_tweet_limit = 1000 #Number of tweets you want to stream

listener = StdOutListener()
stream = Stream(auth, listener)

# No async keyword is passed: running in the foreground is the default, and
# the keyword's name (`async`, later `is_async`) depends on the tweepy
# version, while `async` itself is a syntax error from Python 3.7 onwards.
stream.filter(track=['bieber'])

In [113]:
# Number of raw payloads the listener stored before it disconnected
len(tweets_data)

1000

## Analysing the Twitter data

### Converting tweets into edges for use with Gephi

In [111]:
def tweets_n_edges(tweet_stream):
    """Convert raw JSON payloads into decoded tweets and mention edges.

    Parameters
    ----------
    tweet_stream : iterable of str
        JSON-encoded payloads, one per item.

    Returns
    -------
    (tweets, edges) : tuple of two lists
        ``tweets`` holds the decoded payloads that were usable. ``edges``
        holds one ``(author_screen_name, mentioned_screen_name)`` tuple per
        user mention, excluding self-mentions; repeated mentions are kept.

    Payloads that are not valid JSON, or that lack ``user.screen_name`` or
    ``entities.user_mentions``, are skipped entirely.
    """
    tweets = []
    edges = []

    for tweet in tweet_stream:
        try:
            decoded = json.loads(tweet)
            author = decoded['user']['screen_name']
            mentioned = [mention['screen_name']
                         for mention in decoded['entities']['user_mentions']]
        except (ValueError, KeyError, TypeError):
            # Only the expected "not a usable tweet" failures are swallowed,
            # so KeyboardInterrupt and genuine bugs still surface.
            continue

        # Edges are added only after the whole payload has been read, so a
        # payload that fails part-way never leaves edges behind for a tweet
        # that was dropped.
        edges.extend((author, name) for name in mentioned if name != author)
        tweets.append(decoded)

    return (tweets, edges)

In [114]:
# Decode the collected payloads and derive the (author, mentioned user) edges
tweets,edges = tweets_n_edges(tweets_data)

In [115]:
# print() with one pre-formatted string gives the same output on Python 2 and 3.
# NOTE(review): the wording says "data science this week", but the stream in
# this notebook tracks 'bieber'; the text is left unchanged here.
print("There are %s tweets about data science this week, and %s user mentions!" % ( len(tweets), len(edges) ))

There are 1000 tweets about data science this week, and 895 user mentions!


In [116]:
# Display the (author, mentioned user) pairs; duplicates are repeated mentions
edges

[(u'AgostaGarcia', u'zinoscrew'),
 (u'Barbarajf4', u'ProjetoTagsJB'),
 (u'amandaxsteele1', u'lxrryperff'),
 (u'My_sunshine_02', u'projetoojbbr'),
 (u'BiebsLezzy', u'susiesingler'),
 (u'Cretinez', u'OLLGEMOU'),
 (u'MariaBe77531622', u'ProjetoTagsJB'),
 (u'TrapsoulTesfaye', u'illumigomez'),
 (u'MariaBe77531622', u'ProjetoTagsJB'),
 (u'Marquezineofcl', u'justinbieberDMe'),
 (u'Marquezineofcl', u'justinbieberDMe'),
 (u'Marquezineofcl', u'BeaFuturaOLLG'),
 (u'theyounqveins', u'illumigomez'),
 (u'Marquezineofcl', u'BeaFuturaOLLG'),
 (u'jadeapoderosa', u'veraoretrasado'),
 (u'Marquezineofcl', u'BeaFuturaOLLG'),
 (u'belieber_vicky1', u'ProjetoTagsJB'),
 (u'lgbtziall', u'illumigomez'),
 (u'28givenachance', u'illumigomez'),
 (u'kidrauhlpower94', u'promote_jb'),
 (u'Marquezineofcl', u'BeaFuturaOLLG'),
 (u'nathaliabodysay', u'biebersmaniabr'),
 (u'Marquezineofcl', u'BeaFuturaOLLG'),
 (u'Bianca_drew6', u'anegadobiba'),
 (u'Marquezineofcl', u'BeaFuturaOLLG'),
 (u'nathaliabodysay', u'biebersmaniabr')

### Finding the most important nodes
NetworkX (NX) is a Python package for the creation, manipulation, and study of the structure, dynamics, and functions of complex networks

In [118]:
import networkx as nx

G = nx.DiGraph() # directed graph: an edge (a, b) means user a mentioned user b
G.add_edges_from(edges) # add edges to the graph from user mentions
ev_cent = nx.eigenvector_centrality(G, max_iter=10000) # compute eigenvector centrality

ev_tuple = list(ev_cent.items())

# Order from most to least central and keep at most ten entries. Ranks are
# numbered from 1 over whatever is available, so they stay correct when the
# graph has fewer than ten nodes. Sorting ascending and then reversing keeps
# the order of tied scores the same as before.
top_influencers = sorted(ev_tuple, key=lambda x: x[1])[::-1][:10]
list(enumerate(top_influencers, 1)) # get the top 10 network influencers

[(1, (u'ProjetoTagsJB', 0.8049844718999243)),
 (2, (u'SuaMigaLouca', 0.17888543819998315)),
 (3, (u'JBPROJETO24HRS', 0.17888543819998315)),
 (4, (u'ProjetooBieber', 0.17888543819998315)),
 (5, (u'shopfandoms', 0.17888543819998315)),
 (6, (u'jusdrake', 0.17888543819998315)),
 (7, (u'ProjetoFFans', 0.17888543819998315)),
 (8, (u'PJFollowJBHelp', 0.08944271909999157)),
 (9, (u'projetoojbbr', 0.08944271909999157)),
 (10, (u'Beasena94', 0.08944271909999157))]

### Exporting data into CSV file

In [125]:
import csv

# The csv module expects a binary-mode file on Python 2 and a text-mode file
# opened with newline='' on Python 3.
if sys.version_info[0] < 3:
    csv_file = open('edges1.csv', 'wb')
else:
    csv_file = open('edges1.csv', 'w', newline='')

# The with-block guarantees the file is flushed and closed.
with csv_file as myfile:
    wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
    # One row per edge (author, mentioned user), rather than a single row
    # holding every edge, so the file can be read as an edge list.
    wr.writerows(edges)

<function close>