# Pengambilan Data

In [10]:
# Search topic: used as the Twitter query keyword and in the export file names.
topik = "bitcoin"
# Free-form label for when the data was collected; only used in the export file names.
waktu_ambil = "14 maret malam"

In [11]:
from tweepy import API 
from tweepy import Cursor
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream

In [12]:
import twitter_credentials
import pandas as pd
import numpy as np
from textblob import TextBlob
import re
import string
import matplotlib.pyplot as plt
%matplotlib inline

In [13]:
# # # # TWITTER CLIENT # # # #
class TwitterClient:
    """Thin wrapper that owns an authenticated tweepy ``API`` handle.

    The optional ``twitter_user`` is only stored on the instance; nothing in
    this class reads it.
    """

    def __init__(self, twitter_user=None):
        self.twitter_user = twitter_user

        # Authenticate first, then hand the resulting handler to the API client.
        self.auth = TwitterAuthenticator().authenticate_twitter_app()
        self.twitter_client = API(self.auth)

    def get_twitter_client_api(self):
        """Return the ``API`` object created in ``__init__``."""
        return self.twitter_client

In [14]:
# # # # TWITTER AUTHENTICATOR # # # #
class TwitterAuthenticator:
    """Builds OAuth handlers from the keys stored in ``twitter_credentials``."""

    def authenticate_twitter_app(self):
        """Return an ``OAuthHandler`` carrying the consumer and access credentials."""
        creds = twitter_credentials
        handler = OAuthHandler(creds.CONSUMER_KEY, creds.CONSUMER_SECRET)
        handler.set_access_token(creds.ACCESS_TOKEN, creds.ACCESS_TOKEN_SECRET)
        return handler

In [15]:
# # # # TWITTER STREAMER # # # #
class TwitterStreamer:
    """
    Opens a live stream and hands incoming data to a TwitterListener.
    """

    def __init__(self):
        self.twitter_autenticator = TwitterAuthenticator()

    def stream_tweets(self, fetched_tweets_filename, hash_tag_list):
        """Start a keyword-filtered stream.

        fetched_tweets_filename -- file name handed to the TwitterListener
        hash_tag_list -- keywords passed to ``Stream.filter`` as ``track``
        """
        # Authenticate and wire the listener to the streaming connection.
        credentials = self.twitter_autenticator.authenticate_twitter_app()
        tweet_sink = TwitterListener(fetched_tweets_filename)
        live_stream = Stream(credentials, tweet_sink)

        # Restrict the stream to the requested keywords.
        live_stream.filter(track=hash_tag_list)

In [16]:
# # # # TWITTER STREAM LISTENER # # # #
class TwitterListener(StreamListener):
    """
    Listener that echoes every raw stream payload to stdout and appends it
    to a file.
    """

    def __init__(self, fetched_tweets_filename):
        # Run the base-class initialiser so whatever state it sets up exists
        # on this instance as well.
        super().__init__()
        self.fetched_tweets_filename = fetched_tweets_filename

    def on_data(self, data):
        """Print ``data`` and append it to ``fetched_tweets_filename``.

        Always returns True, even when printing or writing a payload fails;
        the failure is reported on stdout instead of being raised.
        """
        try:
            print(data)
            # Explicit UTF-8 so non-ASCII tweet text does not depend on the
            # platform's default encoding.
            with open(self.fetched_tweets_filename, 'a', encoding='utf-8') as tf:
                tf.write(data)
        except Exception as e:
            # Exception, not BaseException: KeyboardInterrupt and SystemExit
            # must still be able to stop a running stream.
            print("Error on_data %s" % str(e))
        return True

    def on_error(self, status):
        """Return False on status 420 (rate limit); otherwise print the status."""
        if status == 420:
            # Rate limit reached: signal the caller by returning False.
            return False
        print(status)

In [17]:
 class TweetAnalyzer():
    """
    Functionality for analyzing and categorizing content from tweets.
    """

    # Alternatives, tried in this order at every position:
    #   1. @mentions (handles may contain underscores)
    #   2. any single character that is not an ASCII letter, digit, space or tab
    #   3. URLs of the form scheme://rest
    # Raw string: avoids invalid-escape warnings for \w and \S.
    _NOISE_PATTERN = re.compile(r"(@[A-Za-z0-9_]+)|([^0-9A-Za-z \t])|(\w+://\S+)")

    # (DataFrame column, tweet attribute) pairs, in output column order.
    _TWEET_FIELDS = (
        ('tweets', 'text'),
        ('id', 'id_str'),
        ('date', 'created_at'),
        ('source', 'source'),
        ('lang', 'lang'),
        ('place', 'place'),
    )

    def clean_tweet(self, tweet):
        """Return ``tweet`` with mentions, URLs and non-alphanumeric characters removed.

        Every match is replaced by a space and the result is re-joined with
        single spaces, so the output has no leading/trailing or repeated
        whitespace.
        """
        return ' '.join(self._NOISE_PATTERN.sub(" ", tweet).split())

    def analyze_sentiment(self, tweet):
        """Classify the cleaned tweet by the sign of its TextBlob polarity.

        Returns 'Positif' for polarity > 0, 'Netral' for exactly 0 and
        'Negatif' otherwise.
        """
        polarity = TextBlob(self.clean_tweet(tweet)).sentiment.polarity

        if polarity > 0:
            return "Positif"
        if polarity == 0:
            return "Netral"
        return "Negatif"

    def tweets_to_data_frame(self, tweets):
        """Build a DataFrame with one row per tweet.

        Columns, in order: tweets (``text``), id (``id_str``),
        date (``created_at``), source, lang, place.

        ``tweets`` is traversed exactly once, so any iterable works, and an
        empty input yields an empty frame that still has all six columns.
        """
        records = [
            {column: getattr(tweet, attr) for column, attr in self._TWEET_FIELDS}
            for tweet in tweets
        ]
        return pd.DataFrame(records, columns=[column for column, _ in self._TWEET_FIELDS])

In [18]:
if __name__ == '__main__':

    twitter_client = TwitterClient()
    tweet_analyzer = TweetAnalyzer()
    keyword = [topik]
    api = twitter_client.get_twitter_client_api()

    # The search endpoint takes a query *string*. Passing the list itself
    # positionally sends its repr (e.g. "['bitcoin']") as the query
    # (NOTE(review): tweepy 3.x behaviour - confirm for the installed version),
    # so join the keywords into a proper query with the OR operator instead.
    query = ' OR '.join(keyword)
    tweets = api.search(q=query, count=100, lang='en')

    df = tweet_analyzer.tweets_to_data_frame(tweets)
    # One sentiment label per raw tweet text.
    df['sentiment'] = df['tweets'].apply(tweet_analyzer.analyze_sentiment)

    print(df)

                                               tweets                   id  \
0   RT @SatoshiFlipper: Note on our macro view:\n\...  1238814749485400069   
1   RT @makerdemy: How will all the chaos in the w...  1238814749456039936   
2   RT @MXC_Fans: @MXC_Exchange AMA --InfinityDeFi...  1238814745613852673   
3   @MrPP85519997 @nhlameck @MusevenzoL @daddyhope...  1238814737963651072   
4   1 BTC Price: Bitstamp 5424.97 USD Coinbase  US...  1238814730770239490   
5   @ProfJAParker Best available theory says the B...  1238814717935783936   
6   RT @CryptoDebby: 👾👾EXCHANGE👾👾\n\nUse the link ...  1238814706766360576   
7   RT @blockfolio: In 2011, Bitcoin fell 95% from...  1238814694238040065   
8   RT @carmindabrendel: Bitcoin price crashes spe...  1238814693130670080   
9   coindesk: udi wetheimer on  cyperpunk myths an...  1238814670120615937   
10  Although the price of #Bitcoin dropped substan...  1238814669181259776   
11  RT @tylerwinklevoss: If bitcoin isn't gold 2.0...  123881465

# Preproses

In [19]:
def remove_punct(text):
    """Strip URLs, retweet markers, mentions, non-ASCII characters and punctuation.

    The retweet marker is matched in lower case (``rt @user``), so callers
    should lower-case the text first. Hashtags keep their word but lose the
    ``#``. The returned text has runs of whitespace collapsed to one space
    and no leading or trailing whitespace.
    """
    # Links: bare "www." hosts and http(s) URLs.
    text = re.sub(r'(www\.\S+)|(https?://\S+)', ' ', text)
    # Retweet prefix. \b keeps words that merely end in "rt" intact
    # (without it "start @user" would be cut down to "sta").
    text = re.sub(r'\brt @\S+', ' ', text)
    # Remaining @mentions.
    text = re.sub(r'@\S+', ' ', text)
    # Anything outside the ASCII range (emoji, accented letters, ...).
    text = re.sub(r'[^\x00-\x7F]+', ' ', text)
    # "#tag" -> "tag"
    text = re.sub(r'#(\S+)', r'\1', text)
    # Drop every ASCII punctuation character (this also covers quotes).
    text = text.translate(str.maketrans('', '', string.punctuation))
    # Collapse whitespace last: the removals above leave gaps and stray
    # leading/trailing spaces behind.
    return ' '.join(text.split())

# Lower-case each raw tweet, then run it through the cleaning function.
df['tweet_bersih'] = df['tweets'].str.lower().map(remove_punct)

In [20]:
# Keep only the first row for each distinct cleaned tweet text.
df = df.drop_duplicates(subset=['tweet_bersih'], keep='first')
df

Unnamed: 0,tweets,id,date,source,lang,place,sentiment,tweet_bersih
0,RT @SatoshiFlipper: Note on our macro view:\n\...,1238814749485400069,2020-03-14 13:10:26,Twitter for Android,en,,Negatif,note on our macro view im betting heavy that ...
1,RT @makerdemy: How will all the chaos in the w...,1238814749456039936,2020-03-14 13:10:26,Twitter for iPhone,en,,Netral,how will all the chaos in the world impact th...
2,RT @MXC_Fans: @MXC_Exchange AMA --InfinityDeFi...,1238814745613852673,2020-03-14 13:10:26,Twitter Web App,en,,Positif,ama infinitydefi join to win infd enter follo...
3,@MrPP85519997 @nhlameck @MusevenzoL @daddyhope...,1238814737963651072,2020-03-14 13:10:24,Twitter for iPhone,en,,Netral,do not post dunderhead replies on twee
4,1 BTC Price: Bitstamp 5424.97 USD Coinbase US...,1238814730770239490,2020-03-14 13:10:22,CoinTweety,en,,Netral,1 btc price bitstamp 542497 usd coinbase usd b...
5,@ProfJAParker Best available theory says the B...,1238814717935783936,2020-03-14 13:10:19,Twitter for iPad,en,,Positif,best available theory says the bitcoin price ...
6,RT @CryptoDebby: 👾👾EXCHANGE👾👾\n\nUse the link ...,1238814706766360576,2020-03-14 13:10:16,Twitter Web App,en,,Netral,exchange use the link or qr code to sign up a...
7,"RT @blockfolio: In 2011, Bitcoin fell 95% from...",1238814694238040065,2020-03-14 13:10:13,Twitter Web App,en,,Netral,in 2011 bitcoin fell 95 from 31 to under 2 wh...
8,RT @carmindabrendel: Bitcoin price crashes spe...,1238814693130670080,2020-03-14 13:10:13,Twitter Web App,en,,Positif,bitcoin price crashes spectacularly losing 20...
9,coindesk: udi wetheimer on cyperpunk myths an...,1238814670120615937,2020-03-14 13:10:08,crypto retreat,en,,Positif,coindesk udi wetheimer on cyperpunk myths and ...


In [21]:
# Both exports share one base name built from the collection label and the topic.
nama_berkas = "Data Tanggal " + waktu_ambil + "(" + topik + ")"
df.to_excel(nama_berkas + ".xlsx")
df.to_csv(nama_berkas + ".csv")