# Pengambilan Data

In [1]:
from tweepy import API 
from tweepy import Cursor
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream

In [2]:
import twitter_credentials
import perbaikan_singkatan
import pandas as pd
import numpy as np
from textblob import TextBlob
import re
import string
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
# # # # TWITTER CLIENT # # # #
class TwitterClient():
    def __init__(self, twitter_user=None):
        self.auth = TwitterAuthenticator().authenticate_twitter_app()
        self.twitter_client = API(self.auth)

        self.twitter_user = twitter_user

    def get_twitter_client_api(self):
        return self.twitter_client

In [4]:
# # # # TWITTER AUTHENTICATER # # # #
class TwitterAuthenticator():

    def authenticate_twitter_app(self):
        auth = OAuthHandler(twitter_credentials.CONSUMER_KEY, twitter_credentials.CONSUMER_SECRET)
        auth.set_access_token(twitter_credentials.ACCESS_TOKEN, twitter_credentials.ACCESS_TOKEN_SECRET)
        return auth

In [5]:
# # # # TWITTER STREAMER # # # #
class TwitterStreamer():
    """
    Class for streaming and processing live tweets.
    """
    def __init__(self):
        self.twitter_autenticator = TwitterAuthenticator()    

    def stream_tweets(self, fetched_tweets_filename, hash_tag_list):
        # This handles Twitter authetification and the connection to Twitter Streaming API
        listener = TwitterListener(fetched_tweets_filename)
        auth = self.twitter_autenticator.authenticate_twitter_app() 
        stream = Stream(auth, listener)

        # This line filter Twitter Streams to capture data by the keywords: 
        stream.filter(track=hash_tag_list)

In [6]:
# # # # TWITTER STREAM LISTENER # # # #
class TwitterListener(StreamListener):
    """
    This is a basic listener that just prints received tweets to stdout.
    """
      
    def __init__(self, fetched_tweets_filename):
        self.fetched_tweets_filename = fetched_tweets_filename

    def on_data(self, data):
        try:
            print(data)
            with open(self.fetched_tweets_filename, 'a') as tf:
                       tf.write(data)
            return True
        except BaseException as e:
            print("Error on_data %s" % str(e))
        return True
          
    def on_error(self, status):
        if status == 420:
            # Returning False on_data method in case rate limit occurs.
            return False
        print(status)

In [7]:
 class TweetAnalyzer():
    """
    Functionality for analyzing and categorizing content from tweets.
    """
         
    def clean_tweet(self, tweet):
    
        return ' '.join(re.sub("(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w +:\/\/ \S +)", " ", tweet).split())
    
    def analyze_sentiment(self, tweet):
        analysis = TextBlob(self.clean_tweet(tweet))
        # analysis = analysis.translate(from_lang = 'in',to='en')

        if analysis.sentiment.polarity > 0:
            return "Positif"
        elif analysis.sentiment.polarity == 0:
            return "Netral"
        else:
            return "Negatif"
        
    def tweets_to_data_frame(self, tweets):
        df = pd.DataFrame(data=[tweet.text for tweet in tweets], columns=['tweets'])
        
        df['id'] = np.array([tweet.id_str for tweet in tweets])
        df['date'] = np.array([tweet.created_at for tweet in tweets])
        df['source'] = np.array([tweet.source for tweet in tweets])
        df['lang'] = np.array([tweet.lang for tweet in tweets])
        df['place'] = np.array([tweet.place for tweet in tweets ])
        

        return df

In [12]:
if __name__ == '__main__':

    twitter_client = TwitterClient()
    tweet_analyzer = TweetAnalyzer()
    keyword = ["bitcoin"]
    api = twitter_client.get_twitter_client_api()
    tweets = api.search(keyword, count=100, lang = 'en')

    df = tweet_analyzer.tweets_to_data_frame(tweets)
    df['sentiment'] = np.array([tweet_analyzer.analyze_sentiment(tweet) for tweet in df['tweets']])

    print(df)

                                               tweets                   id  \
0   @mims Fiat and debt allows future consumption ...  1235810469073416192   
1   RT @traderberg: Bitcoin patterns? Scaled in pr...  1235810459531374592   
2   RT @real12k: People have started stocking up o...  1235810458872905732   
3                  @ABPNews Leave Bank,Accept Bitcoin  1235810456746385411   
4   RT @real12k: Looking to buy an $ACED #masterno...  1235810438924742656   
..                                                ...                  ...   
95  RT @cz_binance: When it rains, it pours. #adop...  1235809428546260993   
96  @MartyBent Bitcoin will win!? Win what? Sounds...  1235809413639725056   
97  RT @Eljaboom2030: ðŸŽ‰FAST GÄ°VEAWAY $10 #ETH #Giv...  1235809413081862145   
98  RT @BTCTN: $65M Investment Fuels Natural Gas P...  1235809412305965056   
99  HAPPY 6TH ANNIVERSARY!\n\nto our beloved #NewY...  1235809403984433153   

                  date               source lang place sent

# Preproses

In [13]:
def remove_punct(text):

    neg_pattern = re.compile(r'\b(' + '|'.join(perbaikan_singkatan.negations_dic.keys()) + r')\b')    
    #hapus simbol
    text = re.sub('((www\.[^\s]+)|(https?://[^\s]+))',' ',text)
    text = re.sub('rt @[^\s]+',' ', text)
    text = re.sub('@[^\s]+',' ',text)
    text = re.sub(r'[^\x00-\x7F]+',' ', text)
    #Remove additional white spaces
    text = re.sub('[\s]+', ' ', text)
    #Replace #word with word
    neg_handled = neg_pattern.sub(lambda x: perbaikan_singkatan.negations_dic[x.group()], text)
    text = re.sub(r'#([^\s]+)', r'\1', neg_handled)
    #trim
    text = text.strip('\'"')
    text  = "".join([char for char in text if char not in string.punctuation])
    #text = re.sub('[0-9]+', '', text)

    return text

df['tweet_bersih'] = df['tweets'].apply(lambda x: remove_punct(x.lower()))

In [14]:
#--- REMOVE DUPLICATES
df = df.drop_duplicates(['tweet_bersih'])
df

Unnamed: 0,tweets,id,date,source,lang,place,sentiment,tweet_bersih
0,@mims Fiat and debt allows future consumption ...,1235810469073416192,2020-03-06 06:12:30,Twitter for Android,en,,Netral,fiat and debt allows future consumption to be...
1,RT @traderberg: Bitcoin patterns? Scaled in pr...,1235810459531374592,2020-03-06 06:12:28,Twitter Web App,en,,Netral,bitcoin patterns scaled in price and time btc...
2,RT @real12k: People have started stocking up o...,1235810458872905732,2020-03-06 06:12:28,Twitter Web App,en,,Netral,people have started stocking up on aced while...
3,"@ABPNews Leave Bank,Accept Bitcoin",1235810456746385411,2020-03-06 06:12:27,Twitter for Android,en,,Netral,leave bankaccept bitcoin
4,RT @real12k: Looking to buy an $ACED #masterno...,1235810438924742656,2020-03-06 06:12:23,Twitter Web App,en,,Netral,looking to buy an aced masternode now that ha...
...,...,...,...,...,...,...,...,...
94,Bitcoin Lacks Momentum Above $9k: Hereâ€™s Why B...,1235809440890146816,2020-03-06 06:08:25,twitterAutoPostFromPage,en,,Positif,bitcoin lacks momentum above 9k here s why bul...
95,"RT @cz_binance: When it rains, it pours. #adop...",1235809428546260993,2020-03-06 06:08:22,Twitter for Android,en,,Netral,when it rains it pours adoption bitcoin bnb
96,@MartyBent Bitcoin will win!? Win what? Sounds...,1235809413639725056,2020-03-06 06:08:19,Twitter for Android,en,,Positif,bitcoin will win win what sounds to me you ha...
98,RT @BTCTN: $65M Investment Fuels Natural Gas P...,1235809412305965056,2020-03-06 06:08:18,Twitter for Android,en,,Negatif,65m investment fuels natural gas provider s b...


In [15]:
df.to_excel("Data Tanggal 6 siang (bitcoin).xlsx")
df.to_csv("Data Tanggal 6 siang (bitcoin).csv")

# Visualisasi Data

In [None]:
df = pd.read_excel('myindihome.xlsx',header=0,encoding = 'unicode_escape')
#df = df.drop(columns = "Unnamed: 0")
df.count

In [None]:
df = df.drop_duplicates(['tweet_bersih'])
df = df.set_index("source")
df = df.drop("Sociomedio Pro Telkom", axis=0)

In [None]:
df_positif = df[df['sentiment'] == 'Positif']
df_negatif = df[df['sentiment'] == 'Negatif']
df_netral = df[df['sentiment'] == 'Netral']
df_negatif

In [None]:
sentimen_count = df['sentiment'].value_counts()
sentimen_count

In [None]:
objects = sentimen_count.index
y_pos = np.arange(len(objects))
performance = sentimen_count
 
plt.bar(y_pos, performance, align='center', alpha=0.5)
plt.xticks(y_pos, objects)
plt.ylabel('Jumlah dalam tweets')
plt.title('Sentiment Topik Wifi.id')
 
plt.show()
plt.show()

In [None]:
labels = sentimen_count.index
sizes = sentimen_count
colors = ['steelblue', 'red', 'gray']
explode = (0.2, 0.1, 0.1)  # explode 1st slice
 
# Plot
plt.pie(sizes, explode=explode, labels=labels, colors=colors,
        autopct='%1.1f%%', shadow=True, startangle=140)
 
plt.axis('equal')
plt.show()

In [None]:
from wordcloud import WordCloud
from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory
factory = StopWordRemoverFactory()
stopwords = factory.get_stop_words()

In [None]:
words_positif = ' '.join(df_positif['tweet_bersih'])
words_negatif = ' '.join(df_negatif['tweet_bersih'])
words_netral = ' '.join(df_netral['tweet_bersih'])

In [None]:
wordcloud = WordCloud(stopwords = stopwords, background_color='white', 
                      height = 1000, width = 2000).generate(words_positif)

In [None]:
plt.imshow(wordcloud, interpolation = "bilinear")
plt.axis('off')
plt.show()

In [None]:
# Save the image
wordcloud.to_file("wordcloud_positif.png")

In [None]:
wordcloud = WordCloud(stopwords = stopwords, background_color='white', 
                      height = 1000, width = 2000).generate(words_negatif)

In [None]:
plt.imshow(wordcloud, interpolation = "bilinear")
plt.axis('off')
plt.show()

In [None]:
# Save the image
wordcloud.to_file("wordcloud_negatif1.png")

In [None]:
wordcloud = WordCloud(stopwords = stopwords, background_color='white', 
                      height = 1000, width = 2000).generate(words_netral)

In [None]:
plt.imshow(wordcloud, interpolation = "bilinear")
plt.axis('off')
plt.show()

In [None]:
# Save the image
wordcloud.to_file("wordcloud_netral.png")