In [None]:
import pandas as pd 
import numpy as np

# Load the scraped tweets; the CSV is decoded as ISO-8859-1 (Latin-1).
TWEET_DATA = pd.read_csv("/content/prabowo.csv", encoding = "ISO-8859-1")
# No ASCII filtering is done at load time: an un-assigned
# `.str.encode('ascii', 'ignore')` has no effect on the frame, and non-ASCII
# characters are stripped later inside remove_tweet_special().
# Preview the first rows as the cell output.
TWEET_DATA.head()

Unnamed: 0,Datetime,Tweet Url,Text,Username,User Created,Verified,Followers Count,Retweet Count,Like Count,Reply Count,Hashtags
0,2023-01-09 09:42:25+00:00,https://twitter.com/restabengkulu/status/16123...,Potret kekompakan Kapolri Jenderal Polisi Drs....,restabengkulu,2014-04-23 07:44:52+00:00,False,1003,0,0,0,
1,2023-01-09 09:42:16+00:00,https://twitter.com/lilahimala/status/16123842...,Kita yakin Pak Prabowo Subianto yang memang Pi...,lilahimala,2021-06-26 15:49:42+00:00,False,42,0,0,0,
2,2023-01-09 09:41:38+00:00,https://twitter.com/bbableaj/status/1612384091...,@likeforwa @starfess Jk tuh Jokowi? Nggak lh k...,bbableaj,2020-03-13 06:04:31+00:00,False,1423,0,0,0,
3,2023-01-09 09:40:28+00:00,https://twitter.com/genx36545403/status/161238...,"Pak Prabowo kan orang baik, jadi saya hanya ti...",genx36545403,2021-01-07 04:05:36+00:00,False,36,0,0,1,"['PeduliRakyat', 'kerjasama']"
4,2023-01-09 09:40:27+00:00,https://twitter.com/genx36545403/status/161238...,Yang saya kenal Pak Prabowo orang yang sangat ...,genx36545403,2021-01-07 04:05:36+00:00,False,36,0,0,1,"['PeduliRakyat', 'kerjasama']"


In [None]:
# ------ Case Folding --------
# Lower-case every tweet with the pandas Series.str.lower() string method;
# the 'Text' column is overwritten in place.
TWEET_DATA['Text'] = TWEET_DATA['Text'].str.lower()


# Show the first five lower-cased tweets.
print('Case Folding Result : \n')
print(TWEET_DATA['Text'].head(5))
print('\n\n\n')

Case Folding Result : 

0    potret kekompakan kapolri jenderal polisi drs....
1    kita yakin pak prabowo subianto yang memang pi...
2    @likeforwa @starfess jk tuh jokowi? nggak lh k...
3    pak prabowo kan orang baik, jadi saya hanya ti...
4    yang saya kenal pak prabowo orang yang sangat ...
Name: Text, dtype: object






In [None]:
import string 
import re #regex library

# import word_tokenize & FreqDist from NLTK
import nltk
nltk.download('punkt')
from nltk.tokenize import word_tokenize 
from nltk.probability import FreqDist

# ------ Tokenizing ---------

def remove_tweet_special(text):
    """Strip Twitter-specific noise from a single tweet.

    Steps, in order: replace literal escape sequences, delete non-ASCII
    characters, remove mentions / hashtags / URLs, collapse whitespace, and
    remove leftover URL scheme prefixes.

    Args:
        text: Tweet text as a ``str``.

    Returns:
        The cleaned text. A space may remain at either end where a bare
        ``http://`` or ``https://`` prefix was replaced.
    """
    # Literal two-character sequences (backslash + t / n / u) left in the raw
    # text become spaces; any remaining backslash is dropped.
    text = text.replace('\\t'," ").replace('\\n'," ").replace('\\u'," ").replace('\\',"")
    # Remove non-ASCII characters (emoticons, CJK, etc.). 'ignore' deletes
    # them; 'replace' would substitute a "?" for each one instead.
    text = text.encode('ascii', 'ignore').decode('ascii')
    # Remove mentions, hashtags and full URLs. \w also covers "_", which is
    # valid inside handles, so "@user_name" is removed as a whole.
    # split()/join() collapses every whitespace run into one space.
    text = ' '.join(re.sub(r"([@#]\w+)|(\w+://\S+)", " ", text).split())
    # Remove incomplete URLs (a scheme with nothing after it).
    return text.replace("http://", " ").replace("https://", " ")
                
# Run the tweet-noise cleaner on every entry of 'Text', overwriting the column.
TWEET_DATA['Text'] = TWEET_DATA['Text'].apply(remove_tweet_special)

#remove number
def remove_number(text):
    """Return ``text`` with every run of digits deleted (nothing is inserted)."""
    digit_run = re.compile(r"\d+")
    return digit_run.sub("", text)

# Delete digits from every entry of 'Text', overwriting the column.
TWEET_DATA['Text'] = TWEET_DATA['Text'].apply(remove_number)

#remove punctuation
def remove_punctuation(text):
    """Return ``text`` with every ``string.punctuation`` character deleted.

    Characters are removed, not replaced by a space, so "a-b" becomes "ab".
    """
    # A translate table that maps a code point to None deletes that character.
    deletions = dict.fromkeys(map(ord, string.punctuation))
    return text.translate(deletions)

# Delete punctuation from every entry of 'Text', overwriting the column.
TWEET_DATA['Text'] = TWEET_DATA['Text'].apply(remove_punctuation)

#remove whitespace leading & trailing
def remove_whitespace_LT(text):
    """Return ``text`` without leading (L) and trailing (T) whitespace."""
    without_leading = text.lstrip()
    return without_leading.rstrip()

# Trim leading/trailing whitespace of every entry of 'Text', overwriting the column.
TWEET_DATA['Text'] = TWEET_DATA['Text'].apply(remove_whitespace_LT)

#remove multiple whitespace into single whitespace
def remove_whitespace_multiple(text):
    """Collapse each run of whitespace in ``text`` into a single space.

    Args:
        text: Input ``str``.

    Returns:
        ``text`` with every whitespace run (spaces, tabs, newlines) replaced
        by one space. Leading/trailing runs become a single space; they are
        not stripped.
    """
    # Raw string: "\s" inside a normal literal is an invalid escape sequence.
    return re.sub(r'\s+', ' ', text)

# Collapse whitespace runs in every entry of 'Text', overwriting the column.
TWEET_DATA['Text'] = TWEET_DATA['Text'].apply(remove_whitespace_multiple)

# remove single char
def remove_singl_char(text):
    """Return ``text`` with every stand-alone ASCII letter deleted.

    Only the letter itself is removed, so the spaces that surrounded it stay.
    """
    lone_letter = re.compile(r"\b[a-zA-Z]\b")
    return lone_letter.sub("", text)

# Drop stand-alone single letters from every entry of 'Text', overwriting the column.
TWEET_DATA['Text'] = TWEET_DATA['Text'].apply(remove_singl_char)

# NLTK word tokenize
def word_tokenize_wrapper(text):
    """Tokenize ``text`` with NLTK's ``word_tokenize`` and return its result."""
    return word_tokenize(text)

# Tokenize each cleaned tweet; the result is stored in the new 'tweet_tokens' column.
TWEET_DATA['tweet_tokens'] = TWEET_DATA['Text'].apply(word_tokenize_wrapper)

# Preview the first rows of the new column.
print('Tokenizing Result : \n') 
print(TWEET_DATA['tweet_tokens'].head())
print('\n\n\n')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


Tokenizing Result : 

0    [potret, kekompakan, kapolri, jenderal, polisi...
1    [kita, yakin, pak, prabowo, subianto, yang, me...
2    [jk, tuh, jokowi, nggak, lh, kak, kataku, mah,...
3    [pak, prabowo, kan, orang, baik, jadi, saya, h...
4    [yang, saya, kenal, pak, prabowo, orang, yang,...
Name: tweet_tokens, dtype: object






In [None]:
# NLTK calc frequency distribution
def freqDist_wrapper(text):
    """Build an NLTK ``FreqDist`` from ``text`` and return it.

    In this notebook it is applied to the entries of 'tweet_tokens'.
    """
    return FreqDist(text)

# One frequency distribution per tweet, stored in 'tweet_tokens_fdist'.
TWEET_DATA['tweet_tokens_fdist'] = TWEET_DATA['tweet_tokens'].apply(freqDist_wrapper)

# Preview: (token, count) pairs of the first rows, as returned by most_common().
print('Frequency Tokens : \n') 
print(TWEET_DATA['tweet_tokens_fdist'].head().apply(lambda x : x.most_common()))

Frequency Tokens : 

0    [(tni, 2), (papua, 2), (potret, 1), (kekompaka...
1    [(kita, 2), (prabowo, 2), (yakin, 1), (pak, 1)...
2    [(jk, 1), (tuh, 1), (jokowi, 1), (nggak, 1), (...
3    [(pak, 2), (prabowo, 2), (orang, 2), (terlalu,...
4    [(yang, 3), (diajak, 2), (berpikir, 2), (saya,...
Name: tweet_tokens_fdist, dtype: object


In [None]:
from nltk.corpus import stopwords
nltk.download('stopwords')

# ----------------------- get stopword from NLTK stopword -------------------------------
# get the Indonesian stopword list from the NLTK corpus
list_stopwords = stopwords.words('indonesian')
print(len(list_stopwords))

# ---------------------------- manually add stopword  -----------------------------------
# append additional stopwords
# NOTE(review): 'rt' is listed twice; harmless because the list becomes a set below.
# NOTE(review): '&amp' probably never matches a token, since "&" is deleted by
# remove_punctuation() before tokenizing -- confirm.
list_stopwords.extend(["yg", "dg", "rt", "dgn", "ny", "d", 'klo', 
                       'kalo', 'amp', 'biar', 'bikin', 'bilang', 
                       'gak', 'ga', 'krn', 'nya', 'nih', 'sih', 
                       'si', 'tau', 'tdk', 'tuh', 'utk', 'ya', 
                       'jd', 'jgn', 'sdh', 'aja', 'n', 't', 
                       'nyg', 'hehe', 'pen', 'u', 'nan', 'loh', 'rt',
                       '&amp', 'yah'])
# NOTE: bare expression in the middle of a cell; its value is not displayed.
len(list_stopwords)
# ----------------------- add stopword from txt file ------------------------------------
# read txt stopword using pandas
txt_stopword = pd.read_csv("/content/stopwords.txt", names= ["stopwords"], header = None)

# convert stopword string to list & append additional stopword
# (only the first row of the file is read here; it is split on single spaces)
list_stopwords.extend(txt_stopword["stopwords"][0].split(' '))
# NOTE: bare expression in the middle of a cell; its value is not displayed.
len(list_stopwords)
# ---------------------------------------------------------------------------------------

# convert the list to a set (removes duplicates; used for membership tests below)
list_stopwords = set(list_stopwords)


# remove stopwords from a list of tokens
def stopwords_removal(words):
    """Return the tokens of ``words`` that are not in ``list_stopwords``.

    The order of the remaining tokens is preserved and duplicates are kept.
    """
    kept = []
    for token in words:
        if token in list_stopwords:
            continue
        kept.append(token)
    return kept

# Token lists with stopwords removed, stored in 'tweet_tokens_WSW'.
TWEET_DATA['tweet_tokens_WSW'] = TWEET_DATA['tweet_tokens'].apply(stopwords_removal) 


# Preview the first rows of the filtered tokens.
print(TWEET_DATA['tweet_tokens_WSW'].head())

758
0    [potret, kekompakan, kapolri, jenderal, polisi...
1    [prabowo, subianto, pilihan, mengambil, hati, ...
2    [jk, jokowi, nggak, lh, kataku, sungjin, prabowo]
3    [prabowo, titip, didengerin, pikiranpikiran, b...
4    [kenal, prabowo, rasional, diajak, berpikir, j...
Name: tweet_tokens_WSW, dtype: object


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


# Normalisasi

In [None]:
# Load the word-normalisation lookup table (two columns are used below).
normalizad_word = pd.read_csv("https://raw.githubusercontent.com/fendiirfan/Kamus-Alay/main/Kamu-Alay.csv")

# Map each value of the first column to the matching value of the second
# column. Columns are addressed by position with .iloc, because integer keys
# on a label-indexed row (row[0]) are deprecated in pandas.
# setdefault keeps the first replacement when a term is listed more than once.
normalizad_word_dict = {}

for source_term, replacement in zip(normalizad_word.iloc[:, 0], normalizad_word.iloc[:, 1]):
    normalizad_word_dict.setdefault(source_term, replacement)

def normalized_term(document):
    """Return ``document`` with each term looked up in ``normalizad_word_dict``.

    Terms found in the dictionary are replaced by their mapped value; all
    other terms are kept unchanged. Order and length are preserved.
    """
    return [normalizad_word_dict.get(term, term) for term in document]

# Normalised token lists, stored in 'tweet_normalized'.
TWEET_DATA['tweet_normalized'] = TWEET_DATA['tweet_tokens_WSW'].apply(normalized_term)

# Preview the first ten rows as the cell output.
TWEET_DATA['tweet_normalized'].head(10)

0    [potret, kekompakan, kapolri, jenderal, polisi...
1    [prabowo, subianto, pilihan, mengambil, hati, ...
2    [jk, jokowi, tidak, lh, kataku, sungjin, prabowo]
3    [prabowo, titip, didengerin, pikiranpikiran, b...
4    [kenal, prabowo, rasional, diajak, berpikir, j...
5    [luhut, mengaku, berkomunikasi, prabowo, via, ...
6    [menurutnya, prabowo, sosok, pemimpin, patriot...
7    [menko, kemaritiman, luhut, binsar, pandjaitan...
8    [luhut, kepedulian, prabowo, indonesia, dipung...
9    [capres, megawati, pdip, prabowo, gerindra, ca...
Name: tweet_normalized, dtype: object

In [None]:
# Pinned to the release this notebook was run with; %pip installs into the
# environment of the running kernel.
%pip install Sastrawi==1.0.1

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting Sastrawi
  Downloading Sastrawi-1.0.1-py2.py3-none-any.whl (209 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m209.7/209.7 KB[0m [31m8.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: Sastrawi
Successfully installed Sastrawi-1.0.1


In [None]:
# Pinned to the release this notebook was run with; %pip installs into the
# environment of the running kernel.
%pip install swifter==1.3.4

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting swifter
  Downloading swifter-1.3.4.tar.gz (830 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m830.9/830.9 KB[0m [31m14.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting psutil>=5.6.6
  Downloading psutil-5.9.4-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (280 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m280.2/280.2 KB[0m [31m32.9 MB/s[0m eta [36m0:00:00[0m
Collecting jedi>=0.10
  Downloading jedi-0.18.2-py2.py3-none-any.whl (1.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m70.1 MB/s[0m eta [36m0:00:00[0m
Building wheels for collected packages: swifter
  Building wheel for swifter (setup.py) ... [?25l[?25hdone
  Created wheel for swifter: filename=swifter-1.3.4-py3-none-any.whl s

In [None]:
# import Sastrawi package
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
import swifter


# create stemmer
# Build the Sastrawi stemmer instance that stemmed_wrapper() calls.
factory = StemmerFactory()
stemmer = factory.create_stemmer()

# stemmed
def stemmed_wrapper(term):
    """Return ``stemmer.stem(term)`` using the module-level ``stemmer``."""
    return stemmer.stem(term)

# Collect the distinct terms in first-seen order (dict keys keep insertion
# order), so that each term is stemmed only once even if it occurs in many tweets.
unique_terms = dict.fromkeys(
    term for document in TWEET_DATA['tweet_normalized'] for term in document
)

print(len(unique_terms))
print("------------------------")

# term -> stemmed term lookup used by get_stemmed_term().
term_dict = {term: stemmed_wrapper(term) for term in unique_terms}

# Show only a short preview: printing every mapping (thousands of lines)
# plus the whole dict floods the cell output.
PREVIEW_SIZE = 20
for term, stemmed in list(term_dict.items())[:PREVIEW_SIZE]:
    print(term, ":", stemmed)
print("------------------------")


# apply stemmed term to dataframe
def get_stemmed_term(document):
    """Return the stemmed form of every term in ``document``.

    Each term is looked up in ``term_dict``; a term missing from it raises
    ``KeyError``. Order and length are preserved.
    """
    stems = []
    for term in document:
        stems.append(term_dict[term])
    return stems

# Stemmed token lists, stored in 'tweet_tokens_stemmed'; the apply goes through
# the swifter accessor.
TWEET_DATA['tweet_tokens_stemmed'] = TWEET_DATA['tweet_normalized'].swifter.apply(get_stemmed_term)
print(TWEET_DATA['tweet_tokens_stemmed'])

3775
------------------------
potret : potret
kekompakan : kompak
kapolri : kapolri
jenderal : jenderal
polisi : polisi
drs : drs
listyo : listyo
sigit : sigit
prabowo : prabowo
panglima : panglima
tni : tni
laksamana : laksamana
yudo : yudo
margono : margono
csfa : csfa
pejabat : jabat
utama : utama
tnipolri : tnipolri
peresmian : resmi
gedung : gedung
polda : polda
papua : papua
kota : kota
jayapura : jayapura
minggu : minggu
subianto : subianto
pilihan : pilih
mengambil : ambil
hati : hati
rakyat : rakyat
indonesia : indonesia
maju : maju
jk : jk
jokowi : jokowi
tidak : tidak
lh : lh
kataku : kata
sungjin : sungjin
titip : titip
didengerin : didengerin
pikiranpikiran : pikiranpikiran
basisnya : basis
rasional : rasional
kenal : kenal
diajak : ajak
berpikir : pikir
jernih : jernih
pemimpin : pimpin
luhut : luhut
mengaku : aku
berkomunikasi : komunikasi
via : via
telepon : telepon
tipikal : tipikal
menurutnya : turut
sosok : sosok
patriot : patriot
aset : aset
negara : negara
kesatuan

Pandas Apply:   0%|          | 0/2501 [00:00<?, ?it/s]

0       [potret, kompak, kapolri, jenderal, polisi, dr...
1       [prabowo, subianto, pilih, ambil, hati, rakyat...
2         [jk, jokowi, tidak, lh, kata, sungjin, prabowo]
3       [prabowo, titip, didengerin, pikiranpikiran, b...
4       [kenal, prabowo, rasional, ajak, pikir, jernih...
                              ...                        
2496    [hut, lvri, prabowo, muda, tentara, teken, mat...
2497    [hut, lvri, prabowo, muda, tentara, teken, mat...
2498    [prabowo, sindir, kader, hengkang, elite, geri...
2499    [melati, biru, jambang, kawan, kawan, lupa, pr...
2500    [olahraga, bela, favorit, prabowo, subianto, s...
Name: tweet_tokens_stemmed, Length: 2501, dtype: object


In [None]:
# Number of rows that have a missing value in at least one column.
TWEET_DATA.isna().any(axis="columns").sum()

In [None]:
def remove_pattern(input_txt, pattern=r"@\w+"):
    """Delete every match of ``pattern`` from ``input_txt``.

    Args:
        input_txt: Text to clean, as a ``str``.
        pattern: Regular expression to remove. The default matches Twitter
            mentions: "@" followed by one or more word characters.

    Returns:
        ``input_txt`` with all matches removed; surrounding whitespace is
        left untouched.
    """
    # One substitution removes the "@" together with the handle. Searching
    # for "@(w+)" would only match literal "w" characters, and re-using the
    # captured handle as a pattern would also delete it elsewhere in the text.
    return re.sub(pattern, '', input_txt)
# Apply remove_pattern element-wise to 'Text' and store the result in '@_remove'.
# NOTE(review): 'Text' has already been through remove_tweet_special(), which
# strips mentions, so this column is likely identical to 'Text' -- confirm.
TWEET_DATA['@_remove'] = np.vectorize(remove_pattern)(TWEET_DATA['Text'])
# Preview the first three rows.
TWEET_DATA['@_remove'][:3]

0    potret kekompakan kapolri jenderal polisi drs ...
1    kita yakin pak prabowo subianto yang memang pi...
2    jk tuh jokowi nggak lh kak kataku mah sungjin ...
Name: @_remove, dtype: object

In [None]:
# Module-level accumulator: hashtag_extract() appends to this list on every
# call, so results from successive calls pile up in the same list.
hashtags = []
def hashtag_extract(x):
    """Collect the hashtags found in each string of ``x``.

    Args:
        x: Iterable of tweet strings.

    Returns:
        The module-level ``hashtags`` list, to which one list of tag names
        (without the leading "#") has been appended per element of ``x``.
    """
    # Loop over the tweets
    for i in x:
        # "\w+" captures the tag text after "#"; a bare "w+" would only
        # match literal "w" characters.
        ht = re.findall(r"#(\w+)", i)
        hashtags.append(ht)
    return hashtags
# Remove hashtags ("#" followed by word characters) from each tweet and store
# the result in '#_remove'. The mention remover is not used here because it
# would leave hashtags in place.
TWEET_DATA['#_remove'] = TWEET_DATA['Text'].str.replace(r"#\w+", "", regex=True)
# Preview the first three rows.
TWEET_DATA['#_remove'][:3]

0    potret kekompakan kapolri jenderal polisi drs ...
1    kita yakin pak prabowo subianto yang memang pi...
2    jk tuh jokowi nggak lh kak kataku mah sungjin ...
Name: #_remove, dtype: object

In [None]:
# Pinned to the release this notebook was run with; %pip installs into the
# environment of the running kernel.
%pip install tweet-preprocessor==0.6.0

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting tweet-preprocessor
  Downloading tweet_preprocessor-0.6.0-py3-none-any.whl (27 kB)
Installing collected packages: tweet-preprocessor
Successfully installed tweet-preprocessor-0.6.0


In [None]:
import preprocessor as p

def preprocess_tweet(row):
    """Return the 'Text' field of ``row`` after cleaning it with ``p.clean``.

    ``row`` only needs to support ``row['Text']`` (a DataFrame row when used
    with ``DataFrame.apply(..., axis=1)``).
    """
    return p.clean(row['Text'])
# Row-wise apply (axis=1): each row is passed to preprocess_tweet and the
# cleaned text is stored in 'clean_tweet'.
TWEET_DATA['clean_tweet'] = TWEET_DATA.apply(preprocess_tweet, axis=1)
# Preview the first six rows.
TWEET_DATA[:6]

Unnamed: 0,Datetime,Tweet Url,Text,Username,User Created,Verified,Followers Count,Retweet Count,Like Count,Reply Count,Hashtags,tweet_tokens,tweet_tokens_fdist,tweet_tokens_WSW,tweet_normalized,tweet_tokens_stemmed,@_remove,#_remove,clean_tweet
0,2023-01-09 09:42:25+00:00,https://twitter.com/restabengkulu/status/16123...,potret kekompakan kapolri jenderal polisi drs ...,restabengkulu,2014-04-23 07:44:52+00:00,False,1003,0,0,0,,"[potret, kekompakan, kapolri, jenderal, polisi...","{'potret': 1, 'kekompakan': 1, 'kapolri': 1, '...","[potret, kekompakan, kapolri, jenderal, polisi...","[potret, kekompakan, kapolri, jenderal, polisi...","[potret, kompak, kapolri, jenderal, polisi, dr...",potret kekompakan kapolri jenderal polisi drs ...,potret kekompakan kapolri jenderal polisi drs ...,potret kekompakan kapolri jenderal polisi drs ...
1,2023-01-09 09:42:16+00:00,https://twitter.com/lilahimala/status/16123842...,kita yakin pak prabowo subianto yang memang pi...,lilahimala,2021-06-26 15:49:42+00:00,False,42,0,0,0,,"[kita, yakin, pak, prabowo, subianto, yang, me...","{'kita': 2, 'yakin': 1, 'pak': 1, 'prabowo': 2...","[prabowo, subianto, pilihan, mengambil, hati, ...","[prabowo, subianto, pilihan, mengambil, hati, ...","[prabowo, subianto, pilih, ambil, hati, rakyat...",kita yakin pak prabowo subianto yang memang pi...,kita yakin pak prabowo subianto yang memang pi...,kita yakin pak prabowo subianto yang memang pi...
2,2023-01-09 09:41:38+00:00,https://twitter.com/bbableaj/status/1612384091...,jk tuh jokowi nggak lh kak kataku mah sungjin ...,bbableaj,2020-03-13 06:04:31+00:00,False,1423,0,0,0,,"[jk, tuh, jokowi, nggak, lh, kak, kataku, mah,...","{'jk': 1, 'tuh': 1, 'jokowi': 1, 'nggak': 1, '...","[jk, jokowi, nggak, lh, kataku, sungjin, prabowo]","[jk, jokowi, tidak, lh, kataku, sungjin, prabowo]","[jk, jokowi, tidak, lh, kata, sungjin, prabowo]",jk tuh jokowi nggak lh kak kataku mah sungjin ...,jk tuh jokowi nggak lh kak kataku mah sungjin ...,jk tuh jokowi nggak lh kak kataku mah sungjin ...
3,2023-01-09 09:40:28+00:00,https://twitter.com/genx36545403/status/161238...,pak prabowo kan orang baik jadi saya hanya tit...,genx36545403,2021-01-07 04:05:36+00:00,False,36,0,0,1,"['PeduliRakyat', 'kerjasama']","[pak, prabowo, kan, orang, baik, jadi, saya, h...","{'pak': 2, 'prabowo': 2, 'kan': 1, 'orang': 2,...","[prabowo, titip, didengerin, pikiranpikiran, b...","[prabowo, titip, didengerin, pikiranpikiran, b...","[prabowo, titip, didengerin, pikiranpikiran, b...",pak prabowo kan orang baik jadi saya hanya tit...,pak prabowo kan orang baik jadi saya hanya tit...,pak prabowo kan orang baik jadi saya hanya tit...
4,2023-01-09 09:40:27+00:00,https://twitter.com/genx36545403/status/161238...,yang saya kenal pak prabowo orang yang sangat ...,genx36545403,2021-01-07 04:05:36+00:00,False,36,0,0,1,"['PeduliRakyat', 'kerjasama']","[yang, saya, kenal, pak, prabowo, orang, yang,...","{'yang': 3, 'saya': 1, 'kenal': 1, 'pak': 1, '...","[kenal, prabowo, rasional, diajak, berpikir, j...","[kenal, prabowo, rasional, diajak, berpikir, j...","[kenal, prabowo, rasional, ajak, pikir, jernih...",yang saya kenal pak prabowo orang yang sangat ...,yang saya kenal pak prabowo orang yang sangat ...,yang saya kenal pak prabowo orang yang sangat ...
5,2023-01-09 09:40:26+00:00,https://twitter.com/genx36545403/status/161238...,luhut mengaku telah berkomunikasi dengan prabo...,genx36545403,2021-01-07 04:05:36+00:00,False,36,0,0,1,"['PeduliRakyat', 'kerjasama']","[luhut, mengaku, telah, berkomunikasi, dengan,...","{'luhut': 1, 'mengaku': 1, 'telah': 1, 'berkom...","[luhut, mengaku, berkomunikasi, prabowo, via, ...","[luhut, mengaku, berkomunikasi, prabowo, via, ...","[luhut, aku, komunikasi, prabowo, via, telepon...",luhut mengaku telah berkomunikasi dengan prabo...,luhut mengaku telah berkomunikasi dengan prabo...,luhut mengaku telah berkomunikasi dengan prabo...


In [None]:

from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.sentiment.util import *
nltk.download('vader_lexicon')
#Sentiment Analysis
SIA = SentimentIntensityAnalyzer()
TWEET_DATA["clean_tweet"]= TWEET_DATA["clean_tweet"].astype(str)
# Applying Model, Variable Creation
# Score every tweet once and reuse the resulting dict for all four columns,
# instead of running the analyzer four times per tweet.
# NOTE(review): most rows in the captured output score 0.0 / Neutral; VADER's
# lexicon is presumably English-only while these tweets are Indonesian -- confirm
# that this analyzer is appropriate for the data.
vader_scores = TWEET_DATA["clean_tweet"].map(SIA.polarity_scores)
for column, key in (('Polarity Score', 'compound'),
                    ('Neutral Score', 'neu'),
                    ('Negative Score', 'neg'),
                    ('Positive Score', 'pos')):
    # key=key binds the current key to the lambda for this iteration.
    TWEET_DATA[column] = vader_scores.map(lambda scores, key=key: scores[key])
# Convert the signed compound score into a categorical label:
# > 0 Positive, == 0 Neutral, < 0 Negative.
TWEET_DATA['Sentiment']=''
TWEET_DATA.loc[TWEET_DATA['Polarity Score']>0,'Sentiment']='Positive'
TWEET_DATA.loc[TWEET_DATA['Polarity Score']==0,'Sentiment']='Neutral'
TWEET_DATA.loc[TWEET_DATA['Polarity Score']<0,'Sentiment']='Negative'
TWEET_DATA

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


Unnamed: 0,Datetime,Tweet Url,Text,Username,User Created,Verified,Followers Count,Retweet Count,Like Count,Reply Count,...,tweet_normalized,tweet_tokens_stemmed,@_remove,#_remove,clean_tweet,Polarity Score,Neutral Score,Negative Score,Positive Score,Sentiment
0,2023-01-09 09:42:25+00:00,https://twitter.com/restabengkulu/status/16123...,potret kekompakan kapolri jenderal polisi drs ...,restabengkulu,2014-04-23 07:44:52+00:00,False,1003,0,0,0,...,"[potret, kekompakan, kapolri, jenderal, polisi...","[potret, kompak, kapolri, jenderal, polisi, dr...",potret kekompakan kapolri jenderal polisi drs ...,potret kekompakan kapolri jenderal polisi drs ...,potret kekompakan kapolri jenderal polisi drs ...,0.0000,1.000,0.0,0.000,Neutral
1,2023-01-09 09:42:16+00:00,https://twitter.com/lilahimala/status/16123842...,kita yakin pak prabowo subianto yang memang pi...,lilahimala,2021-06-26 15:49:42+00:00,False,42,0,0,0,...,"[prabowo, subianto, pilihan, mengambil, hati, ...","[prabowo, subianto, pilih, ambil, hati, rakyat...",kita yakin pak prabowo subianto yang memang pi...,kita yakin pak prabowo subianto yang memang pi...,kita yakin pak prabowo subianto yang memang pi...,0.0000,1.000,0.0,0.000,Neutral
2,2023-01-09 09:41:38+00:00,https://twitter.com/bbableaj/status/1612384091...,jk tuh jokowi nggak lh kak kataku mah sungjin ...,bbableaj,2020-03-13 06:04:31+00:00,False,1423,0,0,0,...,"[jk, jokowi, tidak, lh, kataku, sungjin, prabowo]","[jk, jokowi, tidak, lh, kata, sungjin, prabowo]",jk tuh jokowi nggak lh kak kataku mah sungjin ...,jk tuh jokowi nggak lh kak kataku mah sungjin ...,jk tuh jokowi nggak lh kak kataku mah sungjin ...,0.2263,0.863,0.0,0.137,Positive
3,2023-01-09 09:40:28+00:00,https://twitter.com/genx36545403/status/161238...,pak prabowo kan orang baik jadi saya hanya tit...,genx36545403,2021-01-07 04:05:36+00:00,False,36,0,0,1,...,"[prabowo, titip, didengerin, pikiranpikiran, b...","[prabowo, titip, didengerin, pikiranpikiran, b...",pak prabowo kan orang baik jadi saya hanya tit...,pak prabowo kan orang baik jadi saya hanya tit...,pak prabowo kan orang baik jadi saya hanya tit...,0.0000,1.000,0.0,0.000,Neutral
4,2023-01-09 09:40:27+00:00,https://twitter.com/genx36545403/status/161238...,yang saya kenal pak prabowo orang yang sangat ...,genx36545403,2021-01-07 04:05:36+00:00,False,36,0,0,1,...,"[kenal, prabowo, rasional, diajak, berpikir, j...","[kenal, prabowo, rasional, ajak, pikir, jernih...",yang saya kenal pak prabowo orang yang sangat ...,yang saya kenal pak prabowo orang yang sangat ...,yang saya kenal pak prabowo orang yang sangat ...,0.0000,1.000,0.0,0.000,Neutral
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2496,2023-01-08 10:35:18+00:00,https://twitter.com/Lintas_MNCTV/status/161203...,hut ke lvri prabowo sejak muda tentara sudah t...,Lintas_MNCTV,2009-07-30 03:22:52+00:00,True,36376,0,0,0,...,"[hut, lvri, prabowo, muda, tentara, teken, mat...","[hut, lvri, prabowo, muda, tentara, teken, mat...",hut ke lvri prabowo sejak muda tentara sudah t...,hut ke lvri prabowo sejak muda tentara sudah t...,hut ke lvri prabowo sejak muda tentara sudah t...,0.0000,1.000,0.0,0.000,Neutral
2497,2023-01-08 10:35:12+00:00,https://twitter.com/SeputariNews/status/161203...,hut ke lvri prabowo sejak muda tentara sudah t...,SeputariNews,2010-04-12 06:26:10+00:00,True,432260,0,0,0,...,"[hut, lvri, prabowo, muda, tentara, teken, mat...","[hut, lvri, prabowo, muda, tentara, teken, mat...",hut ke lvri prabowo sejak muda tentara sudah t...,hut ke lvri prabowo sejak muda tentara sudah t...,hut ke lvri prabowo sejak muda tentara sudah t...,0.0000,1.000,0.0,0.000,Neutral
2498,2023-01-08 10:35:02+00:00,https://twitter.com/WartaEkonomi/status/161203...,prabowo sindir kader yang ingin hengkang elite...,WartaEkonomi,2009-04-12 07:01:14+00:00,False,31572,0,0,0,...,"[prabowo, sindir, kader, hengkang, elite, geri...","[prabowo, sindir, kader, hengkang, elite, geri...",prabowo sindir kader yang ingin hengkang elite...,prabowo sindir kader yang ingin hengkang elite...,prabowo sindir kader yang ingin hengkang elite...,0.0000,1.000,0.0,0.000,Neutral
2499,2023-01-08 10:34:55+00:00,https://twitter.com/adiojanoski/status/1612035...,satu dua melati biru tiga empat dalam jambanga...,adiojanoski,2010-09-10 16:24:10+00:00,False,286,2,4,0,...,"[melati, biru, jambangan, kawan, kawan, dilupa...","[melati, biru, jambang, kawan, kawan, lupa, pr...",satu dua melati biru tiga empat dalam jambanga...,satu dua melati biru tiga empat dalam jambanga...,satu dua melati biru tiga empat dalam jambanga...,0.0000,1.000,0.0,0.000,Neutral




In [None]:
#TWEET_DATA.to_csv("Text_Preprocessing.csv")

In [None]:
#TWEET_DATA.to_excel("Text_Preprocessing.xlsx")

In [None]:
#TWEET_DATA.to_hdf("Text_Preprocessing.h5", 'tweet', mode='w')