In [2]:
import re
import pandas as pd
import numpy as np
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn import svm
from nltk.corpus import stopwords
from sklearn.metrics import f1_score, recall_score, precision_score, confusion_matrix, accuracy_score, classification_report

In [3]:
def cleansing(text):
    # senang emoticon
    emoticons_happy = set([
    ':-)', ':)', ';)', ':o)', ':]', ':3', ':c)', ':>', '=]', '8)', '=)', ':}',
    ':^)', ':-D', ':D', ':d', '8-D', '8D', 'x-D', 'xD', 'X-D', 'XD', '=-D', '=D',
    '=-3', '=3', ':-))', ":'-)", ":')", ':*', ':^*', '>:P', ':-P', ':P', 'X-P',
    'x-p', 'xp', 'XP', ':-p', ':p', '=p', ':-b', ':b', '>:)', '>;)', '>:-)',
    '<3'
    ])
 
    # sedih emoticon
    emoticons_sad = set([
    ':L', ':-/', '>:/', ':S', '>:[', ':@', ':-(', ':[', ':-||', '=L', ':<',
    ':-[', ':-<', '=\\', '=/', '>:(', ':(', '>.<', ":'-(", ":'(", ':\\', ':-c',
    ':c', ':{', '>:\\', ';('
    ])
    # menghapus emoticon
    emoticons = emoticons_happy.union(emoticons_sad)
    text = ' '.join([word for word in text.split() if word not in emoticons])
    # hapus mentions
    text = re.sub('@[^\s]+','',text)
    # hapus hashtags
    text = re.sub("#[A-Za-z0-9_]+","", text)
    # hapus url / links
    text = re.sub(r"http\S+", "", text)
    text = re.sub(r"www.\S+", "", text)
    # hapus tanda baca
    text = re.sub('[^a-zA-Z0-9\s]', '', text)
    # hapus multiple spaces
    text = re.sub('\s+', ' ', text)
    # hapus retweets
    text = re.sub(r'RT', '', text)
    # hapus angka
    text = re.sub(r'[0-9]+', ' ', text)

    return text

def casefolding(text):
    # mengubah karakter menjadi huruf kecil
    text = text.lower()
    return text

def tokenizing(text):
    text = text.split()
    return text

def remove_stopword(text):
    stp = stopwords.words('indonesian')
    text = ' '.join([word for word in text if word not in stp])
    return text

def stem_text(text):
    text = ' '.join([stemmer.stem(word) for word in text.split()])
    return text

In [11]:
stemmer = StemmerFactory().create_stemmer()
stopword = StopWordRemoverFactory().create_stop_word_remover()

# membaca file csv
df = pd.read_csv(filepath_or_buffer='1800.csv', sep=',', header=0)
# proses cleansing
df['cleansing'] = df['text'].apply(lambda x: cleansing(x))
pd.set_option('display.max_column', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_seq_items', None)
pd.set_option('display.max_colwidth', 500)
pd.set_option('expand_frame_repr', True)
# proses casefolding
df['casefolding'] = df['cleansing'].apply(lambda x: casefolding(x))
# proses tokenizing
df['tokenizing'] = df['casefolding'].apply(lambda x: tokenizing(x))
# proses hapus stopword
df['stopword'] = df['tokenizing'].apply(lambda x: remove_stopword(x))
# proses stemming
df['stem'] = df['stopword'].apply(lambda x: stem_text(x))

df.head(200)

# label = []
# for index, row in df.iterrows():
#     if row["label"] == 'negatif':
#         label.append(0)
#     else:
#         label.append(1)


# df['text_clean'] = df['stem']
# df['polarity'] = label

# df = df.drop(columns=['id', 'created_at', 'text', 'cleansing', 'casefolding', 'tokenizing', 'stopword'])
# x = df['text_clean']
# y = df['polarity']

# X_train, X_test, y_train, y_test = train_test_split(x, y, stratify=y, test_size=0.2, random_state=30)

# vectorizer = TfidfVectorizer()
# X_train = vectorizer.fit_transform(X_train)
# X_test = vectorizer.transform(X_test)

# naivebayes = MultinomialNB()
# naivebayes.fit(X_train, y_train)
# predictions = naivebayes.predict(X_test)
# print(classification_report(y_test,predictions))
# # f1_score
# print('f1_score = '+str('{:4.2f}'.format(f1_score(y_test, predictions)*100))+'%')
# # accuracy score
# print('accuracy score = '+str('{:4.2f}'.format(accuracy_score(y_test, predictions)*100))+'%')
# # precision score
# print('precision score = '+str('{:4.2f}'.format(precision_score(y_test, predictions)*100))+'%')
# # recall score
# print('recall score = '+str('{:4.2f}'.format(recall_score(y_test, predictions)*100))+'%')
# tn, fp, fn, tp = confusion_matrix(y_test, predictions).ravel()
# tn, fp, fn, tp
# tweet = cleansing('jahat banget driver shopee')
# tweet = casefolding(tweet)
# tweet = tokenizing(tweet)
# tweet = remove_stopword(tweet)
# tweet = stem_text(tweet)
# v_data  = vectorizer.transform([tweet])
# y_preds = naivebayes.predict(v_data)
# y_preds

Unnamed: 0,id,created_at,text,label,cleansing,casefolding,tokenizing,stopword,stem
0,1537498780144010000,17/06/22 01.14,@sahabatgiselle Pesen shopeefood yang makanan 24 jam gt kalau gaakkkk:(,positif,Pesen shopeefood yang makanan jam gt kalau gaakkkk,pesen shopeefood yang makanan jam gt kalau gaakkkk,"[pesen, shopeefood, yang, makanan, jam, gt, kalau, gaakkkk]",pesen shopeefood makanan jam gt gaakkkk,sen shopeefood makan jam gt gaakkkk
1,1537498557913400000,17/06/22 01.13,@holyzaa @FOODFESS2 pake shopeefood aja 24k bisa double lauk utama + lauk tambahan lain,positif,pake shopeefood aja k bisa double lauk utama lauk tambahan lain,pake shopeefood aja k bisa double lauk utama lauk tambahan lain,"[pake, shopeefood, aja, k, bisa, double, lauk, utama, lauk, tambahan, lain]",pake shopeefood aja k double lauk utama lauk tambahan,pake shopeefood aja k double lauk utama lauk tambah
2,1537495458813480000,17/06/22 01.00,Indo\n\nPesawat siapin budget : 1 - 1.5jt pp\nKereta : 400k pp\nTj / krl / gojek : 200k (hemat jalan kaki)\nHotel deket venue: 700-1jt/malem (seludupin aja 5 orng jg gapapa) \nMakan : bikin pop mie aja dah kalo ga beli nasi padang di shopeefood potongan 25k __,positif,Indo Pesawat siapin budget jt pp Kereta k pp Tj krl gojek k hemat jalan kaki Hotel deket venue jtmalem seludupin aja orng jg gapapa Makan bikin pop mie aja dah kalo ga beli nasi padang di shopeefood potongan k,indo pesawat siapin budget jt pp kereta k pp tj krl gojek k hemat jalan kaki hotel deket venue jtmalem seludupin aja orng jg gapapa makan bikin pop mie aja dah kalo ga beli nasi padang di shopeefood potongan k,"[indo, pesawat, siapin, budget, jt, pp, kereta, k, pp, tj, krl, gojek, k, hemat, jalan, kaki, hotel, deket, venue, jtmalem, seludupin, aja, orng, jg, gapapa, makan, bikin, pop, mie, aja, dah, kalo, ga, beli, nasi, padang, di, shopeefood, potongan, k]",indo pesawat siapin budget jt pp kereta k pp tj krl gojek k hemat jalan kaki hotel deket venue jtmalem seludupin aja orng jg gapapa makan bikin pop mie aja dah kalo ga beli nasi padang shopeefood potongan k,indo pesawat siapin budget jt pp kereta k pp tj krl gojek k hemat jalan kaki hotel deket venue jtmalem seludupin aja orng jg gapapa makan bikin pop mie aja dah kalo ga beli nasi padang shopeefood potong k
3,1537480425895240000,17/06/22 00.01,"yaampun dapet driver shopee food baik bangettt huhuhu, sehat selalu untuk bapaknya &lt;3",positif,yaampun dapet driver shopee food baik bangettt huhuhu sehat selalu untuk bapaknya lt,yaampun dapet driver shopee food baik bangettt huhuhu sehat selalu untuk bapaknya lt,"[yaampun, dapet, driver, shopee, food, baik, bangettt, huhuhu, sehat, selalu, untuk, bapaknya, lt]",yaampun dapet driver shopee food bangettt huhuhu sehat bapaknya lt,yaampun dapet driver shopee food bangettt huhuhu sehat bapak lt
4,1537478559547390000,16/06/22 23.53,@ShopeePay_ID Mana miiinn???? Aku mau jajan di shopee food nii huhu,positif,Mana miiinn Aku mau jajan di shopee food nii huhu,mana miiinn aku mau jajan di shopee food nii huhu,"[mana, miiinn, aku, mau, jajan, di, shopee, food, nii, huhu]",miiinn jajan shopee food nii huhu,miiinn jajan shopee food nii huhu
5,1537475562846890000,16/06/22 23.41,@Belallangx Shopeefood dulu bang buat modal pacaran,positif,Shopeefood dulu bang buat modal pacaran,shopeefood dulu bang buat modal pacaran,"[shopeefood, dulu, bang, buat, modal, pacaran]",shopeefood bang modal pacaran,shopeefood bang modal pacar
6,1537475138630760000,16/06/22 23.40,@ShopeeCare Jajan Shopeefood biar bisa makan bareng sama keluarga dirumah. Pengen nya beli donat @ShopeeCare,,Jajan Shopeefood biar bisa makan bareng sama keluarga dirumah Pengen nya beli donat,jajan shopeefood biar bisa makan bareng sama keluarga dirumah pengen nya beli donat,"[jajan, shopeefood, biar, bisa, makan, bareng, sama, keluarga, dirumah, pengen, nya, beli, donat]",jajan shopeefood biar makan bareng keluarga dirumah pengen nya beli donat,jajan shopeefood biar makan bareng keluarga rumah ken nya beli donat
7,1537474820774180000,16/06/22 23.38,"@ShopeeCare Buat ""madyang gedhen"" di shopeefood pastinya..",positif,Buat madyang gedhen di shopeefood pastinya,buat madyang gedhen di shopeefood pastinya,"[buat, madyang, gedhen, di, shopeefood, pastinya]",madyang gedhen shopeefood pastinya,madyang gedhen shopeefood pasti
8,1537470638444580000,16/06/22 23.22,"@ravenurs udahh kak avennn, udah shopeefood wkwk",positif,udahh kak avennn udah shopeefood wkwk,udahh kak avennn udah shopeefood wkwk,"[udahh, kak, avennn, udah, shopeefood, wkwk]",udahh kak avennn udah shopeefood wkwk,udahh kak avennn udah shopeefood wkwk
9,1537464743094420000,16/06/22 22.58,"@louimpia ENGGAK SUMPAHHH justru aku mau nemenin terus sampe loui ketemu cowok baik!! buat yang suka ghosting, semoga kalo mau order gofood/shopeefood menunya out of stock terus ______",negatif,ENGGAK SUMPAHHH justru aku mau nemenin terus sampe loui ketemu cowok baik buat yang suka ghosting semoga kalo mau order gofoodshopeefood menunya out of stock terus,enggak sumpahhh justru aku mau nemenin terus sampe loui ketemu cowok baik buat yang suka ghosting semoga kalo mau order gofoodshopeefood menunya out of stock terus,"[enggak, sumpahhh, justru, aku, mau, nemenin, terus, sampe, loui, ketemu, cowok, baik, buat, yang, suka, ghosting, semoga, kalo, mau, order, gofoodshopeefood, menunya, out, of, stock, terus]",sumpahhh nemenin sampe loui ketemu cowok suka ghosting semoga kalo order gofoodshopeefood menunya out of stock,sumpahhh nemenin sampe loui ketemu cowok suka ghosting moga kalo order gofoodshopeefood menu out of stock
