# Data Survivor

Dataset: https://github.com/meisaputri21/Indonesian-Twitter-Emotion-Dataset\


This dataset contains 4.403 Indonesian tweets which are labeled into five emotion classes: love, anger, sadness, joy and fear.

# Import package

In [1]:
import pandas as pd
import re
from string import punctuation
from nltk.corpus import stopwords
import nltk

stop_words = stopwords.words('Indonesian')

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory

factory = StemmerFactory()
stemmer = factory.create_stemmer()

# No 1

In [55]:
df = pd.read_csv('Twitter_Emotion_Dataset.csv')

In [58]:
df = df.head(100)

In [59]:
df['tweet']

0     Soal jln Jatibaru,polisi tdk bs GERTAK gubernu...
1     Sesama cewe lho (kayaknya), harusnya bisa lebi...
2     Kepingin gudeg mbarek Bu hj. Amad Foto dari go...
3     Jln Jatibaru,bagian dari wilayah Tn Abang.Peng...
4     Sharing pengalaman aja, kemarin jam 18.00 bata...
                            ...                        
95    Mudah2an sudah terupload smua sebelum z mudik ...
96    Orang pendukung khilafah memang harus di black...
97    jangan sok akrab ye mention mention gue , mali...
98    Alhamdulillah Prof, setelah berbicara semalam ...
99    Keren!! Kira-kira masih ada nggak yg bilang Pa...
Name: tweet, Length: 100, dtype: object

## Text Preprocessing

Method :
1. converting to lower case
1. remove punctuation
1. remove white spaces
1. remove stopwords and particular words

In [60]:
stopwords.words('Indonesian')

['ada',
 'adalah',
 'adanya',
 'adapun',
 'agak',
 'agaknya',
 'agar',
 'akan',
 'akankah',
 'akhir',
 'akhiri',
 'akhirnya',
 'aku',
 'akulah',
 'amat',
 'amatlah',
 'anda',
 'andalah',
 'antar',
 'antara',
 'antaranya',
 'apa',
 'apaan',
 'apabila',
 'apakah',
 'apalagi',
 'apatah',
 'artinya',
 'asal',
 'asalkan',
 'atas',
 'atau',
 'ataukah',
 'ataupun',
 'awal',
 'awalnya',
 'bagai',
 'bagaikan',
 'bagaimana',
 'bagaimanakah',
 'bagaimanapun',
 'bagi',
 'bagian',
 'bahkan',
 'bahwa',
 'bahwasanya',
 'baik',
 'bakal',
 'bakalan',
 'balik',
 'banyak',
 'bapak',
 'baru',
 'bawah',
 'beberapa',
 'begini',
 'beginian',
 'beginikah',
 'beginilah',
 'begitu',
 'begitukah',
 'begitulah',
 'begitupun',
 'bekerja',
 'belakang',
 'belakangan',
 'belum',
 'belumlah',
 'benar',
 'benarkah',
 'benarlah',
 'berada',
 'berakhir',
 'berakhirlah',
 'berakhirnya',
 'berapa',
 'berapakah',
 'berapalah',
 'berapapun',
 'berarti',
 'berawal',
 'berbagai',
 'berdatangan',
 'beri',
 'berikan',
 'berikut'

### 1. Converting to lower case

In [61]:
def to_lower(text):
    return text.lower()

In [62]:
text1 = 'Saya dan kamu belajar Data Science'
to_lower(text1)

'saya dan kamu belajar data science'

In [63]:
# list(filter(bool, 'Line 1\n\nLine 3\rLine 4\r\n'.splitlines()))

In [64]:
# a = list(filter(bool, 
# '''            
# aamiin;amin 
# adek;adik 
# adlh;adalah 
# aer;air 
# aiskrim;es krim 
# aj;saja 
# aja;saja 
# ajah;saja 
# ajalah;saja 
# aje;saja 
# ajh;saja 
# ajh;saja
# ajj;saja
# aku;saya
# ak;saya 
# akoh;saya
# akutu;saya
# aq;saya
# alesan;alasan 
# ama;sama   
# amaca;ah masa
# amuh;kamu  
# ancur;hancur 
# ane;saya 
# anget;hangat 
# anjeng;anjing 
# ank;anak 
# apah;apa 
# aph;apa
# apo;apa 
# apose;apa
# apz;apa
# aq;saya
# artine;artinya 
# asek;asik 
# ati2;hati-hati 
# atit;sakit 
# atu;satu 
# atw;atau 
# ayang;sayang 
# ayuk;ayo 
# babang;abang
# bae;baik 
# bais;habis
# bakalan;akan 
# bales;balas 
# bandingin;bandingkan 
# banget;sekali 
# bangett;sangat 
# banyakin;perbanyak 
# barengan;bersamaan 
# baryaw;sabar ya
# baso;bakso 
# bay;selamat tinggal
# bbrp;beberapa 
# bc;baca 
# bcs;bisa 
# bct;bicara 
# bdan;badan 
# bdg;bandung 
# bdn;badan 
# becanda;bercanda 
# bedua;berdua 
# bego;bodoh 
# bekel;bekal 
# belom;belum 
# belon;belum 
# bener;benar 
# bener2;benar-benar 
# beneran;benar 
# berantem;bertengkar 
# berenti;berhenti 
# besoq;besok
# bete;sebal 
# beud;banget
# bg;bagi 
# bgd;sangat 
# bget;sangat 
# bgini;begini 
# bgitu;begitu 
# bgks;bungkus 
# bgm;bagaimana 
# bgmn;bagaimana 
# bgn;bangun 
# bgni;begini 
# bgntdr;bangun tidur 
# bgs;bagus 
# bgt;sangat 
# bgtu;begitu 
# bgus;bagus 
# bhay;selamat tinggal
# bhn;bahan 
# bhs;bahas 
# bhw;bahwa 
# bhy;bahaya 
# bisaa;bisa 
# bisya;bisa
# biza;bisa
# bj;baju 
# bju;baju 
# bk;buku 
# bkan;bukan 
# bkin;bikin 
# bkl;bakal 
# bkn;bukan 
# bkr;bakar 
# bks;bungkus 
# bl;beli 
# blajar;belajar 
# blg;bilang 
# blh;boleh 
# bli;beli 
# blik;balik 
# bljr;belajar 
# blk;balik 
# blkg;belakang 
# blkng;belakang 
# blkngny;belakangnya 
# blm;belum 
# bln;bulan 
# blng;bilang 
# blngn;bilang 
# blom;belum 
# bls;balas 
# blum;belum 
# bndg;bandung 
# bndr;bandar 
# bner;benar 
# bneran;benar 
# bnget;sangat 
# bngks;bungkus 
# bngng;bingung 
# bngt;sangat 
# bngun;bangun 
# bnr;benar 
# bnrbnr;benar-benar 
# bnrn;benar 
# bntar;sebentar 
# bntr;sebentar 
# bntr;sebentar
# bnyak;banyak 
# bnyk;banyak 
# bnykny;banyaknya 
# bobo;tidur 
# bobok;tidur 
# bokap;ayah 
# bole;boleh 
# boong;bohong 
# bosen;bosan 
# bpk;bapak 
# brani;berani 
# brapa;berapa 
# brarti;berarti 
# brasa;berasa 
# brg;barang 
# brgkt;berangkat 
# brhsl;berhasil 
# brjln;berjalan 
# brkfst;breakfast 
# brmslh;bermasalah 
# brngkt;berangkat 
# brp;berapa 
# brpa;berapa 
# brrti;berarti 
# brs;beres 
# brshbt;bersahabat 
# brsm;bersama 
# brthn;bertahan 
# brtmn;berteman 
# bru;baru 
# bs;bisa 
# bs;bisa
# bsa;bisa 
# bsa;bisa
# bsk;besok 
# bsk;besok
# bsok;besok 
# bsoq;besok
# bsr;besar 
# btg;batang 
# bth;butuh 
# btl;betul 
# btul;betul 
# buad;buat 
# buatin;buatkan 
# bucin;budak cinta
# bulcit;omong kosong
# bw;bawa 
# bwa;bawa 
# bwh;bawah 
# bwt;buat 
# byk;banyak 
# bynk;banyak 
# byr;bayar 
# cabal;sabar 
# cabut;pergi
# caiank;sayang
# cakep;cakap 
# cape;capai 
# capek;capai 
# catet;catat 
# cb;coba 
# cekola;sekolah
# cekolah;sekolah
# celalaw;selalu
# celalu;selalu
# cemungudh;semangat
# cemungut;semangat
# cemunguth;semangat
# cepet;cepat 
# cewe;cewek 
# cibuq;sibuk
# cini;sini
# ciyus;serius
# ckp;cukup 
# ckrg;sekarang 
# ckup;cukup 
# cll;selalu
# cllu;selalu
# cllw;selalu
# cm;cuma 
# cma;cuma 
# cman;hanya 
# cmn;hanya 
# cmpk;cempaka 
# cobaa;coba 
# coeg;jancuk 
# cowo;cowok 
# cpat;cepat 
# cpe;capek
# cpee;capek
# cpet;cepat 
# cpt;cepat 
# cptn;cepatan 
# crt;cerita 
# cth;contoh 
# cug;jancuk 
# cuk;jancuk 
# cuman;hanya
# cungguh;sungguh 
# cw;cewek 
# cwe;cewek 
# cwo;cowok 
# cyang;sayang 
# dah;sudah 
# dah;sudah
# dalem;dalam 
# dapa;ada apa
# dapah;ada apa
# dapet;dapat 
# dapetin;mendapatkan 
# dateng;datang 
# dberkahi;diberkahi 
# dblg;dibilang 
# dblkng;dibelakang 
# dbls;dibalas 
# dbwt;dibuat 
# dbyr;dibayar 
# de;deh 
# demen;suka 
# denger;dengar 
# deres;deras 
# dftr;daftar 
# dftr;daftar
# dg;dengan 
# dgn;dengan 
# dgr;dengar 
# dgrn;dengarin 
# dh;deh 
# dh;sudah
# didenger;didengar 
# didengerin;didengarkan 
# diem;diam 
# dijadiin;dijadikan 
# dikit;sedikit 
# dimance;dimana
# dimandose;dimana
# dimans;dimana
# dipake;dipakai 
# dipendem;dipendam 
# dirmh;di rumah 
# ditemenin;ditemani 
# djkt;di jakarta 
# djl;di jalan 
# djln;di jalan 
# dkat;dekat 
# dket;dekat 
# dkls;di kelas 
# dkmps;di kampus 
# dkmr;di kamar 
# dkntr;di kantor 
# dksh;dikasih 
# dkt;dekat 
# dl;dulu 
# dll;dan lain
# dlm;dalam 
# dlrng;dilarang 
# dlu;dulu 
# dluan;duluan 
# dmana;dimana 
# dmkn;dimakan 
# dmn;dimana 
# dmna;dimana 
# dmnmn;dimana-mana 
# dmny;dimananya 
# dng;dengan 
# dngar;dengar 
# dnger;dengar 
# dngerin;dengar 
# dngn;dengan 
# dngr;dengar 
# doang;saja 
# doank;saja 
# donlot;download 
# doyan;suka 
# dpan;depan 
# dpet;dapat 
# dpk;depok 
# dpkrkn;dipikirkan 
# dpn;depan 
# dpnrmh;depan rumah 
# dpt;dapat 
# dr;dari 
# drg;dokter gigi 
# drh;darah 
# dri;dari 
# drmh;di rumah 
# drmn;darimana 
# drpd;daripada 
# drtd;dari tadi
# drumah;di rumah
# dsb;dan sebagainya
# dsblh;di sebelah
# dsini;disini 
# dsklh;di sekolah
# dsni;di sini
# dst;dan seterusnya
# dtg;datang 
# dtgny;datangnya 
# dtmbh;ditambah 
# dtmpt;di tempat
# dtng;datang 
# duluw;dulu
# dy;dia 
# ea;ya
# eeq;feses 
# elo;kamu 
# emang;memang 
# emangnya;memangnya 
# emank;memang 
# emg;memang 
# emg;memang
# emng;memang 
# engga;tidak 
# enggak;tidak 
# enk;enak 
# entar;nanti 
# ente;anda 
# eps;episode 
# eteb;bosan
# eug;saya
# faham;paham 
# fb;facebook 
# fav;favorit
# fave;favorit
# g;tidak
# ga;tidak 
# ga;tidak
# gaada;tidak ada
# gabisa;tidak bisa
# gaboleh;tidak boleh
# gabut;menganggur
# gada;tidak ada
# gaenak;tidak enak
# gajadi;tidak jadi
# gajelas;tidak jelas
# gak;tidak 
# gak;tidak
# gakuat;tidak kuat
# gakz;tidak
# galo;galau 
# gamasuk;tidak masuk
# gamau;tidak mau
# gapake;tidak pakai
# gapapa;tidak apa-apa
# gapernah;tidak pernah
# gapunya;tidak punya
# garem;garam 
# gasuka;tidak suka
# gatau;tidak tahu
# gataw;tidak tahu
# gatel;gatal 
# gausah;tidak usah
# gawe;kerja 
# gblk;goblok
# gbr;gambar 
# gdg;gudang 
# gembel;jelek 
# gengges;ganggu
# ghiy;lagi
# gi;lagi
# gilak;gila 
# gile;gila 
# gitu;begitu 
# gituan;begitu 
# gituh;begitu 
# gituin;itu 
# gk;tidak 
# gk;gak 
# gk;tidak
# gmana;bagaimana 
# gmbr;gambar 
# gmn;bagaimana 
# gmna;bagaimana 
# gmpg;gampang 
# gmpng;gampang 
# gni;begini 
# gnti;ganti 
# gpny;tidak punya
# gpp;tidak apa-apa
# gpp;tidak apa apa
# gprnh;tidak pernah
# gr2;gara-gara 
# gratisan;gratis 
# gt;begitu 
# gtu;begitu 
# gtw;tidak tahu
# gua;saya 
# gue;saya 
# gue;saya
# gw;saya 
# gw;saya
# gws;get well soon
# gx;tidak
# hadeh;aduh 
# hapal;hafal
# haqq;hakiki 
# hargain;hargai 
# harini;hari ini
# hbd;happy birthday
# hbis;habis 
# hbng;hubungi 
# hbs;habis 
# hbskn;habiskan 
# helo;halo 
# heters;pembenci 
# hlg;hilang 
# hlm;halaman 
# hlmn;halaman 
# hny;hanya 
# hnya;hanya
# hqq;hakiki 
# hr;hari 
# hrg;harga 
# hrgny;harganya 
# hri;hari 
# hrs;harus 
# hrsny;harusnya 
# hrus;harus 
# hti;hati 
# hums;rumah
# humz;rumah
# huum;iya
# idiiot;idiot 
# idup;hidup
# ily;saya cinta kamu 
# inget;ingat 
# ingetin;ingatkan 
# ingt;ingat 
# istrht;istirahat 
# item;hitam 
# itumah;itu 
# ituu;itu 
# iy;iya 
# iy;iya
# iyach;iya
# iyain;mengiyakan 
# iyap;iya
# iyapz;iya
# iyoo;iya 
# iyup;iya
# iyupz;iya
# jadiin;jadi 
# jadull;jadul 
# jamber;jam berapa
# jancoeg;jancuk 
# jatoh;jatuh 
# jawabanya;jawabannya 
# jd;jadi 
# jd;jadi
# jdi;jadi 
# jdi;jadi
# jdny;jadinya 
# jdnya;jadinya 
# jdwl;jadwal 
# jem;jam 
# jg;juga 
# jg;juga
# jga;juga 
# jga;juga
# jgan;jangan 
# jglh;jagalah 
# jgn;jangan 
# jgn;jangan
# jgnjgn;jangan-jangan 
# jing;anjing 
# jink;anjing 
# jk;jika 
# jkrt;jakarta 
# jkt;jakarta 
# jl;jalan 
# jln;jalan 
# jm;jam 
# jmbtn;jembatan 
# jml;jumlah 
# jmlh;jumlah 
# jmpt;jemput 
# jmt;jumat 
# jng;jangan 
# jngan;jangan 
# jngan;jangan
# jngn;jangan 
# jngn;jangan
# jpn;jepang 
# jrg;jarang 
# jrng;jarang 
# jwb;jawab 
# jwbn;jawaban 
# jwbny;jawabannya 
# kacian;kasihan
# kagak;tidak 
# kaka;kakak
# kalee;kali 
# kaleyan;kalian
# kalo;kalau 
# kalu;kalau
# kamsud;maksud 
# kaos/mug/jaket; 
# karna;karena 
# karoke;karaoke 
# kasian;kasihan 
# kau;kamu 
# kau;kamu
# kayak;mirip 
# kayanya;kayaknya 
# kbnykn;kebanyakan 
# kbr;kabar 
# kbtln;kebetulan 
# kbykn;kebanyakan 
# kdg;kadang 
# kdng;kadang 
# kdpn;ke depan
# kebawa;terbawa 
# kecakepan;kecakapan 
# keknya;sepertinya 
# kelaperan;kelaparan 
# kelar;selesai 
# keles;kali
# keliatan;kelihatan 
# kemaren;kemarin 
# kenapah;kenapa
# kenape;kenapa 
# kenaps;kenapa
# kenapz;kenapa
# kenceng;kencang 
# kepengen;ingin 
# kepngn;ingin 
# kepo;ingin tahu
# kerikil;kerikil 
# kesampean;kesampaian 
# kesel;kesal 
# kesian;kasihan 
# ketawa;tertawa 
# keujanan;kehujanan 
# keyen;keren
# kga;tidak 
# kgak;tidak 
# kgk;tidak 
# khabar;kabar 
# khan;kan 
# khan;kan
# khanz;kan
# khsny;khususnya 
# khwtr;khawatir 
# kirimin;kirimkan 
# kite;kita 
# kk;kakak 
# kk;kakak
# kl;kalau 
# klau;kalau 
# klh;kalah 
# kli;kali 
# klmpk;kelompok 
# klo;kalau 
# klo;kalau
# klp;kelompok 
# kls;kelas 
# klu;kalau 
# kluar;keluar 
# klw;kalau 
# klw;kalau
# km;kamu 
# km;kamu
# kmaren;kemarin 
# kmi;kami 
# kmn;kemana 
# kmna;kemana 
# kmpret;kampret 
# kmps;kamus 
# kmr;kamar 
# kmren;kemarin 
# kmrin;kemarin 
# kmrn;kemarin 
# kmrn;kemarin
# kmu;kamu 
# kmu;kamu
# kn;kan 
# kna;kena 
# knapa;kenapa 
# kngen;kangen 
# knl;kenal 
# knp;kenapa 
# knp;kenapa
# knpa;kenapa 
# kntr;kantor 
# knyng;kenyang 
# konsen;konsentrasi 
# koq;kok 
# koq;kok
# kosan;kost 
# kowe;kamu 
# kpala;kepala 
# kpan;kapan 
# kpan;kapan
# kpd;kepada 
# kpgn;ingin 
# kpn;kapan 
# kpn;kapan
# kpngn;ingin 
# krg;kurang 
# krikil;kerikil 
# krja;kerja 
# krmh;ke rumah
# krn;karena 
# krna;karena 
# krng;kurang 
# krupuk;kerupuk 
# ksh;kasih 
# ksian;kasihan 
# ksih;kasih 
# ksmptn;kesempatan 
# ksn;kesana 
# kt;kita 
# kta;kita 
# ktdrn;ketiduran 
# ktemu;ketemu 
# ktmpt;ke tempat
# ktmu;ketemu 
# ktny;katanya 
# ktnya;katanya 
# ktr;kantor 
# kuq;kok
# kuy;ayo
# kw;kamu
# kyak;seperti 
# kyaknya;sepertinya 
# kyanya;sepertinya 
# kyk;seperti 
# kyk;kayak 
# kykny;sepertinya 
# kyknya;sepertinya 
# kyny;sepertinya 
# kzl;kesal 
# kzl;kesal
# laen;lain 
# laff;cinta
# lagian;lagi 
# lam;salam
# laper;lapar 
# lb;lembar 
# lbh;lebih 
# lbih;lebih 
# leh;boleh
# lelaahh;lelah 
# lemes;lemas 
# lg;lagi 
# lgi;lagi 
# lgkp;lengkap 
# lgs;langsung 
# lgsg;langsung 
# lgsng;langsung 
# lgsung;langsung 
# liatin;lihat 
# lm;lama 
# lmyn;lumayan 
# lngkp;lengkap 
# lngsg;langsung 
# lngsng;langsung 
# lngsung;langsung 
# lnjt;lanjut 
# lnsg;langsung 
# lnsng;langsung 
# lo;kamu 
# lo;kamu
# loe;kamu 
# loe;kamu
# lom;belum
# lov;cinta
# love;cinta
# low;kalau
# lp;lupa
# lpngn;lapangan 
# lpr;lapar 
# lsg;langsung 
# lu;kamu 
# lu;kamu
# luchu;lucu
# lum;belum
# lun;belum
# luthu;lucu
# luv;cinta
# lvl;level 
# lw;kamu
# lwt;lewat 
# maaci;terima kasih
# maacih;terima kasih
# maap;maaf
# maen;main 
# mager;malas bergerak
# makaci;terima kasih
# makana;karena itu
# makasi;terima kasih
# makasihhh;terima kasih
# make;pakai 
# makn;makan 
# malem;malam 
# malem2;malam-malam 
# malming;malam minggu
# mamah;mama 
# mamam;makan 
# mampet;mampat 
# mane;mana 
# maneh;mana 
# manggil;panggil 
# mano;mana 
# mantep;mantap 
# masakin;masakan 
# mateng;matang 
# maw;mau
# mcm;macam 
# mct;macet 
# mdg;lebih baik
# mdgn;baikan 
# mdh;mudah 
# mdpt;mendapat 
# melulu;selalu 
# mendingan;lebih baik
# menyempurnakan; 
# merhatiin;memperhatikan 
# mesen;pesan 
# mgkn;mungkin 
# mhs;mahasiswa 
# mhsw;mahasiswa 
# miapa;demi apa
# miapah;demi apa
# mikirin;memikirkan 
# milih;pilih 
# minjem;pinjam 
# misal'a;misalnya
# mk;maka 
# mka;maka 
# mkan;makan 
# mkanan;makanan 
# mkanya;makanya 
# mkn;makan 
# mknan;makanan 
# mkny;maka itu
# mksd;maksud 
# mksdny;maksudnya 
# mksdnya;maksudnya 
# mksdq;maksudku 
# mksh;terima kasih
# mksih;terima kasih
# mlah;malah 
# mlm;malam 
# mls;malas 
# mmg;memang 
# mn;mana 
# mna;mana 
# mndg;lebih baik
# mndgn;lebih baik
# mndgr;mendengar 
# mndi;mandi 
# mnding;lebih baik
# mndptkn;mendapatkan 
# mng;menang 
# mnrt;menurut 
# mnsempurnakan; 
# mnt;minta 
# mnta;minta 
# mntp;mantap 
# mnum;minum 
# mnyakitiku;menyakitiku 
# mnyngkn;menyenangkan 
# mo;mau 
# moga;semoga 
# mosok;masa 
# mreka;mereka 
# mrh;marah 
# mrk;mereka 
# ms;mas 
# msg;masing 
# msh;masih 
# msih;masih 
# msk;masuk 
# mski;meski 
# mskipun;meskipun 
# mskn;masakan 
# msl;misal 
# mslh;masalah 
# msti;harus 
# muke;muka 
# mulu;terus 
# musti;harus 
# muup;maaf
# mu'uv;maaf
# mw;mau 
# mw;mau
# mz;mas 
# nabung;tabung 
# naek;naik 
# nahan;tahan 
# nak;anak
# nambah;tambah 
# nanya;tanya 
# napa;kenapa 
# napas;nafas 
# nape;kenapa 
# naq;anak
# nasgor;nasi goreng
# nax;anak
# nda;tidak
# ndak;tidak
# ndax;tidak
# ndk;tidak 
# negri;negeri 
# nelen;telan 
# nemenin;menemani 
# nemu;temu 
# nengok;tengok 
# ngabungin;menggabungkan
# ngadain;mengadakan 
# ngajak;mengajak 
# ngajak;mengajak
# ngajar;ajar 
# ngak;tidak 
# ngakak;terbahak-bahak 
# ngambek;marah 
# ngambil;ambil 
# nganter;antar 
# nganterin;antar 
# ngarep;mengharap 
# ngarepinnya;mengharapkannya 
# ngasi;kasih 
# ngasih;kasih 
# ngegosip;gosip 
# ngehadepin;menghadapi 
# ngeliatin;melihat 
# ngeluh;keluh 
# ngerasa;merasa 
# ngerasain;merasakan 
# ngerjain;mengerjakan 
# ngerokok;merokok 
# ngerokok;merokok
# ngetik;ketik 
# ngga;tidak 
# ngga;tidak
# nggak;tidak 
# nggak;tidak
# nggax;tidak
# nggesek;menggesek
# nggk;tidak 
# nggosok;menggosok
# ngibul;berbohong
# ngidam;idam 
# ngikutin;ikuti 
# ngilangin;menghilangkan 
# nginep;menginap 
# ngingetin;mengingatkan 
# ngjk;mengajak 
# ngk;tidak 
# ngmbl;mengambil 
# ngmg;omong 
# ngmng;berbicara 
# ngmpl;berkumpul 
# ngmps;ke kampus
# ngntk;mengantuk 
# ngntr;ke kantor
# ngobatin;obati 
# ngobrol;bicara 
# ngomong;bicara 
# ngomongin;membicarakan 
# ngrasain;merasakan 
# ngundurin;mengundurkan 
# ngunyah;kunyah 
# ngurus;urus 
# ni;ini 
# nich;ini
# ninggalin;tinggalkan 
# nmbh;tambah 
# nmny;namanya 
# nmpk;nampak 
# nmr;nomor 
# nnti;nanti 
# nntn;nonton 
# nntn;menonton
# nnton;nonton 
# nolak;tolak 
# nraktir;traktir 
# ntah;entah 
# ntap;mantap
# ntar;nanti 
# nti;nanti 
# ntn;nonton 
# ntn;menonton
# nton;nonton 
# ntr;nanti 
# ntu;itu 
# nungguin;tunggu 
# nyakiti;menyakiti 
# nyakitin;menyakiti 
# nyalain;nyalakan 
# nyampe;sampai 
# nyangka;sangka 
# nyantai;santai 
# nyari;cari 
# nyebelin;menyebalkan 
# nyeremin;menyeramkan 
# nyesek;sesak 
# nyesel;menyesal 
# nyetel;setel 
# nyiksa;menyiksa 
# nyoba;coba 
# nyobain;coba 
# nyuci;cuci 
# nyukur;cukur 
# nyuruh;suruh 
# nyusul;susul 
# oc;oke
# oce;oke
# ohh;oh
# ok;oke
# okedech;oke
# okedeh;oke
# okeh;baik 
# okeh;oke
# okz;oke
# ol;online 
# org;orang 
# org;orang
# org2;orang-orang 
# orng;orang 
# ortu;orangtua 
# ouch;oh
# ouh;oh
# owh;oh
# pait;pahit 
# pake;pakai 
# pakek;pakai 
# pantesan;pantas 
# pasutri;pasangan suami istri
# paz;pas
# pcr;pacar 
# pcrn;pacaran 
# pcrny;pacarnya 
# pd;pada 
# pdahal;padahal 
# pdhal;padahal 
# pdhl;padahal 
# pedes;pedas 
# pegi;pergi 
# pengen;ingin
# penghianat;pengkhianat 
# pengin;ingin 
# pengin;ingin
# pesen;pesan 
# pg;pergi 
# pgen;ingin 
# pgn;ingin 
# pgn;ingin
# pilem;film 
# pingin;ingin 
# pinjem;pinjam 
# pinter;pintar 
# pkai;pakai 
# pke;pakai 
# plg;pulang 
# plis;tolong 
# pljrn;pelajaran 
# plng;pulang 
# pmpuan;perempuan 
# pndh;pindah 
# pngen;ingin 
# pngn;ingin 
# pnjng;panjang 
# pnting;penting 
# pntng;penting 
# pny;punya 
# pnya;punya 
# pokonya;pokoknya 
# praktek;praktik 
# prasaan;perasaan 
# prgi;pergi 
# prmhn;perumahan 
# prmptn;perempatan 
# prnah;pernah 
# prnh;pernah 
# prthnkn;pertahankan 
# psg;pasang 
# psng;pasang 
# psr;pasar 
# psti;pasti 
# psti;pasti
# ptg;penting 
# punye;punya 
# puter;putar 
# puterin;putar 
# pzt;pasti
# q;saya
# qaqa;kakak
# qq;kakak
# qt;kita 
# qta;kita 
# rada;agak 
# rame;ramai 
# rame2;ramai-ramai 
# rasane;rasanya 
# rb;ribu 
# rmbt;rambut 
# rmh;rumah 
# rmh;rumah
# rmhq;rumahku 
# rsk;rusak 
# sabeb;bebas
# sabi;bisa
# saje;saja 
# salfok;salah fokus
# saltum;salah kostum
# sambel;sambal 
# samo;sama 
# sampe;sampai
# sans;santai 
# sape;siapa 
# sateny;satenya 
# sbg;sebagai 
# sbl;sebelah 
# sblh;sebelah 
# sblhny;sebelahnya 
# sblm;sebelum 
# sblmny;sebelumnya 
# sbnr;sebenarnya 
# sbnrny;sebenarnya 
# sbnrnya;sebenarnya 
# sbnt;sebentar 
# sbntr;sebentar 
# sbr;sabar 
# sbtr;sebentar 
# scr;secara 
# sdah;sudah 
# sdg;sedang 
# sdgkn;sedangkan 
# sdh;sudah 
# sdh;sudah
# sdng;sedang 
# sdngkn;sedangkan 
# sebel;kesal 
# sebenernya;sebenarnya 
# sekola;sekolah 
# selaw;santai
# selow;sebentar 
# selow;santai
# semalem;semalam 
# sembuhin;sembuhkan 
# senen;senin 
# seneng;senang 
# serem;seram 
# sesek;sesak
# seterah;terserah 
# sgala;segala 
# sgini;segini 
# sgitu;segitu 
# sgt;sangat 
# shap;siap
# shaps;siap
# shbt;sahabat 
# shiet;tahi 
# shit;tahi 
# sihat;sehat 
# sja;saja 
# skali;sekali 
# skalian;sekalian 
# skit;sakit 
# sklh;sekolah 
# skola;sekolah 
# skolah;sekolah 
# skr;sekarang 
# skrang;sekarang 
# skrg;sekarang 
# skrng;sekarang 
# skt;sakit 
# slah;salah 
# slalu;selalu 
# slamat;selamat 
# slesai;selesai 
# slg;saling 
# slh;salah 
# slhkn;salahkan 
# sllu;selalu 
# slm;salam 
# slmt;selamat 
# sls;selesai 
# sm;sama 
# smangat;semangat 
# smbg;sambung 
# smbh;sembuh 
# smbhyg;sembahyang 
# smbil;sambil 
# smbl;sambil 
# smbng;sambung 
# smbrg;sembarang 
# smbuh;sembuh 
# smcm;semacam 
# smg;semoga 
# smga;semoga 
# smgt;semangat 
# smlm;semalam 
# smlmn;semalaman 
# smngt;semangat 
# smntr;sementara 
# smoga;semoga 
# smpai;sampai 
# smpe;sampai 
# smpt;sempat 
# smua;semua 
# smuanya;semuanya 
# sndiri;sendiri 
# sndri;sendiri 
# sngt;sangat 
# sni;sini 
# snyum;senyum 
# soale;soalnya 
# sodara;saudara 
# sopo;siapa 
# sorang;seorang 
# sory;maaf 
# sperti;seperti 
# spnjng;sepanjang 
# sprt;seperti 
# sprtny;sepertinya 
# spt;seperti 
# sptny;sepertinya 
# spy;supaya 
# srg;sering 
# sseorg;seseorang 
# stelah;setelah 
# stgh;setengah 
# stiap;setiap 
# stlh;setelah 
# stngh;setengah 
# sudh;sudah 
# sy;saya 
# sya;saya 
# syantik;cantik
# syg;sayang 
# sygku;sayangku 
# sygny;sayangnya 
# syipp;sip
# syng;sayang 
# syp;siapa
# tai;tahi 
# tau;tahu 
# tau;tahu
# tauk;tahu
# taunya;tahunya 
# td;tadu 
# tdi;tadi 
# tdk;tidak 
# tdk;tidak
# tdny;tadinya 
# tdr;tidur 
# tdur;tidur 
# telor;telur 
# telp;telepon
# telpon;telepon 
# temen;teman 
# temen2;teman-teman 
# temenin;temani 
# tercyduk;terciduk
# teriakk;teriak 
# teros;terus 
# tetep;tetap 
# tgannya;tangannya 
# tggu;tunggu 
# tgh;tengah 
# tgl;tanggal 
# tgl;tanggal
# tgn;tangan 
# tgs;tugas 
# thd;terhadap 
# thdp;terhadap 
# thks;terima kasih
# thn;tahun 
# thnx;terima kasih
# thx;terima kasih
# tida;tidak 
# tipi;tv 
# tipi;televisi
# tkut;tsayat 
# tlg;tolong 
# tlh;telah 
# tlng;tolong 
# tlp;telepon 
# tlpn;telepon 
# tamvan;tampan
# tmbah;tambah 
# tmbh;tambah 
# tmen;teman 
# tmn;teman 
# tmpat;tempat 
# tmpt;tempat 
# tmsk;termasuk 
# tnp;tanpa 
# tnpa;tanpa 
# tny;ternyata 
# tnya;tanya 
# tnyt;ternyata 
# tp;tapi 
# tp;tapi
# tpi;tapi 
# tq;terima kasih
# trdpt;terdapat 
# trgntng;tergantung 
# trims;terima kasih
# trimz;terima kasih
# trjadi;terjadi 
# trkdng;terkadang 
# trkhr;terakhir 
# trlalu;terlalu 
# trmksh;terima kasih
# trmsk;termasuk 
# trnsfr;transfer 
# trnyata;ternyata 
# trnyt;ternyata 
# trs;terus 
# trsrh;terserah 
# trus;terus 
# tsadeest;sadis
# tsb;tersebut 
# tsbt;tersebut 
# ttep;tetap 
# ttg;tentang 
# ttp;tetap 
# tu;itu 
# tuch;itu
# tukeran;tukar 
# tumben;tidak biasa
# tw;tahu 
# tw;tahu
# u;kamu
# ubat;obat 
# ud;sudah 
# uda;sudah 
# udah;sudah 
# udah;sudah
# udd;sudah
# udeh;sudah 
# udh;sudah 
# udh;sudah
# uga;juga
# ujan2an;hujan-hujanan 
# untk;untuk 
# utk;untuk 
# von;telepon
# w;saya
# wa;saya
# wad;buat
# waduh;aduh 
# wanjir;anjir 
# wat;buat
# wht;apa 
# wkt;waktu 
# wktu;waktu 
# yaini;ya ini
# yank;sayang 
# yank;sayang
# yap;ya
# yaudh;ya sudah
# yaw;ya
# yawda;ya sudah
# yela;yailah 
# yelah;yailah 
# yg;yang 
# yng;yang 
# yodah;ya sudah
# yoi;iya
# yup;ya 
# yups;ya
# yupz;ya
# ywdh;ya sudah
# '''
# .splitlines()))

In [65]:
# contractions_dict = []

# for string in a:
#     new_string = string.replace(";", ":")
#     contractions_dict.append(new_string)

# print(contractions_dict)

In [66]:
# del contractions_dict[0]

In [67]:
# contractions_dict

In [68]:
# contractions_dict2 = []

# for string in contractions_dict :
#     new_string = string.replace(" ", "")
#     contractions_dict2.append(new_string)

# print(contractions_dict2)

In [69]:
# contractions_dict2

In [70]:
# kiri = []
# for i in range(len(a)):
#     kiri.append(a[i][0])

# print(kiri)

In [71]:
# kanan = []
# for i in range(len(a)):
#     kanan.append(a[i][1])

# print(kanan)

In [72]:
# using naive method
# to convert lists to dictionary
res = {}
for key in kiri:
    for value in kanan:
        res[key] = value
        kanan.remove(value)
        break  

NameError: name 'kiri' is not defined

In [73]:
# res

In [74]:
# contractions_dict =  res.copy()

In [75]:
# def expand_contractions(text, contractions_dict):
#     contractions_pattern = re.compile('({})'.format('|'.join(contractions_dict.keys())),
#                                       flags=re.IGNORECASE | re.DOTALL)

#     def expand_match(contraction):
#         match = contraction.group(0)
#         first_char = match[0]
#         expanded_contraction = contractions_dict.get(match) \
#             if contractions_dict.get(match) \
#             else contractions_dict.get(match.lower())
#         expanded_contraction = expanded_contraction
#         return expanded_contraction

#     expanded_text = contractions_pattern.sub(expand_match, text)
#     expanded_text = re.sub("'", "", expanded_text)
#     return expanded_text

# def main_contraction(text):
#     text = expand_contractions(text, contractions_dict)
#     return text

### 2. Remove or convert Number Into Text

In [76]:
def remove_numbers(text):
    output = ''.join(c for c in text if not c.isdigit())
    return output

In [77]:
text2 = 'Pada tahun 97 saya dilahirkan'
remove_numbers(text2)

'Pada tahun  saya dilahirkan'

### 3. Remove punctuation

In [78]:
def remove_punctuation(text):
    return ''.join(c for c in text if c not in punctuation)

In [79]:
text2 = 'Apakah kamu sudah makan?'
remove_punctuation(text2)

'Apakah kamu sudah makan'

### 4. Remove white space

In [80]:
def to_strip(text):
    return " ".join(text.split())

In [81]:
text = 'Saya dilahirkan pada tahun 1999   '
to_strip(text)

'Saya dilahirkan pada tahun 1999'

### 5. Remove Stopwords

In [82]:
def remove_stopwords(sentence):
    return ' '.join([w for w in nltk.word_tokenize(sentence) if not w in stop_words])

## Combine all function

In [83]:
def text_preprocessing(sentence):
    prep1 = to_lower(sentence)
    prep2 = remove_numbers(prep1)
    prep3 = remove_punctuation(prep2)
    prep4 = to_strip(prep3)
    prep5 = remove_stopwords(prep4)
    
    return prep5

In [84]:
df['tweet']

0     Soal jln Jatibaru,polisi tdk bs GERTAK gubernu...
1     Sesama cewe lho (kayaknya), harusnya bisa lebi...
2     Kepingin gudeg mbarek Bu hj. Amad Foto dari go...
3     Jln Jatibaru,bagian dari wilayah Tn Abang.Peng...
4     Sharing pengalaman aja, kemarin jam 18.00 bata...
                            ...                        
95    Mudah2an sudah terupload smua sebelum z mudik ...
96    Orang pendukung khilafah memang harus di black...
97    jangan sok akrab ye mention mention gue , mali...
98    Alhamdulillah Prof, setelah berbicara semalam ...
99    Keren!! Kira-kira masih ada nggak yg bilang Pa...
Name: tweet, Length: 100, dtype: object

In [85]:
df.head(1)

Unnamed: 0,label,tweet
0,anger,"Soal jln Jatibaru,polisi tdk bs GERTAK gubernu..."


In [86]:
df['tweet_clean'] = df['tweet'].apply(text_preprocessing)
df[['tweet', 'tweet_clean']]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['tweet_clean'] = df['tweet'].apply(text_preprocessing)


Unnamed: 0,tweet,tweet_clean
0,"Soal jln Jatibaru,polisi tdk bs GERTAK gubernu...",jln jatibarupolisi tdk bs gertak gubernur eman...
1,"Sesama cewe lho (kayaknya), harusnya bisa lebi...",cewe lho kayaknya rasain sibuk jaga rasain sak...
2,Kepingin gudeg mbarek Bu hj. Amad Foto dari go...,kepingin gudeg mbarek bu hj amad foto google s...
3,"Jln Jatibaru,bagian dari wilayah Tn Abang.Peng...",jln jatibarubagian wilayah tn abangpengaturan ...
4,"Sharing pengalaman aja, kemarin jam 18.00 bata...",sharing pengalaman aja kemarin jam batalin tik...
...,...,...
95,Mudah2an sudah terupload smua sebelum z mudik ...,mudahan terupload smua z mudik karna fans moda...
96,Orang pendukung khilafah memang harus di black...,orang pendukung khilafah black mail username u...
97,"jangan sok akrab ye mention mention gue , mali...",sok akrab ye mention mention gue maling aje yg...
98,"Alhamdulillah Prof, setelah berbicara semalam ...",alhamdulillah prof berbicara semalam username ...


# No 2

# Feature Engineering

1.CountVectorizer (BOW) <br>
2.TfidfVectorizer

In [87]:
# 1.CountVectorizer (BOW)

bow = CountVectorizer()

In [88]:
X_sentence = bow.fit_transform(df['tweet_clean'])

In [89]:
bow.get_feature_names()

['aalamiin',
 'aamiin',
 'abang',
 'abangpengaturan',
 'abis',
 'abng',
 'acara',
 'action',
 'adem',
 'adzan',
 'against',
 'agama',
 'air',
 'aja',
 'aje',
 'akal',
 'akrab',
 'akun',
 'akutu',
 'alam',
 'alamat',
 'alazhar',
 'alhamdulillah',
 'alhmdllh',
 'alihkan',
 'allah',
 'alm',
 'alur',
 'amad',
 'aman',
 'amatiran',
 'amn',
 'amp',
 'ampe',
 'an',
 'anak',
 'anakanak',
 'aneh',
 'anies',
 'aniespernah',
 'aniessalah',
 'anjing',
 'anniversary',
 'ansor',
 'anteng',
 'antiislam',
 'antrian',
 'apakayaknya',
 'apanya',
 'apapun',
 'apbd',
 'arah',
 'areal',
 'argumen',
 'asing',
 'atulah',
 'ayuting',
 'baca',
 'bacain',
 'bacanya',
 'badak',
 'bae',
 'bagus',
 'bahagia',
 'bahas',
 'bahaya',
 'bahayanya',
 'baiknya',
 'bakalnya',
 'bales',
 'bandit',
 'banget',
 'bangun',
 'bank',
 'bantu',
 'banyakin',
 'banyumas',
 'bapaknya',
 'baper',
 'bareng',
 'barokah',
 'baswedan',
 'batalin',
 'baunya',
 'bawaannye',
 'bawahi',
 'bayangkan',
 'bberapa',
 'bbpjn',
 'bcz',
 'bebas',
 

In [90]:
X_sentence.toarray()

array([[0, 0, 1, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 1, 0, 0],
       [1, 1, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 2, 0, 0]], dtype=int64)

In [91]:
pd.options.display.max_columns = 100

In [92]:
X_sentence_df = pd.DataFrame(X_sentence.toarray(), columns= bow.get_feature_names(), index=df['tweet_clean'])

In [93]:
X_sentence_df.tail(50)

Unnamed: 0_level_0,aalamiin,aamiin,abang,abangpengaturan,abis,abng,acara,action,adem,adzan,against,agama,air,aja,aje,akal,akrab,akun,akutu,alam,alamat,alazhar,alhamdulillah,alhmdllh,alihkan,allah,alm,alur,amad,aman,amatiran,amn,amp,ampe,an,anak,anakanak,aneh,anies,aniespernah,aniessalah,anjing,anniversary,ansor,anteng,antiislam,antrian,apakayaknya,apanya,apapun,...,twitter,uang,uda,udah,udeh,udh,ujian,ummat,undangan,unsur,update,upload,url,usaha,username,ustad,utk,utuh,vallen,verbal,versi,via,video,wa,wajar,wajarkecuali,wajarlah,wan,wani,wannaone,warisan,warns,web,wilayah,wilayahhak,wkt,wkwkw,woi,world,wwnang,ya,yaa,yah,yaow,yassalaaam,yatim,ye,yg,ygdisebarkan,youtube
tweet_clean,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1
hasil sekolah sekolah mikirlah mahkamah internasional kewenangan mengadili perkara genosida dsbnya,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
elu teh berasumsi guw mah kacang kulit maksudnya hayo maksud loooo,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
web permata krg profesional program cashback rb token pln klik info masuknya program paket kuota data piye iki,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
yg takutkan mas tumpangi politik dibalut agama yg bahaya soale masyarakat sensitif dgn,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0
cerita kesiapa udah kayak hancur hidup pikirin gimana hati ku sedih nangis menetes air mata kayaknya lirik lagu sayang pia palen deh wkwkw,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
puasa saur gak kesiangan ngerasain makan saur supeerrr dupeerr ekspress menit imsak bangun yassalaaam,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
jdi thr dri apbd kuasa takut garis bawahi kpk ggl dpt thr gitu ya buk mentri bilang aman ketakutan,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
tolong bantu username ya lptop kali gabisa buka dokumen not respondingpdhal dibawa hp center smg lptop produk gagal gimanabeli maret kalo gini trs jd jgkel sndri,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
udah masuk proses penjualan pas dibuka detail ko gini produk diproses diproses tgl,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
username semen indonesia nambang loh rembangmalah perusahaan yg lokasi meter dr areal tambang semen diprotes bertahun lamanyaknp pas semen diprotes,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0


In [94]:
# 2.TFIDFVectorizer

tfidf = TfidfVectorizer()

X2_sentence = tfidf.fit_transform(df['tweet_clean'])

In [95]:
X2_sentence_df = pd.DataFrame(X2_sentence.toarray(), columns= tfidf.get_feature_names(), index=df['tweet_clean'])

In [96]:
X2_sentence_df.head(50)

Unnamed: 0_level_0,aalamiin,aamiin,abang,abangpengaturan,abis,abng,acara,action,adem,adzan,against,agama,air,aja,aje,akal,akrab,akun,akutu,alam,alamat,alazhar,alhamdulillah,alhmdllh,alihkan,allah,alm,alur,amad,aman,amatiran,amn,amp,ampe,an,anak,anakanak,aneh,anies,aniespernah,aniessalah,anjing,anniversary,ansor,anteng,antiislam,antrian,apakayaknya,apanya,apapun,...,twitter,uang,uda,udah,udeh,udh,ujian,ummat,undangan,unsur,update,upload,url,usaha,username,ustad,utk,utuh,vallen,verbal,versi,via,video,wa,wajar,wajarkecuali,wajarlah,wan,wani,wannaone,warisan,warns,web,wilayah,wilayahhak,wkt,wkwkw,woi,world,wwnang,ya,yaa,yah,yaow,yassalaaam,yatim,ye,yg,ygdisebarkan,youtube
tweet_clean,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1
jln jatibarupolisi tdk bs gertak gubernur emangny polisi tdk pmbhasan jgn berpolitik pengaturan wilayahhak gubernur tn abang turun temurunpelikperlu kesabaran username username url,0.0,0.0,0.210828,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.112199,0.0,0.220218,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.210828,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
cewe lho kayaknya rasain sibuk jaga rasain sakitnya haid paniknya pulang malem gimana orang asing wajarlah korban takut curhat dibela dihujat,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.221315,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
kepingin gudeg mbarek bu hj amad foto google sengaja biar temanteman jg membayangkannya berbagi indah,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.264976,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
jln jatibarubagian wilayah tn abangpengaturan wilayah tgg jwb wwnang gubtng abng rumitsejak gub trdahuluskrg sedng dibenahiagr bermnfaat pihakmohon yg otakberpikirlah dgn wajarkecuali otaknya butekya url,0.0,0.0,0.0,0.201364,0.0,0.201364,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.107163,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.201364,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.402729,0.0,0.0,0.0,0.0,0.0,0.201364,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.087934,0.0,0.0
sharing pengalaman aja kemarin jam batalin tiket stasiun pasar senen lancar antrian rame menitan beress dicoba twips jamjam segitu cc username,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.151145,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.212185,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.120766,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
sekian thread baca thread aneh sih penulisan sumpah aneh bgt ngatain lebay aja bikin thread hadeh ga jago nulis tulisan ga seberantakan thread mbaknya,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111202,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.312222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
sharing temen tuh emg bgt disaat lu ngerasa lu berat bgt temen berat dr ngga apanya dibanding,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.224653,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
orang pake ponco jas hujan pake kasur ya gara saking gak mamak bilang tuh pernikan gak ngalamin madu kayak skrg org tidur nya aja pake ponco gimane madu,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.119848,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.110864,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
contoh yg gemar menyudutkan teriak toleran tp gemar menuduh gemar menjudge berdasarkan versi tau,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.235962,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.103043,0.0,0.0
pulang udah h lebaran dilema dirumah lebaran buka puasa bareng cuman keluarga sahur,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.183828,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
