In [42]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, precision_score
import pickle
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import nltk
import string
import pandas as pd 
import pymysql

In [43]:
def read_mysql_table(table, host='localhost', user='root', password='', database='review'):
    # Establish a connection to the MySQL database
    connection = pymysql.connect(
        host=host,
        user=user,
        password=password,
        database=database
    )
    
    # Create a cursor object to execute SQL queries
    cursor = connection.cursor()
    
    query = f"SELECT * FROM {table}"
    cursor.execute(query)
    result = cursor.fetchall()
    
    # Convert the result to a Pandas DataFrame
    df = pd.DataFrame(result)
    
    # Assign column names based on the cursor description
    df.columns = [column[0] for column in cursor.description]
    
    # Close the cursor and the database connection
    cursor.close()
    connection.close()
    
    return df

In [44]:
table_name = 'input_review'
df = read_mysql_table(table_name)
df.head()

Unnamed: 0,id_review,nama,tanggal,review
0,121,Suep,2023-12-01,Bagus
1,122,Verza,2023-12-02,Bagus bangett mantappp
2,123,Rosi,2023-12-03,Bagus
3,124,Tari,2023-12-04,aplikasinya oke bagus bangettt mantapppp
4,125,Tarmo,2023-12-05,aplikasinya oke bagus bangettt mantapppp


In [45]:
# text preprocessing
def preprocess_text(content):
    import nltk
    import re
    nltk.download('stopwords')
    nltk.download('punkt')

    # filtering

    text = re.sub(r'\W', ' ', str(content))
    text = re.sub(r'\s+[a-zA-Z]\s+', ' ', content)
    text = re.sub(r'\^[a-zA-Z]\s+', ' ', content)
    text = re.sub(r'\s+', ' ', content, flags=re.I)
    text = re.sub(r'^b\s+', '', content)

    # case folding
    text = text.lower()

    # Tokenisasi
    tokens = word_tokenize(text)

    # Menghapus stopwords
    stop_words = set(stopwords.words('indonesian'))
    tokens = [word for word in tokens if word.lower() not in stop_words]

    # Menggabungkan kembali tokens menjadi kalimat
    preprocessed_text = ' '.join(tokens)

    return preprocessed_text

# Melakukan preprocessing pada semua ulasan
df['preprocessed_text'] = df['review'].apply(preprocess_text)

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\rafae\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\rafae\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\rafae\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\rafae\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\rafae\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\rafae\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\

In [46]:
ulasan = df['preprocessed_text']

In [47]:
ulasan

0                                        bagus
1                       bagus bangett mantappp
2                                        bagus
3     aplikasinya oke bagus bangettt mantapppp
4     aplikasinya oke bagus bangettt mantapppp
5     aplikasinya oke bagus bangettt mantapppp
6                     bagus bangettt mantapppp
7                     bagus bangettt mantapppp
8                     bagus bangettt mantapppp
9                                         good
10                                       bagus
11                                       bagus
12                                       bagus
13                                jelek banget
14                       aplikasi jelek banget
15                       aplikasi jelek banget
16              sampah , aplikasi jelek banget
17                                 lumayan sih
18                                     lumayan
19                                lumayan sihh
20                                lumayan sihh
21           

In [48]:
# Specify the file path of the pickle file
file_path = 'Model/reviews_tfidf.pickle'

# Read the pickle file
with open(file_path, 'rb') as file:
    data_train = pickle.load(file)

In [49]:
data_train

['habis rekam video foto .. klo save nya nonton iklan 60 detik .. iklan buruk',
 'dipakai update . refresh . edit foto berbayar . payah .... cari keuntungan kah .....',
 'versi ribet , selfish sesuka developer edit foto sebebas versi , versi secerah apapun wajah , kusam , jerawat kondisikan sesuai pengguna , kembalikan versi ulasan dibintang 1 bertambah , versi hadir hilangkan versi',
 'd upgrade jd ngak bagus , interface ngak senyaman yg , filter natural , aplikasi d jalankan berat , crop ngak , muncul layar hitam d crop .',
 'versi terbaru nggak lengkap ? kembalikan versi aja min . filter . pengaturan warna nya nggak susah edit wana kulit/ terangi latar kayak warna daun pohonya .',
 'kirain bermasalah app nya trus uninstal download , pas buka penilaian pengguna . emang berubah , enak versi . perbaiki aja balikin versi . app andalan banget ngedit foto zaman 😫',
 'gak foto klik suruh bayar , bagusan payahh ahh males pake uinstal aja .',
 "camera 360 kembalikan versi yg sebelumya , tolo

In [50]:
# pembuatan vector kata
vectorizer = TfidfVectorizer()
train_vector = vectorizer.fit_transform(data_train)
review2 = ["".join(r) for r in ulasan]

In [51]:
load_model = pickle.load(open('Model/sentimen_model.pkl','rb'))

result = []

for test in review2:
    test_data = [str(test)]
    test_vector = vectorizer.transform(test_data)
    vektor_tfidf = test_vector.toarray()

    pred = load_model.predict(vektor_tfidf)
    result.append(pred[0])

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [52]:
result

[5, 5, 5, 5, 5, 5, 3, 3, 3, 5, 5, 5, 5, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3]

In [53]:
from sklearn.utils.multiclass import unique_labels
unique_labels(result)

array([1, 3, 5], dtype=int64)

In [54]:
df['label'] = result

In [55]:
df

Unnamed: 0,id_review,nama,tanggal,review,preprocessed_text,label
0,121,Suep,2023-12-01,Bagus,bagus,5
1,122,Verza,2023-12-02,Bagus bangett mantappp,bagus bangett mantappp,5
2,123,Rosi,2023-12-03,Bagus,bagus,5
3,124,Tari,2023-12-04,aplikasinya oke bagus bangettt mantapppp,aplikasinya oke bagus bangettt mantapppp,5
4,125,Tarmo,2023-12-05,aplikasinya oke bagus bangettt mantapppp,aplikasinya oke bagus bangettt mantapppp,5
5,126,Nopal,2023-12-06,aplikasinya oke bagus bangettt mantapppp,aplikasinya oke bagus bangettt mantapppp,5
6,127,Reza,2023-12-07,bagus bangettt mantapppp,bagus bangettt mantapppp,3
7,128,Feri,2023-12-08,bagus bangettt mantapppp,bagus bangettt mantapppp,3
8,129,Atha,2023-12-09,bagus bangettt mantapppp,bagus bangettt mantapppp,3
9,130,Nimah,2023-12-10,good,good,5


In [56]:
def delete_all_data_from_table(table, host='localhost', user='root', password='', database='review'):
    # Establish a connection to the MySQL database
    connection = pymysql.connect(
        host=host,
        user=user,
        password=password,
        database=database
    )
    
    # Create a cursor object to execute SQL queries
    cursor = connection.cursor()
    
    # Delete all data from the specified table
    query = f"DELETE FROM {table}"
    cursor.execute(query)
    
    # Commit the changes
    connection.commit()
    
    # Close the cursor and the database connection
    cursor.close()
    connection.close()

In [57]:
delete_all_data_from_table('input_review')

In [58]:
def insert_df_into_hasil_model(df, host='localhost', user='root', password='', database='review'):
    # Establish a connection to the MySQL database
    connection = pymysql.connect(
        host=host,
        user=user,
        password=password,
        database=database
    )

    # Create a cursor object to execute SQL queries
    cursor = connection.cursor()

    # Insert each row from the DataFrame into the 'hasil_model' table
    for index, row in df.iterrows():
        query = "INSERT INTO hasil_model (id_review, nama, tanggal, review, label) VALUES (%s, %s, %s, %s, %s)"
        cursor.execute(query, (row['id_review'], row['nama'], row['tanggal'], row['review'], row['label']))

    # Commit the changes
    connection.commit()

    # Close the cursor and the database connection
    cursor.close()
    connection.close()

In [59]:
insert_df_into_hasil_model(df)

In [60]:
table_name = 'hasil_model'
hasil_df = read_mysql_table(table_name)
hasil_df.to_csv('hasil_model2.csv')