In [1]:
import pandas as pd
import re
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from connection import get_data_from_api

In [2]:
product_data = get_data_from_api('product-data')

# Mengambil data dari database menggunakan query
dt_product_df = pd.DataFrame(product_data)
dt_product_df['rating'] = dt_product_df['rating'].fillna(0).astype(float)

display(dt_product_df.dtypes)
display(dt_product_df)

id                  int64
category_id         int64
subcategory_id      int64
name               object
skin_type          object
rating            float64
gender             object
skin_type_face     object
hair_issue         object
skin_type_body     object
user_id           float64
dtype: object

Unnamed: 0,id,category_id,subcategory_id,name,skin_type,rating,gender,skin_type_face,hair_issue,skin_type_body,user_id
0,4,2,10,Zinc Refreshing Cool - anti ketombe,Ketombe,5.0,Perempuan,sensitif,ketombe,kombinasi,32.0
1,4,2,10,Zinc Refreshing Cool - anti ketombe,Ketombe,4.0,Laki-laki,berminyak,berminyak,normal,50.0
2,4,2,10,Zinc Refreshing Cool - anti ketombe,Ketombe,5.0,Laki-laki,berminyak,rontok,normal,77.0
3,5,2,10,Zinc clean active - anti ketombe,Ketombe,1.0,Perempuan,normal,berminyak,normal,7.0
4,6,2,10,Zinc Active fresh - anti ketombe,Ketombe,4.0,Laki-laki,normal,kering,normal,16.0
...,...,...,...,...,...,...,...,...,...,...,...
578,316,3,18,Shinzui Glow Spa Honey,Kombinasi,5.0,Laki-laki,berminyak,ketombe,kering,47.0
579,316,3,18,Shinzui Glow Spa Honey,Kombinasi,5.0,Perempuan,sensitif,ketombe,kombinasi,32.0
580,316,3,18,Shinzui Glow Spa Honey,Kombinasi,4.0,Laki-laki,berminyak,kering,normal,15.0
581,316,3,18,Shinzui Glow Spa Honey,Kombinasi,5.0,Perempuan,berminyak,rontok,normal,58.0


In [3]:
# Preprocessing data teks
factory = StemmerFactory()
stemmer = factory.create_stemmer()
stop_words = set(stopwords.words('indonesian'))

def preprocess_text(text):
    text = str(text).lower()
    text = re.sub(r'[^\w\s]', '', text)
    tokens = word_tokenize(text)
    tokens = [word for word in tokens if word not in stop_words]
    tokens = [stemmer.stem(word) for word in tokens]
    text = ' '.join(tokens)
    return text

In [4]:
# Preprocessing kolom teks
text_columns = ['gender', 'skin_type', 'skin_type_face', 'hair_issue', 'skin_type_body']

for column in text_columns:
    dt_product_df[column] = dt_product_df[column].apply(preprocess_text)

# Menampilkan hasil preprocessing
display(dt_product_df)

Unnamed: 0,id,category_id,subcategory_id,name,skin_type,rating,gender,skin_type_face,hair_issue,skin_type_body,user_id
0,4,2,10,Zinc Refreshing Cool - anti ketombe,ketombe,5.0,perempuan,sensitif,ketombe,kombinasi,32.0
1,4,2,10,Zinc Refreshing Cool - anti ketombe,ketombe,4.0,lakilaki,minyak,minyak,normal,50.0
2,4,2,10,Zinc Refreshing Cool - anti ketombe,ketombe,5.0,lakilaki,minyak,rontok,normal,77.0
3,5,2,10,Zinc clean active - anti ketombe,ketombe,1.0,perempuan,normal,minyak,normal,7.0
4,6,2,10,Zinc Active fresh - anti ketombe,ketombe,4.0,lakilaki,normal,kering,normal,16.0
...,...,...,...,...,...,...,...,...,...,...,...
578,316,3,18,Shinzui Glow Spa Honey,kombinasi,5.0,lakilaki,minyak,ketombe,kering,47.0
579,316,3,18,Shinzui Glow Spa Honey,kombinasi,5.0,perempuan,sensitif,ketombe,kombinasi,32.0
580,316,3,18,Shinzui Glow Spa Honey,kombinasi,4.0,lakilaki,minyak,kering,normal,15.0
581,316,3,18,Shinzui Glow Spa Honey,kombinasi,5.0,perempuan,minyak,rontok,normal,58.0


In [5]:
# Lakukan pengelompokan berdasarkan category_id
grouped_data = dt_product_df.groupby('category_id')

# Tampilkan data untuk setiap kelompok
for category_id, group in grouped_data:
    print(f"Category ID: {category_id}")
    display(group)
    print("\n")

Category ID: 1


Unnamed: 0,id,category_id,subcategory_id,name,skin_type,rating,gender,skin_type_face,hair_issue,skin_type_body,user_id
93,46,1,1,Face Tonic Lemon,minyak,5.0,lakilaki,normal,normal,normal,17.0
94,46,1,1,Face Tonic Lemon,minyak,5.0,perempuan,minyak,minyak,kombinasi,28.0
95,46,1,1,Face Tonic Lemon,minyak,5.0,perempuan,normal,ketombe,normal,40.0
96,46,1,1,Face Tonic Lemon,minyak,4.0,lakilaki,sensitif,normal,normal,48.0
97,46,1,1,Face Tonic Lemon,minyak,5.0,perempuan,sensitif,ketombe,kombinasi,32.0
...,...,...,...,...,...,...,...,...,...,...,...
468,255,1,2,Herborist Raspberry & tomato,kombinasi,2.0,perempuan,minyak,minyak,normal,59.0
469,256,1,2,Herborist facial wash tea tree,minyak,0.0,none,none,none,none,
470,257,1,2,herborist facial wash rose,sensitif,0.0,none,none,none,none,
471,258,1,2,herborist facial wash zaitun,kombinasi,4.0,perempuan,sensitif,rontok,kering,64.0




Category ID: 2


Unnamed: 0,id,category_id,subcategory_id,name,skin_type,rating,gender,skin_type_face,hair_issue,skin_type_body,user_id
0,4,2,10,Zinc Refreshing Cool - anti ketombe,ketombe,5.0,perempuan,sensitif,ketombe,kombinasi,32.0
1,4,2,10,Zinc Refreshing Cool - anti ketombe,ketombe,4.0,lakilaki,minyak,minyak,normal,50.0
2,4,2,10,Zinc Refreshing Cool - anti ketombe,ketombe,5.0,lakilaki,minyak,rontok,normal,77.0
3,5,2,10,Zinc clean active - anti ketombe,ketombe,1.0,perempuan,normal,minyak,normal,7.0
4,6,2,10,Zinc Active fresh - anti ketombe,ketombe,4.0,lakilaki,normal,kering,normal,16.0
...,...,...,...,...,...,...,...,...,...,...,...
88,44,2,12,Putri Pure Clean Conditioner - fresh floral fr...,kering,5.0,perempuan,minyak,minyak,normal,3.0
89,44,2,12,Putri Pure Clean Conditioner - fresh floral fr...,kering,4.0,lakilaki,minyak,ketombe,kering,47.0
90,44,2,12,Putri Pure Clean Conditioner - fresh floral fr...,kering,5.0,perempuan,minyak,rontok,normal,57.0
91,44,2,12,Putri Pure Clean Conditioner - fresh floral fr...,kering,3.0,perempuan,kering,kering,kering,67.0




Category ID: 3


Unnamed: 0,id,category_id,subcategory_id,name,skin_type,rating,gender,skin_type_face,hair_issue,skin_type_body,user_id
257,114,3,16,Gatsby Durable Deo Shield,kering,5.0,perempuan,sensitif,kering,normal,37.0
258,114,3,16,Gatsby Durable Deo Shield,kering,5.0,perempuan,sensitif,ketombe,kombinasi,32.0
259,115,3,16,Gatsby Quick 3in1 All Body,normal,0.0,none,none,none,none,
260,116,3,16,Gatsby Shoking Ice Wave,normal,5.0,lakilaki,minyak,ketombe,minyak,45.0
261,116,3,16,Gatsby Shoking Ice Wave,normal,4.0,lakilaki,normal,rontok,kering,66.0
...,...,...,...,...,...,...,...,...,...,...,...
578,316,3,18,Shinzui Glow Spa Honey,kombinasi,5.0,lakilaki,minyak,ketombe,kering,47.0
579,316,3,18,Shinzui Glow Spa Honey,kombinasi,5.0,perempuan,sensitif,ketombe,kombinasi,32.0
580,316,3,18,Shinzui Glow Spa Honey,kombinasi,4.0,lakilaki,minyak,kering,normal,15.0
581,316,3,18,Shinzui Glow Spa Honey,kombinasi,5.0,perempuan,minyak,rontok,normal,58.0






In [8]:
# Fungsi untuk menghitung TF-IDF dan similaritas kosinus serta memberikan rekomendasi
def calculate_similarity(group):
    # Inisialisasi TF-IDF Vectorizer
    tfidf_vectorizer = TfidfVectorizer()

    # Ambil atribut untuk perhitungan (subcategory_id dan skin_type)
    attributes = group[['subcategory_id', 'skin_type_face', 'hair_issue', 'skin_type_body']].astype(str).apply(lambda x: ' '.join(x), axis=1)

    # Hitung TF-IDF
    tfidf_matrix = tfidf_vectorizer.fit_transform(attributes)

    # Tampilkan nilai bobot dari hasil perhitungan TF-IDF
    print("TF-IDF weights:")
    words = tfidf_vectorizer.get_feature_names_out()
    for i, doc in enumerate(tfidf_matrix.toarray()):
        print(f"Product {group.iloc[i]['id']} : {group.iloc[i]['name']}:")
        for j, word in enumerate(words):
            print(f"{word}: {doc[j]:.2f}")
        print()

    # Lakukan iterasi melalui setiap produk sebagai query
    for i, query_index in enumerate(range(len(group))):
        # Ambil query dan lakukan reshape
        query = tfidf_matrix[query_index]

        # Hitung similaritas kosinus antara query dan semua produk
        cosine_similarities = cosine_similarity(query, tfidf_matrix).flatten()

        # Urutkan indeks produk berdasarkan similaritas kosinus
        similar_indices = cosine_similarities.argsort()[::-1]

        # Tampilkan hasil similaritas kosinus untuk setiap produk
        print(f"Query Product: {group.iloc[query_index]['id']} : {group.iloc[query_index]['name']}")
        top_product_names = []
        for j in similar_indices[:10]:  # Ambil 10 hasil teratas
            if j != query_index:
                top_product_names.append(group.iloc[j]['name'])
                print(f"Similarity with Product {group.iloc[j]['id']} - {group.iloc[j]['name']}: {cosine_similarities[j]:.2f}")

        # Cari nilai bobot paling tertinggi-terendah
        max_weight_index = tfidf_matrix[query_index].toarray().argmax()
        min_weight_index = tfidf_matrix[query_index].toarray().argmin()
        max_weight_product = group.iloc[max_weight_index]['id']
        min_weight_product = group.iloc[min_weight_index]['id']
        print(f"Highest weight product: {max_weight_product} : {group.iloc[max_weight_index]['name']}, Weight: {tfidf_matrix[query_index].toarray().max():.2f}")
        print(f"Lowest weight product: {min_weight_product} : {group.iloc[min_weight_index]['name']}, Weight: {tfidf_matrix[query_index].toarray().min():.2f}")

        # Cari hasil tingkat kemiripan yang mendekati 1
        for j, similarity in enumerate(cosine_similarities):
            if similarity > 0.95 and j != query_index:  # Ubah threshold sesuai kebutuhan
                print(f"High similarity with Product {group.iloc[j]['id']} : {group.iloc[j]['name']}: {similarity:.2f}")
                break  # Keluar dari loop setelah menemukan tingkat kemiripan yang mendekati 1
        print()

        # Tampilkan daftar nama 10 produk teratas
        print(30*"=")
        print("Top 10 Recommended Products:")
        for name in top_product_names:
            print(name)
        print()

In [9]:
# Iterasi melalui setiap kelompok
for category_id, group in grouped_data:
    print(f"Category ID: {category_id}")
    # Hitung similaritas untuk setiap kelompok
    calculate_similarity(group)

Category ID: 1
TF-IDF weights:
Product 46 : Face Tonic Lemon:
cabang: 0.00
kering: 0.00
ketombe: 0.00
kombinasi: 0.00
minyak: 0.00
none: 0.00
normal: 1.00
rontok: 0.00
sensitif: 0.00

Product 46 : Face Tonic Lemon:
cabang: 0.00
kering: 0.00
ketombe: 0.00
kombinasi: 0.54
minyak: 0.84
none: 0.00
normal: 0.00
rontok: 0.00
sensitif: 0.00

Product 46 : Face Tonic Lemon:
cabang: 0.00
kering: 0.00
ketombe: 0.66
kombinasi: 0.00
minyak: 0.00
none: 0.00
normal: 0.75
rontok: 0.00
sensitif: 0.00

Product 46 : Face Tonic Lemon:
cabang: 0.00
kering: 0.00
ketombe: 0.00
kombinasi: 0.00
minyak: 0.00
none: 0.00
normal: 0.71
rontok: 0.00
sensitif: 0.71

Product 46 : Face Tonic Lemon:
cabang: 0.00
kering: 0.00
ketombe: 0.56
kombinasi: 0.55
minyak: 0.00
none: 0.00
normal: 0.00
rontok: 0.00
sensitif: 0.63

Product 47 : Face Tonic Bengkoang:
cabang: 0.00
kering: 0.00
ketombe: 0.00
kombinasi: 0.00
minyak: 0.56
none: 0.00
normal: 0.83
rontok: 0.00
sensitif: 0.00

Product 47 : Face Tonic Bengkoang:
cabang: 0.00