In [1]:
import torch
import pandas as pd
import numpy as np
import joblib
import os
import matplotlib.pyplot as plt
import seaborn as sns
import transformers
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from torch.utils.data import Dataset
from torch.utils.data import DataLoader


from transformers import BertTokenizer, AutoModel

from Preprocessing import preprocess_text

# os.chdir('C:/Users/LENOVO/GitHub/Jurnal-Clustering')

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class BertClassifier(nn.Module):
    def __init__(self, num_labels):
        super(BertClassifier, self).__init__()
        self.bert = AutoModel.from_pretrained('bert-base-multilingual-cased')
        self.classifier = nn.Sequential(
            nn.Linear(self.bert.config.hidden_size, 300),
            nn.ReLU(),
            nn.Linear(300, 100),
            nn.ReLU(),
            nn.Linear(100, 50),
            nn.ReLU(),
            nn.Linear(50, num_labels)
        )

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        x = outputs['last_hidden_state'][:, 0, :]
        x = self.classifier(x)
        return x

In [3]:
from torch.utils.data import Dataset, DataLoader

# Buat dataloader
class ArticleDataset(Dataset):
    def __init__(self, input_ids, attention_masks):
        self.input_ids = input_ids
        self.attention_masks = attention_masks

    def __getitem__(self, idx):
        return {
            'input_ids': self.input_ids[idx],
            'attention_mask': self.attention_masks[idx]
        }

    def __len__(self):
        return len(self.input_ids)


In [4]:
def tokenize_data(texts, tokenizer, max_length=256):
    input_ids = []
    attention_masks = []

    for text in texts:
        encoded_dict = tokenizer.encode_plus(
                            text,
                            add_special_tokens=True,
                            max_length=max_length,
                            pad_to_max_length=True,
                            return_attention_mask=True,
                            return_tensors='pt'
                       )
        input_ids.append(encoded_dict['input_ids'])
        attention_masks.append(encoded_dict['attention_mask'])

    input_ids = torch.cat(input_ids, dim=0)
    attention_masks = torch.cat(attention_masks, dim=0)

    return input_ids, attention_masks

In [2]:
df = pd.read_csv('data/extracted_publication_journal_s1.csv')
df.dropna(inplace=True)
df.drop_duplicates(inplace=True)

df


Unnamed: 0,journal,title,abstract
0,25024760,Localization schemes in Underwater Sensor Netw...,<jats:p>&lt;p&gt;Underwater Wireless Sensor Ne...
1,25024760,A Review on Voltage Balancing Solutions in Mul...,<jats:p>&lt;p&gt;Multilevel inverters are used...
2,25024760,Collision Detection and Trajectory Planning fo...,<jats:p>This paper proposes an algorithm for C...
3,25024760,A New Method for Optimal Coordination of Overc...,<jats:p>&lt;p&gt;The most of the new protectiv...
4,25024760,Fairness Evaluation and Comparison of Current ...,<jats:p>Transmission Control Protocol (TCP) is...
...,...,...,...
79988,25409581,Hesperitin Synergistically Promotes the Senesc...,"<jats:p>Pentagamavunone-1 (PGV-1), a curcumin ..."
79989,25409581,First Report on Wild Occurrences of Phoenix Mu...,<jats:p>The genus Pleurotus is known as a comm...
79990,25409581,Nannoplankton Biostratigraphy from Banggai-Sul...,<jats:p>The nannoplankton research was conduct...
79991,25409581,Spatial Modelling Habitat Suitability of Javan...,<jats:p>Javan Langur (T. auratus) is well-know...


In [12]:
df.journal.nunique()

123

In [13]:
df.journal.unique()

array(['25024760', '23387238', '2442-8620', '25278045', '27224708',
       '24610399', '26141566', '24423084', '27155072', '24769304',
       '26148013', '27765938', '2406825x', '20888708', '22528938',
       '25488465', '25023357', '24070610', '24610275', '23029277',
       '24610771', '25499904', '22524940', '24606952', '23029285',
       '24606278', '25277456', '25494333', '20854722', '24609196',
       '2338557X', '24432555', '25025791', '24606618', '26156636',
       '24078646', '23548509', '25992147', '24607010', '20888694',
       '24429899', '23556544', '25493167', '25481592', '23556145',
       '23385499', '19782993', '25989685', '24609285', '23549203',
       '24778036', '25483161', '27222594', '25483382', '25027883',
       '24774073', '27156079', '25495747', '26153386', '24067598',
       '24077542', '25992570', '24068799', '24068195', '25029266',
       '23561424', '23385510', '25279238', '23389486', '24429325',
       '2615790X', '26219468', '20864094', '25804391', '24601

In [6]:
cwd = os.getcwd()

tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')
model = BertClassifier(12)
model.load_state_dict(torch.load('model/finetuned_model_sinta_translated.pt'))

# Membuat dataloader
batch_size = 32


In [11]:
for jour in df.journal.unique():

    journal_type = 'sinta_new_data_s1'

    # Assuming 'jurnal_id' is a variable containing the directory name
    file_path = os.path.join('src', journal_type, jour)

    if not os.path.exists(file_path):
        os.mkdir(file_path)

        data = df[df['journal'] == jour]
        data['data_cleaned'] = data['title'] + data['abstract'] 
        data['data_cleaned'].apply(lambda x : preprocess_text(x))
        X = list(data['data_cleaned'])

        input_ids, attention_masks = tokenize_data(X, tokenizer)
        dataset = ArticleDataset(input_ids, attention_masks)
        dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

        device = 'cpu'
        if torch.cuda.is_available() :
            device = 'cuda'

        model.to(device)

        # Set model ke mode evaluasi (non-training)
        model.eval()

        # Embedding
        embeddings = []

        with torch.no_grad():
            for batch in dataloader:
                input_ids = batch['input_ids'].to(device)
                attention_mask = batch['attention_mask'].to(device)

                outputs = model(input_ids, attention_mask=attention_mask)
                embeddings.append(outputs.cpu().numpy())

        embeddings = np.concatenate(embeddings, axis=0)

        # Mengubah array embeddings menjadi matriks dua dimensi
        X = embeddings.reshape(embeddings.shape[0], -1)

        pca = PCA(n_components=2, random_state=0)
        X = pca.fit_transform(X)

        # Perform KMeans clustering
        num_clusters = 1
        kmeans = KMeans(n_clusters=num_clusters, random_state=0, max_iter=1000)
        kmeans.fit(X)

        # Assign each journal to its cluster
        cluster_labels = kmeans.labels_

        # Mendapatkan koordinat pusat cluster
        centroid = kmeans.cluster_centers_

        # Menghitung jarak antara setiap titik data dengan centroid
        jarak_ke_centroid = np.sqrt(np.sum((X - centroid)**2, axis=1))

        # Menentukan batas jarak yang dianggap sebagai "outscoop"
        outscoop_threshold = np.mean(jarak_ke_centroid) + 2 * np.std(jarak_ke_centroid)

        # Memisahkan data yang masih masuk dalam "scoop" dan "outscoop"
        scoop_data = X[jarak_ke_centroid <= outscoop_threshold]
        outscoop_data = X[jarak_ke_centroid > outscoop_threshold]

        scoop_labels = np.ones(len(X))
        scoop_labels[jarak_ke_centroid > outscoop_threshold] = -1

        filename_kmeans = f"{file_path}/{jour}_kmeans.pkl"
        joblib.dump(kmeans, filename_kmeans)

        np.save(f"{file_path}/{jour}_threshold.npy", outscoop_threshold)
        np.save(f"{file_path}/{jour}_pca_data.npy", X)
        np.save(f"{file_path}/{jour}_bert_data.npy", embeddings.reshape(embeddings.shape[0], -1))

        df_res = pd.DataFrame({'Data': data['data_cleaned'],
                   'Label': scoop_labels})

        inScoop_df = df_res[df_res['Label'] == 1]
        outScoop_df = df_res[df_res['Label'] == -1]

        df_res.to_csv(f'{file_path}/{jour}_data_jurnal.csv')
        inScoop_df.to_csv(f'{file_path}/{jour}_inscoop_data_jurnal.csv')
        outScoop_df.to_csv(f'{file_path}/{jour}_outscoop_data_jurnal.csv')

        print("Data sebaran PCA {} telah disimpan.".format(jour))
    else:
        pass

    


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 2442-8620 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 25278045 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 27224708 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 24610399 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 26141566 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 24423084 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 27155072 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 24769304 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 26148013 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 27765938 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 2406825x telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 20888708 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 22528938 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 25488465 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 25023357 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 24070610 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 24610275 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 23029277 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 24610771 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 25499904 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 22524940 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 24606952 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 23029285 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 24606278 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 25277456 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 25494333 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 20854722 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 24609196 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 2338557X telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 24432555 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 25025791 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 24606618 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 26156636 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 24078646 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 23548509 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 25992147 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 24607010 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 20888694 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 24429899 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 23556544 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 25493167 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 25481592 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 23556145 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 23385499 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 19782993 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 25989685 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 24609285 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 23549203 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 24778036 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 25483161 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 27222594 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 25483382 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 25027883 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 24774073 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 27156079 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 25495747 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 26153386 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 24067598 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 24077542 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 25992570 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 24068799 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 24068195 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 25029266 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 23561424 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 23385510 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 25279238 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 23389486 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 24429325 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 2615790X telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 26219468 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 20864094 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 25804391 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 24601578 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 20892063 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 2502180X telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 25026429 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 23385502 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 24606588 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 20886985 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 2527922X telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 2252696X telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 23559179 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 1979570X telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 23564512 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 24069272 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 25280759 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 23565322 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 2442871X telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 23559306 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 25026577 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 20878575 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 24776416 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 24429740 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 24072230 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 1907770x telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 24075825 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 22528083 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 23549114 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 20894392 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 23562641 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 20875886 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 24608149 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 23553596 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 25412426 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 25030310 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 23387610 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 2338476X telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 23564644 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 23563656 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 25274376 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 24076899 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 24600601 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 25026747 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 23378824 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 22249028 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 25416464 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 23381353 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 25410520 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 24600245 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 26228645 telah disimpan.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


Data sebaran PCA 25409581 telah disimpan.


In [None]:
# data = df[df['journal'] == '25024760']
# data['data_cleaned'] = data['title'] + data['abstract'] 
# data['data_cleaned'].apply(lambda x : preprocess_text(x))
# X = list(data['data_cleaned'])

# input_ids, attention_masks = tokenize_data(X, tokenizer)
# dataset = ArticleDataset(input_ids, attention_masks)
# dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['data_cleaned'] = data['title'] + data['abstract']


In [None]:
# device = 'cpu'
# if torch.cuda.is_available() :
#     device = 'cuda'

# model.to(device)

# # Set model ke mode evaluasi (non-training)
# model.eval()

# # Embedding
# embeddings = []

# with torch.no_grad():
#     for batch in dataloader:
#         input_ids = batch['input_ids'].to(device)
#         attention_mask = batch['attention_mask'].to(device)

#         outputs = model(input_ids, attention_mask=attention_mask)
#         embeddings.append(outputs.cpu().numpy())

# embeddings = np.concatenate(embeddings, axis=0)

In [8]:




# # Mengubah array embeddings menjadi matriks dua dimensi
# X = embeddings.reshape(embeddings.shape[0], -1)

# pca = PCA(n_components=2, random_state=0)
# X = pca.fit_transform(X)

# # Perform KMeans clustering
# num_clusters = 1
# kmeans = KMeans(n_clusters=num_clusters, random_state=0, max_iter=1000)
# kmeans.fit(X)

# # Assign each journal to its cluster
# cluster_labels = kmeans.labels_

# # Mendapatkan koordinat pusat cluster
# centroid = kmeans.cluster_centers_

# # Menghitung jarak antara setiap titik data dengan centroid
# jarak_ke_centroid = np.sqrt(np.sum((X - centroid)**2, axis=1))

# # Menentukan batas jarak yang dianggap sebagai "outscoop"
# outscoop_threshold = np.mean(jarak_ke_centroid) + 2 * np.std(jarak_ke_centroid)

# # Memisahkan data yang masih masuk dalam "scoop" dan "outscoop"
# scoop_data = X[jarak_ke_centroid <= outscoop_threshold]
# outscoop_data = X[jarak_ke_centroid > outscoop_threshold]

# scoop_labels = np.ones(len(X))
# scoop_labels[jarak_ke_centroid > outscoop_threshold] = -1

# filename_kmeans = f"{file_path}/{jour}_kmeans.pkl"
# joblib.dump(kmeans, filename_kmeans)

# np.save(f"{file_path}/{jour}_threshold.npy", outscoop_threshold)
# np.save(f"{file_path}/{jour}_pca_data.npy", X)
# np.save(f"{file_path}/{jour}_bert_data.npy", embeddings.reshape(embeddings.shape[0], -1))

# df_res = pd.DataFrame({'Data': data['data_cleaned'],
#             'Label': scoop_labels})

# inScoop_df = df_res[df_res['Label'] == 1]
# outScoop_df = df_res[df_res['Label'] == -1]

# df_res.to_csv(f'{file_path}/{jour}_data_jurnal.csv')
# inScoop_df.to_csv(f'{file_path}/{jour}_inscoop_data_jurnal.csv')
# outScoop_df.to_csv(f'{file_path}/{jour}_outscoop_data_jurnal.csv')
