# Data Preparation

## Converting JSON to CSV with Random Sampling

### Dependencies & Load .JSON files

In [1]:
import json
import random
import csv
import os
from tqdm import tqdm
import pandas as pd
import numpy as np

In [2]:
#load json
def load_json(file_path):
    """Read the UTF-8 encoded JSON file at ``file_path`` and return the decoded object."""
    with open(file_path, mode='r', encoding='utf-8') as handle:
        parsed = json.load(handle)
    return parsed


### Random sampling for the train split: 10,000 records

In [3]:
def random_sampling_from_files(directory, sample_size, seed=None):
    """Pick up to ``sample_size`` JSON files at random and take one record from each.

    Parameters
    ----------
    directory : str
        Folder scanned (non-recursively) for file names ending in ``.json``.
    sample_size : int
        Number of files to draw without replacement. If it exceeds the number
        of JSON files found, a warning is printed and every file is used.
    seed : int, optional
        Seed for a private ``random.Random`` so that the same files and records
        are selected on every run. With the default ``None`` the shared
        module-level generator is used, as before.

    Returns
    -------
    list
        One entry per usable file: a randomly chosen element when the file
        holds a non-empty JSON array, or the object itself when it holds a JSON
        object. Files that cannot be read or parsed, or that hold anything
        else, are reported and skipped, so the result may be shorter than
        ``sample_size``.
    """
    # A dedicated generator keeps seeded runs independent of other random calls.
    rng = random if seed is None else random.Random(seed)

    # os.listdir makes no ordering guarantee; sorting gives a seeded run the
    # same candidate list every time.
    all_files = sorted(f for f in os.listdir(directory) if f.endswith('.json'))
    total_files = len(all_files)

    if sample_size > total_files:
        print(f"Warning: Sample size ({sample_size}) is larger than the number of files ({total_files}). Using all files.")
        files_to_sample = all_files
    else:
        files_to_sample = rng.sample(all_files, sample_size)

    sampled_records = []
    for file_name in tqdm(files_to_sample, desc="Sampling files"):
        file_path = os.path.join(directory, file_name)

        try:
            data = load_json(file_path)
            if isinstance(data, list) and len(data) > 0:
                sampled_records.append(rng.choice(data))
            elif isinstance(data, dict):
                sampled_records.append(data)
            else:
                # Empty arrays and scalar JSON values used to vanish silently.
                print(f"No usable record in {file_name}. Skipping.")
        except FileNotFoundError:
            print(f"File {file_name} not found. Skipping.")
        except json.JSONDecodeError:
            print(f"Error decoding {file_name}. Skipping.")
        except Exception as e:
            # Deliberate best-effort: one bad file must not abort the whole run.
            print(f"Error processing {file_name}: {str(e)}. Skipping.")

    return sampled_records

def main():
    """Sample the train split and store the records in the global ``sampled_data``."""
    # Declared global because later cells read ``sampled_data`` from the notebook namespace.
    global sampled_data

    # NOTE(review): the literal is a raw string AND doubles its backslashes, so the
    # path value contains doubled separators; one backslash per separator is the
    # usual spelling for a raw string.
    sampled_data = random_sampling_from_files(
        directory=r'F:\\AI Portfolio Project\\Project 2\\liputan6_data\\canonical\\train',  # train-split folder
        sample_size=10000,  # fixed at 10k
    )

    print("Random sampling complete.")


if __name__ == "__main__":
    main()

Sampling files: 100%|██████████| 10000/10000 [02:33<00:00, 65.31it/s]


Random sampling complete.


In [11]:
# Alias the sampled train records under a split-specific name (same list object, not a copy).
sampled_data_train = sampled_data
# Display the first sampled record for a quick visual check.
sampled_data_train[0]

{'id': 288252,
 'url': 'https://www.liputan6.com/news/read/288252/pemerintah-akui-kemungkinan-kualitas-premium-kurang-baik',
 'clean_article': [['Liputan6',
   '.',
   'com',
   ',',
   'Jakarta',
   ':',
   'Hingga',
   'kini',
   'kerusakan',
   'fuel',
   'pump',
   'atau',
   'pompa',
   'bahan',
   'bakar',
   'pada',
   'mobil',
   'yang',
   'menggunakan',
   'premium',
   'masih',
   'saja',
   'terjadi',
   '.'],
  ['Bengkel',
   'umum',
   'pun',
   'masih',
   'terus',
   'kedatangan',
   'mobil',
   'yang',
   'rusak',
   '.'],
  ['Menurut',
   'sejumlah',
   'mekanik',
   'yang',
   'ada',
   'di',
   'bengkel',
   'di',
   'berbagai',
   'daerah',
   ',',
   'telah',
   'terjadi',
   'peningkatan',
   'mobil',
   'yang',
   'mengalami',
   'kerusakan',
   'pompa',
   'bahan',
   'bakar',
   '.'],
  ['"',
   'Kualitas',
   'BBM',
   'merupakan',
   'salah',
   'satu',
   'faktor',
   'penting',
   'dalam',
   'kerusakan',
   'pompa',
   'BBM',
   ',',
   'sehingga',
   'mo

In [12]:
# Flatten the sampled records into a DataFrame; the "combined_clean_article" and
# "combined_clean_summary" columns are added at the end of this cell.
data_train = pd.json_normalize(sampled_data_train)

def normalize_text(tokenized_text):
    """Join a list of token lists (one list per sentence) into a single string.

    Tokens are separated by one space, except that closing punctuation is
    attached to the preceding token and an opening bracket is attached to the
    token that follows it. Surrounding whitespace is stripped from the result.
    """
    attach_left = (".", ",", ":", ";", "!", "?", ")", "]")
    attach_right = ("(", "[")

    result = ""
    for token in (tok for sent in tokenized_text for tok in sent):
        if token in attach_left:
            # Drop the space left by the previous token before gluing on the mark.
            result = result.rstrip() + token + " "
            continue
        result += token if token in attach_right else token + " "
    return result.strip()


# Turn each row's tokenized article / summary into one plain-text string.
data_train['combined_clean_article'] = data_train['clean_article'].apply(normalize_text)
data_train['combined_clean_summary'] = data_train['clean_summary'].apply(normalize_text)

In [13]:
# Check 1: preview the first rows of the train DataFrame.
data_train.head()

Unnamed: 0,id,url,clean_article,clean_summary,extractive_summary,combined_clean_article,combined_clean_summary
0,288252,https://www.liputan6.com/news/read/288252/peme...,"[[Liputan6, ., com, ,, Jakarta, :, Hingga, kin...","[[Meski, belum, ada, hasil, resmi, analisa, pe...","[4, 5]","Liputan6. com, Jakarta: Hingga kini kerusakan ...",Meski belum ada hasil resmi analisa penyebab k...
1,148249,https://www.liputan6.com/news/read/148249/fred...,"[[Liputan6, ., com, ,, Jakarta, :, Freddy, San...","[[Freddy, Santoso, ,, pelaku, penyuapan, terha...","[0, 2]","Liputan6. com, Jakarta: Freddy Santoso, pelaku...","Freddy Santoso, pelaku penyuapan terhadap Iraw..."
2,30242,https://www.liputan6.com/news/read/30242/perke...,"[[Liputan6, ., com, ,, Jakarta, :, Fotografi, ...","[[Fotografi, mode, terus, berkembang, dengan, ...","[0, 2, 4]","Liputan6. com, Jakarta: Fotografi mode kini se...",Fotografi mode terus berkembang dengan lebih m...
3,198447,https://www.liputan6.com/news/read/198447/pks-...,"[[Liputan6, ., com, ,, Subang, :, Pro, kontra,...","[[Seniman, Subang, menyerukan, memboikot, PKS,...","[2, 6]","Liputan6. com, Subang: Pro kontra tari jaipong...",Seniman Subang menyerukan memboikot PKS dan me...
4,27679,https://www.liputan6.com/news/read/27679/pemda...,"[[Liputan6, ., com, ,, Jakarta, :, Pemda, Jaka...","[[Suku, Dinas, Kependudukan, DKI, Jakarta, men...","[1, 9, 3]","Liputan6. com, Jakarta: Pemda Jakarta menggela...",Suku Dinas Kependudukan DKI Jakarta menggelar ...


In [16]:
# Check 2: show the combined article and summary text of one randomly chosen row.
data_train.sample(1)[["combined_clean_article", "combined_clean_summary"]].values

array([['Liputan6. com, Jakarta: Kepolisian RI membentuk tim khusus untuk membuktikan kebenaran dugaan adanya politik uang dalam pemilihan Gubernur DKI, beberapa waktu silam. Demikian diungkapkan Kepala Bagian Hubungan Masyarakat Polri Inspektur Jenderal Polisi Saleh Saaf, yang ditemui di ruang kerjanya, Selasa (17/9) [baca: Polisi Menyelidiki Kasus Suap Pemilihan Gubernur Jakarta]. Sayangnya, Saleh tak bersedia memberikan nama dan jumlah personel yang bekerja untuk kasus ini. Yang pasti, tambah Saleh, dalam waktu dekat ada seorang tokoh masyarakat yang namanya dirahasiakan akan datang ke Markas Besar Polri untuk memberikan keterangan seputar kasus tersebut. Ia juga mengaku hingga saat ini belum ditemukan adanya pengakuan, pengaduan, laporan atau bukti yang dapat dijadikan alasan dugaan adanya money politic dalam pemilihan tersebut. (PIN/Rubai Kadir dan Yosep HL).',
        'Polri membentuk tim khusus untuk membuktikan kebenaran dugaan adanya politik uang dalam pemilihan Gubernur DKI. 

In [17]:
# Save the train DataFrame as CSV (without the index column).
output_file = 'F:\\AI Portfolio Project\\Project 2-new\\datasets\\data_train.csv' # Output path (backslashes are doubled so they are not read as escape sequences)
data_train.to_csv(output_file, index=False)
print(f"total {len(data_train)} items saved to {output_file}")

total 10000 items saved to F:\AI Portfolio Project\Project 2-new\datasets\data_train.csv


In [14]:
# "Before" view (focus on clean_article and clean_summary).
# NOTE(review): this cell's execution count (14) is lower than that of the save
# cell (17), so its stored output presumably reflects an earlier version of the
# CSV file — confirm by re-running top to bottom.
# Raw string: the backslashes in the Windows path must not be parsed as escape
# sequences (`\A`, `\P`, `\d` are invalid escapes and trigger a warning).
check_train = pd.read_csv(r'F:\AI Portfolio Project\Project 2-new\datasets\data_train.csv')
check_train.head()

Unnamed: 0,extractive_summary,url,clean_article,clean_summary,id
0,"[0, 2]",https://www.liputan6.com/news/read/269233/pasu...,"[['Liputan6', '.', 'com', ',', 'Parachinar', '...","[['Sedikitnya', '14', 'orang', 'militan', 'tew...",269233
1,"[0, 2]",https://www.liputan6.com/news/read/55940/gembo...,"[['Liputan6', '.', 'com', ',', 'Madura', ':', ...","[['Polisi', 'menangkap', 'seorang', 'gembong',...",55940
2,"[0, 1]",https://www.liputan6.com/news/read/201133/pela...,"[['Akhirnya', 'Federasi', 'Sepak', 'Bola', 'Ar...","[['Lantaran', 'prestasi', 'tim', 'Arab', 'Saud...",201133
3,"[4, 6]",https://www.liputan6.com/news/read/100076/kiai...,"[['Liputan6', '.', 'com', ',', 'Tuban', ':', '...","[['Konflik', 'yang', 'terjadi', 'di', 'PKB', '...",100076
4,"[0, 1]",https://www.liputan6.com/news/read/168844/adam...,"[['Liputan6', '.', 'com', ',', 'Jakarta', ':',...","[['Mantan', 'Wapres', 'Adam', 'Malik', 'ditudi...",168844


In [18]:
# "After" view (focus on clean_article and clean_summary): re-read the saved train CSV.
# Raw string: the backslashes in the Windows path must not be parsed as escape
# sequences (`\A`, `\P`, `\d` are invalid escapes and trigger a warning).
check_train_after = pd.read_csv(r'F:\AI Portfolio Project\Project 2-new\datasets\data_train.csv')
check_train_after.head()

Unnamed: 0,id,url,clean_article,clean_summary,extractive_summary,combined_clean_article,combined_clean_summary
0,288252,https://www.liputan6.com/news/read/288252/peme...,"[['Liputan6', '.', 'com', ',', 'Jakarta', ':',...","[['Meski', 'belum', 'ada', 'hasil', 'resmi', '...","[4, 5]","Liputan6. com, Jakarta: Hingga kini kerusakan ...",Meski belum ada hasil resmi analisa penyebab k...
1,148249,https://www.liputan6.com/news/read/148249/fred...,"[['Liputan6', '.', 'com', ',', 'Jakarta', ':',...","[['Freddy', 'Santoso', ',', 'pelaku', 'penyuap...","[0, 2]","Liputan6. com, Jakarta: Freddy Santoso, pelaku...","Freddy Santoso, pelaku penyuapan terhadap Iraw..."
2,30242,https://www.liputan6.com/news/read/30242/perke...,"[['Liputan6', '.', 'com', ',', 'Jakarta', ':',...","[['Fotografi', 'mode', 'terus', 'berkembang', ...","[0, 2, 4]","Liputan6. com, Jakarta: Fotografi mode kini se...",Fotografi mode terus berkembang dengan lebih m...
3,198447,https://www.liputan6.com/news/read/198447/pks-...,"[['Liputan6', '.', 'com', ',', 'Subang', ':', ...","[['Seniman', 'Subang', 'menyerukan', 'memboiko...","[2, 6]","Liputan6. com, Subang: Pro kontra tari jaipong...",Seniman Subang menyerukan memboikot PKS dan me...
4,27679,https://www.liputan6.com/news/read/27679/pemda...,"[['Liputan6', '.', 'com', ',', 'Jakarta', ':',...","[['Suku', 'Dinas', 'Kependudukan', 'DKI', 'Jak...","[1, 9, 3]","Liputan6. com, Jakarta: Pemda Jakarta menggela...",Suku Dinas Kependudukan DKI Jakarta menggelar ...


### Random sampling for the validation (dev) and test splits: 2,000 records each

#### Data_dev

In [20]:
def random_sampling_from_files(directory, sample_size, seed=None):
    """Pick up to ``sample_size`` JSON files at random and take one record from each.

    Parameters
    ----------
    directory : str
        Folder scanned (non-recursively) for file names ending in ``.json``.
    sample_size : int
        Number of files to draw without replacement. If it exceeds the number
        of JSON files found, a warning is printed and every file is used.
    seed : int, optional
        Seed for a private ``random.Random`` so that the same files and records
        are selected on every run. With the default ``None`` the shared
        module-level generator is used, as before.

    Returns
    -------
    list
        One entry per usable file: a randomly chosen element when the file
        holds a non-empty JSON array, or the object itself when it holds a JSON
        object. Files that cannot be read or parsed, or that hold anything
        else, are reported and skipped, so the result may be shorter than
        ``sample_size``.
    """
    # A dedicated generator keeps seeded runs independent of other random calls.
    rng = random if seed is None else random.Random(seed)

    # os.listdir makes no ordering guarantee; sorting gives a seeded run the
    # same candidate list every time.
    all_files = sorted(f for f in os.listdir(directory) if f.endswith('.json'))
    total_files = len(all_files)

    if sample_size > total_files:
        print(f"Warning: Sample size ({sample_size}) is larger than the number of files ({total_files}). Using all files.")
        files_to_sample = all_files
    else:
        files_to_sample = rng.sample(all_files, sample_size)

    sampled_records = []
    for file_name in tqdm(files_to_sample, desc="Sampling files"):
        file_path = os.path.join(directory, file_name)

        try:
            data = load_json(file_path)
            if isinstance(data, list) and len(data) > 0:
                sampled_records.append(rng.choice(data))
            elif isinstance(data, dict):
                sampled_records.append(data)
            else:
                # Empty arrays and scalar JSON values used to vanish silently.
                print(f"No usable record in {file_name}. Skipping.")
        except FileNotFoundError:
            print(f"File {file_name} not found. Skipping.")
        except json.JSONDecodeError:
            print(f"Error decoding {file_name}. Skipping.")
        except Exception as e:
            # Deliberate best-effort: one bad file must not abort the whole run.
            print(f"Error processing {file_name}: {str(e)}. Skipping.")

    return sampled_records

def main():
    """Sample the dev split and store the records in the global ``sampled_data2``."""
    # Declared global because later cells read ``sampled_data2`` from the notebook namespace.
    global sampled_data2

    # NOTE(review): the literal is a raw string AND doubles its backslashes, so the
    # path value contains doubled separators; one backslash per separator is the
    # usual spelling for a raw string.
    sampled_data2 = random_sampling_from_files(
        directory=r'F:\\AI Portfolio Project\\Project 2\\liputan6_data\\canonical\\dev',  # dev-split folder
        sample_size=2000,  # fixed at 2k
    )

    print("Random sampling complete.")


if __name__ == "__main__":
    main()

Sampling files:   0%|          | 0/2000 [00:00<?, ?it/s]

Sampling files: 100%|██████████| 2000/2000 [00:25<00:00, 77.34it/s] 

Random sampling complete.





In [37]:
# Alias the sampled dev records under a split-specific name (same list object, not a copy).
sampled_data_dev = sampled_data2
# Display the first sampled record for a quick visual check.
sampled_data_dev[0]

{'id': 11043,
 'url': 'https://www.liputan6.com/news/read/11043/tiga-perampok-mobil-beraksi-di-tebet',
 'clean_article': [['Liputan6',
   '.',
   'com',
   ',',
   'Jakarta',
   ':',
   'Tiga',
   'perampok',
   'mobil',
   'bersenjata',
   'api',
   'beraksi',
   'di',
   'rumah',
   'keluarga',
   'David',
   'di',
   'Jalan',
   'Tebet',
   'Barat',
   'VII',
   'No',
   '.'],
  ['8',
   'A',
   ',',
   'Jakarta',
   'Selatan',
   ',',
   'Selasa',
   '(',
   '10/4',
   ')',
   'pagi',
   '.'],
  ['Mereka',
   'merampas',
   'sebuah',
   'mobil',
   'jenis',
   'Mitsubishi',
   'Lancer',
   'tahun',
   '1993',
   'setelah',
   'melepaskan',
   'dua',
   'kali',
   'tembakan',
   'yang',
   'kemudian',
   'mengenai',
   'tembok',
   '.'],
  ['Menurut',
   'Nyonya',
   'Linda',
   'David',
   ',',
   'insiden',
   'itu',
   'berawal',
   'pada',
   'pukul',
   '06',
   '.',
   '15',
   'WIB',
   '.'],
  ['Saat',
   'itu',
   ',',
   'suaminya',
   'sedang',
   'mengelap',
   'mobil',


In [22]:
# Flatten the sampled records into a DataFrame; the "combined_clean_article" and
# "combined_clean_summary" columns are added at the end of this cell.
data_dev = pd.json_normalize(sampled_data_dev)

def normalize_text(tokenized_text):
    """Join a list of token lists (one list per sentence) into a single string.

    Tokens are separated by one space, except that closing punctuation is
    attached to the preceding token and an opening bracket is attached to the
    token that follows it. Surrounding whitespace is stripped from the result.
    """
    attach_left = (".", ",", ":", ";", "!", "?", ")", "]")
    attach_right = ("(", "[")

    result = ""
    for token in (tok for sent in tokenized_text for tok in sent):
        if token in attach_left:
            # Drop the space left by the previous token before gluing on the mark.
            result = result.rstrip() + token + " "
            continue
        result += token if token in attach_right else token + " "
    return result.strip()


# Turn each row's tokenized article / summary into one plain-text string.
data_dev['combined_clean_article'] = data_dev['clean_article'].apply(normalize_text)
data_dev['combined_clean_summary'] = data_dev['clean_summary'].apply(normalize_text)

In [26]:
# Save the dev DataFrame as CSV (without the index column).
output_file = 'F:\\AI Portfolio Project\\Project 2-new\\datasets\\data_dev.csv' # Output path (backslashes are doubled so they are not read as escape sequences)
data_dev.to_csv(output_file, index=False)
print(f"total {len(data_dev)} items saved to {output_file}")

total 2000 items saved to F:\AI Portfolio Project\Project 2-new\datasets\data_dev.csv


#### Data_test

In [28]:
def random_sampling_from_files(directory, sample_size, seed=None):
    """Pick up to ``sample_size`` JSON files at random and take one record from each.

    Parameters
    ----------
    directory : str
        Folder scanned (non-recursively) for file names ending in ``.json``.
    sample_size : int
        Number of files to draw without replacement. If it exceeds the number
        of JSON files found, a warning is printed and every file is used.
    seed : int, optional
        Seed for a private ``random.Random`` so that the same files and records
        are selected on every run. With the default ``None`` the shared
        module-level generator is used, as before.

    Returns
    -------
    list
        One entry per usable file: a randomly chosen element when the file
        holds a non-empty JSON array, or the object itself when it holds a JSON
        object. Files that cannot be read or parsed, or that hold anything
        else, are reported and skipped, so the result may be shorter than
        ``sample_size``.
    """
    # A dedicated generator keeps seeded runs independent of other random calls.
    rng = random if seed is None else random.Random(seed)

    # os.listdir makes no ordering guarantee; sorting gives a seeded run the
    # same candidate list every time.
    all_files = sorted(f for f in os.listdir(directory) if f.endswith('.json'))
    total_files = len(all_files)

    if sample_size > total_files:
        print(f"Warning: Sample size ({sample_size}) is larger than the number of files ({total_files}). Using all files.")
        files_to_sample = all_files
    else:
        files_to_sample = rng.sample(all_files, sample_size)

    sampled_records = []
    for file_name in tqdm(files_to_sample, desc="Sampling files"):
        file_path = os.path.join(directory, file_name)

        try:
            data = load_json(file_path)
            if isinstance(data, list) and len(data) > 0:
                sampled_records.append(rng.choice(data))
            elif isinstance(data, dict):
                sampled_records.append(data)
            else:
                # Empty arrays and scalar JSON values used to vanish silently.
                print(f"No usable record in {file_name}. Skipping.")
        except FileNotFoundError:
            print(f"File {file_name} not found. Skipping.")
        except json.JSONDecodeError:
            print(f"Error decoding {file_name}. Skipping.")
        except Exception as e:
            # Deliberate best-effort: one bad file must not abort the whole run.
            print(f"Error processing {file_name}: {str(e)}. Skipping.")

    return sampled_records

def main():
    """Sample the test split and store the records in the global ``sampled_data3``."""
    # Declared global because later cells read ``sampled_data3`` from the notebook namespace.
    global sampled_data3

    # NOTE(review): the literal is a raw string AND doubles its backslashes, so the
    # path value contains doubled separators; one backslash per separator is the
    # usual spelling for a raw string.
    sampled_data3 = random_sampling_from_files(
        directory=r'F:\\AI Portfolio Project\\Project 2\\liputan6_data\\canonical\\test',  # test-split folder
        sample_size=2000,  # fixed at 2k
    )

    print("Random sampling complete.")


if __name__ == "__main__":
    main()

Sampling files: 100%|██████████| 2000/2000 [00:29<00:00, 67.47it/s]

Random sampling complete.





In [30]:
# Alias the sampled test records under a split-specific name (same list object, not a copy).
sampled_data_test = sampled_data3
# Display the first sampled record for a quick visual check.
sampled_data_test[0]

{'id': 17009,
 'url': 'https://www.liputan6.com/news/read/17009/selangkah-lagi-mega--menjadi-presiden',
 'clean_article': [['Liputan6',
   '.',
   'com',
   ',',
   'Jakarta',
   ':',
   'Wakil',
   'Presiden',
   'Megawati',
   'Sukarnoputri',
   'selangkah',
   'lagi',
   'naik',
   'satu',
   'tingkat',
   'menjadi',
   'presiden',
   '.'],
  ['Bahkan',
   'boleh',
   'dikatakan',
   'Mega',
   'tinggal',
   'menunggu',
   'pelantikan',
   'saja',
   '.'],
  ['Demikian',
   'laporan',
   'reporter',
   'SCTV',
   'dari',
   'Gedung',
   'DPR/MPR',
   ',',
   'Senin',
   '(',
   '23/7',
   ')',
   '.'],
  ['Selain',
   'itu',
   'dilaporkan',
   ',',
   'MPR',
   'juga',
   'sudah',
   'menyiapkan',
   'ketetapan',
   'yang',
   'akan',
   'memberhentikan',
   'Presiden',
   'Abdurrahman',
   'Wahid',
   'dan',
   'sekaligus',
   'mengangkat',
   'Megawati',
   'sebagai',
   'presiden',
   '.'],
  ['Sedangkan',
   'mengenai',
   'pemilihan',
   'wakil',
   'presiden',
   ',',
   'sam

In [31]:
# Flatten the sampled records into a DataFrame; the "combined_clean_article" and
# "combined_clean_summary" columns are added at the end of this cell.
data_test = pd.json_normalize(sampled_data_test)

def normalize_text(tokenized_text):
    """Join a list of token lists (one list per sentence) into a single string.

    Tokens are separated by one space, except that closing punctuation is
    attached to the preceding token and an opening bracket is attached to the
    token that follows it. Surrounding whitespace is stripped from the result.
    """
    attach_left = (".", ",", ":", ";", "!", "?", ")", "]")
    attach_right = ("(", "[")

    result = ""
    for token in (tok for sent in tokenized_text for tok in sent):
        if token in attach_left:
            # Drop the space left by the previous token before gluing on the mark.
            result = result.rstrip() + token + " "
            continue
        result += token if token in attach_right else token + " "
    return result.strip()


# Turn each row's tokenized article / summary into one plain-text string.
data_test['combined_clean_article'] = data_test['clean_article'].apply(normalize_text)
data_test['combined_clean_summary'] = data_test['clean_summary'].apply(normalize_text)

In [32]:
# Save the test DataFrame as CSV (without the index column).
output_file = 'F:\\AI Portfolio Project\\Project 2-new\\datasets\\data_test.csv' # Output path (backslashes are doubled so they are not read as escape sequences)
data_test.to_csv(output_file, index=False)
print(f"total {len(data_test)} items saved to {output_file}")

total 2000 items saved to F:\AI Portfolio Project\Project 2-new\datasets\data_test.csv


### Last Check for CSV Files

In [33]:
# Reload the three saved CSV files and compare their shapes (rows, columns).
# Raw strings: the backslashes in the Windows paths must not be parsed as escape
# sequences (`\A`, `\P`, `\d` are invalid escapes and trigger a warning).
check_train = pd.read_csv(r'F:\AI Portfolio Project\Project 2-new\datasets\data_train.csv')
check_test = pd.read_csv(r'F:\AI Portfolio Project\Project 2-new\datasets\data_test.csv')
check_dev = pd.read_csv(r'F:\AI Portfolio Project\Project 2-new\datasets\data_dev.csv')
check_train.shape, check_test.shape, check_dev.shape

((10000, 7), (2000, 7), (2000, 7))

In [34]:
# Preview the first rows of the reloaded train CSV.
check_train.head()

Unnamed: 0,id,url,clean_article,clean_summary,extractive_summary,combined_clean_article,combined_clean_summary
0,288252,https://www.liputan6.com/news/read/288252/peme...,"[['Liputan6', '.', 'com', ',', 'Jakarta', ':',...","[['Meski', 'belum', 'ada', 'hasil', 'resmi', '...","[4, 5]","Liputan6. com, Jakarta: Hingga kini kerusakan ...",Meski belum ada hasil resmi analisa penyebab k...
1,148249,https://www.liputan6.com/news/read/148249/fred...,"[['Liputan6', '.', 'com', ',', 'Jakarta', ':',...","[['Freddy', 'Santoso', ',', 'pelaku', 'penyuap...","[0, 2]","Liputan6. com, Jakarta: Freddy Santoso, pelaku...","Freddy Santoso, pelaku penyuapan terhadap Iraw..."
2,30242,https://www.liputan6.com/news/read/30242/perke...,"[['Liputan6', '.', 'com', ',', 'Jakarta', ':',...","[['Fotografi', 'mode', 'terus', 'berkembang', ...","[0, 2, 4]","Liputan6. com, Jakarta: Fotografi mode kini se...",Fotografi mode terus berkembang dengan lebih m...
3,198447,https://www.liputan6.com/news/read/198447/pks-...,"[['Liputan6', '.', 'com', ',', 'Subang', ':', ...","[['Seniman', 'Subang', 'menyerukan', 'memboiko...","[2, 6]","Liputan6. com, Subang: Pro kontra tari jaipong...",Seniman Subang menyerukan memboikot PKS dan me...
4,27679,https://www.liputan6.com/news/read/27679/pemda...,"[['Liputan6', '.', 'com', ',', 'Jakarta', ':',...","[['Suku', 'Dinas', 'Kependudukan', 'DKI', 'Jak...","[1, 9, 3]","Liputan6. com, Jakarta: Pemda Jakarta menggela...",Suku Dinas Kependudukan DKI Jakarta menggelar ...


In [35]:
# Preview the first rows of the reloaded test CSV.
check_test.head()

Unnamed: 0,id,url,clean_article,clean_summary,extractive_summary,combined_clean_article,combined_clean_summary
0,17009,https://www.liputan6.com/news/read/17009/selan...,"[['Liputan6', '.', 'com', ',', 'Jakarta', ':',...","[['Megawati', 'Sukarnoputri', 'pasti', 'akan',...","[0, 3]","Liputan6. com, Jakarta: Wakil Presiden Megawat...",Megawati Sukarnoputri pasti akan menjadi presi...
1,25824,https://www.liputan6.com/news/read/25824/adnan...,"[['Liputan6', '.', 'com', ',', 'Jakarta', ':',...","[['Pemerintah', 'harus', 'memberikan', 'alasan...","[0, 16]","Liputan6. com, Jakarta: Pemerintah sebaiknya m...",Pemerintah harus memberikan alasan hukum menga...
2,17119,https://www.liputan6.com/news/read/17119/jalur...,"[['Liputan6', '.', 'com', ',', 'Semarang', ':'...","[['Sehari', 'setelah', 'pelantikan', 'Megawati...","[0, 5]","Liputan6. com, Semarang: Kepala Kepolisian Dae...",Sehari setelah pelantikan Megawati Sukarnoputr...
3,14898,https://www.liputan6.com/news/read/14898/wapre...,"[['Liputan6', '.', 'com', ',', 'Bali', ':', 'W...","[['Pesta', 'kesenian', 'Bali', 'ke-23', 'dibuk...","[0, 1]","Liputan6. com, Bali: Wakil Presiden Megawati S...",Pesta kesenian Bali ke-23 dibuka Wapres Megawa...
4,25464,https://www.liputan6.com/news/read/25464/ribua...,"[['Liputan6', '.', 'com', ',', 'Jakarta', ':',...","[['Ribuan', 'pemudik', 'tujuan', 'ke', 'bebera...","[0, 1]","Liputan6. com, Jakarta: Ribuan pemudik tujuan ...",Ribuan pemudik tujuan ke beberapa kota di Pula...


In [36]:
# Preview the first rows of the reloaded dev CSV.
check_dev.head()

Unnamed: 0,id,url,clean_article,clean_summary,extractive_summary,combined_clean_article,combined_clean_summary
0,11043,https://www.liputan6.com/news/read/11043/tiga-...,"[['Liputan6', '.', 'com', ',', 'Jakarta', ':',...","[['Tiga', 'pria', 'perampok', 'bersenjata', 'a...","[0, 10, 9]","Liputan6. com, Jakarta: Tiga perampok mobil be...",Tiga pria perampok bersenjata api berhasil mel...
1,623,https://www.liputan6.com/news/read/623/aparat-...,"[['Liputan6', '.', 'com', ',', 'Jakarta', ':',...","[['Aparat', 'keamanan', 'dianggap', 'bersalah'...","[0, 4]","Liputan6. com, Jakarta: Aktivis mahasiswa yang...",Aparat keamanan dianggap bersalah terhadap jat...
2,8843,https://www.liputan6.com/news/read/8843/pemeri...,"[['Liputan6', '.', 'com', ',', 'Jakarta', ':',...","[['Sebanyak', '40', 'persen', 'saham', 'pemeri...","[1, 5]","Liputan6. com, Jakarta: Pemerintah dan Komisi ...",Sebanyak 40 persen saham pemerintah di BCA aka...
3,9563,https://www.liputan6.com/news/read/9563/penduk...,"[['Liputan6', '.', 'com', ',', 'Jakarta', ':',...","[['Ribuan', 'pendukung', 'Presiden', 'Wahid', ...","[1, 2]","Liputan6. com, Jakarta: Sekitar enam ribu pend...",Ribuan pendukung Presiden Wahid meninggalkan I...
4,3478,https://www.liputan6.com/news/read/3478/hari-i...,"[['Liputan6', '.', 'com', ',', 'Banda', 'Aceh'...","[['Sidang', 'Rakyat', 'Aceh', 'yang', 'digelar...","[1, 10]","Liputan6. com, Banda Aceh: Meski ribuan massa ...",Sidang Rakyat Aceh yang digelar selama dua har...
