In [2]:
import os
import platform
import pandas as pd
import numpy as np
import time
import threading
import asyncio
import nest_asyncio
nest_asyncio.apply()

# for logging
import sys
import logging
import datetime
from logging.handlers import TimedRotatingFileHandler

from bs4 import BeautifulSoup

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.select import Select
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.common.exceptions import TimeoutException, NoSuchElementException

#logging
class PrintLogger:
  """File-like replacement for ``sys.stdout`` that duplicates output.

  Every message is written both to the stdout that was active when the
  instance was created and to the ``log`` stream passed to the constructor.
  """

  def __init__(self, log):
    # Capture the current stdout so output still reaches the console/notebook.
    self.terminal = sys.stdout
    self.log = log

  def write(self, message):
    """Write ``message`` to the captured stdout and to the log stream."""
    self.terminal.write(message)
    self.log.write(message)

  def flush(self):
    """Flush both underlying streams.

    Streams that are already closed, or that expose no ``flush`` method, are
    skipped so that a late flush never raises.
    """
    for stream in (self.terminal, self.log):
      if getattr(stream, 'closed', False):
        continue
      flush = getattr(stream, 'flush', None)
      if callable(flush):
        flush()

# Date stamp (local time, YYYY-MM-DD) used in the log file name.
current_date = datetime.datetime.now().strftime("%Y-%m-%d")
def setup_logging():
  """Attach a rotating file handler to the root logger and tee stdout into it.

  The log file is ``./loggings/verval_scrape_<current_date>.log``; the handler
  rotates at midnight and keeps 30 backups. ``sys.stdout`` is replaced by a
  ``PrintLogger`` so that everything printed also lands in the handler's file.
  """
  log_formatter = logging.Formatter("%(asctime)s - %(message)s", "%Y-%m-%d %H:%M:%S")
  log_file = f'./loggings/verval_scrape_{current_date}.log'
  # The handler opens the file as soon as it is created, which fails when the
  # target directory does not exist yet, so make sure it is there.
  os.makedirs(os.path.dirname(log_file), exist_ok=True)
  log_handler = TimedRotatingFileHandler(log_file, when="midnight", interval=1, backupCount=30, utc=False)
  log_handler.setFormatter(log_formatter)
  log_handler.setLevel(logging.DEBUG)
  # NOTE(review): only the handler level is set here; the root logger's own
  # level is left untouched, so it still decides which records reach the handler.
  logger = logging.getLogger()
  logger.addHandler(log_handler)

  # NOTE(review): PrintLogger keeps a reference to the stream that is open right
  # now; the handler presumably swaps its stream on rollover - confirm that
  # long-running sessions keep printing to a valid file.
  sys.stdout = PrintLogger(log_handler.stream)

setup_logging()

# Seconds passed as the timeout to every WebDriverWait in this file.
timeout_limit = 5

# Report the host operating system; it selects the chromedriver binary below.
os_system = platform.system()
print('OS SYSTEM:   ', os_system)

# Report the CPU count as seen by the interpreter.
cores = os.cpu_count()
print(f'CPU CORES:    {cores}')

# Build the path to the chromedriver file that operates the Chrome browser.
# chrome_version = 'v114_0_5735_90'
chrome_version = 'v119_0_6045_105'
# Anything that is neither Windows nor Linux is looked up under 'MacOS';
# only the Windows binary carries the '.exe' suffix.
platform_dir = os_system if os_system in ('Windows', 'Linux') else 'MacOS'
driver_binary = 'chromedriver.exe' if os_system == 'Windows' else 'chromedriver'
chrome_path = os.path.join('webdriver', 'chrome', platform_dir, chrome_version, driver_binary)

print('CHROME PATH:    ', chrome_path)
# Chrome launch flags shared by every driver created in this file.
# The flags are added in the order listed.
chrome_options = Options()
for chrome_flag in (
  '--headless',
  '--disable-gpu',
  '--no-sandbox',
  '--disable-setuid-sandbox',
  # '--disable-dev-shm-usage',  # overcome limited resource problems (disabled)
  'start-maximized',  # open the browser in maximized mode
  '--disable-extensions',  # disable extensions
  '--disable-blink-features=AutomationControlled',
):
  chrome_options.add_argument(chrome_flag)

def driversetup():
  """Start and return a Chrome WebDriver.

  Uses the module-level ``chrome_path`` for the chromedriver executable and
  the module-level ``chrome_options`` for the browser flags.
  """
  return webdriver.Chrome(
    service=Service(executable_path=chrome_path),
    options=chrome_options,
  )



"""## get provinsi"""
# Get the provinces: collect the link text and href of every row link in the
# recapitulation table.
print('getting provices')
start_province = time.time()
driver = driversetup()
driver.get('https://vervalyayasan.data.kemdikbud.go.id/index.php/Chome/rekapitulasi?kode_wilayah=000000')

# Wait for the page-length dropdown instead of looking it up immediately, the
# same way the profile pages are handled with WebDriverWait elsewhere in this
# file (the control is presumably injected by the table script after load).
dropdown_container = WebDriverWait(driver, timeout_limit).until(
  EC.presence_of_element_located((By.CLASS_NAME, 'dataTables_length'))
)
# Select the option with value '-1' so that all links are shown.
# select_by_value returns nothing, so its result is not stored.
Select(dropdown_container.find_element(By.TAG_NAME, 'select')).select_by_value('-1')

urls_elements = driver.find_element(By.TAG_NAME, 'tbody').find_elements(By.XPATH, "tr/td/a")
province_list = [link.get_attribute('innerHTML') for link in urls_elements]
province_urls = [link.get_attribute('href') for link in urls_elements]
# Last expression of the cell: displays the province names in the notebook.
province_list
# df_provinces = pd.DataFrame({'province': province_list, 'urls': province_urls})
# df_provinces.to_csv('./dataset/province_list.csv', index=False)
# print(f'province done in: {time.time() - start_province} seconds')



# """## Scrape profile dengan Threading"""
# print('getting yayasan profiles')
# # df_yayasan = pd.read_csv('./dataset/yayasan_list.csv')
# df_yayasan = pd.read_csv('./verval_yayasan_test/yayasan_list_test.csv')
# df_yayasan = df_yayasan['urls'][:50]
# start_yayasan_profiles = time.time()

# #profile yayasan
# def make_profile_yayasan_dict():
#   profile_yayasan_dict = {
#     'Pimpinan Yayasan': [],
#     'Operator Yayasan': [],
#     'Telepon Yayasan': [],
#     'Fax Yayasan': [],
#     'Email Yayasan': [],
#     'Kode Pos Yayasan': [],
#     'No Pendirian Yayasan': [],
#     'Tanggal Pendirian Yayasan': [],
#     'No Pengesahan PN LN Yayasan': [],
#     'No Pengesahan Menkumham Yayasan': [],
#     'Tanggal Pengesahan Menkumham Yayasan': [],
#     'Nama Yayasan': [],
#     'Kode Yayasan': [],
#   }
#   return profile_yayasan_dict

# def parse_profile_and_sekolah(raw, soup, profile_yayasan_list, index_table_sekolah):
#   #2.bagian table sekolah naungan
#   #2.1 read table sekolah naungan
#   read_tables = pd.read_html(raw)
#   sekolah_naungan_table = read_tables[index_table_sekolah]

#   #2.2 link sekolah naungan
#   npsn_url_tags = soup.find('table', id='tabelsekolah').find('tbody').find_all('a', href=True)
#   if npsn_url_tags:
#     npsn_urls = [tag['href'] for tag in npsn_url_tags]
#     url_dict = {'url': npsn_urls}
#   else:
#     url_dict = {'url': None}
#   sekolah_naungan_table = sekolah_naungan_table.assign(**url_dict)

#   #3. parsing profile yayasan
#   #3.1 bikin dictionary kosong for each profile
#   profile_yayasan_dict = make_profile_yayasan_dict()

#   #3.2 header yayasan
#   header = soup.find('h4', class_='page-header').get_text().strip() #get all text in h4
#   address = soup.find('font', class_='small').get_text().strip() #get address only
#   yayasan = header.split(f' {address}')[0]
#   yayasan = yayasan.split(' ', 1)

#   nama_yayasan = yayasan[1]
#   kode_yayasan = yayasan[0][1:len(yayasan[0])-1]

#   #3.3 profile yayasan
#   li_profil_yayasan = soup.find('ul', class_='list-group').find_all('li')
#   profile_yayasan = [li.get_text().split(' : ')[1] for li in li_profil_yayasan]
#   profile_yayasan.append(nama_yayasan)
#   profile_yayasan.append(kode_yayasan)
#   for i, (key, value) in enumerate(profile_yayasan_dict.items()):
#     value.append(profile_yayasan[i])

#   #4. make repeated profile yayasan to be inserted to sekolah_naungan_table
#   repeated_profile = {key: value * len(sekolah_naungan_table) for key, value in profile_yayasan_dict.items()}
#   profile_and_schools = sekolah_naungan_table.assign(**repeated_profile)
#   #5. append dataframes
#   # exec(f'profile_yayasan_list{i+1}.append(profile_and_schools)')
#   profile_yayasan_list.append(profile_and_schools)
#   return None

# def parse_cabang_yayasan(soup):
#   npyp_url_tags = soup.find('table', id='tabelyayasan').find('tbody').find_all('a', href=True)
#   if npyp_url_tags:
#     for tag in npyp_url_tags:
#       additional_npyp_urls.append(tag['href'])
#   return None

# def parse_page(url, driver, profile_yayasan_list, max_retries=3):
#   retry_count=0
#   while retry_count < max_retries:
#     try:
#       driver.get(url)
#       content_flag = 0
#       sekolah_flag = 0
#       yayasan_flag = 0
#       try:
#         check_content = WebDriverWait(driver, timeout_limit).until(EC.presence_of_element_located((By.CLASS_NAME, 'box')))
#         content_flag = 1
#       except (TimeoutException, NoSuchElementException):
#         pass
      
#       try:
#         select_sekolah = WebDriverWait(driver, timeout_limit).until(EC.presence_of_element_located((By.NAME, 'tabelsekolah_length')))
#         sekolah_flag = 1
#       except (TimeoutException, NoSuchElementException):
#         pass
      
#       try:
#         select_yayasan = WebDriverWait(driver, timeout_limit).until(EC.presence_of_element_located((By.NAME, 'tabelyayasan_length')))
#         yayasan_flag = 1
#       except (TimeoutException, NoSuchElementException):
#         # retry_count = 2
#         pass
      
#       index_table_sekolah = 0
#       if content_flag == 1:
#         if sekolah_flag == 1 and yayasan_flag == 0:
#           #1. cari dropdown filter untuk set select = 'all'
#           dropdown_sekolah = Select(select_sekolah)
#           dropdown_sekolah.select_by_value('-1')

#           raw = driver.page_source
#           soup = BeautifulSoup(raw, 'html.parser') # untuk parsing
#           parse_profile_and_sekolah(raw, soup, profile_yayasan_list, index_table_sekolah)
#           break

#         elif yayasan_flag == 0 and sekolah_flag == 1:
#           #1. cari dropdown filter untuk set select = 'all'
#           dropdown_yayasan = Select(select_yayasan)
#           dropdown_yayasan.select_by_value('-1')

#           raw = driver.page_source
#           soup = BeautifulSoup(raw, 'html.parser') # untuk parsing
#           parse_cabang_yayasan(soup)

#         if sekolah_flag == 1 and yayasan_flag == 1:
#           index_table_sekolah = 1
#           #1. cari dropdown filter untuk set select = 'all'
#           dropdown_sekolah = Select(select_sekolah)
#           dropdown_sekolah.select_by_value('-1')

#           dropdown_yayasan = Select(select_yayasan)
#           dropdown_yayasan.select_by_value('-1')
          
#           raw = driver.page_source
#           soup = BeautifulSoup(raw, 'html.parser') # untuk parsing
#           parse_cabang_yayasan(soup)
#           parse_profile_and_sekolah(raw, soup, profile_yayasan_list, index_table_sekolah)
#           break

#         else:
#           print(f'table sekolah unavailable at try: {retry_count}, {url[-36:]}')
#           retry_count += 1
#           time.sleep(2)
#           continue

#       else:
#         print(f'content unavailable at try: {retry_count}, {url[-36:]}')
#         retry_count += 1
#         time.sleep(2)
#         continue

#     except Exception as e:
#       # Handle the exception here without stopping the script
#       print(f'exception error     at try: {retry_count}, {url[-36:]},  {e}')
#       if retry_count == 2:
#         failed_yayasan.append(url)
#       retry_count += 1
#       time.sleep(2)
#       continue
#   return None

# def parse_pages(urls, yayasan_profiles):
#   driver = driversetup()
#   for url in urls:
#     parse_page(url, driver, yayasan_profiles)
#   driver.quit()
#   return None

# def main_yayasan(yayasan_partition, yayasan_profiles):
#   threads = []
#   num_threads = cores
#   yayasan_batches = np.array_split(yayasan_partition, num_threads)
#   for t in range(num_threads):
#     thread = threading.Thread(target=parse_pages, args=(yayasan_batches[t], yayasan_profiles))
#     threads.append(thread)

#   for thread in threads:
#     thread.start()

#   for thread in threads:
#     thread.join()

#   return None

# partition = 4
# df_yayasan = [
#   'https://vervalyayasan.data.kemdikbud.go.id/index.php/Chome/profil?yayasan_id=2899B7F2-492D-45B0-90FE-B9F87ADAE561',
#   'https://vervalyayasan.data.kemdikbud.go.id/index.php/Chome/profil?yayasan_id=D23EE1EA-98C2-436C-B57F-5AC249F930B0',
#   'https://vervalyayasan.data.kemdikbud.go.id/index.php/Chome/profil?yayasan_id=B801BC5B-7470-43B6-92C4-6EB0E92529CE',
#   'https://vervalyayasan.data.kemdikbud.go.id/index.php/Chome/profil?yayasan_id=9509361B-9957-4E34-8398-8299989113EF'
# ]
# yayasan_partition = np.array_split(df_yayasan, partition)

# yayasan_profiles_1 = []
# yayasan_profiles_2 = []
# yayasan_profiles_3 = []
# yayasan_profiles_4 = []

# additional_yayasan_profiles = []

# additional_npyp_urls = []
# failed_yayasan = []

['Luar Negeri',
 'Prov. Aceh',
 'Prov. Bali',
 'Prov. Banten',
 'Prov. Bengkulu',
 'Prov. D.I. Yogyakarta',
 'Prov. D.K.I. Jakarta',
 'Prov. Gorontalo',
 'Prov. Jambi',
 'Prov. Jawa Barat',
 'Prov. Jawa Tengah',
 'Prov. Jawa Timur',
 'Prov. Kalimantan Barat',
 'Prov. Kalimantan Selatan',
 'Prov. Kalimantan Tengah',
 'Prov. Kalimantan Timur',
 'Prov. Kalimantan Utara',
 'Prov. Kepulauan Bangka Belitung',
 'Prov. Kepulauan Riau',
 'Prov. Lampung',
 'Prov. Maluku',
 'Prov. Maluku Utara',
 'Prov. Nusa Tenggara Barat',
 'Prov. Nusa Tenggara Timur',
 'Prov. Papua',
 'Prov. Papua Barat',
 'Prov. Papua Barat Daya',
 'Prov. Papua Pegunungan',
 'Prov. Papua Selatan',
 'Prov. Papua Tengah',
 'Prov. Riau',
 'Prov. Sulawesi Barat',
 'Prov. Sulawesi Selatan',
 'Prov. Sulawesi Tengah',
 'Prov. Sulawesi Tenggara',
 'Prov. Sulawesi Utara',
 'Prov. Sumatera Barat',
 'Prov. Sumatera Selatan',
 'Prov. Sumatera Utara']

In [2]:
def make_profile_yayasan_dict():
  """Return a new dict mapping each yayasan profile column to its own empty list.

  The key order matters to callers that fill the dict positionally.
  """
  column_names = (
    'Pimpinan Yayasan',
    'Operator Yayasan',
    'Telepon Yayasan',
    'Fax Yayasan',
    'Email Yayasan',
    'Kode Pos Yayasan',
    'No Pendirian Yayasan',
    'Tanggal Pendirian Yayasan',
    'No Pengesahan PN LN Yayasan',
    'No Pengesahan Menkumham Yayasan',
    'Tanggal Pengesahan Menkumham Yayasan',
    'Nama Yayasan',
    'Kode Yayasan',
  )
  # A comprehension gives every column a distinct list object.
  return {column: [] for column in column_names}

def parse_profile_and_sekolah(raw, soup, profile_yayasan_list, index_table_sekolah):
  """Parse one yayasan profile page and append its school table to a list.

  The school table read from ``raw`` gets a ``url`` column plus one column per
  profile field (the keys of ``make_profile_yayasan_dict()``), with each
  profile value repeated on every row.

  Args:
    raw: HTML source of the profile page.
    soup: parsed tree of the page, queried with ``find``/``find_all``
      (the callers visible in this file build it from the same ``raw``).
    profile_yayasan_list: list that receives the resulting DataFrame;
      mutated in place.
    index_table_sekolah: position of the school table among the tables
      ``pd.read_html`` finds in ``raw``.

  Returns:
    None
  """
  # Local import so this function does not rely on a file-level import of io.
  from io import StringIO

  #2. section: table of the schools under the yayasan
  #2.1 read the school table; the HTML is wrapped in StringIO because passing
  #    a literal HTML string to pd.read_html is deprecated in recent pandas
  read_tables = pd.read_html(StringIO(raw))
  sekolah_naungan_table = read_tables[index_table_sekolah]

  #2.2 links of the schools; the column is None when the table has no links
  npsn_url_tags = soup.find('table', id='tabelsekolah').find('tbody').find_all('a', href=True)
  npsn_urls = [tag['href'] for tag in npsn_url_tags]
  sekolah_naungan_table = sekolah_naungan_table.assign(url=npsn_urls or None)

  #3. parsing the yayasan profile
  #3.1 make an empty dictionary for this profile
  profile_yayasan_dict = make_profile_yayasan_dict()

  #3.2 yayasan header
  header = soup.find('h4', class_='page-header').get_text().strip() #get all text in h4
  address = soup.find('font', class_='small').get_text().strip() #get address only
  # Cut the address off the header. With an empty address the separator would
  # be a bare space and the header would be cut after its first word, so the
  # header is used as-is in that case.
  yayasan = header.split(f' {address}')[0] if address else header
  yayasan = yayasan.split(' ', 1)

  nama_yayasan = yayasan[1]
  # Drop the first and last character that wrap the code.
  kode_yayasan = yayasan[0][1:-1]

  #3.3 yayasan profile
  li_profil_yayasan = soup.find('ul', class_='list-group').find_all('li')
  # maxsplit=1 keeps a value intact even when it contains ' : ' itself.
  profile_yayasan = [li.get_text().split(' : ', 1)[1] for li in li_profil_yayasan]
  profile_yayasan.append(nama_yayasan)
  profile_yayasan.append(kode_yayasan)
  # Fill the dict positionally: the i-th key receives the i-th parsed value.
  for i, value in enumerate(profile_yayasan_dict.values()):
    value.append(profile_yayasan[i])

  #4. make repeated profile yayasan to be inserted to sekolah_naungan_table
  repeated_profile = {key: value * len(sekolah_naungan_table) for key, value in profile_yayasan_dict.items()}
  profile_and_schools = sekolah_naungan_table.assign(**repeated_profile)
  #5. append dataframes
  profile_yayasan_list.append(profile_and_schools)
  return None

def parse_cabang_yayasan(soup):
  """Collect the links of the 'tabelyayasan' table into ``additional_npyp_urls``.

  Every ``<a href>`` inside the table body has its href appended to the
  module-level list ``additional_npyp_urls``. Nothing is touched when the
  table body holds no links. Always returns None.
  """
  table_body = soup.find('table', id='tabelyayasan').find('tbody')
  link_tags = table_body.find_all('a', href=True)
  if not link_tags:
    return None
  additional_npyp_urls.extend(link['href'] for link in link_tags)
  return None

In [16]:
def make_profile_yayasan_dict():
  """Build the empty per-profile container: one new empty list per column.

  Columns are inserted in the order in which the profile values are later
  written into the dict by position.
  """
  profile_yayasan_dict = {}
  for column in (
    'Pimpinan Yayasan',
    'Operator Yayasan',
    'Telepon Yayasan',
    'Fax Yayasan',
    'Email Yayasan',
    'Kode Pos Yayasan',
    'No Pendirian Yayasan',
    'Tanggal Pendirian Yayasan',
    'No Pengesahan PN LN Yayasan',
    'No Pengesahan Menkumham Yayasan',
    'Tanggal Pengesahan Menkumham Yayasan',
    'Nama Yayasan',
    'Kode Yayasan',
  ):
    profile_yayasan_dict[column] = []
  return profile_yayasan_dict

# Scratch cell: runs the profile-parsing steps inline for a single yayasan URL,
# using the `driver` opened by an earlier cell, and displays the resulting table.
url =  'https://vervalyayasan.data.kemdikbud.go.id/index.php/Chome/profil?yayasan_id=4865AC08-2805-4674-8135-06F5BCF68736'
profile_yayasan_list = []
retry_count=0
# while retry_count < 3:
  # try:
driver.get(url)
# Presence flags for the three page parts probed below; in this cell they are
# only set, the branching that reads them is commented out further down.
content_flag = 0
sekolah_flag = 0
yayasan_flag = 0
# Each probe waits up to `timeout_limit` seconds; a timeout leaves its flag at 0.
try:
  check_content = WebDriverWait(driver, timeout_limit).until(EC.presence_of_element_located((By.CLASS_NAME, 'box')))
  content_flag = 1
except (TimeoutException, NoSuchElementException):
  pass

try:
  select_sekolah = WebDriverWait(driver, timeout_limit).until(EC.presence_of_element_located((By.NAME, 'tabelsekolah_length')))
  sekolah_flag = 1
except (TimeoutException, NoSuchElementException):
  pass

try:
  select_yayasan = WebDriverWait(driver, timeout_limit).until(EC.presence_of_element_located((By.NAME, 'tabelyayasan_length')))
  yayasan_flag = 1
except (TimeoutException, NoSuchElementException):
  # retry_count = 2
  pass
# Position of the school table among the tables pd.read_html returns.
index_table_sekolah = 0

# #1. find the dropdown filter to set select = 'all'
# dropdown_yayasan = Select(select_yayasan)
# dropdown_yayasan.select_by_value('-1')

raw = driver.page_source
soup = BeautifulSoup(raw, 'html.parser') # for parsing
# parse_cabang_yayasan(soup)
#2. section: table of the schools under the yayasan
#2.1 read the table of the schools under the yayasan
read_tables = pd.read_html(raw)
sekolah_naungan_table = read_tables[index_table_sekolah]

#2.2 links of the schools under the yayasan
npsn_url_tags = soup.find('table', id='tabelsekolah').find('tbody').find_all('a', href=True)
if npsn_url_tags:
  npsn_urls = [tag['href'] for tag in npsn_url_tags]
  url_dict = {'url': npsn_urls}
else:
  # No links in the table body: the 'url' column is filled with None.
  url_dict = {'url': None}
url_dict
sekolah_naungan_table = sekolah_naungan_table.assign(**url_dict)
sekolah_naungan_table

#3. parsing the yayasan profile
#3.1 make an empty dictionary for each profile
profile_yayasan_dict = make_profile_yayasan_dict()

#3.2 yayasan header
header = soup.find('h4', class_='page-header').get_text().strip() #get all text in h4
address = soup.find('font', class_='small').get_text().strip() #get address only
# Keep the header part before the address, then split it once on the first
# space: the first piece holds the code, the rest is the name.
yayasan = header.split(f' {address}')[0]
yayasan = yayasan.split(' ', 1)
nama_yayasan = yayasan[1]
# Strip the first and last character of the code piece.
kode_yayasan = yayasan[0][1:len(yayasan[0])-1]

#3.3 yayasan profile
li_profil_yayasan = soup.find('ul', class_='list-group').find_all('li')
# Each list item is split on ' : ' and the piece after the first separator is kept.
profile_yayasan = [li.get_text().split(' : ')[1] for li in li_profil_yayasan]
profile_yayasan
profile_yayasan.append(nama_yayasan)
profile_yayasan.append(kode_yayasan)
# Fill the dict positionally: the i-th key receives the i-th parsed value.
for i, (key, value) in enumerate(profile_yayasan_dict.items()):
  value.append(profile_yayasan[i])
profile_yayasan_dict

#4. make repeated profile yayasan to be inserted to sekolah_naungan_table
repeated_profile = {key: value * len(sekolah_naungan_table) for key, value in profile_yayasan_dict.items()}
repeated_profile
profile_and_schools = sekolah_naungan_table.assign(**repeated_profile)
# Last expression of the cell: displays the combined table in the notebook.
profile_and_schools

# #5. append dataframes
# profile_yayasan_list.append(profile_and_schools)

# index_table_sekolah = 0
# if content_flag == 1:
#   if sekolah_flag == 1 and yayasan_flag == 0:
#     #1. cari dropdown filter untuk set select = 'all'
#     dropdown_sekolah = Select(select_sekolah)
#     dropdown_sekolah.select_by_value('-1')

#     raw = driver.page_source
#     soup = BeautifulSoup(raw, 'html.parser') # untuk parsing
#     parse_profile_and_sekolah(raw, soup, profile_yayasan_list, index_table_sekolah)
#     # break

#   elif yayasan_flag == 0 and sekolah_flag == 1:
    
# 11111111111111111111111111111
      # if sekolah_flag == 1 and yayasan_flag == 1:
      #   index_table_sekolah = 1
      #   #1. cari dropdown filter untuk set select = 'all'
      #   dropdown_sekolah = Select(select_sekolah)
      #   dropdown_sekolah.select_by_value('-1')

      #   dropdown_yayasan = Select(select_yayasan)
      #   dropdown_yayasan.select_by_value('-1')
        
      #   raw = driver.page_source
      #   soup = BeautifulSoup(raw, 'html.parser') # untuk parsing
      #   parse_cabang_yayasan(soup)
      #   parse_profile_and_sekolah(raw, soup, profile_yayasan_list, index_table_sekolah)
      #   break

  #     else:
  #       print(f'table sekolah unavailable at try: {retry_count}, {url[-36:]}')
  #       retry_count += 1
  #       time.sleep(2)
  #       continue

  #   else:
  #     print(f'content unavailable at try: {retry_count}, {url[-36:]}')
  #     retry_count += 1
  #     time.sleep(2)
  #     continue

  # except Exception as e:
  #   # Handle the exception here without stopping the script
  #   print(f'exception error     at try: {retry_count}, {url[-36:]},  {e}')
  #   if retry_count == 2:
  #     failed_yayasan.append(url)
  #   retry_count += 1
  #   time.sleep(2)
  #   continue



Unnamed: 0,NPSN,Nama,Jenjang,Kecamatan,Kabupaten,Provinsi,url,Pimpinan Yayasan,Operator Yayasan,Telepon Yayasan,Fax Yayasan,Email Yayasan,Kode Pos Yayasan,No Pendirian Yayasan,Tanggal Pendirian Yayasan,No Pengesahan PN LN Yayasan,No Pengesahan Menkumham Yayasan,Tanggal Pengesahan Menkumham Yayasan,Nama Yayasan,Kode Yayasan
0,10206521,SMP SWASTA BUDI MULIA,SMP,Kec. Manduamas,Kab. Tapanuli Tengah,Prov. Sumatera Utara,https://sekolah.data.kemdikbud.go.id/index.php...,"Br. Anastasius Selvester Tukiman, S.Pd",Yoshapat Aditya Nugroho,214201461,10610,budimulialourdes@gmail.com,10610,60,16 Jan 1958,AHU-0019477.AH.01.12,AHU-AH.01.06-0007163,2017-10-20,Yayasan Budi Mulia Lourdes,AH9883
1,10206586,SMAS SW BUDI MULIA,SMA,Kec. Manduamas,Kab. Tapanuli Tengah,Prov. Sumatera Utara,https://sekolah.data.kemdikbud.go.id/index.php...,"Br. Anastasius Selvester Tukiman, S.Pd",Yoshapat Aditya Nugroho,214201461,10610,budimulialourdes@gmail.com,10610,60,16 Jan 1958,AHU-0019477.AH.01.12,AHU-AH.01.06-0007163,2017-10-20,Yayasan Budi Mulia Lourdes,AH9883
2,10208898,SMP SW. BUDI MULIA PANGURURAN,SMP,Kec. Pangururan,Kab. Samosir,Prov. Sumatera Utara,https://sekolah.data.kemdikbud.go.id/index.php...,"Br. Anastasius Selvester Tukiman, S.Pd",Yoshapat Aditya Nugroho,214201461,10610,budimulialourdes@gmail.com,10610,60,16 Jan 1958,AHU-0019477.AH.01.12,AHU-AH.01.06-0007163,2017-10-20,Yayasan Budi Mulia Lourdes,AH9883
3,10211691,SMAS BUDI MULIA,SMA,Kec. Siantar Marimbun,Kota Pematangsiantar,Prov. Sumatera Utara,https://sekolah.data.kemdikbud.go.id/index.php...,"Br. Anastasius Selvester Tukiman, S.Pd",Yoshapat Aditya Nugroho,214201461,10610,budimulialourdes@gmail.com,10610,60,16 Jan 1958,AHU-0019477.AH.01.12,AHU-AH.01.06-0007163,2017-10-20,Yayasan Budi Mulia Lourdes,AH9883
4,10211806,SMP SWASTA BUDI MULIA PEMATANGSIANTAR,SMP,Kec. Siantar Marihat,Kota Pematangsiantar,Prov. Sumatera Utara,https://sekolah.data.kemdikbud.go.id/index.php...,"Br. Anastasius Selvester Tukiman, S.Pd",Yoshapat Aditya Nugroho,214201461,10610,budimulialourdes@gmail.com,10610,60,16 Jan 1958,AHU-0019477.AH.01.12,AHU-AH.01.06-0007163,2017-10-20,Yayasan Budi Mulia Lourdes,AH9883
5,10211880,SD BUDI MULIA 2,SD,Kec. Siantar Marihat,Kota Pematangsiantar,Prov. Sumatera Utara,https://sekolah.data.kemdikbud.go.id/index.php...,"Br. Anastasius Selvester Tukiman, S.Pd",Yoshapat Aditya Nugroho,214201461,10610,budimulialourdes@gmail.com,10610,60,16 Jan 1958,AHU-0019477.AH.01.12,AHU-AH.01.06-0007163,2017-10-20,Yayasan Budi Mulia Lourdes,AH9883
6,10211881,SD BUDI MULIA-1,SD,Kec. Siantar Marihat,Kota Pematangsiantar,Prov. Sumatera Utara,https://sekolah.data.kemdikbud.go.id/index.php...,"Br. Anastasius Selvester Tukiman, S.Pd",Yoshapat Aditya Nugroho,214201461,10610,budimulialourdes@gmail.com,10610,60,16 Jan 1958,AHU-0019477.AH.01.12,AHU-AH.01.06-0007163,2017-10-20,Yayasan Budi Mulia Lourdes,AH9883
7,10257582,SD BUDI MULIA BINJOHARA,SD,Kec. Manduamas,Kab. Tapanuli Tengah,Prov. Sumatera Utara,https://sekolah.data.kemdikbud.go.id/index.php...,"Br. Anastasius Selvester Tukiman, S.Pd",Yoshapat Aditya Nugroho,214201461,10610,budimulialourdes@gmail.com,10610,60,16 Jan 1958,AHU-0019477.AH.01.12,AHU-AH.01.06-0007163,2017-10-20,Yayasan Budi Mulia Lourdes,AH9883
8,10259643,SD SWASTA BUDI MULIA 3,SD,Kec. Siantar,Kab. Simalungun,Prov. Sumatera Utara,https://sekolah.data.kemdikbud.go.id/index.php...,"Br. Anastasius Selvester Tukiman, S.Pd",Yoshapat Aditya Nugroho,214201461,10610,budimulialourdes@gmail.com,10610,60,16 Jan 1958,AHU-0019477.AH.01.12,AHU-AH.01.06-0007163,2017-10-20,Yayasan Budi Mulia Lourdes,AH9883
9,10901164,SMP BUDI MULIA PANGKALPINANG,SMP,Kec. Girimaya,Kota Pangkalpinang,Prov. Kepulauan Bangka Belitung,https://sekolah.data.kemdikbud.go.id/index.php...,"Br. Anastasius Selvester Tukiman, S.Pd",Yoshapat Aditya Nugroho,214201461,10610,budimulialourdes@gmail.com,10610,60,16 Jan 1958,AHU-0019477.AH.01.12,AHU-AH.01.06-0007163,2017-10-20,Yayasan Budi Mulia Lourdes,AH9883


In [9]:
# Stack every DataFrame collected in profile_yayasan_list row-wise into one
# table, discarding the original row labels in favour of a new running index.
pd.concat(profile_yayasan_list, axis=0, ignore_index=True)

Unnamed: 0,NPSN,Nama,Jenjang,Kecamatan,Kabupaten,Provinsi,url,Pimpinan Yayasan,Operator Yayasan,Telepon Yayasan,Fax Yayasan,Email Yayasan,Kode Pos Yayasan,No Pendirian Yayasan,Tanggal Pendirian Yayasan,No Pengesahan PN LN Yayasan,No Pengesahan Menkumham Yayasan,Tanggal Pengesahan Menkumham Yayasan,Nama Yayasan,Kode Yayasan
0,No data available in table,No data available in table,No data available in table,No data available in table,No data available in table,No data available in table,,-,,125970002,21955,siln.makkah@kemdikbud.go.id,21955,-,16 Oct 2017,,,2017-10-16,Sekolah Indonesia Makkah,AJ5740
