In [1]:
!pip install builtwith

Collecting builtwith
  Downloading builtwith-1.3.4.tar.gz (34 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: builtwith
  Building wheel for builtwith (setup.py) ... [?25l[?25hdone
  Created wheel for builtwith: filename=builtwith-1.3.4-py3-none-any.whl size=36077 sha256=5f04b0532dd305eea0bc264d3a3c60b935d00fb3fb06135295e6e5d14eec969b
  Stored in directory: /root/.cache/pip/wheels/7f/2d/b2/606e3df914d4aeeab99c4a4e3e9a61673d2293c2e346db00c8
Successfully built builtwith
Installing collected packages: builtwith
Successfully installed builtwith-1.3.4


In [8]:
BASE_URL = "https://pta.trunojoyo.ac.id/c_search/byprod"

In [11]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import sys
import re

MAX_PAGE_LIMIT = 5  # misal batasi agar tidak terlalu banyak halaman
PRODI_FATEK = {
    9: "Teknik Industri",
    10: "Teknik Informatika",
    11: "Manajemen Informatika",
    19: "Teknik Multimedia Dan Jaringan",
    20: "Mekatronika",
    23: "Teknik Elektro",
    31: "Sistem Informasi",
    32: "Teknik Mesin",
    33: "Teknik Mekatronika"
}

HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'}

def extract_after_colon(text):
    if text and ':' in text:
        return text.split(':', 1)[1].strip()
    return text.strip() if text else "N/A"

def pta_fatek_fast(BASE_URL):
    start_time = time.time()

    data = {
        "id": [], "penulis": [], "judul": [], "abstrak_id": [],
        "abstrak_en": [], "pembimbing_pertama": [], "pembimbing_kedua": [], "prodi": []
    }

    for prodi_id, prodi_name in PRODI_FATEK.items():
        print(f"\n🔹 Scraping program: {prodi_name}")
        try:
            max_page = min(get_max_page(prodi_id, BASE_URL), MAX_PAGE_LIMIT)
        except:
            max_page = 1
        for j in range(1, max_page + 1):
            url = f"{BASE_URL}/{prodi_id}/{j}"
            try:
                r = requests.get(url, headers=HEADERS, timeout=10)
                r.raise_for_status()
                soup = BeautifulSoup(r.content, "html.parser")
                jurnals = soup.select('li[data-cat="#luxury"]')
            except Exception as e:
                print(f"⚠️ Gagal ambil halaman {j} prodi {prodi_name}: {e}")
                continue

            for jurnal in jurnals:
                try:
                    link_keluar = jurnal.select_one('a.gray.button')['href']
                    id_match = re.search(r"/detail/(\d+)", link_keluar)
                    pta_id = id_match.group(1) if id_match else None

                    resp = requests.get(link_keluar, headers=HEADERS, timeout=10)
                    resp.raise_for_status()
                    isi = BeautifulSoup(resp.content, "html.parser").select_one('div#content_journal')

                    judul = isi.select_one('a.title').text.strip() if isi and isi.select_one('a.title') else "N/A"

                    penulis = extract_after_colon(isi.find(string=re.compile("Penulis"))) if isi else "N/A"
                    pembimbing_pertama = extract_after_colon(isi.find(string=re.compile("Dosen Pembimbing I"))) if isi else "N/A"
                    pembimbing_kedua = extract_after_colon(isi.find(string=re.compile("Dosen Pembimbing II"))) if isi else "N/A"

                    paragraf = isi.select('p[align="justify"]') if isi else []
                    abstrak_id = paragraf[0].get_text(strip=True) if len(paragraf) > 0 else "N/A"
                    abstrak_en = paragraf[1].get_text(strip=True) if len(paragraf) > 1 else "N/A"

                    data["id"].append(pta_id)
                    data["penulis"].append(penulis)
                    data["judul"].append(judul)
                    data["abstrak_id"].append(abstrak_id)
                    data["abstrak_en"].append(abstrak_en)
                    data["pembimbing_pertama"].append(pembimbing_pertama)
                    data["pembimbing_kedua"].append(pembimbing_kedua)
                    data["prodi"].append(prodi_name)

                except Exception as e:
                    print(f"⚠️ Gagal scrape jurnal: {e}")
            time.sleep(0.5)  # delay kecil agar tidak dianggap bot
            sys.stdout.write(f"\rHalaman {j}/{max_page} selesai")
            sys.stdout.flush()

    df = pd.DataFrame(data)
    df.to_csv("pta_fatek_fast.csv", index=False, encoding="utf-8-sig")

    elapsed = int(time.time() - start_time)
    jam, sisa = divmod(elapsed, 3600)
    menit, detik = divmod(sisa, 60)

    print(f"\n✅ Selesai! Total entri: {len(df)} | Waktu: {jam}j {menit}m {detik}s")
    return df


In [12]:
pta_fatek_fast(BASE_URL)


🔹 Scraping program: Teknik Industri
Halaman 1/1 selesai
🔹 Scraping program: Teknik Informatika
Halaman 1/1 selesai
🔹 Scraping program: Manajemen Informatika
Halaman 1/1 selesai
🔹 Scraping program: Teknik Multimedia Dan Jaringan
Halaman 1/1 selesai
🔹 Scraping program: Mekatronika
Halaman 1/1 selesai
🔹 Scraping program: Teknik Elektro
Halaman 1/1 selesai
🔹 Scraping program: Sistem Informasi
Halaman 1/1 selesai
🔹 Scraping program: Teknik Mesin
Halaman 1/1 selesai
🔹 Scraping program: Teknik Mekatronika
Halaman 1/1 selesai
✅ Selesai! Total entri: 35 | Waktu: 0j 3m 14s


Unnamed: 0,id,penulis,judul,abstrak_id,abstrak_en,pembimbing_pertama,pembimbing_kedua,prodi
0,80421100005,Siliwangi Fitra Rachmawanto S.T.,OPTIMASI PEMILIHAN PORTOFOLIO SAHAM PERUSAHAAN...,Portofolio adalah sekumpulan saham yang dimili...,Portofolio is a collection of stock owned by i...,"Heri Awalul Ilhamsah S.T., M.T.","Retno Indriartiningtias S.T., M.T.",Teknik Industri
1,80421100087,AHMAD MAS'UD,PERANCANGAN TATA LETAK FASILITAS LANTAI PRODUK...,PT. ABC merupakan perusahaan yang bergerak dib...,PT. ABC is a company engaged in the manufactur...,"SABARUDIN AKHMAD, S.T., M.T.","SUGENG PURWOKO, S.T., M.T.",Teknik Industri
2,80421100019,Yulianto Fauzanta,PERUMUSAN STRATEGI BISNIS UD. BUDI JAYA BANGKA...,Bangkalan merupakan salah satu kabupaten yang ...,Bangkalan is one of the districts that have th...,"Fitri Agustina, S.T., M.T","Retno Indriartiningtias, S.T., M.T",Teknik Industri
3,80421100055,M Mundir Muhlisin,USULAN PERBAIKAN UTILITAS RESOURCES PADA LANTA...,Simulasi adalah duplikasi atau abstraksi dari ...,Simulation is a duplication or abstraction of ...,Mu'alim ST MT,Sugeng Purwoko ST MT,Teknik Industri
4,80421100046,Muhibbin,Peningkatan Kepuasan Masyarakat Terhadap Pelay...,Kepuasan adalah tingkat perasaan seseorang ter...,Satisfaction is feeling level of someone to se...,Rahmad Hidayat,Retno Indriartiningtias,Teknik Industri
5,40411100468,A.Ubaidillah S.Kom,PERANCANGAN DAN IMPLEMENTASI SISTEM DATABASE \...,Sistem informasi akademik (SIAKAD) merupaka...,Academic information systems (SIAKAD) is an in...,Budi Setyono M.T,Hermawan S.T,Teknik Informatika
6,40411100476,"M. Basith Ardianto,",APLIKASI KONTROL DAN MONITORING JARINGAN KOMPU...,Berjalannya koneksi jaringan komputer dengan l...,-,"Drs. Budi Soesilo, MT","Koko Joni, ST",Teknik Informatika
7,40411100480,"Akhmad Suyandi, S.Kom",RANCANG BANGUN APLIKASI PROXY SERVER UNTUK\r\n...,Web server adalah sebuah perangkat lunak serve...,Web server is a server software functioning to...,"Drs. Budi Soesilo, M.T","Hermawan, ST, MT",Teknik Informatika
8,70411100070,Heri Supriyanto,SISTEM PENDUKUNG KEPUTUSAN OPTIMASI PENJADWALA...,Penjadwalan kuliah di Perguruan Tinggi me...,Scheduling courses in universities is a ...,"Mulaab, S.Si., M.Kom","Firli Irhamni, ST., M.Kom",Teknik Informatika
9,80411100115,Septian Rahman Hakim,SISTEM AUGMENTED REALITY ANIMASI BENDA BERGERA...,Seiring perkembangan teknologi yang ada diduni...,As the development of technology existing in t...,"Arik Kurniawati, S.Kom., M.T.","Haryanto, S.T., M.T.",Teknik Informatika
