In [1]:
!pip install git+https://github.com/ibm-granite-community/utils \
    "langchain_community<0.3.0" \
    replicate \
    pandas

Collecting git+https://github.com/ibm-granite-community/utils
  Cloning https://github.com/ibm-granite-community/utils to /tmp/pip-req-build-1kr59x4a
  Running command git clone --filter=blob:none --quiet https://github.com/ibm-granite-community/utils /tmp/pip-req-build-1kr59x4a
  Resolved https://github.com/ibm-granite-community/utils to commit 1514191fbbc4605ed4fdfdcb448f2ee41477058f
  Installing build dependencies ... done
  Getting requirements to build wheel ... done
  Preparing metadata (pyproject.toml) ... done


In [2]:
import pandas as pd
import json
import ipywidgets as widgets
from IPython.display import display, HTML
from ibm_granite_community.notebook_utils import get_env_var
from langchain_community.llms import Replicate

# Create the Replicate-hosted Granite LLM client; the API token is looked up via get_env_var
model = Replicate(
    replicate_api_token=get_env_var("REPLICATE_API_TOKEN"),
    model="ibm-granite/granite-3.3-8b-instruct",
    model_kwargs=dict(max_tokens=1024, temperature=0.2),
)

REPLICATE_API_TOKEN loaded from Google Colab secret.


In [5]:
class AcademicAIAgent:
    """In-memory store of academic paper records with search and LLM helpers.

    Every entry of ``papers_data`` is a dict with the keys: title, authors,
    year, journal, abstract, keywords, content, doi, url, pdf_url, citation,
    citation_count, categories and published_date. Text fields are always
    strings (missing values become ""), ``authors``/``keywords`` are lists of
    non-empty strings and ``year``/``citation_count`` are ints (0 if missing).

    The text-generation methods rely on a module-level ``model`` object that
    exposes ``invoke(prompt)``.
    """

    def __init__(self):
        self.papers_data = []
        self.load_sample_data()

    # ------------------------------------------------------------------
    # Normalisation helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _is_missing(value):
        """Return True for None and for scalar NaN/NaT/NA values."""
        if value is None:
            return True
        if isinstance(value, (list, tuple, dict)):
            # Containers are never "missing"; pd.isna would return an array here.
            return False
        try:
            return bool(pd.isna(value))
        except (TypeError, ValueError):
            return False

    @classmethod
    def _clean_text(cls, value):
        """Convert a raw field to str, mapping missing values to "".

        Plain ``str()`` would turn NaN/None into the literal text 'nan'/'None',
        which then shows up in citations and matches searches.
        """
        return "" if cls._is_missing(value) else str(value)

    @classmethod
    def _to_int(cls, value):
        """Convert a raw field to int; missing or unparsable values give 0."""
        if cls._is_missing(value):
            return 0
        try:
            return int(value)
        except (TypeError, ValueError, OverflowError):
            pass
        try:
            # Handles numeric text such as "2024.0".
            return int(float(value))
        except (TypeError, ValueError, OverflowError):
            return 0

    @classmethod
    def _split_list(cls, value, separator):
        """Return the non-empty, stripped items of a delimited string or a list.

        Empty items are dropped on purpose: an empty keyword is a substring of
        every query and would make every paper match.
        """
        if isinstance(value, (list, tuple)):
            parts = value
        elif cls._is_missing(value):
            return []
        else:
            parts = str(value).split(separator)
        cleaned = (cls._clean_text(part).strip() for part in parts)
        return [part for part in cleaned if part]

    @classmethod
    def _build_paper(cls, record):
        """Map one raw record (a dict of source columns) to the internal structure."""
        get = record.get
        # 'authors' is ';'-separated; fall back to the ','-separated 'authors_raw'.
        authors = cls._split_list(get('authors'), ';') or cls._split_list(get('authors_raw'), ',')
        abstract = cls._clean_text(get('abstract'))
        return {
            "title": cls._clean_text(get('title')),
            "authors": authors,
            "year": cls._to_int(get('year')),
            "journal": cls._clean_text(get('journal')),
            "abstract": abstract,
            "keywords": cls._split_list(get('search_term'), ','),
            "content": abstract,  # Using abstract as content for now
            "doi": cls._clean_text(get('doi')),
            "url": cls._clean_text(get('url')),
            "pdf_url": cls._clean_text(get('pdf_url')),
            "citation": cls._clean_text(get('citation')),
            "citation_count": cls._to_int(get('citation_count')),
            "categories": cls._clean_text(get('categories')),
            "published_date": cls._clean_text(get('published_date')),
        }

    @staticmethod
    def _fallback_papers():
        """Return the built-in sample record, with every key of the internal structure."""
        return [
            {
                "title": "Machine Learning Klasifikasi Status Gizi Balita Menggunakan Algoritma Random Forest",
                "authors": ["Handayani, P.", "Charis Fauzan, A.", "Harlina"],
                "year": 2024,
                "journal": "KLIK: Kajian Ilmiah Informatika Dan Komputer",
                "abstract": "Penelitian ini menggunakan algoritma Random Forest untuk klasifikasi status gizi balita...",
                "keywords": ["machine learning", "random forest", "status gizi", "balita"],
                "content": "Penelitian ini bertujuan untuk mengklasifikasikan status gizi balita menggunakan algoritma Random Forest...",
                "doi": "10.30865/klik.v4i6.1909",
                "url": "",
                "pdf_url": "",
                "citation": "Handayani, P., Charis Fauzan, A., & Harlina. (2024). Machine Learning Klasifikasi Status Gizi Balita Menggunakan Algoritma Random Forest. KLIK: Kajian Ilmiah Informatika Dan Komputer, 4(6), 3064–3072.",
                "citation_count": 0,
                "categories": "",
                "published_date": "",
            }
        ]

    # ------------------------------------------------------------------
    # Loading
    # ------------------------------------------------------------------
    def load_sample_data(self):
        """Load papers from the default CSV and JSON files.

        The CSV replaces ``papers_data``; if it is missing or unreadable the
        built-in sample record is used instead. JSON records, when available,
        are appended afterwards. A status line is printed for each source.
        """
        # NOTE(review): CSV and JSON records are concatenated without
        # de-duplication — confirm the two files do not hold the same papers.
        try:
            df = pd.read_csv('../indonesian_cs_papers.csv')
            self.papers_data = self.process_csv_data(df)
            print(f"✅ Loaded {len(self.papers_data)} papers from CSV")
        except FileNotFoundError:
            print("⚠️ File indonesian_cs_papers.csv tidak ditemukan, menggunakan sample data")
            self.papers_data = self._fallback_papers()
        except Exception as e:
            # Same best-effort policy as the JSON branch: report and fall back.
            print(f"⚠️ Error loading CSV: {e}")
            self.papers_data = self._fallback_papers()

        try:
            with open('../indonesian_cs_papers.json', 'r', encoding='utf-8') as f:
                json_data = json.load(f)
            additional_data = self.process_json_data(json_data)
            self.papers_data.extend(additional_data)
            print(f"✅ Loaded additional {len(additional_data)} papers from JSON")
        except FileNotFoundError:
            print("⚠️ File indonesian_cs_papers.json tidak ditemukan, hanya menggunakan data CSV")
        except Exception as e:
            print(f"⚠️ Error loading JSON: {e}")

    def process_csv_data(self, df):
        """Convert a DataFrame of papers into a list of internal paper dicts.

        Recognised columns: title, authors (';'-separated) or authors_raw
        (','-separated), year, journal, abstract, search_term (','-separated,
        used as keywords), doi, url, pdf_url, citation, citation_count,
        categories, published_date. Absent columns and NaN cells yield empty
        values.
        """
        return [self._build_paper(record) for record in df.to_dict('records')]

    def process_json_data(self, json_data):
        """Convert an iterable of JSON objects into a list of internal paper dicts.

        Uses the same field names as ``process_csv_data``; ``authors`` and
        ``search_term`` may also be JSON arrays. Items that are not objects
        (dicts) are skipped.
        """
        return [self._build_paper(item) for item in json_data if isinstance(item, dict)]

    def load_data_from_files(self, csv_file=None, json_file=None):
        """Append papers from the given CSV and/or JSON file to ``papers_data``.

        Records are normalised with ``process_csv_data``/``process_json_data``
        so they carry the keys that searching and citation formatting read.
        Errors are printed rather than raised.
        """
        if csv_file:
            try:
                df = pd.read_csv(csv_file)
                self.papers_data.extend(self.process_csv_data(df))
            except Exception as e:
                print(f"Error loading CSV: {e}")

        if json_file:
            try:
                with open(json_file, 'r', encoding='utf-8') as f:
                    data = json.load(f)
                self.papers_data.extend(self.process_json_data(data))
            except Exception as e:
                print(f"Error loading JSON: {e}")

    # ------------------------------------------------------------------
    # Search and citation
    # ------------------------------------------------------------------
    def _score_paper(self, paper, needle):
        """Return the relevance score of ``paper`` for the lower-cased ``needle``.

        Weights: title 10, abstract 5, each matching keyword 3, journal 2,
        categories 2, each matching author 1.
        """
        def text(key):
            return self._clean_text(paper.get(key)).lower()

        score = 0
        if needle in text('title'):
            score += 10
        if needle in text('abstract'):
            score += 5
        for keyword in self._split_list(paper.get('keywords'), ','):
            keyword = keyword.lower()
            # Match in both directions so a long query still hits a short keyword.
            if needle in keyword or keyword in needle:
                score += 3
        if needle in text('journal'):
            score += 2
        if needle in text('categories'):
            score += 2
        for author in self._split_list(paper.get('authors'), ';'):
            if needle in author.lower():
                score += 1
        return score

    def search_papers(self, query):
        """Return papers matching ``query``, best match first.

        Matching is a case-insensitive substring test. Each result is a copy
        of the stored record with an added ``relevance_score`` key, so the
        records in ``papers_data`` are left untouched. A blank query returns
        an empty list.
        """
        needle = self._clean_text(query).strip().lower()
        if not needle:
            return []

        results = []
        for paper in self.papers_data:
            score = self._score_paper(paper, needle)
            if score > 0:
                scored = dict(paper)
                scored['relevance_score'] = score
                results.append(scored)

        # Stable sort: equally scored papers keep their stored order.
        results.sort(key=lambda item: item['relevance_score'], reverse=True)
        return results

    def format_citation(self, paper):
        """Return a citation string for ``paper``.

        A non-blank stored ``citation`` is returned as is. Otherwise one is
        assembled from authors, year, title, journal and URL (or DOI link);
        a missing year is rendered as "n.d.".
        """
        existing = paper.get('citation')
        if existing and str(existing).strip():
            return existing

        author_list = paper.get('authors') or []
        if len(author_list) == 0:
            authors = "Unknown Author"
        elif len(author_list) == 1:
            authors = author_list[0]
        elif len(author_list) == 2:
            authors = f"{author_list[0]} & {author_list[1]}"
        else:
            authors = f"{author_list[0]} et al."

        # Missing years are stored as 0, which must not be printed as "(0)".
        year = paper.get('year') or 'n.d.'
        title = paper.get('title') or 'Untitled'
        citation = f"{authors} ({year}). {title}. "

        if paper.get('journal'):
            citation += f"*{paper['journal']}*. "

        if paper.get('url'):
            citation += f"{paper['url']}"
        elif paper.get('doi'):
            citation += f"https://doi.org/{paper['doi']}"

        return citation

    # ------------------------------------------------------------------
    # LLM-backed generation
    # ------------------------------------------------------------------
    def generate_smart_answer(self, query, relevant_papers):
        """Ask the model to answer ``query`` using the top three ``relevant_papers``.

        Returns the model output, a fixed message when no papers are given,
        or an error string if the model call fails.
        """
        if not relevant_papers:
            return "Tidak ditemukan paper yang relevan dengan pertanyaan Anda."

        # Build the context from the three best-ranked papers only.
        context = ""
        for paper in relevant_papers[:3]:
            context += f"Paper: {paper.get('title', '')}\n"
            context += f"Abstract: {paper.get('abstract', '')}\n"
            context += f"Content: {str(paper.get('content', ''))[:200]}...\n\n"

        prompt = f"""
        Sebagai AI assistant akademik, jawab pertanyaan berikut berdasarkan paper-paper yang relevan:

        Pertanyaan: {query}

        Konteks dari paper-paper relevan:
        {context}

        Berikan jawaban yang informatif dan akademis, serta sebutkan sumber yang mendukung jawaban Anda.
        Jawaban harus dalam bahasa Indonesia dan bersifat objektif.
        """

        try:
            answer = model.invoke(prompt)
            return answer
        except Exception as e:
            return f"Error generating answer: {e}"

    def generate_paper(self, title="", abstract="", method="", custom_prompt=""):
        """Ask the model to draft a research paper.

        When ``custom_prompt`` is non-empty it alone drives the request;
        otherwise the draft is seeded with ``title``, ``abstract`` and
        ``method`` (each optional). Returns the model output, or an error
        string if the model call fails.
        """
        if custom_prompt:
            prompt = f"""
            Sebagai AI assistant akademik, buatkan draft paper penelitian berdasarkan prompt berikut:
            {custom_prompt}

            Format paper harus mencakup:
            1. Judul
            2. Abstract
            3. Pendahuluan
            4. Metodologi
            5. Hasil dan Pembahasan
            6. Kesimpulan
            7. Daftar Pustaka (minimal 5 referensi)

            Pastikan konten akademis, objektif, dan mengikuti standar penulisan ilmiah.
            """
        else:
            prompt = f"""
            Sebagai AI assistant akademik, buatkan draft paper penelitian dengan informasi berikut:

            Judul: {title if title else "Belum ditentukan"}
            Abstract: {abstract if abstract else "Belum ditentukan"}
            Metodologi: {method if method else "Belum ditentukan"}

            Lengkapi dan kembangkan paper dengan struktur:
            1. Judul (jika belum ada)
            2. Abstract (jika belum lengkap)
            3. Pendahuluan
            4. Metodologi (jika belum lengkap)
            5. Hasil dan Pembahasan
            6. Kesimpulan
            7. Daftar Pustaka

            Pastikan konten akademis, objektif, dan mengikuti standar penulisan ilmiah.
            """

        try:
            paper = model.invoke(prompt)
            return paper
        except Exception as e:
            return f"Error generating paper: {e}"

In [6]:
# Create the shared agent instance used by the UI callbacks below.
# Its constructor loads the paper data and prints a status line per source.
agent = AcademicAIAgent()

✅ Loaded 131 papers from CSV
✅ Loaded additional 131 papers from JSON


In [7]:
# Build the UI pieces
def create_search_ui():
    """Build the search tab: a query box, a search button and an output area.

    Clicking the button searches the global ``agent`` for the entered text,
    prints the AI-generated answer and then lists up to five matching papers
    with their citations and any available extras.
    """
    query_box = widgets.Text(
        layout=widgets.Layout(width='80%'),
        style={'description_width': 'initial'},
        placeholder="Masukkan pertanyaan atau topik penelitian...",
    )
    run_button = widgets.Button(
        layout=widgets.Layout(width='15%'),
        button_style='primary',
        description="Cari",
    )
    result_area = widgets.Output()

    def _print_reference(position, paper):
        """Print one numbered reference plus optional citation count, PDF and categories."""
        citation = agent.format_citation(paper)
        print(f"{position}. {citation}")

        if paper.get('citation_count', 0) > 0:
            print(f"   📊 Citations: {paper['citation_count']}")
        if paper.get('pdf_url'):
            print(f"   📄 PDF: {paper['pdf_url']}")
        if paper.get('categories'):
            print(f"   🏷️ Categories: {paper['categories']}")
        print()

    def on_search_click(b):
        with result_area:
            result_area.clear_output()
            query = query_box.value
            if not query:
                # Nothing typed: leave the output area empty.
                return

            print("🔍 Mencari paper yang relevan...")
            matches = agent.search_papers(query)
            if not matches:
                print("❌ Tidak ditemukan paper yang relevan")
                return

            print(f"📚 Ditemukan {len(matches)} paper relevan\n")

            # The answer is generated from the ranked matches
            print("🤖 Generating AI Answer...")
            answer = agent.generate_smart_answer(query, matches)
            print(f"Jawaban AI:\n{answer}\n")

            print("📖 Sumber Referensi:")
            for position, paper in enumerate(matches[:5], 1):
                _print_reference(position, paper)

    run_button.on_click(on_search_click)

    heading = widgets.HTML("<h3>🔍 Pencarian Pintar Paper Akademik</h3>")
    input_row = widgets.HBox([query_box, run_button])
    return widgets.VBox([heading, input_row, result_area])

In [8]:
def create_generator_ui():
    """Build the paper-generator tab.

    The form offers a title, an abstract, a methodology and a custom prompt
    field. Clicking the button passes all four values to
    ``agent.generate_paper`` and prints the returned draft.
    """
    def _multiline(label, hint):
        """Create one of the full-width, 80px-high text areas."""
        return widgets.Textarea(
            description=label,
            placeholder=hint,
            style={'description_width': 'initial'},
            layout=widgets.Layout(width='100%', height='80px'),
        )

    title_input = widgets.Text(
        description="Judul:",
        placeholder="Masukkan judul penelitian (opsional)",
        style={'description_width': 'initial'},
        layout=widgets.Layout(width='100%'),
    )
    abstract_input = _multiline("Abstract:", "Masukkan abstract (opsional)")
    method_input = _multiline("Metodologi:", "Masukkan metodologi penelitian (opsional)")
    prompt_input = _multiline(
        "Custom Prompt:",
        "Atau masukkan prompt khusus untuk jenis penelitian yang diinginkan",
    )

    generate_button = widgets.Button(
        description="Generate Paper",
        button_style='success',
        layout=widgets.Layout(width='200px'),
    )
    generator_output = widgets.Output()

    def on_generate_click(b):
        with generator_output:
            generator_output.clear_output()
            print("📝 Generating paper...")

            form_values = {
                "title": title_input.value,
                "abstract": abstract_input.value,
                "method": method_input.value,
                "custom_prompt": prompt_input.value,
            }
            draft = agent.generate_paper(**form_values)

            print("📄 Generated Paper:")
            print("=" * 50)
            print(draft)

    generate_button.on_click(on_generate_click)

    form_rows = [
        widgets.HTML("<h3>📝 Generator Paper Akademik</h3>"),
        title_input,
        abstract_input,
        method_input,
        prompt_input,
        generate_button,
        generator_output,
    ]
    return widgets.VBox(form_rows)

In [9]:
def create_data_upload_ui():
    """Build the data tab: CSV/JSON upload buttons plus a reload button.

    - Uploading a CSV replaces ``agent.papers_data`` with the uploaded papers.
    - Uploading a JSON file appends its papers to ``agent.papers_data``.
    - "Load Files from System" rebinds the global ``agent`` to a fresh
      ``AcademicAIAgent``.

    Uploads are parsed in memory; no temporary files are written.
    """
    import io  # local import: only needed to parse uploaded bytes in memory

    upload_csv = widgets.FileUpload(
        accept='.csv',
        description='Upload CSV',
        multiple=False
    )

    upload_json = widgets.FileUpload(
        accept='.json',
        description='Upload JSON',
        multiple=False
    )

    # Button to load files from system
    load_files_button = widgets.Button(
        description="Load Files from System",
        button_style='info'
    )

    upload_output = widgets.Output()

    def _uploaded_bytes(value):
        """Return the bytes of the most recent upload, or None if there is none.

        ipywidgets 7.x exposes ``FileUpload.value`` as a dict keyed by file
        name, while 8.x exposes a tuple of dicts; both carry the data under
        the 'content' key (a memoryview in 8.x, hence the ``bytes`` call).
        """
        if not value:
            return None
        entries = list(value.values()) if isinstance(value, dict) else list(value)
        if not entries:
            return None
        return bytes(entries[-1]['content'])

    def load_files_from_system(b):
        """Try to load files from system"""
        with upload_output:
            upload_output.clear_output()
            print("🔍 Searching for files in system...")

            # Rebind the global so every UI callback sees the reloaded agent
            global agent
            agent = AcademicAIAgent()

    def on_upload_csv_change(change):
        """Handle CSV file upload"""
        with upload_output:
            upload_output.clear_output()
            try:
                payload = _uploaded_bytes(change['new'])
                if payload is None:
                    return

                df = pd.read_csv(io.BytesIO(payload))
                agent.papers_data = agent.process_csv_data(df)

                print(f"✅ Successfully loaded {len(agent.papers_data)} papers from uploaded CSV!")
                print("📊 Sample data:")
                if agent.papers_data:
                    sample = agent.papers_data[0]
                    print(f"   Title: {sample.get('title', 'N/A')}")
                    print(f"   Authors: {sample.get('authors', 'N/A')}")
                    print(f"   Year: {sample.get('year', 'N/A')}")
                    print(f"   Journal: {sample.get('journal', 'N/A')}")

            except Exception as e:
                print(f"❌ Error processing CSV file: {e}")

    def on_upload_json_change(change):
        """Handle JSON file upload"""
        with upload_output:
            upload_output.clear_output()
            try:
                payload = _uploaded_bytes(change['new'])
                if payload is None:
                    return

                json_data = json.loads(payload.decode('utf-8'))
                additional_data = agent.process_json_data(json_data)

                if getattr(agent, 'papers_data', None):
                    agent.papers_data.extend(additional_data)
                    print(f"✅ Added {len(additional_data)} papers from JSON!")
                else:
                    agent.papers_data = additional_data
                    print(f"✅ Loaded {len(additional_data)} papers from JSON!")

                print(f"📊 Total papers: {len(agent.papers_data)}")

            except Exception as e:
                print(f"❌ Error processing JSON file: {e}")

    upload_csv.observe(on_upload_csv_change, names='value')
    upload_json.observe(on_upload_json_change, names='value')
    load_files_button.on_click(load_files_from_system)

    return widgets.VBox([
        widgets.HTML("<h3>📁 Upload Data Paper</h3>"),
        widgets.HTML("<p>Option 1: Upload file CSV atau JSON secara manual</p>"),
        widgets.HBox([upload_csv, upload_json]),
        widgets.HTML("<p>Option 2: Load files yang sudah ada di sistem</p>"),
        load_files_button,
        upload_output
    ])


In [10]:
# Assemble the complete application
def create_main_app():
    """Return the full app widget: header banner, three-tab panel and footer."""
    banner = widgets.HTML("""
    <div style="text-align: center; padding: 20px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; border-radius: 10px; margin-bottom: 20px;">
        <h1>🤖 AI Agent untuk Karya Tulis Ilmiah</h1>
        <p>Asisten AI untuk pencarian paper dan pembuatan karya tulis ilmiah</p>
    </div>
    """)

    # (tab label, builder) pairs in display order
    sections = [
        ("🔍 Pencarian Pintar", create_search_ui),
        ("📝 Generator Paper", create_generator_ui),
        ("📁 Upload Data", create_data_upload_ui),
    ]

    tab = widgets.Tab()
    tab.children = [build() for _, build in sections]
    for position, (label, _) in enumerate(sections):
        tab.set_title(position, label)

    credits = widgets.HTML("""
    <div style="text-align: center; padding: 10px; color: #666; margin-top: 20px;">
        <p>Powered by IBM Granite & LangChain | Developed for Academic Research</p>
    </div>
    """)

    return widgets.VBox([banner, tab, credits])

In [11]:
# Build the widget tree once and render it as this cell's output
main_app = create_main_app()
display(main_app)

VBox(children=(HTML(value='\n    <div style="text-align: center; padding: 20px; background: linear-gradient(13…

In [None]:
# Print the usage guide (written in Indonesian): how to use the three tabs,
# which data files are loaded, and search tips
print("""
📋 Petunjuk Penggunaan:

1. 🔍 Pencarian Pintar:
   - Masukkan pertanyaan atau topik penelitian
   - Sistem akan mencari paper yang relevan dari database Indonesian CS Papers
   - AI akan memberikan jawaban dengan sitasi otomatis
   - Menampilkan citation count dan link PDF jika tersedia

2. 📝 Generator Paper:
   - Isi judul, abstract, metodologi, atau custom prompt
   - Sistem akan generate paper lengkap berbasis CS Indonesia
   - Hasil dapat digunakan sebagai draft awal

3. 📁 Upload Data:
   - Sistem otomatis memuat indonesian_cs_papers.csv dan indonesian_cs_papers.json
   - Jika file tidak ditemukan, sistem akan menggunakan sample data

📊 Data yang Dimuat:
   - File CSV: indonesian_cs_papers.csv
   - File JSON: indonesian_cs_papers.json
   - Kolom utama: title, authors, abstract, year, journal, doi, citation, dll.

💡 Tips:
- Gunakan kata kunci spesifik seperti "machine learning", "deep learning", "computer vision"
- Sistem akan memberikan relevance score untuk hasil pencarian
- Citation count dan PDF link akan ditampilkan jika tersedia
- Data fokus pada Computer Science papers dari Indonesia
""")