In [19]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory
import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [21]:
!pip install openai requests pandas



In [22]:
import openai
import requests
import pandas as pd
from concurrent.futures import ThreadPoolExecutor
import time

In [23]:
openai.api_key = "AIzaSyDWxC97bk6sAKSCGasQ5TfxXf0A98rfpvs"


In [24]:
def fetch_articles(topic, max_results=5):
    url = f'http://export.arxiv.org/api/query?search_query=all:{topic}&start=0&max_results={max_results}'
    response = requests.get(url)
    if response.status_code == 200:
        # For simplicity, we extract titles (XML parsing can be added later)
        entries = response.text.split('<entry>')
        articles = []
        for entry in entries[1:]:
            title = entry.split('<title>')[1].split('</title>')[0].strip()
            summary = entry.split('<summary>')[1].split('</summary>')[0].strip()
            articles.append({'title': title, 'summary': summary})
        return articles
    else:
        return []


In [25]:
def summarize_text(text):
    try:
        response = openai.ChatCompletion.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": "You are a research summarization assistant."},
                {"role": "user", "content": f"Summarize this text in 2-3 concise points:\n{text}"}
            ],
            temperature=0.3
        )
        return response['choices'][0]['message']['content'].strip()
    except Exception as e:
        return f"Error: {e}"


In [26]:
def evaluate_source(title):
    # Simple credibility scoring (mock)
    keywords = ['review', 'journal', 'conference', 'IEEE', 'ACM']
    score = sum([1 for kw in keywords if kw.lower() in title.lower()])
    return score


In [27]:
def process_articles_parallel(articles):
    results = []
    with ThreadPoolExecutor(max_workers=5) as executor:
        futures = []
        for article in articles:
            futures.append(executor.submit(lambda art: {
                'title': art['title'],
                'summary': summarize_text(art['summary']),
                'credibility': evaluate_source(art['title'])
            }, article))

        for future in futures:
            results.append(future.result())
    return results


In [28]:
def automated_research_assistant(topic):
    print(f"Fetching articles for: {topic}")
    articles = fetch_articles(topic)
    print(f"Found {len(articles)} articles. Processing...")

    processed_articles = process_articles_parallel(articles)

    df = pd.DataFrame(processed_articles)
    print("Final Research Summary:")
    display(df)
    return df


In [29]:
topic = "Artificial Intelligence in Healthcare"
df = automated_research_assistant(topic)


Fetching articles for: Artificial Intelligence in Healthcare
Found 5 articles. Processing...
Final Research Summary:


Unnamed: 0,title,summary,credibility
0,A Review on Explainable Artificial Intelligenc...,Error: \n\nYou tried to access openai.ChatComp...,1
1,"The Artificial Scientist: Logicist, Emergentis...",Error: \n\nYou tried to access openai.ChatComp...,0
2,"Compression, The Fermi Paradox and Artificial ...",Error: \n\nYou tried to access openai.ChatComp...,0
3,Artificial Intelligence Framework for Simulati...,Error: \n\nYou tried to access openai.ChatComp...,0
4,Creative Problem Solving in Artificially Intel...,Error: \n\nYou tried to access openai.ChatComp...,0


In [30]:
STATE_FILE = "session_state.json"

def load_state():
    if os.path.exists(STATE_FILE):
        with open(STATE_FILE, "r") as f:
            return json.load(f)
    return {}

def save_state(state):
    with open(STATE_FILE, "w") as f:
        json.dump(state, f, indent=4)


In [31]:
def fetch_articles(topic, max_results=5):
    url = f'http://export.arxiv.org/api/query?search_query=all:{topic}&start=0&max_results={max_results}'
    response = requests.get(url)
    articles = []
    if response.status_code == 200:
        entries = response.text.split('<entry>')
        for entry in entries[1:]:
            title = entry.split('<title>')[1].split('</title>')[0].strip()
            summary = entry.split('<summary>')[1].split('</summary>')[0].strip()
            articles.append({'title': title, 'summary': summary})
    return articles


In [32]:
def summarize_text(text):
    try:
        response = openai.ChatCompletion.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": "You are a research summarization assistant."},
                {"role": "user", "content": f"Summarize this text in 2-3 concise points:\n{text}"}
            ],
            temperature=0.3
        )
        return response['choices'][0]['message']['content'].strip()
    except Exception as e:
        return f"Error: {e}"


In [33]:
def evaluate_source(title):
    keywords = ['review', 'journal', 'conference', 'IEEE', 'ACM']
    score = sum([1 for kw in keywords if kw.lower() in title.lower()])
    return score


In [34]:
def generate_html_report(data, topic):
    template_str = """
    <html>
        <head><title>Research Report: {{ topic }}</title></head>
        <body>
            <h1>Automated Research Report: {{ topic }}</h1>
            <table border="1" cellpadding="5" cellspacing="0">
                <tr>
                    <th>Title</th>
                    <th>Summary</th>
                    <th>Credibility Score</th>
                </tr>
                {% for row in data %}
                <tr>
                    <td>{{ row.title }}</td>
                    <td>{{ row.summary }}</td>
                    <td>{{ row.credibility }}</td>
                </tr>
                {% endfor %}
            </table>
        </body>
    </html>
    """
    template = Template(template_str)
    html_content = template.render(topic=topic, data=data)
    report_file = f"research_report_{topic.replace(' ', '_')}.html"
    with open(report_file, "w", encoding="utf-8") as f:
        f.write(html_content)
    return report_file


In [35]:
def process_articles_parallel(articles, state):
    results = []
    with ThreadPoolExecutor(max_workers=5) as executor:
        futures = []
        for article in articles:
            title = article['title']
            if title in state:
                # Skip already processed articles
                results.append(state[title])
                continue
            futures.append(executor.submit(lambda art: {
                'title': art['title'],
                'summary': summarize_text(art['summary']),
                'credibility': evaluate_source(art['title'])
            }, article))

        for future in futures:
            result = future.result()
            state[result['title']] = result  # Save to session state
            results.append(result)
    save_state(state)
    return results


In [36]:
def automated_research_assistant(topic):
    print(f"Fetching articles for: {topic}")
    articles = fetch_articles(topic)
    print(f"Found {len(articles)} articles.")

    state = load_state()
    print("Processing articles in parallel...")
    processed_articles = process_articles_parallel(articles, state)

    df = pd.DataFrame(processed_articles)
    display(df)

    report_file = generate_html_report(processed_articles, topic)
    print(f"HTML report generated: {report_file}")
    return df, report_file


In [39]:
import openai
import requests
import pandas as pd
from concurrent.futures import ThreadPoolExecutor
import os
from jinja2 import Template
import json

In [41]:
def load_state():
    if os.path.exists(STATE_FILE):
        try:
            with open(STATE_FILE, "r") as f:
                return json.load(f)
        except json.JSONDecodeError:
            # File exists but empty or invalid â†’ start fresh
            return {}
    return {}


In [42]:
topic = "Artificial Intelligence in Healthcare"
df, report_file = automated_research_assistant(topic)

Fetching articles for: Artificial Intelligence in Healthcare
Found 5 articles.
Processing articles in parallel...


Unnamed: 0,title,summary,credibility
0,A Review on Explainable Artificial Intelligenc...,Error: \n\nYou tried to access openai.ChatComp...,1
1,"The Artificial Scientist: Logicist, Emergentis...",Error: \n\nYou tried to access openai.ChatComp...,0
2,"Compression, The Fermi Paradox and Artificial ...",Error: \n\nYou tried to access openai.ChatComp...,0
3,Artificial Intelligence Framework for Simulati...,Error: \n\nYou tried to access openai.ChatComp...,0
4,Creative Problem Solving in Artificially Intel...,Error: \n\nYou tried to access openai.ChatComp...,0


HTML report generated: research_report_Artificial_Intelligence_in_Healthcare.html
