In [None]:
# imports

import re
from io import BytesIO
from typing import Optional
from urllib.parse import urlparse

import fitz  # PyMuPDF
import gradio as gr
import ollama
import requests
from bs4 import BeautifulSoup
from IPython.display import Markdown, display


In [2]:
# Name of the model handed to ollama.chat() whenever a post is generated.
MODEL = "llama3.2"

In [3]:
def is_pdf_url(url: str) -> bool:
    """Return True when ``url`` points at a ``.pdf`` resource.

    The check is case-insensitive and ignores surrounding whitespace. Besides
    the plain "ends with .pdf" test, the URL's path component is inspected so
    links carrying a query string or fragment
    (``https://host/paper.pdf?download=1#page=2``) are recognised too.

    Args:
        url: The URL (or bare file name) to inspect.

    Returns:
        True if the URL looks like a PDF link, otherwise False.
    """
    candidate = url.strip().lower()
    if candidate.endswith(".pdf"):
        return True

    try:
        path = urlparse(candidate).path
    except ValueError:
        # urlparse rejects some malformed inputs (e.g. a broken IPv6 host);
        # treat those as "not a PDF" instead of raising from a predicate.
        return False
    return path.endswith(".pdf")

class ContentSource:
    """
    Represents an AI paper or article to transform into content.

    The URL is downloaded when the object is constructed. The parser (PDF or
    HTML) is picked from what the server actually returned, not only from the
    URL suffix or the caller's label, so an HTML landing page labelled "paper"
    is not fed to the PDF parser and an extension-less PDF link is not parsed
    as HTML.

    Attributes:
        url: The URL that was fetched.
        content_type: The caller-supplied label ('article' or 'paper'),
            stored unchanged.
        title: Page <title> for HTML, or the first substantial text line for PDFs.
        text: Extracted plain text.

    Raises:
        requests.HTTPError: If the server answers with an error status
            (propagated from ``raise_for_status``).
    """

    # Request timeouts in seconds; the PDF path keeps its longer allowance.
    HTML_TIMEOUT = 15
    PDF_TIMEOUT = 20

    # Tags whose content is page chrome rather than article text.
    _NOISE_TAGS = ["script", "style", "img", "input", "nav", "footer", "header"]

    def __init__(self, url, content_type='article'):
        self.url = url
        self.content_type = content_type

        # The label / URL suffix is only a hint used to pick the timeout;
        # the parser is chosen from the downloaded payload below.
        pdf_expected = self.content_type == "paper" or is_pdf_url(url)
        response = self._fetch(self.PDF_TIMEOUT if pdf_expected else self.HTML_TIMEOUT)

        if self._looks_like_pdf(response.content, response.headers.get("Content-Type", "")):
            self._extract_pdf(response)
        else:
            self._extract_html(response)

    def _fetch(self, timeout):
        """Download ``self.url`` and return the response, raising on HTTP errors."""
        response = requests.get(self.url, timeout=timeout)
        response.raise_for_status()
        return response

    @staticmethod
    def _looks_like_pdf(content, content_type_header=""):
        """Return True if the payload is a PDF, judged by header or magic bytes.

        Args:
            content: Raw response body (bytes).
            content_type_header: Value of the HTTP Content-Type header, if any.
        """
        if "application/pdf" in (content_type_header or "").lower():
            return True
        # PDF files start with "%PDF-"; a little leading junk is tolerated by
        # scanning the first 1024 bytes rather than only byte 0.
        return b"%PDF-" in (content or b"")[:1024]

    def _extract_html(self, response=None):
        """Populate ``title`` and ``text`` from an HTML page.

        Args:
            response: An already-downloaded response; fetched when omitted.
        """
        if response is None:
            response = self._fetch(self.HTML_TIMEOUT)

        soup = BeautifulSoup(response.text, "html.parser")

        # get_text() is used instead of .string because .string is None when
        # <title> is empty or contains nested tags.
        title_text = soup.title.get_text(strip=True) if soup.title else ""
        self.title = title_text or "No title found"

        if soup.body:
            for tag in soup.body.find_all(self._NOISE_TAGS):
                tag.decompose()

            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = soup.get_text(separator="\n", strip=True)

        # Collapse runs of blank / whitespace-only lines into one blank line.
        self.text = re.sub(r'\n\s*\n', '\n\n', self.text)

    def _extract_pdf(self, response=None):
        """Populate ``title`` and ``text`` from a PDF document.

        Args:
            response: An already-downloaded response; fetched when omitted.
        """
        if response is None:
            response = self._fetch(self.PDF_TIMEOUT)

        # The context manager closes the document once the text is extracted.
        with fitz.open(stream=BytesIO(response.content), filetype="pdf") as pdf:
            pages_text = [page.get_text() for page in pdf]

        self.text = "\n\n".join(pages_text)

        # Fallback title extraction
        self.title = self._infer_title_from_text()

    def _infer_title_from_text(self):
        """Return the first line longer than 10 characters (capped at 200), or "AI Paper"."""
        lines = [line.strip() for line in self.text.split("\n") if len(line.strip()) > 10]
        return lines[0][:200] if lines else "AI Paper"


In [None]:
# Test with an AI article
print("Testing content extraction...")
test_source = ContentSource("https://www.anthropic.com/engineering/building-effective-agents")
print(test_source.title)
# Print a bounded preview only: the full article text is long and would
# flood the cell output without adding information.
print(f"{len(test_source.text):,} characters extracted; preview:")
print(test_source.text[:1000])

In [6]:
# System prompt for LinkedIn transformation
linkedin_system_prompt = """You are a LinkedIn thought leadership expert who transforms technical AI papers and articles into engaging, professional LinkedIn posts.

Your posts should:
- Start with a compelling hook that grabs attention
- Distill complex technical concepts into accessible insights
- Include 3-5 key takeaways or insights
- Use strategic line breaks for readability
- End with a thought-provoking question or call-to-action
- Be 150-300 words (LinkedIn optimal length)
- Use professional yet conversational tone
- Include relevant emojis sparingly (1-3 max)
- Focus on practical implications and industry impact

Format the post in a way that's ready to copy-paste into LinkedIn."""

In [7]:
def user_prompt_for_linkedin(content_source, max_chars=4000):
    """Build the user prompt asking the model to turn a source into a LinkedIn post.

    Args:
        content_source: Object exposing ``content_type``, ``title``, ``url``
            and ``text`` attributes.
        max_chars: Maximum number of characters of ``text`` to embed in the
            prompt, to stay within the model's context budget. ``None``
            embeds the full text.

    Returns:
        The prompt string.
    """
    # Truncate outside the f-string: anything written after a replacement
    # field inside the literal (including a "# ..." note) is prompt text that
    # gets sent to the model, not a Python comment.
    excerpt = content_source.text[:max_chars]

    user_prompt = f"""Transform the following {content_source.content_type} into a compelling LinkedIn thought leadership post.

Title: {content_source.title}
URL: {content_source.url}

Content:
{excerpt}

Create a LinkedIn post that:
1. Captures the most important insights
2. Explains why this matters to professionals
3. Engages the reader with a strong opening
4. Ends with a question or discussion prompt

Generate ONLY the LinkedIn post text, ready to publish."""
    return user_prompt

In [8]:
def messages_for_linkedin(content_source):
    """Assemble the system + user chat messages that are sent to Ollama."""
    system_message = {"role": "system", "content": linkedin_system_prompt}
    user_message = {
        "role": "user",
        "content": user_prompt_for_linkedin(content_source),
    }
    return [system_message, user_message]

## Generate the LinkedIn post with Ollama

In [9]:
def generate_linkedin_post(url, content_type='article'):
    """
    Fetch a paper or article and ask the model to rewrite it as a LinkedIn post.

    Progress messages are printed before the download and before the model call.

    Args:
        url: The URL of the paper or article
        content_type: 'paper' or 'article'
    Returns:
        Generated LinkedIn post text
    """
    print(f"Extracting content from {url}...")
    source = ContentSource(url, content_type)

    print(f"Generating LinkedIn post with {MODEL}...")
    reply = ollama.chat(model=MODEL, messages=messages_for_linkedin(source))

    return reply['message']['content']

In [None]:
# Example article URL. The name `url` is reused by the later
# display_linkedin_post(url) cell, so this cell must run first.
url = "https://www.anthropic.com/engineering/building-effective-agents"
generate_linkedin_post(url)

In [11]:
def display_linkedin_post(url, content_type='article'):
    """
    Generate a LinkedIn post, render it as Markdown between banner lines,
    and return the post text.
    """
    post = generate_linkedin_post(url, content_type)

    rule = "=" * 60  # horizontal banner line

    print(f"\n{rule}")
    print("LINKEDIN POST")
    print(f"{rule}\n")
    display(Markdown(post))
    print(f"\n{rule}")
    print("Ready to copy and paste into LinkedIn!")
    print(rule)

    return post

In [None]:
# display_linkedin_post() already renders the post; the trailing ";" stops the
# notebook from echoing the returned string a second time as the cell's output.
# Relies on `url` assigned in the earlier example cell.
display_linkedin_post(url);

In [None]:
# PDF example: the arXiv /pdf/ link has no ".pdf" suffix, so the content type
# is passed explicitly. The trailing ";" suppresses the duplicate echo of the
# returned post text, which the function has already rendered.
display_linkedin_post(
    "https://arxiv.org/pdf/1706.03762",
    content_type="paper"
);

In [14]:
def generate_linkedin_post_gradio(url, content_type):
    """Gradio callback: turn a URL into LinkedIn post text.

    Never raises; any failure is returned as an "Error: ..." string so it is
    shown in the output textbox.

    Args:
        url: URL typed by the user; surrounding whitespace is ignored and
            ``None`` is treated as empty.
        content_type: 'article' or 'paper' from the radio control; falls back
            to 'article' when nothing is selected.

    Returns:
        The generated post, a prompt to enter a URL, or an error message.
    """
    try:
        # Normalise once and use the cleaned value everywhere below, so the
        # URL that was validated is the same one that gets fetched.
        url = (url or "").strip()
        if not url:
            return "Please enter a valid URL."

        content_type = content_type or "article"

        # Auto-detect PDF (case-insensitive, via the shared helper)
        if is_pdf_url(url):
            content_type = "paper"

        content = ContentSource(url, content_type)

        messages = messages_for_linkedin(content)
        response = ollama.chat(model=MODEL, messages=messages)

        return response["message"]["content"]

    except Exception as e:
        # UI boundary: report the problem in the textbox instead of crashing.
        return f"Error: {e}"


In [15]:
# Gradio UI: URL box + content-type selector -> generated post in a textbox.
with gr.Blocks(
    title="AI Paper → LinkedIn Post Generator",
    theme=gr.themes.Soft()
) as demo:

    gr.Markdown(
        """
        # AI Paper → LinkedIn Thought Leadership Generator

        Paste a **paper or article URL** and get a **ready-to-publish LinkedIn post**.

        Works great with:
        - arXiv papers 
        - Blogs & research articles 
        """
    )

    with gr.Row():
        url_input = gr.Textbox(
            label="Article / Paper URL",
            # Example points at the PDF itself: the default content type below
            # is "paper", which is routed to PDF extraction, and the /abs/
            # page is an HTML landing page rather than the document.
            placeholder="https://arxiv.org/pdf/1706.03762",
            scale=4
        )

    content_type = gr.Radio(
        choices=["article", "paper"],
        value="paper",
        label="Content Type"
    )

    generate_btn = gr.Button("Generate LinkedIn Post", variant="primary")

    output = gr.Textbox(
        label="LinkedIn Post (Copy & Paste)",
        lines=15,
        show_copy_button=True
    )

    # Clicking the button passes (url, content type) to the callback and
    # writes its returned string into the output textbox.
    generate_btn.click(
        fn=generate_linkedin_post_gradio,
        inputs=[url_input, content_type],
        outputs=output
    )

In [None]:
# Start the Gradio app built in the `with gr.Blocks(...) as demo:` cell.
demo.launch()
# Alternative (optional): demo.launch(share=True)
# NOTE(review): presumably exposes a temporary public link; confirm in the Gradio docs before use.