In [1]:
import os
import datetime
from datetime import date, timedelta, datetime
import pandas as pd
import numpy as np
from tenacity import retry, wait_exponential, stop_after_attempt, retry_if_exception_type
from crewai_tools import TavilySearchTool
from crewai import Agent, Task, Crew, Process
from langchain_groq import ChatGroq
from dotenv import load_dotenv
load_dotenv()
from langchain_core.rate_limiters import InMemoryRateLimiter

In [2]:
# Client-side throttle: 0.2 requests per second works out to one request every 5 seconds.
# NOTE(review): none of the cells visible here pass `rate_limiter` to an LLM or
# agent (the agents are configured with `max_rpm` instead) — confirm it is used
# elsewhere or drop it.
rate_limiter = InMemoryRateLimiter(
    requests_per_second=0.2,
    check_every_n_seconds=5,
    max_bucket_size=2,
)

In [3]:
import os
import json
from pathlib import Path

class CheckpointManager:
    '''
    In-memory store of task outputs, keyed by task name.

    Nothing is written to disk: every record lives in ``self.checkpoints``
    for as long as this object does.
    '''

    def __init__(self):
        # task name -> checkpoint record (the dict built in save_checkpoint)
        self.checkpoints = dict()

    def save_checkpoint(self, task_name, output, agent_name=None):
        '''
        Store ``output`` under ``task_name``, replacing any earlier record.

        Args:
            task_name: Key for the checkpoint.
            output: Value to remember for that task.
            agent_name: Optional label of the agent that produced the output.

        Returns:
            The stored record, with keys 'task_name', 'output', 'timestamp'
            (ISO-formatted time of the call) and 'agent'.
        '''
        record = dict(
            task_name=task_name,
            output=output,
            timestamp=datetime.now().isoformat(),
            agent=agent_name,
        )
        self.checkpoints[task_name] = record
        print(f"‚úì Checkpoint saved in memory: {task_name}")
        return record

    def load_checkpoint(self, task_name):
        '''Return the record stored for ``task_name``, or None if there is none.'''
        return self.checkpoints.get(task_name, None)

    def checkpoint_exists(self, task_name):
        '''Return True when a record is stored for ``task_name``.'''
        return task_name in self.checkpoints.keys()

    def get_last_completed_task(self, task_sequence):
        '''
        Return the latest entry of ``task_sequence`` that has a checkpoint.

        The sequence is scanned from its end; None is returned when no entry
        has a stored record.
        '''
        return next(
            (name for name in reversed(task_sequence) if self.checkpoint_exists(name)),
            None,
        )

    def clear_checkpoints(self):
        '''Drop every stored record.'''
        self.checkpoints.clear()
        print("üóëÔ∏è  All checkpoints cleared from memory")

# Notebook-wide checkpoint store; `save_task_checkpoint` writes to this instance.
# Its contents live only in kernel memory and are reset whenever this cell is re-run.
checkpoint_mgr = CheckpointManager()

In [None]:
# Web-search tool instance; it is attached to the `plan` task via tools=[search_tool].
# NOTE(review): presumably reads its API key from the environment populated by
# load_dotenv() in the first cell — confirm.
search_tool = TavilySearchTool()

In [5]:
# Research agent assigned to the `plan` task.
# The {topic} placeholder in `goal` corresponds to the "topic" key passed in
# crew.kickoff(inputs={"topic": ...}).
planner = Agent(
    role="Research & Content Planner",
    goal=(
        "Conduct thorough research and curate factually accurate, engaging content on {topic}. "
        "Ensure the collected information is credible, up-to-date, and valuable for the target audience."
    ),
    backstory=(
        "A meticulous Content Planner with expertise in research, content strategy, and audience engagement. "
        "With a keen eye for detail and a strong analytical mindset, they specialize in gathering reliable data, "
        "identifying key trends, and structuring insights into well-organized content plans. "
        "Their experience spans digital content creation, market research, and fact-checking, ensuring "
        "that every piece of information is credible and impactful. "
        "They excel at distilling complex topics into clear, actionable insights that serve as a foundation "
        "for compelling content creation."
    ),
    allow_delegation=False,
    verbose=True,
    llm="groq/qwen/qwen3-32b",  # other model id noted by the author: "groq/llama-3.3-70b-versatile"
    # Per-agent request cap (presumably requests per minute — confirm against the CrewAI docs).
    max_rpm=5,
)

# Writing agent assigned to the `write` task.
# Both `goal` and `backstory` carry the {topic} placeholder that corresponds to
# the "topic" key passed in crew.kickoff(inputs={"topic": ...}).
# NOTE(review): the backstory prompt contains typos ("working on a writing",
# "as provide by", "provide by"). They are runtime prompt text, so they are left
# untouched here — fix deliberately if desired.
writer = Agent(
    role="Content Writer",
    goal="Write insightful and factually accurate "
            "opinion piece about the topic: {topic}",
    backstory="You're working on a writing "
                "a new opinion piece about the topic: {topic}. "
                "You base your writing on the work of "
                "the Research & Content Planner, who provides an outline "
                "and relevant context about the topic. "
                "You follow the main objectives and "
                "direction of the outline, "
                "as provide by the Content Planner. "
                "You also provide objective and impartial insights "
                "and back them up with information "
                "provide by the Content Planner. "
                "You acknowledge in your opinion piece "
                "when your statements are opinions "
                "as opposed to objective statements.",
    allow_delegation=False,
    verbose=True,
    llm="groq/llama-3.3-70b-versatile",
    # Per-agent request cap (presumably requests per minute — confirm against the CrewAI docs).
    max_rpm=5,
)

# Editing agent assigned to the `edit` task; it reviews the writer's post.
# The backstory is built from adjacent string literals, so every fragment must
# end with its own separator: the "best practices," fragment previously had no
# trailing space and the prompt read "best practices,provides".
editor = Agent(
    role="Editor",
    goal="Edit a given blog post to align with "
            "the writing style of the organization. ",
    backstory="You are an editor who receives a blog post "
                "from the Content Writer. "
                "Your goal is to review the blog post "
                "to ensure that it follows journalistic best practices, "
                "provides balanced viewpoints "
                "when providing opinions or assertions, "
                "and also avoids major controversial topics "
                "or opinions when possible.",
    allow_delegation=False,
    verbose=True,
    llm="groq/qwen/qwen3-32b",
    # Per-agent request cap (presumably requests per minute — confirm against the CrewAI docs).
    max_rpm=5,
)


In [6]:
from crewai import Task

# Custom callback to save checkpoints after each task
def save_task_checkpoint(task_output, task_name):
    """Store a finished task's raw output in the shared ``checkpoint_mgr``.

    Args:
        task_output: Task result object. Its ``raw`` attribute is stored as the
            checkpoint output and its ``agent`` attribute as the agent name.
        task_name: Key under which the checkpoint is stored.

    Returns:
        ``task_output`` itself, unchanged.
    """
    # CheckpointManager.save_checkpoint accepts (task_name, output, agent_name);
    # it has no ``metadata`` parameter, so the agent is passed through
    # ``agent_name`` (passing ``metadata=...`` raised TypeError on every call).
    checkpoint_mgr.save_checkpoint(
        task_name,
        task_output.raw,
        agent_name=task_output.agent,
    )
    return task_output

# Planning task: executed by `planner` with the web-search tool attached.
# {topic} in the description corresponds to the "topic" key passed in
# crew.kickoff(inputs={"topic": ...}).
# NOTE(review): no callback is attached here, so `save_task_checkpoint` is not
# invoked for this task — confirm whether per-task checkpoints were intended.
plan = Task(
    description=(
        "1. Prioritize the latest trends, key players, "
        "and noteworthy news on {topic}.\n"
        "2. Identify the target audience, considering "
        "their interests and pain points.\n"
        "3. Develop a detailed content outline including "
        "an introduction, key points, and a call to action, conclusion and necessary references.\n"
        "4. Include SEO keywords and relevant data or sources.\n"
        "When using search tools, provide the search query as a simple string."
    ),
    expected_output="A comprehensive content plan document "
    "with an outline, audience analysis, "
    "SEO keywords, and resources.",
    tools=[search_tool],
    agent=planner,
    # output_file="checkpoints/plan.txt"  # Saves raw text output
)

# Writing task: executed by `writer`, with the `plan` task supplied as context.
# NOTE(review): the description ends with guidance about search tools, but
# tools=[] gives this task none — confirm whether that sentence should stay.
write = Task(
    description=(
        "1. Use the content plan to craft a compelling "
        "blog post on {topic}.\n"
        "2. Incorporate SEO keywords naturally.\n"
        "3. Sections/Subtitles are properly named "
        "in an engaging manner.\n"
        "4. Adding necessary Hyperlinks and bolding for important sentences/words/statements.\n"
        "5. If required you can add comparison tables/table and data if it is necessary for the topic.\n"
        "6. Ensure the post is structured with an "
        "engaging introduction, insightful body, "
        "and a summarizing conclusion.\n"
        "7. Proofread for grammatical errors and "
        "alignment with the brand's voice.\n"
        "When using search tools, provide the search query as a simple string."
    ),
    expected_output="A well-written blog post "
    "in markdown format, ready for publication, "
    "each section should have 2 or 3 paragraphs.",
    tools=[],
    context=[plan],
    agent=writer,
    # output_file="checkpoints/write.txt"
)

# Editing task: executed by `editor`, with the `write` task supplied as context.
# It is the last entry in the crew's task list, so its output is what
# `run_crew_with_checkpoints` stores under the 'edit' checkpoint.
edit = Task(
    description=("Proofread the given blog post for "
    "grammatical errors, checks plagiarism and "
    "alignment with the brand's voice."),
    expected_output="A well-written blog post, "
    "ready for publication, "
    "each section should have 2 or 3 paragraphs.",
    tools=[],
    context=[write],
    agent=editor,
    # output_file="checkpoints/edit.txt"
)


In [7]:
def save_as_markdown(content, topic, output_dir="output"):
    '''
    Save final output as a markdown file with a YAML front-matter header.

    Args:
        content: The blog post content; it is interpolated into the file body,
            so any object with a string form is accepted.
        topic: Topic name, used for the front-matter title and the filename.
        output_dir: Directory to write into; created if it does not exist.
            Defaults to "output".

    Returns:
        Path of the written file: "<output_dir>/<sanitized topic>_<timestamp>.md".
    '''
    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Generate filename from topic (sanitize for filesystem). A topic made only
    # of stripped characters would leave an empty stem, so fall back to a fixed one.
    safe_filename = "".join(c for c in topic if c.isalnum() or c in (' ', '-', '_')).strip()
    safe_filename = safe_filename.replace(' ', '_') or "untitled"

    # Read the clock once so the filename stamp and the header date always agree.
    now = datetime.now()
    timestamp = now.strftime("%Y%m%d_%H%M%S")
    header_date = now.strftime("%Y-%m-%d %H:%M:%S")
    filename = f"{output_dir}/{safe_filename}_{timestamp}.md"

    # Emit the title as a double-quoted scalar (JSON string syntax) so that
    # ':', '#', quotes or newlines in the topic cannot break the front matter.
    title = json.dumps(topic, ensure_ascii=False)

    # Add markdown metadata header
    markdown_content = f'''---
title: {title}
date: {header_date}
generated_by: CrewAI Pipeline
---

{content}
'''

    # Write to file
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(markdown_content)

    print(f"üìÑ Markdown saved: {filename}")
    return filename


In [8]:
from crewai import Crew, Process
import time

def run_crew_with_checkpoints(topic, force_restart=False, save_markdown=True):
    '''
    Run crew with in-memory checkpoints and optional markdown export

    Checkpoints are kept in the notebook-wide ``checkpoint_mgr`` rather than in
    a manager created per call, so a finished result is still there on the next
    call (including the retries issued by ``kickoff_with_retry``). Checkpoint
    keys are prefixed with the topic so a result cached for one topic is never
    returned for a different one.

    Args:
        topic: Blog topic; passed to the crew as the "topic" input.
        force_restart: Clear all in-memory checkpoints and start fresh
        save_markdown: Save final output as markdown file

    Returns:
        The value returned by ``crew.kickoff`` for this topic, either freshly
        computed or taken from the cached final checkpoint.

    Raises:
        Exception: Any error raised while the crew runs (or while saving the
            markdown file) is reported and re-raised unchanged.
    '''
    # Shared store: a manager constructed here would be empty on every call,
    # which made the resume branch below unreachable.
    checkpoint_manager = checkpoint_mgr
    task_sequence = [f"{topic}::{name}" for name in ('plan', 'write', 'edit')]
    final_task = task_sequence[-1]

    # Clear memory if force restart
    if force_restart:
        checkpoint_manager.clear_checkpoints()
        print("üîÑ Starting fresh (memory cleared)")

    # Check for existing in-memory checkpoints
    last_completed = checkpoint_manager.get_last_completed_task(task_sequence)

    if last_completed:
        print(f"üìå Resuming from last checkpoint: {last_completed}")

        # If all tasks complete, return final result
        if last_completed == final_task:
            final_checkpoint = checkpoint_manager.load_checkpoint(final_task)
            result = final_checkpoint['output']
            print("‚úÖ All tasks already completed (from memory)")

            # Save as markdown if requested
            if save_markdown:
                save_as_markdown(result, topic)

            return result
        # An earlier checkpoint is only reported: the crew below still runs
        # every task from the start.
    else:
        print("üöÄ Starting crew from beginning")

    # Configure and run crew
    crew = Crew(
        agents=[planner, writer, editor],
        tasks=[plan, write, edit],
        process=Process.sequential,
        max_rpm=10
    )

    try:
        print(f"ü§ñ Running crew for topic: {topic}")
        result = crew.kickoff(inputs={"topic": topic})

        # Save final checkpoint in memory
        checkpoint_manager.save_checkpoint(final_task, result, agent_name='editor')

        # Save as markdown file
        if save_markdown:
            save_as_markdown(result, topic)

        print("‚úÖ Crew execution completed successfully!")
        return result

    except Exception as e:
        print(f"‚ùå Error during execution: {str(e)}")
        print("üíæ Checkpoints preserved in memory for this session")
        raise


# Retry wrapper: up to 5 attempts, exponential backoff between them (rate limits)
@retry(
    retry=retry_if_exception_type((Exception,)),
    stop=stop_after_attempt(5),
    wait=wait_exponential(multiplier=2, min=4, max=120),
)
def kickoff_with_retry(topic, force_restart=False, save_markdown=True):
    '''
    Run ``run_crew_with_checkpoints`` under the retry policy declared above.

    Every attempt first pauses for 5 seconds and then forwards the same three
    arguments, so ``force_restart`` applies to each retry as well. Any
    ``Exception`` raised by the run triggers another attempt.

    NOTE(review): ``reraise=True`` is not set, so once the attempts are
    exhausted tenacity is expected to raise its own ``RetryError`` rather than
    the original exception — confirm that callers expect this.

    Args:
        topic: Blog topic
        force_restart: Clear in-memory checkpoints
        save_markdown: Save final output as markdown

    Returns:
        Whatever ``run_crew_with_checkpoints`` returns.
    '''
    from time import sleep

    # Fixed pause before every attempt, on top of the decorator's backoff.
    sleep(5)
    return run_crew_with_checkpoints(
        topic,
        force_restart=force_restart,
        save_markdown=save_markdown,
    )


In [9]:
# Topic handed to the crew; it fills the {topic} placeholders in the agent and task prompts.
topic = "OpenAI Agentkit"
# force_restart=True clears any in-memory checkpoints before running;
# save_markdown=True also writes the final post to a timestamped .md file under output/.
result = kickoff_with_retry(topic, force_restart=True, save_markdown=True)

# Optionally, print the result to notebook
# print(result)


üóëÔ∏è  All checkpoints cleared from memory
üîÑ Starting fresh (memory cleared)
üöÄ Starting crew from beginning
ü§ñ Running crew for topic: OpenAI Agentkit
‚úì Checkpoint saved in memory: edit
üìÑ Markdown saved: output/OpenAI_Agentkit_20251031_102304.md
‚úÖ Crew execution completed successfully!
