# Prompt-Based Persona Rewriter

This notebook implements a prompt-based rewriter that can transform any target question into different personas using the personas.csv dataset as training examples.

In [1]:
# Import required libraries
import pandas as pd
import numpy as np
import json
from typing import List, Dict, Tuple, Optional
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load the personas dataset
df = pd.read_csv('personas.csv')
print(f"Dataset shape: {df.shape}")
print(f"Columns: {list(df.columns)}")
print("\nFirst few rows:")
df.head()

Dataset shape: (20, 6)
Columns: ['Core Fact', 'Shakespearean Bard', 'Gen-Z TikToker', 'Corporate Executive', 'Kind Grandma', 'Sci-Fi Robot Historian (Year 3080)']

First few rows:


Unnamed: 0,Core Fact,Shakespearean Bard,Gen-Z TikToker,Corporate Executive,Kind Grandma,Sci-Fi Robot Historian (Year 3080)
0,"The Eiffel Tower is located in Paris, France.","Lo, the iron maiden doth pierce the skies of f...","Yo, the Eiffel Tower‚Äôs vibin‚Äô right in Paris üá´...","The Eiffel Tower, an established landmark, res...","Sweetheart, that tall, twinkling tower you see...",Geo-structure Eiffel-Twr-75 located at Paris C...
1,Water boils at 100 degrees Celsius at sea level.,"At sea‚Äôs gentle breast, the water doth boil wh...","No cap, water hits boil at 100¬∞C when you chil...",It is a standard physical fact that water reac...,"Honey, when you‚Äôre at the beach or close to th...",Phase Change Event: H‚ÇÇO transitions to vapor a...
2,"The next solar eclipse will occur on April 8, ...","Mark thy calendars, for the sun shall be shrou...",Heads up! Solar eclipse dropping on 4/8/24 üåíüëÄ ...,Please be advised that the forthcoming solar e...,"Remember dear, the sky‚Äôs gonna get dark on Apr...",Celestial Event Solar-Eclipse-2024 set for Ter...
3,You can reset your password using the ‚ÄúForgot ...,"Shouldst thou forget thy secret key, seek the ...","Bruh, just hit that ‚ÄúForgot Password‚Äù link if ...",Password recovery can be initiated by selectin...,"If you ever forget your password, dear, just c...",User Authentication Reset Procedure: Activate ...
4,The company‚Äôs quarterly earnings report will b...,Hear ye! The scroll of quarterly gain shall be...,"Heads up, the company‚Äôs earnings drop on Augus...",The quarterly financial disclosure is schedule...,They‚Äôre gonna tell us how the company did on A...,Fiscal Data Release Q3-Report scheduled for Te...


In [3]:
# Display available personas and their characteristics
personas = df.columns[1:].tolist()  # Exclude 'Core Fact' column
print("Available personas:")
for i, persona in enumerate(personas, 1):
    print(f"{i}. {persona}")

# Show sample core facts
print("\nSample core facts:")
for i, fact in enumerate(df['Core Fact'].head(5), 1):
    print(f"{i}. {fact}")

Available personas:
1. Shakespearean Bard
2. Gen-Z TikToker
3. Corporate Executive
4. Kind Grandma
5. Sci-Fi Robot Historian (Year 3080)

Sample core facts:
1. The Eiffel Tower is located in Paris, France.
2. Water boils at 100 degrees Celsius at sea level.
3. The next solar eclipse will occur on April 8, 2024.
4. You can reset your password using the ‚ÄúForgot Password‚Äù link.
5. The company‚Äôs quarterly earnings report will be released on August 1st.


In [4]:
class PromptBasedPersonaRewriter:
    """
    A prompt-based rewriter that transforms target questions into different personas
    using examples from the personas.csv dataset.
    """
    
    def __init__(self, df: pd.DataFrame):
        self.df = df
        self.personas = df.columns[1:].tolist()
        self.persona_characteristics = self._analyze_persona_characteristics()
        
    def _analyze_persona_characteristics(self) -> Dict[str, Dict[str, any]]:
        """
        Analyze the characteristics of each persona based on the examples.
        """
        characteristics = {}
        
        for persona in self.personas:
            examples = df[persona].tolist()
            
            # Analyze language patterns
            characteristics[persona] = {
                'examples': examples,
                'avg_length': np.mean([len(str(ex)) for ex in examples]),
                'formality_level': self._assess_formality(examples),
                'emoji_usage': self._count_emojis(examples),
                'hashtag_usage': self._count_hashtags(examples),
                'archaic_words': self._count_archaic_words(examples),
                'technical_terms': self._count_technical_terms(examples),
                'endearments': self._count_endearments(examples)
            }
        
        return characteristics
    
    def _assess_formality(self, examples: List[str]) -> str:
        """Assess the formality level of a persona."""
        formal_indicators = ['shall', 'must', 'please', 'advised', 'scheduled', 'established']
        informal_indicators = ['yo', 'bruh', 'fam', 'vibin', 'cap', 'legit']
        
        formal_count = sum(1 for ex in examples for word in formal_indicators if word.lower() in str(ex).lower())
        informal_count = sum(1 for ex in examples for word in informal_indicators if word.lower() in str(ex).lower())
        
        if formal_count > informal_count:
            return 'formal'
        elif informal_count > formal_count:
            return 'informal'
        else:
            return 'neutral'
    
    def _count_emojis(self, examples: List[str]) -> int:
        """Count emoji usage in examples."""
        emoji_count = 0
        for ex in examples:
            emoji_count += len([c for c in str(ex) if ord(c) > 127 and c not in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'])
        return emoji_count
    
    def _count_hashtags(self, examples: List[str]) -> int:
        """Count hashtag usage in examples."""
        hashtag_count = 0
        for ex in examples:
            hashtag_count += str(ex).count('#')
        return hashtag_count
    
    def _count_archaic_words(self, examples: List[str]) -> int:
        """Count archaic word usage in examples."""
        archaic_words = ['doth', 'thou', 'thee', 'thy', 'thine', 'ere', 'betwixt', 'hath', 'shall', 'mark']
        archaic_count = 0
        for ex in examples:
            for word in archaic_words:
                archaic_count += str(ex).lower().count(word.lower())
        return archaic_count
    
    def _count_technical_terms(self, examples: List[str]) -> int:
        """Count technical term usage in examples."""
        technical_terms = ['protocol', 'procedure', 'coordinates', 'sector', 'node', 'data', 'system', 'analysis']
        technical_count = 0
        for ex in examples:
            for term in technical_terms:
                technical_count += str(ex).lower().count(term.lower())
        return technical_count
    
    def _count_endearments(self, examples: List[str]) -> int:
        """Count endearment usage in examples."""
        endearments = ['dear', 'honey', 'sweetheart', 'darling', 'sweetie']
        endearment_count = 0
        for ex in examples:
            for endearment in endearments:
                endearment_count += str(ex).lower().count(endearment.lower())
        return endearment_count
    
    def generate_persona_prompt(self, target_persona: str, num_examples: int = 3) -> str:
        """
        Generate a prompt for rewriting text in a specific persona style.
        """
        if target_persona not in self.personas:
            raise ValueError(f"Persona '{target_persona}' not found. Available personas: {self.personas}")
        
        # Get examples for this persona
        examples = self.df[['Core Fact', target_persona]].sample(min(num_examples, len(self.df)))
        
        # Get persona characteristics
        char = self.persona_characteristics[target_persona]
        
        # Build the prompt
        prompt = f"""You are a {target_persona}. Rewrite the following text in your unique style.

Your writing style characteristics:
- Formality level: {char['formality_level']}
- Average response length: {char['avg_length']:.0f} characters
- Emoji usage: {'High' if char['emoji_usage'] > 5 else 'Medium' if char['emoji_usage'] > 0 else 'None'}
- Hashtag usage: {'High' if char['hashtag_usage'] > 2 else 'Medium' if char['hashtag_usage'] > 0 else 'None'}
- Archaic language: {'High' if char['archaic_words'] > 3 else 'Medium' if char['archaic_words'] > 0 else 'None'}
- Technical terminology: {'High' if char['technical_terms'] > 2 else 'Medium' if char['technical_terms'] > 0 else 'None'}
- Endearments: {'High' if char['endearments'] > 2 else 'Medium' if char['endearments'] > 0 else 'None'}

Examples of your style:
"""
        
        for _, row in examples.iterrows():
            prompt += f"\nOriginal: {row['Core Fact']}"
            prompt += f"\nYour version: {row[target_persona]}"
        
        prompt += "\n\nNow rewrite the following text in your style:"
        
        return prompt
    
    def generate_few_shot_prompt(self, target_persona: str, input_text: str, num_examples: int = 3) -> str:
        """
        Generate a few-shot prompt with examples for rewriting.
        """
        prompt = self.generate_persona_prompt(target_persona, num_examples)
        prompt += f"\n\nInput text: {input_text}"
        prompt += f"\nYour response:"
        
        return prompt
    
    def generate_zero_shot_prompt(self, target_persona: str, input_text: str) -> str:
        """
        Generate a zero-shot prompt without examples.
        """
        char = self.persona_characteristics[target_persona]
        
        prompt = f"""You are a {target_persona}. Rewrite the following text in your unique style.

Your writing style:
- {self._get_persona_description(target_persona)}
- Formality: {char['formality_level']}
- Tone: {self._get_persona_tone(target_persona)}

Input text: {input_text}

Your response:"""
        
        return prompt
    
    def _get_persona_description(self, persona: str) -> str:
        """Get a description of the persona's style."""
        descriptions = {
            'Shakespearean Bard': 'Uses archaic English with thee/thou, -eth endings, and poetic language',
            'Gen-Z TikToker': 'Uses modern slang, emojis, hashtags, and casual, energetic tone',
            'Corporate Executive': 'Uses formal, professional language with business terminology',
            'Kind Grandma': 'Uses warm, caring tone with terms of endearment and simple explanations',
            'Sci-Fi Robot Historian (Year 3080)': 'Uses technical, futuristic terminology with formal reporting style'
        }
        return descriptions.get(persona, 'Uses unique writing style')
    
    def _get_persona_tone(self, persona: str) -> str:
        """Get the tone of the persona."""
        tones = {
            'Shakespearean Bard': 'Poetic and dramatic',
            'Gen-Z TikToker': 'Casual and energetic',
            'Corporate Executive': 'Professional and authoritative',
            'Kind Grandma': 'Warm and nurturing',
            'Sci-Fi Robot Historian (Year 3080)': 'Technical and analytical'
        }
        return tones.get(persona, 'Neutral')
    
    def get_persona_analysis(self) -> pd.DataFrame:
        """
        Get a detailed analysis of all personas.
        """
        analysis_data = []
        
        for persona, char in self.persona_characteristics.items():
            analysis_data.append({
                'Persona': persona,
                'Formality': char['formality_level'],
                'Avg Length': f"{char['avg_length']:.0f} chars",
                'Emojis': char['emoji_usage'],
                'Hashtags': char['hashtag_usage'],
                'Archaic Words': char['archaic_words'],
                'Technical Terms': char['technical_terms'],
                'Endearments': char['endearments']
            })
        
        return pd.DataFrame(analysis_data)

In [5]:
# Initialize the prompt-based rewriter
prompt_rewriter = PromptBasedPersonaRewriter(df)

# Display persona analysis
print("Persona Analysis:")
print("=" * 50)
analysis_df = prompt_rewriter.get_persona_analysis()
print(analysis_df.to_string(index=False))

Persona Analysis:
                           Persona Formality Avg Length  Emojis  Hashtags  Archaic Words  Technical Terms  Endearments
                Shakespearean Bard    formal   76 chars       2         0             27                1            0
                    Gen-Z TikToker  informal   59 chars      68         5              1                1            0
               Corporate Executive    formal   74 chars       6         0              2                0            9
                      Kind Grandma  informal   82 chars      17         0              2                1           11
Sci-Fi Robot Historian (Year 3080)    formal   50 chars       4         1              1                8            0


In [6]:
# Test prompt generation for each persona
test_text = "The weather is sunny today."
print(f"Test text: {test_text}")
print("=" * 60)

for persona in prompt_rewriter.personas:
    print(f"\n{persona} - Zero-shot Prompt:")
    print("-" * 40)
    zero_shot_prompt = prompt_rewriter.generate_zero_shot_prompt(persona, test_text)
    print(zero_shot_prompt)
    print("\n" + "="*60)

Test text: The weather is sunny today.

Shakespearean Bard - Zero-shot Prompt:
----------------------------------------
You are a Shakespearean Bard. Rewrite the following text in your unique style.

Your writing style:
- Uses archaic English with thee/thou, -eth endings, and poetic language
- Formality: formal
- Tone: Poetic and dramatic

Input text: The weather is sunny today.

Your response:


Gen-Z TikToker - Zero-shot Prompt:
----------------------------------------
You are a Gen-Z TikToker. Rewrite the following text in your unique style.

Your writing style:
- Uses modern slang, emojis, hashtags, and casual, energetic tone
- Formality: informal
- Tone: Casual and energetic

Input text: The weather is sunny today.

Your response:


Corporate Executive - Zero-shot Prompt:
----------------------------------------
You are a Corporate Executive. Rewrite the following text in your unique style.

Your writing style:
- Uses formal, professional language with business terminology
- Forma

In [7]:
# Test few-shot prompt generation
print("Few-shot Prompt Example (Shakespearean Bard):")
print("=" * 60)
few_shot_prompt = prompt_rewriter.generate_few_shot_prompt(
    'Shakespearean Bard', 
    "The temperature is 25 degrees Celsius.",
    num_examples=2
)
print(few_shot_prompt)

Few-shot Prompt Example (Shakespearean Bard):
You are a Shakespearean Bard. Rewrite the following text in your unique style.

Your writing style characteristics:
- Formality level: formal
- Average response length: 76 characters
- Emoji usage: Medium
- Hashtag usage: None
- Archaic language: High
- Technical terminology: Medium
- Endearments: None

Examples of your style:

Original: Vegetables are essential for a balanced diet.
Your version: Pray, partake in the verdant bounty, for vegetables nourish the body and soul.
Original: Python is a popular programming language for data science.
Your version: Python reigns supreme in the realm of data science coding.

Now rewrite the following text in your style:

Input text: The temperature is 25 degrees Celsius.
Your response:


In [None]:
# Interactive prompt generator
def interactive_prompt_generator():
    """
    Interactive function to generate prompts for different personas.
    """
    print("Prompt-Based Persona Rewriter")
    print("=" * 40)
    
    while True:
        print("\nAvailable personas:")
        for i, persona in enumerate(prompt_rewriter.personas, 1):
            print(f"{i}. {persona}")
        
        print("\nOptions:")
        print("1. Generate zero-shot prompt")
        print("2. Generate few-shot prompt")
        print("3. Show persona analysis")
        print("4. Exit")
        
        choice = input("\nEnter your choice (1-4): ").strip()
        
        if choice == '4':
            print("Goodbye!")
            break
        
        if choice in ['1', '2']:
            persona_idx = input("Enter persona number: ").strip()
            try:
                persona_idx = int(persona_idx) - 1
                if 0 <= persona_idx < len(prompt_rewriter.personas):
                    persona = prompt_rewriter.personas[persona_idx]
                    text = input("Enter text to rewrite: ").strip()
                    
                    if choice == '1':
                        prompt = prompt_rewriter.generate_zero_shot_prompt(persona, text)
                        print(f"\nZero-shot prompt for {persona}:")
                    else:
                        num_examples = input("Number of examples (1-5): ").strip()
                        try:
                            num_examples = int(num_examples)
                            prompt = prompt_rewriter.generate_few_shot_prompt(persona, text, num_examples)
                            print(f"\nFew-shot prompt for {persona}:")
                        except ValueError:
                            print("Invalid number! Using default 3 examples.")
                            prompt = prompt_rewriter.generate_few_shot_prompt(persona, text)
                            print(f"\nFew-shot prompt for {persona}:")
                    
                    print("=" * 50)
                    print(prompt)
                    print("=" * 50)
                else:
                    print("Invalid persona number!")
            except ValueError:
                print("Please enter a valid number!")
        
        elif choice == '3':
            print("\nPersona Analysis:")
            print("=" * 50)
            print(prompt_rewriter.get_persona_analysis().to_string(index=False))
        
        else:
            print("Invalid choice! Please enter 1, 2, 3, or 4.")

# Uncomment the line below to run the interactive mode
# interactive_prompt_generator()

In [None]:
# Batch prompt generation for multiple test cases
test_cases = [
    "The meeting starts at 3 PM.",
    "The restaurant is closed on Sundays.",
    "The movie was released in 2023.",
    "The library has over 10,000 books.",
    "Your flight is delayed by two hours."
]

print("Batch Prompt Generation")
print("=" * 50)

for i, test_case in enumerate(test_cases, 1):
    print(f"\nTest Case {i}: {test_case}")
    print("-" * 40)
    
    # Generate prompts for each persona
    for persona in prompt_rewriter.personas:
        print(f"\n{persona}:")
        zero_shot = prompt_rewriter.generate_zero_shot_prompt(persona, test_case)
        print(f"Zero-shot prompt length: {len(zero_shot)} characters")
        
        few_shot = prompt_rewriter.generate_few_shot_prompt(persona, test_case, 2)
        print(f"Few-shot prompt length: {len(few_shot)} characters")
    
    print("\n" + "="*60)

In [None]:
# Prompt template variations
class PromptTemplateGenerator:
    """
    Generate different types of prompt templates for persona rewriting.
    """
    
    @staticmethod
    def role_playing_prompt(persona: str, input_text: str) -> str:
        """Generate a role-playing style prompt."""
        return f"""You are now embodying the role of a {persona}. 
You must respond to all questions and statements as this character would, 
maintaining their unique voice, mannerisms, and communication style.

Input: {input_text}

Respond as the {persona}:"""
    
    @staticmethod
    def instruction_prompt(persona: str, input_text: str) -> str:
        """Generate an instruction-based prompt."""
        return f"""Rewrite the following text in the style of a {persona}:

Text: {input_text}

Instructions:
- Maintain the core meaning and information
- Adapt the tone, vocabulary, and style to match the persona
- Keep the response natural and authentic to the persona

Rewritten text:"""
    
    @staticmethod
    def conversation_prompt(persona: str, input_text: str) -> str:
        """Generate a conversation-style prompt."""
        return f"""Imagine you're having a conversation with someone who says:

"{input_text}"

How would a {persona} respond to this? Write their response:"""
    
    @staticmethod
    def translation_prompt(persona: str, input_text: str) -> str:
        """Generate a translation-style prompt."""
        return f"""Translate the following text into the language and style of a {persona}:

Original: {input_text}

Translation:"""

# Test different prompt templates
test_text = "The weather is sunny today."
persona = "Shakespearean Bard"

print("Different Prompt Templates")
print("=" * 50)

templates = [
    ("Role-playing", PromptTemplateGenerator.role_playing_prompt),
    ("Instruction-based", PromptTemplateGenerator.instruction_prompt),
    ("Conversation", PromptTemplateGenerator.conversation_prompt),
    ("Translation", PromptTemplateGenerator.translation_prompt)
]

for template_name, template_func in templates:
    print(f"\n{template_name} Template:")
    print("-" * 30)
    prompt = template_func(persona, test_text)
    print(prompt)
    print("\n" + "="*50)

In [None]:
# Export prompts to JSON for external use
def export_prompts_to_json(filename: str = "persona_prompts.json"):
    """
    Export generated prompts to a JSON file for external use.
    """
    prompts_data = {}
    
    test_texts = [
        "The weather is sunny today.",
        "The meeting starts at 3 PM.",
        "The restaurant is closed on Sundays."
    ]
    
    for persona in prompt_rewriter.personas:
        prompts_data[persona] = {
            'characteristics': prompt_rewriter.persona_characteristics[persona],
            'zero_shot_template': prompt_rewriter.generate_zero_shot_prompt(persona, "{input_text}"),
            'few_shot_template': prompt_rewriter.generate_few_shot_prompt(persona, "{input_text}", 3),
            'examples': prompt_rewriter.df[['Core Fact', persona]].to_dict('records'),
            'test_prompts': {}
        }
        
        for test_text in test_texts:
            prompts_data[persona]['test_prompts'][test_text] = {
                'zero_shot': prompt_rewriter.generate_zero_shot_prompt(persona, test_text),
                'few_shot': prompt_rewriter.generate_few_shot_prompt(persona, test_text, 2)
            }
    
    with open(filename, 'w') as f:
        json.dump(prompts_data, f, indent=2)
    
    print(f"Prompts exported to {filename}")
    print(f"Total personas: {len(prompts_data)}")
    print(f"Total test cases: {len(test_texts)}")

# Export the prompts
export_prompts_to_json()