<a href="https://colab.research.google.com/github/shalu27114/Shalu-Priyadharshini/blob/main/PRODIGY_GA_03.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import random
from collections import defaultdict, Counter
import re

# =============================================
# PART 1: CHARACTER-LEVEL MARKOV CHAIN
# =============================================

class CharMarkovChain:
    """Character-level Markov chain text generator.

    Every run of ``order`` consecutive characters (a *state*) is mapped to a
    ``Counter`` of the characters that followed it in the training text.

    Attributes:
        order: Number of preceding characters that form one state.
        model: ``defaultdict(Counter)`` mapping state -> next-character counts.
        start_chars: States that open a sentence; used as default seeds.
    """

    # Characters that terminate a sentence.
    _SENTENCE_END = '.!?'

    def __init__(self, order=2):
        """Initialize with Markov order (how many previous characters to consider)"""
        self.order = order
        self.model = defaultdict(Counter)
        self.start_chars = []

    @classmethod
    def _starts_sentence(cls, text, i):
        """Return True when ``text[i]`` is the first character of a sentence.

        ``text`` is expected to have its whitespace collapsed to single
        spaces, so at most one space separates a sentence terminator from the
        next sentence.  The separating space itself never counts as a start;
        otherwise generated text would begin with a blank.
        """
        if text[i] == ' ':
            return False
        prev = i - 1
        if prev >= 0 and text[prev] == ' ':
            prev -= 1
        return prev < 0 or text[prev] in cls._SENTENCE_END

    def train(self, text):
        """Train the model on input text.

        The text is lowercased and every whitespace run is collapsed to a
        single space before counting.  Calling ``train`` again adds to the
        existing counts.

        Args:
            text: Raw training text.
        """
        print(f"üìä Training character-level model (order={self.order})...")

        # Normalise: lowercase and collapse whitespace runs to one space
        text = re.sub(r'\s+', ' ', text.lower())

        # The last `order` characters have no successor, so they add no state
        for i in range(len(text) - self.order):
            state = text[i:i + self.order]
            self.model[state][text[i + self.order]] += 1

            if self._starts_sentence(text, i):
                self.start_chars.append(state)

        print(f"‚úÖ Trained on {len(text)} characters")
        print(f"üìà Learned {len(self.model)} character patterns")

    def generate(self, length=100, start=None):
        """Generate text of at most ``length`` characters.

        Args:
            length: Maximum number of characters to return.  Generation stops
                early when the current state was never followed by anything
                in the training text.
            start: Optional seed state.  It is used only when it is a known
                state; otherwise a recorded sentence start (or, failing that,
                any known state) is picked at random.

        Returns:
            The generated text, ``""`` when ``length`` is not positive, or
            ``"Model not trained!"`` when the model is empty.
        """
        if not self.model:
            return "Model not trained!"
        if length <= 0:
            return ""

        # Choose starting state
        if start and start in self.model:
            current_state = start
        elif self.start_chars:
            current_state = random.choice(self.start_chars)
        else:
            current_state = random.choice(list(self.model.keys()))

        pieces = [current_state]

        for _ in range(length - self.order):
            # .get() avoids inserting empty entries into the defaultdict
            next_chars = self.model.get(current_state)
            if not next_chars:
                break

            # Sample the next character proportionally to its count
            next_char = random.choices(
                list(next_chars.keys()), weights=list(next_chars.values())
            )[0]
            pieces.append(next_char)

            # Slide the window forward by one character
            current_state = (current_state + next_char)[-self.order:]

        # The seed alone may already be longer than the requested length
        return ''.join(pieces)[:length]

# =============================================
# PART 2: WORD-LEVEL MARKOV CHAIN
# =============================================

class WordMarkovChain:
    """Word-level Markov chain text generator.

    Text is split into lowercase word tokens and the punctuation tokens
    ``. , ! ? ;``.  Every tuple of ``order`` consecutive tokens (a *state*) is
    mapped to a ``Counter`` of the tokens that followed it.

    Attributes:
        order: Number of preceding tokens that form one state.
        model: ``defaultdict(Counter)`` mapping state tuple -> next-token counts.
        start_words: States that open a sentence; used as default seeds.
    """

    # Words, or a single punctuation mark kept as its own token.
    _TOKEN_PATTERN = re.compile(r'\b\w+\b|[.,!?;]')
    # Tokens that end a sentence.
    _SENTENCE_END = frozenset('.!?')
    # Tokens that are glued onto the preceding word when formatting.
    _PUNCTUATION = frozenset('.,!?;')

    def __init__(self, order=1):
        """Initialize with Markov order (how many previous tokens to consider)."""
        self.order = order
        self.model = defaultdict(Counter)
        self.start_words = []

    def train(self, text):
        """Train on text.

        Prints a message and leaves the model untouched when the text has
        fewer than ``order + 1`` tokens.  Calling ``train`` again adds to the
        existing counts.

        Args:
            text: Raw training text.
        """
        print(f"üìä Training word-level model (order={self.order})...")

        # Split into words
        words = self._TOKEN_PATTERN.findall(text.lower())

        if len(words) < self.order + 1:
            print("‚ùå Text too short for training!")
            return

        # The last `order` tokens have no successor, so they add no state
        for i in range(len(words) - self.order):
            state = tuple(words[i:i + self.order])
            self.model[state][words[i + self.order]] += 1

            # A state opens a sentence at the very start or right after . ! ?
            if i == 0 or words[i - 1] in self._SENTENCE_END:
                self.start_words.append(state)

        print(f"‚úÖ Trained on {len(words)} words")
        print(f"üìà Learned {len(self.model)} word patterns")

    def generate(self, num_words=50, start=None):
        """Generate text with at most ``num_words`` tokens.

        Args:
            num_words: Maximum number of tokens (words and punctuation marks)
                to produce.  Generation stops early when the current state was
                never followed by anything in the training text.
            start: Optional seed.  May be a tuple/list of ``order`` tokens or
                a plain string, which is tokenized the same way as training
                text.  It is used only when it is a known state; otherwise a
                recorded sentence start (or any known state) is picked at
                random.

        Returns:
            The generated text with punctuation attached to the preceding
            word, ``""`` when ``num_words`` is not positive, or
            ``"Model not trained!"`` when the model is empty.
        """
        if not self.model:
            return "Model not trained!"
        if num_words <= 0:
            return ""

        # Model keys are tuples, so bring string/list seeds into that form
        if isinstance(start, str):
            start = tuple(self._TOKEN_PATTERN.findall(start.lower()))
        elif isinstance(start, list):
            start = tuple(start)

        # Choose starting state
        if start and start in self.model:
            current_state = start
        elif self.start_words:
            current_state = random.choice(self.start_words)
        else:
            current_state = random.choice(list(self.model.keys()))

        generated = list(current_state)

        for _ in range(num_words - self.order):
            # .get() avoids inserting empty entries into the defaultdict
            next_words = self.model.get(current_state)
            if not next_words:
                break

            # Sample the next token proportionally to its count
            next_word = random.choices(
                list(next_words.keys()), weights=list(next_words.values())
            )[0]
            generated.append(next_word)

            # Update state
            current_state = tuple(generated[-self.order:])

        # The seed alone may already hold more tokens than requested
        del generated[num_words:]

        # Attach punctuation to the preceding word; a leading punctuation
        # token has nothing to attach to and is kept as its own item.
        result = []
        for word in generated:
            if word in self._PUNCTUATION and result:
                result[-1] += word
            else:
                result.append(word)

        return ' '.join(result)

# =============================================
# PART 3: SAMPLE TEXT FOR TRAINING
# =============================================

def get_sample_text():
    """Return the built-in training corpus.

    The corpus is a leading newline followed by twelve sentences, each on its
    own line indented by four spaces, and a final four-space indent.
    """
    indent = " " * 4
    sentences = (
        "Artificial intelligence is changing the world.",
        "Machine learning helps computers learn from data.",
        "Natural language processing understands human language.",
        "Deep learning uses neural networks for complex tasks.",
        "AI can generate text, images, and even music.",
        "Computers are getting smarter every day.",
        "The future of technology looks very exciting.",
        "Programming is a valuable skill to learn.",
        "Python is a popular language for AI development.",
        "Data science involves analyzing large datasets.",
        "Algorithms are step by step instructions for computers.",
        "Innovation drives technological advancement forward.",
    )
    body = "".join(f"{indent}{sentence}\n" for sentence in sentences)
    return "\n" + body + indent

# =============================================
# PART 4: INTERACTIVE DEMO
# =============================================

def _ask_int(prompt, default, minimum=None, maximum=None):
    """Prompt for an integer; blank or non-numeric input yields ``default``.

    The result is clamped to ``minimum``/``maximum`` when those are given.
    """
    raw = input(prompt).strip()
    try:
        value = int(raw) if raw else default
    except ValueError:
        value = default
    if minimum is not None:
        value = max(minimum, value)
    if maximum is not None:
        value = min(maximum, value)
    return value


def _print_generated(text):
    """Print generated text framed by the demo's banner lines."""
    print("\n" + "="*60)
    print("üìù GENERATED TEXT:")
    print("="*60)
    print(text)
    print("="*60)


def interactive_demo():
    """Interactive demonstration of Markov Chains.

    Repeatedly lets the user pick a character-level or word-level model,
    trains it on the built-in sample text, and prints generated output.
    The loop ends when the user picks "3" or answers anything but "y" to
    the "Generate more?" prompt.
    """
    print("\n" + "="*60)
    print("ü§ñ INTERACTIVE MARKOV CHAIN GENERATOR")
    print("="*60)

    # Get training text
    sample_text = get_sample_text()
    print(f"\nüìñ Sample training text ({len(sample_text)} characters):")
    print("-"*40)
    print(sample_text[:200] + "...")
    print("-"*40)

    while True:
        print("\n" + "="*50)
        print("Choose Markov Chain type:")
        print("1. Character-level (generates character by character)")
        print("2. Word-level (generates word by word)")
        print("3. Exit")

        choice = input("\nYour choice (1/2/3): ").strip()

        if choice == '3':
            print("\nüëã Goodbye!")
            break

        if choice == '1':
            # Character-level Markov
            print("\nüî§ CHARACTER-LEVEL MARKOV CHAIN")

            order = _ask_int("Enter order (1-4, default=2): ", 2, 1, 4)

            print(f"\nCreating model with order={order}...")
            model = CharMarkovChain(order=order)
            model.train(sample_text)

            # A non-positive length cannot produce output, so clamp to 1
            length = _ask_int(
                "Output length in characters (default=100): ", 100, minimum=1
            )

            print("\nüé≤ Generating text...")
            generated = model.generate(length=length)
            _print_generated(generated)

            # Show statistics
            print(f"\nüìä Model used {len(model.model)} character patterns")

        elif choice == '2':
            # Word-level Markov
            print("\nüìù WORD-LEVEL MARKOV CHAIN")

            order = _ask_int("Enter order (1-3, default=1): ", 1, 1, 3)

            print(f"\nCreating model with order={order}...")
            model = WordMarkovChain(order=order)
            model.train(sample_text)

            num_words = _ask_int(
                "Number of words to generate (default=30): ", 30, minimum=1
            )

            print("\nüé≤ Generating text...")
            generated = model.generate(num_words=num_words)
            _print_generated(generated)

            # Show statistics
            print(f"\nüìä Model used {len(model.model)} word patterns")
            print(f"üìè Generated {len(generated.split())} words")

        else:
            print("‚ùå Invalid choice!")
            continue

        # Ask to continue; strip so that "y " or " Y" still counts as yes
        cont = input("\nGenerate more? (y/n): ").strip().lower()
        if cont != 'y':
            break

# =============================================
# PART 5: EDUCATIONAL EXAMPLES
# =============================================

def educational_examples():
    """Show how Markov Chains work step-by-step.

    Prints a hand-worked walkthrough for the text "hello world hello there"
    and then trains an order-1 character model on that same text, printing
    the transition probabilities of the first five states and a 20-character
    sample.  The figures in the walkthrough match what the trained model
    learns (the final 'e' has no successor, so it contributes no transition).
    """
    print("\n" + "="*60)
    print("üìö HOW MARKOV CHAINS WORK")
    print("="*60)

    # Simple example
    text = "hello world hello there"

    print("\n1. Simple example text: 'hello world hello there'")
    print("\n2. Character-level Markov (order=1):")
    print("   - Looks at one previous character")
    print("   - After 'h': 'e' (100% probability)")
    print("   - After 'e': 'l' (67%), 'r' (33%)")

    print("\n3. Training process:")
    print("   - 'h' ‚Üí 'e' (seen 3 times)")
    print("   - 'e' ‚Üí 'l' (2 times), 'r' (1 time)")
    print("   - 'l' ‚Üí 'l' (2 times), 'o' (2 times), 'd' (1 time)")
    print("   - 'o' ‚Üí space (2 times), 'r' (1 time)")

    print("\n4. Generation example:")
    print("   Start with 'h'")
    print("   Next must be 'e' (100%)")
    print("   Next: 67% 'l', 33% 'r'")
    print("   If 'l', then: 40% 'l', 40% 'o', 20% 'd'")
    print("   And so on...")

    # Demo
    print("\n" + "="*40)
    print("DEMO: Character Markov (order=1)")
    print("="*40)

    model = CharMarkovChain(order=1)
    model.train(text)

    print("\nModel probabilities:")
    for state, next_chars in list(model.model.items())[:5]:
        # Convert raw counts into relative frequencies for display
        total = sum(next_chars.values())
        shares = ", ".join(
            f"{char!r}: {count / total:.0%}" for char, count in next_chars.items()
        )
        print(f"  '{state}': {{{shares}}}")

    print("\nGenerated text (20 chars):")
    generated = model.generate(length=20)
    print(f"  '{generated}'")

# =============================================
# PART 6: CUSTOM TEXT INPUT
# =============================================

def custom_text_generator():
    """Train on custom text input.

    Reads lines from standard input until a line containing only ``END``,
    trains a character (order 2) or word (order 1) model on the joined text,
    prints the generated result, and optionally saves it to a randomly
    numbered ``markov_output_NNNN.txt`` file in the current directory.
    """
    print("\n" + "="*60)
    print("‚úçÔ∏è  CUSTOM TEXT GENERATOR")
    print("="*60)

    print("\nEnter your own training text:")
    print("(Type 'END' on a new line when finished)")

    lines = []
    while True:
        line = input()
        if line.strip() == 'END':
            break
        lines.append(line)

    if not lines:
        print("‚ùå No text provided!")
        return

    custom_text = ' '.join(lines)

    if len(custom_text) < 50:
        print("‚ö†Ô∏è  Text is quite short. Results may not be great.")

    print(f"\nüìè Your text: {len(custom_text)} characters")

    # Choose model type
    print("\nChoose model type:")
    print("1. Character Markov")
    print("2. Word Markov")

    choice = input("\nChoice (1/2): ").strip()

    if choice == '1':
        model = CharMarkovChain(order=2)
        model.train(custom_text)

        # Same forgiving parsing as the word branch: bad input -> default
        length_input = input("Output length in chars (default=200): ").strip()
        try:
            length = int(length_input) if length_input else 200
        except ValueError:
            print("Invalid input for output length. Using default (200).")
            length = 200

        generated = model.generate(length=length)

    elif choice == '2':
        model = WordMarkovChain(order=1)
        model.train(custom_text)

        num_words_input = input("Number of words (default=50): ").strip()
        try:
            num_words = int(num_words_input) if num_words_input else 50
        except ValueError:
            print("Invalid input for number of words. Using default (50).")
            num_words = 50

        generated = model.generate(num_words=num_words)

    else:
        print("Invalid choice!")
        return

    print("\n" + "="*60)
    print("üìù GENERATED FROM YOUR TEXT:")
    print("="*60)
    print(generated)
    print("="*60)

    # Save option
    save = input("\nüíæ Save to file? (y/n): ").strip().lower()
    if save == 'y':
        filename = f"markov_output_{random.randint(1000, 9999)}.txt"
        # Explicit UTF-8: user text may contain characters that the platform
        # default encoding cannot represent.
        with open(filename, 'w', encoding='utf-8') as f:
            f.write(generated)
        print(f"‚úÖ Saved as: {filename}")

# =============================================
# MAIN PROGRAM
# =============================================

def main():
    """Main menu.

    Shows the menu, runs the selected action, and shows the menu again after
    each completed action so the options stay visible.  Returns when the user
    picks option 5.
    """

    def show_menu():
        """Print the banner and the list of options."""
        print("\n" + "="*60)
        print("üìù MARKOV CHAIN TEXT GENERATOR")
        print("="*60)

        print("\nWhat would you like to do?")
        print("1. Interactive Demo (Recommended)")
        print("2. Learn How Markov Chains Work")
        print("3. Use Custom Text")
        print("4. Quick Test")
        print("5. Exit")

    show_menu()

    while True:
        choice = input("\nChoose option (1-5): ").strip()

        if choice == '5':
            print("\nüëã Thank you for exploring Markov Chains!")
            break

        if choice == '1':
            interactive_demo()
        elif choice == '2':
            educational_examples()
        elif choice == '3':
            custom_text_generator()
        elif choice == '4':
            # Quick test
            print("\nüöÄ Quick test: Character Markov")
            model = CharMarkovChain(order=2)
            model.train(get_sample_text())
            generated = model.generate(length=150)
            print("\nGenerated text:")
            print("-"*40)
            print(generated)
            print("-"*40)
        else:
            # Re-prompt without repeating the whole menu
            print("Invalid choice!")
            continue

        input("\nPress Enter to continue...")
        # The action's output has scrolled the options away; show them again
        show_menu()

# =============================================
# START THE PROGRAM
# =============================================

if __name__ == "__main__":
    main()

    # Completion summary: kept inside the guard so that importing this module
    # (e.g. to reuse the Markov chain classes) prints nothing.
    print("\n" + "="*60)
    print("‚úÖ PROJECT COMPLETE: MARKOV CHAIN GENERATOR")
    print("="*60)
    print("\nWhat you learned:")
    print("1. ‚úÖ How Markov Chains predict next character/word")
    print("2. ‚úÖ Character-level vs word-level models")
    print("3. ‚úÖ Statistical text generation")
    print("4. ‚úÖ Probability-based predictions")
    print("\nüéâ Great work on PRODIGY_GA_03!")



📝 MARKOV CHAIN TEXT GENERATOR

What would you like to do?
1. Interactive Demo (Recommended)
2. Learn How Markov Chains Work
3. Use Custom Text
4. Quick Test
5. Exit

Choose option (1-5): 1

🤖 INTERACTIVE MARKOV CHAIN GENERATOR

📖 Sample training text (641 characters):
----------------------------------------

    Artificial intelligence is changing the world.
    Machine learning helps computers learn from data.
    Natural language processing understands human language.
    Deep learning uses neural netw...
----------------------------------------

Choose Markov Chain type:
1. Character-level (generates character by character)
2. Word-level (generates word by word)
3. Exit

Your choice (1/2/3): 1

🔤 CHARACTER-LEVEL MARKOV CHAIN
Enter order (1-4, default=2): 3

Creating model with order=3...
📊 Training character-level model (order=3)...
✅ Trained on 589 characters
📈 Learned 416 character patterns
Output length in characters (default=100): Six

🎲 Generating text.