In [8]:
import re
from textblob import TextBlob

def extract_text_features(text_series, *, verbose=True):
    """Extract tokens, sentiment polarity and naive entities from each text.

    Args:
        text_series: pandas Series or list of text messages/emails. Any
            item that is not a ``str`` is treated as an empty string.
        verbose: When true (the default, matching the original behaviour),
            print every text together with its extracted features. Pass
            ``False`` to only compute and return the features.

    Returns:
        A list with one dict per input item, in input order. Each dict has:
        ``"tokens"``    - runs of word characters from the lowercased text,
        ``"sentiment"`` - TextBlob polarity score (-1 to 1),
        ``"entities"``  - capitalized ASCII words of two or more letters.
    """
    # Compile once instead of resolving the patterns on every iteration.
    token_pattern = re.compile(r'\w+')
    # Deliberately simple heuristic: any capitalized word counts, so
    # sentence-initial words (e.g. "Delete", "The") are matched as well.
    entity_pattern = re.compile(r'\b[A-Z][a-zA-Z]+\b')

    features = []
    for index, text in enumerate(text_series, start=1):
        # Non-string items are analysed as empty text rather than raising.
        if not isinstance(text, str):
            text = ""

        sentiment = TextBlob(text).sentiment.polarity  # -1 to 1
        tokens = token_pattern.findall(text.lower())
        entities = entity_pattern.findall(text)

        features.append({
            "tokens": tokens,
            "sentiment": sentiment,
            "entities": entities,
        })

        if verbose:
            print(f"\nText {index}: {text}")
            print(f"  Tokens   : {tokens}")
            print(f"  Sentiment: {sentiment}")
            print(f"  Entities : {entities}")

    return features

# Demo inputs: five short messages used to exercise the extractor.
sample_texts = [
    "Delete the chat logs before the police check your phone.",
    "Meet at the abandoned warehouse at midnight. Don't tell anyone.",
    "Send me the bank account details through WhatsApp, keep it secret.",
    "I wiped the SIM card, no one will find the messages.",
    "The package will be dropped near Central Station tomorrow night.",
]

# Left as a bare trailing expression so the notebook displays the returned
# list of feature dicts in addition to the printed per-text report.
extract_text_features(sample_texts)


Text 1: Delete the chat logs before the police check your phone.
  Tokens   : ['delete', 'the', 'chat', 'logs', 'before', 'the', 'police', 'check', 'your', 'phone']
  Sentiment: 0.0
  Entities : ['Delete']

Text 2: Meet at the abandoned warehouse at midnight. Don't tell anyone.
  Tokens   : ['meet', 'at', 'the', 'abandoned', 'warehouse', 'at', 'midnight', 'don', 't', 'tell', 'anyone']
  Sentiment: 0.0
  Entities : ['Meet', 'Don']

Text 3: Send me the bank account details through WhatsApp, keep it secret.
  Tokens   : ['send', 'me', 'the', 'bank', 'account', 'details', 'through', 'whatsapp', 'keep', 'it', 'secret']
  Sentiment: -0.4
  Entities : ['Send', 'WhatsApp']

Text 4: I wiped the SIM card, no one will find the messages.
  Tokens   : ['i', 'wiped', 'the', 'sim', 'card', 'no', 'one', 'will', 'find', 'the', 'messages']
  Sentiment: 0.0
  Entities : ['SIM']

Text 5: The package will be dropped near Central Station tomorrow night.
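  Tokens   : ['the', 'package', 'will', 'be', 'dropped', 'near', 'central', 'station', 'tomorrow', 'night']
  Sentiment: 0.05
  Entities : ['The', 'Central', 'Station']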

[{'tokens': ['delete',
   'the',
   'chat',
   'logs',
   'before',
   'the',
   'police',
   'check',
   'your',
   'phone'],
  'sentiment': 0.0,
  'entities': ['Delete']},
 {'tokens': ['meet',
   'at',
   'the',
   'abandoned',
   'warehouse',
   'at',
   'midnight',
   'don',
   't',
   'tell',
   'anyone'],
  'sentiment': 0.0,
  'entities': ['Meet', 'Don']},
 {'tokens': ['send',
   'me',
   'the',
   'bank',
   'account',
   'details',
   'through',
   'whatsapp',
   'keep',
   'it',
   'secret'],
  'sentiment': -0.4,
  'entities': ['Send', 'WhatsApp']},
 {'tokens': ['i',
   'wiped',
   'the',
   'sim',
   'card',
   'no',
   'one',
   'will',
   'find',
   'the',
   'messages'],
  'sentiment': 0.0,
  'entities': ['SIM']},
 {'tokens': ['the',
   'package',
   'will',
   'be',
   'dropped',
   'near',
   'central',
   'station',
   'tomorrow',
   'night'],
  'sentiment': 0.05,
  'entities': ['The', 'Central', 'Station']}]