In [22]:
from azure.core.credentials import AzureKeyCredential
from azure.ai.textanalytics import TextAnalyticsClient
from openai import AzureOpenAI
from datetime import datetime, timedelta
import random
from typing import List, Dict, Any
import json


class AzureContentGenerator:
    def __init__(self,
                 language_endpoint: str,
                 language_key: str,
                 openai_endpoint: str,
                 openai_key: str,
                 deployment_name: str = "gpt-35-turbo",  # or your deployment name
                 temperature: float = 0.7):
        """
        Initialize Azure services for content generation
        """
        # Initialize Azure Language Service
        # self.text_analytics_client = TextAnalyticsClient(
        #     endpoint=language_endpoint,
        #     credential=AzureKeyCredential(language_key)
        # )

        # Initialize Azure OpenAI
        self.openai_client = AzureOpenAI(
            azure_endpoint=openai_endpoint,
            api_key=openai_key,
            api_version="2024-02-15-preview"
        )

        self.deployment_name = deployment_name
        self.temperature = temperature

        # Domain concepts for guidance
        self.domains = {
            'tech': [
                'artificial intelligence', 'machine learning', 'blockchain',
                'cloud computing', 'cybersecurity', 'IoT'
            ],
            'applications': [
                'healthcare', 'finance', 'education', 'transportation',
                'manufacturing', 'retail'
            ]
        }

    def _generate_base_content(self, topic: str) -> str:
        """Generate base content using Azure OpenAI"""
        system_prompt = """
        You are a professional content creator generating social media posts. 
        Generate natural, conversational content about technology trends.
        Keep the tone informal but insightful.
        Do not use hashtags or mentions.
        Keep the message between 50-100 characters.
        Focus on observations, insights, and implications.
        """

        user_prompt = f"""
        Create a natural social media post about {topic}.
        Make it sound like a genuine observation or insight, not promotional.
        Include one specific detail or implication.
        """

        response = self.openai_client.chat.completions.create(
            model=self.deployment_name,
            temperature=self.temperature,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ]
        )

        return response.choices[0].message.content.strip()

    def _analyze_content(self, text: str) -> Dict[str, Any]:
        """Analyze content using Azure Language Service"""
        try:
            # Entity recognition
            entity_response = self.text_analytics_client.recognize_entities([text])[0]
            entities = [entity.text for entity in entity_response.entities]

            # Key phrase extraction
            key_phrase_response = self.text_analytics_client.extract_key_phrases([text])[0]
            key_phrases = key_phrase_response.key_phrases

            # Sentiment analysis
            sentiment_response = self.text_analytics_client.analyze_sentiment([text])[0]
            sentiment = {
                'sentiment': sentiment_response.sentiment,
                'positive_score': sentiment_response.confidence_scores.positive,
                'neutral_score': sentiment_response.confidence_scores.neutral,
                'negative_score': sentiment_response.confidence_scores.negative
            }

            return {
                'entities': entities,
                'key_phrases': key_phrases,
                'sentiment': sentiment
            }

        except Exception as e:
            print(f"Error in content analysis: {e}")
            return {
                'entities': [],
                'key_phrases': [],
                'sentiment': {'sentiment': 'neutral', 'scores': None}
            }

    def generate_message(self, timestamp: datetime) -> Dict[str, Any]:
        """Generate a complete message with analysis"""
        # Select topic
        domain = random.choice(list(self.domains.keys()))
        topic = random.choice(self.domains[domain])

        # Generate content
        text = self._generate_base_content(topic)

        # Analyze content
        # analysis = self._analyze_content(text)

        return {
            'timestamp': timestamp.isoformat(),
            'text': text,
            'topic': topic,
            'domain': domain,
            # 'entities': analysis['entities'],
            # 'key_phrases': analysis['key_phrases'],
            # 'sentiment': analysis['sentiment'],
            'user_id': f"user_{random.randint(1, 10000)}",
            'engagement_score': random.randint(1, 1000)
        }

    def generate_batch(self,
                       start_time: datetime,
                       batch_size: int,
                       max_retries: int = 3) -> List[Dict[str, Any]]:
        """Generate a batch of messages with retry logic"""
        messages = []

        for i in range(batch_size):
            retries = 0
            while retries < max_retries:
                try:
                    message = self.generate_message(
                        start_time + timedelta(seconds=i)
                    )
                    messages.append(message)
                    break
                except Exception as e:
                    print(f"Error generating message (attempt {retries + 1}): {e}")
                    retries += 1
                    if retries == max_retries:
                        print(f"Failed to generate message after {max_retries} attempts")

        return messages


In [26]:
# Azure credentials would normally come from environment variables
AZURE_LANGUAGE_ENDPOINT = ""
AZURE_LANGUAGE_KEY = ""
AZURE_OPENAI_ENDPOINT = "https://my-first-open-ai-service.openai.azure.com/openai/deployments/gpt-35-turbo/chat/completions?api-version=2024-08-01-preview"
AZURE_OPENAI_KEY = ""

# Initialize generator
generator = AzureContentGenerator(
    language_endpoint=AZURE_LANGUAGE_ENDPOINT,
    language_key=AZURE_LANGUAGE_KEY,
    openai_endpoint=AZURE_OPENAI_ENDPOINT,
    openai_key=AZURE_OPENAI_KEY
)

# Generate sample messages
messages = generator.generate_batch(
    start_time=datetime.now(),
    batch_size=50
)

# Display results
for message in messages:
    print("\nGenerated Message:")
    print(f"Text: {message['text']}")
    print(f"Topic: {message['topic']}")
    # print(f"Entities: {message['entities']}")
    # print(f"Key Phrases: {message['key_phrases']}")
    # print(f"Sentiment: {message['sentiment']['sentiment']}")
    print("-" * 80)


Generated Message:
Text: Have you noticed how electric scooters are becoming more popular in cities? They're convenient and eco-friendly, but I wonder how they'll impact traffic flow.
Topic: transportation
--------------------------------------------------------------------------------

Generated Message:
Text: Wow, telehealth has really taken off this year. It's amazing how much can be done virtually now. I heard that 60% of doctors are now using telemedicine to treat patients.
Topic: healthcare
--------------------------------------------------------------------------------

Generated Message:
Text: "Machine learning is transforming the way we approach data analysis. It's incredible how algorithms can identify patterns and make predictions with such accuracy. But with great power comes great responsibility - we need to ensure that our data is diverse and representative to avoid bias in our models. #MachineLearning #DataAnalysis"
Topic: machine learning
------------------------------

In [134]:
%load_ext autoreload
%autoreload 2

from events import GLOBAL_EVENTS
from azure_generator import AzureContentGenerator

AZURE_OPENAI_ENDPOINT = "https://my-first-open-ai-service.openai.azure.com/openai/deployments/gpt-35-turbo/chat/completions?api-version=2024-08-01-preview"
AZURE_OPENAI_KEY = "uClNQwvESsEPxSFhKKonjSfIa8KDKUsyzLo7wl0rHzSpTI2qd40fJQQJ99AKACYeBjFXJ3w3AAABACOGgkTy"

# Initialize generator


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [143]:
messages = []

In [163]:
generator = AzureContentGenerator(
    openai_endpoint=AZURE_OPENAI_ENDPOINT,
    openai_key=AZURE_OPENAI_KEY,
    temperature=1.2,
)

for j, event in enumerate(GLOBAL_EVENTS):
    for i in range(30):
        msg = generator.generate_message(event)
        messages.append(msg)
        print(j, i)
        # print(msg['text'])
        # print('-'*80)

0 0
0 1
0 2
0 3
0 4
0 5
0 6
0 7
0 8
0 9
0 10
0 11
0 12
0 13
0 14
0 15
0 16
0 17
0 18
0 19
0 20
0 21
0 22
0 23
0 24
0 25
0 26
0 27
0 28
0 29
1 0
1 1
1 2
1 3
1 4
1 5
1 6
1 7
1 8
1 9
1 10
1 11
1 12
1 13
1 14
1 15
1 16
1 17
1 18
1 19
1 20
1 21
1 22
1 23
1 24
1 25
1 26
1 27
1 28
1 29
2 0
2 1
2 2
2 3
2 4
2 5
2 6
2 7
2 8
2 9
2 10
2 11
2 12
2 13
2 14
2 15
2 16
2 17
2 18
2 19
2 20
2 21
2 22
2 23
2 24
2 25
2 26
2 27
2 28
2 29
3 0
3 1
3 2
3 3
3 4
3 5
3 6
3 7
3 8
3 9
3 10
3 11
3 12
3 13
3 14
3 15
3 16
3 17
3 18
3 19
3 20
3 21
3 22
3 23
3 24
3 25
3 26
3 27
3 28
3 29
4 0
4 1
4 2
4 3
4 4
4 5
4 6
4 7
4 8
4 9
4 10
4 11
4 12
4 13
4 14
4 15
4 16
4 17
4 18
4 19
4 20
4 21
4 22
4 23
4 24
4 25
4 26
4 27
4 28
4 29
5 0
5 1
5 2
5 3
5 4
5 5
5 6
5 7
5 8
5 9
5 10
5 11
5 12
5 13
5 14
5 15
5 16
5 17
5 18
5 19
5 20
5 21
5 22
5 23
5 24
5 25
5 26
5 27
5 28
5 29
6 0
6 1
6 2
6 3
6 4
6 5
6 6
6 7
6 8
6 9
6 10
6 11
6 12
6 13
6 14
6 15
6 16
6 17
6 18
6 19
6 20
6 21
6 22
6 23
6 24
6 25
6 26
6 27
6 28
6 29


In [153]:
pd.DataFrame(messages).head()

Unnamed: 0,event_name,category,text
0,SpaceX launch,TECH,"""Amazing Falcon Heavy launch from Cape Canaver..."
1,UFO Sighting,TECH/NATURE,"""Can't believe what I just saw! Large triangul..."
2,Cryptocurrency price crash,TECH,"""That #CryptoCrash tho! Dramacoin drops 30% in..."
3,Birth of AI influencer,TECH,"""Just witnessed the birth of a new era in infl..."
4,Champions League Final in Berlin,SPORTS,"""Unbelievable finish! Manchester City clinches..."


In [164]:
len(messages)

427

In [201]:
df_global = pd.DataFrame(messages)

In [167]:
df_global.to_json('messages.json')

In [184]:
from events import LOCAL_EVENTS
local_events_messages = []

In [189]:
generator = AzureContentGenerator(
    openai_endpoint=AZURE_OPENAI_ENDPOINT,
    openai_key=AZURE_OPENAI_KEY,
    temperature=1.2,
)

for j, event in enumerate(LOCAL_EVENTS):
    for i in range(30):
        msg = generator.generate_message(event)
        local_events_messages.append(msg)


        
        print(j, i)
        # print(msg['text'])
        # print('-'*80)

0 0
0 1
0 2
0 3
0 4
0 5
0 6
0 7
0 8
0 9
0 10
0 11
0 12
0 13
0 14
0 15
0 16
0 17
0 18
0 19
0 20
0 21
0 22
0 23
0 24
0 25
0 26
0 27
0 28
0 29
1 0
1 1
1 2
1 3
1 4
1 5
1 6
1 7
1 8
1 9
1 10
1 11
1 12
1 13
1 14
1 15
1 16
1 17
1 18
1 19
1 20
1 21
1 22
1 23
1 24
1 25
1 26
1 27
1 28
1 29
2 0
2 1
2 2
2 3
2 4
2 5
2 6
2 7
2 8
2 9
2 10
2 11
2 12
2 13
2 14
2 15
2 16
2 17
2 18
2 19
2 20
2 21
2 22
2 23
2 24
2 25
2 26
2 27
2 28
2 29
3 0
3 1
3 2
3 3
3 4
3 5
3 6
3 7
3 8
3 9
3 10
3 11
3 12
3 13
3 14
3 15
3 16
3 17
3 18
3 19
3 20
3 21
3 22
3 23
3 24
3 25
3 26
3 27
3 28
3 29
4 0
4 1
4 2
4 3
4 4
4 5
4 6
4 7
4 8
4 9
4 10
4 11
4 12
4 13
4 14
4 15
4 16
4 17
4 18
4 19
4 20
4 21
4 22
4 23
4 24
4 25
4 26
4 27
4 28
4 29
5 0
5 1
5 2
5 3
5 4
5 5
5 6
5 7
5 8
5 9
5 10
5 11
5 12
5 13
5 14
5 15
5 16
5 17
5 18
5 19
5 20
5 21
5 22
5 23
5 24
5 25
5 26
5 27
5 28
5 29
6 0
6 1
6 2
6 3
6 4
6 5
6 6
6 7
6 8
6 9
6 10
6 11
6 12
6 13
6 14
6 15
6 16
6 17
6 18
6 19
6 20
6 21
6 22
6 23
6 24
6 25
6 26
6 27
6 28
6 29
7 0
7 1
7 2
7 3
7 4


AttributeError: 'NoneType' object has no attribute 'strip'

In [199]:
df_local = pd.DataFrame(local_events_messages)

In [200]:
df_local.groupby('event_name').count()

Unnamed: 0_level_0,category,text
event_name,Unnamed: 1_level_1,Unnamed: 2_level_1
Cherry blossom peak in Tokyo,91,91
Flash mob in Seoul,61,61
Fog covering San Francisco,43,43
Food festival in Rome,61,61
Food market rush in Bangkok,61,61
Local festival in Rio de Janeiro,31,31
"Local protest in Santiago, Chile",61,61
Pop-up market in Barcelona,61,61
Power outage in Sydney,61,61
Public transport delay in Toronto,61,61


In [203]:
df2.groupby('event_name').count()

Unnamed: 0_level_0,category,text
event_name,Unnamed: 1_level_1,Unnamed: 2_level_1
"""Makajambo Dance"" TikTok challenge",61,61
Birth of AI influencer,61,61
Champions League Final in Berlin,61,61
Cryptocurrency price crash,61,61
International streaming show Fake Wars finale,61,61
SpaceX launch,61,61
UFO Sighting,61,61


In [251]:
from tqdm import tqdm
global_messages = df_global.to_dict(orient='records')
local_messages = df_local.to_dict(orient='records')

In [289]:
def generate_messages(messages, events, total_number=100):
    generator = AzureContentGenerator(
        openai_endpoint=AZURE_OPENAI_ENDPOINT,
        openai_key=AZURE_OPENAI_KEY,
        temperature=1.2,
    )

    df = pd.DataFrame(messages)
    counts = df.groupby('event_name')['category'].count().to_dict()

    max_retries = 3
    
    for event in events:
        event_remaining = max(0, total_number - counts[event.event_name])
        print(event.event_name, event_remaining)
        for i in tqdm(range(event_remaining)):
            retries = 0
            while retries < max_retries:
                try:
                    message = generator.generate_message(event)
                    messages.append(message)
                    break
                except Exception as e:
                    tqdm.write(f"Error generating message (attempt {retries + 1}): {e}")
                    retries += 1
                    if retries < max_retries:
                        time.sleep(2)
                    else:
                        tqdm.write(f"Failed to generate message after {max_retries} attempts")
                       

# generate_messages(global_messages, GLOBAL_EVENTS, total_number=300)
generate_messages(local_messages, LOCAL_EVENTS, total_number=100)

Cherry blossom peak in Tokyo 9


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:06<00:00,  1.39it/s]


Traffic pile-up in Los Angeles 25


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 25/25 [00:14<00:00,  1.76it/s]


Pop-up market in Barcelona 39


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 39/39 [00:23<00:00,  1.63it/s]


Street flooding in Mumbai 39


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 39/39 [00:23<00:00,  1.63it/s]


Food festival in Rome 39


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 39/39 [00:24<00:00,  1.59it/s]


Power outage in Sydney 39


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 39/39 [00:20<00:00,  1.93it/s]


Unusually Dark Night in Stockholm 39


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 39/39 [00:22<00:00,  1.73it/s]


Street performance in New York 39


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 39/39 [00:23<00:00,  1.69it/s]


Local protest in Santiago, Chile 39


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 39/39 [00:20<00:00,  1.94it/s]


Flash mob in Seoul 39


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 39/39 [00:23<00:00,  1.66it/s]


School event in London 39


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 39/39 [00:22<00:00,  1.75it/s]


Restaurant rush in Paris 39


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 39/39 [00:22<00:00,  1.76it/s]


Sports celebration in London, UK 39


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 39/39 [00:20<00:00,  1.90it/s]


Street art appearance at Melbourne 39


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 39/39 [00:22<00:00,  1.76it/s]


Public transport delay in Toronto 39


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 39/39 [00:21<00:00,  1.78it/s]


Unusual animal migration in Perth 39


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 39/39 [00:24<00:00,  1.62it/s]


Rainbow phenomenon in Dublin 39


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 39/39 [00:21<00:00,  1.81it/s]


Food market rush in Bangkok 39


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 39/39 [00:22<00:00,  1.76it/s]


Fog covering San Francisco 57


 82%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                           | 47/57 [00:28<00:05,  1.67it/s]

Error generating message (attempt 1): 'NoneType' object has no attribute 'strip'


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 57/57 [00:36<00:00,  1.58it/s]


Local festival in Rio de Janeiro 69


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 69/69 [00:45<00:00,  1.52it/s]


Public transport strike in Berlin 78


 62%|████████████████████████████████████████████████████████████████████████████████████████████████▌                                                            | 48/78 [00:26<00:16,  1.81it/s]

Error generating message (attempt 1): 'NoneType' object has no attribute 'strip'


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 78/78 [00:45<00:00,  1.71it/s]


In [292]:
len(local_messages), len(global_messages)

(2100, 2121)

In [291]:
pd.DataFrame(local_messages).to_json('local_events_messages_2.json')

In [299]:
from azure.core.credentials import AzureKeyCredential
from azure.ai.textanalytics import TextAnalyticsClient

LANGUAGE_SERVICE_ENDPOINT = 'https://my-first-language-resource.cognitiveservices.azure.com/'
LANGUAGE_SERVICE_KEY = 'FjgRszezBgyHl7HaXd1KuSnN2drRfVtR5ObBlUn88a8cLps16s7yJQQJ99AKACYeBjFXJ3w3AAAaACOGdF7j'

text_analytics_client = TextAnalyticsClient(
    endpoint=LANGUAGE_SERVICE_ENDPOINT,
    credential=AzureKeyCredential(LANGUAGE_SERVICE_KEY)
)

def get_text_analysis(text):

    # Categories
    # categories_response = text_analytics_client.single_category_classify([text])[0]
    # categories = [(c.category, c.score) for category in categories_response.categories]
    
    # Entity recognition
    entity_response = text_analytics_client.recognize_entities([text])[0]
    entities = [entity.text for entity in entity_response.entities]
    
    # Key phrase extraction
    key_phrase_response = text_analytics_client.extract_key_phrases([text])[0]
    key_phrases = key_phrase_response.key_phrases
    
    # Sentiment analysis
    sentiment_response = text_analytics_client.analyze_sentiment([text])[0]
    sentiment = {
        'sentiment': sentiment_response.sentiment,
        'positive_score': sentiment_response.confidence_scores.positive,
        'neutral_score': sentiment_response.confidence_scores.neutral,
        'negative_score': sentiment_response.confidence_scores.negative
    }
    
    return {
        'entities': entities,
        'key_phrases': key_phrases,
        'sentiment': sentiment,
        # 'categories': categories,
    }

print(get_text_analysis('LeBron James scored 35 points as the Lakers beat the Warriors at Staples Center in overtime! The crowd went wild as he made the game-winning shot. Traffic around Downtown LA is crazy right now.'))



{'entities': ['LeBron James', '35', 'Lakers', 'Warriors', 'Staples Center', 'crowd', 'Downtown LA', 'now'], 'key_phrases': ['LeBron James', 'Staples Center', 'game-winning shot', 'Downtown LA', 'Lakers', 'Warriors', 'overtime', 'crowd', 'Traffic'], 'sentiment': {'sentiment': 'negative', 'positive_score': 0.04, 'neutral_score': 0.28, 'negative_score': 0.68}}


In [3]:
import spacy

# Load once in rich function
nlp = spacy.load("en_core_web_sm")


In [5]:
doc = nlp('LeBron James scored 35 points as the Lakers beat the Warriors at Staples Center in overtime! The crowd went wild as he made the game-winning shot. Traffic around Downtown LA is crazy right now.')


          

In [8]:
{
        "entities": [(ent.text, ent.label_) for ent in doc.ents],
        "noun_phrases": [chunk.text for chunk in doc.noun_chunks 
                   if not chunk.root.pos_ == 'PRON'],
        # "key_words": [token.text for token in doc if not token.is_stop and not token.is_punct]
    }

{'entities': [('LeBron James', 'PERSON'), ('35', 'CARDINAL'), ('LA', 'GPE')],
 'noun_phrases': ['LeBron James',
  'the Lakers',
  'the Warriors',
  'Staples Center',
  'overtime',
  'The crowd',
  'the game-winning shot',
  'Traffic',
  'Downtown LA']}

In [12]:
entities = [ent.text for ent in doc.ents]

noun_phrases = [chunk.text for chunk in doc.noun_chunks
           if not chunk.root.pos_ == 'PRON']

set(entities + noun_phrases)

{'35',
 'Downtown LA',
 'LA',
 'LeBron James',
 'Staples Center',
 'The crowd',
 'Traffic',
 'overtime',
 'the Lakers',
 'the Warriors',
 'the game-winning shot'}