In [3]:
# **************************************************************************************************************************************** #
# **************************************************************************************************************************************** #
# **************************************    FAKER DATA SCRIPTS WITH OPEN AI        ******************************************************* #
# **************************************************************************************************************************************** #
# **************************************************************************************************************************************** #
# Author: Miguel Mares
# Date: 12-18-2024
# Description: Faker data notebook that generates fake blog posts with OpenAI; the posts are then imported and made public through the website.
# **************************************************************************************************************************************** #

In [None]:
# **************************************************************************************************************************************** #
# **************************************************************************************************************************************** #
# IMPORTS
# **************************************************************************************************************************************** #
# **************************************************************************************************************************************** #

from azure.identity import DefaultAzureCredential
from azure.keyvault.secrets import SecretClient
from openai import AzureOpenAI
from datetime import datetime
import subprocess
import pyodbc
import json
import os 
import re

# Key Vault URL: read from the KEY_VAULT_URL environment variable when it is set;
# otherwise replace the empty fallback below with your Key Vault URL.
key_vault_url = os.environ.get("KEY_VAULT_URL", "")

# Create a DefaultAzureCredential instance
credential = DefaultAzureCredential()

# Create a SecretClient instance. It gets its own name so it is not silently
# replaced by the AzureOpenAI client that is bound to `client` further down.
secret_client = SecretClient(vault_url=key_vault_url, credential=credential)

# **************************************************************************************************************************************** #
# **************************************************************************************************************************************** #
# KEYS AND SECRETS
# **************************************************************************************************************************************** #
# **************************************************************************************************************************************** #
open_ai_endpoint = secret_client.get_secret("mmgwoaiendpoint")
open_ai_key = secret_client.get_secret("mmgwoaikey")
open_ai_api = secret_client.get_secret("mmgwoaiapi")
username_secret = secret_client.get_secret("mmgwsqluser")
password_secret = secret_client.get_secret("mmgwsqlpwd")
server_name = secret_client.get_secret("mmgwsqlserver")
db_name = secret_client.get_secret("mmgwsqldbname")

# Azure OpenAI client; later cells refer to it as `client`
client = AzureOpenAI(
    azure_endpoint=open_ai_endpoint.value,
    api_key=open_ai_key.value,
    api_version=open_ai_api.value,
)

# DB Configuration (plain values read by connectToSql)
username = username_secret.value
password = password_secret.value
server = server_name.value
db = db_name.value

# **************************************************************************************************************************************** #
# **************************************************************************************************************************************** #
# FUNCTIONS
# **************************************************************************************************************************************** #
# **************************************************************************************************************************************** #

# Runs `azd auth login` from Python so no separate PowerShell cmdlet is needed.
def azd_auth_login():
    """Run ``azd auth login`` and print what the command reports.

    The command's standard output is printed on success. If the command exits
    with a non-zero status, its standard error is printed after ``Error: ``.
    Nothing is returned in either case.
    """
    login_command = ['azd', 'auth', 'login']
    try:
        # check=True turns a non-zero exit status into CalledProcessError
        completed = subprocess.run(
            login_command,
            check=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        command_output = completed.stdout.decode()
        print(command_output)
    except subprocess.CalledProcessError as login_error:
        error_output = login_error.stderr.decode()
        print(f"Error: {error_output}")


# Open a connection to the SQL Server database
def connectToSql():
    """Open a pyodbc connection using the module-level SQL settings.

    The connection string is assembled from the module-level ``server``,
    ``db``, ``username`` and ``password`` values and targets
    ODBC Driver 18 for SQL Server.

    Returns:
        The pyodbc connection on success. If pyodbc raises
        ``OperationalError`` the error is printed and ``None`` is returned,
        so callers must check the result before using it.
    """
    connection_string = ''.join([
        'DRIVER={ODBC Driver 18 for SQL Server};',
        f'SERVER={server};',
        f'DATABASE={db};',
        f'UID={username};',
        f'PWD={password}',
    ])
    try:
        return pyodbc.connect(connection_string)
    except pyodbc.OperationalError as connect_error:
        print(f"OperationalError: {connect_error}")
        return None


def split_json_string(json_string):
    """Split text holding several JSON arrays into one string per array.

    The arrays are expected to be separated by a newline followed by ``[``.
    Splitting on that separator consumes the bracket, so it is put back on
    every fragment that lost it.

    Before splitting, the text is tidied:
      * surrounding whitespace and wrapping apostrophes are trimmed;
      * whitespace directly in front of a double quote is removed.

    Apostrophes inside the text are kept on purpose: deleting all of them
    would corrupt ordinary content such as "it's".

    Args:
        json_string: Raw text containing one or more JSON arrays.

    Returns:
        A list of strings, each starting with ``[``. Empty or whitespace-only
        input (or ``None``) yields an empty list.
    """
    if not json_string:
        return []

    cleaned_response = json_string.strip().strip("'").strip()
    if not cleaned_response:
        return []

    cleaned_response = re.sub(r'\s+"', '"', cleaned_response)

    fragments = cleaned_response.split('\n[')
    # Restore the '[' that the split separator removed from later fragments
    return [frag if frag.startswith('[') else '[' + frag for frag in fragments]



In [118]:
# **************************************************************************************************************************************** #
# **************************************************************************************************************************************** #
# GENERAL FUNCTIONS AND CLASSES
# **************************************************************************************************************************************** #
# **************************************************************************************************************************************** #

# Base prompt sent to the chat model for every request.
# It names five authors together with their PersonIDs, asks for short,
# work-appropriate posts, and lists the JSON attributes each post must carry.
# NOTE(review): the structure shown at the end of the prompt is a single JSON
# object, while the generation loop iterates over the parsed response as a
# sequence of posts — confirm the model reliably returns an array.
base_prompt = """
You have a list of users who like to post blogs, Kayla Woodcock her PersonID is 2, Hudson Onslow his PersonID is 3, Isabella Rupp her personID is 4, 
Eva Muirden her PersonID is 5, Sophia Hinton her PersonID is 6. Write a blog post about some of their hobbies, likes and interests, from each one of their own
personal perpective give us some insight into who they are, 
their family, friends, give them names. 
keep them work appropriate and professional.. 
Also go into detail as much as possible.  
Keep these posts no longer than 5-10 sentences. Make these posts make sense from a timeline perspective. 
Don't include their personID in the content attribute. For Kayla only  add some maybe work secrets, or things that could be deemed exposing too much information that could make somone vulnerable to a social engineering attack.
Please make sure to remove rows where the PersonId is missing. 
Output as JSON with the following structure:
{
    "title": "Blog Post Title",
    "author": "The author should be the person the blog is about",
    "publish_date": "A random date any where between 2020 and 2024, posts must not contradict in regards to time, in proper datetime format",
    "content": "Section content, should be first person point of view but not always.",
    "tags": "Any Meta data, subject tagging to help identify these posts in the future, comma separated but one string"  ,
    "PersonID":"Their PersonID"
}
"""

# Builds the prompt text for one request
def generate_unique_prompt(base_prompt, index):
    """Return the prompt to send for request number ``index``.

    The base prompt is returned in its formatted (string) form; ``index`` is
    accepted but currently has no influence on the result.
    """
    prompt_text = format(base_prompt)
    return prompt_text

# Number of completion requests to send. Each response is expected to contain
# several posts (one per person named in the prompt), so the number of posts
# collected is normally larger than this value.
NUM_REQUESTS = 19

# Required attributes: a post is only kept when all of these are present
required_attributes = ["title", "author", "publish_date", "content", "tags", "PersonID"]


def _parse_posts(raw_content):
    """Parse one model response into a list of candidate posts.

    Args:
        raw_content: The text of the chat completion message (may be None).

    Returns:
        A list of parsed items. A single JSON object is wrapped in a list so
        callers can always iterate over posts; empty content or any other
        JSON value yields an empty list.

    Raises:
        json.JSONDecodeError: If the content is not valid JSON.
    """
    if not raw_content:
        return []
    parsed = json.loads(raw_content)
    if isinstance(parsed, dict):
        return [parsed]
    if isinstance(parsed, list):
        return parsed
    return []


# Posts collected across all requests
blog_posts = []

for i in range(1, NUM_REQUESTS + 1):
    unique_prompt = generate_unique_prompt(base_prompt, i)
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "user", "content": unique_prompt}
        ],
        # If MAX TOKENS isn't set to a significant amount, you run the risk of responses being truncated in the middle of a JSON Object.
        max_tokens=3000,
        n=1,
        stop=None,
        temperature=0.7
    )

    # Convert the JSON string to a list of dictionaries. Parsing happens inside
    # the try so that one malformed response only skips this request instead of
    # aborting the run and discarding the posts collected so far.
    try:
        json_objects = _parse_posts(response.choices[0].message.content)
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON: {e}")
        continue

    for item in json_objects:
        # Only dictionaries can be posts; for a plain string `attr in item`
        # would be a substring test and give misleading results.
        if isinstance(item, dict) and all(attr in item for attr in required_attributes):
            blog_posts.append(item)
        else:
            print(f"Item missing required attributes: {item}")

print(blog_posts)

print(datetime.now())




[{'title': 'My Passion for Photography', 'author': 'Kayla Woodcock', 'publish_date': '2022-06-15T12:00:00', 'content': 'Ever since I was a child, I have always been fascinated by capturing moments through a lens. My family and friends often joke that I see the world in frames. My best friend, Lily, is always my willing model for new photography experiments. On weekends, you can find me exploring new locations to add to my portfolio. My love for photography even extends to my work, where I secretly have a collection of candid office shots.', 'tags': 'photography, hobbies, passion', 'PersonID': 2}, {'title': 'The Thrill of Adventure', 'author': 'Hudson Onslow', 'publish_date': '2023-09-27T10:30:00', 'content': "Adventure runs in my blood. My father, James, used to take me on camping trips since I was a kid. My group of friends, including Jake and Sarah, are always up for a spontaneous hike or road trip. Whether it's scaling a mountain or exploring a new city, I'm always seeking the next 

In [None]:
# **************************************************************************************************************************************** #
# **************************************************************************************************************************************** #
# CRUD OPERATIONS
# **************************************************************************************************************************************** #
# **************************************************************************************************************************************** #
#

# Call the function to authenticate and login
azd_auth_login()

# Open the connection. connectToSql() prints the error and returns None when
# the connection fails, so stop here with a clear message instead of failing
# later with an AttributeError on None.
conn = connectToSql()
if conn is None:
    raise RuntimeError("Could not connect to SQL Server; see the OperationalError printed above.")

# Creating Cursor for insert
cursor = conn.cursor()

try:
    # Create table if it doesn't exist.
    # content is NVARCHAR(MAX): TEXT is deprecated in SQL Server and is not a
    # Unicode type, so generated text with non-ASCII characters could be mangled.
    # The foreign key names the column exactly as declared (PersonID) so the
    # statement also works on case-sensitive collations.
    cursor.execute('''
    IF NOT EXISTS (SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA = 'Application' AND TABLE_NAME = 'BlogPosts')
    CREATE TABLE Application.BlogPosts (
        id INT IDENTITY(1,1) PRIMARY KEY,
        title NVARCHAR(255),
        author NVARCHAR(255),
        publish_date DATETIME,
        content NVARCHAR(MAX),
        tags NVARCHAR(255),
        PersonID INT,
        FOREIGN KEY (PersonID) REFERENCES Application.People(PersonID)
    )
    ''')

    # Insert the blog posts into the database (values are bound as parameters)
    for post in blog_posts:
        print(post)
        cursor.execute('''
        INSERT INTO Application.BlogPosts (title, author, publish_date, content, tags, PersonID)
        VALUES (?, ?, ?, ?, ?, ?)
        ''', (post['title'], post['author'], post['publish_date'], post['content'], post['tags'], post['PersonID']))

    # Commit the transaction only after every insert succeeded
    conn.commit()
except Exception:
    # Undo the partial batch so a failed run does not leave half the posts behind
    conn.rollback()
    raise
finally:
    # Close the cursor and connection whether or not the inserts succeeded
    cursor.close()
    conn.close()

print(datetime.now())