In [13]:
# Cell 1: Imports
import google.generativeai as genai
import os
from datetime import datetime
import re

In [14]:
# Cell 2: API Key and Model Setup
# The Gemini API key is read from the environment; the cell aborts early when
# the variable is missing so the key never has to be written into the notebook.
GOOGLE_API_KEY = os.environ.get('GOOGLE_API_KEY')
if not GOOGLE_API_KEY:
    raise ValueError("Please set the GOOGLE_API_KEY environment variable")

genai.configure(api_key=GOOGLE_API_KEY)

# Generation settings passed to the model when it is constructed below.
generation_config = dict(
    temperature=0.9,
    top_p=0.9,
    top_k=10,
    max_output_tokens=2048,
)

# One safety entry per harm category; every category uses the same threshold.
safety_settings = [
    {"category": category, "threshold": "BLOCK_MEDIUM_AND_ABOVE"}
    for category in (
        "HARM_CATEGORY_HARASSMENT",
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_DANGEROUS_CONTENT",
    )
]

# Shared model instance used by the cells that follow.
model = genai.GenerativeModel(
    model_name="gemini-1.5-pro-latest",
    generation_config=generation_config,
    safety_settings=safety_settings,
)

In [15]:
# Cell 3: Prompt Definition
# Prompt sent verbatim to the model by generate_client_and_problem().
# It asks for ONE first-person client profile plus ONE business problem, and
# requires the reply to begin with the client's full name followed by a colon
# (e.g. "John Smith:"). extract_client_name() depends on that leading
# "Name:" format to recover the name.
# NOTE: the leading newline, the four-space indentation of every line and the
# trailing whitespace are all part of the string literal and are sent as-is.
client_sim_prompt = """
    You are a creative writing expert tasked with generating diverse and unique client profiles.
    Generate **ONE** client profile and the details of **ONE** realistic business problem that this client needs solved.
    Make sure that this client is completely unique and different from any other client that you have previously generated, they must have a different name, a different background, and come from a different period of time.
    As the client, describe your personality, background, your understanding of data analysis, and any specific quirks or communication styles you might have, make it somewhat unpredictable and imaginative. The client should have a minimum length of 100 words.
    Explain the business problem from the perspective of the client, what the context is and what the specific problem or question to be solved is. Include fictional demands, caveats, and surprises that are relevant to me as the client, and make sure these are different from previous prompts.
    Speak in first person, and make sure you include your full name, as the very first string in the response in the following format, **as an example**:
    John Smith:
    """

In [16]:
# Cell 4: Helper Functions
def generate_client_and_problem(model, log_documentation, prompt=None):
    """Ask the model for one client profile and one business problem.

    Args:
        model: Object exposing ``generate_content(prompt)`` whose return value
            has a ``text`` attribute (in this notebook, the configured model).
        log_documentation: Callable accepting a single string; it receives
            progress messages, the prompt that was sent and the raw response.
        prompt: Prompt text to send. When omitted or ``None`` the
            notebook-level ``client_sim_prompt`` is used, which keeps existing
            two-argument calls working unchanged.

    Returns:
        The text of the model's response.
    """
    log_documentation("Generating client and problem")
    if prompt is None:
        # Fall back to the notebook-wide default prompt.
        prompt = client_sim_prompt
    response = model.generate_content(prompt)
    log_documentation(f"Generated client and problem with prompt: {prompt}")
    # Read the text once and reuse it for both the log entry and the return
    # value, so both are guaranteed to be the same string.
    # NOTE(review): with the real client library, accessing ``.text`` may raise
    # when the response was blocked or is empty -- confirm against the library
    # documentation; such an error is deliberately allowed to propagate.
    response_text = response.text
    log_documentation(f"Raw model response:\n{response_text}")  # Log the raw response
    return response_text

def extract_client_name(text, log_documentation):
    """Extract the client's name from the start of a model response.

    The response is expected to begin with ``Full Name:``. Leading blank
    lines and whitespace are ignored, and markdown emphasis/heading markers
    wrapped around the name (``*``, ``#``, backticks) are removed, so replies
    such as ``"\\n**John Smith:** ..."`` still yield ``"John Smith"``.

    Args:
        text: Raw response text; ``None`` or an empty string is tolerated.
        log_documentation: Callable accepting a single string; receives
            progress and debugging messages.

    Returns:
        The extracted name, or ``"Unknown_Client"`` when the first non-blank
        line does not contain a usable ``name:`` prefix.
    """
    log_documentation("Extracting client name")
    log_documentation(f"Text passed to name extraction: \n{text}") # log text before extraction

    # Skip leading whitespace/newlines: an anchored match at the very start of
    # the raw text fails as soon as the reply opens with a blank line.
    candidate = (text or "").lstrip()
    # Everything up to the first colon on the first non-blank line.
    name_match = re.match(r"([^:\n]+):", candidate)
    if name_match:
        # Drop surrounding whitespace and markdown decoration around the name.
        name = name_match.group(1).strip().strip("*#`").strip()
        if name:
            log_documentation(f"Extracted name: {name}")
            return name

    log_documentation("Could not extract name")
    return "Unknown_Client"

In [17]:
# Cell 5: Main Function
def _sanitize_client_name(client_name):
    """Convert a client name into a token that is safe to use in a file name."""
    # Keep only word characters, whitespace and hyphens, then trim the ends so
    # the result does not start or end with an underscore.
    cleaned = re.sub(r'[^\w\s-]', '', client_name).strip()
    cleaned = re.sub(r'\s+', '_', cleaned)
    # An empty result would produce a file literally named ".txt".
    return cleaned or "Unknown_Client"


def _create_next_trial_dir(trials_dir):
    """Create and return ``<trials_dir>/trial_<n>`` for the next unused n."""
    os.makedirs(trials_dir, exist_ok=True)
    # Number from the highest existing trial_<n> entry rather than from the
    # entry count: counting breaks (FileExistsError) once a trial folder has
    # been deleted or an unrelated file lives in the directory.
    highest = 0
    for entry in os.listdir(trials_dir):
        match = re.fullmatch(r"trial_(\d+)", entry)
        if match:
            highest = max(highest, int(match.group(1)))
    trial_dir = os.path.join(trials_dir, f"trial_{highest + 1}")
    os.makedirs(trial_dir)
    return trial_dir


def main():
    """Run one simulation: generate a client, then save it with its log.

    Generates a client profile and business problem with the notebook-level
    ``model``, extracts the client's name, creates a fresh ``trials/trial_<n>``
    directory and writes two files into it: ``<name>.txt`` (the raw model
    response) and ``<name>_documentation.txt`` (the timestamped log of this
    run). The paths of both files are printed.
    """
    # Log entries are collected per run and written out at the end.
    log_entries = []

    def log_documentation(text):
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        log_entries.append(f"[{timestamp}]: {text}\n")

    log_documentation("Starting the data analysis project simulation.")
    client_and_problem = generate_client_and_problem(model, log_documentation)

    # Extract the client name and make it safe for use in a file name
    client_name = extract_client_name(client_and_problem, log_documentation)
    sanitized_client_name = _sanitize_client_name(client_name)

    # Create the next trial directory (and 'trials' itself if needed)
    trial_dir = _create_next_trial_dir("trials")

    filename = os.path.join(trial_dir, f"{sanitized_client_name}.txt")
    doc_filename = os.path.join(trial_dir, f"{sanitized_client_name}_documentation.txt")

    # Save the client and problem. UTF-8 is explicit because the platform
    # default encoding may be unable to represent characters in model output.
    with open(filename, "w", encoding="utf-8") as f:
        f.write(client_and_problem)
    print(f"Client Profile and Business Problem saved to:\n{filename}")

    # Record completion BEFORE writing the log so the entry reaches the file.
    log_documentation("Project simulation completed.")

    # Save documentation
    with open(doc_filename, "w", encoding="utf-8") as f:
        f.write("".join(log_entries))
    print(f"\nProject documentation saved to: {doc_filename}")

In [18]:
# Cell 6: Execution
# Run one simulation when this code executes as the top-level program
# (__name__ == "__main__"); importing it as a module does not call main().
# main() prints the paths of the two files it writes.
if __name__ == "__main__":
    main()

Client Profile and Business Problem saved to:
trials\trial_9\Esmeralda_Weatherwax.txt

Project documentation saved to: trials\trial_9\Esmeralda_Weatherwax_documentation.txt
