In [5]:
# %pip install openai jupyter_ai python-dotenv
# echo "OPENAI_API_KEY=your-api-key" > .env
# %env OPENAI_API_KEY = {YOUR_API_KEY}

import os
import openai

from dotenv import load_dotenv, find_dotenv
# Load variables from a discovered .env file into the process environment.
_ = load_dotenv(find_dotenv())
# Indexing os.environ (rather than .get) raises KeyError right here when
# OPENAI_API_KEY is missing.
openai.api_key = os.environ['OPENAI_API_KEY']
# The key is deliberately never echoed: a bare `openai.api_key` expression would
# print the secret into the saved notebook output if it ended up last in a cell.

%load_ext jupyter_ai_magics

The jupyter_ai_magics extension is already loaded. To reload it, use:
  %reload_ext jupyter_ai_magics


In [6]:
def generate_magic_prompt(task: str, tools: str, output_format: str, example: str) -> str:
    """
    Build a single-paragraph prompt: a fixed expert persona followed by the assignment.

    Args:
    - task (str): What the model is asked to do; follows "you are tasked with: ".
    - tools (str): Tools/technologies to name; follows "Utilize tools such as ".
    - output_format (str): How findings should be presented.
    - example (str): Illustration appended after "For instance, ".

    Returns:
    - str: The persona text immediately followed by the four filled-in sentences.
    """
    # Fixed persona preamble; it ends with "; " so the assignment continues the sentence.
    persona = (
        "You act as Professional Data Scientist, Principal Solution Architect, Python / R / SAS expert with master's or PhD degrees from the world's top 1% universities; embody the role of the most qualified subject matter experts in the areas of Data Science, Analytics, Machine Learning, AI, DevSecOps, Terraform, and Amazon Web Services (AWS) Cloud; "
    )
    # Caller-supplied values are interpolated verbatim (braces inside them are kept as-is).
    assignment = (
        f"you are tasked with: {task}. "
        f"Utilize tools such as {tools} for this purpose. "
        f"The findings should be presented in {output_format}. "
        f"For instance, {example}."
    )
    return persona + assignment

## Magic-Prompt Usage 1
# Fill the four template slots (task, tools, output format, example) and keep the
# assembled text in `magic_prompt`.
magic_prompt = generate_magic_prompt(
    task="in-depth analysis of large datasets to extract insights and predictions",
    tools="Python, R, SQL, Pandas, NumPy, Scikit-learn, TensorFlow",
    output_format="a detailed report with data visualizations and actionable recommendations",
    example="as seen in industry-leading research papers"
)

# Show the assembled prompt text.
print(magic_prompt)


You act as Professional Data Scientist, Principal Solution Architect, Python / R / SAS expert with master's or PhD degrees from the world's top 1% universities; embody the role of the most qualified subject matter experts in the areas of Data Science, Analytics, Machine Learning, AI, DevSecOps, Terraform, and Amazon Web Services (AWS) Cloud; you are tasked with: in-depth analysis of large datasets to extract insights and predictions. Utilize tools such as Python, R, SQL, Pandas, NumPy, Scikit-learn, TensorFlow for this purpose. The findings should be presented in a detailed report with data visualizations and actionable recommendations. For instance, as seen in industry-leading research papers.


In [None]:
%%ai chatgpt

You act as Professional Data Scientist, Principal Solution Architect, Python / R / SAS expert with master's or PhD degrees from the world's top 1% universities; embody the role of the most qualified subject matter experts in the areas of Data Science, Analytics, Machine Learning, AI, DevSecOps, Terraform, and Amazon Web Services (AWS) Cloud; 
Explain in detail, step by step, the STEM Research Project at master's or PhD level at a top-tier university

In [None]:
%%ai chatgpt

You act as Professional Data Scientist, Principal Solution Architect, Python / R / SAS expert with master's or PhD degrees from the world's top 1% universities; embody the role of the most qualified subject matter experts in the areas of Data Science, Analytics, Machine Learning, AI, DevSecOps, Terraform, and Amazon Web Services (AWS) Cloud; 
Explain in detail step by step the CRISP-DM methodology from https://www.datascience-pm.com/crisp-dm-2/

In [None]:
## Magic-Prompt for a real-life end-to-end data science project following CRISP-DM Methodology
# NOTE(review): these arguments are the same generic ones used in "Usage 1" and do
# not mention CRISP-DM. The keyword arguments match the four-parameter
# generate_magic_prompt; the definition further down in this notebook accepts only
# `crisp_dm_steps`, so this call raises TypeError once that later cell has been run.
magic_prompt = generate_magic_prompt(
    task="in-depth analysis of large datasets to extract insights and predictions",
    tools="Python, R, SQL, Pandas, NumPy, Scikit-learn, TensorFlow",
    output_format="a detailed report with data visualizations and actionable recommendations",
    example="as seen in industry-leading research papers"
)

In [8]:
def generate_magic_prompt(crisp_dm_steps: dict) -> str:
    """
    Render a CRISP-DM project brief as one prompt string.

    The prompt opens with a fixed expert-persona paragraph and then emits, for every
    entry of ``crisp_dm_steps`` in iteration order, a ``###`` heading, the step's
    description, and a dash-bulleted "Expected Output:" list.

    Note: this reuses the name of the four-argument ``generate_magic_prompt`` defined
    earlier in the notebook and replaces it once this cell runs.

    Args:
    - crisp_dm_steps (dict): Maps a step title to a dict holding a 'description'
                             string and an 'outputs' iterable of strings.

    Returns:
    - str: The persona paragraph followed by one block per step.
    """
    # Collect fragments and join once at the end instead of growing a string.
    fragments = [
        "You act as a Professional Data Scientist with master's or PhD degrees from the world's top 1% universities. ",
        "Embody the role of the most qualified expert in Data Science, Analytics, Machine Learning, AI, DevSecOps, Terraform, ",
        "and Amazon Web Services (AWS) Cloud. Approach this project following the CRISP-DM Methodology:\n\n",
    ]

    for phase_title, phase in crisp_dm_steps.items():
        fragments.append(f"### {phase_title}\n")
        fragments.append(f"{phase['description']}\n\n")
        fragments.append("Expected Output:\n")
        fragments.extend(f"- {deliverable}\n" for deliverable in phase['outputs'])
        # Blank line separating this step from the next one.
        fragments.append("\n")

    return "".join(fragments)

# Example usage
# Each key is the numbered CRISP-DM phase title (rendered as a "###" heading by
# generate_magic_prompt); each value carries the phase 'description' and the list of
# expected 'outputs' that become the bullet items.
crisp_dm_steps = {
    "1. Business Understanding": {
        "description": (
            "Understand the project objectives and requirements from a business perspective. Define the problem statement, goals, "
            "and success criteria. Identify stakeholders and their requirements."
        ),
        "outputs": [
            "Documented understanding of the business problem and objectives.",
            "Identified stakeholders and their requirements.",
            "Defined project scope and success criteria."
        ],
    },
    "2. Data Understanding": {
        "description": (
            "Focus on collecting and understanding the available data. Perform initial data exploration to identify "
            "quality, relevance, and limitations."
        ),
        "outputs": [
            "Identified data sources.",
            "Documentation of data quality, completeness, and limitations.",
            "Initial data exploration and insights."
        ],
    },
    "3. Data Preparation": {
        "description": (
            "Clean, transform, and format the data for analysis. Handle missing values, outliers, and perform feature scaling. "
            "Apply data preprocessing techniques."
        ),
        "outputs": [
            "Cleaned and preprocessed data.",
            "Feature engineering outcomes.",
            "Selected features and transformed data ready for modeling."
        ],
    },
    "4. Modeling": {
        "description": (
            "Apply various modeling techniques to develop suitable models. Select algorithms based on objectives and data characteristics. "
            "Train and evaluate the models using appropriate metrics."
        ),
        "outputs": [
            "Trained models (potentially multiple).",
            "Model evaluation results based on metrics.",
            "Identified best-performing model."
        ],
    },
    "5. Evaluation": {
        "description": (
            "Assess the model's performance and alignment with business objectives. Use validation datasets or cross-validation techniques "
            "for thorough evaluation."
        ),
        "outputs": [
            "Evaluation metrics and results.",
            "Determination of model's suitability for deployment."
        ],
    },
    "6. Deployment": {
        "description": (
            "Deploy the chosen model into a production environment. Integrate the model into existing systems or processes and develop necessary "
            "user interfaces or APIs."
        ),
        "outputs": [
            "Deployed model ready for production use.",
            "Documentation and training materials.",
            "Integration with existing systems."
        ],
    }
}

# Render the six phases into one prompt string and show it.
magic_prompt = generate_magic_prompt(crisp_dm_steps)
print(magic_prompt)


You act as a Professional Data Scientist with master's or PhD degrees from the world's top 1% universities. Embody the role of the most qualified expert in Data Science, Analytics, Machine Learning, AI, DevSecOps, Terraform, and Amazon Web Services (AWS) Cloud. Approach this project following the CRISP-DM Methodology:

### 1. Business Understanding
Understand the project objectives and requirements from a business perspective. Define the problem statement, goals, and success criteria. Identify stakeholders and their requirements.

Expected Output:
- Documented understanding of the business problem and objectives.
- Identified stakeholders and their requirements.
- Defined project scope and success criteria.

### 2. Data Understanding
Focus on collecting and understanding the available data. Perform initial data exploration to identify quality, relevance, and limitations.

Expected Output:
- Identified data sources.
- Documentation of data quality, completeness, and limitations.
- Initi

In [15]:
import re

def read_markdown_file(file_path: str) -> str:
    """Return the entire contents of the file at ``file_path``, decoded as UTF-8.

    Args:
        file_path: Path of the file to read.

    Returns:
        The file's text as a single string.
    """
    with open(file_path, mode='r', encoding='utf-8') as md_handle:
        text = md_handle.read()
    return text

def extract_section_content(content: str, section_title: str) -> str:
    """Return the body of the level-2 markdown section titled ``section_title``.

    The heading must be a line of its own of the form "## <title>" (trailing blanks
    and Windows line endings are tolerated). The body runs up to the next line that
    starts with "## ", or to the end of ``content``.

    Args:
        content: Full markdown text to search.
        section_title: Exact heading text, without the leading "## ". It is matched
            literally, so characters such as "(" or "/" need no escaping.

    Returns:
        The section body with surrounding whitespace stripped, or "" when the
        heading is absent or the section is empty.
    """
    pattern = (
        # "^" (with MULTILINE) pins the heading to the start of a line so that
        # "### Title" or a mid-line "## Title" is not mistaken for the section.
        rf"^## {re.escape(section_title)}[ \t]*(?:\r?\n|\Z)"
        r"(.*?)"
        # Stop at the start of the next level-2 heading line. Anchoring on "^"
        # instead of a preceding newline lets an empty section end immediately,
        # rather than running on into the following section.
        r"(?=^## |\Z)"
    )
    match = re.search(pattern, content, re.DOTALL | re.MULTILINE)
    return match.group(1).strip() if match else ""

def generate_magic_prompt_from_md(content: str) -> str:
    """Assemble a research-guideline prompt from a markdown document.

    For a fixed list of section titles, the matching section body is pulled out of
    ``content`` with ``extract_section_content`` and re-emitted under a "###"
    heading, after a fixed persona preamble. Every title gets a heading in the
    result, whatever body text comes back for it.

    Args:
        content: Markdown text to draw the section bodies from.

    Returns:
        The preamble followed by one "### <title>" block per listed section.
    """
    section_titles = (
        "Abstract",
        "Introduction",
        "Literature Review",
        "Research Questions and Scoping",
        "Research Design/Methods",
        "Application of Research Method",
        "Findings, Analysis, and Discussion",
        "Conclusion, Limitations & Future Work",
        "STEM Research Methods",
        "Rubric for Marking",
    )

    preamble = "You act as a Professional Data Scientist, Cloud Solutions Architect, and Researcher with Master's or PhD degrees from the world's top 1% universities; conducting a STEM project at a top-tier university, consider the following guidelines:\n\n"

    # One "### title / body / blank line" block per section, in list order.
    rendered_sections = [
        f"### {title}\n{extract_section_content(content, title)}\n\n"
        for title in section_titles
    ]
    return preamble + "".join(rendered_sections)


# Example usage
# The relative path is resolved against the kernel's current working directory.
md_content = read_markdown_file('STEM_Research_Project.md')
# Build the guideline prompt from the document's sections and show it.
magic_prompt = generate_magic_prompt_from_md(md_content)
print(magic_prompt)


You act as a researcher conducting a STEM project at a top-tier university, consider the following guidelines:

### Abstract
- [Brief summary including motivation, research question, method, findings/recommendations, future vision]

[Summary of motivation, research question, method, findings, future vision]

### Introduction
- [Description of the topic area, why it's interesting, and its relevance]

[Context, background, importance, literature overview, problem definition, purpose, and significance]

### Literature Review
- [Summary of the literature critiqued in the proposal]

[Discussion on known topic areas, research methods, challenges, critical evaluation of existing solutions, summary of problem area, gaps in literature]

### Research Question(s)


### Research Design


### Findings, Analysis, and Discussion
- [Division of findings, analysis, and discussion as suits the study]

[Results, analysis, how findings support or contradict previous research, diagrams/tables for visual ai

In [25]:
import json

def read_data(file_path):
    """Parse the UTF-8 JSON file at ``file_path`` and return the decoded object.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        Whatever Python value the JSON document decodes to.
    """
    with open(file_path, mode='r', encoding='utf-8') as source:
        raw_text = source.read()
    return json.loads(raw_text)

def generate_prompts(data):
    """Turn a task mapping into a list of prompt snippets, one per task.

    Args:
        data: Mapping of task title to a dict with 'description' and
            'consideration' entries.

    Returns:
        A list, in the mapping's iteration order, of strings shaped as the title
        line, a "- <description>" line and a "- Consideration: <consideration>"
        line, each terminated by a newline.
    """
    return [
        f"{task}\n- {info['description']}\n- Consideration: {info['consideration']}\n"
        for task, info in data.items()
    ]

## Load the data and generate prompts
# The relative path is resolved against the kernel's current working directory.
file_path = 'thesis_research_structure.json'  ## Contains structured research project details
data = read_data(file_path)
prompts = generate_prompts(data)

# Role preamble, printed once ahead of the numbered task prompts.
role = "You act as a Professional Data Scientist, Cloud Solutions Architect, and Researcher with Master's or PhD degrees from the world's top 1% universities; conducting a STEM Research Project described in STAT995 at a top-tier university, consider the following WORKFLOW:\n"
print(f"{role}")

## Print the generated prompts
# Tasks are numbered from 1; each one is wrapped in "[Prompt][Task]" ... "[/Task]"
# markers and followed by a blank line.
for i, prompt in enumerate(prompts, 1):
    print(f"[Prompt][Task]{i}. {prompt} Next task.[/Task] \n")


You act as a Professional Data Scientist, Cloud Solutions Architect, and Researcher with Master's or PhD degrees from the world's top 1% universities; conducting a STEM Research Project described in STAT995 at a top-tier university, consider the following WORKFLOW:

[Prompt][Task]1. Abstract
- Condense your research into a concise abstract. What motivated this study, what were your methods, and what have you discovered?
- Consideration: Ensure it's within 150 words, capturing the essence of your research, including Summary of motivation, research question, method, findings / recommendations, future vision.
 Next task.[/Task] 

[Prompt][Task]2. Introduction
- Develop an introduction that sets the stage for your research. Why is this topic significant, and what gap does it address?
- Consideration: Highlight the relevance to the field of Analytics as per STAT995, including Context, background, importance, literature overview, problem definition, purpose, and significance.
 Next task.[/Ta