### Generalized Instruction Tuning

In [19]:
"""
Pseudocode:
D ← build_taxonomy() ▷ build a taxonomy and return a list of disciplines (Section 2.1)
L ← ∅
for each discipline d ∈ D do
    S ← generate_subjects(d) ▷ Obtain a list of subjects in d (Section 2.2)
    for each subject s ∈ S do
        A ← generate_syllabus(s, d) ▷ Return syllabus A for s (Section 2.3)
        C, K ← extract_class_details(A) ▷ Extract class sessions and key concepts (Section 2.3)
        Q ← generate_instructions(A, C, K, d) ▷ Generate instructions by sampling class sessions and key concepts (Section 2.4)
        L ← L ∪ Q
    end for
end for
return L
"""

import google.generativeai as genai
import utils

# Configure the Gemini client with the API key supplied by utils, then build
# the shared model handle (temperature 0, at most 2048 output tokens).
genai.configure(api_key=utils.get_GEMINI_API_KEY())
model = genai.GenerativeModel(
    model_name="gemini-pro",
    generation_config=genai.GenerationConfig(
        max_output_tokens=2048,
        temperature=0,
    ),
)


# Sample task description used as the input to the taxonomy step.
prompt = (
    "Write an email to your boss "
    "explaining the new project you are working on."
)


def build_taxonomy(prompt: str) -> str:
    """Ask the model for a taxonomy of knowledge relevant to ``prompt``.

    The prompt is inserted into an instruction template that asks for fields,
    subfields and disciplines as a nested JSON object and shows one worked
    example of the expected shape.

    Args:
        prompt: The task or question the taxonomy should be built around.

    Returns:
        The text of the first part of the first candidate returned by the
        model. The template requests a bare JSON object, but the text is
        returned as-is: it is neither parsed nor validated here.
    """
    # Literal braces are doubled so that str.format only substitutes {prompt}.
    # The worked example has to be valid JSON itself (commas between every
    # member); otherwise the model is shown a malformed target format.
    template = """You are a teacher at a university. You are tasked with creating a new course. The course is about the prompt provided: "{prompt}".

    What are the fields of human knowledge that are relevant to the prompt? What are the subfields of these fields? What disciplines fall under those subfields?

    Respond using a JSON object. For example, the prompt provided is "How strong is the gravitational force between the Earth and the Moon?" The response would be:

    {{
        "Physics": {{
            "Classical Mechanics": ["Gravitational Force"],
            "Quantum Mechanics": [],
            "Thermodynamics": []
        }},
        "Mathematics": {{
            "Calculus": ["Derivatives", "Integrals"],
            "Linear Algebra": []
        }},
        "Astronomy": {{
            "Astrophysics": [],
            "Planetary Science": [],
            "Cosmology": []
        }}
    }}
    
    Take a deep breath and think step-by-step.

    Respond with only a JSON object. Do not include any other text in your response. Do not respond with markdown. Do not surround response in backticks.
    """
    request = template.format(prompt=prompt)
    response = model.generate_content(request)
    # Only the first candidate's first part is used.
    # NOTE(review): a reply with no candidates/parts would presumably fail
    # on the indexing below — confirm how blocked responses should be handled.
    return response.candidates[0].content.parts[0].text


# Run the taxonomy step on the sample prompt and print the model's raw text reply.
print(build_taxonomy(prompt))

{
  "Communication": {
    "Technical Writing": ["Email Writing", "Project Proposals"],
    "Business Communication": [],
    "Interpersonal Communication": []
  },
  "Project Management": {
    "Project Planning": ["Project Scope", "Project Timeline"],
    "Project Execution": ["Task Management", "Resource Allocation"],
    "Project Evaluation": ["Project Success Metrics", "Project Reporting"]
  },
  "Computer Science": {
    "Software Engineering": ["Project Management", "Software Development"],
    "Data Science": [],
    "Artificial Intelligence": []
  }
}
