# Step 0

In [1]:
!pip install requests gitpython



In [2]:
import os
from git import Repo
import requests

import shutil
import json
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Block: Clone GitHub Repository and Extract Instructions/Topics
def clone_repo(repo_url, local_dir):
    """Clone ``repo_url`` into ``local_dir``, replacing any existing copy.

    Args:
        repo_url: URL of the Git repository to clone.
        local_dir: Target directory; deleted first if it already exists.
    """
    previous_copy_present = os.path.exists(local_dir)
    if previous_copy_present:
        # Remove the earlier checkout before cloning into the same path.
        shutil.rmtree(local_dir)
    Repo.clone_from(repo_url, local_dir)

def extract_instructions(local_dir):
    """Return the text of the ``README.md`` located directly in ``local_dir``.

    Args:
        local_dir: Directory expected to contain a ``README.md`` file.

    Returns:
        The README contents as a string, or the fallback message
        ``"No instructions found in the repository."`` when there is no
        README file at that location.
    """
    # You can modify this part to extract specific files (like README.md) for instructions
    readme_path = os.path.join(local_dir, 'README.md')

    # isfile (rather than exists) so a directory named README.md yields the
    # fallback message instead of an error from open().
    if not os.path.isfile(readme_path):
        return "No instructions found in the repository."

    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(readme_path, 'r', encoding='utf-8') as f:
        return f.read()

In [10]:
# Credentials are read from the environment so real keys never have to be
# saved in the notebook; the 'api-key' placeholder remains the fallback when
# a variable is not set.
openai_api_key = os.environ.get('OPENAI_API_KEY', 'api-key')
ANTHROPIC_API_KEY = os.environ.get('ANTHROPIC_API_KEY', 'api-key')
mistral_api_key = os.environ.get('MISTRAL_API_KEY', 'api-key')
# Repository that is cloned and whose curriculum/modules/README.md is read.
repo_url = 'https://github.com/chrisdavisj/open-kg-curriculum'

# Experimental Code

## 1. GPT

In [None]:
# prompt: Generate code to Access and call Chat GPT to generate educational content from question which are in github repo
!pip install --upgrade openai

Collecting openai
  Downloading openai-1.51.0-py3-none-any.whl.metadata (24 kB)
Collecting httpx<1,>=0.23.0 (from openai)
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Collecting jiter<1,>=0.4.0 (from openai)
  Downloading jiter-0.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.6 kB)
Collecting httpcore==1.* (from httpx<1,>=0.23.0->openai)
  Downloading httpcore-1.0.6-py3-none-any.whl.metadata (21 kB)
Collecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<1,>=0.23.0->openai)
  Downloading h11-0.14.0-py3-none-any.whl.metadata (8.2 kB)
Downloading openai-1.51.0-py3-none-any.whl (383 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m383.5/383.5 kB[0m [31m11.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading httpx-0.27.2-py3-none-any.whl (76 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.4/76.4 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading httpcore-1.0.6-py3-none-any.whl (78 kB)
[2K   [90m━

In [None]:
# Block: Setup OpenAI API and GitHub Repo Access
import openai
from openai import OpenAI

# Shared OpenAI client; stays None until set_openai_api_key() is called.
client = None

def set_openai_api_key(api_key):
  """Configure OpenAI access with ``api_key``.

  Stores the key on the ``openai`` module and rebinds the module-level
  ``client`` to a new ``OpenAI`` instance created with the same key.
  """
  global client
  openai.api_key = api_key
  client = OpenAI(api_key=api_key)

In [None]:
# Block: Define the Prompt for ChatGPT API based on repo contents
def create_prompt(repo_content):
    """Embed ``repo_content`` in the fixed curriculum-generation request.

    Modify this function based on how the instructions are structured in the repo.
    """
    lead_in = (
        "I have a GitHub repository that outlines the structure of a knowledge graph curriculum. "
        "I need to generate educational content for each module based on the structure defined in the markdown file. "
        "Specifically, refer to the module list and structure provided here: "
    )
    return f"{lead_in}{repo_content}."

In [None]:
# Conversation history shared by the GPT helpers; starts with the system prompt.
messages = [
    {"role": "system", "content": "You are an helpful assistant and an expert in generation of academic curriculum with are very thorough, detailed and comprehensive enough.."}
]

# Block: Send Prompt to OpenAI ChatGPT API
def generate_academic_content(prompt, engine='gpt-4o', max_tokens=1000):
    """Send ``prompt`` to OpenAI and return the generated text.

    The prompt is appended to the module-level ``messages`` history. The
    completions endpoint is tried first; if it raises ``openai.APIError`` the
    request is retried on the chat endpoint with the whole history, and the
    assistant reply is appended to ``messages``.

    Args:
        prompt: User prompt text.
        engine: Model name passed as ``model``.
        max_tokens: Token limit for the completions call only; the chat
            fallback does not pass it.

    Returns:
        ``(text, raw_json)`` where ``raw_json`` is the response dumped with
        ``model_dump_json(indent=4)``, or ``(None, None)`` when the request
        fails.
    """
    messages.append({"role": "user", "content": prompt})

    try:
        response = client.completions.create(
            model=engine,
            prompt=prompt,
            max_tokens=max_tokens,
            n=1,
            stop=None,
            temperature=0.7
        )
    except openai.APIError:
        # The completions endpoint rejected the request; retry via chat below.
        pass
    except Exception as e:
        # Previously this branch fell through to the final return and raised
        # UnboundLocalError because `response` was never assigned (e.g. when
        # `client` is still None). Report the failure like the chat branch.
        print(f"Error: {e}")
        return None, None
    else:
        return response.choices[0].text.strip(), response.model_dump_json(indent = 4)

    try:
        response = client.chat.completions.create(
            model=engine,
            messages=messages
        )
        assistant_reply = response.choices[0].message.content
        messages.append({"role": "assistant", "content": assistant_reply})
        return assistant_reply.strip(), response.model_dump_json(indent = 4)
    except Exception as e:
        print(f"Error: {e}")
        return None, None

In [None]:
# Block: Experiment with Different Engines and Control Output Flow
def analyze_repo_with_different_engines(repo_url, api_key, engines=('gpt-3.5-turbo', 'gpt-4o-mini-2024-07-18', 'gpt-4o-mini')):
    """Run the same repository prompt through several OpenAI models and print each result.

    Args:
        repo_url: Git repository to clone into ``repo_temp``.
        api_key: OpenAI API key passed to ``set_openai_api_key``.
        engines: Iterable of model names to try in order.
    """
    # Clone the repository root into repo_temp and read the modules README
    # from inside it (same layout as custom_prompt_analysis). Cloning directly
    # into "repo_temp/curriculum/modules" made the README at that path the
    # repository's root README rather than the module list.
    local_dir = "repo_temp"
    clone_repo(repo_url, local_dir)

    # Extract instructions/topics from the repo
    repo_content = extract_instructions(os.path.join(local_dir, "curriculum", "modules"))

    set_openai_api_key(api_key)

    # The prompt does not depend on the engine, so build it once.
    prompt = create_prompt(repo_content)

    # Run the analysis using different engines
    for engine in engines:
        # Reset the shared history in place so every engine answers the same
        # conversation (system prompt + this prompt). The former
        # `messages = [...]` assignment only created an unused local.
        messages[:] = [m for m in messages if m.get("role") == "system"]
        print(f"Using engine: {engine}")
        result, response_dump = generate_academic_content(prompt, engine=engine)
        print(f"Results from {engine}:\n", result, "\n" + "-"*40,"\n\n\n", response_dump, "\n" + "-"*40)


In [None]:
# Use the OpenAI key for the GPT calls that follow.
api_key = openai_api_key
# Re-assigns repo_url to the same repository URL set in the configuration cell.
repo_url = 'https://github.com/chrisdavisj/open-kg-curriculum'
# Multi-engine comparison is currently disabled:
# analyze_repo_with_different_engines(repo_url, api_key)

In [None]:
# Block: Flexible Code Block to Extend Functionality
# You can add more flexibility here to modify prompts or experiment with other parameters (e.g., temperature, max tokens, etc.)
def custom_prompt_analysis(repo_url, api_key, custom_prompt, engine='gpt-4o', max_tokens=1000):
    """Clone the repo, append its modules README to ``custom_prompt`` and print the model's answer.

    Args:
        repo_url: Git repository cloned into ``repo_temp``.
        api_key: OpenAI API key passed to ``set_openai_api_key``.
        custom_prompt: Text placed before the README contents.
        engine: Model name forwarded to ``generate_academic_content``.
        max_tokens: Token limit forwarded to ``generate_academic_content``.

    Returns:
        None; the prompt, the result and the raw response are printed.
    """
    checkout_dir = "repo_temp"
    clone_repo(repo_url, checkout_dir)

    # Read the README from curriculum/modules inside the checkout.
    modules_readme = extract_instructions(f"{checkout_dir}/curriculum/modules")

    set_openai_api_key(api_key)

    # Custom prompt first, README contents on the following line.
    full_prompt = "{}\n{}".format(custom_prompt, modules_readme)

    print(full_prompt)
    print(f"Using engine:{engine}")

    result, response_dump = generate_academic_content(full_prompt, engine=engine, max_tokens=max_tokens)
    divider = "\n" + "-"*40
    print(f"Custom Prompt Result from {engine}:\n", result, divider, "\n\n\n", response_dump, divider)

In [None]:
analysis_prompt = """
I have a GitHub repo that contains details about the knowledge graph. I want to generate educational content for its modules in the specified structure mentioned in the markdown file. Here is the GitHub repo: https://github.com/chrisdavisj/open-kg-curriculum/tree/master
here is the module list markdown: https://github.com/chrisdavisj/open-kg-curriculum/blob/master/curriculum/modules/README.md. Look at the module structure at: curriculum/modules/README.md. Extract the modules from the modules list first In the curriculum/modules/README.md

Here is the markdown file:
"""

In [None]:
custom_prompt_analysis(repo_url, api_key, analysis_prompt)


I have a GitHub repo that contains details about the knowledge graph. I want to generate educational content for its modules in the specified structure mentioned in the markdown file. Here is the GitHub repo: https://github.com/chrisdavisj/open-kg-curriculum/tree/master
here is the module list markdown: https://github.com/chrisdavisj/open-kg-curriculum/blob/master/curriculum/modules/README.md. Look at the module structure at: curriculum/modules/README.md. Extract the modules from the modules list first In the curriculum/modules/README.md

Here is the markdown file:

# The Open Curriculum Module List

The vocabulary that we use:
- Audience: Undergraduate Student, Graduate Student, Developer, Project Management, Any
- Level: Beginner, Intermediate, Advanced
- Category: Context/History, Technology, Foundational, Standards, Methods, Resources, Query Language, Markup Languages, Visualization

## Modules List
* What is Metadata?
  * Category: Foundational
  * Module Prerequisites: None
  * 

In [None]:
print(*messages, sep = "\n\n")

{'role': 'system', 'content': 'You are an helpful assistant and an expert in generation of academic curriculum with are very thorough, detailed and comprehensive enough..'}

{'role': 'user', 'content': '\nI have a GitHub repo that contains details about the knowledge graph. I want to generate educational content for its modules in the specified structure mentioned in the markdown file. Here is the GitHub repo: https://github.com/chrisdavisj/open-kg-curriculum/tree/master\nhere is the module list markdown: https://github.com/chrisdavisj/open-kg-curriculum/blob/master/curriculum/modules/README.md. Look at the module structure at: curriculum/modules/README.md. Extract the modules from the modules list first In the curriculum/modules/README.md\n\nHere is the markdown file:\n\n# The Open Curriculum Module List\n\nThe vocabulary that we use:\n- Audience: Undergraduate Student, Graduate Student, Developer, Project Management, Any\n- Level: Beginner, Intermediate, Advanced\n- Category: Context

In [None]:
def custom_module_generation_prompt(custom_prompt, engine='gpt-4o', max_tokens=8000):
    """Send ``custom_prompt`` to ``generate_academic_content``, print and return the outcome.

    Args:
        custom_prompt: Prompt to send; formatted to a string before use.
        engine: Model name forwarded to ``generate_academic_content``.
        max_tokens: Token limit forwarded to ``generate_academic_content``.

    Returns:
        The ``(result, response_dump)`` pair from ``generate_academic_content``.
    """
    # Create custom prompt
    full_prompt = format(custom_prompt)

    print(f"Using engine:{engine}")

    generated = generate_academic_content(full_prompt, engine=engine, max_tokens=max_tokens)
    result, response_dump = generated
    divider = "\n" + "-"*40
    print(f"Custom Prompt Result from {engine}:\n", result, divider, "\n\n\n", response_dump, divider)
    return result, response_dump

In [None]:
custom_module_prompt = """
Let's focus on the module topic {}. Look at the structural data given for that topic. Using the provided structural data, generate an educational curriculum focusing only on the 'Content' and 'References' sections. Ensure the content section is highly detailed, thoroughly explaining each topic in depth to suit the target academic audience. Cite all external sources appropriately, using relevant references, and avoid any self-citation of the provided repository. The content should flow logically, be organized clearly into subsections, and comprehensively cover all necessary information from the structural data. Format the ouput to a markdown structure.
"""

In [None]:
print(*messages, sep = "\n\n")
temp_messages = messages.copy()

{'role': 'system', 'content': 'You are an helpful assistant and an expert in generation of academic curriculum with are very thorough, detailed and comprehensive enough..'}

{'role': 'user', 'content': '\nI have a GitHub repo that contains details about the knowledge graph. I want to generate educational content for its modules in the specified structure mentioned in the markdown file. Here is the GitHub repo: https://github.com/chrisdavisj/open-kg-curriculum/tree/master\nhere is the module list markdown: https://github.com/chrisdavisj/open-kg-curriculum/blob/master/curriculum/modules/README.md. Look at the module structure at: curriculum/modules/README.md. Extract the modules from the modules list first In the curriculum/modules/README.md\n\nHere is the markdown file:\n\n# The Open Curriculum Module List\n\nThe vocabulary that we use:\n- Audience: Undergraduate Student, Graduate Student, Developer, Project Management, Any\n- Level: Beginner, Intermediate, Advanced\n- Category: Context

In [None]:
import os
import shutil
import time

# Destination folder in Google Drive (adjust folder name as needed)
drive_folder = "/content/drive/My Drive/my_folder"

for i in range(18,19):
  result, raw = custom_module_generation_prompt(custom_module_prompt.format(i))
  # Restore the conversation saved in temp_messages so the next module starts
  # from the same history instead of building on earlier module output.
  messages = temp_messages.copy()
  print(*messages, sep = "\n\n")

  if result is None or raw is None:
    # generate_academic_content reports failures as (None, None); writing
    # None to a file would raise TypeError, so skip saving this module.
    print(f"Skipping module {i}: no content was generated.")
  else:
    # Step 1: write the markdown and the raw API response to the working
    # directory. UTF-8 is set explicitly so non-ASCII characters in the
    # generated text are written the same way on every platform.
    source_file_md = os.path.abspath(f"{i}. module.md")
    source_file_raw = os.path.abspath(f"{i}. module.txt")
    with open(source_file_md, "w", encoding="utf-8") as f_result:
      f_result.write(result)
    with open(source_file_raw, "w", encoding="utf-8") as f_raw:
      f_raw.write(raw)

    # Step 2: Copy the files from Colab to Google Drive. The source is the
    # path that was just written rather than a hard-coded /content/ prefix.
    destination_md = os.path.join(drive_folder, f"{i}. module.md")
    destination_raw = os.path.join(drive_folder, f"{i}. module.txt")

    # Create the folder if it doesn't exist
    os.makedirs(drive_folder, exist_ok=True)

    shutil.copy(source_file_md, destination_md)
    shutil.copy(source_file_raw, destination_raw)
  time.sleep(2)

Using engine:gpt-4o
Custom Prompt Result from gpt-4o:
 Here's an educational curriculum focusing on the 'Content' and 'References' sections for the module topic "SPARQL" in markdown:

```markdown
# SPARQL Module

## Content

### Introduction to SPARQL
SPARQL (SPARQL Protocol and RDF Query Language) is a powerful query language and protocol developed for retrieving and manipulating data stored in Resource Description Framework (RDF) format. Designed to be analogous to SQL for relational databases, SPARQL provides means to query required and optional graph patterns along with their conjunctions and disjunctions.

SPARQL allows for:
- **Retrieving data** in various formats, such as JSON, XML, or CSV.
- **Filtering results** using conditions.
- **Aggregating data** to provide summaries and insights.
- **Manipulating RDF data** through updates and deletes.

### Components of a SPARQL Query
1. **SELECT Queries**: Used for extracting raw data from RDF graphs.
   ```sparql
   SELECT ?subject ?

In [None]:
# Custom prompt usage:
custom_prompt = f"""
I have a GitHub repository with details about an open knowledge graph. The goal is to generate educational content for its modules, following the structure provided in the markdown file. Please complete the following steps in sequence and dont assume anything but only extract it from markdowns:
 - Extract the list of modules and the structure from the markdown file located here: curriculum/modules/README.md.
 - Focus on the first module in the list and identify the structural data for that module.
 - Using the identified structure, generate detailed educational curriculum content, including:
   - Content Section: Write detailed academic content.
   - Related Media Section: Suggest relevant images or videos.
   - References Section: Cite all sources for content and media used.
 - Ensure the content is comprehensive, includes relevant references, and media (images or videos) from credible sources.
 - The final output should be ready-to-use for the repository, with properly cited media and references, aligning with the context of the open knowledge graph curriculum.
"""
custom_prompt_analysis(repo_url, api_key, custom_prompt)

Using engine:gpt-4
Custom Prompt Result from gpt-4:
 Apologies for misunderstanding, but as an AI, I currently can not extract actual data from an external file or link. I can, however, provide you an example of how to structure and generate content based on the information you provided from the repository.

## Module: Introduction to Knowledge Graphs 

### Metadata:
* Audience: Beginners/Knowledge Graph Enthusiasts 
* Prerequisites: Basic understanding of graphs
* Categories: Introduction, Basics

### Tutorial Article: 

#### Content Section:

**What is a Knowledge Graph?**

A knowledge graph is a specific kind of graph, a collection of interlinked descriptions of entities – real-world objects, events, situations, or abstract concepts. With knowledge graphs, we are not just interested in storing data, but we aim to "understand" and interpret data in ways similar to humans.

**Creating a Knowledge Graph**

Creating a knowledge graph involves:
- Defining entities and their properties.
-

In [None]:
custom_prompt_analysis(repo_url, api_key, custom_prompt, engine = "o1-preview")

Using engine:o1-preview


BadRequestError: Error code: 400 - {'error': {'message': "Your organization must qualify for at least usage tier 5 to access 'o1-preview'. See https://platform.openai.com/docs/guides/rate-limits/usage-tiers for more details on usage tiers.", 'type': 'invalid_request_error', 'param': 'model', 'code': 'below_usage_tier'}}

In [None]:
custom_prompt_analysis(repo_url, api_key, custom_prompt, engine = "gpt-4o")

## 2. Claude

In [None]:
!pip install anthropic

Collecting anthropic
  Downloading anthropic-0.34.2-py3-none-any.whl.metadata (18 kB)
Downloading anthropic-0.34.2-py3-none-any.whl (891 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m891.9/891.9 kB[0m [31m10.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: anthropic
Successfully installed anthropic-0.34.2


In [None]:
import anthropic

# Anthropic client used by the Claude helpers below.
# NOTE(review): this rebinds the module-level name `client`, which the OpenAI
# helpers (set_openai_api_key / generate_academic_content) also use as their
# global client. GPT cells run after this one will talk to this Anthropic
# client unless set_openai_api_key is called again first.
client = anthropic.Anthropic(
    api_key=ANTHROPIC_API_KEY,
)

In [None]:
# System prompt passed as the `system` argument of the Claude requests.
initial_system_message = "You are an helpful assistant and an expert in generation of academic curriculum with are very thorough, detailed and comprehensive enough."
# Conversation history for the Claude section.
# NOTE(review): reuses the name `messages` from the GPT section, so the GPT
# history is replaced by this empty list once this cell runs.
messages = []

In [None]:
def initial_analysis_of_repo(repo_url,prompt, model="claude-3-5-sonnet-20240620", max_tokens=1000):
  """Clone the repo and ask Claude to analyse its modules README.

  The prompt plus README text is appended to the module-level ``messages``
  history as a user turn, and Claude's reply is appended as the assistant turn.

  Args:
    repo_url: Git repository cloned into ``repo_temp``.
    prompt: Instruction text placed before the README contents.
    model: Claude model name.
    max_tokens: Upper bound on generated tokens (default 1000, the value that
      was previously hard-coded).

  Returns:
    ``(content, raw_json)``: the response ``content`` and the response dumped
    with ``model_dump_json(indent=4)``.

  Raises:
    Whatever ``client.messages.create`` raises; the unanswered user turn is
    removed from ``messages`` before the exception propagates.
  """
  local_dir = "repo_temp"
  clone_repo(repo_url, local_dir)
  markdown_content = extract_instructions(local_dir + "/curriculum/modules")
  user_turn = {
      "role": "user",
      "content": [
          {
              "type": "text",
              "text": prompt + "\n" + markdown_content
          }
      ]
  }
  messages.append(user_turn)
  try:
    message = client.messages.create(
        model = model,
        max_tokens = max_tokens,
        temperature = 0.7,
        system = initial_system_message,
        messages = messages
    )
  except Exception:
    # Drop the unanswered user turn so that re-running the cell does not send
    # the prompt (and the whole README) twice.
    if messages and messages[-1] is user_turn:
      messages.pop()
    raise

  messages.append({"role": "assistant", "content": message.content})
  return message.content, message.model_dump_json(indent = 4)

In [None]:
claude_analysis_prompt = """
I have a GitHub repository that includes details on a knowledge graph, and I want to generate educational content based on its modules. The structure for each module is specified in the markdown file.
GitHub repo: https://github.com/chrisdavisj/open-kg-curriculum/
Module list markdown file: https://github.com/chrisdavisj/open-kg-curriculum/blob/master/curriculum/modules/README.md
Please do the following:
 - Extract all modules listed in the curriculum/modules/README.md.
 - Review and analyze the structure for each module, and any specific details mentioned as specified in that file.
 - list all the modules

Markdown file content:
"""

In [None]:
repo_url = 'https://github.com/chrisdavisj/open-kg-curriculum'

In [None]:
result, raw = initial_analysis_of_repo(repo_url,claude_analysis_prompt)

In [None]:
print(result[0].text)

Based on the provided markdown file content, I have extracted and analyzed the modules listed in the curriculum. Here's the list of all modules:

1. What is Metadata?
2. What is an Ontology?
3. What is a Knowledge Graph?
4. What is an Identifier?
5. Introduction to Logic
6. Propositional Logic
7. Datalog
8. Predicate Logic
9. Description Logic
10. Rules
11. Introduction to Set Theory
12. Introduction to Discrete Mathematics
13. Open World Assumption vs Closed World Assumption
14. RDF
15. RDFS
16. RDF Serializations
17. RDF Star
18. SPARQL
19. SWRL
20. OWL
21. OWL Dialects
22. Manchester Syntax
23. SHACL
24. Schema.org
25. Dublin Core
26. SOSA & SSN
27. PROV-O
28. SKOS
29. Survey of Modeling Tools
30. Protege
31. Deploying a Knowledge Graph
32. Introduction to Knowledge Engineering
33. Reification
34. GraphQL
35. Survey of Documentation Practices
36. Survey of Visualization Tools
37. Introduction to Upper Ontologies
38. Introduction to Modular Ontologies
39. Survey of Triplestores
40. H

In [None]:
def generation_of_module_content(prompt, model="claude-3-5-sonnet-20240620", max_tokens=8000):
  """Ask Claude for module content using the saved history plus ``prompt``.

  The request is built from a copy of the module-level ``messages`` history,
  so the shared history itself is not modified by this call.

  Args:
    prompt: User prompt for this module.
    model: Claude model name.
    max_tokens: Upper bound on generated tokens (default 8000, the value that
      was previously hard-coded).

  Returns:
    ``(content, raw_json)``: the response ``content`` and the response dumped
    with ``model_dump_json(indent=4)``.
  """
  user_turn = {
      "role": "user",
      "content": [
          {
              "type": "text",
              "text": prompt
          }
      ]
  }
  # New list: the shared history is left as it was. The assistant reply is not
  # recorded anywhere; the old code appended it to a copy that was discarded.
  request_messages = messages + [user_turn]
  message = client.messages.create(
      model = model,
      max_tokens = max_tokens,
      temperature = 0.7,
      system = initial_system_message,
      messages = request_messages
  )
  return message.content, message.model_dump_json(indent = 4)

In [None]:
custom_claude_module_prompt = """
Let's focus on the module topic {}. Look at the structural data given for that topic. Using the provided structural data, generate an educational curriculum focusing only on the 'Content' and 'References' sections. Ensure the content section is highly detailed and lengthy, thoroughly explaining each topic in depth to suit the target academic audience. Cite all external sources appropriately, using relevant references, and avoid any self-citation of the provided repository. The content should flow logically, be organized clearly into subsections, and comprehensively cover all necessary information from the structural data. Format the ouput to a markdown structure.
"""

In [None]:
import os
import shutil
import time

# Destination folder in Google Drive (adjust folder name as needed)
drive_folder = "/content/drive/My Drive/claude_folder"

for i in range(18,19):
  result, raw = generation_of_module_content(custom_claude_module_prompt.format(i))

  # Step 1: write the markdown (text of the first content block) and the raw
  # API response to the working directory. UTF-8 is set explicitly so
  # non-ASCII characters in the generated text are written the same way on
  # every platform.
  source_file_md = os.path.abspath(f"{i}. module.md")
  source_file_raw = os.path.abspath(f"{i}. module.txt")
  with open(source_file_md, "w", encoding="utf-8") as f_result:
    f_result.write(result[0].text)
  with open(source_file_raw, "w", encoding="utf-8") as f_raw:
    f_raw.write(raw)

  # Step 2: Copy the files from Colab to Google Drive. The source is the path
  # that was just written rather than a hard-coded /content/ prefix.
  destination_md = os.path.join(drive_folder, f"{i}. module.md")
  destination_raw = os.path.join(drive_folder, f"{i}. module.txt")

  # Create the folder if it doesn't exist
  os.makedirs(drive_folder, exist_ok=True)

  shutil.copy(source_file_md, destination_md)
  shutil.copy(source_file_raw, destination_raw)
  time.sleep(2)

## 3. Mistral

In [None]:
!pip install mistralai



In [None]:
import os
from mistralai import Mistral

In [None]:
# api_key = os.environ["MISTRAL_API_KEY"]
# NOTE(review): this rebinds the shared `api_key` name, which the GPT cells
# pass to custom_prompt_analysis as the OpenAI key. GPT cells run after this
# point receive the Mistral key unless `api_key` is reassigned first.
api_key = mistral_api_key

# Client used by the Mistral helpers below.
mistral_client = Mistral(api_key=api_key)

In [None]:
initial_system_message = "You are an helpful assistant and an expert in generation of academic curriculum with are very thorough, detailed and comprehensive enough."

In [None]:
mistal_messages = []
mistal_messages.append({"role": "system", "content": initial_system_message})

def initial_analysis_of_repo_with_mistral(repo_url, prompt, model = "mistral-large-latest", max_tokens = 1000):
  """Clone the repo and ask Mistral to analyse its modules README.

  The prompt plus README text is appended to the module-level
  ``mistal_messages`` history as a user turn, and the model's reply is
  appended as the assistant turn.

  Args:
    repo_url: Git repository cloned into ``repo_temp``.
    prompt: Instruction text placed before the README contents.
    model: Mistral model name.
    max_tokens: Upper bound on generated tokens.

  Returns:
    ``(text, raw_json)``: the first choice's message content and the response
    dumped with ``model_dump_json(indent=4)``.

  Raises:
    Whatever ``mistral_client.chat.complete`` raises; the unanswered user turn
    is removed from ``mistal_messages`` before the exception propagates.
  """
  local_dir = "repo_temp"
  clone_repo(repo_url, local_dir)
  markdown_content = extract_instructions(local_dir + "/curriculum/modules")
  user_turn = {"role": "user", "content": prompt + "\n" + markdown_content}
  mistal_messages.append(user_turn)
  try:
    chat_response = mistral_client.chat.complete(
        model = model,
        messages = mistal_messages,
        max_tokens = max_tokens,
    )
  except Exception:
    # Drop the unanswered user turn so that re-running the cell does not send
    # the prompt (and the whole README) twice.
    if mistal_messages and mistal_messages[-1] is user_turn:
      mistal_messages.pop()
    raise

  reply_text = chat_response.choices[0].message.content
  mistal_messages.append({"role": "assistant", "content": reply_text})
  return reply_text, chat_response.model_dump_json(indent = 4)

In [None]:
mistral_analysis_prompt = """
I have a GitHub repository that includes details on a knowledge graph, and I want to generate educational content based on its modules. The structure for each module is specified in the markdown file.
GitHub repo: https://github.com/chrisdavisj/open-kg-curriculum/
Module list markdown file: https://github.com/chrisdavisj/open-kg-curriculum/blob/master/curriculum/modules/README.md
Please do the following:
 - Extract all modules listed in the curriculum/modules/README.md.
 - Review and analyze the structure for each module, and any specific details mentioned as specified in that file.
 - list all the modules

Markdown file content:
"""

In [None]:
repo_url = 'https://github.com/chrisdavisj/open-kg-curriculum'

In [None]:
result, raw = initial_analysis_of_repo_with_mistral(repo_url,mistral_analysis_prompt, max_tokens=4000)

In [None]:
print(*mistal_messages, sep="\n\n")

{'role': 'system', 'content': 'You are an helpful assistant and an expert in generation of academic curriculum with are very thorough, detailed and comprehensive enough.'}

{'role': 'user', 'content': '\nI have a GitHub repository that includes details on a knowledge graph, and I want to generate educational content based on its modules. The structure for each module is specified in the markdown file.\nGitHub repo: https://github.com/chrisdavisj/open-kg-curriculum/\nModule list markdown file: https://github.com/chrisdavisj/open-kg-curriculum/blob/master/curriculum/modules/README.md\nPlease do the following:\n - Extract all modules listed in the curriculum/modules/README.md.\n - Review and analyze the structure for each module, and any specific details mentioned as specified in that file.\n - list all the modules\n\nMarkdown file content:\n\n# The Open Curriculum Module List\n\nThe vocabulary that we use:\n- Audience: Undergraduate Student, Graduate Student, Developer, Project Managemen

In [None]:
print(result)

Certainly! Below is a thorough and detailed analysis of the modules listed in the `curriculum/modules/README.md` file from the GitHub repository.

## The Open Curriculum Module List

### Modules Overview

The curriculum includes various modules categorized by audience, level, and category. Below is a structured summary of each module, including their prerequisites, audience, level, and covered concepts.

1. **What is Metadata?**
   - **Category:** Foundational
   - **Module Prerequisites:** None
   - **Audience:** Any
   - **Level:** Beginner
   - **Covered Concepts:** Metadata

2. **What is an Ontology?**
   - **Category:** Not specified
   - **Module Prerequisites:** Not specified
   - **Audience:** Not specified
   - **Level:** Not specified
   - **Covered Concepts:** Ontology, Linked Data, Taxonomy, Schema, Statement, Triple

3. **What is a Knowledge Graph?**
   - **Category:** Foundational
   - **Module Prerequisites:** What is Metadata?
   - **Audience:** Any
   - **Level:** Begi

In [None]:
def generation_of_module_content_with_mistral(prompt, model = "mistral-large-latest", max_tokens = 8000):
  """Ask Mistral for module content using the saved history plus ``prompt``.

  The request is built from a copy of the module-level ``mistal_messages``
  history, so the shared history itself is not modified by this call.

  Args:
    prompt: User prompt for this module.
    model: Mistral model name.
    max_tokens: Upper bound on generated tokens.

  Returns:
    ``(text, raw_json)``: the first choice's message content and the response
    dumped with ``model_dump_json(indent=4)``.
  """
  # New list: the shared history is left as it was. The assistant reply is not
  # recorded anywhere; the old code appended it to a copy that was discarded.
  request_messages = mistal_messages + [{"role": "user", "content": prompt}]
  chat_response = mistral_client.chat.complete(
      model = model,
      messages = request_messages,
      max_tokens = max_tokens,
  )
  return chat_response.choices[0].message.content, chat_response.model_dump_json(indent = 4)

In [None]:
custom_mistral_module_prompt = """
Let's focus on the module topic {}. Look at the structural data given for that topic. Using the provided structural data, generate an educational curriculum focusing only on the 'Content' and 'References' sections. Ensure the content section is highly detailed and lengthy, thoroughly explaining each topic in depth to suit the target academic audience. Cite all external sources appropriately, using relevant references, and avoid any self-citation of the provided repository. The content should flow logically, be organized clearly into subsections, and comprehensively cover all necessary information from the structural data. Format the ouput to a markdown structure.
"""

In [None]:
import os
import shutil
import time

# Destination folder in Google Drive (adjust folder name as needed)
drive_folder = "/content/drive/My Drive/mistral_folder"

for i in range(18,19):
  result, raw = generation_of_module_content_with_mistral(custom_mistral_module_prompt.format(i))

  # Step 1: write the markdown and the raw API response to the working
  # directory. UTF-8 is set explicitly so non-ASCII characters in the
  # generated text are written the same way on every platform.
  source_file_md = os.path.abspath(f"{i}. module.md")
  source_file_raw = os.path.abspath(f"{i}. module.txt")
  with open(source_file_md, "w", encoding="utf-8") as f_result:
    f_result.write(result)
  with open(source_file_raw, "w", encoding="utf-8") as f_raw:
    f_raw.write(raw)

  # Step 2: Copy the files from Colab to Google Drive. The source is the path
  # that was just written rather than a hard-coded /content/ prefix.
  destination_md = os.path.join(drive_folder, f"{i}. module.md")
  destination_raw = os.path.join(drive_folder, f"{i}. module.txt")

  # Create the folder if it doesn't exist
  os.makedirs(drive_folder, exist_ok=True)

  shutil.copy(source_file_md, destination_md)
  shutil.copy(source_file_raw, destination_raw)
  time.sleep(2)

# First Pass

## 1. GPT

In [None]:
# prompt: Generate code to Access and call Chat GPT to generate educational content from question which are in github repo
!pip install --upgrade openai

Collecting openai
  Downloading openai-1.51.0-py3-none-any.whl.metadata (24 kB)
Collecting httpx<1,>=0.23.0 (from openai)
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Collecting jiter<1,>=0.4.0 (from openai)
  Downloading jiter-0.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.6 kB)
Collecting httpcore==1.* (from httpx<1,>=0.23.0->openai)
  Downloading httpcore-1.0.6-py3-none-any.whl.metadata (21 kB)
Collecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<1,>=0.23.0->openai)
  Downloading h11-0.14.0-py3-none-any.whl.metadata (8.2 kB)
Downloading openai-1.51.0-py3-none-any.whl (383 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m383.5/383.5 kB[0m [31m11.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading httpx-0.27.2-py3-none-any.whl (76 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.4/76.4 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading httpcore-1.0.6-py3-none-any.whl (78 kB)
[2K   [90m━

In [None]:
# Block: Setup OpenAI API and GitHub Repo Access
import openai
from openai import OpenAI

# Shared OpenAI client; stays None until set_openai_api_key() is called.
client = None

def set_openai_api_key(api_key):
  """Configure OpenAI access with ``api_key``.

  Stores the key on the ``openai`` module and rebinds the module-level
  ``client`` to a new ``OpenAI`` instance created with the same key.
  """
  global client
  openai.api_key = api_key
  client = OpenAI(api_key=api_key)

In [None]:
# Block: Define the Prompt for ChatGPT API based on repo contents
def create_prompt(repo_content):
    """Embed ``repo_content`` in the fixed curriculum-generation request.

    Modify this function based on how the instructions are structured in the repo.
    """
    lead_in = (
        "I have a GitHub repository that outlines the structure of a knowledge graph curriculum. "
        "I need to generate educational content for each module based on the structure defined in the markdown file. "
        "Specifically, refer to the module list and structure provided here: "
    )
    return f"{lead_in}{repo_content}."

In [None]:
# Conversation history shared by the GPT helpers; starts with the system prompt.
messages = [
    {"role": "system", "content": "You are an helpful assistant and an expert in generation of academic curriculum with are very thorough, detailed and comprehensive enough.."}
]

# Block: Send Prompt to OpenAI ChatGPT API
def generate_academic_content(prompt, engine='gpt-4o', max_tokens=1000):
    """Send ``prompt`` to OpenAI and return the generated text.

    The prompt is appended to the module-level ``messages`` history. The
    completions endpoint is tried first; if it raises ``openai.APIError`` the
    request is retried on the chat endpoint with the whole history, and the
    assistant reply is appended to ``messages``.

    Args:
        prompt: User prompt text.
        engine: Model name passed as ``model``.
        max_tokens: Token limit for the completions call only; the chat
            fallback does not pass it.

    Returns:
        ``(text, raw_json)`` where ``raw_json`` is the response dumped with
        ``model_dump_json(indent=4)``, or ``(None, None)`` when the request
        fails.
    """
    messages.append({"role": "user", "content": prompt})

    try:
        response = client.completions.create(
            model=engine,
            prompt=prompt,
            max_tokens=max_tokens,
            n=1,
            stop=None,
            temperature=0.7
        )
    except openai.APIError:
        # The completions endpoint rejected the request; retry via chat below.
        pass
    except Exception as e:
        # Previously this branch fell through to the final return and raised
        # UnboundLocalError because `response` was never assigned (e.g. when
        # `client` is still None). Report the failure like the chat branch.
        print(f"Error: {e}")
        return None, None
    else:
        return response.choices[0].text.strip(), response.model_dump_json(indent = 4)

    try:
        response = client.chat.completions.create(
            model=engine,
            messages=messages
        )
        assistant_reply = response.choices[0].message.content
        messages.append({"role": "assistant", "content": assistant_reply})
        return assistant_reply.strip(), response.model_dump_json(indent = 4)
    except Exception as e:
        print(f"Error: {e}")
        return None, None

In [None]:
# Block: Flexible Code Block to Extend Functionality
# You can add more flexibility here to modify prompts or experiment with other parameters (e.g., temperature, max tokens, etc.)
def custom_prompt_analysis(repo_url, api_key, custom_prompt, engine='gpt-4o', max_tokens=1000):
    """Clone the repo, combine ``custom_prompt`` with its module README and query OpenAI.

    Args:
        repo_url: Git URL handed to ``clone_repo``.
        api_key: OpenAI API key handed to ``set_openai_api_key``.
        custom_prompt: Text placed before the extracted repository content.
        engine: Model name forwarded to ``generate_academic_content``.
        max_tokens: Token limit forwarded to ``generate_academic_content``.

    Prints the full prompt, the engine name, the generated result and the raw
    response dump. Returns nothing.
    """
    checkout_dir = "repo_temp"
    clone_repo(repo_url, checkout_dir)

    # Extract instructions/topics from the repo
    module_readme = extract_instructions(f"{checkout_dir}/curriculum/modules")

    set_openai_api_key(api_key)

    # Create custom prompt
    full_prompt = f"{custom_prompt}\n{module_readme}"
    print(full_prompt)
    print(f"Using engine:{engine}")

    result, response_dump = generate_academic_content(full_prompt, engine=engine, max_tokens=max_tokens)
    divider = "\n" + "-"*40
    print(f"Custom Prompt Result from {engine}:\n", result, divider, "\n\n\n", response_dump, divider)

In [None]:
# Instruction text for the initial GPT repo analysis; custom_prompt_analysis()
# places the extracted README content after it.
analysis_prompt = """
I have a GitHub repo that contains details about the knowledge graph. I want to generate educational content for its modules in the specified structure mentioned in the markdown file. Here is the GitHub repo: https://github.com/chrisdavisj/open-kg-curriculum/tree/master
here is the module list markdown: https://github.com/chrisdavisj/open-kg-curriculum/blob/master/curriculum/modules/README.md. Look at the module structure at: curriculum/modules/README.md. Extract the modules from the modules list first In the curriculum/modules/README.md

Here is the markdown file:
"""

In [None]:
# Pass the OpenAI key from the configuration cell explicitly. The bare name
# `api_key` is not defined by any OpenAI cell, so on a fresh kernel it raises
# NameError (or silently picks up another provider's key if one was assigned).
custom_prompt_analysis(repo_url, openai_api_key, analysis_prompt)


I have a GitHub repo that contains details about the knowledge graph. I want to generate educational content for its modules in the specified structure mentioned in the markdown file. Here is the GitHub repo: https://github.com/chrisdavisj/open-kg-curriculum/tree/master
here is the module list markdown: https://github.com/chrisdavisj/open-kg-curriculum/blob/master/curriculum/modules/README.md. Look at the module structure at: curriculum/modules/README.md. Extract the modules from the modules list first In the curriculum/modules/README.md

Here is the markdown file:

# The Open Curriculum Module List

The vocabulary that we use:
- Audience: Undergraduate Student, Graduate Student, Developer, Project Management, Any
- Level: Beginner, Intermediate, Advanced
- Category: Context/History, Technology, Foundational, Standards, Methods, Resources, Query Language, Markup Languages, Visualization

## Modules List
* What is Metadata?
  * Category: Foundational
  * Module Prerequisites: None
  * 

In [None]:
# Show the conversation history, one entry per paragraph.
print("\n\n".join(str(entry) for entry in messages))

{'role': 'system', 'content': 'You are an helpful assistant and an expert in generation of academic curriculum with are very thorough, detailed and comprehensive enough..'}

{'role': 'user', 'content': '\nI have a GitHub repo that contains details about the knowledge graph. I want to generate educational content for its modules in the specified structure mentioned in the markdown file. Here is the GitHub repo: https://github.com/chrisdavisj/open-kg-curriculum/tree/master\nhere is the module list markdown: https://github.com/chrisdavisj/open-kg-curriculum/blob/master/curriculum/modules/README.md. Look at the module structure at: curriculum/modules/README.md. Extract the modules from the modules list first In the curriculum/modules/README.md\n\nHere is the markdown file:\n\n# The Open Curriculum Module List\n\nThe vocabulary that we use:\n- Audience: Undergraduate Student, Graduate Student, Developer, Project Management, Any\n- Level: Beginner, Intermediate, Advanced\n- Category: Context

In [None]:
def custom_module_generation_prompt(custom_prompt, engine='gpt-4o', max_tokens=4000):
    """Send a module-specific prompt to ``generate_academic_content`` and echo the outcome.

    Args:
        custom_prompt: Prompt to send; it is formatted into a string as-is.
        engine: Model name forwarded to ``generate_academic_content``.
        max_tokens: Token limit forwarded to ``generate_academic_content``.

    Returns:
        The ``(result, response_dump)`` pair produced by
        ``generate_academic_content``.
    """
    # Create custom prompt
    full_prompt = f"{custom_prompt}"

    print(f"Using engine:{engine}")

    generated = generate_academic_content(full_prompt, engine=engine, max_tokens=max_tokens)
    result, response_dump = generated
    divider = "\n" + "-"*40
    print(f"Custom Prompt Result from {engine}:\n", result, divider, "\n\n\n", response_dump, divider)
    return result, response_dump

In [None]:
# Template for per-module GPT requests; the `{}` placeholder is filled with
# str.format() (the generation loop passes the module number).
custom_module_prompt = """
Let's focus on the module topic {}. Look at the structural data given for that topic. Using the provided structural data, generate an educational curriculum focusing only on the 'Content' and 'References' sections. Ensure the content section is highly detailed, thoroughly explaining each topic in depth to suit the target academic audience. Cite all external sources appropriately, using relevant references, and avoid any self-citation of the provided repository. The content should flow logically, be organized clearly into subsections, and comprehensively cover all necessary information from the structural data. Format the ouput to a markdown structure.
"""

In [None]:
# Show the conversation history, one entry per paragraph.
print("\n\n".join(str(entry) for entry in messages))
# Snapshot of the history at this point; the generation loop restores
# `messages` from it after every module.
temp_messages = list(messages)

{'role': 'system', 'content': 'You are an helpful assistant and an expert in generation of academic curriculum with are very thorough, detailed and comprehensive enough..'}

{'role': 'user', 'content': '\nI have a GitHub repo that contains details about the knowledge graph. I want to generate educational content for its modules in the specified structure mentioned in the markdown file. Here is the GitHub repo: https://github.com/chrisdavisj/open-kg-curriculum/tree/master\nhere is the module list markdown: https://github.com/chrisdavisj/open-kg-curriculum/blob/master/curriculum/modules/README.md. Look at the module structure at: curriculum/modules/README.md. Extract the modules from the modules list first In the curriculum/modules/README.md\n\nHere is the markdown file:\n\n# The Open Curriculum Module List\n\nThe vocabulary that we use:\n- Audience: Undergraduate Student, Graduate Student, Developer, Project Management, Any\n- Level: Beginner, Intermediate, Advanced\n- Category: Context

In [None]:
import os
import shutil
import time

# Destination folder in Google Drive (adjust folder name as needed).
drive_folder = "/content/drive/My Drive/my_folder"
# Create the folder once, up front, if it doesn't exist.
os.makedirs(drive_folder, exist_ok=True)

for i in range(18,19):
  result, raw = custom_module_generation_prompt(custom_module_prompt.format(i))
  # Restore the snapshot so the next module does not see this module's turns.
  messages = temp_messages.copy()
  print(*messages, sep = "\n\n")

  if result is None or raw is None:
    # The generation helper reports failure as (None, None); writing None to
    # a file would raise TypeError.
    print(f"Skipping module {i}: no content was generated.")
  else:
    # Step 1: Save the markdown answer and the raw JSON response locally.
    source_file_md = f"{i}. module.md"
    source_file_raw = f"{i}. module.txt"
    with open(source_file_md, "w", encoding="utf-8") as f_result:
      f_result.write(result)
    with open(source_file_raw, "w", encoding="utf-8") as f_raw:
      f_raw.write(raw)

    # Step 2: Copy the files to Google Drive. The copy reads the same paths
    # that were just written instead of assuming the working directory is /content.
    destination_md = os.path.join(drive_folder, source_file_md)
    destination_raw = os.path.join(drive_folder, source_file_raw)
    shutil.copy(source_file_md, destination_md)
    shutil.copy(source_file_raw, destination_raw)
  # Short pause between API requests.
  time.sleep(2)

Using engine:gpt-4o
Custom Prompt Result from gpt-4o:
 Here's an educational curriculum focusing on the 'Content' and 'References' sections for the module topic "SPARQL" in markdown:

```markdown
# SPARQL Module

## Content

### Introduction to SPARQL
SPARQL (SPARQL Protocol and RDF Query Language) is a powerful query language and protocol developed for retrieving and manipulating data stored in Resource Description Framework (RDF) format. Designed to be analogous to SQL for relational databases, SPARQL provides means to query required and optional graph patterns along with their conjunctions and disjunctions.

SPARQL allows for:
- **Retrieving data** in various formats, such as JSON, XML, or CSV.
- **Filtering results** using conditions.
- **Aggregating data** to provide summaries and insights.
- **Manipulating RDF data** through updates and deletes.

### Components of a SPARQL Query
1. **SELECT Queries**: Used for extracting raw data from RDF graphs.
   ```sparql
   SELECT ?subject ?

## 2. Claude

In [None]:
!pip install anthropic

Collecting anthropic
  Downloading anthropic-0.34.2-py3-none-any.whl.metadata (18 kB)
Downloading anthropic-0.34.2-py3-none-any.whl (891 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m891.9/891.9 kB[0m [31m10.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: anthropic
Successfully installed anthropic-0.34.2


In [None]:
import anthropic

# Anthropic API client used by the Claude cells.
# NOTE: this rebinds the global name `client`, which generate_academic_content()
# in the GPT section also reads; call set_openai_api_key() again before using
# the GPT helpers after this cell has run.
client = anthropic.Anthropic(
    api_key=ANTHROPIC_API_KEY,
)

In [None]:
# System prompt passed as `system=` on every Claude request.
initial_system_message = "You are an helpful assistant and an expert in generation of academic curriculum with are very thorough, detailed and comprehensive enough."
# Claude conversation history. NOTE: this reuses the global name `messages`
# from the GPT section, so the GPT history is discarded here.
messages = []

In [None]:
def initial_analysis_of_repo(repo_url,prompt, model="claude-3-5-sonnet-20240620", max_tokens=1000):
  """Clone the repo and ask Claude to analyse its module README.

  Args:
      repo_url: Git URL handed to ``clone_repo``.
      prompt: Instruction text; the extracted README content is appended to it
          after a newline.
      model: Claude model name.
      max_tokens: Upper bound on generated tokens (defaults to the value that
          was previously hard-coded).

  Returns:
      ``(content, response_json)`` where ``content`` is ``message.content`` and
      ``response_json`` is ``message.model_dump_json(indent=4)``.

  Side effects:
      On success the user turn and the assistant reply are appended to the
      module-level ``messages`` list. If the request raises, ``messages`` is
      left unchanged so the cell can simply be re-run.
  """
  local_dir = "repo_temp"
  clone_repo(repo_url, local_dir)
  markdown_content = extract_instructions(local_dir + "/curriculum/modules")
  user_turn = {
      "role": "user",
      "content": [{"type": "text", "text": prompt + "\n" + markdown_content}],
  }
  # Send history + new turn without mutating the shared list yet; a failed
  # request used to leave a dangling user turn in `messages`.
  message = client.messages.create(
        model = model,
        max_tokens = max_tokens,
        temperature = 0.7,
        system = initial_system_message,
        messages = messages + [user_turn]
  )

  messages.append(user_turn)
  messages.append({"role": "assistant", "content": message.content})
  return message.content, message.model_dump_json(indent = 4)

In [None]:
# Instruction text for the initial Claude repo analysis; the extracted README
# content is appended after it by initial_analysis_of_repo().
claude_analysis_prompt = """
I have a GitHub repository that includes details on a knowledge graph, and I want to generate educational content based on its modules. The structure for each module is specified in the markdown file.
GitHub repo: https://github.com/chrisdavisj/open-kg-curriculum/
Module list markdown file: https://github.com/chrisdavisj/open-kg-curriculum/blob/master/curriculum/modules/README.md
Please do the following:
 - Extract all modules listed in the curriculum/modules/README.md.
 - Review and analyze the structure for each module, and any specific details mentioned as specified in that file.
 - list all the modules

Markdown file content:
"""

In [None]:
# Repository to analyse (same value as in the configuration cell at the top of the notebook).
repo_url = 'https://github.com/chrisdavisj/open-kg-curriculum'

In [None]:
# Clone the repo and run the initial Claude analysis; this also seeds `messages`
# with the exchange that the per-module requests build on.
result, raw = initial_analysis_of_repo(repo_url,claude_analysis_prompt)

In [None]:
# `result` is the response's content sequence; show the text of its first item.
print(result[0].text)

Based on the provided markdown file content, I have extracted and analyzed the modules listed in the curriculum. Here's the list of all modules:

1. What is Metadata?
2. What is an Ontology?
3. What is a Knowledge Graph?
4. What is an Identifier?
5. Introduction to Logic
6. Propositional Logic
7. Datalog
8. Predicate Logic
9. Description Logic
10. Rules
11. Introduction to Set Theory
12. Introduction to Discrete Mathematics
13. Open World Assumption vs Closed World Assumption
14. RDF
15. RDFS
16. RDF Serializations
17. RDF Star
18. SPARQL
19. SWRL
20. OWL
21. OWL Dialects
22. Manchester Syntax
23. SHACL
24. Schema.org
25. Dublin Core
26. SOSA & SSN
27. PROV-O
28. SKOS
29. Survey of Modeling Tools
30. Protege
31. Deploying a Knowledge Graph
32. Introduction to Knowledge Engineering
33. Reification
34. GraphQL
35. Survey of Documentation Practices
36. Survey of Visualization Tools
37. Introduction to Upper Ontologies
38. Introduction to Modular Ontologies
39. Survey of Triplestores
40. H

In [None]:
def generation_of_module_content(prompt, model="claude-3-5-sonnet-20240620", max_tokens=4000):
  """Ask Claude for one module's content on top of the shared analysis history.

  The module-level ``messages`` list is only read: the request is sent with a
  new list made of the history plus this prompt, so per-module turns never
  accumulate in the shared history.

  Args:
      prompt: Module-specific instruction text.
      model: Claude model name.
      max_tokens: Upper bound on generated tokens (defaults to the value that
          was previously hard-coded).

  Returns:
      ``(content, response_json)`` where ``content`` is ``message.content`` and
      ``response_json`` is ``message.model_dump_json(indent=4)``.
  """
  user_turn = {"role": "user", "content": [{"type": "text", "text": prompt}]}
  message = client.messages.create(
        model = model,
        max_tokens = max_tokens,
        temperature = 0.7,
        system = initial_system_message,
        messages = messages + [user_turn]
  )

  return message.content, message.model_dump_json(indent = 4)

In [None]:
# Template for per-module Claude requests; the `{}` placeholder is filled with
# str.format() (the generation loop passes the module number).
custom_claude_module_prompt = """
Let's focus on the module topic {}. Look at the structural data given for that topic. Using the provided structural data, generate an educational curriculum focusing only on the 'Content' and 'References' sections. Ensure the content section is highly detailed and lengthy, thoroughly explaining each topic in depth to suit the target academic audience. Cite all external sources appropriately, using relevant references, and avoid any self-citation of the provided repository. The content should flow logically, be organized clearly into subsections, and comprehensively cover all necessary information from the structural data. Format the ouput to a markdown structure.
"""

In [None]:
import os
import shutil
import time

# Destination folder in Google Drive (adjust folder name as needed).
drive_folder = "/content/drive/My Drive/claude_folder"
# Create the folder once, up front, if it doesn't exist.
os.makedirs(drive_folder, exist_ok=True)

for i in range(18,19):
  result, raw = generation_of_module_content(custom_claude_module_prompt.format(i))

  # Step 1: Save the markdown answer (text of the first content item) and the
  # raw JSON response locally.
  source_file_md = f"{i}. module.md"
  source_file_raw = f"{i}. module.txt"
  with open(source_file_md, "w", encoding="utf-8") as f_result:
    f_result.write(result[0].text)
  with open(source_file_raw, "w", encoding="utf-8") as f_raw:
    f_raw.write(raw)

  # Step 2: Copy the files to Google Drive. The copy reads the same paths that
  # were just written instead of assuming the working directory is /content.
  destination_md = os.path.join(drive_folder, source_file_md)
  destination_raw = os.path.join(drive_folder, source_file_raw)
  shutil.copy(source_file_md, destination_md)
  shutil.copy(source_file_raw, destination_raw)
  # Short pause between API requests.
  time.sleep(2)

## 3. Mistral

In [None]:
!pip install mistralai



In [None]:
import os
from mistralai import Mistral

In [None]:
# api_key = os.environ["MISTRAL_API_KEY"]
# NOTE: `api_key` is a generic module-level name; after this cell runs it holds
# the Mistral key for any later cell that reads `api_key`.
api_key = mistral_api_key

# Mistral API client used by the Mistral cells.
mistral_client = Mistral(api_key=api_key)

In [None]:
# System prompt that seeds the Mistral conversation history.
initial_system_message = "You are an helpful assistant and an expert in generation of academic curriculum with are very thorough, detailed and comprehensive enough."

In [None]:
# Mistral conversation history, seeded with the system message.
mistal_messages = [{"role": "system", "content": initial_system_message}]

def initial_analysis_of_repo_with_mistral(repo_url, prompt, model = "mistral-large-latest", max_tokens = 1000):
  """Clone the repo and ask Mistral to analyse its module README.

  Args:
      repo_url: Git URL handed to ``clone_repo``.
      prompt: Instruction text; the extracted README content is appended to it
          after a newline.
      model: Mistral model name.
      max_tokens: Upper bound on generated tokens.

  Returns:
      ``(reply_text, response_json)`` where ``reply_text`` is the first
      choice's message content and ``response_json`` is
      ``chat_response.model_dump_json(indent=4)``.

  Side effects:
      On success the user turn and the assistant reply are appended to the
      module-level ``mistal_messages`` list. If the request raises, the list
      is left unchanged so the cell can simply be re-run.
  """
  local_dir = "repo_temp"
  clone_repo(repo_url, local_dir)
  markdown_content = extract_instructions(local_dir + "/curriculum/modules")
  user_turn = {"role": "user", "content": prompt + "\n" + markdown_content}
  # Send history + new turn without mutating the shared list yet; a failed
  # request used to leave a dangling user turn in `mistal_messages`.
  chat_response = mistral_client.chat.complete(
    model = model,
    messages = mistal_messages + [user_turn],
    max_tokens = max_tokens,
  )

  reply = chat_response.choices[0].message.content
  mistal_messages.append(user_turn)
  mistal_messages.append({"role": "assistant", "content": reply})
  return reply, chat_response.model_dump_json(indent = 4)

In [None]:
# Instruction text for the initial Mistral repo analysis; the extracted README
# content is appended after it by initial_analysis_of_repo_with_mistral().
mistral_analysis_prompt = """
I have a GitHub repository that includes details on a knowledge graph, and I want to generate educational content based on its modules. The structure for each module is specified in the markdown file.
GitHub repo: https://github.com/chrisdavisj/open-kg-curriculum/
Module list markdown file: https://github.com/chrisdavisj/open-kg-curriculum/blob/master/curriculum/modules/README.md
Please do the following:
 - Extract all modules listed in the curriculum/modules/README.md.
 - Review and analyze the structure for each module, and any specific details mentioned as specified in that file.
 - list all the modules

Markdown file content:
"""

In [None]:
# Repository to analyse (same value as in the configuration cell at the top of the notebook).
repo_url = 'https://github.com/chrisdavisj/open-kg-curriculum'

In [None]:
# Clone the repo and run the initial Mistral analysis; this also extends
# `mistal_messages` with the exchange that the per-module requests build on.
result, raw = initial_analysis_of_repo_with_mistral(repo_url,mistral_analysis_prompt, max_tokens=1000)

In [None]:
# Show the Mistral conversation history, one entry per paragraph.
print("\n\n".join(str(entry) for entry in mistal_messages))

{'role': 'system', 'content': 'You are an helpful assistant and an expert in generation of academic curriculum with are very thorough, detailed and comprehensive enough.'}

{'role': 'user', 'content': '\nI have a GitHub repository that includes details on a knowledge graph, and I want to generate educational content based on its modules. The structure for each module is specified in the markdown file.\nGitHub repo: https://github.com/chrisdavisj/open-kg-curriculum/\nModule list markdown file: https://github.com/chrisdavisj/open-kg-curriculum/blob/master/curriculum/modules/README.md\nPlease do the following:\n - Extract all modules listed in the curriculum/modules/README.md.\n - Review and analyze the structure for each module, and any specific details mentioned as specified in that file.\n - list all the modules\n\nMarkdown file content:\n\n# The Open Curriculum Module List\n\nThe vocabulary that we use:\n- Audience: Undergraduate Student, Graduate Student, Developer, Project Managemen

In [None]:
# Show the text returned by the initial Mistral analysis.
print(result)

Certainly! Below is a thorough and detailed analysis of the modules listed in the `curriculum/modules/README.md` file from the GitHub repository.

## The Open Curriculum Module List

### Modules Overview

The curriculum includes various modules categorized by audience, level, and category. Below is a structured summary of each module, including their prerequisites, audience, level, and covered concepts.

1. **What is Metadata?**
   - **Category:** Foundational
   - **Module Prerequisites:** None
   - **Audience:** Any
   - **Level:** Beginner
   - **Covered Concepts:** Metadata

2. **What is an Ontology?**
   - **Category:** Not specified
   - **Module Prerequisites:** Not specified
   - **Audience:** Not specified
   - **Level:** Not specified
   - **Covered Concepts:** Ontology, Linked Data, Taxonomy, Schema, Statement, Triple

3. **What is a Knowledge Graph?**
   - **Category:** Foundational
   - **Module Prerequisites:** What is Metadata?
   - **Audience:** Any
   - **Level:** Begi

In [None]:
def generation_of_module_content_with_mistral(prompt, model = "mistral-large-latest", max_tokens = 4000):
  """Ask Mistral for one module's content on top of the shared analysis history.

  The request is made with a copy of ``mistal_messages`` extended by this
  prompt, so the shared history itself is not modified.

  Args:
      prompt: Module-specific instruction text.
      model: Mistral model name.
      max_tokens: Upper bound on generated tokens.

  Returns:
      ``(reply_text, response_json)`` where ``reply_text`` is the first
      choice's message content and ``response_json`` is
      ``chat_response.model_dump_json(indent=4)``.
  """
  conversation = list(mistal_messages)
  conversation.append({"role": "user", "content": prompt})
  chat_response = mistral_client.chat.complete(
    model = model,
    messages = conversation,
    max_tokens = max_tokens,
  )

  reply = chat_response.choices[0].message.content
  # Recorded on the local copy only; it is discarded when the function returns.
  conversation.append({"role": "assistant", "content": reply})
  return reply, chat_response.model_dump_json(indent = 4)

In [None]:
# Template for per-module Mistral requests; the `{}` placeholder is filled with
# str.format() (the generation loop passes the module number).
custom_mistral_module_prompt = """
Let's focus on the module topic {}. Look at the structural data given for that topic. Using the provided structural data, generate an educational curriculum focusing only on the 'Content' and 'References' sections. Ensure the content section is highly detailed and lengthy, thoroughly explaining each topic in depth to suit the target academic audience. Cite all external sources appropriately, using relevant references, and avoid any self-citation of the provided repository. The content should flow logically, be organized clearly into subsections, and comprehensively cover all necessary information from the structural data. Format the ouput to a markdown structure.
"""

In [None]:
import os
import shutil
import time

# Destination folder in Google Drive (adjust folder name as needed).
drive_folder = "/content/drive/My Drive/mistral_folder"
# Create the folder once, up front, if it doesn't exist.
os.makedirs(drive_folder, exist_ok=True)

for i in range(18,19):
  result, raw = generation_of_module_content_with_mistral(custom_mistral_module_prompt.format(i))

  # Step 1: Save the markdown answer and the raw JSON response locally.
  source_file_md = f"{i}. module.md"
  source_file_raw = f"{i}. module.txt"
  with open(source_file_md, "w", encoding="utf-8") as f_result:
    f_result.write(result)
  with open(source_file_raw, "w", encoding="utf-8") as f_raw:
    f_raw.write(raw)

  # Step 2: Copy the files to Google Drive. The copy reads the same paths that
  # were just written instead of assuming the working directory is /content.
  destination_md = os.path.join(drive_folder, source_file_md)
  destination_raw = os.path.join(drive_folder, source_file_raw)
  shutil.copy(source_file_md, destination_md)
  shutil.copy(source_file_raw, destination_raw)
  # Short pause between API requests.
  time.sleep(2)

# Second Pass

## 1. GPT

In [None]:
# prompt: Generate code to Access and call Chat GPT to generate educational content from question which are in github repo
!pip install --upgrade openai

Collecting openai
  Downloading openai-1.51.0-py3-none-any.whl.metadata (24 kB)
Collecting httpx<1,>=0.23.0 (from openai)
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Collecting jiter<1,>=0.4.0 (from openai)
  Downloading jiter-0.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.6 kB)
Collecting httpcore==1.* (from httpx<1,>=0.23.0->openai)
  Downloading httpcore-1.0.6-py3-none-any.whl.metadata (21 kB)
Collecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<1,>=0.23.0->openai)
  Downloading h11-0.14.0-py3-none-any.whl.metadata (8.2 kB)
Downloading openai-1.51.0-py3-none-any.whl (383 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m383.5/383.5 kB[0m [31m11.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading httpx-0.27.2-py3-none-any.whl (76 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.4/76.4 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading httpcore-1.0.6-py3-none-any.whl (78 kB)
[2K   [90m━

In [None]:
# Block: Setup OpenAI API and GitHub Repo Access
import openai
from openai import OpenAI

# Module-level OpenAI client; stays None until set_openai_api_key() is called.
client = None
# Function to set OpenAI API Key
def set_openai_api_key(api_key):
  """Create the global OpenAI client and set the module-level API key.

  Args:
      api_key: OpenAI API key used for both the ``OpenAI`` client instance and
          ``openai.api_key``.
  """
  global client
  client = OpenAI(api_key=api_key)
  openai.api_key = api_key

In [None]:
# Block: Define the Prompt for ChatGPT API based on repo contents
def create_prompt(repo_content):
    """Build the base curriculum-generation prompt around the repository text.

    Args:
        repo_content: Text taken from the repository; it is interpolated at
            the very end of the prompt.

    Returns:
        The prompt string, ending with ``repo_content`` followed by a period.
    """
    # Modify this function based on how the instructions are structured in the repo
    intro = "I have a GitHub repository that outlines the structure of a knowledge graph curriculum. "
    task = "I need to generate educational content for each module based on the structure defined in the markdown file. "
    pointer = f"Specifically, refer to the module list and structure provided here: {repo_content}."
    return intro + task + pointer

In [None]:
# Conversation history shared by the GPT cells. It starts with the system
# message; generate_academic_content() appends user prompts and assistant
# replies to it.
messages = [
    {"role": "system", "content": "You are an helpful assistant and an expert in generation of academic curriculum with are very thorough, detailed and comprehensive enough.."}
]

# Block: Send Prompt to OpenAI ChatGPT API
def generate_academic_content(prompt, engine='gpt-4o', max_tokens=1000):
    """Send ``prompt`` to OpenAI and return the generated text plus the raw response.

    The legacy completions endpoint is tried first. If the API rejects that
    request (``openai.APIError``), the call is retried on the chat completions
    endpoint using the shared module-level ``messages`` history.

    Args:
        prompt: User prompt. It is appended to ``messages`` before any request
            is made and removed again if no request succeeds.
        engine: Model name passed as ``model`` to both endpoints.
        max_tokens: Token limit for the legacy completions request.
            NOTE(review): it is not forwarded to the chat fallback, which
            matches the previous behaviour; confirm whether it should be.

    Returns:
        ``(text, response_json)`` on success, where ``response_json`` is
        ``response.model_dump_json(indent=4)``; ``(None, None)`` when no
        request succeeded (the error is printed).
    """
    # Call the OpenAI API to generate content
    messages.append({"role": "user", "content": prompt})

    try:
        response = client.completions.create(
            model=engine,
            prompt=prompt,
            max_tokens=max_tokens,
            n=1,
            stop=None,
            temperature=0.7
        )
    except openai.APIError:
        # The API refused the legacy request; fall back to the chat endpoint below.
        response = None
    except Exception as e:
        # Previously this path fell through to the final return with `response`
        # unbound and raised UnboundLocalError.
        print(f"Error: {e}")
        messages.pop()  # drop the user turn that never got an answer
        return None, None

    if response is not None:
        return response.choices[0].text.strip(), response.model_dump_json(indent = 4)

    try:
        response = client.chat.completions.create(
            model=engine,
            messages=messages
        )
        assistant_reply = response.choices[0].message.content
        # Strip before touching the history so a missing reply cannot leave a
        # half-recorded exchange behind.
        reply_text = assistant_reply.strip()
        response_dump = response.model_dump_json(indent = 4)
    except Exception as e:
        print(f"Error: {e}")
        messages.pop()  # drop the user turn that never got an answer
        return None, None

    messages.append({"role": "assistant", "content": assistant_reply})
    return reply_text, response_dump

In [None]:
# Block: Flexible Code Block to Extend Functionality
# You can add more flexibility here to modify prompts or experiment with other parameters (e.g., temperature, max tokens, etc.)
def custom_prompt_analysis(repo_url, api_key, custom_prompt, engine='gpt-4o', max_tokens=1000):
    """Clone the repo, combine ``custom_prompt`` with its module README and query OpenAI.

    Args:
        repo_url: Git URL handed to ``clone_repo``.
        api_key: OpenAI API key handed to ``set_openai_api_key``.
        custom_prompt: Text placed before the extracted repository content.
        engine: Model name forwarded to ``generate_academic_content``.
        max_tokens: Token limit forwarded to ``generate_academic_content``.

    Prints the full prompt, the engine name, the generated result and the raw
    response dump. Returns nothing.
    """
    checkout_dir = "repo_temp"
    clone_repo(repo_url, checkout_dir)

    # Extract instructions/topics from the repo
    module_readme = extract_instructions(f"{checkout_dir}/curriculum/modules")

    set_openai_api_key(api_key)

    # Create custom prompt
    full_prompt = f"{custom_prompt}\n{module_readme}"
    print(full_prompt)
    print(f"Using engine:{engine}")

    result, response_dump = generate_academic_content(full_prompt, engine=engine, max_tokens=max_tokens)
    divider = "\n" + "-"*40
    print(f"Custom Prompt Result from {engine}:\n", result, divider, "\n\n\n", response_dump, divider)

In [None]:
# Instruction text for the initial GPT repo analysis; custom_prompt_analysis()
# places the extracted README content after it.
analysis_prompt = """
I have a GitHub repo that contains details about the knowledge graph. I want to generate educational content for its modules in the specified structure mentioned in the markdown file. Here is the GitHub repo: https://github.com/chrisdavisj/open-kg-curriculum/tree/master
here is the module list markdown: https://github.com/chrisdavisj/open-kg-curriculum/blob/master/curriculum/modules/README.md. Look at the module structure at: curriculum/modules/README.md. Extract the modules from the modules list first In the curriculum/modules/README.md

Here is the markdown file:
"""

In [None]:
# Pass the OpenAI key from the configuration cell explicitly. The bare name
# `api_key` is assigned the Mistral key earlier in the notebook, so using it
# here would send the wrong provider's key to OpenAI.
custom_prompt_analysis(repo_url, openai_api_key, analysis_prompt)


I have a GitHub repo that contains details about the knowledge graph. I want to generate educational content for its modules in the specified structure mentioned in the markdown file. Here is the GitHub repo: https://github.com/chrisdavisj/open-kg-curriculum/tree/master
here is the module list markdown: https://github.com/chrisdavisj/open-kg-curriculum/blob/master/curriculum/modules/README.md. Look at the module structure at: curriculum/modules/README.md. Extract the modules from the modules list first In the curriculum/modules/README.md

Here is the markdown file:

# The Open Curriculum Module List

The vocabulary that we use:
- Audience: Undergraduate Student, Graduate Student, Developer, Project Management, Any
- Level: Beginner, Intermediate, Advanced
- Category: Context/History, Technology, Foundational, Standards, Methods, Resources, Query Language, Markup Languages, Visualization

## Modules List
* What is Metadata?
  * Category: Foundational
  * Module Prerequisites: None
  * 

In [None]:
# Show the conversation history, one entry per paragraph.
print("\n\n".join(str(entry) for entry in messages))

{'role': 'system', 'content': 'You are an helpful assistant and an expert in generation of academic curriculum with are very thorough, detailed and comprehensive enough..'}

{'role': 'user', 'content': '\nI have a GitHub repo that contains details about the knowledge graph. I want to generate educational content for its modules in the specified structure mentioned in the markdown file. Here is the GitHub repo: https://github.com/chrisdavisj/open-kg-curriculum/tree/master\nhere is the module list markdown: https://github.com/chrisdavisj/open-kg-curriculum/blob/master/curriculum/modules/README.md. Look at the module structure at: curriculum/modules/README.md. Extract the modules from the modules list first In the curriculum/modules/README.md\n\nHere is the markdown file:\n\n# The Open Curriculum Module List\n\nThe vocabulary that we use:\n- Audience: Undergraduate Student, Graduate Student, Developer, Project Management, Any\n- Level: Beginner, Intermediate, Advanced\n- Category: Context

In [None]:
def custom_module_generation_prompt(custom_prompt, engine='gpt-4o', max_tokens=8000):
    """Send a module-specific prompt to ``generate_academic_content`` and echo the outcome.

    Args:
        custom_prompt: Prompt to send; it is formatted into a string as-is.
        engine: Model name forwarded to ``generate_academic_content``.
        max_tokens: Token limit forwarded to ``generate_academic_content``.

    Returns:
        The ``(result, response_dump)`` pair produced by
        ``generate_academic_content``.
    """
    # Create custom prompt
    full_prompt = f"{custom_prompt}"

    print(f"Using engine:{engine}")

    generated = generate_academic_content(full_prompt, engine=engine, max_tokens=max_tokens)
    result, response_dump = generated
    divider = "\n" + "-"*40
    print(f"Custom Prompt Result from {engine}:\n", result, divider, "\n\n\n", response_dump, divider)
    return result, response_dump

In [None]:
# Template for per-module GPT requests; the `{}` placeholder is filled with
# str.format() (the generation loop passes the module number).
custom_module_prompt = """
Let's focus on the module topic {}. Look at the structural data given for that topic. Using the provided structural data, generate an educational curriculum focusing only on the 'Content' and 'References' sections. Ensure the content section is highly detailed, thoroughly explaining each topic in depth to suit the target academic audience. Cite all external sources appropriately, using relevant references, and avoid any self-citation of the provided repository. The content should flow logically, be organized clearly into subsections, and comprehensively cover all necessary information from the structural data. Format the ouput to a markdown structure.
"""

In [None]:
# Show the conversation history, one entry per paragraph.
print("\n\n".join(str(entry) for entry in messages))
# Snapshot of the history at this point; the generation loop restores
# `messages` from it after every module.
temp_messages = list(messages)

{'role': 'system', 'content': 'You are an helpful assistant and an expert in generation of academic curriculum with are very thorough, detailed and comprehensive enough..'}

{'role': 'user', 'content': '\nI have a GitHub repo that contains details about the knowledge graph. I want to generate educational content for its modules in the specified structure mentioned in the markdown file. Here is the GitHub repo: https://github.com/chrisdavisj/open-kg-curriculum/tree/master\nhere is the module list markdown: https://github.com/chrisdavisj/open-kg-curriculum/blob/master/curriculum/modules/README.md. Look at the module structure at: curriculum/modules/README.md. Extract the modules from the modules list first In the curriculum/modules/README.md\n\nHere is the markdown file:\n\n# The Open Curriculum Module List\n\nThe vocabulary that we use:\n- Audience: Undergraduate Student, Graduate Student, Developer, Project Management, Any\n- Level: Beginner, Intermediate, Advanced\n- Category: Context

In [None]:
import os
import shutil
import time

# Destination folder in Google Drive (adjust folder name as needed).
drive_folder = "/content/drive/My Drive/my_folder"
# Create the folder once, up front, if it doesn't exist.
os.makedirs(drive_folder, exist_ok=True)

for i in range(18,19):
  result, raw = custom_module_generation_prompt(custom_module_prompt.format(i))
  # Restore the snapshot so the next module does not see this module's turns.
  messages = temp_messages.copy()
  print(*messages, sep = "\n\n")

  if result is None or raw is None:
    # The generation helper reports failure as (None, None); writing None to
    # a file would raise TypeError.
    print(f"Skipping module {i}: no content was generated.")
  else:
    # Step 1: Save the markdown answer and the raw JSON response locally.
    source_file_md = f"{i}. module.md"
    source_file_raw = f"{i}. module.txt"
    with open(source_file_md, "w", encoding="utf-8") as f_result:
      f_result.write(result)
    with open(source_file_raw, "w", encoding="utf-8") as f_raw:
      f_raw.write(raw)

    # Step 2: Copy the files to Google Drive. The copy reads the same paths
    # that were just written instead of assuming the working directory is /content.
    destination_md = os.path.join(drive_folder, source_file_md)
    destination_raw = os.path.join(drive_folder, source_file_raw)
    shutil.copy(source_file_md, destination_md)
    shutil.copy(source_file_raw, destination_raw)
  # Short pause between API requests.
  time.sleep(2)

Using engine:gpt-4o
Custom Prompt Result from gpt-4o:
 Here's an educational curriculum focusing on the 'Content' and 'References' sections for the module topic "SPARQL" in markdown:

```markdown
# SPARQL Module

## Content

### Introduction to SPARQL
SPARQL (SPARQL Protocol and RDF Query Language) is a powerful query language and protocol developed for retrieving and manipulating data stored in Resource Description Framework (RDF) format. Designed to be analogous to SQL for relational databases, SPARQL provides means to query required and optional graph patterns along with their conjunctions and disjunctions.

SPARQL allows for:
- **Retrieving data** in various formats, such as JSON, XML, or CSV.
- **Filtering results** using conditions.
- **Aggregating data** to provide summaries and insights.
- **Manipulating RDF data** through updates and deletes.

### Components of a SPARQL Query
1. **SELECT Queries**: Used for extracting raw data from RDF graphs.
   ```sparql
   SELECT ?subject ?

## 2. Claude

In [None]:
!pip install anthropic

Collecting anthropic
  Downloading anthropic-0.34.2-py3-none-any.whl.metadata (18 kB)
Downloading anthropic-0.34.2-py3-none-any.whl (891 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m891.9/891.9 kB[0m [31m10.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: anthropic
Successfully installed anthropic-0.34.2


In [None]:
import anthropic

# Anthropic API client used by the Claude cells.
# NOTE: this rebinds the global name `client`, which generate_academic_content()
# in the GPT section also reads; call set_openai_api_key() again before using
# the GPT helpers after this cell has run.
client = anthropic.Anthropic(
    api_key=ANTHROPIC_API_KEY,
)

In [None]:
# System prompt passed as `system=` on every Claude request.
initial_system_message = "You are an helpful assistant and an expert in generation of academic curriculum with are very thorough, detailed and comprehensive enough."
# Claude conversation history. NOTE: this reuses the global name `messages`
# from the GPT section, so the GPT history is discarded here.
messages = []

In [None]:
def initial_analysis_of_repo(repo_url, prompt, model="claude-3-5-sonnet-20240620"):
    """Ask Claude to analyse the module list of a freshly cloned repository.

    The repository is cloned into ``repo_temp`` and whatever
    ``extract_instructions`` returns for its ``curriculum/modules`` folder is
    appended to ``prompt``. After a successful call the user turn and the
    reply are both stored in the module-level ``messages`` list so that later
    requests can reuse them as context.

    Args:
        repo_url: URL of the Git repository to clone.
        prompt: Instruction text placed in front of the extracted content.
        model: Anthropic model identifier.

    Returns:
        ``(content, raw_json)``: the ``content`` attribute of the API response
        and the whole response serialised with ``model_dump_json(indent=4)``.

    Raises:
        Anything raised by ``clone_repo``, ``extract_instructions`` or
        ``client.messages.create``. ``messages`` is left unchanged in that
        case, so re-running the cell does not duplicate the prompt.
    """
    local_dir = "repo_temp"
    clone_repo(repo_url, local_dir)
    markdown_content = extract_instructions(local_dir + "/curriculum/modules")

    user_turn = {
        "role": "user",
        "content": [{"type": "text", "text": prompt + "\n" + markdown_content}],
    }

    # Send the pending turn in a new list; the shared history is only
    # updated once the request has succeeded.
    message = client.messages.create(
        model=model,
        max_tokens=1000,
        temperature=0.7,
        system=initial_system_message,
        messages=messages + [user_turn],
    )

    messages.append(user_turn)
    messages.append({"role": "assistant", "content": message.content})
    return message.content, message.model_dump_json(indent=4)

In [None]:
claude_analysis_prompt = """
I have a GitHub repository that includes details on a knowledge graph, and I want to generate educational content based on its modules. The structure for each module is specified in the markdown file.
GitHub repo: https://github.com/chrisdavisj/open-kg-curriculum/
Module list markdown file: https://github.com/chrisdavisj/open-kg-curriculum/blob/master/curriculum/modules/README.md
Please do the following:
 - Extract all modules listed in the curriculum/modules/README.md.
 - Review and analyze the structure for each module, and any specific details mentioned as specified in that file.
 - list all the modules

Markdown file content:
"""

In [None]:
repo_url = 'https://github.com/chrisdavisj/open-kg-curriculum'

In [None]:
result, raw = initial_analysis_of_repo(repo_url,claude_analysis_prompt)

In [None]:
print(result[0].text)

Based on the provided markdown file content, I have extracted and analyzed the modules listed in the curriculum. Here's the list of all modules:

1. What is Metadata?
2. What is an Ontology?
3. What is a Knowledge Graph?
4. What is an Identifier?
5. Introduction to Logic
6. Propositional Logic
7. Datalog
8. Predicate Logic
9. Description Logic
10. Rules
11. Introduction to Set Theory
12. Introduction to Discrete Mathematics
13. Open World Assumption vs Closed World Assumption
14. RDF
15. RDFS
16. RDF Serializations
17. RDF Star
18. SPARQL
19. SWRL
20. OWL
21. OWL Dialects
22. Manchester Syntax
23. SHACL
24. Schema.org
25. Dublin Core
26. SOSA & SSN
27. PROV-O
28. SKOS
29. Survey of Modeling Tools
30. Protege
31. Deploying a Knowledge Graph
32. Introduction to Knowledge Engineering
33. Reification
34. GraphQL
35. Survey of Documentation Practices
36. Survey of Visualization Tools
37. Introduction to Upper Ontologies
38. Introduction to Modular Ontologies
39. Survey of Triplestores
40. H

In [None]:
def generation_of_module_content(prompt, model="claude-3-5-sonnet-20240620"):
    """Generate content for one module, using the shared history as context.

    The request contains every turn currently in the module-level
    ``messages`` list followed by ``prompt`` as a new user turn. Neither the
    prompt nor the reply is stored, so every call starts from the same
    context.

    Args:
        prompt: Text of the user turn to send.
        model: Anthropic model identifier.

    Returns:
        ``(content, raw_json)``: the ``content`` attribute of the API response
        and the whole response serialised with ``model_dump_json(indent=4)``.
    """
    user_turn = {"role": "user", "content": [{"type": "text", "text": prompt}]}
    message = client.messages.create(
        model=model,
        max_tokens=8000,
        temperature=0.7,
        system=initial_system_message,
        # A new list keeps the shared history untouched.
        messages=messages + [user_turn],
    )
    return message.content, message.model_dump_json(indent=4)

In [None]:
custom_claude_module_prompt = """
Let's focus on the module topic {}. Look at the structural data given for that topic. Using the provided structural data, generate an educational curriculum focusing only on the 'Content' and 'References' sections. Ensure the content section is highly detailed and lengthy, thoroughly explaining each topic in depth to suit the target academic audience. Cite all external sources appropriately, using relevant references, and avoid any self-citation of the provided repository. The content should flow logically, be organized clearly into subsections, and comprehensively cover all necessary information from the structural data. Format the ouput to a markdown structure.
"""

In [None]:
import os
import shutil
import time

# Generate the selected module(s) with Claude and back the files up to Drive.
# range(18, 19) selects module number 18 only.
for i in range(18, 19):
    result, raw = generation_of_module_content(custom_claude_module_prompt.format(i))

    # Step 1: write the markdown answer and the raw JSON response locally.
    # The same absolute paths are used for writing and for copying, so the
    # cell does not depend on the working directory being /content.
    source_file_md = os.path.abspath(f"{i}. module.md")
    source_file_raw = os.path.abspath(f"{i}. module.txt")
    with open(source_file_md, "w", encoding="utf-8") as f_result:
        f_result.write(result[0].text)
    with open(source_file_raw, "w", encoding="utf-8") as f_raw:
        f_raw.write(raw)

    # Step 2: copy the files from Colab to Google Drive
    # (adjust the destination folder name as needed).
    destination_md = f"/content/drive/My Drive/claude_folder/{i}. module.md"
    destination_raw = f"/content/drive/My Drive/claude_folder/{i}. module.txt"

    # Both destinations share one folder; create it if it doesn't exist.
    os.makedirs(os.path.dirname(destination_md), exist_ok=True)
    os.makedirs(os.path.dirname(destination_raw), exist_ok=True)

    shutil.copy(source_file_md, destination_md)
    shutil.copy(source_file_raw, destination_raw)

    # Short pause before the next request (presumably to stay under API rate limits).
    time.sleep(2)

## 3. Mistral

In [None]:
!pip install mistralai



In [None]:
import os
from mistralai import Mistral

In [None]:
# api_key = os.environ["MISTRAL_API_KEY"]
api_key = mistral_api_key

mistral_client = Mistral(api_key=api_key)

In [None]:
initial_system_message = "You are an helpful assistant and an expert in generation of academic curriculum with are very thorough, detailed and comprehensive enough."

In [None]:
mistal_messages = []
mistal_messages.append({"role": "system", "content": initial_system_message})

def initial_analysis_of_repo_with_mistral(repo_url, prompt, model = "mistral-large-latest", max_tokens = 1000):
    """Ask Mistral to analyse the module list of a freshly cloned repository.

    The repository is cloned into ``repo_temp`` and whatever
    ``extract_instructions`` returns for its ``curriculum/modules`` folder is
    appended to ``prompt``. After a successful call the user turn and the
    reply are both stored in the module-level ``mistal_messages`` list.

    Args:
        repo_url: URL of the Git repository to clone.
        prompt: Instruction text placed in front of the extracted content.
        model: Mistral model identifier.
        max_tokens: Upper bound on generated tokens, forwarded to the API.

    Returns:
        ``(reply_text, raw_json)``: the content of the first choice and the
        whole response serialised with ``model_dump_json(indent=4)``.

    Raises:
        Anything raised by ``clone_repo``, ``extract_instructions`` or
        ``mistral_client.chat.complete``. ``mistal_messages`` is left
        unchanged in that case, so re-running the cell does not duplicate
        the prompt.
    """
    local_dir = "repo_temp"
    clone_repo(repo_url, local_dir)
    markdown_content = extract_instructions(local_dir + "/curriculum/modules")

    user_turn = {"role": "user", "content": prompt + "\n" + markdown_content}

    # Send the pending turn in a new list; the shared history is only
    # updated once the request has succeeded.
    chat_response = mistral_client.chat.complete(
        model=model,
        messages=mistal_messages + [user_turn],
        max_tokens=max_tokens,
    )

    reply_text = chat_response.choices[0].message.content
    mistal_messages.append(user_turn)
    mistal_messages.append({"role": "assistant", "content": reply_text})
    return reply_text, chat_response.model_dump_json(indent=4)

In [None]:
mistral_analysis_prompt = """
I have a GitHub repository that includes details on a knowledge graph, and I want to generate educational content based on its modules. The structure for each module is specified in the markdown file.
GitHub repo: https://github.com/chrisdavisj/open-kg-curriculum/
Module list markdown file: https://github.com/chrisdavisj/open-kg-curriculum/blob/master/curriculum/modules/README.md
Please do the following:
 - Extract all modules listed in the curriculum/modules/README.md.
 - Review and analyze the structure for each module, and any specific details mentioned as specified in that file.
 - list all the modules

Markdown file content:
"""

In [None]:
repo_url = 'https://github.com/chrisdavisj/open-kg-curriculum'

In [None]:
result, raw = initial_analysis_of_repo_with_mistral(repo_url,mistral_analysis_prompt, max_tokens=1000)

In [None]:
print(*mistal_messages, sep="\n\n")

{'role': 'system', 'content': 'You are an helpful assistant and an expert in generation of academic curriculum with are very thorough, detailed and comprehensive enough.'}

{'role': 'user', 'content': '\nI have a GitHub repository that includes details on a knowledge graph, and I want to generate educational content based on its modules. The structure for each module is specified in the markdown file.\nGitHub repo: https://github.com/chrisdavisj/open-kg-curriculum/\nModule list markdown file: https://github.com/chrisdavisj/open-kg-curriculum/blob/master/curriculum/modules/README.md\nPlease do the following:\n - Extract all modules listed in the curriculum/modules/README.md.\n - Review and analyze the structure for each module, and any specific details mentioned as specified in that file.\n - list all the modules\n\nMarkdown file content:\n\n# The Open Curriculum Module List\n\nThe vocabulary that we use:\n- Audience: Undergraduate Student, Graduate Student, Developer, Project Managemen

In [None]:
print(result)

Certainly! Below is a thorough and detailed analysis of the modules listed in the `curriculum/modules/README.md` file from the GitHub repository.

## The Open Curriculum Module List

### Modules Overview

The curriculum includes various modules categorized by audience, level, and category. Below is a structured summary of each module, including their prerequisites, audience, level, and covered concepts.

1. **What is Metadata?**
   - **Category:** Foundational
   - **Module Prerequisites:** None
   - **Audience:** Any
   - **Level:** Beginner
   - **Covered Concepts:** Metadata

2. **What is an Ontology?**
   - **Category:** Not specified
   - **Module Prerequisites:** Not specified
   - **Audience:** Not specified
   - **Level:** Not specified
   - **Covered Concepts:** Ontology, Linked Data, Taxonomy, Schema, Statement, Triple

3. **What is a Knowledge Graph?**
   - **Category:** Foundational
   - **Module Prerequisites:** What is Metadata?
   - **Audience:** Any
   - **Level:** Begi

In [None]:
def generation_of_module_content_with_mistral(prompt, model = "mistral-large-latest", max_tokens = 8000):
    """Generate content for one module, using the shared history as context.

    The request contains every turn currently in the module-level
    ``mistal_messages`` list followed by ``prompt`` as a new user turn.
    Neither the prompt nor the reply is stored, so every call starts from the
    same context.

    Args:
        prompt: Text of the user turn to send.
        model: Mistral model identifier.
        max_tokens: Upper bound on generated tokens, forwarded to the API.

    Returns:
        ``(reply_text, raw_json)``: the content of the first choice and the
        whole response serialised with ``model_dump_json(indent=4)``.
    """
    user_turn = {"role": "user", "content": prompt}
    chat_response = mistral_client.chat.complete(
        model=model,
        # A new list keeps the shared history untouched.
        messages=mistal_messages + [user_turn],
        max_tokens=max_tokens,
    )
    return chat_response.choices[0].message.content, chat_response.model_dump_json(indent=4)

In [None]:
custom_mistral_module_prompt = """
Let's focus on the module topic {}. Look at the structural data given for that topic. Using the provided structural data, generate an educational curriculum focusing only on the 'Content' and 'References' sections. Ensure the content section is highly detailed and lengthy, thoroughly explaining each topic in depth to suit the target academic audience. Cite all external sources appropriately, using relevant references, and avoid any self-citation of the provided repository. The content should flow logically, be organized clearly into subsections, and comprehensively cover all necessary information from the structural data. Format the ouput to a markdown structure.
"""

In [None]:
import os
import shutil
import time

# Generate the selected module(s) with Mistral and back the files up to Drive.
# range(18, 19) selects module number 18 only.
for i in range(18, 19):
    result, raw = generation_of_module_content_with_mistral(custom_mistral_module_prompt.format(i))

    # Step 1: write the markdown answer and the raw JSON response locally.
    # The same absolute paths are used for writing and for copying, so the
    # cell does not depend on the working directory being /content.
    source_file_md = os.path.abspath(f"{i}. module.md")
    source_file_raw = os.path.abspath(f"{i}. module.txt")
    with open(source_file_md, "w", encoding="utf-8") as f_result:
        f_result.write(result)
    with open(source_file_raw, "w", encoding="utf-8") as f_raw:
        f_raw.write(raw)

    # Step 2: copy the files from Colab to Google Drive
    # (adjust the destination folder name as needed).
    destination_md = f"/content/drive/My Drive/mistral_folder/{i}. module.md"
    destination_raw = f"/content/drive/My Drive/mistral_folder/{i}. module.txt"

    # Both destinations share one folder; create it if it doesn't exist.
    os.makedirs(os.path.dirname(destination_md), exist_ok=True)
    os.makedirs(os.path.dirname(destination_raw), exist_ok=True)

    shutil.copy(source_file_md, destination_md)
    shutil.copy(source_file_raw, destination_raw)

    # Short pause before the next request (presumably to stay under API rate limits).
    time.sleep(2)

# Third Pass

## 1. GPT

In [4]:
# prompt: Generate code to Access and call Chat GPT to generate educational content from question which are in github repo
!pip install --upgrade openai

Collecting openai
  Downloading openai-1.53.0-py3-none-any.whl.metadata (24 kB)
Downloading openai-1.53.0-py3-none-any.whl (387 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m387.1/387.1 kB[0m [31m10.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: openai
  Attempting uninstall: openai
    Found existing installation: openai 1.52.2
    Uninstalling openai-1.52.2:
      Successfully uninstalled openai-1.52.2
Successfully installed openai-1.53.0


In [66]:
# Block: Setup OpenAI API and GitHub Repo Access
import openai
from openai import OpenAI

client = None
# Function to set OpenAI API Key
def set_openai_api_key(api_key):
    """Configure OpenAI access for the following cells.

    Rebinds the module-level ``client`` to a new ``OpenAI`` instance created
    with ``api_key`` and also stores the key on ``openai.api_key``.

    Args:
        api_key: The OpenAI API key to use.
    """
    global client
    fresh_client = OpenAI(api_key=api_key)
    client = fresh_client
    openai.api_key = api_key

In [67]:
audience_info = """
Any: This audience type covers content that could be of interest to members of any or all of the groups listed below.
Academic: An Academic is interested in advanced topics in knowledge graphs, including formal logic, machine learning, term extraction, and postgraduate or scientific research.
Developer: A Developer may be tasked with designing, implementing, and supporting knowledge graphs in a professional setting. This role is inherently more technical than the others and may involve maintaining the infrastructure supporting a corporate, academic, or government system.
Designer: A Designer, who also can be considered an _Architect_, is a flavor of Practitioner who is responsible for the initial development, maintenance, and evolution/transformation a knowledge graph and its schema. This role focuses primarily on knowledge engineering best practices, understanding current web standards, and the tradeoffs of knowledge representation and reasoning with respect to query complexity.
Practitioner: A Practioner is someone who wants to gain a sufficient understanding of knowledge graphs to be able to design and implement basic ontologies as well as query data collections for individual, business, or educational purposes.
Student: A Student seeks to understand or use knowledge graphs as part of a curriculum. The scope of the Student role could be as broad as that of a Practitioner or limited to identifying and querying existing resources. While it generally includes a broad overview of all knowledge graph technologies, it also delves into some technical details, so as to prepare for a transition to any other role.
Stakeholder: A Stakeholder is interested in a top level overview of the concepts and technologies so as to gain a broad understanding and inform them of potential pros and cons of adopting these technologies.
"""

In [73]:
messages = [
    {"role": "system", "content": f"You are a helpful assistant and an expert in developing thorough, detailed, and comprehensive academic curricula. Consider the following audience types: {audience_info}"}
]

# Block: Send Prompt to OpenAI ChatGPT API
def global_openai_api_call(prompt, engine='gpt-4o', max_tokens=1000):
    """Send ``prompt`` to OpenAI and return the generated text.

    The legacy completions endpoint is tried first. If it raises
    ``openai.APIError`` (presumably the case for chat-only models), the
    request is retried through the chat completions endpoint using the
    module-level ``messages`` history.

    Side effects:
        ``prompt`` is appended to ``messages`` as a user turn before any
        request is made. When the chat fallback succeeds, the reply is
        appended as an assistant turn.

    Args:
        prompt: Text to send.
        engine: Model name used for both endpoints.
        max_tokens: Token limit for the legacy completions request. It is not
            forwarded to the chat fallback.

    Returns:
        ``(text, raw_json)`` with the stripped reply and the response
        serialised with ``model_dump_json(indent=4)``, or ``(None, None)``
        when the request fails; the error is printed in that case.
    """
    messages.append({"role": "user", "content": prompt})

    try:
        response = client.completions.create(
            model=engine,
            prompt=prompt,
            max_tokens=max_tokens,
            n=1,
            stop=None,
            temperature=0.7,
        )
    except openai.APIError:
        # The legacy endpoint rejected the request: retry as a chat completion.
        try:
            response = client.chat.completions.create(
                model=engine,
                messages=messages,
            )
            assistant_reply = response.choices[0].message.content
            messages.append({"role": "assistant", "content": assistant_reply})
            return assistant_reply.strip(), response.model_dump_json(indent=4)
        except Exception as e:
            print(f"Error: {e}")
            return None, None
    except Exception as e:
        print(f"Error: {e}")
        # No response object exists on this path, so report failure the same
        # way as the chat fallback instead of touching `response`.
        return None, None

    return response.choices[0].text.strip(), response.model_dump_json(indent=4)

In [74]:
# Block: Flexible Code Block to Extend Functionality
# You can add more flexibility here to modify prompts or experiment with other parameters (e.g., temperature, max tokens, etc.)
def custom_prompt_analysis(repo_url, api_key, custom_prompt, engine='gpt-4o', max_tokens=1500):
    """Clone a repository, attach its module list to a prompt and query OpenAI.

    The full prompt, the model's answer and the raw response dump are printed
    so that the exchange is visible in the cell output.

    Args:
        repo_url: URL of the Git repository to clone into ``repo_temp``.
        api_key: OpenAI API key, passed to ``set_openai_api_key``.
        custom_prompt: Instruction text placed in front of the repo content.
        engine: Model name forwarded to ``global_openai_api_call``.
        max_tokens: Token limit forwarded to ``global_openai_api_call``.

    Returns:
        The ``(result, response_dump)`` pair from ``global_openai_api_call``.
    """
    checkout_dir = "repo_temp"
    clone_repo(repo_url, checkout_dir)

    # Instructions/topics come from the curriculum/modules folder of the clone.
    modules_dir = checkout_dir + "/curriculum/modules"
    repo_content = extract_instructions(modules_dir)

    set_openai_api_key(api_key)

    full_prompt = "{}\n{}".format(custom_prompt, repo_content)

    # Echo the exact prompt between two rules before sending it.
    print(f"\n\n\n {'-'*50} Full Prompt {'-'*50}", end="\n\n\n")
    print(full_prompt)
    print('-'*110, end="\n\n\n")

    result, response_dump = global_openai_api_call(
        full_prompt,
        engine=engine,
        max_tokens=max_tokens,
    )

    section_rule = "\n" + "-"*40
    print(f"Custom Prompt Result from {engine}:\n", result, section_rule, "\n\n\n", response_dump, section_rule)

    return result, response_dump

In [75]:
analysis_prompt = """
I have a GitHub repository that includes details on a knowledge graph, and I want to generate educational content based on its modules. The structure for each module is specified in the markdown file.
GitHub repo: https://github.com/chrisdavisj/open-kg-curriculum/
Module list markdown file: https://github.com/chrisdavisj/open-kg-curriculum/blob/master/curriculum/modules/README.md
Please do the following:
 - Extract all modules listed in the curriculum/modules/README.md.
 - Review and analyze the structure for each module, and any specific details mentioned as specified in that file.
 - List all the modules
 - Give a JSON from each module's structure and properties as outlined

Markdown file content:
"""

In [76]:
gpt_result, gpt_apt_raw_response = custom_prompt_analysis(repo_url, openai_api_key, analysis_prompt)




 -------------------------------------------------- Full Prompt --------------------------------------------------



I have a GitHub repository that includes details on a knowledge graph, and I want to generate educational content based on its modules. The structure for each module is specified in the markdown file.
GitHub repo: https://github.com/chrisdavisj/open-kg-curriculum/
Module list markdown file: https://github.com/chrisdavisj/open-kg-curriculum/blob/master/curriculum/modules/README.md
Please do the following:
 - Extract all modules listed in the curriculum/modules/README.md.
 - Review and analyze the structure for each module, and any specific details mentioned as specified in that file.
 - List all the modules
 - Give a JSON from each module's structure and properties as outlined

Markdown file content:

# The Open Curriculum Module List

The vocabulary that we use:
- Audience: Undergraduate Student, Graduate Student, Developer, Project Management, Any
- Level: Beginner,

In [110]:
# Keep the span from the first "[" to the last "]" of the GPT answer,
# or an empty string when either bracket is missing.
has_brackets = "[" in gpt_result and "]" in gpt_result
if has_brackets:
    first_bracket = gpt_result.find("[")
    last_bracket = gpt_result.rfind("]")
    json_content = gpt_result[first_bracket : last_bracket + 1]
else:
    json_content = ""

print(json_content)

[
    {
        "Module Name": "What is Metadata?",
        "Category": "Foundational",
        "Module Prerequisites": "None",
        "Audience": "Any",
        "Level": "Beginner",
        "Covered Concepts": ["Metadata"]
    },
    {
        "Module Name": "What is an Ontology?",
        "Category": "",
        "Module Prerequisites": "",
        "Audience": "",
        "Level": "",
        "Covered Concepts": ["Ontology", "Linked Data", "Taxonomy", "Schema", "Statement", "Triple"]
    },
    {
        "Module Name": "What is a Knowledge Graph?",
        "Category": "Foundational",
        "Module Prerequisites": "What is Metadata?",
        "Audience": "Any",
        "Level": "Beginner",
        "Covered Concepts": ["Schema", "Statements", "Triple"]
    },
    {
        "Module Name": "What is an Identifier?",
        "Category": "Foundational",
        "Module Prerequisites": "None",
        "Audience": "Any",
        "Level": "Beginner",
        "Covered Concepts": ["URI", "Hash

In [111]:
print(*messages, sep = "\n\n")

{'role': 'system', 'content': 'You are a helpful assistant and an expert in developing thorough, detailed, and comprehensive academic curricula. Consider the following audience types: \nAny: This audience type covers content that could be of interest to members of any or all of the groups listed below.\nAcademic: An Academic is interested in advanced topics in knowledge graphs, including formal logic, machine learning, term extraction, and postgraduate or scientific research. \nDeveloper: A Developer may be tasked with designing, implementing, and supporting knowledge graphs in a professional setting. This role is inherently more technical than the others and may involve maintaining the infrastructure supporting a corporate, academic, or government system.\nDesigner: A Designer, who also can be considered an _Architect_, is a flavor of Practitioner who is responsible for the initial development, maintenance, and evolution/transformation a knowledge graph and its schema. This role focus

In [112]:
def custom_module_generation_prompt(custom_prompt, engine='gpt-4o', max_tokens=8000):
    """Send a module-generation prompt to OpenAI and print the exchange.

    Args:
        custom_prompt: Prompt to send; it is formatted to text first.
        engine: Model name forwarded to ``global_openai_api_call``.
        max_tokens: Token limit forwarded to ``global_openai_api_call``.

    Returns:
        The ``(result, response_dump)`` pair from ``global_openai_api_call``.
    """
    full_prompt = format(custom_prompt)

    print(f"Using engine:{engine}")

    result, response_dump = global_openai_api_call(
        full_prompt,
        engine=engine,
        max_tokens=max_tokens,
    )

    # The same rule closes both the answer and the raw dump.
    section_rule = "\n" + "-"*100
    print(f"Custom Prompt Result from {engine}:\n", result, section_rule, "\n\n\n", response_dump, section_rule)
    return result, response_dump

In [113]:
custom_module_prompt = """
Let's focus on the module {}. Look at the structural data given for that topic. Using the provided structural data, generate an educational curriculum focusing only on the 'Content' and 'References' sections. Ensure the content section is highly detailed, thoroughly explaining each topic in depth to suit the target academic audience. Cite all external sources appropriately, using relevant references, and avoid any self-citation of the provided repository. The content should flow logically, be organized clearly into subsections, and comprehensively cover all necessary information from the structural data. Format the output to a markdown structure.
"""

In [114]:
print(*messages, sep = "\n\n")
temp_messages = messages.copy()

{'role': 'system', 'content': 'You are a helpful assistant and an expert in developing thorough, detailed, and comprehensive academic curricula. Consider the following audience types: \nAny: This audience type covers content that could be of interest to members of any or all of the groups listed below.\nAcademic: An Academic is interested in advanced topics in knowledge graphs, including formal logic, machine learning, term extraction, and postgraduate or scientific research. \nDeveloper: A Developer may be tasked with designing, implementing, and supporting knowledge graphs in a professional setting. This role is inherently more technical than the others and may involve maintaining the infrastructure supporting a corporate, academic, or government system.\nDesigner: A Designer, who also can be considered an _Architect_, is a flavor of Practitioner who is responsible for the initial development, maintenance, and evolution/transformation a knowledge graph and its schema. This role focus

In [115]:
import json
import os
import shutil
import time

# Using only SPARQL module for testing. The index into the full module list
# is kept next to each selected entry so that output files are numbered by
# the module's position in that list (18 for index 17) rather than by its
# position in the reduced test selection.
all_modules = json.loads(json_content)
module_indices = [17]
json_data = [all_modules[module_index] for module_index in module_indices]

for module_index, item in zip(module_indices, json_data):
    files_to_write = []

    # The module name is read from the first key containing "name" or "title".
    module_name = item[[key for key in item if 'name' in key.lower() or 'title' in key.lower()][0]]

    # Handle multiple personas: a non-string 'Audience' is treated as a
    # collection and one request is sent per audience.
    if not isinstance(item['Audience'], str):
        separate_dicts = [{**item, 'Audience': audience} for audience in item['Audience']]
        for sub_item in separate_dicts:
            result, raw = custom_module_generation_prompt(custom_module_prompt.format(str(sub_item)))
            # Restore the saved history so the next request does not see this exchange.
            messages = temp_messages.copy()
            files_to_write.append( (module_name.strip(), str(sub_item['Audience']).strip(), result, raw) )
            print(*messages, sep = "\n\n")
            time.sleep(2)
    else:
        result, raw = custom_module_generation_prompt(custom_module_prompt.format(str(item)))
        # Restore the saved history so the next request does not see this exchange.
        messages = temp_messages.copy()
        print(*messages, sep = "\n\n")
        files_to_write.append( (module_name.strip(), str(item['Audience']).strip(), result, raw) )

    module_number = str(module_index + 1)

    for file_name, audience, result, raw in files_to_write:
        # A failed API call yields (None, None); there is nothing to save then.
        if result is None or raw is None:
            print(f"Skipping {file_name}-{audience}: no response was returned.")
            continue

        file_write_name = f"{module_number}.{file_name}-{audience}"

        # Step 1: write the answer and the raw response locally. The same
        # absolute paths are used for writing and for copying, so the cell
        # does not depend on the working directory being /content.
        source_file_md = os.path.abspath(f"{file_write_name}.md")
        source_file_raw = os.path.abspath(f"{file_write_name}.txt")
        with open(source_file_md, "w", encoding="utf-8") as f_result:
            f_result.write(result)
        with open(source_file_raw, "w", encoding="utf-8") as f_raw:
            f_raw.write(raw)

        # Step 2: copy the files from Colab to Google Drive
        # (adjust the destination folders as needed).
        destination_md = f"/content/drive/My Drive/Third Pass/GPT/Content/{file_write_name}.md"
        destination_raw = f"/content/drive/My Drive/Third Pass/GPT/Raw//{file_write_name}.txt"

        # Create the folders if they don't exist.
        os.makedirs(os.path.dirname(destination_md), exist_ok=True)
        os.makedirs(os.path.dirname(destination_raw), exist_ok=True)

        shutil.copy(source_file_md, destination_md)
        shutil.copy(source_file_raw, destination_raw)
        time.sleep(2)
    time.sleep(2)


Using engine:gpt-4o
Custom Prompt Result from gpt-4o:
 Here's a detailed educational curriculum focusing on the "Content" and "References" sections for the SPARQL module. The curriculum is structured to provide comprehensive insights into each covered concept, tailored to suit an academic audience interested in deepening their understanding of SPARQL.

```markdown
# SPARQL Module Curriculum

## Content

### Introduction to SPARQL
SPARQL (SPARQL Protocol and RDF Query Language) is the standard query language and protocol for querying RDF (Resource Description Framework) data. It allows for the retrieval and manipulation of data stored in Resource Description Framework format. SPARQL is an essential tool for data analysts and researchers working with semantic web technologies and knowledge graphs.

### Query Concepts in SPARQL

#### Basic Query Pattern
In SPARQL, a basic query is structured to match triples in the RDF dataset. A SPARQL query comprises of triple patterns, which are simila

## 2. Claude

In [116]:
!pip install anthropic

Collecting anthropic
  Downloading anthropic-0.37.1-py3-none-any.whl.metadata (21 kB)
Downloading anthropic-0.37.1-py3-none-any.whl (945 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m946.0/946.0 kB[0m [31m19.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: anthropic
Successfully installed anthropic-0.37.1


In [150]:
import anthropic

client = anthropic.Anthropic(
    api_key=ANTHROPIC_API_KEY,
)

In [151]:
audience_info = """
Any: This audience type covers content that could be of interest to members of any or all of the groups listed below.
Academic: An Academic is interested in advanced topics in knowledge graphs, including formal logic, machine learning, term extraction, and postgraduate or scientific research.
Developer: A Developer may be tasked with designing, implementing, and supporting knowledge graphs in a professional setting. This role is inherently more technical than the others and may involve maintaining the infrastructure supporting a corporate, academic, or government system.
Designer: A Designer, who also can be considered an _Architect_, is a flavor of Practitioner who is responsible for the initial development, maintenance, and evolution/transformation a knowledge graph and its schema. This role focuses primarily on knowledge engineering best practices, understanding current web standards, and the tradeoffs of knowledge representation and reasoning with respect to query complexity.
Practitioner: A Practioner is someone who wants to gain a sufficient understanding of knowledge graphs to be able to design and implement basic ontologies as well as query data collections for individual, business, or educational purposes.
Student: A Student seeks to understand or use knowledge graphs as part of a curriculum. The scope of the Student role could be as broad as that of a Practitioner or limited to identifying and querying existing resources. While it generally includes a broad overview of all knowledge graph technologies, it also delves into some technical details, so as to prepare for a transition to any other role.
Stakeholder: A Stakeholder is interested in a top level overview of the concepts and technologies so as to gain a broad understanding and inform them of potential pros and cons of adopting these technologies.
"""

In [152]:
initial_system_message = f"You are a helpful assistant and an expert in developing thorough, detailed, and comprehensive academic curricula. Consider the following audience types: {audience_info}"
messages = []

In [153]:
def initial_analysis_of_repo(repo_url, prompt, model="claude-3-5-sonnet-20240620"):
    """Send the repository's module list to Claude together with ``prompt``.

    Clones ``repo_url`` into ``repo_temp``, reads the content returned by
    ``extract_instructions`` for ``curriculum/modules`` and sends it after
    ``prompt`` as a single user turn. The user turn and the reply are added
    to the module-level ``messages`` list only when the request succeeds.

    Args:
        repo_url: URL of the Git repository to clone.
        prompt: Instruction text placed in front of the extracted content.
        model: Anthropic model identifier.

    Returns:
        ``(content, raw_json)``: the ``content`` attribute of the API response
        and the whole response serialised with ``model_dump_json(indent=4)``.

    Raises:
        Anything raised by ``clone_repo``, ``extract_instructions`` or
        ``client.messages.create``. ``messages`` is left unchanged in that
        case, so re-running the cell does not duplicate the prompt.
    """
    local_dir = "repo_temp"
    clone_repo(repo_url, local_dir)
    markdown_content = extract_instructions(local_dir + "/curriculum/modules")

    user_turn = {
        "role": "user",
        "content": [{"type": "text", "text": prompt + "\n" + markdown_content}],
    }

    # The request gets its own list; the shared history is only extended
    # after the API has answered.
    message = client.messages.create(
        model=model,
        max_tokens=5000,
        temperature=0.7,
        system=initial_system_message,
        messages=messages + [user_turn],
    )

    messages.append(user_turn)
    messages.append({"role": "assistant", "content": message.content})
    return message.content, message.model_dump_json(indent=4)

In [154]:
claude_analysis_prompt = """
I have a GitHub repository that includes details on a knowledge graph, and I want to generate educational content based on its modules. The structure for each module is specified in the markdown file.
GitHub repo: https://github.com/chrisdavisj/open-kg-curriculum/
Module list markdown file: https://github.com/chrisdavisj/open-kg-curriculum/blob/master/curriculum/modules/README.md
Please do the following:
 - Extract all modules listed in the curriculum/modules/README.md.
 - Review and analyze the structure for each module, and any specific details mentioned as specified in that file.
 - List all the modules
 - Give a JSON from each module's structure and properties as outlined

Markdown file content:
"""

In [155]:
repo_url = 'https://github.com/chrisdavisj/open-kg-curriculum'

In [156]:
anthropic_analysis_result, anthropic_analysis_raw = initial_analysis_of_repo(repo_url,claude_analysis_prompt)

In [157]:
print(anthropic_analysis_result[0].text)

Based on the provided markdown file, I have extracted and analyzed the modules. Here's the list of all modules and their JSON representations:

List of Modules:
1. What is Metadata?
2. What is an Ontology?
3. What is a Knowledge Graph?
4. What is an Identifier?
5. Introduction to Logic
6. Propositional Logic
7. Datalog
8. Predicate Logic
9. Description Logic
10. Rules
11. Introduction to Set Theory
12. Introduction to Discrete Mathematics
13. Open World Assumption vs Closed World Assumption
14. RDF
15. RDFS
16. RDF Serializations
17. RDF Star
18. SPARQL
19. SWRL
20. OWL
21. OWL Dialects
22. Manchester Syntax
23. SHACL
24. Schema.org
25. Dublin Core
26. SOSA & SSN
27. PROV-O
28. SKOS
29. Survey of Modeling Tools
30. Protege
31. Deploying a Knowledge Graph
32. Introduction to Knowledge Engineering
33. Reification
34. GraphQL
35. Survey of Documentation Practices
36. Survey of Visualization Tools
37. Introduction to Upper Ontologies
38. Introduction to Modular Ontologies
39. Survey of Tri

In [158]:
# Keep the span from the first "[" to the last "]" of Claude's answer,
# or an empty string when either bracket is missing.
analysis_text = anthropic_analysis_result[0].text
if "[" in analysis_text and "]" in analysis_text:
    first_bracket = analysis_text.find("[")
    last_bracket = analysis_text.rfind("]")
    json_content = analysis_text[first_bracket : last_bracket + 1]
else:
    json_content = ""

print(json_content)

[
  {
    "name": "What is Metadata?",
    "category": "Foundational",
    "modulePrerequisites": "None",
    "audience": "Any",
    "level": "Beginner",
    "coveredConcepts": ["Metadata"]
  },
  {
    "name": "What is an Ontology?",
    "category": "",
    "modulePrerequisites": "",
    "audience": "",
    "level": "",
    "coveredConcepts": ["Ontology", "Linked Data", "Taxonomy", "Schema", "Statement", "Triple"]
  },
  {
    "name": "What is a Knowledge Graph?",
    "category": "Foundational",
    "modulePrerequisites": "What is Metadata?",
    "audience": "Any",
    "level": "Beginner",
    "coveredConcepts": ["Schema", "Statements", "Triple"]
  },
  {
    "name": "What is an Identifier?",
    "category": "Foundational",
    "modulePrerequisites": "None",
    "audience": "Any",
    "level": "Beginner",
    "coveredConcepts": ["URI", "Hash or Slash", "Namespace"]
  },
  {
    "name": "Introduction to Logic",
    "category": "Foundational",
    "modulePrerequisites": "None",
    "aud

In [159]:
def generation_of_module_content(prompt, model="claude-3-5-sonnet-20240620", max_tokens=8000, temperature=0.7):
  """Ask Claude to generate the content of one module.

  The request is built on a copy of the notebook-level `messages` history, so
  neither the module prompt nor the reply is added to that shared history and
  successive module requests do not accumulate.

  Args:
    prompt: Text sent as the user turn.
    model: Anthropic model identifier.
    max_tokens: Upper bound on the number of generated tokens.
    temperature: Sampling temperature.

  Returns:
    A tuple `(content, raw_json)` where `content` is `message.content` from
    the response (callers read the text via `content[0].text`) and `raw_json`
    is the whole response serialised with `model_dump_json(indent=4)`.
  """
  user_turn = {
      "role": "user",
      "content": [{"type": "text", "text": prompt}],
  }
  # Shallow copy: the shared history list itself is never modified here.
  request_messages = messages.copy()
  request_messages.append(user_turn)

  message = client.messages.create(
      model=model,
      max_tokens=max_tokens,
      temperature=temperature,
      system=initial_system_message,
      messages=request_messages,
  )

  # The reply is intentionally not appended anywhere: the request list is a
  # throw-away copy, so appending to it would have no effect.
  return message.content, message.model_dump_json(indent=4)

In [160]:
# Prompt template for generating a single module with Claude. The `{}`
# placeholder is filled through `str.format` with the string form of the
# module's structural data (a dict parsed from the analysis JSON).
# NOTE(review): "ouput" is a typo inside the prompt text; it is left untouched
# here because it is part of what is sent to the model — confirm before fixing.
custom_claude_module_prompt = """
Let's focus on the module {}. Look at the structural data given for that topic. Using the provided structural data, generate an educational curriculum focusing only on the 'Content' and 'References' sections. Ensure the content section is highly detailed and lengthy, thoroughly explaining each topic in depth to suit the target academic audience. Cite all external sources appropriately, using relevant references, and avoid any self-citation of the provided repository. The content should flow logically, be organized clearly into subsections, and comprehensively cover all necessary information from the structural data. Format the ouput to a markdown structure.
"""

In [161]:
import json
import os
import shutil
import time

# Using only SPARQL module for testing.
# The index into the full module list is kept so that the numeric file prefix
# is the module's number in the curriculum (index 17 -> "18"), not its position
# inside the one-element test subset (which would always be "1").
all_claude_modules = json.loads(json_content)
selected_module_indices = [17]
json_data = [all_claude_modules[index] for index in selected_module_indices]

for module_index, item in zip(selected_module_indices, json_data):
  files_to_write = []

  # The model may label the field "name" or "title"; use the first matching key.
  module_name = item[[key for key in item if 'name' in key.lower() or 'title' in key.lower()][0]]

  # Handle multiple personas: one request (and one pair of files) per audience.
  audience = item['audience']
  if isinstance(audience, (list, tuple)):
    variants = [{**item, 'audience': single_audience} for single_audience in audience]
  else:
    variants = [item]

  for variant in variants:
    result, raw = generation_of_module_content(custom_claude_module_prompt.format(str(variant)))
    files_to_write.append( (module_name.strip(), str(variant['audience']).strip(), result, raw) )
    time.sleep(2)  # pause between API requests

  module_number = str(module_index + 1)
  for file_name, audience_label, result, raw in files_to_write:
    # A path separator inside the module or audience name would be treated as
    # a directory by open(); replace it so the result stays a plain file name.
    file_write_name = f"{module_number}.{file_name}-{audience_label}".replace(os.sep, "-")

    # Step 1: Write the local copies. The same explicit paths are used for
    # writing and for copying, so the cell does not depend on the current
    # working directory being /content.
    source_file_md = f"/content/{file_write_name}.md"
    source_file_raw = f"/content/{file_write_name}.txt"
    with open(source_file_md, "w", encoding="utf-8") as f_result:
      # `result` is the response content list; the text is in its first block.
      f_result.write(result[0].text)
    with open(source_file_raw, "w", encoding="utf-8") as f_raw:
      f_raw.write(raw)

    # Step 2: Copy the files from Colab to Google Drive
    # (adjust the destination folder names as needed).
    destination_md = f"/content/drive/My Drive/Third Pass/Claude/Content/{file_write_name}.md"
    destination_raw = f"/content/drive/My Drive/Third Pass/Claude/Raw//{file_write_name}.txt"

    # Create the destination folders if they don't exist.
    os.makedirs(os.path.dirname(destination_md), exist_ok=True)
    os.makedirs(os.path.dirname(destination_raw), exist_ok=True)

    shutil.copy(source_file_md, destination_md)
    shutil.copy(source_file_raw, destination_raw)
    time.sleep(2)
  time.sleep(2)

## 3. Mistral

In [162]:
!pip install mistralai

Collecting mistralai
  Downloading mistralai-1.1.0-py3-none-any.whl.metadata (23 kB)
Collecting jsonpath-python<2.0.0,>=1.0.6 (from mistralai)
  Downloading jsonpath_python-1.0.6-py3-none-any.whl.metadata (12 kB)
Collecting typing-inspect<0.10.0,>=0.9.0 (from mistralai)
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Collecting mypy-extensions>=0.3.0 (from typing-inspect<0.10.0,>=0.9.0->mistralai)
  Downloading mypy_extensions-1.0.0-py3-none-any.whl.metadata (1.1 kB)
Downloading mistralai-1.1.0-py3-none-any.whl (229 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m229.7/229.7 kB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading jsonpath_python-1.0.6-py3-none-any.whl (7.6 kB)
Downloading typing_inspect-0.9.0-py3-none-any.whl (8.8 kB)
Downloading mypy_extensions-1.0.0-py3-none-any.whl (4.7 kB)
Installing collected packages: mypy-extensions, jsonpath-python, typing-inspect, mistralai
Successfully installed jsonpath-python-1.0.6 mistralai-1

In [163]:
import os
from mistralai import Mistral

In [164]:
# Prefer a key supplied through the MISTRAL_API_KEY environment variable so the
# secret does not have to be stored in the notebook; fall back to the
# notebook-level `mistral_api_key` when the variable is unset or empty.
api_key = os.environ.get("MISTRAL_API_KEY") or mistral_api_key

mistral_client = Mistral(api_key=api_key)

In [165]:
audience_info = """
Any: This audience type covers content that could be of interest to members of any or all of the groups listed below.
Academic: An Academic is interested in advanced topics in knowledge graphs, including formal logic, machine learning, term extraction, and postgraduate or scientific research.
Developer: A Developer may be tasked with designing, implementing, and supporting knowledge graphs in a professional setting. This role is inherently more technical than the others and may involve maintaining the infrastructure supporting a corporate, academic, or government system.
Designer: A Designer, who also can be considered an _Architect_, is a flavor of Practitioner who is responsible for the initial development, maintenance, and evolution/transformation a knowledge graph and its schema. This role focuses primarily on knowledge engineering best practices, understanding current web standards, and the tradeoffs of knowledge representation and reasoning with respect to query complexity.
Practitioner: A Practioner is someone who wants to gain a sufficient understanding of knowledge graphs to be able to design and implement basic ontologies as well as query data collections for individual, business, or educational purposes.
Student: A Student seeks to understand or use knowledge graphs as part of a curriculum. The scope of the Student role could be as broad as that of a Practitioner or limited to identifying and querying existing resources. While it generally includes a broad overview of all knowledge graph technologies, it also delves into some technical details, so as to prepare for a transition to any other role.
Stakeholder: A Stakeholder is interested in a top level overview of the concepts and technologies so as to gain a broad understanding and inform them of potential pros and cons of adopting these technologies.
"""

In [166]:
# System prompt: a fixed role description followed by the audience descriptions
# from `audience_info`. It is used as the first ("system") entry of the Mistral
# message history.
initial_system_message = f"You are a helpful assistant and an expert in developing thorough, detailed, and comprehensive academic curricula. Consider the following audience types: {audience_info}"

In [205]:
# Conversation history shared by the Mistral calls; it starts with the system prompt.
mistal_messages = [{"role": "system", "content": initial_system_message}]

def initial_analysis_of_repo_with_mistral(repo_url, prompt, model = "mistral-large-latest", max_tokens = 1000):
  """Clone the repository and ask Mistral to analyse its module README.

  The repository is cloned into `repo_temp`, the README under
  `curriculum/modules` is read with `extract_instructions`, and its text is
  appended to `prompt` to form the user message.

  On success the user message and the model's reply are both appended to the
  global `mistal_messages` history. Nothing is appended if the request raises,
  so a retry does not send the same prompt twice.

  Args:
    repo_url: URL of the Git repository to clone.
    prompt: Instruction text placed before the README content.
    model: Mistral model identifier.
    max_tokens: Upper bound on the number of generated tokens.

  Returns:
    A tuple `(answer, raw_json)`: the text of the first choice and the whole
    response serialised with `model_dump_json(indent=4)`.
  """
  local_dir = "repo_temp"
  clone_repo(repo_url, local_dir)
  markdown_content = extract_instructions(local_dir + "/curriculum/modules")

  user_message = {"role": "user", "content": prompt + "\n" + markdown_content}
  chat_response = mistral_client.chat.complete(
    model = model,
    # Send history + new prompt without touching the shared list yet.
    messages = mistal_messages + [user_message],
    max_tokens = max_tokens,
  )
  answer = chat_response.choices[0].message.content

  # Record the complete exchange only now that the call has succeeded.
  mistal_messages.append(user_message)
  mistal_messages.append({"role": "assistant", "content": answer})
  return answer, chat_response.model_dump_json(indent = 4)

In [206]:
# Instruction text for the initial repository analysis with Mistral. It ends
# with "Markdown file content:" because the README text is appended after it
# when the request is built.
mistral_analysis_prompt = """
I have a GitHub repository that includes details on a knowledge graph, and I want to generate educational content based on its modules. The structure for each module is specified in the markdown file.
GitHub repo: https://github.com/chrisdavisj/open-kg-curriculum/
Module list markdown file: https://github.com/chrisdavisj/open-kg-curriculum/blob/master/curriculum/modules/README.md
Please do the following:
 - Extract all modules listed in the curriculum/modules/README.md.
 - Review and analyze the structure for each module, and any specific details mentioned as specified in that file.
 - List all the modules
 - Give a JSON from each module's structure and properties as outlined

Markdown file content:
"""

In [207]:
# Repository to analyse (same URL as the one set near the top of the notebook).
repo_url = 'https://github.com/chrisdavisj/open-kg-curriculum'

In [208]:
# Run the repository analysis with a larger token budget than the default.
mistral_analysis_result, mistral_analysis_raw = initial_analysis_of_repo_with_mistral(
    repo_url,
    mistral_analysis_prompt,
    max_tokens=5000,
)

In [209]:
# Show the conversation history, one message per paragraph.
print("\n\n".join(str(entry) for entry in mistal_messages))

{'role': 'system', 'content': 'You are a helpful assistant and an expert in developing thorough, detailed, and comprehensive academic curricula. Consider the following audience types: \nAny: This audience type covers content that could be of interest to members of any or all of the groups listed below.\nAcademic: An Academic is interested in advanced topics in knowledge graphs, including formal logic, machine learning, term extraction, and postgraduate or scientific research. \nDeveloper: A Developer may be tasked with designing, implementing, and supporting knowledge graphs in a professional setting. This role is inherently more technical than the others and may involve maintaining the infrastructure supporting a corporate, academic, or government system.\nDesigner: A Designer, who also can be considered an _Architect_, is a flavor of Practitioner who is responsible for the initial development, maintenance, and evolution/transformation a knowledge graph and its schema. This role focus

In [210]:
# Show the text of Mistral's reply to the analysis prompt.
print(mistral_analysis_result)

### List of Modules

Here is the list of all modules extracted from the markdown file:

1. What is Metadata?
2. What is an Ontology?
3. What is a Knowledge Graph?
4. What is an Identifier?
5. Introduction to Logic
6. Propositional Logic
7. Datalog
8. Predicate Logic
9. Description Logic
10. Rules
11. Introduction to Set Theory
12. Introduction to Discrete Mathematics
13. Open World Assumption vs Closed World Assumption
14. RDF
15. RDFS
16. RDF Serializations
17. RDF Star
18. SPARQL
19. SWRL
20. OWL
21. OWL Dialects
22. Manchester Syntax
23. SHACL
24. Schema.org
25. Dublin Core
26. SOSA & SSN
27. PROV-O
28. SKOS
29. Survey of Modeling Tools
30. Protege
31. Deploying a Knowledge Graph
32. Introduction to Knowledge Engineering
33. Reification
34. GraphQL
35. Survey of Documentation Practices
36. Survey of Visualization Tools
37. Introduction to Upper Ontologies
38. Introduction to Modular Ontologies
39. Survey of Triplestores
40. History of the Semantic Web
41. Property Graphs
42. Nanopub

In [211]:
# Cut the JSON array out of Mistral's reply: everything from the first "["
# to the last "]" (inclusive), or an empty string if either bracket is missing.
json_start = mistral_analysis_result.find("[")
json_end = mistral_analysis_result.rfind("]")
if json_start != -1 and json_end != -1:
  json_content = mistral_analysis_result[json_start : json_end + 1]
else:
  json_content = ""

print(json_content)

[
    {
        "title": "What is Metadata?",
        "category": "Foundational",
        "module_prerequisites": "None",
        "audience": "Any",
        "level": "Beginner",
        "covered_concepts": "Metadata"
    },
    {
        "title": "What is an Ontology?",
        "category": "",
        "module_prerequisites": "",
        "audience": "",
        "level": "",
        "covered_concepts": "Ontology, Linked Data, Taxonomy, Schema, Statement, Triple"
    },
    {
        "title": "What is a Knowledge Graph?",
        "category": "Foundational",
        "module_prerequisites": "What is Metadata?",
        "audience": "Any",
        "level": "Beginner",
        "covered_concepts": "Schema, Statements, Triple"
    },
    {
        "title": "What is an Identifier?",
        "category": "Foundational",
        "module_prerequisites": "None",
        "audience": "Any",
        "level": "Beginner",
        "covered_concepts": "URI, Hash or Slash, Namespace"
    },
    {
        "tit

In [212]:
def generation_of_module_content_with_mistral(prompt, model = "mistral-large-latest", max_tokens = 8000, temperature = 0.7):
  """Ask Mistral to generate the content of one module.

  The request is built on a copy of the global `mistal_messages` history, so
  neither the module prompt nor the reply is added to that shared history and
  successive module requests do not accumulate.

  Args:
    prompt: Text sent as the user turn.
    model: Mistral model identifier.
    max_tokens: Upper bound on the number of generated tokens.
    temperature: Sampling temperature.

  Returns:
    A tuple `(answer, raw_json)`: the text of the first choice and the whole
    response serialised with `model_dump_json(indent=4)`.
  """
  # Shallow copy: the shared history list itself is never modified here.
  request_messages = mistal_messages.copy()
  request_messages.append({"role": "user", "content": prompt})

  chat_response = mistral_client.chat.complete(
    model = model,
    messages = request_messages,
    max_tokens = max_tokens,
    temperature = temperature,
  )

  # The reply is intentionally not appended anywhere: the request list is a
  # throw-away copy, so appending to it would have no effect.
  answer = chat_response.choices[0].message.content
  return answer, chat_response.model_dump_json(indent = 4)

In [213]:
# Prompt template for generating a single module with Mistral. The `{}`
# placeholder is filled through `str.format` with the string form of the
# module's structural data (a dict parsed from the analysis JSON).
# NOTE(review): "ouput" is a typo inside the prompt text; it is left untouched
# here because it is part of what is sent to the model — confirm before fixing.
custom_mistral_module_prompt = """
Let's focus on the module {}. Look at the structural data given for that topic. Using the provided structural data, generate an educational curriculum focusing only on the 'Content' and 'References' sections. Ensure the content section is highly detailed and lengthy, thoroughly explaining each topic in depth to suit the target academic audience. Cite all external sources appropriately, using relevant references, and avoid any self-citation of the provided repository. The content should flow logically, be organized clearly into subsections, and comprehensively cover all necessary information from the structural data. Format the ouput to a markdown structure.
"""

In [214]:
import json
import os
import shutil
import time

# Using only SPARQL module for testing.
# The index into the full module list is kept so that the numeric file prefix
# is the module's number in the curriculum (index 17 -> "18"), not its position
# inside the one-element test subset (which would always be "1").
all_mistral_modules = json.loads(json_content)
selected_module_indices = [17]
json_data = [all_mistral_modules[index] for index in selected_module_indices]

for module_index, item in zip(selected_module_indices, json_data):
  files_to_write = []

  # The model may label the field "name" or "title"; use the first matching key.
  module_name = item[[key for key in item if 'name' in key.lower() or 'title' in key.lower()][0]]

  # Handle multiple personas: one request (and one pair of files) per audience.
  # The audience may arrive as a comma-separated string or already as a list;
  # the parsed module entry itself is left unmodified.
  audience = item['audience']
  if isinstance(audience, str) and "," in audience:
    audiences = [part.strip() for part in audience.split(",")]
  elif isinstance(audience, (list, tuple)):
    audiences = list(audience)
  else:
    audiences = None

  if audiences is None:
    variants = [item]
  else:
    variants = [{**item, 'audience': single_audience} for single_audience in audiences]

  for variant in variants:
    result, raw = generation_of_module_content_with_mistral(custom_mistral_module_prompt.format(str(variant)))
    files_to_write.append( (module_name.strip(), str(variant['audience']).strip(), result, raw) )
    time.sleep(2)  # pause between API requests

  module_number = str(module_index + 1)
  for file_name, audience_label, result, raw in files_to_write:
    # A path separator inside the module or audience name would be treated as
    # a directory by open(); replace it so the result stays a plain file name.
    file_write_name = f"{module_number}.{file_name}-{audience_label}".replace(os.sep, "-")

    # Step 1: Write the local copies. The same explicit paths are used for
    # writing and for copying, so the cell does not depend on the current
    # working directory being /content.
    source_file_md = f"/content/{file_write_name}.md"
    source_file_raw = f"/content/{file_write_name}.txt"
    with open(source_file_md, "w", encoding="utf-8") as f_result:
      f_result.write(result)
    with open(source_file_raw, "w", encoding="utf-8") as f_raw:
      f_raw.write(raw)

    # Step 2: Copy the files from Colab to Google Drive
    # (adjust the destination folder names as needed).
    destination_md = f"/content/drive/My Drive/Third Pass/Mistral/Content/{file_write_name}.md"
    destination_raw = f"/content/drive/My Drive/Third Pass/Mistral/Raw//{file_write_name}.txt"

    # Create the destination folders if they don't exist.
    os.makedirs(os.path.dirname(destination_md), exist_ok=True)
    os.makedirs(os.path.dirname(destination_raw), exist_ok=True)

    shutil.copy(source_file_md, destination_md)
    shutil.copy(source_file_raw, destination_raw)
    time.sleep(2)
  time.sleep(2)