In [157]:
import dill
import pandas as pd
import numpy as np
from langchain.llms import Cohere, OpenAI, GooglePalm, Anthropic
from langchain.prompts import PromptTemplate
from langchain.output_parsers import PydanticOutputParser
from langchain.pydantic_v1 import BaseModel, Field, validator
from typing import List
from pypdf import PdfReader

# from langchain.chat_models import ChatCohere
# from langchain.prompts import ChatPromptTemplate, PromptTemplate
# from langchain.chains import ConversationChain
# from langchain.memory import ConversationBufferMemory
# from langchain.output_parsers import ResponseSchema, StructuredOutputParser, CommaSeparatedListOutputParser, PydanticOutputParser
# import cohere

from dotenv import load_dotenv, find_dotenv

_ = load_dotenv(find_dotenv())

# co = cohere.Client(os.environ['COHERE_API_KEY'])

# Load Data

Load data gathered in an excel spreadsheet

In [164]:
df = pd.read_excel(
    '../data/CSC869 Term Project Dataset.xlsx', 
    sheet_name='Course Descriptions', 
    skiprows=3, 
    names=['School', 'MATH226 eqv', 'MATH226 mult eqv', 'MATH226 alt eqv', 'MATH226 alt mult eqv', 'CSC230 eqv', 'CSC230 alt eqv', 'CSC256 eqv', 'CSC256 multipleEqv'],
    index_col=None
)
df = df.dropna(how='all').set_index("School")

In [165]:
math226_desc = df.iloc[0,0]
csc230_desc = df.iloc[0,4]
csc256_desc = df.iloc[0,6]
print(math226_desc, "\n\n")
print(csc230_desc, "\n\n")
print(csc256_desc)

MATH 226 - Calculus I
4 Units

Graphs. Differentiation: theory, techniques, and applications. Integration: Fundamental Theorem of Calculus and applications. Transcendental functions. 


CSC 230 - Discrete Mathematical Structures for Computer Science
3 Units

Review of set algebra, relations and functions, permutations, propositional logic, proof techniques, introduction to graph theory, and infinite sets, and their applications to computer science. 


CSC 256 - Machine Structures
3 Units

Digital logic circuits, data representation, assembly language programming, subroutine linkage, machine language encoding, interrupt and exception handling, memory system concepts, and CPU organization and performance.


In [166]:
df

Unnamed: 0_level_0,MATH226 eqv,MATH226 mult eqv,MATH226 alt eqv,MATH226 alt mult eqv,CSC230 eqv,CSC230 alt eqv,CSC256 eqv,CSC256 multipleEqv
School,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
SFSU,MATH 226 - Calculus I\n4 Units\n\nGraphs. Diff...,,,,CSC 230 - Discrete Mathematical Structures for...,,CSC 256 - Machine Structures\n3 Units\n\nDigit...,
Berkeley City College,"MATH 3A - Calculus I\n5 Units, 5 lecture hours...",,,,MATH 11 - Discrete Mathematics\n4 units\n\nN/A,,CIS 20: Microcomputer Assembly Language\n4 Uni...,
"California Polytechnic State University, Humboldt","MATH 109 - Calculus I\n4 Units, 4 Lecture hour...",,,,"MATH 253 - Discrete Mathematics\n3 Units, 3 Le...",,,
"California Polytechnic State University, Pomona",MAT 1140 - Calculus I\n4 Units\nPrerequisites:...,,,,CS 1300 - Discrete Structures\n4 Units\nPrereq...,,CS 2640 - Computer Organization and Assembly P...,
"California Polytechnic State University, San Luis Obispo",MATH 141 - Calculus I\n4 Units\nGE Area B1\nPr...,MATH 142 - Calculus II\n4 units\nGE Area B1\nP...,HNRS 141 - Calculus I \n4 Units \nPrerequisite...,HNRS 142 - Calculus II \n4 units\nPrerequisite...,,,,
...,...,...,...,...,...,...,...,...
Solano Community College,MATH020:\n\n4.0 Units\nPrint\nDescription:\n\n...,,,,CIS021:\n\n3.0 Units\nPrint\nDescription:\n\nA...,,CIS020:\n\n3.0 Units\nPrint\nDescription:\n\nA...,
Sonoma State University,MATH 161 - Differential and Integral Calculus ...,,,,,,,
Ventura College,MATH V21A Calculus with Analytic Geometry I 5 ...,,,,MATH V52 Discrete Structures 3 Units\nSame-As:...,CS V17 Discrete Structures 3 Units\nSame-As: M...,,
West Valley College,MATH 003A: Calculus and Analytical Geometry\nC...,,,,MATH 019: Discrete Mathematics\nCourse Descrip...,,,


# Extract Topics

Exploration and testing of topic (and general data) extraction.

## Initialize LLM

In [167]:
# llm_cohere = ChatCohere(temperature=0)
# cgpt = OpenAI(model_name="text-davinci-003", temperature=0)
# cohere = Cohere(model="command", temperature=0)
palm = GooglePalm(model_name="models/text-bison-001", temperature=0)

## Create Course Class for Output Parsing

In [168]:
class Course(BaseModel):
    name: str = Field(description="name of the course")
    discipline: str = Field(description="academic discipline of the course")
    # add college name?
    topics: List[str] = Field(description="list of topics that the course covers")

course_parser = PydanticOutputParser(pydantic_object=Course)

## Create Prompt from Template

### Prompt Template for Prompt Generation

In [169]:
# Prompt Template to extract only course information
# course_query = """\
# For the course description, extract the course information.\n\
# {format_instructions}\n\n\
# This is the course description to be analyzed:\n```{course_desc}```\n
# """

course_prompt_template = """
For the course description, extract the course information.  Categorize the \
academic discipline that the course belongs to.\n
{format_instructions}\n\n
This is the course description to be analyzed:\n{course_desc}\n
"""

course_desc = df.iloc[2,4]

In [170]:
course_desc

'MATH 253 - Discrete Mathematics\n3 Units, 3 Lecture Hours\nPrerequisites: MATH 101T (C), or MATH 102 and CS 111\n\nSets, functions, relations, algorithms, induction, recursion, combinatorics, graphs, trees, and propositional logic.'

In [171]:
course_prompt = PromptTemplate(
    template=course_prompt_template,
    input_variables=["course_desc"],
    partial_variables={"format_instructions": course_parser.get_format_instructions()}
)

In [172]:
_input = course_prompt.format_prompt(course_desc=course_desc)

In [173]:
print(_input.to_string())


For the course description, extract the course information.  Categorize the academic discipline that the course belongs to.

The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"name": {"title": "Name", "description": "name of the course", "type": "string"}, "discipline": {"title": "Discipline", "description": "academic discipline of the course", "type": "string"}, "topics": {"title": "Topics", "description": "list of topics that the course covers", "type": "array", "items": {"type": "string"}}}, "required": ["name", "discipline", "topics"]}
```


This is the course description

In [174]:
# output_cohere = cohere(_input.to_string())
# output_cgpt = cgpt(_input.to_string())

In [175]:
course_output_palm = palm(_input.to_string())

In [176]:
course_output_palm

'```\n{\n  "name": "MATH 253 - Discrete Mathematics",\n  "discipline": "Mathematics",\n  "topics": ["Sets", "Functions", "Relations", "Algorithms", "Induction", "Recursion", "Combinatorics", "Graphs", "Trees", "Propositional Logic"]\n}\n```'

In [177]:
course_data_palm = course_parser.parse(course_output_palm)

In [178]:
course_data_palm.discipline

'Mathematics'

# Process Entire DataFrame

Here, I take the exploration that was completed above to develop functions that
allow for our entire dataframe to be processed.

In [251]:
from tqdm.auto import tqdm

def process_course(course_desc: str):
    if course_desc is np.NaN:
        return np.NaN
    _input = prompt.format_prompt(course_desc=course_desc)
    result = palm(_input.to_string())
    return result

def process_all_courses(df: pd.DataFrame):
    for col in tqdm(df.columns, desc="Total Progress", position=0):
        df[f"{col} data"] = list(map(process_course, tqdm(df.loc[:, col], desc=f"{col}", position=1)))
    return df

In [252]:
toy_df = df.iloc[2:5].copy()
toy_df

Unnamed: 0_level_0,MATH226 eqv,MATH226 mult eqv,MATH226 alt eqv,MATH226 alt mult eqv,CSC230 eqv,CSC230 alt eqv,CSC256 eqv,CSC256 multipleEqv,MATH226 eqv data,MATH226 mult eqv data,MATH226 alt eqv data,MATH226 alt mult eqv data,CSC230 eqv data,CSC230 alt eqv data,CSC256 eqv data,CSC256 multipleEqv data
School,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
"California Polytechnic State University, Humboldt","MATH 109 - Calculus I\n4 Units, 4 Lecture hour...",,,,"MATH 253 - Discrete Mathematics\n3 Units, 3 Le...",,,,"```\n{\n ""name"": ""MATH 109 - Calculus I"",\n ...",,,,"```\n{\n ""name"": ""MATH 253 - Discrete Mathema...",,,
"California Polytechnic State University, Pomona",MAT 1140 - Calculus I\n4 Units\nPrerequisites:...,,,,CS 1300 - Discrete Structures\n4 Units\nPrereq...,,CS 2640 - Computer Organization and Assembly P...,,"```\n{\n ""name"": ""MAT 1140 - Calculus I"",\n ...",,,,"```\n{\n ""name"": ""CS 1300 - Discrete Structur...",,"```\n{\n ""name"": ""CS 2640 - Computer Organiza...",
"California Polytechnic State University, San Luis Obispo",MATH 141 - Calculus I\n4 Units\nGE Area B1\nPr...,MATH 142 - Calculus II\n4 units\nGE Area B1\nP...,HNRS 141 - Calculus I \n4 Units \nPrerequisite...,HNRS 142 - Calculus II \n4 units\nPrerequisite...,,,,,"{""name"": ""MATH 141 - Calculus I"", ""discipline""...","```\n{\n ""name"": ""MATH 142 - Calculus II"",\n ...","```\n{\n ""name"": ""HNRS 141 - Calculus I"",\n ...","```\n{\n ""name"": ""HNRS 142 - Calculus II"",\n ...",,,,


In [250]:
toy_df2 = process_all_courses(toy_df)
toy_df2

Total Progress:   0%|          | 0/16 [00:00<?, ?it/s]

Column: MATH226 eqv:   0%|          | 0/3 [00:00<?, ?it/s]

Column: MATH226 mult eqv:   0%|          | 0/3 [00:00<?, ?it/s]

Column: MATH226 alt eqv:   0%|          | 0/3 [00:00<?, ?it/s]

Column: MATH226 alt mult eqv:   0%|          | 0/3 [00:00<?, ?it/s]

Column: CSC230 eqv:   0%|          | 0/3 [00:00<?, ?it/s]

Column: CSC230 alt eqv:   0%|          | 0/3 [00:00<?, ?it/s]

Column: CSC256 eqv:   0%|          | 0/3 [00:00<?, ?it/s]

Column: CSC256 multipleEqv:   0%|          | 0/3 [00:00<?, ?it/s]

Column: MATH226 eqv data:   0%|          | 0/3 [00:00<?, ?it/s]

Column: MATH226 mult eqv data:   0%|          | 0/3 [00:00<?, ?it/s]

Column: MATH226 alt eqv data:   0%|          | 0/3 [00:00<?, ?it/s]

Column: MATH226 alt mult eqv data:   0%|          | 0/3 [00:00<?, ?it/s]

Column: CSC230 eqv data:   0%|          | 0/3 [00:00<?, ?it/s]

Column: CSC230 alt eqv data:   0%|          | 0/3 [00:00<?, ?it/s]

Column: CSC256 eqv data:   0%|          | 0/3 [00:00<?, ?it/s]

Column: CSC256 multipleEqv data:   0%|          | 0/3 [00:00<?, ?it/s]

Unnamed: 0_level_0,MATH226 eqv,MATH226 mult eqv,MATH226 alt eqv,MATH226 alt mult eqv,CSC230 eqv,CSC230 alt eqv,CSC256 eqv,CSC256 multipleEqv,MATH226 eqv data,MATH226 mult eqv data,...,CSC256 eqv data,CSC256 multipleEqv data,MATH226 eqv data data,MATH226 mult eqv data data,MATH226 alt eqv data data,MATH226 alt mult eqv data data,CSC230 eqv data data,CSC230 alt eqv data data,CSC256 eqv data data,CSC256 multipleEqv data data
School,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"California Polytechnic State University, Humboldt","MATH 109 - Calculus I\n4 Units, 4 Lecture hour...",,,,"MATH 253 - Discrete Mathematics\n3 Units, 3 Le...",,,,"```\n{\n ""name"": ""MATH 109 - Calculus I"",\n ...",,...,,,"```\n{\n ""name"": ""MATH 109 - Calculus I"",\n ...",,,,"```\n{\n ""name"": ""MATH 253 - Discrete Mathema...","{""name"": ""nan"", ""discipline"": ""nan"", ""topics"":...",,"{""name"": ""nan"", ""discipline"": ""nan"", ""topics"":..."
"California Polytechnic State University, Pomona",MAT 1140 - Calculus I\n4 Units\nPrerequisites:...,,,,CS 1300 - Discrete Structures\n4 Units\nPrereq...,,CS 2640 - Computer Organization and Assembly P...,,"```\n{\n ""name"": ""MAT 1140 - Calculus I"",\n ...",,...,"```\n{\n ""name"": ""CS 2640 - Computer Organiza...",,"```\n{\n ""name"": ""MAT 1140 - Calculus I"",\n ...",,,,"```\n{\n ""name"": ""CS 1300 - Discrete Structur...","{""name"": ""nan"", ""discipline"": ""nan"", ""topics"":...","```\n{\n ""name"": ""CS 2640 - Computer Organiza...","{""name"": ""nan"", ""discipline"": ""nan"", ""topics"":..."
"California Polytechnic State University, San Luis Obispo",MATH 141 - Calculus I\n4 Units\nGE Area B1\nPr...,MATH 142 - Calculus II\n4 units\nGE Area B1\nP...,HNRS 141 - Calculus I \n4 Units \nPrerequisite...,HNRS 142 - Calculus II \n4 units\nPrerequisite...,,,,,"{""name"": ""MATH 141 - Calculus I"", ""discipline""...","```\n{\n ""name"": ""MATH 142 - Calculus II"",\n ...",...,,,"{""name"": ""MATH 141 - Calculus I"", ""discipline""...","```\n{\n ""name"": ""MATH 142 - Calculus II"",\n ...","```\n{\n ""name"": ""HNRS 141 - Calculus I"",\n ...","```\n{\n ""name"": ""HNRS 142 - Calculus II"",\n ...",,"{""name"": ""nan"", ""discipline"": ""nan"", ""topics"":...",,"{""name"": ""nan"", ""discipline"": ""nan"", ""topics"":..."


In [20]:
toy_df2.iloc[0,8]

Course(name='MATH 109 - Calculus I', discipline='Mathematics', topics=['Limits', 'Continuity', 'Derivatives', 'Integrals'])

In [243]:
df2 = process_all_courses(df)

Total Progress:   0%|          | 0/8 [00:00<?, ?it/s]

Course:   0%|          | 0/63 [00:00<?, ?it/s]

Course:   0%|          | 0/63 [00:00<?, ?it/s]

Course:   0%|          | 0/63 [00:00<?, ?it/s]

Course:   0%|          | 0/63 [00:00<?, ?it/s]

Course:   0%|          | 0/63 [00:00<?, ?it/s]

Course:   0%|          | 0/63 [00:00<?, ?it/s]

Course:   0%|          | 0/63 [00:00<?, ?it/s]

Course:   0%|          | 0/63 [00:00<?, ?it/s]

In [244]:
df2

Unnamed: 0_level_0,MATH226 eqv,MATH226 mult eqv,MATH226 alt eqv,MATH226 alt mult eqv,CSC230 eqv,CSC230 alt eqv,CSC256 eqv,CSC256 multipleEqv,MATH226 eqv data,MATH226 mult eqv data,MATH226 alt eqv data,MATH226 alt mult eqv data,CSC230 eqv data,CSC230 alt eqv data,CSC256 eqv data,CSC256 multipleEqv data
School,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
SFSU,MATH 226 - Calculus I\n4 Units\n\nGraphs. Diff...,,,,CSC 230 - Discrete Mathematical Structures for...,,CSC 256 - Machine Structures\n3 Units\n\nDigit...,,"{""name"": ""MATH 226 - Calculus I"", ""discipline""...",,,,"{""name"": ""CSC 230 - Discrete Mathematical Stru...",,"```\n{\n ""name"": ""CSC 256 - Machine Structure...",
Berkeley City College,"MATH 3A - Calculus I\n5 Units, 5 lecture hours...",,,,MATH 11 - Discrete Mathematics\n4 units\n\nN/A,,CIS 20: Microcomputer Assembly Language\n4 Uni...,,"```\n{\n ""name"": ""MATH 3A - Calculus I"",\n ""...",,,,"{""name"": ""MATH 11 - Discrete Mathematics"", ""di...",,"```\n{\n ""name"": ""CIS 20: Microcomputer Assem...",
"California Polytechnic State University, Humboldt","MATH 109 - Calculus I\n4 Units, 4 Lecture hour...",,,,"MATH 253 - Discrete Mathematics\n3 Units, 3 Le...",,,,"```\n{\n ""name"": ""MATH 109 - Calculus I"",\n ...",,,,"```\n{\n ""name"": ""MATH 253 - Discrete Mathema...",,,
"California Polytechnic State University, Pomona",MAT 1140 - Calculus I\n4 Units\nPrerequisites:...,,,,CS 1300 - Discrete Structures\n4 Units\nPrereq...,,CS 2640 - Computer Organization and Assembly P...,,"```\n{\n ""name"": ""MAT 1140 - Calculus I"",\n ...",,,,"```\n{\n ""name"": ""CS 1300 - Discrete Structur...",,"```\n{\n ""name"": ""CS 2640 - Computer Organiza...",
"California Polytechnic State University, San Luis Obispo",MATH 141 - Calculus I\n4 Units\nGE Area B1\nPr...,MATH 142 - Calculus II\n4 units\nGE Area B1\nP...,HNRS 141 - Calculus I \n4 Units \nPrerequisite...,HNRS 142 - Calculus II \n4 units\nPrerequisite...,,,,,"{""name"": ""MATH 141 - Calculus I"", ""discipline""...","```\n{\n ""name"": ""MATH 142 - Calculus II"",\n ...","```\n{\n ""name"": ""HNRS 141 - Calculus I"",\n ...","```\n{\n ""name"": ""HNRS 142 - Calculus II"",\n ...",,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Solano Community College,MATH020:\n\n4.0 Units\nPrint\nDescription:\n\n...,,,,CIS021:\n\n3.0 Units\nPrint\nDescription:\n\nA...,,CIS020:\n\n3.0 Units\nPrint\nDescription:\n\nA...,,"{""name"": ""MATH020"", ""discipline"": ""Mathematics...",,,,"```\n{\n ""name"": ""CIS021"",\n ""discipline"": ""...",,"{""name"": ""CIS020"", ""discipline"": ""Computer Sci...",
Sonoma State University,MATH 161 - Differential and Integral Calculus ...,,,,,,,,"```\n{\n ""name"": ""MATH 161 - Differential and...",,,,,,,
Ventura College,MATH V21A Calculus with Analytic Geometry I 5 ...,,,,MATH V52 Discrete Structures 3 Units\nSame-As:...,CS V17 Discrete Structures 3 Units\nSame-As: M...,,,"```\n{\n ""name"": ""MATH V21A Calculus with Ana...",,,,"```\n{\n ""name"": ""MATH V52 Discrete Structure...","```\n{\n ""name"": ""CS V17 Discrete Structures""...",,
West Valley College,MATH 003A: Calculus and Analytical Geometry\nC...,,,,MATH 019: Discrete Mathematics\nCourse Descrip...,,,,"```\n{\n ""name"": ""MATH 003A: Calculus and Ana...",,,,"```\n{\n ""name"": ""MATH 019: Discrete Mathemat...",,,


In [245]:
with open('../data/course_desc_and_data.pkl', 'wb') as f:
    dill.dump(df2, f)

# with open('../data/course_desc_and_data.pkl', 'rb') as f:
#     df2 = dill.load(f)

In [247]:
course_data_palm = course_parser.parse(df2.iloc[0,8])
course_data_palm

Course(name='MATH 226 - Calculus I', discipline='Mathematics', topics=['Calculus', 'Differentiation', 'Integration', 'Transcendental functions'])

# Baseline Equivalency

Here we attempt to determine if two courses are equivalent

In [104]:
# Prompt Template for Course Equivalency prediction

equiv_prompt_template = """
Extract the detailed title for each course below and determine whether the two \
following courses are equivalent. For course equivalency, respond with 'true',\
'false', or 'unsure'.\n
{format_instructions}\n
Course 1:\n```{course1_desc}```\n
Course 2:\n```{course2_desc}```
"""

In [105]:
from typing import Literal

class Equivalency(BaseModel):
    courses: list[str] = Field(description="subject of the two courses to be evaluated")
    isEquiv: bool | Literal["unsure"] = Field(description="equivalency prediction")

equiv_parser = PydanticOutputParser(pydantic_object=Equivalency)

equiv_prompt = PromptTemplate(
    template=equiv_prompt_template,
    input_variables=["course1_desc", "course2_desc"],
    partial_variables={"format_instructions": equiv_parser.get_format_instructions()}
)

In [136]:
df.iloc[0,0]

'MATH 226 - Calculus I\n4 Units\n\nGraphs. Differentiation: theory, techniques, and applications. Integration: Fundamental Theorem of Calculus and applications. Transcendental functions.'

In [134]:
course1_desc = df.iloc[0,4]
course2_desc = df.iloc[2,4]
course3_desc = "\n".join(course2_desc.split("\n")[0:2]) + """\
\n\nSets, functions, relations, algorithms, induction, \
differentiation, integration, and transcentental functions."""
# + "\n\nSets, relations, induction, algorithms, differentiation, and limits."
print(course1_desc, "\n\n", course2_desc, "\n\n", course3_desc)

Limits, Continuity, Derivatives, Integrals 

 Calculus, Differentiation, Integration 

 sets, functions, relations, algorithms, induction, differentiation, integration, and transcentental functions.


In [123]:
_input = equiv_prompt.format_prompt(course1_desc=course1_desc, course2_desc=course3_desc)
print(_input.to_string())


Extract the detailed title for each course below and determine whether the two following courses are equivalent. For course equivalency, respond with 'true','false', or 'unsure'.

The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"courses": {"title": "Courses", "description": "subject of the two courses to be evaluated", "type": "array", "items": {"type": "string"}}, "isEquiv": {"title": "Isequiv", "description": "equivalency prediction", "anyOf": [{"type": "boolean"}, {"enum": ["unsure"], "type": "string"}]}}, "required": ["courses", "isEquiv"]}
```

Course 1:
```CSC 230 - D

In [124]:
equiv_output_palm = palm(_input.to_string())
equiv_output_palm

'{"courses": ["CSC 230 - Discrete Mathematical Structures for Computer Science", "MATH 253 - Discrete Mathematics"], "isEquiv": "unsure"}'

In [125]:
equiv_data_palm = equiv_parser.parse(equiv_output_palm)
equiv_data_palm

Equivalency(courses=['CSC 230 - Discrete Mathematical Structures for Computer Science', 'MATH 253 - Discrete Mathematics'], isEquiv='unsure')

# Topic Equivalency



In [143]:
# Prompt Template for Course Equivalency prediction

topicequiv_prompt_template = """
Given the discipline and list of topics for the two courses below, determine whether the two \
are equivalent. For course equivalency, respond with 'true', 'false', or 'unsure'.\n
{format_instructions}\n
Course 1 Discipline: ```{course1_disc}```\nCourse 1 Topics: ```{course1_topics}```\n
Course 2 Discipline: ```{course2_disc}```\nCourse 2 Topics: ```{course2_topics}``````
"""

In [144]:
class TopicEquivalency(BaseModel):
    isEquiv: bool | Literal["unsure"] = Field(description="equivalency prediction")

topicequiv_parser = PydanticOutputParser(pydantic_object=TopicEquivalency)

topicequiv_prompt = PromptTemplate(
    template=topicequiv_prompt_template,
    input_variables=["course1_disc", "course1_topics", "course2_disc", "course2_topics"],
    partial_variables={"format_instructions": topicequiv_parser.get_format_instructions()}
)

In [145]:
toy_df.iloc[0,8]

Course(name='MATH 109 - Calculus I', discipline='Mathematics', topics=['Limits', 'Continuity', 'Derivatives', 'Integrals'])

In [146]:
course1_topics = ", ".join(toy_df.iloc[0,8].topics)
course2_topics = ", ".join(toy_df.iloc[2,8].topics)
course3_topics = """\
\n\nSets, functions, relations, algorithms, induction, \
differentiation, integration, transcentental functions."""
# + "\n\nSets, relations, induction, algorithms, differentiation, and limits."
print(course1_desc, "\n\n", course2_desc, "\n\n", course3_desc)

Limits, Continuity, Derivatives, Integrals 

 Calculus, Differentiation, Integration 

 sets, functions, relations, algorithms, induction, differentiation, integration, and transcentental functions.


In [147]:
_input = topicequiv_prompt.format_prompt(
    course1_disc=toy_df.iloc[0,8].discipline, 
    course1_topics=course1_topics,
    course2_disc=toy_df.iloc[2,8].discipline, 
    course2_topics=course2_topics
)
print(_input.to_string())


Given the discipline and list of topics for the two courses below, determine whether the two are equivalent. For course equivalency, respond with 'true', 'false', or 'unsure'.

The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"isEquiv": {"title": "Isequiv", "description": "equivalency prediction", "anyOf": [{"type": "boolean"}, {"enum": ["unsure"], "type": "string"}]}}, "required": ["isEquiv"]}
```

Course 1 Discipline: ```Mathematics```
Course 1 Topics: ```Limits, Continuity, Derivatives, Integrals```

Course 2 Discipline: ```Mathematics```
Course 2 Topics: ```Calculus, Dif

In [148]:
topicequiv_output_palm = palm(_input.to_string())
topicequiv_output_palm

'{"isEquiv": "true"}'

In [150]:
topicequiv_data_palm = topicequiv_parser.parse(topicequiv_output_palm)
topicequiv_data_palm

TopicEquivalency(isEquiv=True)

# DEPRECATED PROCESSES

#### Section: [Process Entire Dataframe](#process-entire-dataframe)

I was going to convert the dataframe into a Python dictionary to process the
data before realizing that I can just process the data directly from the dataframe.

In [None]:
# school_courses_desc_dict = df.to_dict(orient='index')
# school_courses_desc_dict = {school:{course:(desc if type(desc) is str else None) for course,desc in courses.items()} for school,courses in school_courses_desc_dict.items()}

In [None]:
# school_courses_desc_dict['SFSU']