In [81]:
from langchain.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Pinecone
from langchain.embeddings.openai import OpenAIEmbeddings
import pinecone
import os
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.document_loaders import TextLoader
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from langchain.prompts import PromptTemplate
from langchain.chains import (
    StuffDocumentsChain, LLMChain, ConversationalRetrievalChain
)
import json

In [78]:
# Connect to the hosted Pinecone project; credentials come from the environment
# so that no secret is stored in the notebook.
pinecone.init(
    api_key=os.environ['PINECONE_API_KEY'],
    environment='asia-southeast1-gcp-free',
)
index = pinecone.Index('coursecrafter')
embeddings = OpenAIEmbeddings(openai_api_key=os.environ['OPENAI_API_KEY'])

# Attach to the already-populated index. The previous call used
# `from_documents(documents='')`, which is the ingestion entry point being fed an
# empty string; `from_existing_index` is the API meant for read-only access.
vectordb = Pinecone.from_existing_index(index_name='coursecrafter', embedding=embeddings)

# The number of documents to retrieve must be passed through `search_kwargs`;
# a bare `k=10` keyword is not a retriever field and does not change the result count.
retriever = vectordb.as_retriever(search_kwargs={'k': 10})

# Few-shot prompt that asks the model to rewrite a student's free-form statement of
# interest as a catalog-style course description. The only placeholder is `{user}`.
template = '''I'm trying to run cosine similarity on a user-generated text input against a number of course descriptions. To do this, I will need to convert the user-generated text input into a course description. Using your own knowledge of the content of computer science classes as well as the examples below, you will create the course description. Your description should be a little bit longer and more verbose than the examples.

Here are some examples of course descriptions:

Course Title: CSCE 411 Design and Analysis of Algorithms, Description: Credits 3.  3 Lecture Hours.       Study of computer algorithms for numeric and non-numeric problems; design paradigms; analysis of time and space requirements of algorithms; correctness of algorithms; NP-completeness and undecidability of problems. Prerequisite:  Grade of C or better in CSCE 221 and CSCE 222/ECEN 222; junior or senior classification or approval of instructor.
Course Title: CSCE 412 Cloud Computing, Description: Credits 3.  3 Lecture Hours.       Operating system and distributed systems fields that form the basis of cloud computing such as virtualization, key-value storage solutions, group membership, failure detection, peer to peer systems, datacenter networking, resource management and scalability; popular frameworks such as MapReduce and HDFS and case studies on failure determination. Prerequisite:  Grade of C or better in CSCE 315 or CSCE 331.
Course Title: CSCE 413 Software Security, Description: Credits 3.  3 Lecture Hours.       Basic principles of design and implementation of defect-free software, code reviews including tool-assisted review by static and dynamic analysis, risk analysis and management and methods for software security testing. Prerequisites:  Grade of C or better in CSCE 315 or CSCE 331; or approval of instructor.

Convert the following user-generated string into a course description. Do not include anything other than the description in your response. Here is the user-generated string:
{user}'''

# `input_variables` must name the placeholders that actually appear in the template.
# It previously listed "adjective", which the template never uses, while the real
# placeholder `{user}` was undeclared.
prompt = PromptTemplate(
    input_variables=["user"], template=template
)

# Near-zero temperature keeps the generated description stable between runs.
llm = ChatOpenAI(model_name='gpt-3.5-turbo', temperature=0.01, verbose=True)
llm_chain = LLMChain(llm=llm, prompt=prompt, verbose=True)

# llm = ChatOpenAI(model_name='gpt-3.5-turbo', temperature=0, verbose=True)
# memory = ConversationBufferMemory(memory_key="chat_history", return_messages= True)
# chain = ConversationalRetrievalChain.from_llm(llm, retriever=retriever, memory=memory, verbose=True)

In [79]:
# Rewrite a free-form statement of interest as a catalog-style course description.
user_interest = '''I'm really interested in low-level programming. I would love to work for a company that would allow me to work on operating systems or on embedded devices. Using a language like C would be great.'''
llm_response = llm_chain.run(user_interest)

print(llm_response)




[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mI'm trying to run cosine similarity on a user-generated text input against a number of course descriptions. To do this, I will need to convert the user-generated text input into a course description. Using your own knowledge of the content of computer science classes as well as the examples below, you will create the course description. Your description should be a little bit longer and more verbose than the examples.

Here are some examples of course descriptions:

Course Title: CSCE 411 Design and Analysis of Algorithms, Description: Credits 3.  3 Lecture Hours.       Study of computer algorithms for numeric and non-numeric problems; design paradigms; analysis of time and space requirements of algorithms; correctness of algorithms; NP-completeness and undecidability of problems. Prerequisite:  Grade of C or better in CSCE 221 and CSCE 222/ECEN 222; junior or senior classification or approval of instructo

In [84]:
def create_index_from_file(file_path):
    """Read a text file and return its lines with surrounding whitespace removed.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A list holding one stripped string per line of the file, in file order.
        Blank lines are kept and appear as empty strings.
    """
    with open(file_path, 'r') as catalog_file:
        return [raw_line.strip() for raw_line in catalog_file]
# Rank every catalog entry by TF-IDF cosine similarity to the generated course
# description and print the ranking as JSON.
file_path = 'Data_Files/Catalog.txt'
index = create_index_from_file(file_path)

vectorizer = TfidfVectorizer()

# Collect (similarity, course_number) pairs in a list. Keying a dict by the
# similarity score silently dropped every course whose score tied with another
# (for example several courses scoring 0.0), and naming that dict `dict` shadowed
# the builtin for the rest of the kernel session.
scored_courses = []
for line in index:
    if not line:
        # Blank catalog lines carry no description to compare against.
        continue
    # The vectorizer is refit on each (query, course) pair, so the TF-IDF weights
    # are computed over just these two documents.
    tfidf_matrix = vectorizer.fit_transform([llm_response, line])
    similarity = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])
    # NOTE(review): the course number is taken from a fixed character offset;
    # this assumes every catalog line has the 3-digit number at positions 20-22
    # — confirm against Catalog.txt.
    scored_courses.append((float(similarity[0][0]), line[20:23]))

# Highest similarity first (ties broken by course number, descending).
scored_courses.sort(reverse=True)

# After sorting, the first entry holds the maximum. Fall back to 0.0 for an empty
# catalog so that `max_value` is always defined.
max_value = scored_courses[0][0] if scored_courses else 0.0

scaled_vals = [
    {
        "val": score,
        "course_number": course_number,
        # Scale to a 0-100 range relative to the best match; avoid dividing by
        # zero when no course shares any term with the query.
        "scaled_val": (score / max_value) * 100 if max_value else 0.0,
    }
    for score, course_number in scored_courses
]
json_string = json.dumps(scaled_vals, indent=4)
print(json_string)

[
    {
        "val": 0.49300028846454164,
        "course_number": "410",
        "scaled_val": 100.0
    },
    {
        "val": 0.4270555146349255,
        "course_number": "420",
        "scaled_val": 86.62378595456764
    },
    {
        "val": 0.3896294403836369,
        "course_number": "461",
        "scaled_val": 79.03229460517049
    },
    {
        "val": 0.36614791392364127,
        "course_number": "470",
        "scaled_val": 74.26931028053059
    },
    {
        "val": 0.36387910235274584,
        "course_number": "412",
        "scaled_val": 73.80910536301184
    },
    {
        "val": 0.3461331519650922,
        "course_number": "438",
        "scaled_val": 70.20952321207157
    },
    {
        "val": 0.3250075454582067,
        "course_number": "465",
        "scaled_val": 65.92441283765748
    },
    {
        "val": 0.30829190309677157,
        "course_number": "444",
        "scaled_val": 62.533817993687656
    },
    {
        "val": 0.28897924784448,
      

In [14]:
# Ask the retrieval chain which CSCE 489 professor has the highest GPA.
# NOTE(review): `chain` is not assigned by any active cell visible in this notebook
# (its only construction is commented out) — confirm it is defined before running.
query = "what professor of csce 489 has the highest gpa"
chain.run({"question": query})



[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
Course Number: 489, Professor: KHADEMI KALANTARI N, GPA: 3.412, A's: 13, B's: 1, C's: 1, D's: 1, E's: 1, F's: 1, Q's: 0
Course Number: 489, Professor: CHASPARI T, GPA: 4.000, A's: 23, B's: 0, C's: 0, D's: 0, E's: 0, F's: 0, Q's: 0
Course Number: 489, Professor: CARLISLE M, GPA: 3.609, A's: 16, B's: 5, C's: 2, D's: 0, E's: 0, F's: 0, Q's: 0
Course Number: 489, Professor: ZHANG Y, GPA: 3.933, A's: 14, B's: 1, C's: 0, D's: 0, E's: 0, F's: 0, Q's: 0
Course Number: 489, Professor: SUEDA S, GPA: 3.833, A's: 16, B's: 1, C's: 1, D's: 0, E's: 0, F's: 0, Q's: 0
Course Number: 489, Professor: HAMMOND T, GPA: 3.528, A's: 24, B's: 7, C's: 5, D's: 0, E's: 0, F's: 0, Q's: 0
Course Numbe

'The professor of CSCE 489 with the highest GPA is Professor CHASPARI T, with a GPA of 4.000.'

In [4]:
# query = 'list all the courses with the highest gpas'
# chain.run({'question': query})

In [5]:
# query = 'Which course should I take if I want a high grade?'
# chain.run({'question': query})

In [6]:
# Ask the retrieval chain to rank cybersecurity courses by GPA.
# Alternative questions tried in this cell:
# query = 'Give me a course that has a GPA of 3.69 and a focus on cybersecurity.'
# query = 'What professors teach CSCE 402'
# NOTE(review): `chain` is not assigned by any active cell visible in this notebook
# (its only construction is commented out) — confirm it is defined before running.
query = "Rank all the cybersecurity courses available in order of highest GPA."
chain.run({"question": query})



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mGiven the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:

Human: which course has the highest gpa?
Assistant: Course Number 489, taught by Professor CHASPARI T, has the highest GPA of 4.000.
Follow Up Input: Rank all the cybersecurity courses available in order of highest GPA.
Standalone question:[0m



[1m> Finished chain.[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
Course Title: CSCEÂ 439 Data Analytics for Cybersecurity, Description: Credits 3.  3 Lecture Hours.       Theoretical foundations, algorithms and methods of data analytics for cybersecurity; study of data analytics including cluster analysis, supervised machine learning, anomaly detection, and visualization applied to cyber attacks, anomaly detection, vulnerability analysis, strategic manipulation, propaganda and other topics. Prerequisites:  Grade of C or better in CSCEÂ 221; grade of C or better in ECENÂ 303, STATÂ 211, STATÂ 301, STATÂ 302, or STATÂ 303.
Course Title: CSCEÂ 440 Quantum Algorithms, Description: Credits 3.  3 Lecture Hours.    

'Based on the provided information, the ranking of the cybersecurity courses in order of highest GPA is as follows:\n\n1. Course Number: 421, Professor: CHASPARI T (H), GPA: 4.000\n2. Course Number: 431, Professor: WADE P, GPA: 3.667\n3. Course Number: 430, Professor: KEYSER J (H), GPA: 3.643\n4. Course Number: 431, Professor: WADE P, GPA: 3.550\n5. Course Number: 430, Professor: KEYSER J (H), GPA: 3.538\n6. Course Number: 430, Professor: KEYSER J, GPA: 3.231\n7. Course Number: 430, Professor: KEYSER J (H), GPA: 3.097\n8. Course Number: 430, Professor: KEYSER J, GPA: 2.952\n\nPlease note that the ranking is based on the available information and may not include all the cybersecurity courses.'

I think we need the data in a form like this:
class 
desc
gpa w/ profs

That way, when it looks up cybersecurity, it will also have all the GPAs to look at as well.

In [7]:
# Print the answer captured from the previous cell so the "\n" escapes render as line breaks.
ranking_answer = 'Based on the provided information, the ranking of the cybersecurity courses in order of highest GPA is as follows:\n\n1. Course Number: 421, Professor: CHASPARI T (H), GPA: 4.000\n2. Course Number: 431, Professor: WADE P, GPA: 3.667\n3. Course Number: 430, Professor: KEYSER J (H), GPA: 3.643\n4. Course Number: 431, Professor: WADE P, GPA: 3.550\n5. Course Number: 430, Professor: KEYSER J (H), GPA: 3.538\n6. Course Number: 430, Professor: KEYSER J, GPA: 3.231\n7. Course Number: 430, Professor: KEYSER J (H), GPA: 3.097\n8. Course Number: 430, Professor: KEYSER J, GPA: 2.952\n\nPlease note that the ranking is based on the available information and may not include all the cybersecurity courses.'
print(ranking_answer)

Based on the provided information, the ranking of the cybersecurity courses in order of highest GPA is as follows:

1. Course Number: 421, Professor: CHASPARI T (H), GPA: 4.000
2. Course Number: 431, Professor: WADE P, GPA: 3.667
3. Course Number: 430, Professor: KEYSER J (H), GPA: 3.643
4. Course Number: 431, Professor: WADE P, GPA: 3.550
5. Course Number: 430, Professor: KEYSER J (H), GPA: 3.538
6. Course Number: 430, Professor: KEYSER J, GPA: 3.231
7. Course Number: 430, Professor: KEYSER J (H), GPA: 3.097
8. Course Number: 430, Professor: KEYSER J, GPA: 2.952

Please note that the ranking is based on the available information and may not include all the cybersecurity courses.
