In [121]:
import ast
import copy
import json
import os
import random
import re
from pathlib import Path

import llama_index as li
import numpy as np
import openai
import pandas as pd
from llama_hub.file.unstructured.base import UnstructuredReader
from unstructured.documents.elements import NarrativeText, Title
from unstructured.partition.auto import partition

In [None]:
# Prefer a key that is already exported as OPENAI_API_KEY so the real secret never has to be
# pasted into (and saved with) the notebook; 'your_key' is kept only as the placeholder fallback.
api_key = os.environ.get('OPENAI_API_KEY', 'your_key')
openai.api_key = api_key
os.environ['OPENAI_API_KEY'] = api_key

In [None]:
# Location of the textbook PDF that the rest of the notebook parses (relative path)
pdf = Path('data/ex_textbook.pdf')

In [3]:
# Fetch the "UnstructuredReader" loader class through llama_index. This rebinds the name that
# the import cell already brought in from llama_hub.
UnstructuredReader = li.download_loader(
    "UnstructuredReader",
    refresh_cache=True,
    use_gpt_index_import=True,
)

In [None]:
# Instantiate the reader class obtained from download_loader above
loader = UnstructuredReader()

In [5]:
# Load the PDF through the reader with split_documents=True. Later cells treat `textbook` as a
# list of document objects and read each one's .text attribute.
textbook = loader.load_data(file=pdf, split_documents=True)


In [6]:
# Partition the same PDF directly to get typed elements (later cells test for Title and
# NarrativeText). NOTE(review): later cells use the same positions to index both `textbook`
# and `elements`, which assumes the two lists line up one-to-one — confirm.
elements = partition(filename=pdf)


In [None]:
#What you need to modify: the two regexes and the end marker used to locate chapters

#Start of every chapter: digits, whitespace, the word "Chapter", whitespace, the chapter number (group 2), then a colon
pattern1 = r"(\d+)\s+Chapter\s+(\d+):"
#End of every chapter introduction: text that begins with digits, then "C" or "E", then one or more space-separated capital letters
#NOTE(review): the summaries printed later start with text of this shape ("1C H A P T E R O N E"), which suggests pattern2 hits come before the first pattern1 hit of each chapter — confirm which marker opens the introduction
pattern2 = r"^\d+[CE](?: [A-Z])+"
#End of last chapter: compared for exact equality against an element's text
end = "Need to Know More?"

In [None]:
#To look for patterns: print every Title element so chapter markers can be spotted by eye
for title in (candidate for candidate in elements if isinstance(candidate, Title)):
    print(title)

In [7]:
#Tracking where chapters start/end
chapter_found = {}   # chapter number (as matched text) -> True once it has been seen
chapter_starts = []  # position of the first pattern1 hit for each chapter number
intros = []          # first pattern1 hit per chapter plus every pattern2 hit, in scan order

chapter_header = re.compile(pattern1)
intro_marker = re.compile(pattern2)

for iteration, element in enumerate(textbook):
    header_hit = chapter_header.search(element.text)
    # Only the first header seen for a given chapter number counts as that chapter's start
    if header_hit is not None and header_hit.group(2) not in chapter_found:
        chapter_found[header_hit.group(2)] = True
        chapter_starts.append(iteration)
        intros.append(iteration)
    # Checked independently of the header test, so one element can be recorded by both
    if intro_marker.search(element.text) is not None:
        intros.append(iteration)

In [8]:
#Finding where last chapter ends
last_start = chapter_starts[-1]
# Enumerate from the last chapter start so the position comes from the scan itself instead of
# a second textbook.index() search over the whole list.
for position, document in enumerate(textbook[last_start:], start=last_start):
    if document.text == end:
        # position == last_start means the end marker is already the last recorded boundary
        # (this cell was re-run), so do not append it a second time.
        if position != last_start:
            chapter_starts.append(position)
        # Only the first end marker closes the last chapter; later occurrences would otherwise
        # be appended as extra chapter boundaries.
        break

In [9]:
#Collecting chapter summaries for GPT prompts
# `intros` is consumed as alternating (start, stop) positions, one pair per chapter
# introduction; fail with a clear message instead of an IndexError when a marker is unpaired.
if len(intros) % 2:
    raise ValueError(f"intros must contain start/stop pairs, got {len(intros)} entries")

# Slice `textbook` by position rather than looking each element up with elements.index():
# index() rescans the list for every element and returns the first element that compares
# equal, not necessarily the one at the current position. Each text is followed by a newline.
summaries = [
    ''.join(document.text + '\n' for document in textbook[start:stop])
    for start, stop in zip(intros[::2], intros[1::2])
]

In [10]:
#Making dictionary with chapter as key and document objects as elements
directory = {}
# Consecutive entries of chapter_starts delimit one chapter each
chapter_bounds = zip(chapter_starts, chapter_starts[1:])
for chapter_num, (start, stop) in enumerate(chapter_bounds, start=1):
    # Walk both lists in lockstep so every NarrativeText element is paired with the document at
    # its own position. elements.index() would return the first element that compares equal
    # (possibly from an earlier chapter) and rescans the list on every lookup.
    directory['Chapter ' + str(chapter_num)] = [
        document
        for element, document in zip(elements[start:stop], textbook[start:stop])
        if isinstance(element, NarrativeText)
    ]

In [11]:
#Combining all the narrative text of each chapter into one string and adding "This is Chapter 'x': " to the beginning and "This is the end of Chapter 'x'" to the end
final = []
for chapter_number, chapter in enumerate(directory, start=1):
    documents = directory[chapter]
    if not documents:
        # No narrative text was collected for this chapter, so there is nothing to combine
        continue
    body = ''.join(document.text for document in documents)
    # Work on a shallow copy: assigning .text on the original would also change the object held
    # in `directory`/`textbook`, and re-running this cell would wrap already-wrapped text again.
    combined = copy.copy(documents[0])
    combined.text = "This is Chapter " + str(chapter_number) + ":\n" + body + "\nThis is the end of Chapter " + str(chapter_number)
    final.append(combined)

In [12]:
# Parser used in the next cell to turn the combined chapter documents into nodes
node_parser = li.node_parser.SimpleNodeParser()

In [13]:
# `final` holds the combined chapter documents; the resulting nodes feed the index built next
nodes = node_parser.get_nodes_from_documents(final)

In [14]:
# Build the vector-store index from the chapter nodes.
# NOTE(review): the nodes were already produced by the parser above — confirm that
# chunk_size_limit=512 still has an effect when nodes are passed in directly.
test_index = li.GPTVectorStoreIndex(nodes=nodes, chunk_size_limit=512)

In [15]:
# Query interface over the index; create_questions() below calls query_engine.query(...)
query_engine = test_index.as_query_engine()

In [106]:
def _parse_questions(raw):
    """Parse the model's reply into Python objects, accepting JSON or a Python literal."""
    text = raw.strip()
    try:
        # The prompt asks for JSON, and JSON literals such as null/true/false are not valid
        # Python literals, so the JSON parser is tried first.
        return json.loads(text)
    except json.JSONDecodeError:
        # Keep the original behaviour for replies written as Python literals
        # (for example single-quoted strings).
        return ast.literal_eval(text)


def create_questions(num_chapters):
    """Generate multiple-choice questions for the first ``num_chapters`` chapters.

    For each chapter two queries are sent through the module-level ``query_engine``:
    one that elaborates on ``summaries[chapter]``, and one that asks for six questions
    about that elaboration in the JSON layout given by ``form``.

    Args:
        num_chapters: Number of chapters to process, starting from chapter 1.

    Returns:
        A list with one ``[elaborated_summary_str, parsed_questions]`` entry per chapter.

    Raises:
        IndexError: If ``num_chapters`` exceeds ``len(summaries)``.
        ValueError, SyntaxError: If a reply is neither valid JSON nor a Python literal.
    """
    form ="""[
    {
    "question": ,
    "choices": ,
    "correct_answer_index": ,
    "explanation":
    }
    ]
    """
    per_chapter = []
    for chapter in range(num_chapters):
        chap_num = str(chapter + 1)
        summary = query_engine.query(f"""Elaborate on these key topics of chapter {chap_num} in detail:
        {summaries[chapter]}
        """ )
        response = query_engine.query(f"""
        CHAPTER {chap_num}:
        {str(summary)}
        Please generate SIX different multiple choice questions that covers all of the above information. Must be variety in the type of questions (scenario questions, definitions, comparison questions) and some must have multiple correct answers. Do NOT reference the text in the questions and explanations themselves. Do not repeat any questions. In the explanation, provide more insight and also the chapter that it comes from
        Return the result in the following JSON format:


        {form}
        """)
        per_chapter.append([str(summary), _parse_questions(str(response))])
    return per_chapter

In [51]:
#To see what GPT is elaborating on: the per-chapter text inserted into the "Elaborate on..." prompt
for chapter_summary in summaries:
    print(chapter_summary)

1C H A P T E R O N E
Introduction to Networking
Objectives
2.3 Identify common physical network topologies
. Star . Mesh . Bus . Ring . Point to point . Point to multipoint . Hybrid
2.7 Explain common logical network topologies and their char-
acteristics . Peer-to-peer . Client/server . VPN . VLAN
What You Need to Know
. Understand the differences between local area networks (LANs), wide area
networks (WANs), and personal area networks (PANs).
. Identify the characteristics between peer-to-peer and client/server network-
ing.
. Identify the characteristics of various network topologies.

2C H A P T E R T W O
Cabling, Connectors, and Ethernet Standards
Objectives
2.1 Categorize standard cable types and their properties
. Types:
.CAT3, CAT5, CAT5e, CAT6 .STP, UTP .Multimode fiber, single-mode fiber .Coaxial
. RG-59 . RG-6
.Serial .Plenum vs. Non-plenum
. Properties:
.Transmission speeds .Distance .Duplex .Noise immunity (security, EMI) .Frequency
2.2
Identify common connector types
. RJ

In [107]:
# chapter_starts holds one more boundary than there are chapters, hence the -1
questions = create_questions(len(chapter_starts) - 1)

In [119]:
# Flatten the per-chapter results into one list: element [1] of each chapter entry is the
# parsed question list (element [0] is the elaborated summary text).
test = [question for chap in questions for question in chap[1]]

In [122]:
# Serialise the flattened question list with 4-space indentation
json_string = json.dumps(test, indent=4)

# Save the serialised questions next to the notebook
with open("questions.json", "w") as questions_file:
    questions_file.write(json_string)


## Using the index as a chatbot

In [124]:
# Chat interface over the same index. NOTE(review): verbose=True presumably produces the
# "Querying with: ..." line seen in the output — confirm.
chat_engine = test_index.as_chat_engine(verbose=True)
response = chat_engine.chat("Tell me about VLANs")
print(str(response))

Querying with: What is a VLAN and how does it work?

A VLAN (Virtual Local Area Network) is a group of connected computers that act as if they are on their own network segments, even though they might not be. VLANs allow for the creation of multiple broadcast domains on a single switch, which is the same as creating separate networks for each VLAN. VLANs offer increased security, performance, and simplified administration. VLAN membership can be determined by protocol, port, or MAC address. With protocol-based VLANs, computers are assigned to VLANs using the protocol that is in use and the Layer 3 address. With port-based VLANs, specific ports on a network switch are assigned to a VLAN. With MAC address-based VLANs, the Media Access Control (MAC) address is used to assign membership to a VLAN. VLAN segmentation provides a new level of administrative flexibility, organization, and security.


In [125]:
# Follow-up on the same chat_engine that refers back to the previous answer ("like you just
# mentioned") while misstating it; the recorded output shows the engine rephrasing the
# message into a standalone question before querying.
print(str(chat_engine.chat("So like you just mentioned, the MAC address strips people of their membership to a VLAn")))

Querying with: Can you explain how the Media Access Control (MAC) address is used to assign membership to a VLAN?

The MAC address is used to assign membership to a VLAN by keeping track of the MAC addresses that belong to each VLAN. The MAC address is a unique 12-digit hexadecimal number that is stamped into every network interface card. Every device that will be used on a network has this unique address built into it and it cannot be modified in any way. This allows a workstation computer to be moved anywhere in an office without needing to be reconfigured, as the MAC address does not change and the workstation remains a member of a particular VLAN.


In [126]:
# Context-free follow-up ("other applications") sent to the same chat_engine; the recorded
# output shows it being expanded using the earlier MAC-address/VLAN turns.
print(str(chat_engine.chat("Can you tell me other applications?")))

Querying with: What are some other applications of using a MAC address to assign membership to a VLAN?

Other applications of using a MAC address to assign membership to a VLAN include:

1. Allowing a workstation to be moved anywhere in an office without needing to be reconfigured.
2. Isolating certain network segments from others for increased security.
3. Allowing for easier administration when moving users between LAN segments.
4. Reducing broadcast storms and freeing up bandwidth.
5. Simplifying the process of addressing new stations and reconfiguring hubs and routers.
