In [1]:
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.chains import RetrievalQA
from langchain_community.chat_models import ChatOllama
from langchain import PromptTemplate
from langchain_community.vectorstores import FAISS
import json



In [2]:
embeddings = HuggingFaceInstructEmbeddings(
    model_name="hkunlp/instructor-large", model_kwargs={"device": "cpu"}
)

  from tqdm.autonotebook import trange


load INSTRUCTOR_Transformer
max_seq_length  512


In [3]:
with open('chunks.json', 'r') as file:
    chunks = json.load(file)
len(chunks)

203

In [91]:
with open('segments.json', 'r') as file:
    filtered_segments = json.load(file)
len(filtered_segments)

5684

In [5]:
db2 = FAISS.from_texts(chunks, embeddings)

In [6]:
DEFAULT_SYSTEM_PROMPT = """
You are Karan, a student at TH Bingen University. Act as Karan and reply all questions on his behalf. Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. 
""".strip()


def generate_prompt(prompt: str, system_prompt: str = DEFAULT_SYSTEM_PROMPT) -> str:
    return f"""
      <|system|>

      {system_prompt}

      <|user|>

      {prompt}

      <|assistant|>
""".strip()

In [7]:
# DEFAULT_SYSTEM_PROMPT = """
# You are Karan, a student at TH Bingen University. Act as Karan and reply all questions on his basis. Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. 
# """.strip()


# def generate_prompt(prompt: str, system_prompt: str = DEFAULT_SYSTEM_PROMPT) -> str:
#     return f"""
# [INST] <>
# {system_prompt}
# <>

# {prompt} [/INST]
# """.strip()

In [8]:
template = generate_prompt(
    """
{context}

Question: {question}
"""
)

In [9]:
prompt = PromptTemplate(template=template, input_variables=["context", "question"])

In [183]:
qa_chain = RetrievalQA.from_chain_type(
    llm=ChatOllama(model="llama2:latest"),
    chain_type="stuff",
    retriever=db2.as_retriever(search_kwargs={"k": 2}),
    return_source_documents=True,
    chain_type_kwargs={"prompt": prompt},
)

In [120]:
def getStartTimeFromSegments(sourceString):
    json_data = filtered_segments

    words = sourceString.split()

    # Start with 3 words and increase by 1 if no match is found
    num_words = 1
    found_objects = []

    max_num_words = len(words)  # Maximum number of words in the string

    while num_words <= max_num_words:
        # Take the first num_words from the words list
        search_phrase = ' '.join(words[:num_words])

        # Search for matches in the JSON data
        found_objects = [obj for obj in json_data if search_phrase in obj['text']]

        # If no matches found or more than 1 found, and num_words doesn't exceed the max number of words, increase the number of words
        if not found_objects or len(found_objects) > 1:
            num_words += 1
        else:
            # Return the first found object and exit the loop
            print("Found object")
            return found_objects[0]

    # If num_words exceeds the total number of words in the string, print the first object and break
    if num_words > max_num_words:
        print("Exceeded total number of words. Returning the first object:")
        return json_data[0]


In [182]:
#WITHOUT BUFFER MEMORY
from flask import Flask, request, render_template_string, send_file

app = Flask(__name__)

html_template = """
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Chatbot</title>
    <style>
        body {
            font-family: Arial, sans-serif;
            margin: 0;
            padding: 0;
            background-color: #f0f0f0;
        }
        .chat-container {
            max-width: 800px;
            margin: 0 auto;
            padding: 20px;
            background-color: #ffffff;
            border-radius: 10px 10px 0px 0px;
            box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
        }
        .chat-form{
            max-width: 800px;
            margin: 0 auto;
            padding: 20px;
            background-color: #ffffff;
            border-radius: 0px 0px 10px 10px;
            box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
        }
        .chat-message {
            background-color: #f9f9f9;
            border-radius: 10px;
            padding: 10px;
            margin: 10px 0;
        }
        .user-message {
            text-align: right;
        }
        input[type="text"] {
            width: calc(100% - 100px); /* Adjust width of input */
            padding: 8px;
            border-radius: 5px;
            border: 1px solid #ccc;
            margin-right: 10px;
        }
        .additional-content {
            display: none;
        }
        .show-button{
            background-color: #555555;
            border: none;
            color: white;
            padding: 8px 10px;
            text-align: center;
            text-decoration: none;
            display: inline-block;
            font-size: 16px;
            margin: 4px 2px;
            cursor: pointer;
            border-radius: 5px;
        }
        #input_submit{
            background-color: #555555;
            border: none;
            color: white;
            padding: 8px 10px;
            text-align: center;
            text-decoration: none;
            display: inline-block;
            font-size: 16px;
            margin: 4px 2px;
            cursor: pointer;
            border-radius: 5px;
        }
    </style>
</head>
<body>
    <div class="chat-container" id="chat-container">
    <h3>Welcome to the Artificial Intelligence QnA service.  You may ask me anything 🙃</h3>
        {% for message in chat_history %}
            <div class="chat-message {% if message['sender'] == 'user' %}user-message{% endif %}">
                {{ message['content'] }}
                {% if message['sender'] == 'bot' %}
                    <br><br>
                    <button class="show-button">Show source</button>
                    <div class="additional-content">
                        {{ message['source'] }}
                        <br><br>
                        {{ message['seekTime'] }}
                        <video id="myVideo" width="780" height="640" controls>
                            <source src="http://127.0.0.1:81/Kint2" type="video/mp4">
                            Your browser does not support the video tag.
                        </video>
                    </div>
                    <br><br>
                {% endif %}
            </div>
        {% endfor %}
    </div>
    <div class="chat-form">
    <form action="/" method="POST" id="chat-form">
        <input type="text" name="input_text" id="input_text" placeholder="Message Chatbot...">
        <input type="submit" id="input_submit" value="Send">
    </form>
    </div>
    <script>
        document.getElementById('chat-form').addEventListener('submit', function(event) {
            event.preventDefault(); // Prevent default form submission
            var inputText = document.getElementById('input_text').value;
            if (inputText.trim() !== '') {
                var inputBox = document.getElementById('input_text');
                var chatContainer = document.getElementById('chat-container');
                var userMessage = document.createElement('div');
                userMessage.className = 'chat-message user-message';
                userMessage.textContent = inputText;
                chatContainer.appendChild(userMessage);
                document.getElementById('chat-form').submit(); // Submit form
            }
        });

        //show/hide the source button
        document.querySelectorAll('.show-button').forEach(button => {
            button.addEventListener('click', function() {
                const additionalContent = this.nextElementSibling;
                additionalContent.style.display = additionalContent.style.display === 'block' ? 'none' : 'block';
                this.textContent = additionalContent.style.display === 'block' ? 'Hide Source' : 'Show source';
            });
        });

        //to seek the video
        document.addEventListener('DOMContentLoaded', function () {
        var video = document.getElementById('myVideo');
        var chatHistory = {{ chat_history|tojson }};
        console.log('chatHistory', chatHistory)
        startTime = chatHistory[chatHistory.length - 1]['seekTime']['start'];
        console.log('startTime', startTime)

        // When the video metadata has loaded, set the start time
        video.addEventListener('loadedmetadata', function () {
            video.currentTime = startTime;
        });
    });
    </script>
</body>
</html>
"""

chat_history = []

@app.route('/Kint2')
def video():
    video_path = 'Kint2.mp4'
    return send_file(video_path, mimetype='video/mp4')

@app.route('/', methods=['GET', 'POST'])
def index():
    global chat_history

    if request.method == 'POST':
        input_text = request.form['input_text']
        inputAfterSimilaritySearch = db2.similarity_search(query=input_text, k=2)
        output_text = qa_chain({"input_documents": inputAfterSimilaritySearch[0].page_content, "query": input_text})
        # output_text = qa_chain(input_text)
        chat_history.append({'sender': 'user', 'content': input_text})

        # Add logic here to generate response based on input_text
        seekTime = getStartTimeFromSegments(inputAfterSimilaritySearch[0].page_content)
        response_text = output_text['result']
        chat_history.append({'sender': 'bot', 'content': response_text, 'source': inputAfterSimilaritySearch[0].page_content, 'seekTime': seekTime})

    return render_template_string(html_template, chat_history=chat_history)

if __name__ == '__main__':
    app.run(host="0.0.0.0", port=81)

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:81
 * Running on http://192.168.0.101:81
[33mPress CTRL+C to quit[0m
127.0.0.1 - - [21/Mar/2024 14:48:07] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [21/Mar/2024 14:48:34] "POST / HTTP/1.1" 200 -
127.0.0.1 - - [21/Mar/2024 14:48:34] "[35m[1mGET /Kint2 HTTP/1.1[0m" 206 -
127.0.0.1 - - [21/Mar/2024 14:48:34] "[35m[1mGET /Kint2 HTTP/1.1[0m" 206 -


Found object


127.0.0.1 - - [21/Mar/2024 14:49:49] "POST / HTTP/1.1" 200 -
127.0.0.1 - - [21/Mar/2024 14:49:49] "[35m[1mGET /Kint2 HTTP/1.1[0m" 206 -
127.0.0.1 - - [21/Mar/2024 14:49:49] "[35m[1mGET /Kint2 HTTP/1.1[0m" 206 -
127.0.0.1 - - [21/Mar/2024 14:49:49] "[35m[1mGET /Kint2 HTTP/1.1[0m" 206 -


Found object


127.0.0.1 - - [21/Mar/2024 14:50:12] "POST / HTTP/1.1" 200 -


Exceeded total number of words. Returning the first object:


In [179]:
# from flask import Flask, request, jsonify

# app = Flask(__name__)

# response_text = ''

# @app.route('/', methods=['GET', 'POST'])
# def index():
#     global response_text

#     if request.method == 'POST':
#         input_text = request.form['input_text']
#         inputAfterSimilaritySearch = db2.similarity_search(query=input_text, k=2)
#         output_text = qa_chain({"input_documents": inputAfterSimilaritySearch[0].page_content, "query": input_text})
#         seekTime = getStartTimeFromSegments(inputAfterSimilaritySearch[0].page_content)
#         response_data = {
#             'result': output_text['result'],
#             'source': inputAfterSimilaritySearch[0].page_content,
#             'seekTime': seekTime
#         }
#         return jsonify(response_data)

# if __name__ == '__main__':
#     app.run(host="0.0.0.0", port=81)

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:81
 * Running on http://192.168.0.101:81
[33mPress CTRL+C to quit[0m
127.0.0.1 - - [21/Mar/2024 14:44:37] "POST / HTTP/1.1" 200 -


Found object


127.0.0.1 - - [21/Mar/2024 14:46:26] "POST / HTTP/1.1" 200 -


Found object
