## GenAI Github Coding Assistant

This file is used for testing out the code in developing features for the Github Coding Assistant.

`1. github_api_calls.py`: This script contains functions for making API calls to Github.

In [10]:
import os
import requests
from dotenv import load_dotenv
from langchain_core.documents import Document
import re

In [11]:
load_dotenv()
github_token = os.getenv('GITHUB_TOKEN')

In [12]:
def fetch_github(owner, repo, endpoint):
    url = f"https://api.github.com/repos/{owner}/{repo}/{endpoint}"
    headers = {
        "Authorization": f"Bearer {github_token}"
        }
    response = requests.get(url, headers=headers)
    
    if response.status_code == 200:
        print(f"Fetched data from Github: {url}")
        return response.json()
    else:
        print(f"Error fetching data from Github: {response.status_code}")
        return []
    
#preprocess issues data to ensure uniform encoding format

    
# get results from fetch_github function and wrap them in a langchain Document
def load_issues(issues):
    docs = []
    for issue_num in range(len(issues)):
        metadata = {
            "author": issues[issue_num]["user"]["login"],
            "comments": issues[issue_num]["comments"],
            "body": issues[issue_num]["body"],
            "labels": issues[issue_num]["labels"],
            "created_at": issues[issue_num]["created_at"]
        }
        data = issues[issue_num]["title"] 
        if issues[issue_num]["body"]:
            data += issues[issue_num]["body"]
            
        # Decode the data to ensure consistency and avoid encoding issues
        data = data.encode('utf-8').decode('utf-8')
        
        doc = Document(page_content=data, metadata=metadata)
        docs.append(doc)
             
    return docs

def test_load_github(test_data):
    metadata = {
    "author": "Test Author",
    "comments": "Test Comments",
    "body": "Test Body",
    "labels": ["label1"],
    "created_at": "2024-08-24T11:14:00Z"
    }
    doc = Document(page_content=test_data, metadata=metadata)
    print(doc)

In [13]:
test_data = "This is a simple test document with basic text content."
test_load_github(test_data)

page_content='This is a simple test document with basic text content.' metadata={'author': 'Test Author', 'comments': 'Test Comments', 'body': 'Test Body', 'labels': ['label1'], 'created_at': '2024-08-24T11:14:00Z'}


In [14]:
def clean_text(text):
    """
    Clean a given text by removing non-printable characters and normalizing whitespace.
    1. Removes non-ASCII characters, which may cause encoding issues.
    2. Normalizes whitespace by replacing multiple spaces with a single space.
    3. Strips leading and trailing whitespace for consistency.
    4. Removes urls as they can introduce security vulnerabilities & formatting issues.
    5. Removes image markdowns, which are unnecessary for this task.
    """
    
    if not isinstance(text, str):  
        text = str(text)  
    # Remove non-printable characters and excessive whitespace
    text = re.sub(r'[^\x00-\x7F]+', ' ', text)  # Remove non-ASCII characters
    text = re.sub(r'\s+', ' ', text)  # Replace multiple whitespace with single space
    text = re.sub(r'!\[.*?\]\(.*?\)', '', text)  # Remove image markdowns
    text = re.sub(r'https?://\S+', '', text)  # Remove URLs
    
    return text.strip()


def clean_issues_data(issues):
    """
    Clean the issues data to ensure proper encoding and formatting.
    1. Converts data to strings and strips any leading or trailing whitespace.
    2. Ensures the comments field is an integer, avoiding issues with non-integer data.
    3. Cleans up the body and title fields using the clean_text function.
    """
    cleaned_issues = []
    for issue in issues:
        cleaned_issue = {}
        
        # Extract and clean metadata
        cleaned_issue["user"] = {
            "login": str(issue.get("user", {}).get("login", "")).strip()
        }
        cleaned_issue["comments"] = int(issue.get("comments", 0))
        cleaned_issue["body"] = str(issue.get("body", "")).strip()
        cleaned_issue["labels"] = [str(label).strip() for label in issue.get("labels", [])]
        cleaned_issue["created_at"] = str(issue.get("created_at", "")).strip()

        # Clean title and body
        cleaned_issue["title"] = str(issue.get("title", "")).strip()
        cleaned_issue["body"] = clean_text(issue.get("body", ""))

        cleaned_issues.append(cleaned_issue)
    
    return cleaned_issues


In [15]:
git_result = fetch_github(owner="techwithtim",
                    repo="Flask-Web-App-Tutorial", 
                    endpoint="issues")

Fetched data from Github: https://api.github.com/repos/techwithtim/Flask-Web-App-Tutorial/issues


In [16]:
cleaned_issues = clean_issues_data(git_result)
cleaned_issues[0]

{'user': {'login': 'Abstargo'},
 'comments': 0,
 'body': "ValueError: Invalid hash method 'sha256'. ``` new_user = User(email=email, first_name=first_name, password=generate_password_hash( password1, method='sha256')) ```",
 'labels': [],
 'created_at': '2024-08-24T11:14:00Z',
 'title': 'Update auth.py'}

In [17]:
doc_results = load_issues(cleaned_issues)

In [18]:
doc_results[1]

Document(page_content='Database not being created.Code: "from flask import Flask from flask_sqlalchemy import SQLAlchemy from os import path db = SQLAlchemy() DB_NAME = "CSIA.db" def create_app(): app = Flask(__name__) app.config[\'SECRET_KEY\'] = \'www\' app.config[\'SQLALCHEMY_DATABASE_URI\'] = f\'sqlite:///{DB_NAME}\' db.init_app(app) from .views import views from .auth import auth app.register_blueprint(views, url_prefix=\'/\') app.register_blueprint(auth, url_prefix=\'/\') from .models import Admin, Employees, Product, Order, OrderItem, InventoryPage, Supplier create_database(app) with app.app_context(): db.create_all() return app def create_database(app): if not path.exists(\'Website/\' + DB_NAME): db.create_all(app=app) print(\'Created Database!\') " Error: "S D:\\MY PC\\Documents\\> & "C:/Program Files/Python312/python.exe" "d:/MY PC/Documents/main.py" Traceback (most recent call last): File "d:\\MY PC\\Documents\\main.py", line 3, in <module> app = create_app() ^^^^^^^^^^^^ Fi

#### Further Improvements in cleaning text:

`Whitespace and Formatting:`

The text could benefit from better formatting, such as spacing and indentation, especially for code snippets. This will improve readability and help distinguish different parts of the text.

`Markdown Handling:`

If you are dealing with markdown or code blocks, it is useful to convert them into a format that is easy to read and understand.

`Cleaning Code Snippets:`

If code snippets are part of the text, ensure they are properly formatted and indented. For example, newlines and indentation in code blocks should be preserved or made consistent.

`Consistency and Readability:`

Ensure that the text is consistent and easily readable. For example, handling apostrophes correctly, ensuring proper punctuation, and avoiding overly long lines.

In [19]:
# the loaded issues ie., the text wrapped as langchain docs should be stored in vector DB
# connect to vector Db & save Docs
# additional imports
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_astradb import AstraDBVectorStore
from langchain.agents import create_tool_calling_agent
from langchain.agents import AgentExecutor
from langchain.tools.retriever import create_retriever_tool
from langchain import hub

In [20]:
def connect_to_vector_db():
    """
    Create embeddings ie., cnvert documents to vectors
    and connect to astra db and save the embedded documents.
    """
    embeddings = OpenAIEmbeddings()
    ASTRA_DB_API_ENDPOINT = os.getenv('ASTRA_DB_API_ENDPOINT')
    ASTRA_DB_APPLICATION_TOKEN = os.getenv('ASTRA_DB_APPLICATION_TOKEN')
    DESIRED_NAMESPACE = os.getenv('ASTRA_DB_KEYSPACE')
    
    if DESIRED_NAMESPACE:
        ASTRA_DB_KEYSPACE = DESIRED_NAMESPACE
    else:
        ASTRA_DB_KEYSPACE = None
    
    vstore = AstraDBVectorStore(
        embedding=embeddings,
        collection_name="github_issues", # creates a collection if it doesn't exist
        api_endpoint=ASTRA_DB_API_ENDPOINT,
        token=ASTRA_DB_APPLICATION_TOKEN,
        namespace=ASTRA_DB_KEYSPACE   
    )
    return vstore

In [15]:
# prompt user for input if the issues need to be updated
# add_to_vectorstore = input("Do you want to update the issues? (y/N): ").lower() in [
#     "yes",
#     "y",
# ]

In [21]:
update_vstore = True

if update_vstore:
    
    owner="techwithtim"
    repo="Flask-Web-App-Tutorial"
    vstore = connect_to_vector_db()
    # issue_docs = load_issues(cleaned_issues)
    
    try:
        vstore.delete_collection("github_issues")  # delete existing collection if it exists)  # delete existing collection if it exists
    except:
        pass
    
    vstore.add_documents(doc_results)
    
    #retreving/ vector lookup of some docs for tetsing
    results = vstore.similarity_search("Flash messages", k=3)
    
    for res in results:
        print(f"{res.page_content}")
    

FLASH MESSAGES NOT SHOWING IN LOGIN PAGEmy auth.py and login.html file doesnt haave any error. the signup page flash messages work but login page flash messages are not showing. and even after pressing login button nothing happens. plss help
Alert message in home pageHi there, So as I'm following the video as typing the code i got into the login_requried mode in views.py. As i put the @login_required in views.route as Tim showed, and in the others routes, as put the user= current user as well, when i go to the site it appears the alert message that i need to login to access the page  When I refresh the page the alert disappears but it shoulndt be there in the first place.
Flask Web Dev courseNone


### Define an agent

How to define a tool:
- provide name for the tool
- provide a description
- when to use the tool -> in the form of a promt

Set up a chain of prompts that tell the AI/GPT to use the tool when appropriate. To do this we can use langchain hub to download stuff that can automatically use the tool when it feels appropriate.

Python functions as tools:

- Doc string description for a function works as a prompt that the AI/GPT will use when it thinks it's appropriate to use the tool.
- Name of the tool will be the function name
- Decorator @tool lets the function be treated as a langchain tool to our agent


In [24]:
# Writing the agent - we need to provide the vector store db as a toold to the agent and it can decide when to use it 
# need to wrap the vstore in a retriever tool
# another tool a function that allows us to save a note

from langchain.tools.retriever import create_retriever_tool
from langchain.agents import create_tool_calling_agent
from langchain.agents import AgentExecutor
from langchain import hub

vstore = connect_to_vector_db()
retriever = vstore.as_retriever(search_kwargs={"k": 3})
retriever_tool = create_retriever_tool(retriever,
                                       "github_issue_search",
                                       "Search for information about github issued."\
                                       "For any related to github issues you must use this tool.")

prompt = hub.pull("hwchase17/openai-functions-agent")
# create an llm that can use these tools
llm = ChatOpenAI(temperature=0.3, max_tokens=1000)

tools = [retriever_tool]
agent = create_tool_calling_agent(llm, tools, prompt)
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

# use the agent executor in a loop to ask questions

test_question = "What is the most common issue in Flask-Web-App-Tutorial?"
test_result = agent_executor.invoke({"input":test_question})
print(test_result["output"])

# while (question := input("Ask a question about github issues (q to quit): "))!= "q":
#     result = agent_executor.invoke({"input":question})
#     print(result{"output"})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `github_issue_search` with `{'query': 'repo:smirnov-am/flask-web-app-tutorial is:issue'}`


[0m[36;1m[1;3mFlask Web Dev courseNone

Flask Web Dev courseNone

Error in getting the url address after the installation of pipHi I came across your contents when searching on how to learn a python. Your information about on develop "Flask Web App Tutorial" are very fantabulous and seriously educative. Thank you for impact your knowledge positively Please i have some issues when following your tutorial and the issue is that , i can't generate how you get the url address  from the terminal after debug. Although , i am very new to python programming language. Please help me on how to rectify the issue. Thank you[0m[32;1m[1;3mThe most common issue in the Flask-Web-App-Tutorial repository seems to be related to getting the URL address after the installation of pip. Users have reported difficulties in generating the URL ad

In [28]:
# note making tool

from langchain_core.tools import tool
import datetime
from datetime import datetime
import logging

@tool
def note_tool(note):
    """
    A tool that allows users to save a note.
    Args:
        note (str): The note to save.
    """
    # Generate a unique filename based on the current timestamp
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"note_{timestamp}.txt"
    dir_path = "/Users/sjr11/Work/SelfLearning/GenAI Coding Agent/data/processed/"
    file_path = os.path.join(dir_path, filename)
    
    with open(file_path, "w") as f:
        f.write(f"{note}\n")
        logging.info(f"Saved note to {file_path}")
        
# add to the tools list
tools = [retriever_tool, note_tool]
agent = create_tool_calling_agent(llm, tools, prompt)
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

# use the agent executor in a loop to ask questions

test_question = "Find issues related to sql alchemy, summarise them and make a note of them"
test_result = agent_executor.invoke({"input":test_question})
print(test_result["output"])
    
    



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `github_issue_search` with `{'query': 'SQLAlchemy'}`


[0m[36;1m[1;3mDatabase not being created.Code: "from flask import Flask from flask_sqlalchemy import SQLAlchemy from os import path db = SQLAlchemy() DB_NAME = "CSIA.db" def create_app(): app = Flask(__name__) app.config['SECRET_KEY'] = 'www' app.config['SQLALCHEMY_DATABASE_URI'] = f'sqlite:///{DB_NAME}' db.init_app(app) from .views import views from .auth import auth app.register_blueprint(views, url_prefix='/') app.register_blueprint(auth, url_prefix='/') from .models import Admin, Employees, Product, Order, OrderItem, InventoryPage, Supplier create_database(app) with app.app_context(): db.create_all() return app def create_database(app): if not path.exists('Website/' + DB_NAME): db.create_all(app=app) print('Created Database!') " Error: "S D:\MY PC\Documents\> & "C:/Program Files/Python312/python.exe" "d:/MY PC/Documents/main.py" Traceback (most recent ca

NotADirectoryError: [Errno 20] Not a directory: '/Users/sjr11/Work/SelfLearning/GenAI Coding Agent/data/processed/notes.txt/note_20240824_220537.txt'