In [None]:
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

In [None]:
! pip3 install --upgrade --quiet google-cloud-aiplatform \
                                        gitpython \
                                        magika

In [None]:
import vertexai

# Google Cloud project that the Vertex AI SDK is initialised with.
PROJECT_ID = "heroic-rain-298320"  # @param {type:"string"}

# Location handed to the SDK.
# NOTE(review): an earlier value, "europe-west3-a", was disabled here; it looks
# like a zone rather than a region, which is presumably why it was replaced.
LOCATION = "europe-west1"

# Bind the SDK to the project and location chosen above.
vertexai.init(location=LOCATION, project=PROJECT_ID)

In [None]:
import IPython.display
from IPython.core.interactiveshell import InteractiveShell

# Make IPython display the value of every expression statement in a cell,
# not only the last one.
InteractiveShell.ast_node_interactivity = "all"

from vertexai.generative_models import (
    FunctionDeclaration,
    GenerationConfig,
    GenerativeModel,
    Tool,
)

In [None]:
# Local directory that receives the repository contents. The download helpers
# delete and recreate it on every run.
repo_dir = "./repo"

# GitHub repository to analyse.
repo_url = "https://github.com/assafelovic/gpt-researcher"  # @param {type:"string"}

In [None]:
import os
import shutil
from pathlib import Path
import requests
import git
import magika
import zipfile
from io import BytesIO
import fnmatch
from pathlib import Path



# Shared Magika instance; extract_code() calls m.identify_path() on every file
# to decide whether its content belongs to the "text" or "code" group.
m = magika.Magika()


def clone_repo(repo_url, repo_dir):
    """Clone ``repo_url`` into a freshly created ``repo_dir``.

    Whatever already exists at ``repo_dir`` is removed first, so the clone
    always lands in an empty directory.
    """
    previous_checkout_present = os.path.exists(repo_dir)
    if previous_checkout_present:
        # Throw away the old contents before cloning again.
        shutil.rmtree(repo_dir)

    os.makedirs(repo_dir)
    git.Repo.clone_from(repo_url, repo_dir)


def download_latest_snapshot(repo_url, repo_dir, branch="master", timeout=60):
    """Download and unpack a zip snapshot of one branch of a GitHub repository.

    Args:
        repo_url: https URL of the repository, e.g.
            "https://github.com/owner/repo". Trailing slashes are ignored.
        repo_dir: Directory to extract the archive into. It is deleted and
            recreated once the download has succeeded, so previous contents
            are lost.
        branch: Branch whose head is downloaded. Defaults to "master"; pass
            e.g. "main" for repositories that use a different branch name.
        timeout: Seconds to wait for the HTTP request before giving up.

    Raises:
        RuntimeError: If the server does not answer with HTTP 200. The
            existing ``repo_dir`` is left untouched in that case.
    """
    # Derive the URL for downloading the zip file.
    zip_url = f"{repo_url.rstrip('/')}/archive/refs/heads/{branch}.zip"

    # Download first, so a failed request does not destroy an earlier snapshot.
    print('Downloading from url:', zip_url)
    response = requests.get(zip_url, timeout=timeout)
    if response.status_code != 200:
        raise RuntimeError(
            'Failed to download the repository snapshot. '
            f'(HTTP {response.status_code} from {zip_url})'
        )

    # Start from an empty target directory.
    if os.path.exists(repo_dir):
        shutil.rmtree(repo_dir)
    os.makedirs(repo_dir)

    # Unpack the archive straight from memory.
    with zipfile.ZipFile(BytesIO(response.content)) as zip_ref:
        zip_ref.extractall(repo_dir)


def extract_code(repo_dir, exclude_patterns=("*poetry.lock",)):
    """Index a directory tree and concatenate the contents of its text/code files.

    Args:
        repo_dir: Root directory to walk.
        exclude_patterns: ``fnmatch`` patterns matched against bare file names
            (not paths). Matching files are left out of both results.

    Returns:
        A ``(code_index, code_text)`` tuple. ``code_index`` lists the path,
        relative to ``repo_dir``, of every non-excluded file. ``code_text`` is
        the concatenation of every file whose Magika group is "text" or
        "code", each wrapped in a header and a footer line. Files that cannot
        be read as UTF-8 stay in the index but contribute nothing to
        ``code_text``.
    """
    code_index = []
    sections = []  # joined once at the end instead of growing a string with +=

    for root, dirs, files in os.walk(repo_dir):
        # Sort in place so the traversal (and therefore the output) does not
        # depend on the order the filesystem happens to return entries in.
        dirs.sort()
        for file in sorted(files):
            if any(fnmatch.fnmatch(file, pattern) for pattern in exclude_patterns):
                continue  # Skip files matching any of the exclude patterns

            file_path = os.path.join(root, file)
            relative_path = os.path.relpath(file_path, repo_dir)
            code_index.append(relative_path)

            file_type = m.identify_path(Path(file_path))
            if file_type.output.group not in ("text", "code"):
                continue

            # Read the whole file before emitting anything, so a failed read
            # cannot leave a dangling header without content or footer.
            try:
                with open(file_path, "r", encoding="utf-8") as f:
                    content = f.read()
            except (OSError, UnicodeError) as error:
                # Best effort: report and skip files that cannot be read.
                print(f"Skipping unreadable file {relative_path}: {error}")
                continue

            sections.append(
                f"----- File: {relative_path} -----\n"
                f"{content}"
                "\n-------------------------\n"
            )

    return code_index, "".join(sections)


def get_github_issue(owner: str, repo: str, issue_number: str) -> str:
    """Fetch the body text of a GitHub issue through the GitHub REST API.

    Args:
        owner: Account or organisation that owns the repository.
        repo: Repository name.
        issue_number: Number of the issue to fetch.

    Returns:
        The issue body. An empty string is returned when the request fails,
        when the response is not a JSON object, or when the issue has no body.
    """
    headers = {
        "Accept": "application/vnd.github+json",
        "X-GitHub-Api-Version": "2022-11-28",
    }  # Set headers for GitHub API

    # Construct API URL
    url = f"https://api.github.com/repos/{owner}/{repo}/issues/{issue_number}"

    try:
        response_git = requests.get(url, headers=headers, timeout=30)
        response_git.raise_for_status()  # Check for HTTP errors
        issue_data = response_git.json()
    except (requests.exceptions.RequestException, ValueError) as error:
        # ValueError covers a response body that is not valid JSON.
        print(f"Error fetching issue: {error}")  # Handle potential errors
        # Stop here: there is no usable response to read a body from.
        return ""

    if not isinstance(issue_data, dict):
        return ""
    # The "body" field may be missing or null; normalise both to "".
    return issue_data.get("body") or ""


def save_text_to_file(text: str, filename: str) -> None:
    """Write ``text`` to ``filename`` as UTF-8, replacing any existing file.

    ``filename`` is passed to ``open`` unchanged, so a relative name is
    resolved against the current working directory. Failures to open or write
    the file are reported on stdout instead of being raised.

    Args:
        text: Content to write.
        filename: Path of the file to create or overwrite.
    """
    try:
        # Fix the encoding explicitly: the platform default may be unable to
        # represent characters found in arbitrary source files.
        with open(filename, "w", encoding="utf-8") as file:
            file.write(text)
    except OSError as e:
        print(f"An error occurred while saving text to {filename}: {e}")
    else:
        print(f"Text successfully saved to {filename}")


In [None]:
import json

# Fetch a fresh copy of the repository into repo_dir.
download_latest_snapshot(repo_url, repo_dir)

# Build the file index and the concatenated source text, leaving out log
# files, lock files and dot-files.
code_index, code_text = extract_code(
    repo_dir,
    exclude_patterns=["*.log", "*.lock", ".*"],
)

# Keep both artefacts on disk for later inspection.
save_text_to_file(text=code_text, filename="code_text")
save_text_to_file(text=json.dumps(code_index), filename="code_index")

In [None]:
# Identifier of the Gemini model that answers the questions.
# NOTE(review): this is a dated preview identifier — confirm it is still served.
MODEL_ID = "gemini-1.5-pro-preview-0514"  # @param {type:"string"}

# The system instruction is sent with every request made through `model`.
model = GenerativeModel(
    model_name=MODEL_ID,
    system_instruction=[
        "You are a coding expert.",
        "Your mission is to answer all code related questions with given context and instructions.",
    ],
)

In [None]:
def get_code_prompt(question):
    """Build the full prompt for a question about the loaded codebase.

    The prompt embeds the question together with the notebook-level
    ``code_index`` and ``code_text`` variables, so those must already be
    defined when this function is called.
    """
    return f"""
    Questions: {question}

    Context:
    - The entire codebase is provided below.
    - Here is an index of all of the files in the codebase:
      \n\n{code_index}\n\n.
    - Then each of the files is concatenated together. You will find all of the code you need:
      \n\n{code_text}\n\n

    Answer:
  """

In [None]:
# Alternative question to try:
# question = """
#   Give me a summary of this codebase, and tell me the top 3 things that I can learn from it.
# """
question = """
  Provide a getting started guide to onboard new developers to the codebase.
"""

prompt = get_code_prompt(question)
contents = [prompt]

# Generate text using non-streaming method
response = model.generate_content(contents)

# Print generated text and usage metadata
print(f"\nAnswer:\n{response.text}")
print(f'\nUsage metadata:\n{response.to_dict().get("usage_metadata")}')
print(f"\nFinish reason:\n{response.candidates[0].finish_reason}")
# What is printed are the candidate's safety *ratings*, so label them as such.
print(f"\nSafety ratings:\n{response.candidates[0].safety_ratings}")

In [None]:
question = """
  Provide a getting started guide to onboard new developers to the codebase.
"""

prompt = get_code_prompt(question)
contents = [prompt]

# Generate text using the streaming method and render each chunk as it arrives.
responses = model.generate_content(contents, stream=True)
for response in responses:
    # Call display() explicitly. A bare expression inside a loop is only
    # rendered while ast_node_interactivity is set to "all", so without this
    # the cell would silently show nothing under IPython's default setting.
    IPython.display.display(IPython.display.Markdown(response.text))