In [11]:
!git clone https://github.com/kyllan16693/code-commenter.git



In [39]:
!pip install langchain langchain-openai langchain-core transformers torch

Collecting langchain-openai
  Downloading langchain_openai-0.2.10-py3-none-any.whl.metadata (2.6 kB)
Collecting tiktoken<1,>=0.7 (from langchain-openai)
  Downloading tiktoken-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)
Downloading langchain_openai-0.2.10-py3-none-any.whl (50 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.6/50.6 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading tiktoken-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m18.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tiktoken, langchain-openai
Successfully installed langchain-openai-0.2.10 tiktoken-0.8.0


In [72]:
import os
from getpass import getpass
from typing import Any, Optional, Type

import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.tools import BaseTool
from langchain_core.output_parsers import StrOutputParser

# Take the OpenAI API key from the environment. Prompt for it (without echoing)
# only when it is missing, so the secret is never written into the notebook and
# a key that is already configured is not overwritten.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

# Load the custom model and tokenizer.
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Location of the fine-tuned checkpoint inside the cloned repository.
model_path = '/content/code-commenter/final_model'
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSeq2SeqLM.from_pretrained(model_path).to(device)

# Define input schemas
# Argument schema used by FunctionHeaderGenerator: a single string field
# carrying the source code for which a header should be generated.
class CodeInput(BaseModel):
    code_snippet: str = Field(description="The code snippet to generate headers for")

# Argument schema used by ProjectDescriptionTool: a single string field
# carrying the function headers that the project description is written from.
class HeaderInput(BaseModel):
    function_headers: str = Field(description="The function headers to generate project description from")

# Define the custom function header generation tool
class FunctionHeaderGenerator(BaseTool):
    """Tool that generates a header for a code snippet.

    Uses the module-level ``tokenizer``, ``model`` and ``device`` objects, so
    those must be defined before ``_run`` is called.
    """

    name: str = "function_header_generator"
    description: str = "Generates headers for functions in the given code snippet"
    args_schema: Type[BaseModel] = CodeInput

    def _run(self, code_snippet: str) -> str:
        """Generate a header for ``code_snippet``.

        Args:
            code_snippet: Source code to describe.

        Returns:
            The decoded model output with special tokens removed, or an empty
            string when the snippet contains no non-blank lines.
        """
        # Preprocess the code snippet to improve tokenization: strip every
        # line, drop blank ones and join the rest with single spaces.
        stripped_lines = (line.strip() for line in code_snippet.strip().split('\n'))
        formatted_code = ' '.join(line for line in stripped_lines if line)

        # Nothing to describe: do not run the model on an empty prompt.
        if not formatted_code:
            return ""

        print("Formatted code", formatted_code)

        inputs = tokenizer(
            formatted_code,
            return_tensors="pt",
            max_length=512,
            truncation=True
        ).to(device)

        # Pass the attention mask produced by the tokenizer explicitly instead
        # of leaving generate() to infer one from the input ids.
        outputs = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_length=128,
            num_beams=4,
            early_stopping=True
        )

        # Only the first returned sequence is decoded and returned.
        return tokenizer.decode(outputs[0], skip_special_tokens=True)

class ProjectDescriptionTool(BaseTool):
    """Tool that asks a chat model to write a project description and file
    header from a string of function headers.

    The prompt and the ``prompt | llm | StrOutputParser()`` chain are built in
    ``__init__``; ``_run`` invokes that chain.
    """

    name: str = "project_description_generator"
    description: str = "Generates a project description and file header from function headers"
    args_schema: Type[BaseModel] = HeaderInput
    llm: Optional[ChatOpenAI] = Field(default_factory=lambda: ChatOpenAI(temperature=0))
    prompt: Optional[ChatPromptTemplate] = Field(default=None)
    # typing.Any, not the builtin function `any` (which is not a type).
    chain: Optional[Any] = Field(default=None)

    def __init__(self, **data):
        """Build the prompt template and the generation chain.

        Args:
            **data: Field values forwarded to ``BaseTool``; ``llm`` may be
                supplied to choose the chat model.
        """
        super().__init__(**data)
        # An explicit ``llm=None`` satisfies the Optional annotation but cannot
        # be piped into a chain, so fall back to the default model.
        if self.llm is None:
            self.llm = ChatOpenAI(temperature=0)
        self.prompt = ChatPromptTemplate.from_template("""
        You are a technical writer. Based on the following function headers, write a detailed project description
        and a comprehensive file header that can be used at the top of the project file:

        {function_headers}

        Please provide a clear and organized description that explains the purpose and functionality of these functions.
        """)
        self.chain = self.prompt | self.llm | StrOutputParser()

    def _run(self, function_headers: str) -> str:
        """Generate the project documentation for ``function_headers``.

        Args:
            function_headers: Text inserted into the prompt's
                ``{function_headers}`` slot.

        Returns:
            The chain's string output. On any exception the error is not
            raised; a string starting with "Error generating description: "
            is returned instead.
        """
        try:
            return self.chain.invoke({"function_headers": function_headers})
        except Exception as e:
            # Deliberate best effort: report the failure as the tool's output.
            return f"Error generating description: {str(e)}"

def main():
    """Run the demo: generate a header for each of two sample functions, then
    turn those headers into project documentation and print the results."""
    # Chat model handed to the description tool below.
    chat_model = ChatOpenAI(temperature=0)

    # Sample inputs
    add_source = """
    def add(a, b):
        return a + b
    """

    subtract_source = """
    def subtract(a, b):
        return a - b
    """

    # Step 1: one header per sample, in order.
    print("Generating function headers...")
    header_tool = FunctionHeaderGenerator()
    headers = [header_tool._run(source) for source in (add_source, subtract_source)]
    print(f"Function Headers: {headers}\n")

    # Step 2: feed the space-joined headers to the description tool.
    print("Generating project description and file header...")
    description_tool = ProjectDescriptionTool(llm=chat_model)
    joined_headers = ' '.join(headers)
    documentation = description_tool._run(joined_headers)
    print(f"Project Documentation:\n{documentation}")

if __name__ == "__main__":
    main()

  warn(


Generating function headers...
Formatted code def add(a, b): return a + b
Formatted code def subtract(a, b): return a - b
Function Headers: ['a a a + b', 'return a - b']

Generating project description and file header...
Project Documentation:
File Header:
/*
* Project Name: Arithmetic Operations
* Description: This project contains functions for performing basic arithmetic operations on two numbers.
* Author: [Your Name]
* Date: [Date]
*/

Project Description:
The Arithmetic Operations project is designed to provide a set of functions for performing basic arithmetic operations on two numbers. The project includes a function named "addition" which takes two parameters, a and b, and returns the sum of the two numbers. Another function named "subtraction" is also included, which takes two parameters, a and b, and returns the difference between the two numbers. 

The "addition" function calculates the sum of the two input numbers by adding them together, while the "subtraction" function c