In [46]:
import os
import json
from openai import OpenAI
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
# from groq import Groq

# The Groq key is optional here: the Groq import above is commented out and
# nothing visible in this notebook reads GROQ_API_KEY, so a missing variable
# must not abort the cell with a KeyError. The value is None when unset.
GROQ_API_KEY = os.environ.get('GROQ_API_KEY')
# Client constructed with no explicit arguments; per the openai-python docs it
# then takes its API key from the OPENAI_API_KEY environment variable.
client = OpenAI()



def extract_answer(input_string):
    """Extract and parse the JSON object embedded in a block of text.

    The text may carry extra characters around the JSON (prose, code fences),
    so the candidate is everything from the first ``{`` through the last ``}``
    inclusive.

    Args:
        input_string: Raw text expected to contain one JSON object.

    Returns:
        The value produced by ``json.loads`` for the extracted span (a ``dict``
        for a well-formed JSON object).

    Raises:
        ValueError: If the text has no ``{ ... }`` span, or the span is not
            valid JSON.
    """
    json_start = input_string.find('{')
    # Keep the raw rfind() result: adding 1 before the check would turn the
    # "not found" sentinel (-1) into 0 and hide a missing closing brace.
    last_brace = input_string.rfind('}')

    # Covers three cases: no '{', no '}' (last_brace == -1), and a '}' that
    # only occurs before the first '{'.
    if json_start == -1 or last_brace < json_start:
        raise ValueError("Invalid input: No JSON data found.")

    json_data = input_string[json_start:last_brace + 1]

    try:
        # Convert the JSON string to a Python dictionary
        return json.loads(json_data)
    except json.JSONDecodeError as e:
        # Surface decoding problems as ValueError, keeping the original cause.
        raise ValueError(f"An error occurred while parsing JSON: {str(e)}") from e



# Output schema passed to JsonOutputParser below: a single string field.
# NOTE(review): documentation is kept in `#` comments on purpose. Pydantic v1
# is understood to copy a class docstring into the generated schema as a
# description, which would change the format instructions sent to the model;
# confirm before adding a docstring.
class summary(BaseModel):
    # Generated summary text; the description string is part of the schema.
    summary: str = Field(description="Summary of git commits")
            
# Parser built from the `summary` model; its format instructions are injected
# into the prompt further down.
parser = JsonOutputParser(pydantic_object=summary)

# Prompt text. `{commits}` is supplied on each format() call, while
# `{format_instructions}` is bound once via partial_variables below.
# The quoted opening phrase now uses matching single quotes (it previously
# opened with ' and closed with ").
template = \
'''You are provided with a series of git commits. You are tasked with generating a technical summary of these git commits that are roughly 10 to 30 words long. 
Elaborate as best as you can based on the information from these commits to write an informative summary paragraph of what the user has been doing with this github repository.
Ensure that your summaries are helpful, accessible, and factual, catering to both technical and non-technical audiences. 
Do not share the names of the files directly with end users. Under no circumstances provide a download link to any of the files.
Start the summary with 'In this repository, I have been'

List of git commits:
{commits}

{format_instructions}
Ensure and double check that the answer is in accordance to the format above.
    '''

prompt = PromptTemplate(
    template=template,
    input_variables=["commits"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

# Sample input for the prompt: one commit per line, each an abbreviated hash,
# optional ref decorations in parentheses, and the commit subject.
# NOTE(review): looks like pasted `git log --oneline` output — confirm source.
ls_commits = \
'''
8941440 refactor: pair review with zuo zhen
363eec2 (origin/refactor) feat: env template
d713548 fix: addition of test id to run code
baf7f98 refactored google nlp notebook
f5c3738 add google nlp exploration and renamed timeline notebooks
65ad592 Merge pull request #7 from SPHTech-Data/refactor
2e6e751 fix: final edits to refactoring
a705a81 fix: renamed vue scripts and adjusted schema in server.js
d5007a2 feat: add hierarchical experimentation notebook to refactored notebooks
c49de55 feat: Add google styled docstrings to both versions of timeline
862b5fb (origin/dev, dev) Merge pull request #6 from SPHTech-Data/dev
654620e Label code sections for splitting into .py files
44e20b4 Label code sections for splitting into .py files
37920ea Create seperate folder for refactored code
687acca second official original timeline refactor
91bf5ee more refactoring
d70f3f7 feat: Refactored Hhybrid + first official refactored
33a4429 official hybrid timeline refactor
923dbcc official hybrid search refactor
8a9ab87 Hybrid approach re factor
a80a762 Hybrid approach re factor
e5838b1 Hybrid approach re factor
e35dc5a Tidy code for hybrid and original approach
16ff740 Add webpage title and icon
62022fa Clean up hybrid timeline
814702f Add demo for hybrid and original timeline
ae34ffa add modifications
03d1e5f fix: prep for demo
6193dd0 feat: hier + re rank experimentation
843855c update GPT prompt for generating timeline header from article title
33bd6c9 feat: hybrid timeline generation with updated vue script
45198ef feat: new timeline with hybrid approach
0d9dcfb feat: hybrid search with re ranker timeline before optimisation
a816ffe feat: re ranker with hybrid approach
078caf6 fix: update TODOS
d8dd1ef feat: clustering with re ranker
53a3b84 feat(clustering): new analytics
'''

# Render the template with the commit log; the format instructions were
# already bound when `prompt` was built.
final_prompt = prompt.format(commits=ls_commits)

# Single-turn chat request: the rendered prompt is the only (user) message,
# sent with temperature 0 and a 100-token cap on the reply.
completion = client.chat.completions.create(
    messages=[{"role": "user", "content": final_prompt}],
    model="gpt-3.5-turbo",
    max_tokens=100,
    temperature=0,
)

# Raw text of the first returned choice; parsed in a later cell.
answer = completion.choices[0].message.content

In [48]:
# Parse the JSON object out of the raw model reply, then show only the text
# stored under its "summary" key.
summary_dict = extract_answer(answer)
print(summary_dict["summary"])

In this repository, I have been refactoring code, adding new features like hierarchical experimentation, Google NLP exploration, and hybrid timeline generation with re ranker.


In [49]:
# Inspect the fully rendered prompt exactly as it was sent to the model.
print(final_prompt)

You are provided with a series of git commits. You are tasked with generating a technical summary of these git commits that are roughly 10 to 30 words long. 
Elaborate as best as you can based on the information from these commits to write an informative summary paragraph of what the user has been doing with this github repository.
Ensure that your summaries are helpful, accessible, and factual, catering to both technical and non-technical audiences. 
Do not share the names of the files directly with end users. Under no circumstances provide a download link to any of the files.
Start the summary with 'In this repository, I have been"

List of git commits:

8941440 refactor: pair review with zuo zhen
363eec2 (origin/refactor) feat: env template
d713548 fix: addition of test id to run code
baf7f98 refactored google nlp notebook
f5c3738 add google nlp exploration and renamed timeline notebooks
65ad592 Merge pull request #7 from SPHTech-Data/refactor
2e6e751 fix: final edits to refactoring

In [None]:
# Scratch cell: a bare string expression that is not assigned to any name, so
# nothing else in the notebook depends on it. It appears to be a pasted copy of
# the prompt template with its middle elided ("...") — candidate for removal.
'''You are provided with a series of git commits. You are tasked with generating a technical summary of these git commits that are roughly 10 to 30 words long. 
Elaborate as best as you can based on the information from these commits to write an informative summary paragraph of what the user has been doing with this github repository.
Ensure that your summaries are helpful, accessible, and factual, catering to both technical and non-technical audiences. 
Do not share the names of the files directly with end users. Under no circumstances provide a download link to any of the files.
Start the summary with 'In this repository, I have been"

List of git commits:
{commits}
...
{"properties": {"summary": {"title": "Summary", "description": "Summary of git commits", "type": "string"}}, "required": ["summary"]}
```
Ensure and double check that the answer is in accordance to the format above.'''
    