In [1]:
import pandas as pd
import numpy as np
import json
import openai
import base64
import os
from langchain import OpenAI
from langchain.prompts import PromptTemplate

In [2]:
# getting keys
with open('../config.json') as f:
    keys = json.load(f)
PATH = keys['path']
openai_organization = keys['openai_organization']
openai.organization = openai_organization
openai_api_key = keys['openai_api_key']
openai.api_key = openai_api_key

In [3]:
os.environ["OPENAI_API_KEY"] = openai_api_key

# Variables 

In [4]:
today = pd.Timestamp.today()

In [5]:
DATE = str(today)[0:10]

# History generation 

## Code input

In [6]:
# add code input as text here
code_input = str('''
    # Libraries
    import streamlit as st
    import pandas as pd
    import numpy as np
    import json
    import base64

    # getting variables from config.json
    with open('config/config.json') as f:
        keys = json.load(f)
    PATH = keys['path']


    # Functions
    # better read functions from utils, but not yet working
    def add_bg():
        st.markdown(
        f"""
        <style>
        .stApp {{
            background-image: url(https://gist.githubusercontent.com/kiralenz/8fa216a5ab87e92944129da83d84dd5b/raw/806c89b90ee9c6eaf75f833eb9482c9cbca7dec1/bread_loaf.svg);
            background-size: cover
        }}
        </style>
        """,
        unsafe_allow_html=True
        )

    def add_logo(height):
        st.markdown(
            f"""
            <style>
                [data-testid="stSidebarNav"] {{
                    background-image: url(https://gist.githubusercontent.com/kiralenz/16203a45856cfb596741f24f85e82fbe/raw/c9d93e3336730e77132d40df4eb8d758471bcfd8/keeprising_logo.svg);
                    background-repeat: no-repeat;
                    padding-top: {height - 40}px;
                    background-position: 20px 20px;
                }}
            </style>
            """,
            unsafe_allow_html=True,
        )

    # merging historical activities (df_hist) with latest activity data (df_new) 
    # on the target or shared date column (date_column)
    def add_latest_activity(df_hist, df_new, date_column):
        # Fixing dtypes
        df_hist[date_column] = df_hist[date_column].astype(str)
        df_new[date_column] = df_new[date_column].astype(str)

        # Df merging of historical feedings and latest feeding
        df = pd.concat([df_hist, df_new], ignore_index=True)
        # Fixing dtypes and formatting
        df[date_column] = pd.to_datetime(df[date_column])
        df[date_column] = df[date_column].dt.strftime('%Y-%m-%d')

        return df

    # adding a column with the microbial composition based on the feeding temperature
    def bacteria_column(df, bac_compos):
        df['bacteria_composition'] = np.where(
            df["temperature"] <= 20,
            bac_compos.loc[
                bac_compos["temperature"] == 20, "dominant_microbes"
            ],
            np.where(
                ((df["temperature"] > 20) & (df["temperature"] <= 25)),
                bac_compos.loc[
                    bac_compos["temperature"] == 25, "dominant_microbes"
                ],
                np.where(
                    ((df["temperature"] > 25) & (df["temperature"] <= 30)),
                    bac_compos.loc[
                        bac_compos["temperature"] == 30, "dominant_microbes"
                    ],
                    bac_compos.loc[
                        bac_compos["temperature"] == 35, "dominant_microbes"
                    ],
                ),
            ),
        )
        return df

    # adding two columns for growth rates to a dataframe, one is time normalized
    def growth_rate_cols(df):
        df['growth_rate'] = (
            df['end_height'] / df['initial_height']
        )

        df['growth_rate_per_hour'] = (
            df['end_height'] 
            / df['initial_height'] 
            / df['feeding_time']
        )

        return df


    # Loading data
    feedings = pd.read_parquet(PATH + 'feedings.parquet')
    bacteria_composition = pd.read_parquet(PATH + 'bacteria_composition.parquet')


    # streamlit page
    st.set_page_config(page_title="Keeprising")
    add_bg()  
    add_logo(height=160)
    st.title('How was your last feeding?') 


    # Adding new feeding data
    # user input for feeding
    date_today = st.date_input('Feeding date')
    temperature_today = st.number_input('Temperature')
    feeding_time_today = st.number_input('Feeding duration')
    initial_height_today = st.number_input('Intial height')
    end_height_today = st.number_input('End height')
    bubble_size_today = st.number_input('Bubble size')

    # error handling for invalid input
    if temperature_today < 0 or feeding_time_today < 0 or initial_height_today < 0 or end_height_today < 0 or end_height_today < initial_height_today:
        st.error('Invalid input! Please enter valid values for all feeding data. IF these had been your actual values consider immediately repeating the feeding to save your starter!')
    else:
        # storing latest information in a df
        latest_feeding = pd.DataFrame(data={
            'feeding_date':date_today, 
            'temperature':temperature_today,
            'feeding_time':feeding_time_today,
            'initial_height':initial_height_today,
            'end_height':end_height_today,
            'bubble_size':bubble_size_today
        }, index=[0])

        # merging new feeding to history of feedings
        feedings = add_latest_activity(df_hist=feedings, df_new=latest_feeding, date_column='feeding_date')

        # saving df to local file
        feedings.to_parquet(PATH + 'feedings.parquet')

        # application display of latest feedings
        st.dataframe(feedings.tail())
        st.write("Nice job! Well done!")


        # Data processing
        feedings_processed = feedings.copy()
        # Bacteria composition depending on temperature
        feedings_processed = bacteria_column(df=feedings_processed, bac_compos=bacteria_composition)
        # Growth rate composition
        feedings_processed = growth_rate_cols(df=feedings_processed)


        # Storing data
        feedings_processed.to_parquet(PATH + 'feedings_processed.parquet')" 
        '''
)

In [7]:
# add saving to text file here

## Feedback

### Test area

In [8]:
llm = OpenAI(temperature=0.9)

In [9]:
text = "What would be a good company name for a company that makes colorful socks?"
print(llm(text))



Cheerful Toes.


In [10]:
from langchain.prompts import PromptTemplate

prompt = PromptTemplate(
    input_variables=["product"],
    template="What is a good name for a company that makes {product}?",
)

In [11]:
print(prompt.format(product="colorful socks"))

What is a good name for a company that makes colorful socks?


In [12]:
llm = OpenAI(temperature=0.9)
prompt = PromptTemplate(
    input_variables=["product"],
    template="What is a good name for a company that makes {product}?",
)

In [13]:
from langchain.chains import LLMChain
chain = LLMChain(llm=llm, prompt=prompt)

In [14]:
chain.run("colorful socks")
# -> '\n\nSocktastic!'

'\n\nVivid Socks.'

### Implementation area

In [10]:
# https://python.langchain.com/en/latest/modules/prompts/chat_prompt_template.html
# defining the prompt template for a standardized input
prompt = PromptTemplate(
    input_variables=["code"],
    template="Please review the following code and give friendly and constructive recommendations how to improve programming style: {code}?",
)

In [None]:
# a simple chain taking user input, formatting the prompt and sending it to the LM
llm = OpenAI(temperature=0.1)

In [10]:
# calling the model to get initial feedback
response = openai.ChatCompletion.create(
  model="gpt-3.5-turbo",
  messages=[
        {"role": "system", "content": "You are reviewing code and giving recommendations on programming style"},
        {"role": "user", "content": code_input},
    ]
)
feedback = response['choices'][0]['message']['content']

In [48]:
feedback

"Here are some recommendations:\n\n1. Use meaningful variable names: The current variable names are okay, but they could be improved for better clarity and readability. For example, instead of `df_hist` and `df_new`, you could use `historical_data` and `new_data`. \n\n2. Modularize code: The current code is in one big chunk, and it could benefit from being broken down into smaller, more manageable functions. You could have a separate file for functions that can be reused across applications. \n\n3. Use docstrings: It would be helpful to have docstrings that describe what each function does, what arguments are expected, and what the function returns. This would make it easier for someone else to understand and use the code. \n\n4. Avoid hardcoding: It is good to load variables such as file paths or configuration parameters from external files like `config.json`, but there are still some hardcoded values such as the background image and logo URLs. It would be better to store these values

In [54]:
history_feedback = pd.DataFrame({
    'date':today,
    'feedback':feedback
}, index=[0])

In [55]:
# formatting the df
history_feedback['date'] = history_feedback['date'].dt.strftime("%Y-%m-%d")
history_feedback = history_feedback.set_index('date')

In [56]:
# replace with PATH here
history_feedback.to_csv('../data/history_feedback.csv')

## Shorten review for learning target

In [62]:
# calling the model to create a short form of the feedback
response = openai.ChatCompletion.create(
  model="gpt-3.5-turbo",
  messages=[
        {"role": "system", "content": "Summarise the points of this feedback and give back only the summarised points"},
        {"role": "user", "content": feedback},
    ]
)
short_feedback = response['choices'][0]['message']['content']

In [63]:
short_feedback

'The feedback suggests that the code can be improved by: \n1. using meaningful variable names, \n2. modularizing the code, \n3. using docstrings to describe functions, \n4. avoiding hardcoding values, \n5. adding error handling, \n6. using consistent formatting, and \n7. organizing imports.'

In [None]:
# TODO or TOCHECK: maybe directly storing in a json with a date as name part 

In [70]:
history_short_feedback = pd.DataFrame({
    'date':today,
    'short_feedback':short_feedback
}, index=[0])

In [71]:
# formatting the df
history_short_feedback['date'] = history_short_feedback['date'].dt.strftime("%Y-%m-%d")
history_short_feedback = history_short_feedback.set_index('date')

In [72]:
# replace with PATH here
history_short_feedback.to_csv('../data/history_short_feedback.csv')

# Adding new code reviews

In [14]:
# add code input as text here
# code_input = str('''
# '''
# )

In [15]:
# code_input

In [16]:
# today = pd.Timestamp.today()

In [17]:
# replace with PATH here
# history_code = pd.read_csv('../data/history_code.csv')

In [18]:
# storing the code in a df to save as csv
# latest_code = pd.DataFrame({
#     'date':today,
#     'code_input':code_input
# }, index=[0])

In [19]:
# updating the history file
# history_code = pd.concat([history_code, latest_code], axis=0, ignore_index=True)

In [20]:
# overwriting the history file with the latest version
# history_code.to_csv('../data/history_code.csv')

In [21]:
# calling the model to get initial feedback
# response = openai.ChatCompletion.create(
#   model="gpt-3.5-turbo",
#   messages=[
#         {"role": "system", "content": "You are reviewing code and giving recommendations on programming style"},
#         {"role": "user", "content": code_input},
#     ]
# )
# feedback = response['choices'][0]['message']['content']

In [22]:
# feedback

In [23]:
# replace with PATH here
# history_feedback = pd.read_csv('../data/history_feedback.csv')

In [24]:
# latest_feedback = pd.DataFrame({
#     'date':today,
#     'code_input':feedback
# }, index=[0])

In [25]:
# updating the history file
# history_feedback = pd.concat([history_feedback, latest_feedback], axis=0, ignore_index=True)

In [26]:
# overwriting the file
# history_feedback.to_csv('../data/history_feedback.csv')

# Learning goals 

## Set learning goals

In [27]:
# This has to be done before the latest request is added OR only if the user checks a button like "generate learning targets".

In [None]:
# Maybe select only the last X feedbacks

In [73]:
# replace with PATH here
# read all short feedbacks
history_short_feedback = pd.read_csv(
    '../data/history_short_feedback.csv', 
    # index_col='date'
)

In [74]:
history_short_feedback

Unnamed: 0,date,short_feedback
0,2023-04-21,The feedback suggests that the code can be imp...


In [76]:
short_feedb_json = history_short_feedback['short_feedback'].to_json()

In [77]:
short_feedb_json

'{"0":"The feedback suggests that the code can be improved by: \\n1. using meaningful variable names, \\n2. modularizing the code, \\n3. using docstrings to describe functions, \\n4. avoiding hardcoding values, \\n5. adding error handling, \\n6. using consistent formatting, and \\n7. organizing imports."}'

In [None]:
# maybe add a rule like "put special weight on recurring aspects"

In [78]:
# calling the model to get initial feedback
response = openai.ChatCompletion.create(
  model="gpt-3.5-turbo",
  messages=[
        {"role": "system", "content": "Summarize the feedback and create up to 4 short learning targets. A learning target should be an aspect of coding the user can focus on and try to improve when coding"},
        {"role": "user", "content": short_feedb_json},
    ]
)
learning_target = response['choices'][0]['message']['content']

In [79]:
learning_target

'Learning targets:\n1. Improve variable naming by using descriptive and meaningful names.\n2. Create modular code by breaking down complex functions into smaller, reusable modules.\n3. Use docstrings to clearly and concisely describe the purpose of functions.\n4. Avoid hardcoding values and use variables/constants whenever possible.'

In [80]:
# write the string variable to a JSON file
with open('../data/learning_target.json', 'w') as f:
    json.dump(learning_target, f)

In [None]:
# TODO: add option to generate new learning targets

## Compare learning goals and latest submitted code

In [81]:
# read the JSON file
with open('../data/learning_target.json', 'r') as f:
    learning_target = json.load(f)

In [82]:
learning_target

'Learning targets:\n1. Improve variable naming by using descriptive and meaningful names.\n2. Create modular code by breaking down complex functions into smaller, reusable modules.\n3. Use docstrings to clearly and concisely describe the purpose of functions.\n4. Avoid hardcoding values and use variables/constants whenever possible.'

In [83]:
feedback

"Here are some recommendations:\n\n1. Use meaningful variable names: The current variable names are okay, but they could be improved for better clarity and readability. For example, instead of `df_hist` and `df_new`, you could use `historical_data` and `new_data`. \n\n2. Modularize code: The current code is in one big chunk, and it could benefit from being broken down into smaller, more manageable functions. You could have a separate file for functions that can be reused across applications. \n\n3. Use docstrings: It would be helpful to have docstrings that describe what each function does, what arguments are expected, and what the function returns. This would make it easier for someone else to understand and use the code. \n\n4. Avoid hardcoding: It is good to load variables such as file paths or configuration parameters from external files like `config.json`, but there are still some hardcoded values such as the background image and logo URLs. It would be better to store these values

In [None]:
# TODO: Improve system prompt!

In [None]:
# calling the model to get initial feedback
response = openai.ChatCompletion.create(
  model="gpt-3.5-turbo",
  messages=[
        {"role": "system", "content": "Compare the learning targets and the feedback. If there is a strong overlap between the latest feedback and the learning targets, the user didn't pay attention to the targets. If that is the case, give him a friendly reminder to pay attention to the learning targets."},
        {"role": "user", "content": learning_target},
        {"role": "user", "content": feedback}
    ]
)
reminder = response['choices'][0]['message']['content']

In [None]:
reminder

'Thanks for the recommendations. It seems like you have a good understanding of the learning targets. However, I noticed that your feedback mostly overlaps with learning targets one, two, and three. Remember to pay attention to all the learning targets when providing feedback. It is important to cover all the areas in which the user needs to improve.'