In [25]:
%load_ext blackcellmagic

In [1]:
import pandas as pd
import numpy as np
import json
import openai
import base64
import os
from langchain import OpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

In [2]:
# getting keys
# TODO: remove unnecessary aspects
with open('../config.json') as f:
    keys = json.load(f)
PATH = keys['path']
openai_organization = keys['openai_organization']
openai.organization = openai_organization
openai_api_key = keys['openai_api_key']
openai.api_key = openai_api_key

In [3]:
os.environ["OPENAI_API_KEY"] = openai_api_key

# Variables 

In [4]:
today = pd.Timestamp.today()

In [5]:
DATE = str(today)[0:10]

# History generation 

## Code input

In [6]:
# add code input as text here
code_input = str('''
    # Libraries
    import streamlit as st
    import pandas as pd
    import numpy as np
    import json
    import base64

    # getting variables from config.json
    with open('config/config.json') as f:
        keys = json.load(f)
    PATH = keys['path']


    # Functions
    # better read functions from utils, but not yet working
    def add_bg():
        st.markdown(
        f"""
        <style>
        .stApp {{
            background-image: url(https://gist.githubusercontent.com/kiralenz/8fa216a5ab87e92944129da83d84dd5b/raw/806c89b90ee9c6eaf75f833eb9482c9cbca7dec1/bread_loaf.svg);
            background-size: cover
        }}
        </style>
        """,
        unsafe_allow_html=True
        )

    def add_logo(height):
        st.markdown(
            f"""
            <style>
                [data-testid="stSidebarNav"] {{
                    background-image: url(https://gist.githubusercontent.com/kiralenz/16203a45856cfb596741f24f85e82fbe/raw/c9d93e3336730e77132d40df4eb8d758471bcfd8/keeprising_logo.svg);
                    background-repeat: no-repeat;
                    padding-top: {height - 40}px;
                    background-position: 20px 20px;
                }}
            </style>
            """,
            unsafe_allow_html=True,
        )

    # merging historical activities (df_hist) with latest activity data (df_new) 
    # on the target or shared date column (date_column)
    def add_latest_activity(df_hist, df_new, date_column):
        # Fixing dtypes
        df_hist[date_column] = df_hist[date_column].astype(str)
        df_new[date_column] = df_new[date_column].astype(str)

        # Df merging of historical feedings and latest feeding
        df = pd.concat([df_hist, df_new], ignore_index=True)
        # Fixing dtypes and formatting
        df[date_column] = pd.to_datetime(df[date_column])
        df[date_column] = df[date_column].dt.strftime('%Y-%m-%d')

        return df

    # adding a column with the microbial composition based on the feeding temperature
    def bacteria_column(df, bac_compos):
        df['bacteria_composition'] = np.where(
            df["temperature"] <= 20,
            bac_compos.loc[
                bac_compos["temperature"] == 20, "dominant_microbes"
            ],
            np.where(
                ((df["temperature"] > 20) & (df["temperature"] <= 25)),
                bac_compos.loc[
                    bac_compos["temperature"] == 25, "dominant_microbes"
                ],
                np.where(
                    ((df["temperature"] > 25) & (df["temperature"] <= 30)),
                    bac_compos.loc[
                        bac_compos["temperature"] == 30, "dominant_microbes"
                    ],
                    bac_compos.loc[
                        bac_compos["temperature"] == 35, "dominant_microbes"
                    ],
                ),
            ),
        )
        return df

    # adding two columns for growth rates to a dataframe, one is time normalized
    def growth_rate_cols(df):
        df['growth_rate'] = (
            df['end_height'] / df['initial_height']
        )

        df['growth_rate_per_hour'] = (
            df['end_height'] 
            / df['initial_height'] 
            / df['feeding_time']
        )

        return df


    # Loading data
    feedings = pd.read_parquet(PATH + 'feedings.parquet')
    bacteria_composition = pd.read_parquet(PATH + 'bacteria_composition.parquet')


    # streamlit page
    st.set_page_config(page_title="Keeprising")
    add_bg()  
    add_logo(height=160)
    st.title('How was your last feeding?') 


    # Adding new feeding data
    # user input for feeding
    date_today = st.date_input('Feeding date')
    temperature_today = st.number_input('Temperature')
    feeding_time_today = st.number_input('Feeding duration')
    initial_height_today = st.number_input('Intial height')
    end_height_today = st.number_input('End height')
    bubble_size_today = st.number_input('Bubble size')

    # error handling for invalid input
    if temperature_today < 0 or feeding_time_today < 0 or initial_height_today < 0 or end_height_today < 0 or end_height_today < initial_height_today:
        st.error('Invalid input! Please enter valid values for all feeding data. IF these had been your actual values consider immediately repeating the feeding to save your starter!')
    else:
        # storing latest information in a df
        latest_feeding = pd.DataFrame(data={
            'feeding_date':date_today, 
            'temperature':temperature_today,
            'feeding_time':feeding_time_today,
            'initial_height':initial_height_today,
            'end_height':end_height_today,
            'bubble_size':bubble_size_today
        }, index=[0])

        # merging new feeding to history of feedings
        feedings = add_latest_activity(df_hist=feedings, df_new=latest_feeding, date_column='feeding_date')

        # saving df to local file
        feedings.to_parquet(PATH + 'feedings.parquet')

        # application display of latest feedings
        st.dataframe(feedings.tail())
        st.write("Nice job! Well done!")


        # Data processing
        feedings_processed = feedings.copy()
        # Bacteria composition depending on temperature
        feedings_processed = bacteria_column(df=feedings_processed, bac_compos=bacteria_composition)
        # Growth rate composition
        feedings_processed = growth_rate_cols(df=feedings_processed)


        # Storing data
        feedings_processed.to_parquet(PATH + 'feedings_processed.parquet')" 
        '''
)

### add saving to text file here

## Feedback

### Test area

In [7]:
llm = OpenAI(temperature=0.9)

In [8]:
text = "What would be a good company name for a company that makes colorful socks?"
print(llm(text))



BrightToe Socks


In [9]:
from langchain.prompts import PromptTemplate

prompt = PromptTemplate(
    input_variables=["product"],
    template="What is a good name for a company that makes {product}?",
)

In [10]:
print(prompt.format(product="colorful socks"))

What is a good name for a company that makes colorful socks?


In [11]:
llm = OpenAI(temperature=0.9)
prompt = PromptTemplate(
    input_variables=["product"],
    template="What is a good name for a company that makes {product}?",
)

In [12]:
from langchain.chains import LLMChain
chain = LLMChain(llm=llm, prompt=prompt)

In [13]:
chain.run("colorful socks")
# -> '\n\nSocktastic!'

'\n\nHappyToes Socks.'

### Langchain implementation area

In [14]:
# https://python.langchain.com/en/latest/modules/prompts/chat_prompt_template.html
# defining the prompt template for a standardized input
# TODO: refine prompt
feedback_prompt = PromptTemplate(
    input_variables=["code"],
    template="Please review the following code and give five recommendations with explanations how to improve the programming: {code}?",
)

In [15]:
# initializing the LM
# TO EXPLORE: adjust temperature
# TO EXPLORE: test other LMs
feedback_llm = OpenAI(temperature=0.3)

In [16]:
# a simple chain taking user input, formatting the prompt and sending it to the LM
feedback_chain = LLMChain(llm=feedback_llm, prompt=feedback_prompt)

In [17]:
# Run the chain only specifying the input variable.
feedback = feedback_chain.run(code_input)

In [18]:
feedback

'\n\n1. Move the code that is not related to the Streamlit application (e.g. functions, loading data, data processing) to a separate file. This will make the code easier to read and maintain. \n\n2. Move the code for styling the Streamlit application (e.g. add_bg(), add_logo()) to a separate file. This will make the code easier to read and maintain. \n\n3. Use meaningful variable names to improve readability. For example, instead of PATH, use file_path. \n\n4. Use more descriptive names for functions. For example, instead of add_latest_activity, use merge_historical_and_latest_activity. \n\n5. Add comments to explain the purpose of each function and the code blocks. This will make the code easier to understand.'

#### TODO: save feedback to text files

### Direct API call area OLD

## Shorten review for learning target

### Langchain implementation area

In [19]:
# defining the prompt template for a standardized input
short_feedback_prompt = PromptTemplate(
    input_variables=["feedback"],
    template="Please shorten the aspects of the following feeback: {feedback}?",
)

In [20]:
# initializing the LM
short_feedback_llm = OpenAI(temperature=0)

In [21]:
# a simple chain taking user input, formatting the prompt and sending it to the LM
short_feedback_chain = LLMChain(llm=short_feedback_llm, prompt=short_feedback_prompt)

In [22]:
# Run the chain only specifying the input variable.
short_feedback = short_feedback_chain.run(feedback)

In [23]:
short_feedback

'\n\n1. Move non-Streamlit code to separate file.\n2. Move Streamlit styling code to separate file.\n3. Use meaningful variable names.\n4. Use descriptive function names.\n5. Add comments to explain code.'

#### TODO: save feedback to text files

### Direct API call area OLD

# Adding new code reviews

In [14]:
# add code input as text here
# code_input = str('''
# '''
# )

In [15]:
# code_input

In [16]:
# today = pd.Timestamp.today()

In [17]:
# replace with PATH here
# history_code = pd.read_csv('../data/history_code.csv')

In [18]:
# storing the code in a df to save as csv
# latest_code = pd.DataFrame({
#     'date':today,
#     'code_input':code_input
# }, index=[0])

In [19]:
# updating the history file
# history_code = pd.concat([history_code, latest_code], axis=0, ignore_index=True)

In [20]:
# overwriting the history file with the latest version
# history_code.to_csv('../data/history_code.csv')

In [21]:
# calling the model to get initial feedback
# response = openai.ChatCompletion.create(
#   model="gpt-3.5-turbo",
#   messages=[
#         {"role": "system", "content": "You are reviewing code and giving recommendations on programming style"},
#         {"role": "user", "content": code_input},
#     ]
# )
# feedback = response['choices'][0]['message']['content']

In [22]:
# feedback

In [23]:
# replace with PATH here
# history_feedback = pd.read_csv('../data/history_feedback.csv')

In [24]:
# latest_feedback = pd.DataFrame({
#     'date':today,
#     'code_input':feedback
# }, index=[0])

In [25]:
# updating the history file
# history_feedback = pd.concat([history_feedback, latest_feedback], axis=0, ignore_index=True)

In [26]:
# overwriting the file
# history_feedback.to_csv('../data/history_feedback.csv')

# Learning goals 

## Set learning goals

### Langchain implementation area

In [27]:
# This has to be done once and then only if the user checks a button like "generate learning targets".

In [None]:
# select only the last X feedbacks
# append or concatenate the txt files

In [33]:
latest_short_feedbacks = str(
    "\n\nImprove programming style: Use docstrings, descriptive variable names, logical sections/functions, consistent formatting, linter/formatter, comments, remove unnecessary code, try-catch blocks, context managers.\n\n1. Move non-Streamlit code to separate file.\n2. Move Streamlit styling code to separate file.\n3. Use meaningful variable names.\n4. Use descriptive function names.\n5. Add comments to explain code."
)

In [34]:
latest_short_feedbacks

'\n\nImprove programming style: Use docstrings, descriptive variable names, logical sections/functions, consistent formatting, linter/formatter, comments, remove unnecessary code, try-catch blocks, context managers.\n\n1. Move non-Streamlit code to separate file.\n2. Move Streamlit styling code to separate file.\n3. Use meaningful variable names.\n4. Use descriptive function names.\n5. Add comments to explain code.'

In [35]:
# defining the prompt template for a standardized input
learning_goal_prompt = PromptTemplate(
    input_variables=["latest_short_feedbacks"],
    template="Please select the four most relevant points from this list of feedback comments. Consider content recommendations more relevant than format recommendations. Consider repetitive aspects more. List of feedback comments: {latest_short_feedbacks}.",
)

In [37]:
# initializing the LM
# TODO: check for optimal LLM
learning_goal_llm = OpenAI(temperature=0.2)

In [38]:
# a simple chain taking user input, formatting the prompt and sending it to the LM
learning_goal_chain = LLMChain(llm=learning_goal_llm, prompt=learning_goal_prompt)

In [39]:
# Run the chain only specifying the input variable.
learning_goals = learning_goal_chain.run(latest_short_feedbacks)

In [40]:
learning_goals

'\n6. Use linter/formatter to maintain consistent formatting.\n7. Use try-catch blocks to handle exceptions.\n8. Use context managers to manage resources.'

### Direct API + JSON area OLD

In [77]:
short_feedb_json

'{"0":"The feedback suggests that the code can be improved by: \\n1. using meaningful variable names, \\n2. modularizing the code, \\n3. using docstrings to describe functions, \\n4. avoiding hardcoding values, \\n5. adding error handling, \\n6. using consistent formatting, and \\n7. organizing imports."}'

## Compare learning goals and latest submitted code

### Langchain implementation area

In [None]:
# read latest learning goals!

In [59]:
evaluation_prompt = PromptTemplate(
    input_variables=["code_input", "learning_goals"],
    template="Please compare this code: {code_input} with these learning goals: {learning_goals}. If the programmer considered the learning goals when writing the provided code, say something motivating. If the programmer didn't consider the learning goals, gently remind the person of their learning goals.",
)

In [63]:
evaluation_prompt = evaluation_prompt.format(code_input=code_input, learning_goals=learning_goals)

In [64]:
evaluation_llm = OpenAI(temperature=0.3)

In [66]:
evaluation_llm(evaluation_prompt)

'\n\nYes, the programmer appears to have considered the learning goals when writing the provided code. The code is well-formatted and easy to read, and the programmer has included try-catch blocks and context managers to handle exceptions and manage resources.'

### Json + direct API area OLD