In [1]:
%load_ext blackcellmagic

In [2]:
import pandas as pd
import numpy as np
import json
import openai
import base64
import os
from langchain import OpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
import datetime

In [3]:
# Read the OpenAI credentials and the data path from the local config file
# TODO: remove unnecessary aspects
with open('../config.json') as config_file:
    keys = json.load(config_file)

PATH = keys['path']

# Each credential is kept as a notebook variable and registered on the openai module
openai_organization = openai.organization = keys['openai_organization']
openai_api_key = openai.api_key = keys['openai_api_key']

In [4]:
# Also expose the key as an environment variable. The OpenAI(...) wrappers below are
# created without an explicit key — presumably they pick it up from here (TODO confirm).
os.environ["OPENAI_API_KEY"] = openai_api_key

# Variables 

In [5]:
# Capture the current local date and time once; the timestamp string derived from it
# is used in the names of the files written further down.
now = datetime.datetime.now()

In [6]:
# Render the captured time as a compact 14-digit YYYYMMDDHHMMSS string
timestamp = f"{now:%Y%m%d%H%M%S}"

# Code review

## Code input

In [7]:
# TODO: read the code input directly from GitHub instead of pasting it below

In [8]:
# add code input as text here
# The pasted code is kept verbatim as a plain string: it is never executed in this
# notebook, only written to disk and inserted into the LLM prompts.
code_input = '''
    # Libraries
    import streamlit as st
    import pandas as pd
    import numpy as np
    import json
    import base64

    # getting variables from config.json
    with open('config/config.json') as f:
        keys = json.load(f)
    PATH = keys['path']


    # Functions
    # better read functions from utils, but not yet working
    def add_bg():
        st.markdown(
        f"""
        <style>
        .stApp {{
            background-image: url(https://gist.githubusercontent.com/kiralenz/8fa216a5ab87e92944129da83d84dd5b/raw/806c89b90ee9c6eaf75f833eb9482c9cbca7dec1/bread_loaf.svg);
            background-size: cover
        }}
        </style>
        """,
        unsafe_allow_html=True
        )

    def add_logo(height):
        st.markdown(
            f"""
            <style>
                [data-testid="stSidebarNav"] {{
                    background-image: url(https://gist.githubusercontent.com/kiralenz/16203a45856cfb596741f24f85e82fbe/raw/c9d93e3336730e77132d40df4eb8d758471bcfd8/keeprising_logo.svg);
                    background-repeat: no-repeat;
                    padding-top: {height - 40}px;
                    background-position: 20px 20px;
                }}
            </style>
            """,
            unsafe_allow_html=True,
        )

    # merging historical activities (df_hist) with latest activity data (df_new) 
    # on the target or shared date column (date_column)
    def add_latest_activity(df_hist, df_new, date_column):
        # Fixing dtypes
        df_hist[date_column] = df_hist[date_column].astype(str)
        df_new[date_column] = df_new[date_column].astype(str)

        # Df merging of historical feedings and latest feeding
        df = pd.concat([df_hist, df_new], ignore_index=True)
        # Fixing dtypes and formatting
        df[date_column] = pd.to_datetime(df[date_column])
        df[date_column] = df[date_column].dt.strftime('%Y-%m-%d')

        return df

    # adding a column with the microbial composition based on the feeding temperature
    def bacteria_column(df, bac_compos):
        df['bacteria_composition'] = np.where(
            df["temperature"] <= 20,
            bac_compos.loc[
                bac_compos["temperature"] == 20, "dominant_microbes"
            ],
            np.where(
                ((df["temperature"] > 20) & (df["temperature"] <= 25)),
                bac_compos.loc[
                    bac_compos["temperature"] == 25, "dominant_microbes"
                ],
                np.where(
                    ((df["temperature"] > 25) & (df["temperature"] <= 30)),
                    bac_compos.loc[
                        bac_compos["temperature"] == 30, "dominant_microbes"
                    ],
                    bac_compos.loc[
                        bac_compos["temperature"] == 35, "dominant_microbes"
                    ],
                ),
            ),
        )
        return df

    # adding two columns for growth rates to a dataframe, one is time normalized
    def growth_rate_cols(df):
        df['growth_rate'] = (
            df['end_height'] / df['initial_height']
        )

        df['growth_rate_per_hour'] = (
            df['end_height'] 
            / df['initial_height'] 
            / df['feeding_time']
        )

        return df


    # Loading data
    feedings = pd.read_parquet(PATH + 'feedings.parquet')
    bacteria_composition = pd.read_parquet(PATH + 'bacteria_composition.parquet')


    # streamlit page
    st.set_page_config(page_title="Keeprising")
    add_bg()  
    add_logo(height=160)
    st.title('How was your last feeding?') 


    # Adding new feeding data
    # user input for feeding
    date_today = st.date_input('Feeding date')
    temperature_today = st.number_input('Temperature')
    feeding_time_today = st.number_input('Feeding duration')
    initial_height_today = st.number_input('Intial height')
    end_height_today = st.number_input('End height')
    bubble_size_today = st.number_input('Bubble size')

    # error handling for invalid input
    if temperature_today < 0 or feeding_time_today < 0 or initial_height_today < 0 or end_height_today < 0 or end_height_today < initial_height_today:
        st.error('Invalid input! Please enter valid values for all feeding data. IF these had been your actual values consider immediately repeating the feeding to save your starter!')
    else:
        # storing latest information in a df
        latest_feeding = pd.DataFrame(data={
            'feeding_date':date_today, 
            'temperature':temperature_today,
            'feeding_time':feeding_time_today,
            'initial_height':initial_height_today,
            'end_height':end_height_today,
            'bubble_size':bubble_size_today
        }, index=[0])

        # merging new feeding to history of feedings
        feedings = add_latest_activity(df_hist=feedings, df_new=latest_feeding, date_column='feeding_date')

        # saving df to local file
        feedings.to_parquet(PATH + 'feedings.parquet')

        # application display of latest feedings
        st.dataframe(feedings.tail())
        st.write("Nice job! Well done!")


        # Data processing
        feedings_processed = feedings.copy()
        # Bacteria composition depending on temperature
        feedings_processed = bacteria_column(df=feedings_processed, bac_compos=bacteria_composition)
        # Growth rate composition
        feedings_processed = growth_rate_cols(df=feedings_processed)


        # Storing data
        feedings_processed.to_parquet(PATH + 'feedings_processed.parquet')
        '''

In [9]:
# File name for the submitted code, prefixed with the run timestamp.
# The "../data/" folder is hard-coded here rather than derived from PATH.
# TODO: add the PATH to the filename
filename_codeinput = f"../data/{timestamp}_codeinput.txt"

In [10]:
# Persist the submitted code under its timestamped file name
with open(filename_codeinput, mode="w") as out_file:
    out_file.write(code_input)

## Feedback

In [11]:
# Prompt template for the initial review; {code} is replaced with the submitted source
# TODO: refine prompt
feedback_prompt = PromptTemplate(
    template=(
        "Please review the following code and give five recommendations "
        "with detailed explanations how to improve the programming: {code}?"
    ),
    input_variables=["code"],
)

In [12]:
# Language model for the review step, created with temperature 0.3
# TO EXPLORE: adjust temperature
# TO EXPLORE: test other LMs
feedback_llm = OpenAI(temperature=0.3)

In [13]:
# Chain that combines feedback_prompt with feedback_llm: the input is formatted into
# the prompt template and the result is sent to the model
feedback_chain = LLMChain(llm=feedback_llm, prompt=feedback_prompt)

In [14]:
# Run the review chain; the single positional argument fills the prompt's only
# input variable ("code"). The model's answer is kept as a string in `feedback`.
feedback = feedback_chain.run(code_input)

In [15]:
feedback

'\n\n1. Move the code that is used to read the config.json file into a separate function. This will make the code more organized and easier to read. \n2. Move the code that is used to add the background and logo into separate functions. This will make the code more organized and easier to read. \n3. Move the code that is used to add the latest activity into a separate function. This will make the code more organized and easier to read. \n4. Move the code that is used to add the bacteria column into a separate function. This will make the code more organized and easier to read. \n5. Move the code that is used to add the growth rate columns into a separate function. This will make the code more organized and easier to read.'

In [16]:
# File name for the full review text, prefixed with the run timestamp.
# The "../data/" folder is hard-coded here rather than derived from PATH.
# TODO: add the PATH to the filename
filename_feedback = f"../data/{timestamp}_feedback.txt"

In [17]:
# Persist the full review text under its timestamped file name
with open(filename_feedback, mode="w") as out_file:
    out_file.write(feedback)

# Learning goals 

## Shorten review for learning target

In [18]:
# Prompt template that asks the model to condense an earlier review;
# {feedback} is replaced with the full review text.
short_feedback_prompt = PromptTemplate(
    input_variables=["feedback"],
    template="Please shorten the aspects of the following feedback: {feedback}?",
)

In [19]:
# Language model for the shortening step, created with temperature 0
short_feedback_llm = OpenAI(temperature=0)

In [20]:
# Chain that combines short_feedback_prompt with short_feedback_llm: the input is
# formatted into the prompt template and the result is sent to the model
short_feedback_chain = LLMChain(llm=short_feedback_llm, prompt=short_feedback_prompt)

In [21]:
# Run the shortening chain on the full review; the single positional argument fills
# the prompt's only input variable ("feedback").
short_feedback = short_feedback_chain.run(feedback)

In [22]:
short_feedback

'\n\n1. Move config.json file code into a separate function. \n2. Move background/logo code into separate functions. \n3. Move latest activity code into a separate function. \n4. Move bacteria column code into a separate function. \n5. Move growth rate columns code into a separate function.'

In [23]:
# File name for the shortened review, prefixed with the run timestamp.
# The "_shortfeedback.txt" suffix is what the "Set learning goals" section filters on.
# TODO: add the PATH to the filename
filename_short_feedback = f"../data/{timestamp}_shortfeedback.txt"

In [24]:
# Persist the shortened review under its timestamped file name
with open(filename_short_feedback, mode="w") as out_file:
    out_file.write(short_feedback)

## Set learning goals

In [47]:
# Run this section once initially; afterwards only when the user explicitly asks for it (e.g. via a "generate learning targets" button).

In [48]:
# Folder that holds the timestamped text files (same "../data" location the
# file names above are built with)
# TODO: use PATH here
directory = "../data"

In [49]:
# List the names of all entries in the data folder (names only, no folder prefix)
files = os.listdir(directory)

In [50]:
# Keep only short-feedback files named "<YYYYMMDDHHMMSS>_shortfeedback.txt".
# Besides the overall length, the prefix must be 14 digits: the name-based sort in
# the next step only orders files by date if every name starts with a real timestamp.
shortfeedback_suffix = "_shortfeedback.txt"
timestamp_length = 14  # length of the "%Y%m%d%H%M%S" timestamp used in file names
files = [
    f
    for f in files
    if f.endswith(shortfeedback_suffix)
    and len(f) == timestamp_length + len(shortfeedback_suffix)
    and f[:timestamp_length].isdigit()
]

In [51]:
# Newest first: with a timestamp prefix, descending name order is descending date order
files = sorted(files, reverse=True)

In [52]:
# Take the first two entries of the newest-first list (fewer if fewer files matched)
latest_files = files[:2]

In [53]:
# Load the text of each selected file, keeping the newest-first order
file_contents = []
for name in latest_files:
    source_path = os.path.join(directory, name)
    with open(source_path, "r") as handle:
        file_contents.append(handle.read())

In [54]:
# Join the loaded file contents into one string, separated by a single newline
latest_short_feedbacks = "\n".join(file_contents)

In [55]:
latest_short_feedbacks

'\n\n1. Move config.json file code into a separate function. \n2. Move background/logo code into separate functions. \n3. Move latest activity code into a separate function. \n4. Move bacteria column code into a separate function. \n5. Move growth rate columns code into a separate function.\n\n\n1. Add try/except block for user input.\n2. Move functions to separate file.\n3. Add comments for functions.\n4. Add comments for variables.\n5. Add comments for code lines.'

In [56]:
# Prompt template that asks the model to pick learning goals from the combined
# short feedback; {short_feedback} is replaced with that text
learning_goal_prompt = PromptTemplate(
    template=(
        "Please select the four most relevant points from this list of feedback comments: {short_feedback}. "
        "Consider content recommendations more relevant than format recommendations. "
        "Consider repetitive aspects more."
    ),
    input_variables=["short_feedback"],
)

In [57]:
# Language model for the learning-goal selection, created with temperature 0.5
# TODO: check for optimal LLM
learning_goal_llm = OpenAI(temperature=0.5)

In [58]:
# Chain that combines learning_goal_prompt with learning_goal_llm: the input is
# formatted into the prompt template and the result is sent to the model
learning_goal_chain = LLMChain(llm=learning_goal_llm, prompt=learning_goal_prompt)

In [59]:
# Run the selection chain on the combined short feedback; the single positional
# argument fills the prompt's only input variable ("short_feedback").
learning_goals = learning_goal_chain.run(latest_short_feedbacks)

In [60]:
learning_goals

'\n\n1. Move config.json file code into a separate function. \n2. Move functions to separate file.\n3. Add comments for functions.\n4. Add try/except block for user input.'

In [61]:
# File name for the learning goals, prefixed with the run timestamp.
# The "_learninggoals.txt" suffix is what the comparison section filters on.
# TODO: add the PATH to the filename
filename_learning_goals = f"../data/{timestamp}_learninggoals.txt"

In [62]:
# Persist the selected learning goals under their timestamped file name
with open(filename_learning_goals, mode="w") as out_file:
    out_file.write(learning_goals)

## Compare learning goals and latest submitted code

In [40]:
# read latest learning goals

In [64]:
# List the names of all entries in the data folder again; this rebinds `files` and
# relies on `directory` from the "Set learning goals" section
files = os.listdir(directory)

In [65]:
# Keep only learning-goal files named "<YYYYMMDDHHMMSS>_learninggoals.txt".
# Besides the overall length, the prefix must be 14 digits: the name-based sort in
# the next step only orders files by date if every name starts with a real timestamp.
learninggoals_suffix = "_learninggoals.txt"
timestamp_length = 14  # length of the "%Y%m%d%H%M%S" timestamp used in file names
files = [
    f
    for f in files
    if f.endswith(learninggoals_suffix)
    and len(f) == timestamp_length + len(learninggoals_suffix)
    and f[:timestamp_length].isdigit()
]

In [67]:
# Newest first: with a timestamp prefix, descending name order is descending date order
files = sorted(files, reverse=True)

In [72]:
# Keep at most the newest file; the slice yields a list, which is empty when no
# file matched the filter
latest_file = files[:1]

In [80]:
# Load the most recent stored learning goals; this replaces any value that
# `learning_goals` held from earlier in the session.
# Fail with a clear message instead of a bare IndexError when nothing was stored yet.
if not latest_file:
    raise FileNotFoundError(
        f"No '*_learninggoals.txt' file found in {directory!r}; generate learning goals first."
    )
with open(os.path.join(directory, latest_file[0]), "r") as f:
    learning_goals = f.read()

In [83]:
# Prompt template that compares the submitted code with the stored learning goals;
# {code_input} and {learning_goals} are replaced when the template is formatted
evaluation_prompt = PromptTemplate(
    template=(
        "Please compare this code: {code_input} with these learning goals: {learning_goals}. "
        "If the programmer considered the learning goals when writing the provided code, say something motivating. "
        "If the programmer didn't consider the learning goals, gently remind the person of their learning goals."
    ),
    input_variables=["code_input", "learning_goals"],
)

In [84]:
evaluation_prompt = evaluation_prompt.format(code_input=code_input, learning_goals=learning_goals)

In [85]:
evaluation_llm = OpenAI(temperature=0.3)

In [86]:
evaluation_llm(evaluation_prompt)

'\n\nThe programmer has considered the learning goals when writing the provided code. The code is well organized and functions are written in separate functions. The functions are also commented to explain what they do. The code also includes an error handling block for user input. Well done!'