In [25]:
from langchain.llms import OpenAI
from langchain.chains import LLMChain, TransformChain
from langchain.prompts import PromptTemplate
from butler.utils import get_columns_from_text, get_properties_from_details
from butler.strings import DATABASE_BASIC_PROPERTIES
from langchain.chains import SimpleSequentialChain, SequentialChain

llm = OpenAI(temperature=0.7, openai_api_key="OPENAI_API_KEY")

In [26]:
propertyNotation = {
    "title": "title",
    "text": "rich_text",
    "number": "number",
    "select": "select",
    "multi-select": "multi_select",
    "status": "status",
    "date": "date",
    "person": "people",
    "files & media": "files",
    "checkbox": "checkbox",
    "url": "url",
    "email": "email",
    "phone": "phone_number",
}

In [28]:
template = """{statement}

For the above statement, what database columns should I use. Start with a suitable column. (enumerate with numbers)
"""
prompt_template = PromptTemplate(input_variables=["statement"], template=template)
properties_chain = LLMChain(llm=llm, output_key="result", prompt=prompt_template)

In [29]:
def transform_func(inputs: dict) -> dict:
    columns = get_columns_from_text(inputs["result"])
    first_column = columns[0]
    return {"Title": first_column}

In [30]:
transform_chain = TransformChain(input_variables=["result"], output_variables=["Title"], transform=transform_func)

In [5]:
database_properties[0]

'Goal Name'

In [31]:
types = "Title, Text, Number, Select, Multi-select, Status, Date, Person, Files & media, Checkbox, URL, Email, Phone"

In [32]:
template = """
{result}

What are the type for the following properties from the given available types: {types} (one has to be Title)

1. {Title}:"""

In [33]:
prompt_template = PromptTemplate(
    input_variables=["types", "result", "Title"], template=template
)

In [34]:
property_type_chain = LLMChain(llm=llm, prompt=prompt_template, output_key='type_dict')

In [37]:
overall_chain = SequentialChain(chains=[properties_chain, transform_chain, property_type_chain], input_variables=['statement', "types"], output_variables=['result', 'Title', 'type_dict'], verbose=True)

overall = overall_chain(
    {
        'statement':  "I need to create a 2023 goal tracker table. I need to have a status column please",
        'types': types
    }
)



[1m> Entering new SequentialChain chain...[0m
[1mChain 0[0m:
{'result': '\n1. Goal ID \n2. Goal Description \n3. Goal Deadline \n4. Status \n5. Notes'}

[1mChain 1[0m:
{'Title': 'Goal ID'}

[1mChain 2[0m:
{'review': ' Number\n2. Goal Description: Text\n3. Goal Deadline: Date\n4. Status: Select\n5. Notes: Text'}


[1m> Finished chain.[0m


In [38]:
overall

{'statement': 'I need to create a 2023 goal tracker table. I need to have a status column please',
 'types': 'Title, Text, Number, Select, Multi-select, Status, Date, Person, Files & media, Checkbox, URL, Email, Phone',
 'result': '\n1. Goal ID \n2. Goal Description \n3. Goal Deadline \n4. Status \n5. Notes',
 'Title': 'Goal ID',
 'review': ' Number\n2. Goal Description: Text\n3. Goal Deadline: Date\n4. Status: Select\n5. Notes: Text'}

In [46]:
database_types_string = property_type_chain.run(
   { "Title": "Goal",
    "types": types,
    "result": database_properties_string}
)

In [47]:
print(database_types_string)

 Title
2. Goal Description: Text
3. Start Date: Date
4. End Date: Date
5. Progress Percentage: Number
6. Tasks: Multi-select
7. Notes: Text
8. Status: Select


In [48]:
get_columns_from_text(database_types_string)

['Goal Description: Text',
 'Start Date: Date',
 'End Date: Date',
 'Progress Percentage: Number',
 'Tasks: Multi-select',
 'Notes: Text',
 'Status: Select']

In [37]:

result_types = list(map(lambda x: x.split(":")[1].strip(), get_columns_from_text(database_types_string)))
# result_types.insert(0, "Title")

In [40]:
result_types

['', '', '', '', '', '', '']

In [38]:
database_properties = get_columns_from_text(database_properties_string)

In [39]:
database_properties

['Status Column',
 'Goal Name',
 'Goal Description',
 'Start Date',
 'End Date',
 'Progress Percentage',
 'Tasks',
 'Notes']

In [16]:
tuples = zip(database_properties, result_types)

In [17]:
tuples = list(tuples)

In [66]:
# make a list of javascript objects
js_objects = []
for tup in tuples:
    js_objects.append({"name": tup[0], "type": tup[1]})

print(js_objects)

[{'name': 'Goal Name', 'type': 'Title'}, {'name': 'Goal Description', 'type': 'Text'}, {'name': 'Start Date', 'type': 'Date'}, {'name': 'End Date', 'type': 'Date'}, {'name': 'Status', 'type': 'Select'}, {'name': 'Priority', 'type': 'Select'}, {'name': 'Progress', 'type': 'Number'}, {'name': 'Notes', 'type': 'Text'}]


In [41]:
filtered_tuples = list(filter(lambda x: propertyNotation[x[1].lower()]  in ["select", "multi_select"], tuples))
template = """{result}

give no more than five examples for {properties} (comma separated)

{prop}:"""

prompt_template = PromptTemplate(
    input_variables=["result", "properties", "prop"], template=template
)

property_type_chain = LLMChain(llm=llm, prompt=prompt_template)

select_multi_select_ = list(map(lambda x: x[0], filtered_tuples))

example_options_string = property_type_chain.run(
    {"prop": select_multi_select_[0], "properties": ", ".join(select_multi_select_) , "result": database_properties_string}
)

In [42]:
example_options_string

' Completed, In Progress, On Hold, Deferred, Cancelled \nPriority: High, Medium, Low'

In [47]:
example_options_list = example_options_string.split("\n")
example_options_list

[' Completed, In Progress, On Hold, Deferred, Cancelled ',
 'Priority: High, Medium, Low']

In [51]:
options_dict = {
    select_multi_select_[0]: example_options_list[0].strip().split(","),
}
for i in range(1, len(example_options_list)):
    options_dict[select_multi_select_[i]] = example_options_list[i].split(": ")[1].split(",")
options_dict

{'Status': ['Completed',
  ' In Progress',
  ' On Hold',
  ' Deferred',
  ' Cancelled'],
 'Priority': ['High', ' Medium', ' Low']}

In [67]:
js_objects

[{'name': 'Goal Name', 'type': 'Title'},
 {'name': 'Goal Description', 'type': 'Text'},
 {'name': 'Start Date', 'type': 'Date'},
 {'name': 'End Date', 'type': 'Date'},
 {'name': 'Status', 'type': 'Select'},
 {'name': 'Priority', 'type': 'Select'},
 {'name': 'Progress', 'type': 'Number'},
 {'name': 'Notes', 'type': 'Text'}]

In [68]:
# add the options to the js_objects
import random


for obj in js_objects:
    if obj["name"] in options_dict:
        options_list = list(map(lambda x: {"name": x, "color": random.choice(["default", "gray", "brown", "orange", "yellow", "green", "blue", "purple", "pink", "red"])}, options_dict[obj["name"]]))
        obj["options"] = options_list

In [1]:
csv_output = """
Task ID|Task Description|Priority Level|Due Date|Status|Date Created|Date Completed
T1|Write report|High|02/07/2020|Incomplete|01/01/2020|
T2|Go to the store|Low|02/10/2020|Completed|01/02/2020|02/05/2020
T3|Clean the house|Medium|02/15/2020|Incomplete|01/03/2020|
T4|Complete online course|High|03/01/2020|Incomplete|01/04/2020|
T5|Read book|Low|02/28/2020|Completed|01/05/2020|02/20/2020"""

In [2]:
from io import StringIO
import pandas as pd
table = pd.read_csv(StringIO(csv_output), sep="|")
table

KeyboardInterrupt: 

In [10]:
table = table.fillna("")

In [59]:
# list columns that have dates in their name
date_columns = list(filter(lambda x: "date" in x.lower(), table.columns))
for column in date_columns:
    table[column] = pd.to_datetime(table[column]).dt.strftime("%Y-%m-%d")

In [60]:
table

Unnamed: 0,Goal Name,Goal Start Date,Goal End Date,Status,Journey,Notes
0,Learn Spanish,2023-01-01,2023-12-31,In Progress,Planning,
1,Buy a House,2023-05-01,2023-08-31,On Hold,Research,
2,Grow a Garden,2023-03-01,2023-07-31,Completed,Implementation,Took a gardening class


In [11]:
table.to_json(orient="records")

'[{"Goal Name":"Learn Spanish","Goal Start Date":"1\\/1\\/2023","Goal End Date":"12\\/31\\/2023","Status":"In Progress","Journey":"Planning","Notes":""},{"Goal Name":"Buy a House","Goal Start Date":"5\\/1\\/2023","Goal End Date":"8\\/31\\/2023","Status":"On Hold","Journey":"Research","Notes":""},{"Goal Name":"Grow a Garden","Goal Start Date":"3\\/1\\/2023","Goal End Date":"7\\/31\\/2023","Status":"Completed","Journey":"Implementation","Notes":"Took a gardening class"}]'

In [13]:
csv_output = """Title of Research Article,Authors of Research Article,Publication Date,Abstract of Research Article,Main Findings,Keywords,References
"The Impact of Social Media on Student Engagement,"John Smith,August 12th, 2020,"This research examines the impact of social media use on student engagement in a higher education setting. The results of the study suggest that social media use has both a positive and a negative effect on student engagement. While students reported feeling more connected to their peers and faculty through social media, they also reported feeling overwhelmed and distracted by the amount of content available. These findings suggest that universities should take a proactive approach to managing social media usage among their students.","The results of the study suggest that social media use has both a positive and a negative effect on student engagement. While students reported feeling more connected to their peers and faculty through social media, they also reported feeling overwhelmed and distracted by the amount of content available.",Social Media,Student Engagement,Higher Education,https://www.example1.com/article.pdf,https://www.example2.com/video.mp4,https://www.example3.com/image.jpg
"The Role of Technology in Education,"Jane Doe,May 14th, 2020,"This research examines the role of technology in education. The study finds that technology is an important tool for student learning, but its use must be managed effectively"""

In [14]:
import pandas as pd
from io import StringIO

pd.read_csv(StringIO(csv_output))

ParserError: Error tokenizing data. C error: EOF inside string starting at row 2

In [15]:
import json
import jsonschema
with open("tests/test_data/database/responses/habit_tracker.json") as f:
    response = json.load(f)

with open("tests/test_data/database/responses/schema.json") as f:
    schema = json.load(f)
jsonschema.validate(response, schema)


ValidationError: nan is not of type 'string'

Failed validating 'type' in schema['properties']['content']['items'][0]['patternProperties']['.*']:
    {'type': 'string'}

On instance['content'][0]['Date']:
    nan

In [2]:
from butler.firebase import pullFromFirebase


In [7]:
response = pullFromFirebase("database/openai_responses/-NNMwS2WKK2f_Lt4-cqy")
response

{'content': '"Crispy Fried Chicken,""2 pounds boneless chicken thighs or breasts, cut into small pieces, 2 tablespoons salt, 2 tablespoons black pepper, 2 tablespoons garlic powder, 2 tablespoons onion powder, 2 tablespoons paprika, 1 cup all-purpose flour, 1/2 cup cornstarch, 2 cups vegetable oil,""In a large bowl, combine the chicken pieces, salt, pepper, garlic powder, onion powder and paprika. Toss until all the chicken pieces are evenly coated. In a separate bowl, mix together the flour and cornstarch. Dip the chicken pieces into the flour mixture and coat them evenly. Heat the oil in a large skillet on medium-high heat. Add the chicken pieces to the skillet and cook for about 10 minutes, or until golden brown and crispy. Remove the chicken from the skillet and serve,"4,15 minutes,25 minutes,Medium,Per Serving: Calories: 556; Fat: 35.4g; Carbohydrates: 24.6g; Protein: 30.2g,Southern,Main Dish',
 'details': "\n\nTitle: Delicious Recipes for Every Occasion\nDescription: This recipe 

None
