In [1]:
!pip install openai awswrangler --quiet


import pandas as pd
import numpy as np
import datetime

import os
import openai

import awswrangler as wr

In [2]:
# Read the OpenAI API key from a local file so it is never hard-coded in the notebook.
# The context manager closes the file handle, and strip() removes the trailing newline
# that text editors usually append, which would otherwise become part of the key.
with open('/home/ec2-user/SageMaker/open_key.txt') as key_file:
    open_key = key_file.read().strip()

# Expose the key both through the environment and on the module-level setting.
os.environ["OPENAI_API_KEY"] = open_key
openai.api_key = os.environ['OPENAI_API_KEY']

In [3]:
# Client object used for every chat-completion request in this notebook.
# No key is passed here; presumably the client picks up the OPENAI_API_KEY
# environment variable set in the previous cell — TODO confirm.
from openai import OpenAI
client = OpenAI()

In [4]:
# Natural-language request that the model should translate into SQL.
query_string = "Write a SQL query that gets all machines with over 600 speed difference. Return timestamp, machine ID, and speed difference. Sort by highest speed difference. Top 20 results"

# The system prompt carries the table schema so the model knows the available column
# names and types. The CREATE TABLE statement is closed with ")" before the ";" so the
# schema handed to the model is well-formed DDL.
response = client.chat.completions.create(
    model='gpt-4',
    messages=[
        {
            "role": "system",
            "content": (
                "Given the following SQL table, your job is to write queries given a user's request \n"
                " CREATE TABLE telemetry_extended_v3 (\n"
                " timestamp DateTime,\n"
                " speed_desired Int,\n"
                " ambient_temperature Float,\n"
                " ambient_pressure Float,\n"
                " speed Float,\n"
                " temperature Float,\n"
                " pressure Float,\n"
                " machineid String,\n"
                " speed_difference Float\n"
                ");"
            ),
        },
        {
            "role": "user",
            "content": query_string,
        },
    ],
    # Low temperature keeps the generated SQL close to deterministic.
    temperature=0.1,
    max_tokens=1000,
    top_p=1,
    frequency_penalty=0,
    presence_penalty=0,
)

# Text of the first (and only requested) completion: the SQL produced by the model.
output_query = response.choices[0].message.content
# Completed in 1 second

In [5]:
# Display the SQL text returned by the model so it can be inspected before it is run.
output_query

'SELECT timestamp, machineid, speed_difference\nFROM telemetry_extended_v3\nWHERE speed_difference > 600\nORDER BY speed_difference DESC\nLIMIT 20;'

In [6]:
# Run the model-generated SQL against the 'capstone_v3' Athena database and load the
# result set into a DataFrame.
# NOTE(review): the SQL text comes straight from the LLM response — check it before executing.
df = wr.athena.read_sql_query(sql=output_query, database='capstone_v3', ctas_approach=True)

In [7]:
# Display the rows returned by the Athena query.
df

Unnamed: 0,timestamp,machineid,speed_difference
0,2023-04-30 15:34:40,M_0002,838.39
1,2023-08-10 06:07:40,M_0007,837.71
2,2022-01-25 12:48:40,M_0006,826.94
3,2023-10-17 09:13:40,M_0005,822.74
4,2022-11-22 12:12:40,M_0020,821.63
5,2023-04-12 09:48:40,M_0013,813.48
6,2022-09-13 06:49:40,M_0009,808.49
7,2021-12-04 06:18:40,M_0008,802.49
8,2021-12-31 03:40:40,M_0013,797.65
9,2023-06-26 00:53:40,M_0005,789.45


In [8]:
# System prompt for the summarisation request: domain background, the task, and the SQL
# that produced the results (appended so the model knows what the rows represent).
context = (
    "Here is the context: We are looking at industrial factory machine data. "
    "Speed differences of 232 and over are worse as you get higher. "
    "You will be provided a string representation of a dataframe with SQL query results. "
    "Your job is to summarize these results and give recommendations for how to address these issues. "
    "Here is the corresponding SQL query: "
    + output_query
)

In [9]:
# Ask the model for a plain-language summary of the query results.
# to_string() renders every row and column of the frame, whereas str(df) may elide
# the middle of larger frames depending on pandas display options.
results_text = df.to_string()

response = client.chat.completions.create(
    model='gpt-4',
    messages=[
        {
            "role": "system",
            "content": context,
        },
        {
            "role": "user",
            # The blank line keeps the instruction from running straight into the table text.
            "content": "Please give me a summary of these results\n\n" + results_text,
        },
    ],
    temperature=0.1,
    max_tokens=1000,
    top_p=1,
    frequency_penalty=0,
    presence_penalty=0,
)

# Stored under its own name so the generated SQL in `output_query` is not overwritten;
# cells that read `output_query` therefore stay safe to re-run after this one.
summary_text = response.choices[0].message.content

In [10]:
print(response.choices[0].message.content)

The SQL query results show the top 20 instances where the speed difference of industrial factory machines exceeded 600. The speed differences range from 771.34 to 838.39, which are significantly higher than the threshold of 232. 

The machine with the highest speed difference of 838.39 is M_0002, recorded on April 30, 2023. Other machines with high speed differences include M_0007, M_0006, and M_0005. 

To address these issues, I recommend the following:

1. Investigate the cause of these high speed differences. This could be due to machine malfunctions, incorrect settings, or external factors affecting the machines' operation.

2. Regularly monitor the machines' speed differences, especially those that have recorded high speed differences in the past. This will help detect any anomalies early and take corrective action before the issue escalates.

3. Implement preventive maintenance for the machines. Regular maintenance can help prevent machine malfunctions that could lead to high spe

In [None]:
def gpt_quizzer(question):
    response = client.chat.completions.create(
    model='gpt-4',
    messages=[
        {
            "role": "system",
            "content": "Given the following SQL table, your job is to write correct SQL queries given a user's request \n CREATE TABLE telemetry_extended_v3 (\n timestamp DateTime,\n speed_desired Int,\n ambient_temperature Float,\n ambient_pressure Float,\n speed Float,\n temperature Float,\n pressure Float,\n machineid String,\n speed_difference Float;"
        },
        {
            "role": "user",
            "content": question,
        }
    ],
    temperature=0.1,
    max_tokens=1000,
    top_p=1,
    frequency_penalty=0,
    presence_penalty=0
    )

    output_query = response.choices[0].message.content