In [None]:
%pip install openai
%pip install python-dotenv
%pip install pandas

### Setup

In [2]:
from openai import OpenAI
from dotenv import load_dotenv
import os
import pandas as pd

# Pull settings from the .env file into the process environment
load_dotenv()

# Create the OpenAI client used by the cells below. The key is read from the
# OPENAI_API_KEY environment variable; passing it explicitly mirrors the
# client's default and could be omitted.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

In [8]:
# Function to read the CSV file and extract headers
def read_csv_headers(file_path):
    """Return the column names of a CSV file as a list.

    The file is parsed with ``nrows=0`` so that only the header row is
    loaded and no data rows are read into memory.

    Args:
        file_path: Location of the CSV, passed straight to ``pd.read_csv``.

    Returns:
        The column labels in file order.
    """
    return list(pd.read_csv(file_path, nrows=0).columns)

# Read the column names from the global export and keep only the first 10
headers = read_csv_headers("../column_headers/2024-05-21/global.csv")[:10]
headers

['Employee ID',
 'First Name',
 'Last Name',
 'Nickname',
 'Chinese Name',
 'Email',
 'Invite User',
 'User Email (if different from employee email)',
 'Access Role',
 'My Profile Module']

In [11]:
def create_summary_prompt(headers):
    """Build the text prompt asking for a description of the given columns.

    Args:
        headers: Iterable of column-name strings; they are joined with
            ``", "`` in the order given.

    Returns:
        A prompt string consisting of an introductory line, the
        comma-separated column names, a blank line, and the request.
    """
    column_list = ", ".join(headers)
    return (
        "The CSV file contains the following columns:\n"
        f"{column_list}\n\n"
        "Please provide a summary or description of these columns."
    )

# Build the prompt from the truncated header list and echo it as the cell output
prompt = create_summary_prompt(headers)
prompt

'The CSV file contains the following columns:\nEmployee ID, First Name, Last Name, Nickname, Chinese Name, Email, Invite User, User Email (if different from employee email), Access Role, My Profile Module\n\nPlease provide a summary or description of these columns.'

In [17]:
def get_summary(prompt, model="gpt-3.5-turbo"):
    """Send ``prompt`` as a single user message and return the reply text.

    Args:
        prompt: Text placed in the ``content`` of the one user message.
        model: Name of the chat model to request. Defaults to
            ``"gpt-3.5-turbo"``, the value that used to be hard-coded here,
            so existing one-argument calls behave as before.

    Returns:
        The ``message.content`` of the first choice in the response.
    """
    # `client` is the module-level OpenAI client created during setup.
    response = client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model=model,
    )
    # Only the first choice is read; any further choices are ignored.
    return response.choices[0].message.content

# Ask the model to describe the columns listed in `prompt`
summary = get_summary(prompt)

# Print the heading and the model's reply on consecutive lines
print("Summary of the CSV columns:", summary, sep="\n")


Summary of the CSV columns:
1. Employee ID: A unique identifier for each employee in the organization.
2. First Name: The given or preferred name of the employee.
3. Last Name: The family or surname of the employee.
4. Nickname: A shortened or alternative name that an employee may go by.
5. Chinese Name: The employee's name written in Chinese characters.
6. Email: The primary email address of the employee.
7. Invite User: A field indicating whether the employee has been invited to use a specific platform or software.
8. User Email: An alternative email address for the employee, if different from their primary one.
9. Access Role: The level of access or permissions granted to the employee within a system or organization.
10. My Profile Module: A section or feature within a program or platform where the employee can update and manage their personal information.
