## Setup
#### Load the API key and relevant Python libraries.

In [70]:
from collections import defaultdict
import json
import numpy as np
import pandas as pd
import os

import io
from dotenv import dotenv_values
import openai
import os
from copy import deepcopy
import time
import ast
import re
import pprint
from openai import OpenAI

# LLM KEYS

In [71]:
# Location of the dotenv-style file that holds the API credentials.
# NOTE(review): the directory is spelled "auixiliary" -- confirm this matches
# the folder name on disk before "correcting" it.
env_file_key = "../../auixiliary/env_GENERAL"

# Read the whole env file as UTF-8 text.
with open(env_file_key, encoding='utf-8') as file:
    env_content = file.read()

# Parse the text through an in-memory stream into a key -> value mapping.
env_variables = dotenv_values(stream=io.StringIO(env_content))

# Fails with a lookup error right here if the env file does not define the key.
api_key = env_variables['OPENAI_API_KEY']

# Shared client used by every API call further down in the notebook.
client = OpenAI(api_key=api_key)

# Functions

In [102]:
def parse_response(json_block):
    """Decode the structured payload contained in a model reply.

    The reply is expected to hold a JSON document, optionally wrapped in a
    Markdown code fence (```` ```json ... ``` ````). The fence lines are
    removed only when they are actually present, so an unfenced reply is
    parsed as-is instead of losing its first and last line.

    Args:
        json_block: Raw reply text returned by the model.

    Returns:
        The decoded Python object.

    Raises:
        ValueError, SyntaxError: If the payload is neither valid JSON nor a
            valid Python literal.
    """
    lines = json_block.strip().split('\n')

    # Opening fence may carry a language tag (```json), closing fence is bare.
    if lines and lines[0].lstrip().startswith('```'):
        lines = lines[1:]
    if lines and lines[-1].strip() == '```':
        lines = lines[:-1]

    json_content = '\n'.join(lines).strip()

    try:
        # Proper JSON parsing: understands true / false / null and JSON escapes.
        return json.loads(json_content)
    except json.JSONDecodeError:
        # Fall back to the previous behaviour for replies written as Python
        # literals (e.g. single-quoted strings). literal_eval only evaluates
        # literals, so it is safe to run on model output.
        return ast.literal_eval(json_content)

# Models

In [72]:
def chat_gpt(prompt, temperature=0.1):
    """Send a chat conversation to the ``gpt-4o`` model and return its reply.

    Args:
        prompt: List of message dicts, forwarded as the ``messages`` argument
            of the chat completion request.
        temperature: Sampling temperature forwarded to the request.

    Returns:
        The text content of the first returned choice, with surrounding
        whitespace stripped.
    """
    completion = client.chat.completions.create(
        temperature=temperature,
        messages=prompt,
        model="gpt-4o",
    )
    first_choice = completion.choices[0]
    reply_text = first_choice.message.content
    return reply_text.strip()

# PROMPT

In [92]:
MESSAGES = [
    {
        'role': 'system',
        'content': """
        As a Senior Facial Recognition and Analysis (FRA) Technology Specialist, you specialize in the latest developments in FRA technology. In this pivotal role, you are entrusted with reviewing and cataloguing the diverse applications and use cases of FRA technology across multiple domains.
        """
    },
    {
        'role': 'user',
        'content': """
        Create a comprehensive and self-explanatory list, in JSON format, detailing the various uses of Earth Observation technology. Each dictionary in the created list describes a particular use case or application of facial recognition technology. Provide three uses for each of the {} domains listed below. The uses must contain specific details about how the technology is used, by using action verbs that clearly describe the actions, activities, or processes of the uses. The level of specificity should be consistent across all uses. For each of these uses, you must output the following 6 elements each in less than 7 words:
        (1) Use: An element of a series of numbered uses, starting with 1. Each use should be listed consecutively.
        (2) Domain: The domain that represents the area or sector the AI system is intended to be used in.
        (3) Purpose: The purpose or objective that is intended to be accomplished by using an AI system.
        (4) Capability: The capability of the AI system that enables the realization of its purpose and reflects the technological capability.
        (5) AI user: The entity or individual in charge of deploying and managing the AI system, including individuals, organizations, corporations, public authorities, and agencies responsible for its operation and management.
        (6) AI subject: The individual directly affected by the use of the AI system, experiencing its effects and consequences. They interact with or are impacted by the AI system's processes, decisions, or outcomes.
        Ensure that each concept is specific and easy to understand for non-experts. Avoid duplicate purposes or objectives and use clear and precise language to describe the uses' concepts.

        Domains to be included are the following:
        "{}"

        For some domains, e.g., Human-Computer Interaction, Well-being, and Social Media, it might be more challenging to identify Earth Observation uses and you might think of wearable sensors or cameras on computer screens; but do NOT. Instead, review your uses and double check that the sensors you are outputting are indeed Earth Observation sensors. Try to be creative with Earth Observation uses even in these less obvious domains of its application. For example, for Well-being, you can think of suggesting parks for health-promoting activities to the user, and for Human-Computer Interaction, you can think of applications bringing satellite data closer to the public.
        Double-check that you are outputting realistic, i.e., plausible, meaningful, and useful uses. It is OK to output upcoming, i.e., uses that are not yet widespread but could be in research, development, or talked about in expert circles. However, try to make sure that it also makes sense to use satellite or aerial data for the use you are describing; and that it is not much more straightforward to use some other sensors. If it is much easier to achieve the same purpose with some other already existing sensors (e.g., phone cameras or wearable devices), then think of another use. 
        
        For the "Capability", write it by combining action verbs in gerund form (i.e., ending with "ing"), inferences and data, entity or metric.
                (1) Action verbs clearly describe the actions, activities, or processes taken by the AI system, e.g., identify. Choose the most suitable action verb from the following list. If none can be assigned, propose a new verb, and mark it with an asterisk.
                    (A) Estimating (e.g., Rating, Grading, Measuring, Assessing)
                    (B) Forecasting (e.g., Predicting, Guessing, Speculating)
                    (C) Comparing (e.g., Ranking, Ordering, Finding Best, Finding Cheapest, Recommending)
                    (D) Detecting (e.g., Monitoring, Sensing, Noticing, Classifying, Discriminating)
                    (E) Identifying (e.g., Recognizing, Discerning, Finding, Classifying, Perceiving)
                    (F) Discovering (e.g., Extracting, Noticing, Organizing, Clustering, Grouping, Connecting, Revealing)
                    (G) Generating (e.g., Making, Composing, Constructing, Creating, Authoring)
                    (H) Acting (e.g., Doing, Executing, Playing, Going, Learning, Operating)
                (2) Inference clearly describes the output or conclusion drawn by the AI system based on the data it processes, e.g., crop yield, floods, trend, anomaly, wildfires, pattern, and probability
                (3) Data, Entity or Metric clearly describes the source, type, or nature of the data used by the AI system, e.g., from an optical camera, from an infrared camera, user input, sensor readings, transaction records, biometric data, environmental data, social media posts, geographical information, medical records, and financial metrics.

        For "Purpose", write it also in a gerund verb form (i.e., ending with "ing").
                
        *** Ensure to output 3 uses per each of the {} domains above, i.e., {} uses in total. ***
                
        Follow this example structure for reporting the identified uses:
        [
            {{
                "Use": 1,
                "Domain": "Biometric identification and categorization of natural persons",
                "Purpose": "Identifying individuals in crowded areas",
                "Capability": "Facial recognition from aerial footage",
                "AI User": "Law enforcement agencies",
                "AI Subject": "Individuals in public spaces"
            }},
            {{
                "Use": 2,
                "Domain": "Education",
                "Purpose": "Personalized learning and tutoring",
                "Capability": "Analyzing student performance and providing tailored study plans",
                "AI User": "Teachers, educational platforms",
                "AI Subject": "Students"
            }},
            {{
                "Use": 3,
                "Domain": "Finance",
                "Purpose": "Fraud detection and prevention",
                "Capability": "Identifying suspicious activities and transactions",
                "AI User": "Banks, financial institutions",
                "AI Subject": "Customers"
            }},
            {{
                "Use": 4,
                "Domain": "Healthcare",
                "Purpose": "Assisting in medical diagnoses",
                "Capability": "Analyzing patient data and suggesting potential conditions",
                "AI User": "Healthcare professionals",
                "AI Subject": "Patients"
            }},
           ...
            {{
                "Use": 44,
                "Domain": "Entertainment",
                "Purpose": "Recommending personalized content",
                "Capability": "Analyzing user preferences and suggesting movies, music, etc.",
                "AI User": "Streaming platforms, content providers",
                "AI Subject": "Entertainment consumers"
            }}
        ]

        *** Ensure to output only the correctly formatted JSON and nothing else. ***
        """
    }
]


In [93]:
def format_prompt(MESSAGES, domains):
    """Fill the prompt template with a batch of domains.

    Works on a deep copy, so the template passed in is left untouched. The
    number of domains is taken to be the number of newline-separated lines in
    ``domains``; that count is printed and substituted into the user message
    together with the domain text and the total number of requested uses
    (three per domain).

    Args:
        MESSAGES: Template conversation; the message at index 1 must contain
            four positional ``{}`` placeholders in its ``'content'``.
        domains: Newline-separated list of domains, one per line.

    Returns:
        A new list of message dicts with the user message formatted.
    """
    filled = deepcopy(MESSAGES)
    domain_count = len(domains.split("\n"))
    print(domain_count)

    user_message = filled[1]
    template = user_message['content']
    user_message['content'] = template.format(
        domain_count, domains, domain_count, 3 * domain_count
    )
    return filled

In [94]:
# The full domain list is split into three batches that are sent to the model
# in separate requests. Each batch is a single string with one domain per
# line; format_prompt derives the number of domains by splitting on "\n", so
# a batch must not contain blank or trailing lines. The leading indentation of
# the continuation lines is part of the string and is sent to the model as-is.

# Batch 1: domains 1-15.
domains_part1 = """1. Biometric identification and categorization of natural persons
                2. Family
                3. Romantic relationships and friendships
                4. Health and Healthcare
                5. Well-being
                6. Human-Computer Interaction
                7. Finance and Investment
                8. Education and vocational training
                9. Employment, workers management and access to self-employment
                10. Essential private services and public services and benefits
                11. Recommender Systems and Personalization
                12. Social Media
                13. Sports and Recreation
                14. Arts and Entertainment
                15. Security and Cybersecurity"""

# Batch 2: domains 16-30.
domains_part2 = """16. Marketing and Advertising
                17. Agriculture and Farming
                18. Entrepreneurship
                19. Autonomous Robots and Robotics
                20. Innovation and Research
                21. Management and Operation of critical infrastructure
                22. Law enforcement
                23. Migration, Asylum and Border control management
                24. Democracy
                25. Media and Communication
                26. Accessibility and Inclusion
                27. Energy
                28. Military and Defense
                29. Administration of justice and democratic processes
                30. Government Services and Administration"""

# Batch 3: domains 31-46 (16 domains, one more than the other batches).
domains_part3 = """31. Diplomacy and Foreign Policy
                32. Food Safety and Regulation
                33. Crisis Management and Emergency Response
                34. Humanitarian Aid
                35. Transport and Logistics
                36. Urban Planning
                37. Counterterrorism
                38. Environment and Sustainability
                39. International Law Enforcement and Cooperation
                40. Climate Change Mitigation and Adaptation
                41. Gaming and interactive experiences
                42. Hobbies
                43. Smart home
                44. Social and Community Services
                45. Public and private transportation
                46. Interpersonal Communication"""

In [95]:
# Request uses for the first batch of domains (format_prompt prints the
# number of domains it counted in the batch).
messages1 = format_prompt(MESSAGES, domains_part1)
response1 = chat_gpt(messages1)
# print(response1)

15


In [98]:
# Request uses for the second batch of domains.
messages2 = format_prompt(MESSAGES, domains_part2)
response2 = chat_gpt(messages2)

15


In [101]:
# Request uses for the third batch of domains.
messages3 = format_prompt(MESSAGES, domains_part3)
response3 = chat_gpt(messages3)

16


In [None]:
# Show the raw reply for the last batch for a manual check before parsing.
print(response3)

In [105]:
# Decode each raw reply into Python objects (one result per batch).
tmp1 = parse_response(response1)
tmp2 = parse_response(response2)
tmp3 = parse_response(response3)

In [116]:
# Each batch is numbered by the model on its own, so shift the later batches
# to get one continuous numbering: batches 1 and 2 hold 15 domains each with
# 3 uses per domain, hence offsets of 45 and 90.
# NOTE(review): the shifted 'Use' values are stored as str, while the entries
# of the first batch keep whatever type the parser produced -- confirm that
# downstream code expects this mix.
for offset, batch in ((45, tmp2), (90, tmp3)):
    for el in batch:
        shifted = int(el['Use']) + offset
        el['Use'] = str(shifted)

In [118]:
# Concatenate the three batches, in order, into the final collection of uses.
RESPONSE = tmp1+tmp2+tmp3

# SAVE

In [121]:
# Serialise the combined uses as pretty-printed JSON (4-space indentation)
# and store them on disk.
with open('../../data/FRA_uses.json', 'w') as json_file:
    json_file.write(json.dumps(RESPONSE, indent=4))