In [1]:
import requests
import json
import pandas as pd

### Anwendungsbeispiel 1: Automatisierte Übersetzung mit DeepL

Read the [docs](https://www.deepl.com/docs-api)

In [None]:
# Option 1 (recommended): read the auth token from an external JSON file
with open('token.json') as token_file:
    credentials = json.load(token_file)

AUTH_KEY_DEEPL = credentials['AUTH_TOKEN_DEEPL']

# Option 2 (not recommended): paste the auth token in by hand
# AUTH_KEY_DEEPL = '<AUTH TOKEN>'

In [None]:
ENDPOINT = 'https://api-free.deepl.com/v2/translate'

text = """Data literacy is the ability to read, understand, create, and communicate data as information. Much like literacy as a general concept, data literacy focuses on the competencies involved in working with data.
It is, however, not similar to the ability to read text since it requires certain skills involving reading and understanding data."""

# The key is passed in the 'Authorization' header, prefixed with 'DeepL-Auth-Key '.
HEADER = {'Authorization': 'DeepL-Auth-Key ' + AUTH_KEY_DEEPL}

# Request parameters: source language, target language and the text itself.
PARAMS = {'source_lang': 'EN',
          'target_lang': 'DE',
          'text': text}

# Send the parameters form-encoded in a POST body instead of a GET query
# string: POST is the documented method for /v2/translate, and it keeps
# longer texts out of the URL.
response = requests.post(ENDPOINT, headers=HEADER, data=PARAMS)

In [None]:
response

In [None]:
response.json()

In [None]:
# Print the 'text' field of the first entry in the 'translations' list of the JSON body.
print(response.json()['translations'][0]['text'])

In [None]:
def translate(text, source_lang, target_lang, auth_key=None):
    """Translate ``text`` with the DeepL API and return the translated string.

    Args:
        text: The text to translate.
        source_lang: Language code of ``text`` (e.g. ``'EN'``).
        target_lang: Language code to translate into (e.g. ``'DE'``).
        auth_key: Optional DeepL auth key. When omitted, the key is read from
            the ``AUTH_TOKEN_DEEPL`` entry of ``token.json`` in the working
            directory.

    Returns:
        The ``text`` field of the first entry in the response's
        ``translations`` list.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    ENDPOINT = 'https://api-free.deepl.com/v2/translate'

    # Only touch the credentials file when the caller did not supply a key.
    if auth_key is None:
        with open('token.json') as token_file:
            auth_key = json.load(token_file)['AUTH_TOKEN_DEEPL']

    headers = {'Authorization': 'DeepL-Auth-Key ' + auth_key}

    payload = {'source_lang': source_lang,
               'target_lang': target_lang,
               'text': text}

    # POST with a form-encoded body is the documented request shape and keeps
    # long texts out of the URL; the timeout stops a stalled connection from
    # blocking the notebook indefinitely.
    response = requests.post(ENDPOINT, headers=headers, data=payload, timeout=30)

    # Surface HTTP errors directly instead of failing later with a KeyError
    # on the missing 'translations' entry.
    response.raise_for_status()

    return response.json()['translations'][0]['text']

In [None]:
# Three English sentences, each translated with its own request.
sentences = [
    "Data literacy is the ability to read, understand, create, and communicate data as information.",
    "Much like literacy as a general concept, data literacy focuses on the competencies involved in working with data.",
    "It is, however, not similar to the ability to read text since it requires certain skills involving reading and understanding data.",
]

# Print the German translation of every sentence on its own line.
for sentence in sentences:
    print(translate(sentence, 'EN', 'DE'))

*Hinweis: Es gibt sogar ein separates Python-Package (`deepl`), das die Benutzung der API noch einfacher macht!*

### Anwendungsbeispiel 2: Transkription mit AssemblyAI

Read the [docs](https://docs.assemblyai.com/)

Schritt 1: Upload der Audio-Datei

In [None]:
UPLOAD_ENDPOINT = 'https://api.assemblyai.com/v2/upload'

# Read the AssemblyAI token from the external credentials file.
with open('token.json') as f:
    credentials = json.load(f)
    AUTH_TOKEN_ASSEMBLYAI = credentials['AUTH_TOKEN_ASSEMBLYAI']

# Manual alternative (not recommended):
# AUTH_TOKEN_ASSEMBLYAI = '<AUTH TOKEN>'

# Headers for the JSON requests made with HEADER further down in the notebook.
HEADER = {'authorization': AUTH_TOKEN_ASSEMBLYAI,
          'content-type': 'application/json'}

# The upload body is raw audio bytes, not JSON, so label it as a binary
# stream for this one request and leave HEADER unchanged.
UPLOAD_HEADER = {**HEADER, 'content-type': 'application/octet-stream'}

# Read the recording in binary mode; the bytes are sent as the request body.
with open('data/recording.m4a', 'rb') as f:
    audio_file = f.read()

response = requests.post(UPLOAD_ENDPOINT, headers=UPLOAD_HEADER, data=audio_file)
response

In [None]:
response.json()

In [None]:
# Keep the 'upload_url' value from the JSON body of the upload response.
UPLOAD_URL = response.json()['upload_url']

Schritt 2: Starten der Transkription

Inzwischen werden auch Transkriptionen in [Spanisch, Französisch, Deutsch und Italienisch](https://docs.assemblyai.com/#supported-languages) unterstützt.

In [None]:
TRANSCRIPT_ENDPOINT = 'https://api.assemblyai.com/v2/transcript'

# Request body. It is serialised via the json= argument below, so the flag
# must be a real boolean (True -> JSON true); the string 'true' would be sent
# as a JSON string instead.
JSON = {'audio_url': UPLOAD_URL,
        'language_code': 'en',
        'speaker_labels': True}

response = requests.post(TRANSCRIPT_ENDPOINT, headers=HEADER, json=JSON)
response

In [None]:
response.json()

In [None]:
# Keep the 'id' value from the JSON body of the transcript response.
TRANSCRIPT_ID = response.json()['id']

Schritt 3: Download des Transkriptes

*Hinweis: Laut [Anbieter](https://docs.assemblyai.com/#processing-times) veranschlagt der Prozess etwa 30% der Länge der Audio-Datei.*

In [None]:
# Address the individual transcript by appending its ID to the endpoint.
transcript_url = '/'.join([TRANSCRIPT_ENDPOINT, TRANSCRIPT_ID])

response = requests.get(transcript_url, headers=HEADER)
response

In [None]:
response.json()

In [None]:
response.json()['text']

### Anwendungsbeispiel 3: Chatbots

Read the [docs](https://platform.openai.com/docs/introduction)

Verwendung mittels der `requests`-Bibliothek

In [6]:
# Alternative 1 (empfohlen): Abruf Auth Token aus externer Datei
with open('token.json') as f:
    credentials = json.load(f)
    AUTH_KEY_OPENAI = credentials['OPENAI_API_KEY']

# Alternative 2 (nicht empfohlen): Manuelle Eingabe des Auth Token
# AUTH_KEY_OPENAI = '<AUTH TOKEN>'

In [16]:
import openai
from openai import ChatCompletion

# The library needs the API key before the first request is made.
openai.api_key = AUTH_KEY_OPENAI

# Conversation: an empty system message followed by the user's question.
messages = [
    {"role": "system",
     "content": ""},
    {"role": "user",
     "content": "Describe on a high-level the key benefits and caveats of accessing GPT-4 via 1) the ChatGPT interface, 2) the OpenAI Playground, and 3) API Calls in Python. Be concise."},
]

# No response_format here: JSON mode is rejected unless the messages
# themselves ask for JSON, and this prompt asks for a prose answer.
response = ChatCompletion.create(
    model='gpt-3.5-turbo',
    messages=messages,
    max_tokens=300,
    temperature=0,
)

SyntaxError: invalid syntax (<ipython-input-16-3b234c273c2e>, line 7)

In [10]:
ENDPOINT = 'https://api.openai.com/v1/chat/completions'

# The key is sent as a bearer token in the 'Authorization' header.
HEADER = {'Authorization': 'Bearer ' + AUTH_KEY_OPENAI}

# Prompt sent as the single user message.
text = """
Describe on a high-level the key benefits and caveats of accessing GPT-4 via 1) the ChatGPT interface, 2) the OpenAI Playground, and 3) API Calls in Python. Be concise.
"""

# JSON payload: model name plus the list of chat messages.
MESSAGE = {
    'model': 'gpt-3.5-turbo',
    'messages': [
        {'role': 'user', 'content': text},
    ],
}

response = requests.post(ENDPOINT, headers=HEADER, json=MESSAGE)

In [13]:
response.json()

{'id': 'chatcmpl-8sstflWAJvlGjnOhPZRGcAA2CHtKo',
 'object': 'chat.completion',
 'created': 1708091427,
 'model': 'gpt-3.5-turbo-0613',
 'choices': [{'index': 0,
   'message': {'role': 'assistant',
    'content': "1) ChatGPT interface: The key benefit is its user-friendly nature, allowing easy interaction with GPT-4 by typing prompts and receiving responses. However, it may have limited customization options and constrained use cases compared to other methods.\n\n2) OpenAI Playground: It offers a web-based environment to experiment with GPT-4, providing visual tools and code editing capabilities. It allows users to test and iterate ideas quickly. However, it may have similar limitations to the ChatGPT interface.\n\n3) API Calls in Python: It provides programmatic access to GPT-4, allowing integration into various applications and workflows. Python's flexibility enables users to build advanced interactions and tailor outputs. However, it requires coding expertise and understanding of API

In [None]:
# Print the message content of the first entry in the 'choices' list of the JSON body.
print(response.json()['choices'][0]['message']['content'])

Verwendung mittels der `openai`-Bibliothek

In [None]:
import openai

# Configure the library with the API key before making a request.
openai.api_key = AUTH_KEY_OPENAI

# Model name and chat messages, bundled so both can be passed in one go.
MESSAGE = {
    'model': 'gpt-3.5-turbo',
    'messages': [{'role': 'user', 'content': text}],
}

# Unpack the bundle into the model= and messages= keyword arguments.
response = openai.ChatCompletion.create(temperature=0, **MESSAGE)

In [None]:
response

In [None]:
# Print the message content of the first entry in the response's 'choices' list.
print(response['choices'][0]['message']['content'])

Ein praktischer Anwendungsfall: News Summarization

In [None]:
# Read the whole article file into a single string.
with open('data/news_article.txt') as article_file:
    news = article_file.read()

print(news)

In [None]:
# Prompt template: the article text held in `news` is interpolated between
# the '---' delimiter lines at the end of the prompt.
text = f"""
Perform the following actions step-by-step:
Step 1 - Summarize the key information in the news article delimited by triple dashes. Use three sentences at most.
Step 2 - Extract the names of all entities (e.g., companies, countries, persons) mentioned in the article.
Step 3 - Is the article discussing a rating action by Moody's?


Structure your answer using the following format:
Step 1: <list of bullet points>
Step 2: <list of bullet points>
Step 3: <yes/no as boolean value>

 
News article:
---
Body: {news}
---
"""

# Show the fully rendered prompt.
print(text)

In [None]:
# Model name and chat messages, bundled so both can be passed in one go.
MESSAGE = {
    'model': 'gpt-3.5-turbo',
    'messages': [{'role': 'user', 'content': text}],
}

# Unpack the bundle into the model= and messages= keyword arguments.
response = openai.ChatCompletion.create(temperature=0, **MESSAGE)

In [None]:
# Print the message content of the first entry in the response's 'choices' list.
print(response['choices'][0]['message']['content'])

### Anwendungsbeispiel 4: Amtliche Statistiken mit Datenguide

Read the [docs](https://datengui.de/docs)

*Hinweis: Da Datenguide Open Data bereitstellt, brauchen wir keinen `auth_token`!*

*Der `data`-Code `52411:ISV001` kennzeichnet den Datensatz "Statistik über beantragte Insolvenzverfahren - Insolvenzverfahren", der `region`-Code `05` steht für "Nordrhein-Westfalen", und es werden alle verfügbaren Daten ab `2008` abgerufen.*

In [None]:
ENDPOINT_DATENGUIDE = 'https://tabular.genesapi.org'

# Query parameters: data code, region code, time filter and output format.
PARAMS = {
    'data': '52411:ISV001',
    'region': '05',
    'time': '2008:',
    'format': 'json',
}

# No auth header is passed for this request.
response = requests.get(ENDPOINT_DATENGUIDE, params=PARAMS)
response

In [None]:
response.json()

In [None]:
response.json()['data']

In [None]:
# Turn the 'data' entry of the JSON body into a DataFrame and display it.
records = response.json()['data']
df = pd.DataFrame(records)
df

In [None]:
# Drop rows that repeat the same (year, value) pair, then draw value over year.
deduplicated = df.drop_duplicates(subset=['year', 'value'])
deduplicated.plot.line(x='year', y='value')