# Augmented Analytics with Python

## Libraries and settings

In [1]:
# Libraries
import re
import io
import sys
import json
import openai
import textwrap
import subprocess
import pandas as pd
import matplotlib.pyplot as plt

# Suppress warnings
import warnings
warnings.filterwarnings('ignore')

# Load the OpenAI API key from credentials.json.
# Expected layout of the file: {"openai": {"api_key": "<key>"}}
try:
    with open('./data/credentials.json', encoding='utf-8') as f:
        credentials = json.load(f)
    api_key = credentials['openai']['api_key']
except (FileNotFoundError, KeyError, TypeError, json.JSONDecodeError) as exc:
    # A missing file, malformed JSON, or a missing "openai"/"api_key" entry
    # all mean the same thing for the reader of this notebook: the key has
    # not been provided. The original exception stays attached as the cause.
    raise ValueError(
        "Please provide OpenAI API key in the credentials.json file.") from exc

# Set up OpenAI API key
openai.api_key = api_key

## Import example data

In [2]:
# Read the semicolon-separated apartments data set into a DataFrame
data = pd.read_csv(filepath_or_buffer='./data/apartments_data.csv', sep=';')

# Preview the first five rows (rendered as the cell output)
data.head(n=5)

Unnamed: 0,id,address,lat,lon,municipality_number,municipality_name,rooms,area,luxurious,price,price_per_m2,population,population_density,employees,proportion_foreigners,mean_taxable_income
0,0,"Neuhusstrasse 6, 8630 Rüti ZH, ZH",47.252171,8.845797,118,Rüti (ZH),3.0,49,0,1441,29.41,12286,1221.272366,5053,24.841283,65362.04268
1,1,"Widacherstrasse 5, 8630 Rüti ZH, ZH",47.252087,8.854919,118,Rüti (ZH),3.0,111,0,2600,23.42,12286,1221.272366,5053,24.841283,65362.04268
2,2,"Widenweg 14, 8630 Rüti ZH, ZH",47.25367,8.853993,118,Rüti (ZH),3.0,58,0,1490,25.69,12286,1221.272366,5053,24.841283,65362.04268
3,3,"Rain 1, 8630 Rüti ZH, ZH",47.259834,8.851705,118,Rüti (ZH),4.0,118,0,3240,27.46,12286,1221.272366,5053,24.841283,65362.04268
4,4,"Bachtelstrasse 24b, 8630 Rüti ZH, ZH",47.266113,8.866872,118,Rüti (ZH),3.0,66,0,1450,21.97,12286,1221.272366,5053,24.841283,65362.04268


## Functions

### Function to query GPT-3.5 for Python code

In [3]:
# Function to query GPT-3.5 for Python code
def query_gpt(context, question):
    """Ask gpt-3.5-turbo for Python code and return it with an explanation.

    Args:
        context: Text inserted after "Context:" in the user message.
        question: Text inserted after "Question:" in the user message.

    Returns:
        A ``(code, explanation)`` tuple of strings.

        * ``code`` is the content of the first ```python fenced block of the
          reply, with two extra lines appended that save the current
          matplotlib figure to './data/graphic.png' and close it.
        * ``explanation`` is the text following the first 'Explanation:'
          marker of the reply, or "No explanation provided." without one.

        If the reply has no fenced block, or any exception is raised while
        querying, the first element carries a human-readable message instead
        of code (the function never raises).
    """
    no_explanation = "No explanation provided."
    try:
        chat_messages = [
            {"role": "system", "content": "You are a coding assistant. Provide Python code based on user queries."},
            {"role": "user", "content": f"Context: {context}\n\nQuestion: {question}\n\nAlso, provide a short explanation of the result."},
        ]
        completion = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=chat_messages,
            temperature=0,
            max_tokens=1500,
        )
        reply_text = completion.choices[0].message['content']

        # Only the first fenced python block of the reply is used
        fenced_block = re.search(r'```python(.*?)```', reply_text, re.DOTALL)
        if fenced_block is None:
            return "No code block found in the response.", no_explanation

        # Make the generated code write its plot to ./data/graphic.png
        snippet = fenced_block.group(1).strip()
        snippet += "\nplt.savefig('./data/graphic.png')\nplt.close()"

        marker = 'Explanation:'
        if marker in reply_text:
            explanation = reply_text.split(marker)[1].strip()
        else:
            explanation = no_explanation
        return snippet, explanation
    except Exception as err:
        return f"Error querying GPT: {err}", no_explanation

### Function to query Llama 3.2 using Ollama

In [4]:
# Function to query an LLM using Ollama for Python code
def query_ollama(context, question, model="llama3.2:latest"):
    """Ask a local Ollama model for Python code and return it with an explanation.

    Runs ``ollama run <model> "<context>\\n\\nQuestion: <question>"`` as a
    subprocess and parses its standard output.

    Args:
        context: Text placed before the question in the prompt.
        question: Text placed after "Question:" in the prompt.
        model: Name of the Ollama model to run. Defaults to "llama3.2:latest".

    Returns:
        A ``(code, explanation)`` tuple of strings.

        * ``code`` is the content of the first ```python fenced block of the
          output.
        * ``explanation`` is the text following the first 'Explanation:'
          marker of the output, or "No explanation provided." without one.

        If no fenced block is found, the first element is an error message:
        the stderr of ``ollama`` (or its exit code) when the command failed,
        "No code block found." otherwise. Exceptions raised while starting the
        process are reported the same way (the function never raises).
    """
    try:
        response = subprocess.run(
            ["ollama", "run", model, f"{context}\n\nQuestion: {question}"],
            capture_output=True, text=True
        )
        full_response = response.stdout.strip()

        # Extract code block using regex
        code_match = re.search(r'```python(.*?)```', full_response, re.DOTALL)
        if code_match:
            code = code_match.group(1).strip()
            explanation = full_response.split('Explanation:')[1].strip() if 'Explanation:' in full_response else "No explanation provided."
            return code, explanation

        # No usable output: if the command itself failed, report why instead
        # of the misleading "No code block found."
        if response.returncode != 0:
            detail = (response.stderr or "").strip() or f"exit code {response.returncode}"
            return f"Error querying Ollama: {detail}", "No explanation provided."
        return "No code block found.", "No explanation provided."
    except Exception as e:
        return f"Error querying Ollama: {e}", "No explanation provided."
    

### Function to load a CSV file

In [5]:
# Function to load a CSV file
def load_csv_file(file_path, nrows=None, sep=';'):
    """Load a delimited text file into a pandas DataFrame.

    Args:
        file_path: Path or file-like object handed to ``pd.read_csv``.
        nrows: Maximum number of data rows to read; ``None`` reads all rows.
        sep: Field separator. Defaults to ';', the separator used by the
            example data of this notebook.

    Returns:
        The loaded DataFrame, or ``None`` if reading failed (the error is
        printed instead of raised).
    """
    try:
        return pd.read_csv(file_path, sep=sep, nrows=nrows)
    except Exception as e:
        print(f"Error loading CSV file: {e}")
        return None

### Function to execute Python code dynamically and capture the output

In [6]:
# Function to execute Python code dynamically and capture the output
def execute_python_code(code, data=None):
    """Execute a string of Python code and return what it printed.

    The code runs with ``plt`` (and ``data``, when given) available as names.
    If any matplotlib figure is open afterwards, the current figure is saved
    to './data/graphic.png' and closed.

    NOTE(review): ``exec`` runs the given code with full interpreter
    privileges. Only pass code you have read and trust; LLM output should be
    inspected before it is executed.

    Args:
        code: Python source code to execute.
        data: Optional object exposed to the code under the name ``data``.

    Returns:
        The captured standard output with surrounding whitespace stripped, or
        "Error executing code: <message>" if the code (or saving the figure)
        raised an exception.
    """
    local_scope = {'plt': plt}
    if data is not None:
        local_scope['data'] = data

    captured = io.StringIO()
    old_stdout = sys.stdout
    try:
        # Redirect stdout to capture print statements
        sys.stdout = captured
        try:
            exec(code, local_scope)
        finally:
            # Always restore stdout, even when the executed code raises;
            # otherwise every later print() in the session is swallowed.
            sys.stdout = old_stdout
        output = captured.getvalue()

        # Save the plot if it was created
        if plt.get_fignums():
            plt.savefig('./data/graphic.png')
            plt.close()

        return output.strip()
    except Exception as e:
        return f"Error executing code: {e}"


## Example usage

In [7]:
# Context: setup code sent to the model together with the question
context = (
    "import pandas as pd\n"
    "import matplotlib.pyplot as plt\n"
    "data = pd.read_csv('./data/apartments_data.csv', sep=';')\n"
)

# Question to be answered with generated Python code
question = "Show a histogram of the price of apartments with <= 65 m2. Use 20 bins."

# Query the LLM
generated_code, explanation = query_gpt(context, question)

# Limit the explanation to lines of at most 80 characters
explanation_wrapped = textwrap.fill(explanation, width=80)

# Show the generated code, then the explanation
print("Generated Python code:", generated_code, sep="\n")
print("\nExplanation:", explanation_wrapped, sep="\n")


Generated Python code:
Error querying GPT: Incorrect API key provided: sk--XXXX************************************************XXXX. You can find your API key at https://platform.openai.com/account/api-keys.

Explanation:
No explanation provided.


## Modify the generated Python code

In [8]:
# Modify the generated code: smaller figure and a different bar colour.
# str.replace leaves the text unchanged when a pattern does not occur, so the
# only expected failure is generated_code not being a string.
try:
    generated_code = (
        generated_code
        .replace("figsize=(10, 6)", "figsize=(7, 4)")
        .replace("color='skyblue'", "color='greenyellow'")
    )
except AttributeError as exc:
    # Report the problem instead of silently ignoring every possible error
    print(f"Could not modify generated code: {exc}")

# Show the modified code
print("\nModified Python code:")
print(generated_code)


Modified Python code:
Error querying GPT: Incorrect API key provided: sk--XXXX************************************************XXXX. You can find your API key at https://platform.openai.com/account/api-keys.


## Execute the generated Python code

In [None]:
# Execute the generated Python code.
# NOTE(review): exec runs LLM-generated code with full access to this
# kernel's globals; read the printed code above before running this cell.
# Names assigned by the generated code are collected in local_vars.
local_vars = {}
figure_created = False
try:
    exec(generated_code, globals(), local_vars)
except Exception as e:
    print(f"Error executing generated code: {e}")

# First pass: show every matplotlib Figure the generated code assigned to a name
for var_name, var_value in local_vars.items():
    if not isinstance(var_value, plt.Figure):
        continue
    print(f"\nFigure: {var_name}")
    var_value.show()
    figure_created = True

# Second pass (only when no Figure was found): print all collected variables,
# using a head() preview for DataFrames
if not figure_created:
    for var_name, var_value in local_vars.items():
        if not isinstance(var_value, pd.DataFrame):
            print(f"\n{var_name}: {var_value}")
            continue
        print(f"\nDataFrame: {var_name}")
        print(var_value.head())

### Jupyter notebook --footer info-- (please always provide this at the end of each notebook)

In [None]:
import os
import platform
import socket
from platform import python_version
from datetime import datetime

# Print OS name, platform and release, current timestamp and Python version,
# framed by two separator lines
print(
    '-----------------------------------',
    os.name.upper(),
    ' | '.join((platform.system(), platform.release())),
    'Datetime: ' + datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    'Python Version: ' + python_version(),
    '-----------------------------------',
    sep='\n',
)