# Import packages & initialize the Kamiwaza Client SDK

In [15]:
import httpx
from openai import OpenAI
import pandas as pd
import json
from kamiwaza_client import KamiwazaClient
import warnings
warnings.filterwarnings('ignore')

# Create the Kamiwaza SDK client against the local API endpoint
KAMIWAZA_API_URL = "http://localhost:7777/api/"
client = KamiwazaClient(KAMIWAZA_API_URL)

# Connect OpenAI Client to the Kamiwaza Deployment 
The model behind this deployment was downloaded and deployed in `quickstart.ipynb`.

In [20]:
# Pick the first deployment whose status is DEPLOYED and whose `instances` is non-empty
deployments = client.serving.list_deployments()
valid_deployment = None
for candidate in deployments:
    if candidate.status == 'DEPLOYED' and candidate.instances:
        valid_deployment = candidate
        break

if valid_deployment is None:
    raise ValueError("No valid deployments found. Please ensure a model is deployed.")
print(f"Using {valid_deployment.m_name}")

# Both the HTTP client and the OpenAI client target the deployment's `lb_port` on localhost
deployment_base_url = f"http://localhost:{valid_deployment.lb_port}/v1"

http_client = httpx.Client(
    base_url=deployment_base_url,
    timeout=30.0,
    follow_redirects=True,
)

openai_client = OpenAI(
    api_key="local",
    base_url=deployment_base_url,
    http_client=http_client,
)


Using Qwen2.5-7B-Instruct-GGUF


# Define functions to process the CSVs and prompt the model for the JSON schema

In [25]:
# Function to read CSV files
def read_csv(file_path):
    """Load the CSV at ``file_path`` into a pandas DataFrame and return it."""
    frame = pd.read_csv(file_path)
    return frame

def _strip_code_fence(text):
    """Return ``text`` without a surrounding Markdown code fence, if it has one.

    Chat models frequently wrap JSON answers as ```` ```json ... ``` ````. When
    ``text`` is a string that starts and ends with a triple-backtick fence, the
    fence (and an optional language tag on the opening line) is removed.
    Anything else, including non-string values, is returned unchanged.
    """
    if not isinstance(text, str):
        return text
    stripped = text.strip()
    if len(stripped) >= 6 and stripped.startswith("```") and stripped.endswith("```"):
        inner = stripped[3:-3]
        first_line, newline, rest = inner.partition("\n")
        # Drop the opening fence line when it is empty or a language tag such as "json"
        if newline and (not first_line.strip() or first_line.strip().isalnum()):
            inner = rest
        return inner.strip()
    return text


# Function to process a single row
def process_row(row):
    """Ask the deployed model to convert one contact row into the standard JSON shape.

    Args:
        row: An object exposing ``to_dict()`` (e.g. a row yielded by
            ``DataFrame.iterrows()``); its dict form is embedded in the prompt.

    Returns:
        The value parsed from the model's JSON reply (the prompt requests an
        object with ``name``, ``job_title`` and ``email``; the reply is not
        validated against that schema), or ``None`` when the request fails or
        the reply is not valid JSON.
    """
    system_prompt = """
    You are a data standardization assistant. Your task is to convert input data into a standard JSON format.
    The output should always strictly adhere to this schema:
    {
      "name": "string",
      "job_title": "string",
      "email": "string"
    }
    Ensure that the email is in a standard format (e.g., user@domain.com).
    Only respond with the JSON object, no additional text.
    """

    user_prompt = f"""
    Convert the following data into the standard format:
    {json.dumps(row.to_dict())}

    Please respond with only the JSON object, no additional text.
    """

    # Bound before the try so the JSONDecodeError handler can always print it,
    # even if the error is raised before the reply text has been extracted.
    response_content = None
    try:
        response = openai_client.chat.completions.create(
            model="local-model",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ]
        )
        response_content = response.choices[0].message.content
        print(f"Raw response: {response_content}")  # For debugging
        # Tolerate replies wrapped in a Markdown code fence despite the prompt
        return json.loads(_strip_code_fence(response_content))
    except json.JSONDecodeError as e:
        print(f"Invalid JSON in response: {e}")
        print(f"Response content: {response_content}")
        return None
    except Exception as e:
        print(f"Error processing row: {e}")
        return None

# Function to process a DataFrame
def process_dataframe(df):
    """Run ``process_row`` on each row of ``df`` and return the results in row order."""
    results = []
    for _, record in df.iterrows():
        results.append(process_row(record))
    return results

# Look how messy this is :(

In [26]:
# Print the raw contents of each input CSV, separated by a divider line
csv_files = ['data/contacts_1.csv', 'data/contacts_2.csv', 'data/contacts_3.csv']
divider = '----------------------------------------------------------------'
for csv_path in csv_files:
    print(read_csv(csv_path))
    print(divider)

      full_name         job      email_address
0      John Doe         CEO   john@example.com
1    Jane Smith         CFO   jane@company.com
2  Mike Johnson     Janitor   mike@example.com
3   Sara Connor         CTO   sara@company.com
4    James Dean  HR Manager  james@company.com
----------------------------------------------------------------
            Name                  Position              Email
0      Doe, John   Chief Executive Officer   john@example.com
1    Smith, Jane   Chief Financial Officer   jane@company.com
2  Johnson, Mike                   Janitor   mike@example.com
3   Connor, Sara  Chief Technology Officer   sara@company.com
4    Dean, James                HR Manager  james@company.com
----------------------------------------------------------------
       Employee       Title                 Contact Info
0      John Doe         CEO  john [at] example [dot] com
1    Jane Smith         CFO      jane(at)company(dot)com
2  Mike Johnson     Janitor             mike@

# Clean it with our locally deployed model

In [None]:
# Read and process each CSV file
all_employees = []
failed_rows = 0

for file in csv_files:
    df = read_csv(file)
    results = process_dataframe(df)
    # process_row returns None for rows it could not standardize; keep those
    # out of the output so the "employees" list never contains null entries.
    standardized = [record for record in results if record is not None]
    failed_rows += len(results) - len(standardized)
    all_employees.extend(standardized)

# Create the final JSON structure
final_json = {"employees": all_employees}

# Save the result to a JSON file
with open('data/standardized_contacts.json', 'w') as f:
    json.dump(final_json, f, indent=2)

if failed_rows:
    print(f"Warning: {failed_rows} row(s) could not be standardized and were skipped.")
print("Processing complete. Results saved to standardized_contacts.json")



2024-12-27 16:21:39,452 - httpx - INFO - HTTP Request: POST http://localhost:51100/v1/chat/completions "HTTP/1.1 200 OK"


Raw response: {
  "name": "John Doe",
  "job_title": "CEO",
  "email": "john@example.com"
}


2024-12-27 16:21:40,464 - httpx - INFO - HTTP Request: POST http://localhost:51100/v1/chat/completions "HTTP/1.1 200 OK"


Raw response: {
  "name": "Jane Smith",
  "job_title": "CFO",
  "email": "jane@company.com"
}


2024-12-27 16:21:41,478 - httpx - INFO - HTTP Request: POST http://localhost:51100/v1/chat/completions "HTTP/1.1 200 OK"


Raw response: {
  "name": "Mike Johnson",
  "job_title": "Janitor",
  "email": "mike@example.com"
}


2024-12-27 16:21:42,509 - httpx - INFO - HTTP Request: POST http://localhost:51100/v1/chat/completions "HTTP/1.1 200 OK"


Raw response: {
  "name": "Sara Connor",
  "job_title": "CTO",
  "email": "sara@company.com"
}


2024-12-27 16:21:43,520 - httpx - INFO - HTTP Request: POST http://localhost:51100/v1/chat/completions "HTTP/1.1 200 OK"


Raw response: {
  "name": "James Dean",
  "job_title": "HR Manager",
  "email": "james@company.com"
}


2024-12-27 16:21:44,550 - httpx - INFO - HTTP Request: POST http://localhost:51100/v1/chat/completions "HTTP/1.1 200 OK"


Raw response: {
  "name": "Doe, John",
  "job_title": "Chief Executive Officer",
  "email": "john@example.com"
}


2024-12-27 16:21:45,614 - httpx - INFO - HTTP Request: POST http://localhost:51100/v1/chat/completions "HTTP/1.1 200 OK"


Raw response: {
  "name": "Smith, Jane",
  "job_title": "Chief Financial Officer",
  "email": "jane@company.com"
}


2024-12-27 16:21:46,648 - httpx - INFO - HTTP Request: POST http://localhost:51100/v1/chat/completions "HTTP/1.1 200 OK"


Raw response: {
  "name": "Johnson, Mike",
  "job_title": "Janitor",
  "email": "mike@example.com"
}


2024-12-27 16:21:47,708 - httpx - INFO - HTTP Request: POST http://localhost:51100/v1/chat/completions "HTTP/1.1 200 OK"


Raw response: {
  "name": "Connor, Sara",
  "job_title": "Chief Technology Officer",
  "email": "sara@company.com"
}
