In [None]:
import requests
from zipfile import ZipFile
from io import BytesIO
import os
import json
import pandas as pd
import matplotlib.pyplot as plt
from collections import Counter
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import seaborn as sns
import gzip
import shutil
import transformers
import torch
import os
import json
from langchain_ollama import OllamaLLM
import numpy as np

In [None]:
def get_service_call_method(frame):
    """Return the ``method`` recorded in a frame's service call, if any.

    Args:
        frame: Mapping describing one dialogue frame. Its optional
            ``'service_call'`` entry is read with ``.get`` and, when truthy,
            is itself queried with ``.get('method')``.

    Returns:
        The value stored under ``service_call['method']``, or ``None`` when
        the frame has no (or an empty) ``'service_call'`` or the call has no
        ``'method'`` key.
    """
    call = frame.get('service_call')
    return call.get('method') if call else None
def get_service_and_service_call(frame):
    """Return a frame's service together with its service call.

    Note: a later cell in this notebook defines a function with the same name
    but a different return value; whichever definition ran last is the one in
    effect.

    Args:
        frame: Mapping describing one dialogue frame.

    Returns:
        A ``(service, service_call)`` tuple. ``service`` is ``frame['service']``
        or ``None`` when absent; ``service_call`` is the frame's
        ``'service_call'`` value, with an absent or empty one reported as
        ``None``.
    """
    # `or None` collapses every falsy service call (missing, None, {}) to None.
    return frame.get('service'), frame.get('service_call') or None
def verify_service_call_methods(dialogue_file):
    """Report whether every dialogue in a file uses a single service-call method.

    The file is parsed as JSON and treated as a list of dialogues. For each
    dialogue, the method of every frame's service call is collected via
    ``get_service_call_method``; a dialogue is flagged when more than one
    distinct method is found.

    Note: this definition is replaced by a second one with the same name
    further down in the same cell.

    Args:
        dialogue_file: Path to the JSON dialogue file.

    Returns:
        None. Prints ``True`` when no dialogue is flagged, otherwise a single
        ``False: ...`` line listing the flagged dialogue IDs.
    """
    with open(dialogue_file, 'r', encoding='utf-8') as handle:
        all_dialogues = json.load(handle)

    flagged_ids = []
    for entry in all_dialogues:
        entry_id = entry['dialogue_id']
        found = [
            get_service_call_method(frame)
            for turn in entry['turns']
            for frame in turn.get('frames', [])
        ]
        # Frames without a service call contribute None and are ignored.
        distinct = {name for name in found if name is not None}
        if len(distinct) > 1:
            flagged_ids.append(entry_id)

    if not flagged_ids:
        print("True")
        return
    print(f"False: Inconsistent methods found in Dialogue IDs {flagged_ids}")

# Example usage: run the consistency check on the first dev-split dialogue file.
# The path is relative, so it is resolved against the kernel's current working
# directory.
verify_service_call_methods('data/SGD/dstc8-schema-guided-dialogue-master/dev/dialogues_001.json')

def verify_service_call_methods(dialogue_file):
    """Report dialogues whose service calls do not all use the same method.

    The file is parsed as JSON and treated as a list of dialogues. For each
    dialogue, the method of every frame's service call is collected via
    ``get_service_call_method``. A dialogue is reported when it contains more
    than one distinct method.

    Args:
        dialogue_file: Path to the JSON dialogue file.

    Returns:
        None. Prints ``True`` when every dialogue is consistent; otherwise
        prints one ``False: ...`` line per inconsistent dialogue, listing its
        distinct methods in order of first appearance.
    """
    with open(dialogue_file, 'r', encoding='utf-8') as f:
        dialogues = json.load(f)

    inconsistent_dialogues = []
    for dialogue in dialogues:
        dialogue_id = dialogue['dialogue_id']
        methods = []
        for turn in dialogue['turns']:
            for frame in turn.get('frames', []):
                method = get_service_call_method(frame)
                if method is not None:
                    methods.append(method)

        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # report is identical on every run (a set's iteration order for
        # strings depends on the per-process hash seed).
        unique_methods = list(dict.fromkeys(methods))
        if len(unique_methods) > 1:
            inconsistent_dialogues.append((dialogue_id, unique_methods))

    if inconsistent_dialogues:
        for dialogue_id, methods in inconsistent_dialogues:
            print(f"False: Inconsistent methods found in Dialogue ID {dialogue_id} with methods {methods}")
    else:
        print("True")

# Example usage: same file as above, this time with the definition that also
# prints the conflicting methods. The path is relative to the kernel's current
# working directory.
verify_service_call_methods('data/SGD/dstc8-schema-guided-dialogue-master/dev/dialogues_001.json')


In [None]:
def get_service_and_service_call(frame):
    """Return a frame's service call, guaranteeing it has a ``'parameters'`` key.

    Despite the name, only the service call is returned (not the service).

    Args:
        frame: Mapping describing one dialogue frame.

    Returns:
        The frame's own ``'service_call'`` mapping (not a copy), or ``None``
        when the frame has no service call or an empty one.

    Side effects:
        When the call lacks ``'parameters'``, an empty dict is stored under
        that key on the frame's own service-call mapping.
    """
    call = frame.get('service_call')
    if not call:
        return None
    # Writes through to the caller's data: `call` is the object held by `frame`.
    call.setdefault('parameters', {})
    return call

def create_baseline_dialogue(input_file, output_file):
    """Write a simplified copy of a dialogue file, one record per dialogue.

    For every dialogue that contains at least one service call, a record is
    emitted holding the dialogue ID, the first entry of the dialogue's
    ``'services'`` list, the last service call found (scanning turns and frames
    in order), and the USER / SYSTEM utterances as two separate lists.
    Dialogues without any service call are skipped.

    Args:
        input_file: Path to a JSON file containing a list of dialogues.
        output_file: Path of the JSON file to write. Its parent directory is
            created when it does not exist yet.

    Returns:
        None. The records are written to ``output_file`` as indented JSON.
    """
    with open(input_file, 'r', encoding='utf-8') as f:
        dialogues = json.load(f)

    baseline_dialogues = []

    for dialogue in dialogues:
        dialogue_id = dialogue['dialogue_id']

        # Only the final service call is used, so track it directly instead of
        # collecting every call.
        last_service_call = None
        for turn in dialogue['turns']:
            for frame in turn.get('frames', []):
                service_call = get_service_and_service_call(frame)
                if service_call is not None:
                    last_service_call = service_call

        if last_service_call is None:
            continue

        # A missing, null or empty 'services' list yields '' rather than an
        # IndexError/TypeError on the [0] lookup.
        services = dialogue.get('services') or []

        baseline_dialogues.append({
            'dialogue_id': dialogue_id,
            # NOTE(review): this is the dialogue's first listed service, not
            # necessarily the service of the frame that made the last call --
            # confirm this is intended for multi-service dialogues.
            'service': services[0] if services else '',
            # NOTE(review): the whole call (method + parameters) is stored
            # under 'method'. Kept as-is because the dataset-building cell
            # reads service_call['method']; confirm the nesting is intended.
            'service_call': {'method': last_service_call},
            'user_utterance': [turn['utterance'] for turn in dialogue['turns'] if turn.get('speaker') == 'USER'],
            'system_utterance': [turn['utterance'] for turn in dialogue['turns'] if turn.get('speaker') == 'SYSTEM'],
        })

    # Create the target directory up front so a missing folder does not
    # discard the work done above.
    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(baseline_dialogues, f, indent=4)

# Process all dialogue files from 001 to 020 (the index is zero-padded to three
# digits in both the input and the output file names).
# NOTE(review): both paths are absolute and specific to one machine, while
# other cells in this notebook use relative paths -- consider deriving them
# from a single configurable data directory.
for i in range(1, 21):
    input_file = f'c:/Users/paulc/Documents/CodeMemoire/data/SGD/dstc8-schema-guided-dialogue-master/dev/dialogues_{i:03d}.json'
    output_file = f'c:/Users/paulc/Documents/CodeMemoire/experimentation v1/simplifiedDataset/dialogue_baseline_{i:03d}.json'
    create_baseline_dialogue(input_file, output_file)

In [None]:
import os
import glob
import json
import pandas as pd

# Directory containing the simplified JSON files; the CSV exports produced
# further down in this cell are written to the same directory.
# NOTE(review): absolute, machine-specific path -- consider making it configurable.
directory = 'c:/Users/paulc/Documents/CodeMemoire/experimentation v1/simplifiedDataset/'

# Collect every *.json file directly inside that directory. glob makes no
# promise about the order of the returned paths.
json_files = glob.glob(os.path.join(directory, '*.json'))

# Define a function to extract properties
def extract_properties(json_data):
    """Split one simplified dialogue record into feature and label dicts.

    Args:
        json_data: Mapping for a single dialogue record. Missing keys are
            reported as ``None``.

    Returns:
        A ``(x, y)`` tuple where ``x`` holds ``dialogue_id``,
        ``user_utterance`` and ``system_utterance``, and ``y`` holds
        ``service`` plus ``service_call`` (the value found under
        ``json_data['service_call']['method']``).
    """
    feature_keys = ('dialogue_id', 'user_utterance', 'system_utterance')
    features = {key: json_data.get(key) for key in feature_keys}

    # A record without 'service_call' falls back to an empty mapping, so the
    # label's 'service_call' entry becomes None.
    call = json_data.get('service_call', {})
    labels = {
        'service': json_data.get('service'),
        'service_call': call.get('method'),
    }
    return features, labels

# Accumulate one feature row (X) and one label row (Y) per dialogue.
all_x = []
all_y = []

# glob returns paths in arbitrary order; iterate them sorted so the row order
# of the DataFrames (and of the CSV files written below) is the same on every
# run and on every platform.
for input_file in sorted(json_files):
    # Each file holds a JSON list of simplified dialogue records.
    with open(input_file, 'r', encoding='utf-8') as f:
        json_data = json.load(f)

    for dialogue in json_data:
        x, y = extract_properties(dialogue)
        all_x.append(x)
        all_y.append(y)

# Build each DataFrame once from the accumulated records.
df_x = pd.DataFrame(all_x)
df_y = pd.DataFrame(all_y)

# Persist both frames next to the source JSON files.
output_x_path = os.path.join(directory, 'dataset_x.csv')
output_y_path = os.path.join(directory, 'dataset_y.csv')

df_x.to_csv(output_x_path, index=False, encoding='utf-8')
df_y.to_csv(output_y_path, index=False, encoding='utf-8')

print(f"DataFrames saved to {output_x_path} and {output_y_path}")

### Estimate the processing time

In [23]:
# Processing-time estimate
def time_estimated(csv_file, seconds_per_line=15):
    """Estimate how long it takes to process every row of a CSV file.

    Args:
        csv_file: Path to the CSV file; it is loaded with ``pd.read_csv`` and
            its number of data rows is used as the workload size.
        seconds_per_line: Assumed processing cost of one row, in seconds.
            Defaults to 15, the value previously hard-coded here.

    Returns:
        The estimated duration in hours, as a float.
    """
    num_lines = len(pd.read_csv(csv_file))
    estimated_seconds = num_lines * seconds_per_line
    return estimated_seconds / 60 / 60  # seconds -> minutes -> hours

# Example usage: estimate the run time for the feature CSV written earlier.
# (output_y_path would work as well.)
csv_file = output_x_path  # or output_y_path
print(f"Estimated time to process the CSV file: {time_estimated(csv_file)} hours")

Estimated time to process the CSV file: 10.341666666666667 hours


In [24]:

# Shared LLM client used by the cells below; "llama3.2" is the model name
# handed to OllamaLLM.
model = OllamaLLM(model="llama3.2")
def call_llama(input : str) :
    """Send ``input`` to the module-level ``model`` and return its response.

    The return value is whatever ``model.invoke`` returns (presumably the
    generated text -- confirm against the langchain_ollama documentation).

    Note: the parameter name shadows the built-in ``input`` inside this
    function.
    """
    return model.invoke(input=input)

In [25]:
# Load the service schema from the JSON file. The encoding is given explicitly
# (as for every other file read in this notebook) so that decoding does not
# depend on the platform's locale default.
with open("../data/SGD/dstc8-schema-guided-dialogue-master/dev/schema.json", "r", encoding="utf-8") as schema_file:
    schema = json.load(schema_file)

# Serialise the whole schema into the text block that is prepended to every
# prompt as context.
context = f"### Schema\nThe schema of available services is as follows:\n{json.dumps(schema, indent=2)}"

In [26]:

# Prompt skeleton filled in with str.format(user_input=...). The only
# placeholder is {user_input}; the doubled braces ({{ and }}) are format
# escapes that come out as literal { and } in the JSON example shown to the model.
prompt_template = """
You are an AI assistant that processes user requests and maps them to specific services, methods, and parameters based on the provided schema.

### Task
For the user input provided below, return only a JSON response indicating:
1. The "service" to call.
2. The "service_call" containing:
   - "method": The intent to invoke.
   - "parameters": A dictionary of key-value pairs matching the required and optional slots.

### Input 
User request: "{user_input}"

### Output Format; it absolutely needs to be exactly in this format because it will go into a JSON parser for evaluation
{{
  "service": "<service_name>",
  "service_call": {{
    "method": "<intent_name>",
    "parameters": {{
      "<parameter_name>": "<parameter_value>"
    }}
  }}
}}
"""



In [27]:
# Run the model on the first few dialogues and collect the raw responses.
# Only a small sample is processed here; raise MAX_PREDICTIONS for a longer run.
MAX_PREDICTIONS = 10

y_predictions = []
for _, row in df_x.head(MAX_PREDICTIONS).iterrows():
    # iterrows() yields (index, Series) pairs. Serialise the row's fields
    # explicitly: str() of that pair is the pandas display repr of a tuple,
    # which includes the index and may truncate long values.
    user_input = json.dumps(row.to_dict(), ensure_ascii=False, default=str)

    # Combine the schema context, the task template and the user input.
    final_prompt = f"this are the services available : {context}\n\n this is your task {prompt_template.format(user_input=user_input)}"
    prediction = model.invoke(input=final_prompt)
    y_predictions.append(prediction)

print(y_predictions)
# print(call_llama(final_prompt))

['Based on the provided input, I analyzed the user request and determined that it is a request to make a restaurant reservation. The system utterance indicates that the user wants to dine in a specific city.\n\nHere is the response:\n\n```\n{\n  "service": "FindAttractions",\n  "service_call": {\n    "method": "Browse attractions in a given city",\n    "parameters": {\n      "location": "dine_in_city"\n    }\n  }\n}\n```\n\nNote that I assumed the city name to be "dine_in_city" based on the system utterance. However, since it is not explicitly mentioned in the user request, this may need to be adjusted based on further clarification or additional context.', 'Here\'s the Python code to solve this task:\n\n```python\nimport json\n\n# Define the service schema\nservices = [\n    {\n        "service_name": "Weather",\n        "intents": {\n            "FindAttractions": {\n                "required_slots": ["location"],\n                "optional_slots": {"free_entry": "dontcare", "good_fo