In [1]:
import requests
from zipfile import ZipFile
from io import BytesIO
import os
import json
import pandas as pd
import matplotlib.pyplot as plt
from collections import Counter
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import seaborn as sns
import gzip
import shutil
import transformers
import torch
import os
import json
from langchain_ollama import OllamaLLM
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def get_service_call_method(frame):
    """Return the ``method`` of a frame's service call, or ``None``.

    ``None`` is returned when the frame has no ``service_call`` entry, when
    that entry is empty/falsy, or when it carries no ``method`` key.
    """
    call = frame.get('service_call')
    return call.get('method') if call else None
def get_service_and_service_call(frame):
    """Return ``(service, service_call)`` for a frame.

    Either element is ``None`` when the corresponding key is absent; an
    empty/falsy ``service_call`` is also reported as ``None``.

    NOTE: the "Create Data" cell further down defines a function with this
    same name that returns only the service call, replacing this one.
    """
    call = frame.get('service_call')
    return frame.get('service'), (call or None)
def verify_service_call_methods(dialogue_file):
    """Check that each dialogue in a file uses a single service-call method.

    Args:
        dialogue_file: Path to a UTF-8 JSON file holding a list of dialogues.
            Each dialogue needs a ``dialogue_id`` and a list of ``turns``;
            a turn's optional ``frames`` may carry a ``service_call``.

    Returns:
        A tuple ``(inconsistent_ids, inconsistent_count, dialogue_count)``.
        ``inconsistent_ids`` lists the IDs of dialogues whose service calls
        use more than one distinct method. The same tuple shape is returned
        when nothing is inconsistent (``([], 0, dialogue_count)``), so callers
        never have to special-case ``None``.

    Side effects:
        Prints ``"True"`` when every dialogue is consistent, otherwise a
        ``"False: ..."`` line listing the offending dialogue IDs.
    """
    with open(dialogue_file, 'r', encoding='utf-8') as f:
        dialogues = json.load(f)

    inconsistent_dialogues = []
    for dialogue in dialogues:
        dialogue_id = dialogue['dialogue_id']

        # Collect the distinct methods; frames without a method are ignored.
        methods = set()
        for turn in dialogue['turns']:
            for frame in turn.get('frames', []):
                method = get_service_call_method(frame)
                if method is not None:
                    methods.add(method)

        # More than one distinct method means the dialogue mixes calls.
        if len(methods) > 1:
            inconsistent_dialogues.append(dialogue_id)

    if inconsistent_dialogues:
        print(f"False: Inconsistent methods found in Dialogue IDs {inconsistent_dialogues}")
    else:
        print("True")

    return inconsistent_dialogues, len(inconsistent_dialogues), len(dialogues)

# Example usage: run the check on one dev-split file. The function prints its
# verdict itself; the outer print() additionally shows its return value.
print(verify_service_call_methods('../data/SGD/dstc8-schema-guided-dialogue-master/dev/dialogues_001.json'))

# def verify_service_call_methods(dialogue_file):
#     with open(dialogue_file, 'r', encoding='utf-8') as f:
#         dialogues = json.load(f)
    
#     inconsistent_dialogues = []
#     for dialogue in dialogues:
#         dialogue_id = dialogue['dialogue_id']
#         methods = []
#         for turn in dialogue['turns']:
#             for frame in turn.get('frames', []):
#                 method = get_service_call_method(frame)
#                 if method is not None:
#                     methods.append(method)
        
#         if len(methods) > 1 and len(set(methods)) != 1:
#             inconsistent_dialogues.append((dialogue_id, list(set(methods))))
    
#     if inconsistent_dialogues:
#         for dialogue_id, methods in inconsistent_dialogues:
#             print(f"False: Inconsistent methods found in Dialogue ID {dialogue_id} with methods {methods}")
        
#     else:
#         print("True")

# Example usage:
# verify_service_call_methods('../data/SGD/dstc8-schema-guided-dialogue-master/dev/dialogues_001.json')


False: Inconsistent methods found in Dialogue IDs ['1_00077', '1_00078', '1_00079', '1_00080', '1_00081', '1_00082', '1_00083', '1_00084', '1_00085', '1_00086', '1_00087', '1_00088', '1_00089', '1_00090', '1_00091', '1_00092', '1_00093', '1_00094', '1_00095', '1_00096', '1_00097', '1_00098', '1_00099', '1_00100', '1_00101', '1_00102', '1_00103', '1_00104', '1_00105', '1_00106', '1_00107', '1_00108', '1_00109', '1_00110', '1_00111', '1_00112', '1_00113', '1_00114', '1_00115', '1_00116', '1_00117', '1_00118', '1_00119', '1_00120', '1_00121', '1_00122']
(['1_00077', '1_00078', '1_00079', '1_00080', '1_00081', '1_00082', '1_00083', '1_00084', '1_00085', '1_00086', '1_00087', '1_00088', '1_00089', '1_00090', '1_00091', '1_00092', '1_00093', '1_00094', '1_00095', '1_00096', '1_00097', '1_00098', '1_00099', '1_00100', '1_00101', '1_00102', '1_00103', '1_00104', '1_00105', '1_00106', '1_00107', '1_00108', '1_00109', '1_00110', '1_00111', '1_00112', '1_00113', '1_00114', '1_00115', '1_00116', '

# Create Data

In [3]:
def get_service_and_service_call(frame):
    """Return the frame's service call, guaranteed to have a ``parameters`` key.

    Despite the name, only the service-call dict is returned. The dict is
    updated in place: when it has no ``parameters`` entry, an empty dict is
    stored under that key. Returns ``None`` when the frame has no (or an
    empty) ``service_call``.
    """
    call = frame.get('service_call')
    if not call:
        return None
    call.setdefault('parameters', {})
    return call

def create_baseline_dialogue(input_file, output_file):
    """Reduce each dialogue of a file to its final service call and utterances.

    Args:
        input_file: Path to a UTF-8 JSON file holding a list of dialogues
            (each with ``dialogue_id`` and ``turns``; turns may have
            ``frames``, ``speaker`` and ``utterance``).
        output_file: Path of the JSON file to write (overwritten if present).

    Output records have the keys ``dialogue_id``, ``service``,
    ``service_call``, ``user_utterance`` and ``system_utterance``. Dialogues
    without any service call are skipped.

    ``service`` is the service of the frame that carries the LAST service
    call, so multi-service dialogues are labelled with the service the call
    actually belongs to. If that frame names no service, the first entry of
    the dialogue's ``services`` list is used, and ``''`` if there is none.

    NOTE: the complete service-call dict (method and parameters) is stored
    under ``service_call['method']``. The dataset-building cell reads exactly
    that key, so the layout is kept as is.
    """
    with open(input_file, 'r', encoding='utf-8') as f:
        dialogues = json.load(f)

    baseline_dialogues = []

    for dialogue in dialogues:
        dialogue_id = dialogue['dialogue_id']
        turns = dialogue['turns']

        # Remember the last service call together with the service of its frame.
        last_service_call = None
        last_service = None
        for turn in turns:
            for frame in turn.get('frames', []):
                service_call = get_service_and_service_call(frame)
                if service_call is not None:
                    last_service_call = service_call
                    last_service = frame.get('service')

        if last_service_call is None:
            continue

        if not last_service:
            # Fall back to the dialogue-level list; it may be missing or empty.
            services = dialogue.get('services') or []
            last_service = services[0] if services else ''

        baseline_dialogues.append({
            'dialogue_id': dialogue_id,
            'service': last_service,
            'service_call': {'method': last_service_call},
            'user_utterance': [turn['utterance'] for turn in turns if turn.get('speaker') == 'USER'],
            'system_utterance': [turn['utterance'] for turn in turns if turn.get('speaker') == 'SYSTEM'],
        })

    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(baseline_dialogues, f, indent=4)

# Process all dialogue files from 001 to 020
# NOTE(review): both paths are absolute and machine-specific, while other cells
# reach the same dataset through '../data/SGD/...'. Consider a single
# configurable base directory so the notebook can run on another machine.
for i in range(1, 21):
    input_file = f'c:/Users/paulc/Documents/CodeMemoire/data/SGD/dstc8-schema-guided-dialogue-master/dev/dialogues_{i:03d}.json'
    output_file = f'c:/Users/paulc/Documents/CodeMemoire/experimentation v1/simplifiedDataset/dialogue_baseline_{i:03d}.json'
    create_baseline_dialogue(input_file, output_file)

In [None]:
import os
import glob
import json
import pandas as pd

# Define the directory containing the JSON files
directory = 'c:/Users/paulc/Documents/CodeMemoire/experimentation v1/simplifiedDataset/'

# Get a list of all JSON files in the directory.
# glob returns matches in arbitrary, OS-dependent order; sort them so the rows
# of the generated dataset come out in the same order on every run and machine.
json_files = sorted(glob.glob(os.path.join(directory, '*.json')))

# Define a function to extract properties
def extract_properties(json_data):
    """Split one baseline-dialogue record into model inputs and targets.

    Returns a pair ``(x, y)`` of dicts: ``x`` holds ``dialogue_id``,
    ``user_utterance`` and ``system_utterance``; ``y`` holds ``service`` and
    ``service_call`` (the value stored under the record's
    ``service_call['method']``). Missing keys yield ``None``.
    """
    input_keys = ('dialogue_id', 'user_utterance', 'system_utterance')
    features = {key: json_data.get(key) for key in input_keys}

    call_wrapper = json_data.get('service_call', {})
    target = {
        'service': json_data.get('service'),
        'service_call': call_wrapper.get('method'),
    }
    return features, target

# Initialize empty lists to store all rows for X and Y
# (one entry per dialogue; all_x[k] and all_y[k] describe the same dialogue)
all_x = []
all_y = []

# Iterate over each JSON file in the directory
for input_file in json_files:
    # Load the JSON data
    with open(input_file, 'r', encoding='utf-8') as f:
        json_data = json.load(f)
    
    # Process each dialogue in the JSON file
    for dialogue in json_data:
        x, y = extract_properties(dialogue)
        all_x.append(x)
        all_y.append(y)

# Convert the lists to pandas DataFrames
df_x = pd.DataFrame(all_x)
df_y = pd.DataFrame(all_y)

# Save to CSV or inspect
# NOTE: list- and dict-valued cells are written to CSV as their Python repr
# strings, so they have to be parsed again after reloading.
# NOTE(review): the CSVs go into the same directory that is globbed for
# '*.json'; only the '.csv' extension keeps them out of a later re-run.
output_x_path = os.path.join(directory, 'dataset_x.csv')
output_y_path = os.path.join(directory, 'dataset_y.csv')

df_x.to_csv(output_x_path, index=False, encoding='utf-8')
df_y.to_csv(output_y_path, index=False, encoding='utf-8')

print(f"DataFrames saved to {output_x_path} and {output_y_path}")

### Load Data

In [4]:
import pandas as pd

# Define the directory containing the CSV files
# NOTE(review): absolute, machine-specific path; `os` comes from the first
# import cell, so that cell must have been run.
directory = 'c:/Users/paulc/Documents/CodeMemoire/experimentation v1/simplifiedDataset/'

# Define the paths to the CSV files
output_x_path = os.path.join(directory, 'dataset_x.csv')
output_y_path = os.path.join(directory, 'dataset_y.csv')

# Load the CSV files into DataFrames
# (utterance lists and service calls come back as plain strings holding a
# Python literal, e.g. "['I want to ...', ...]")
df_x = pd.read_csv(output_x_path, encoding='utf-8')
df_y = pd.read_csv(output_y_path, encoding='utf-8')

# Inspect the DataFrames
print(df_x.head())
print(df_y.head())

  dialogue_id                                     user_utterance  \
0     1_00000  ['I want to make a restaurant reservation for ...   
1     1_00001  ['I am not in the mood to cook today. I want t...   
2     1_00002  ['I want to reserve a table at a restaurant, s...   
3     1_00003  ['I would like to make a restaurant reservatio...   
4     1_00004  ['I want to make a dinner reservation on March...   

                                    system_utterance  
0  ['What city do you want to dine in? Do you hav...  
1  ['Which area would you like me to look in? Whi...  
2  ['Which location of Bourbon Steak do you want ...  
3  ['Which restaurant do you want to go to?', 'Wh...  
4  ['What area are you looking in? What time do y...  
         service                                       service_call
0  Restaurants_2  {'method': 'ReserveRestaurant', 'parameters': ...
1  Restaurants_2  {'method': 'ReserveRestaurant', 'parameters': ...
2  Restaurants_2  {'method': 'ReserveRestaurant', 'parame

### Estimate the processing time

In [8]:
# time
def time_estimated(csv_file, seconds_per_line=15):
    """Estimate how long it takes to process every row of a CSV file.

    Args:
        csv_file: Path to the CSV file; each data row counts as one item.
        seconds_per_line: Assumed processing time per row, in seconds.
            Defaults to 15, the value that used to be hard-coded.

    Returns:
        The estimated duration in hours, as a float.
    """
    num_lines = len(pd.read_csv(csv_file))
    estimated_seconds = num_lines * seconds_per_line
    return estimated_seconds / 60 / 60  # seconds -> minutes -> hours

# Example usage: dataset_x.csv and dataset_y.csv are written with one row per
# dialogue each, so either path gives the same row count.
csv_file = output_x_path  # or output_y_path
print(f"Estimated time to process the CSV file: {time_estimated(csv_file)} hours")

Estimated time to process the CSV file: 10.341666666666667 hours


# Model Definition

In [5]:

# LangChain Ollama LLM wrapper configured for the "llama3.2" model.
# NOTE(review): confirm that `device` is a parameter OllamaLLM actually
# honours; if it is silently ignored, it gives a false impression of GPU use.
model = OllamaLLM(model="llama3.2", device="cuda",)

def call_llama(input : str) :
    """Send ``input`` to the module-level ``model`` and return its response."""
    return model.invoke(input=input)

In [128]:
from langchain_ollama import ChatOllama

# Chat-model wrapper for "llama3.2"; this is the client used by the
# prediction loop. format="json" presumably constrains replies to JSON.
# NOTE(review): confirm that `device` is a parameter ChatOllama actually
# honours rather than one that is silently ignored.
llm = ChatOllama(
    model="llama3.2",
    device="cuda",
    format="json",
    )

# Prompt template with placeholders


In [118]:
# Prompt text with three placeholders: {context} (available services),
# {user_input} (the user's request) and {exemple} (a worked example).
# Doubled braces {{ }} are literal braces once the template is formatted.
# NOTE(review): the Task section says the user input is "provided below"
# although it appears above, and the typos ("outputed", "availablle",
# "Exemple") are sent to the model verbatim.
prompt_template = """
You are an AI assistant that processes user requests and maps them to a specific service which can be found in the context (the service outputed by you has to be in the context)

### Context 
all Services availablle to the AI assistant {context}

### Input User Request 
User request: "{user_input}"


### Task
For the user input provided below, return only a JSON indicating:
1. The "service" to call.
2. The "service_call" containing:
   - "method": The method to call in the service.
   - "parameters": A dictionary of key-value pairs matching the required and optional slots.

### Output Format :
outputs data in the following JSON format:
{{
  "service": "<service_name>",
  "service_call": {{
    "method": "<intent_name>",
    "parameters": {{
      "<parameter_name>": "<parameter_value>"
    }}
  }}
}}

### Exemple 
{exemple}
"""

In [119]:
import ast

# Worked example substituted into the {exemple} placeholder of the prompt.
# It must be ONE plain string. Concatenating a str with a whole DataFrame row
# (a pandas Series) yields another Series, so the previous tuple of two Series
# put a truncated pandas repr into the prompt instead of the actual text.
EXAMPLE_ROW = 10


def _parse_cell(value):
    """Return a cell holding a Python-literal string as the object it encodes."""
    # Cells reloaded from CSV are repr strings; values still in memory from
    # the dataset-building cell are already lists/dicts and pass through.
    return ast.literal_eval(value) if isinstance(value, str) else value


# Use the first user utterance only, mirroring how the prediction loop builds
# `user_input`, and render the expected answer in the prompt's JSON format.
_example_input = _parse_cell(df_x.iloc[EXAMPLE_ROW]["user_utterance"])[0]
_example_output = {
    "service": df_y.iloc[EXAMPLE_ROW]["service"],
    "service_call": _parse_cell(df_y.iloc[EXAMPLE_ROW]["service_call"]),
}
exemple = (
    f"input provided by user: {_example_input}\n"
    "output that you should provide in this case:\n"
    f"{json.dumps(_example_output, indent=2)}"
)

# Prompt Template LangChain 

In [120]:
from typing import Any, Dict
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field
import json



# Schema of the "service_call" object the model is asked to return.
class ServiceCall(BaseModel):
    method: str  # name of the method to call on the service
    parameters: Dict[str, Any]  # parameter name -> parameter value

# Schema of the complete answer: the chosen service plus the call to make.
class ServiceY(BaseModel):
    service: str
    service_call: ServiceCall

# Output parser bound to the ServiceY schema.
output_parser = PydanticOutputParser(pydantic_object=ServiceY)


In [121]:
# Template object tying the prompt text to its three input variables and to
# the ServiceY output parser.
# NOTE(review): nothing visible in this notebook reads `prompt` afterwards;
# build_prompt() creates its own PromptTemplate from `prompt_template`.
prompt = PromptTemplate(
    template=prompt_template,
    input_variables=["user_input", "context","exemple"],
    output_parser=output_parser
)

# Load Context (API Schema)

In [122]:
# Load the schema from the JSON file.
# The encoding is stated explicitly, as for the other JSON reads in this
# notebook; without it open() falls back to the platform's locale encoding.
with open("../data/SGD/dstc8-schema-guided-dialogue-master/dev/schema.json", "r", encoding="utf-8") as schema_file:
    schema = json.load(schema_file)

# Convert the schema to a string for the context
# NOTE(review): the whole schema is embedded in every prompt; check that the
# result still fits the model's context window.
context = f"### Schema\nThe schema of available services is as follows:\n{json.dumps(schema, indent=2)}"

In [123]:

# Instantiation using from_template (recommended)
def build_prompt(prompt_template, user_input, context, exemple):
    """Fill the prompt template's placeholders and return the final prompt text.

    Args:
        prompt_template: Template text with ``{user_input}``, ``{context}``
            and ``{exemple}`` placeholders.
        user_input: Value substituted for ``{user_input}``.
        context: Value substituted for ``{context}``.
        exemple: Value substituted for ``{exemple}``.
    """
    values = {"user_input": user_input, "context": context, "exemple": exemple}
    return PromptTemplate.from_template(prompt_template).format(**values)

# Configure the output parser.
# NOTE(review): this re-creates the parser already defined right after the
# ServiceY model; one of the two definitions is redundant.
output_parser = PydanticOutputParser(pydantic_object=ServiceY)

In [129]:
import ast

# Use the prompt builder to call the LLM on the first user utterance of each
# dialogue and collect the raw responses.
# MAX_DIALOGUES caps the run for demonstration purposes; set it to None to
# process every row of df_x (slow: one LLM call per dialogue). An explicit cap
# replaces the former `if index == 0: break`, which only stopped the loop when
# the first row happened to carry the index label 0.
MAX_DIALOGUES = 1

y_predictions = []

rows_to_process = df_x if MAX_DIALOGUES is None else df_x.head(MAX_DIALOGUES)

for index, row in rows_to_process.iterrows():
    # The utterance list is stored as a Python-literal string; parse it back.
    user_utterance_list = ast.literal_eval(row['user_utterance'])
    user_input = user_utterance_list[0]
    print(user_input)
    formatted_prompt = build_prompt(prompt_template, user_input, context,exemple)
    print(formatted_prompt)
    prediction = llm.invoke(input=formatted_prompt)

    # NOTE(review): parsing is still disabled. The response appears to be a
    # message object with a `content` field, so the parser would presumably
    # need `prediction.content` rather than `prediction`; confirm.
    # parsed_output = output_parser.parse(prediction)
    y_predictions.append(prediction)

print(y_predictions[0])



I want to make a restaurant reservation for 2 people at half past 11 in the morning.

You are an AI assistant that processes user requests and maps them to a specific service which can be found in the context (the service outputed by you has to be in the context)

### Context 
all Services availablle to the AI assistant ### Schema
The schema of available services is as follows:
[
  {
    "service_name": "Alarm_1",
    "description": "Manage alarms by getting and setting them easily",
    "slots": [
      {
        "name": "alarm_time",
        "description": "Time of the alarm",
        "is_categorical": false,
        "possible_values": []
      },
      {
        "name": "alarm_name",
        "description": "Name of the alarm",
        "is_categorical": false,
        "possible_values": []
      },
      {
        "name": "new_alarm_time",
        "description": "Time to set for the new alarm",
        "is_categorical": false,
        "possible_values": []
      },
      {
        "n

In [127]:
# Show the response of the most recent llm.invoke() call. `prediction` is the
# variable left behind by the prediction loop, so that cell must run first.
print(prediction)

content='To solve this problem, we can use the provided dialogue data to match the user\'s input with a suitable service call. \n\nGiven the user input "I want to make a restaurant reservation for 2 people at half past 11 in the morning," let\'s first identify the required and optional slots:\n\n- Required slot: `location` (since it\'s not mentioned, we\'ll consider it as an unknown or missing location)\n- Optional slot: `date`\n\nWe can observe that the user\'s input contains a specific time ("half past 11 in the morning") and a mention of "2 people" which suggests that they want to book for two individuals. This information will likely be required by the restaurant reservation service.\n\nNow, let\'s parse this into a JSON response format:\n\n```json\n{\n  "service": "FindAttractions",\n  "service_call": {\n    "method": "GetWeather",\n    "parameters": {\n      "city": "[Unknown Location]", \n      "date": "[2019-03-01]"\n    }\n  }\n}\n```\n\nHowever, the provided solution doesn\'t

In [None]:
# Show the first stored prediction again (requires the prediction loop to
# have appended at least one response).
print(y_predictions[0])
