# Importing necessary libraries

In [1]:
import pandas as pd
import numpy as np
import torch
import os
import sys

from langchain_anthropic import ChatAnthropic
from langchain_core.prompts import ChatPromptTemplate
from sentence_transformers import SentenceTransformer

  _torch_pytree._register_pytree_node(


# Importing loading_data class from data_loader.py and evaluation class from model_evaluation.py  in the src folder

In [2]:
# Retrieving the current working directory 
current_directory = os.getcwd()

# getting the parent directory of the current working directory
parent_directory = os.path.dirname(current_directory)

# Constructing a path to the "Data" directory located inside a directory named "src"
src_data_directory = os.path.join(parent_directory, "src", "Data")

src_model_directory = os.path.join(parent_directory, "src", "model")

# allowing Python to search for modules in this directory.
sys.path.append(src_data_directory)
sys.path.append(src_model_directory)

#Importing the data_preprocessor class from the data_cleaner module located in the src -> Data.
from data_loader import loading_data

#Importing the evaluation class from the model_evaluation module located in the src -> model.
from model_evaluation import evaluation

# Creating an object of loading_data class

In [3]:
load_object=loading_data()

#checking object is created and getting the address of it
load_object

<data_loader.loading_data at 0x14dfee590>

# Creating an object of evaluation class

In [4]:
eval_object=evaluation()

#checking object is created and getting the address of it
eval_object

<model_evaluation.evaluation at 0x14e08ca90>

# Loading all Embeddings  from Models folder using loading_data class object

In [5]:
#getting the path where we have saved the embeddings of model_1
model_1_path =load_object.get_file_path("embeddings_all_mpnet_base_v2.npy","Models")

#Loading the embeddings of model_1 in notebook
embeddings_model_1 = np.load(model_1_path)

#getting the path where we have saved the embeddings of model_2
model_2_path =load_object.get_file_path("embeddings_multilingual_e5_large_instruct.npy","Models")

#Loading the embeddings of model_2 in notebook
embeddings_model_2 = np.load(model_2_path)

#getting the path where we have saved the embeddings of model_3
model_3_path =load_object.get_file_path("embeddings_intfloat_e5_base_v2.npy","Models")

#Loading the embeddings of model_3 in notebook
embeddings_model_3 = np.load(model_3_path)

#getting the path where we have saved the embeddings of model_4
model_4_path =load_object.get_file_path("embeddings_mixedbread_ai_mxbai_embed_2d_large_v1.npy","Models")

#Loading the embeddings of model_4 in notebook
embeddings_model_4 = np.load(model_4_path)

# Loading all the models

In [6]:
model_1 = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')

model_2 = SentenceTransformer('intfloat/multilingual-e5-large-instruct')

model_3 = SentenceTransformer('intfloat/e5-base-v2')

model_4=SentenceTransformer("mixedbread-ai/mxbai-embed-2d-large-v1")

  _torch_pytree._register_pytree_node(


# Loading the processed_data and converting into the list

In [7]:
# getting the path of processed data
processed_file_path=load_object.get_file_path("processed_data.csv","Data")

# reading the processed_data
df=pd.read_csv(processed_file_path)

# converting the tokenized_docstring into list
list_data = df['tokenized_docstring'].tolist()

In [8]:
df['tokenized_docstring'].value_counts()

tokenized_docstring
return json dictionary representing model                                                                                                                                                                                                                                                                                 286
return default collector setting                                                                                                                                                                                                                                                                                          102
subclass may override method                                                                                                                                                                                                                                                                                               52
handle incoming mavlink pa

# Loading the testing data

In [9]:
query_path=load_object.get_file_path("query.csv","Data","testing_data")
queries=pd.read_csv(query_path)


In [10]:
len(queries)

57

# Key for using Claude API

In [50]:
# key will be provided by the you here for using the claude api
key='claude_api_key'

# function for checking the response form the Claude 

In [71]:
def check_response(Questions, top_match_code):
    
    # converting the top_match_code in the dataframe
    data=top_match_code
    
    
    # Questions is the response from the human for the claude
    human =Questions
    
    # Initialize the ChatAnthropic object
    chat = ChatAnthropic(anthropic_api_key=key ,temperature=0, model_name="claude-3-haiku-20240307")

    # Defining system message with task description and data
    system = (
    """ Your task is to provide a response of only 'YES' if there is a 75 percentage matching of code for
        the human input and the data,
        or only 'No' if there isn't,
        when comparing the data to human input. Do not generate unnecessary response give 'YES' or 'NO' only.
        
    data: {data}
    human: {human}
    """
    )
    
    # Creating the ChatPromptTemplate
    prompt = ChatPromptTemplate.from_messages([("system", system), ("human",human)])

    # Creating the chain combining prompt and chat
    chain = prompt | chat
    
    # Invoking the chain with data and human input
    response=chain.invoke(
    {
         "data": data,
        "human": human,
    }
    )
    return response
    

# Function for adding the column in dataframe

In [72]:
def add_column(df, query):
    
    # Add a new column named 'Query' with the same value for all rows
    if query=='Match':
        
        df['Match']=None
        
        return df
    
    df['Query'] = query
    
    return df



# Function for reordering the column in a dataframe

In [73]:
def reorder_columns(df):
    
    # Reorder columns based on the specified order
    column_order = ['Query', 'Docstrings', 'Code','Match']
    
    df = df[column_order]
    
    return df


# Evaluting the model_1

In [83]:
# order in which column of databse will be present
column_order = ['Query', 'Docstrings', 'Code','Match']

#creating the dataframe for model_1 which consists of for particular query all top code and
#match column shows whether it is correctly retrieved or not
model_1_query_response=pd.DataFrame(columns=column_order)

for i in range(len(queries)):
    #getting the particular query from queries 
    query=queries.loc[i].Questions
    
    # getting the table dataframe that consists of docstring and code 
    #that has top match with query using cosine similarity
    table=eval_object.get_top_code_and_docstring(query,embeddings_model_1,model_1,list_data,df)
    
    #adding the column query with all values as 
    table=add_column(table,query)
    
    #adding columns Match that defines whether the code for given query is correctly retrieved or not
    table=add_column(table,'Match')
    
    #reordering the table for better view
    table=reorder_columns(table)
    
    #iterating in the table
    for index,row in table.iterrows():
        
        #fetching the current code for query
        result = row['Code']
        
        #getting the response from claude api
        response=check_response(query,row['Code'])
        
        print(f"{i+1}.{index+1}")
        
        # printing the reponse from the claude api using the key
        print(response)
    
        #Checking the value present in the response generated by the claude
        if 'YES' in response.content:
            response='YES'
        else :
            response='NO'
            
        #Giving the value of response in match column for current row
        table.at[index,'Match']=response
    
    #merging the table dataframe and model_1_query_response in single dataframe
    model_1_query_response= pd.concat([model_1_query_response, table], ignore_index=True)

1.1
content='NO'
1.2
content='YES'
1.3
content='YES'
1.4
content='YES'
1.5
content='NO'
1.6
content='NO'
1.7
content='YES'
1.8
content='YES'
1.9
content='YES'
1.10
content='YES'
2.1
content='YES'
2.2
content='The code for determining the number of processing units available on the system is:\n\n```python\nif "cores" in sysinfo:\n    cores_per_job = min(cores_per_job, int(sysinfo["cores"]))\n```\n\nThis checks the `sysinfo` dictionary, which contains information about the system resources, for the key `"cores"`. If it exists, it uses the value of `"cores"` to set the `cores_per_job` variable to the minimum of the current `cores_per_job` and the number of cores available on the system.\n\nThis ensures that the number of cores used per job does not exceed the total number of cores available on the system.'
2.3
content='YES'
2.4
content='YES'
2.5
content='YES'
2.6
content='YES'
2.7
content='YES'
2.8
content='YES'
2.9
content='Here\'s a Python code snippet that can get the number of process

20.9
content='YES'
20.10
content='NO'
21.1
content='YES'
21.2
content='YES'
21.3
content='YES'
21.4
content='YES'
21.5
content='YES'
21.6
content='YES'
21.7
content='YES'
21.8
content='NO'
21.9
content='YES'
21.10
content='YES'
22.1
content='YES'
22.2
content='YES'
22.3
content='YES'
22.4
content='NO'
22.5
content='YES'
22.6
content='YES'
22.7
content='YES'
22.8
content='YES'
22.9
content='NO'
22.10
content='NO'
23.1
content='YES'
23.2
content='NO'
23.3
content='NO'
23.4
content='YES'
23.5
content='YES'
23.6
content='YES'
23.7
content='YES'
23.8
content='NO'
23.9
content='NO'
23.10
content='NO'
24.1
content='YES'
24.2
content='YES'
24.3
content='YES'
24.4
content='YES'
24.5
content='YES'
24.6
content='NO'
24.7
content='NO'
24.8
content='NO'
24.9
content='YES'
24.10
content='YES'
25.1
content='YES'
25.2
content='NO'
25.3
content='NO'
25.4
content='YES'
25.5
content='YES'
25.6
content='NO'
25.7
content='YES'
25.8
content='NO'
25.9
content='YES'
25.10
content='YES'
26.1
content='YES'
26.2

46.8
content='YES'
46.9
content='NO'
46.10
content='YES'
47.1
content='YES'
47.2
content='YES'
47.3
content='YES'
47.4
content='YES'
47.5
content='NO'
47.6
content='YES'
47.7
content='Based on the provided code, there is a 75% or higher matching of the human input. The response is:\n\nYES'
47.8
content='YES'
47.9
content='NO'
47.10
content='YES'
48.1
content='YES'
48.2
content='Here\'s a Python function that can detect if a file is in CSV format:\n\n```python\nimport csv\nimport os\n\ndef is_csv_file(file_path):\n    """\n    Checks if a file is in CSV format.\n    \n    Args:\n        file_path (str): The path to the file to be checked.\n        \n    Returns:\n        bool: True if the file is in CSV format, False otherwise.\n    """\n    try:\n        with open(file_path, \'r\') as file:\n            # Try to read the first few lines of the file using the csv.Sniffer\n            sample = file.read(1024)\n            file.seek(0)\n            \n            # Use the csv.Sniffer to d

48.7
content='YES'
48.8
content='Here\'s a function that can detect if a given set of bytes represents a CSV (Comma-Separated Values) formatted file:\n\n```python\nimport csv\nimport io\n\ndef is_csv(bytes_to_check):\n    """\n    Detects if the given bytes represent a CSV formatted file.\n\n    :param bytes_to_check: A chunk of bytes to check.\n    :returns: True if the bytes appear to be a CSV file, False otherwise.\n    """\n    try:\n        # Try to read the bytes as a CSV file\n        csv_reader = csv.reader(io.BytesIO(bytes_to_check))\n        # Check if the file has at least one row\n        next(csv_reader, None)\n        return True\n    except (csv.Error, StopIteration):\n        # If there was an error reading the file as CSV, or no rows were found, it\'s not a CSV\n        return False\n```\n\nHere\'s how the function works:\n\n1. The function takes a `bytes_to_check` parameter, which is the chunk of bytes to be checked for CSV format.\n2. It uses the `csv.reader` functio

57.9
content='NO'
57.10
content='YES'


# Evaluating the model_2

In [84]:
# order in which column of databse will be present
column_order = ['Query', 'Docstrings', 'Code','Match']

#creating the dataframe for model_2 which consists of for particular query all top code and
#match column shows whether it is correctly retrieved or not
model_2_query_response=pd.DataFrame(columns=column_order)

for i in range(len(queries)):
    #getting the particular query from queries 
    query=queries.loc[i].Questions
    
    # getting the table dataframe that consists of docstring and code 
    #that has top match with query using cosine similarity
    table=eval_object.get_top_code_and_docstring(query,embeddings_model_2,model_2,list_data,df)
    
    #adding the column query with all values as 
    table=add_column(table,query)
    
    #adding columns Match that defines whether the code for given query is correctly retrieved or not
    table=add_column(table,'Match')
    
    #reordering the table for better view
    table=reorder_columns(table)
    
    #iterating in the table
    for index,row in table.iterrows():
        
        #fetching the current code for query
        result = row['Code']
        
        #getting the response from claude api
        response=check_response(query,row['Code'])
        
        print(f"{i+1}.{index+1}")
        
        # printing the reponse from the claude api using the key
        print(response)
    
        #Checking the value present in the response generated by the claude
        if 'YES' in response.content:
            response='YES'
        else :
            response='NO'
            
        #Giving the value of response in match column for current row
        table.at[index,'Match']=response
    
    #merging the table dataframe and model_2_query_response in single dataframe
    model_2_query_response= pd.concat([model_2_query_response, table], ignore_index=True)

1.1
content='YES'
1.2
content='YES'
1.3
content='NO'
1.4
content='YES'
1.5
content='NO'
1.6
content='YES'
1.7
content='NO'
1.8
content='YES'
1.9
content='NO'
1.10
content='NO'
2.1
content='YES'
2.2
content='YES'
2.3
content='YES'
2.4
content='NO'
2.5
content='YES'
2.6
content='YES'
2.7
content='NO'
2.8
content='NO'
2.9
content='YES'
2.10
content='YES'
3.1
content='NO'
3.2
content='NO'
3.3
content='NO'
3.4
content='YES'
3.5
content='NO'
3.6
content='YES'
3.7
content='YES'
3.8
content='YES'
3.9
content='YES'
3.10
content='NO'
4.1
content='NO'
4.2
content='YES'
4.3
content='YES'
4.4
content='YES'
4.5
content='YES'
4.6
content='NO'
4.7
content='YES'
4.8
content='YES'
4.9
content='NO'
4.10
content='NO'
5.1
content='YES'
5.2
content='YES'
5.3
content='YES'
5.4
content='The provided code does not contain any information about calculating the cosine similarity of a TF-IDF vector. The code appears to be related to reordering the eigenvalues and eigenvectors of a system. Without any additional c

26.7
content='YES'
26.8
content='NO'
26.9
content='YES'
26.10
content='YES'
27.1
content='YES'
27.2
content='YES'
27.3
content='NO'
27.4
content='YES'
27.5
content='YES'
27.6
content='YES'
27.7
content='NO'
27.8
content='YES'
27.9
content='YES'
27.10
content='YES'
28.1
content='YES'
28.2
content='YES'
28.3
content='YES'
28.4
content='YES'
28.5
content='YES'
28.6
content='YES'
28.7
content='YES'
28.8
content='YES'
28.9
content='YES'
28.10
content='YES'
29.1
content='YES'
29.2
content='YES'
29.3
content='YES'
29.4
content='YES'
29.5
content='YES'
29.6
content='NO'
29.7
content='NO'
29.8
content='YES'
29.9
content='YES'
29.10
content='YES'
30.1
content='YES'
30.2
content='NO'
30.3
content='NO'
30.4
content='YES'
30.5
content='YES'
30.6
content='YES'
30.7
content='YES'
30.8
content='NO'
30.9
content='YES'
30.10
content='YES'
31.1
content='YES'
31.2
content='YES'
31.3
content='YES'
31.4
content='YES'
31.5
content='NO'
31.6
content='YES'
31.7
content='NO'
31.8
content='YES'
31.9
content='YES

48.2
content="Here's a simple Python function that can detect if a file is in CSV format:\n\n```python\nimport csv\n\ndef is_csv(file_path):\n    try:\n        with open(file_path, 'r') as file:\n            dialect = csv.Sniffer().sniff(file.read(1024))\n            file.seek(0)\n            csv.reader(file, dialect)\n        return 'YES'\n    except csv.Error:\n        return 'NO'\n```\n\nThis function uses the `csv.Sniffer` class to analyze the first 1024 bytes of the file and determine the CSV dialect (e.g., comma-separated, tab-separated, etc.). If the `csv.Sniffer` is able to successfully sniff the dialect, the function returns `'YES'`, indicating that the file is in CSV format. If the `csv.Sniffer` raises a `csv.Error`, the function returns `'NO'`, indicating that the file is not in CSV format.\n\nYou can use this function like this:\n\n```python\nprint(is_csv('example.csv'))  # Output: 'YES'\nprint(is_csv('example.txt'))  # Output: 'NO'\n```\n\nNote that this function assumes t

48.9
content='Here\'s a Python function that can detect if a file is in CSV format:\n\n```python\nimport csv\nimport os\n\ndef is_csv(file_path):\n    """\n    Detects if a file is in CSV format.\n    \n    Args:\n        file_path (str): The path to the file to be checked.\n        \n    Returns:\n        bool: True if the file is in CSV format, False otherwise.\n    """\n    try:\n        with open(file_path, \'r\') as file:\n            # Try to read the first few lines of the file\n            sample = file.read(4096)\n            \n            # Attempt to detect the CSV dialect\n            dialect = csv.Sniffer().sniff(sample, delimiters=\',;\\t\')\n            \n            # Check if the file has a header row\n            has_header = csv.Sniffer().has_header(sample)\n            \n            return True\n    except (csv.Error, UnicodeDecodeError):\n        # If there\'s an error, the file is likely not in CSV format\n        return False\n```\n\nHere\'s how the function work

56.7
content='To compute the product set of two arrays and sort the result, you can use the following Python code:\n\n```python\narray_a = [1, 2, 3]\narray_b = [4, 5, 6]\n\nproduct_set = [(x, y) for x in array_a for y in array_b]\nproduct_set.sort()\n\nprint(product_set)\n```\n\nThis will output:\n\n```\n[(1, 4), (1, 5), (1, 6), (2, 4), (2, 5), (2, 6), (3, 4), (3, 5), (3, 6)]\n```\n\nThe key steps are:\n\n1. Use a list comprehension to generate the product set of the two arrays. The expression `(x, y) for x in array_a for y in array_b` creates a tuple for each combination of elements from the two arrays.\n2. Sort the resulting list of tuples using the `sort()` method.\n\nThis approach generates the product set and sorts the result in a concise and efficient manner.'
56.8
content='NO'
56.9
content='YES'
56.10
content='To compute the product set of two arrays and sort the result, you can use the following Python code:\n\n```python\nimport itertools\n\narray_a = [1, 2, 3]\narray_b = [4, 5

# Evaluating the model_3

In [85]:
# order in which column of databse will be present
column_order = ['Query', 'Docstrings', 'Code','Match']

#creating the dataframe for model_3 which consists of for particular query all top code and
#match column shows whether it is correctly retrieved or not
model_3_query_response=pd.DataFrame(columns=column_order)

for i in range(len(queries)):
    #getting the particular query from queries 
    query=queries.loc[i].Questions
    
    # getting the table dataframe that consists of docstring and code 
    #that has top match with query using cosine similarity
    table=eval_object.get_top_code_and_docstring(query,embeddings_model_3,model_3,list_data,df)
    
    #adding the column query with all values as 
    table=add_column(table,query)
    
    #adding columns Match that defines whether the code for given query is correctly retrieved or not
    table=add_column(table,'Match')
    
    #reordering the table for better view
    table=reorder_columns(table)
    
    #iterating in the table
    for index,row in table.iterrows():
        
        #fetching the current code for query
        result = row['Code']
        
        #getting the response from claude api
        response=check_response(query,row['Code'])
        
        print(f"{i+1}.{index+1}")
        
        # printing the reponse from the claude api using the key
        print(response)
    
        #Checking the value present in the response generated by the claude
        if 'YES' in response.content:
            response='YES'
        else :
            response='NO'
            
        #Giving the value of response in match column for current row
        table.at[index,'Match']=response
    
    #merging the table dataframe and model_3_query_response in single dataframe
    model_3_query_response= pd.concat([model_3_query_response, table], ignore_index=True)

1.1
content='YES'
1.2
content='YES'
1.3
content='NO'
1.4
content='NO'
1.5
content='NO'
1.6
content='YES'
1.7
content='NO'
1.8
content='NO'
1.9
content='NO'
1.10
content='NO'
2.1
content='YES'
2.2
content='YES'
2.3
content='YES'
2.4
content='YES'
2.5
content='YES'
2.6
content='NO'
2.7
content='YES'
2.8
content='NO'
2.9
content='The code you provided does not contain any information about the number of processing units available on the system. The code you provided is a function called `process_files` that simulates processing a random number of files on a random number of systems across multiple data centers.\n\nTo get the number of processing units available on the system, you can use the `multiprocessing` module in Python. Here\'s an example:\n\n```python\nimport multiprocessing\n\ndef get_num_processors():\n    """\n    Returns the number of processing units available on the system.\n    """\n    return multiprocessing.cpu_count()\n```\n\nThis function uses the `multiprocessing.cpu_c

23.10
content='YES'
24.1
content='YES'
24.2
content='YES'
24.3
content='YES'
24.4
content='YES'
24.5
content='YES'
24.6
content='YES'
24.7
content='NO'
24.8
content='NO'
24.9
content='NO'
24.10
content='YES'
25.1
content='YES'
25.2
content='NO'
25.3
content='YES'
25.4
content='NO'
25.5
content='YES'
25.6
content='YES'
25.7
content='YES'
25.8
content='NO'
25.9
content='YES'
25.10
content='NO'
26.1
content='YES'
26.2
content='NO'
26.3
content='YES'
26.4
content='YES'
26.5
content='NO'
26.6
content='YES'
26.7
content='YES'
26.8
content='NO'
26.9
content='YES'
26.10
content='NO'
27.1
content='YES'
27.2
content='NO'
27.3
content='YES'
27.4
content='YES'
27.5
content='NO'
27.6
content='YES'
27.7
content='YES'
27.8
content='YES'
27.9
content='NO'
27.10
content='YES'
28.1
content='YES'
28.2
content='YES'
28.3
content='NO'
28.4
content='NO'
28.5
content='NO'
28.6
content='YES'
28.7
content='YES'
28.8
content='NO'
28.9
content='YES'
28.10
content='YES'
29.1
content='YES'
29.2
content='YES'
29.3


48.5
content="Here's a simple Python function that can detect if a file is in CSV format:\n\n```python\nimport csv\n\ndef is_csv_file(file_path):\n    try:\n        with open(file_path, 'r') as file:\n            dialect = csv.Sniffer().sniff(file.read(1024))\n            file.seek(0)  # Reset the file pointer to the beginning\n            csv.reader(file, dialect)\n        return 'YES'\n    except (csv.Error, UnicodeDecodeError):\n        return 'NO'\n```\n\nThis function uses the `csv.Sniffer` class to detect the CSV dialect of the file. It reads the first 1024 bytes of the file and uses the `sniff()` method to detect the dialect. If the dialect is successfully detected, the function returns `'YES'`, indicating that the file is in CSV format. If an exception is raised (either a `csv.Error` or a `UnicodeDecodeError`), the function returns `'NO'`, indicating that the file is not in CSV format.\n\nYou can use this function like this:\n\n```python\nprint(is_csv_file('path/to/your/file.cs

# Evaluating the model_4


In [86]:
# order in which column of databse will be present
column_order = ['Query', 'Docstrings', 'Code','Match']

#creating the dataframe for model_4 which consists of for particular query all top code and
#match column shows whether it is correctly retrieved or not
model_4_query_response=pd.DataFrame(columns=column_order)

for i in range(len(queries)):
    #getting the particular query from queries 
    query=queries.loc[i].Questions
    
    # getting the table dataframe that consists of docstring and code 
    #that has top match with query using cosine similarity
    table=eval_object.get_top_code_and_docstring(query,embeddings_model_4,model_4,list_data,df)
    
    #adding the column query with all values as 
    table=add_column(table,query)
    
    #adding columns Match that defines whether the code for given query is correctly retrieved or not
    table=add_column(table,'Match')
    
    #reordering the table for better view
    table=reorder_columns(table)
    
    #iterating in the table
    for index,row in table.iterrows():
        
        #fetching the current code for query
        result = row['Code']
        
        #getting the response from claude api
        response=check_response(query,row['Code'])
        
        print(f"{i+1}.{index+1}")
        
        # printing the reponse from the claude api using the key
        print(response)
    
        #Checking the value present in the response generated by the claude
        if 'YES' in response.content:
            response='YES'
        else :
            response='NO'
            
        #Giving the value of response in match column for current row
        table.at[index,'Match']=response
    
    #merging the table dataframe and model_4_query_response in single dataframe
    model_4_query_response= pd.concat([model_4_query_response, table], ignore_index=True)

1.1
content='YES'
1.2
content='YES'
1.3
content='NO'
1.4
content='NO'
1.5
content='NO'
1.6
content='YES'
1.7
content='YES'
1.8
content='NO'
1.9
content='NO'
1.10
content='NO'
2.1
content='YES'
2.2
content='YES'
2.3
content='YES'
2.4
content='YES'
2.5
content='YES'
2.6
content='The code for determining the number of processing units available on the system is:\n\n```python\nif "cores" in sysinfo:\n    cores_per_job = min(cores_per_job, int(sysinfo["cores"]))\n```\n\nThis checks the `sysinfo` dictionary for the "cores" key, which should contain the number of processing units (cores) available on the system. It then uses this value to set the `cores_per_job` variable, ensuring that the number of cores used per job does not exceed the total number of cores available on the system.'
2.7
content='YES'
2.8
content='YES'
2.9
content='YES'
2.10
content='YES'
3.1
content='YES'
3.2
content='YES'
3.3
content='YES'
3.4
content='YES'
3.5
content='NO'
3.6
content='NO'
3.7
content='YES'
3.8
content='N

7.8
content='YES'
7.9
content='NO'
7.10
content='YES'
8.1
content='YES'
8.2
content='YES'
8.3
content='YES'
8.4
content='YES'
8.5
content='YES'
8.6
content='YES'
8.7
content='YES'
8.8
content='YES'
8.9
content='YES'
8.10
content='YES'
9.1
content='NO'
9.2
content='NO'
9.3
content='NO'
9.4
content='NO'
9.5
content='NO'
9.6
content='NO'
9.7
content='NO'
9.8
content='NO'
9.9
content='NO'
9.10
content='NO'
10.1
content='YES'
10.2
content='YES'
10.3
content='NO'
10.4
content='NO'
10.5
content='NO'
10.6
content='NO'
10.7
content='YES'
10.8
content='YES'
10.9
content='NO'
10.10
content='YES'
11.1
content='YES'
11.2
content='NO'
11.3
content='YES'
11.4
content='NO'
11.5
content='NO'
11.6
content='NO'
11.7
content='YES'
11.8
content='NO'
11.9
content='YES'
11.10
content='NO'
12.1
content='YES'
12.2
content='NO'
12.3
content='YES'
12.4
content='NO'
12.5
content='YES'
12.6
content='NO'
12.7
content='YES'
12.8
content='YES'
12.9
content='NO'
12.10
content='YES'
13.1
content='YES'
13.2
content='YES

35.8
content='YES'
35.9
content='Based on the provided code, there is a 75% or higher matching between the human input and the data. The function `start_instance` in the code appears to be responsible for constructing a server instance from a cloud profile. Therefore, the response is:\n\nYES'
35.10
content='YES'
36.1
content='YES'
36.2
content='NO'
36.3
content='YES'
36.4
content='YES'
36.5
content='YES'
36.6
content='YES'
36.7
content='YES'
36.8
content='YES'
36.9
content='YES'
36.10
content='NO'
37.1
content='YES'
37.2
content="Based on the provided code, it seems that the `iprompt` function already includes a prompt at the bottom of the screen and gets input from the user. The relevant code is:\n\n```python\nprompt = True\ntry:\n    while self.status in ['running', 'paused']:\n        if prompt:\n            out.write('pymc > ')\n            out.flush()\n\n        cmd = utils.getInput().strip()\n        # ...\n```\n\nThis code writes the prompt `'pymc > '` to the output stream (`out

48.6
content="Here's the code to detect if a file is in CSV format:\n\ndef is_csv_file(filename):\n    '''\n    Checks if a file is in CSV format\n    '''\n    try:\n        with open(filename, 'r') as file:\n            dialect = csv.Sniffer().sniff(file.read(1024))\n            return dialect.delimiter == ','\n    except (csv.Error, UnicodeDecodeError):\n        return False\n\nThis function uses the `csv.Sniffer` class to analyze the first 1024 bytes of the file and determine if it has the characteristics of a CSV file. If the detected delimiter is a comma (','), the function returns `True`, indicating that the file is in CSV format. If an exception occurs (e.g., the file is not in a valid CSV format or cannot be decoded), the function returns `False`."
48.7
content="Here's an example code for detecting a CSV formatted file:\n\n```python\nimport csv\n\ndef detect_csv_file(file_path):\n    try:\n        with open(file_path, 'r') as file:\n            reader = csv.reader(file)\n      

52.1
content='YES'
52.2
content='YES'
52.3
content='YES'
52.4
content='YES'
52.5
content='YES'
52.6
content='YES'
52.7
content='YES'
52.8
content='YES'
52.9
content='YES'
52.10
content='Here is the code to add and commit all files given in a list into a git repository:\n\n```python\nfrom git import Repo\n\ndef add_and_commit(repo_path, file_list):\n    """\n    Add and commit all files given in a file_list to the git repository at repo_path.\n    \n    Args:\n        repo_path (str): The path to the git repository.\n        file_list (list): A list of file paths to be added and committed.\n    """\n    repo = Repo(repo_path)\n    index = repo.index\n\n    # Add files to the index\n    for file_path in file_list:\n        index.add([file_path])\n\n    # Commit the changes\n    index.commit("Added and committed files: {}".format(", ".join(file_list)))\n\n    print("Files added and committed successfully.")\n```\n\nTo use this function, you would call it with the path to your git reposito

# Getting head values of model_1_query_response

In [91]:
model_1_query_response.head(10)

Unnamed: 0,Query,Docstrings,Code,Match
0,Rename a table in the database,renames index param old_name name index rename...,"def rename_index(self, old_name, new_name=None...",NO
1,Rename a table in the database,change table column name otherwise leaving tab...,"def _table_relabel(table, substitutions, repla...",YES
2,Rename a table in the database,rename table inside mapd reference old table l...,"def rename(self, new_name, database=None):\n ...",YES
3,Rename a table in the database,try find index changed name rename operation m...,"def detect_index_renamings(self, table_differe...",YES
4,Rename a table in the database,delete row drop existing table parameter table...,"def truncate_table(self, table_name, database=...",NO
5,Rename a table in the database,purge specific table database reversed param n...,"def purge_table(self, name):\n """"""\n ...",NO
6,Rename a table in the database,make sure specified table exist name name tabl...,"def table_absent(name, db):\n '''\n Make...",YES
7,Rename a table in the database,check table name obviously invalid,"def normalize_table_name(name):\n """"""Check ...",YES
8,Rename a table in the database,modify table schema param table table,"def table(self, table):\n """"""\n ...",YES
9,Rename a table in the database,rename old schema identifier new schema identi...,"def rename(self, old, new):\n """"""Rename...",YES


# Getting head values of model_2_query_response

In [111]:
model_2_query_response.sample(10)

Unnamed: 0,Query,Docstrings,Code,Match
137,apply processing running comparison like chang...,change array name searching array renaming,"def rename_scalar(self, old_name, new_name, pr...",YES
372,Resizes the image according to width and size,return squared resized image,"def no_crop(im, min_sz=None, interpolation=cv2...",NO
32,Get the status of a process with a particular ...,write current process pid pidfile location,"def write_pid(self, pid=None):\n """"""Write t...",YES
38,Get the status of a process with a particular ...,check process running name,"def get_pid(name):\n """"""Check if process is...",NO
311,Get all repositories list from project,list package one configured repos,"def _repo_packages(self, args, search=False):\...",YES
207,binary search function complexity log n,time complexity dfs v e space complexity v,"def top_sort(graph):\n """""" Time complexity ...",YES
130,apply processing running comparison like chang...,applies post processing running comparison cha...,"def _post_process_output(res):\n """"""\n A...",NO
48,calculate the cosine similarity of the tf idf...,return cosine similarity binary vector one len...,"def cosine_similarity_vec(num_tokens, num_remo...",YES
490,Check if a username/password combination is va...,check username password combination valid,"def _check_auth(self, username: str, password:...",YES
339,Returns a vocabulary after eliminating the wor...,build word frequency list incoming string,"def counter_from_str(self, string):\n ""...",YES


# Getting head values of model_3_query_response

In [93]:
model_3_query_response.head(10)

Unnamed: 0,Query,Docstrings,Code,Match
0,Rename a table in the database,rename table inside mapd reference old table l...,"def rename(self, new_name, database=None):\n ...",YES
1,Rename a table in the database,modify table schema param table table,"def table(self, table):\n """"""\n ...",YES
2,Rename a table in the database,set renamed column table diff rtype orator dba...,"def _set_renamed_columns(self, table_diff, com...",NO
3,Rename a table in the database,purge specific table database reversed param n...,"def purge_table(self, name):\n """"""\n ...",NO
4,Rename a table in the database,rename tab change name,"def rename_tabs_after_change(self, given_name)...",NO
5,Rename a table in the database,change table column name otherwise leaving tab...,"def _table_relabel(table, substitutions, repla...",YES
6,Rename a table in the database,rename using content disposition header,"def _rename_with_content_disposition(self, res...",NO
7,Rename a table in the database,table changed event handler,"def OnTableChanged(self, event):\n """"""T...",NO
8,Rename a table in the database,rename content created added,"def renameAfterCreation(obj):\n """"""Rename t...",NO
9,Rename a table in the database,change selected database current connection,"def select(self, db):\n """"""Change the s...",NO


# Getting head values of model_4_query_response

In [94]:
model_4_query_response.head(10)

Unnamed: 0,Query,Docstrings,Code,Match
0,Rename a table in the database,rename table inside mapd reference old table l...,"def rename(self, new_name, database=None):\n ...",YES
1,Rename a table in the database,try find index changed name rename operation m...,"def detect_index_renamings(self, table_differe...",YES
2,Rename a table in the database,purge specific table database reversed param n...,"def purge_table(self, name):\n """"""\n ...",NO
3,Rename a table in the database,renames index param old_name name index rename...,"def rename_index(self, old_name, new_name=None...",NO
4,Rename a table in the database,set renamed column table diff rtype orator dba...,"def _set_renamed_columns(self, table_diff, com...",NO
5,Rename a table in the database,rename old schema identifier new schema identi...,"def rename(self, old, new):\n """"""Rename...",YES
6,Rename a table in the database,change table column name otherwise leaving tab...,"def _table_relabel(table, substitutions, repla...",YES
7,Rename a table in the database,check rename constraint return whether rename ...,"def _check_rename_constraints(self, old_key, n...",NO
8,Rename a table in the database,update record name change allowed record ident...,"def _update_record(self, identifier, rtype=Non...",NO
9,Rename a table in the database,rename key instance also renamed,"def rename_key(pki_dir, id_, new_id):\n '''...",NO


# MAP@10 (mean average precision) of model_1

In [107]:
total_yes_model_1 = (model_1_query_response['Match'] == 'YES').sum()
total_no_model_1 = (model_1_query_response['Match'] == 'NO').sum()

print(f"MAP@10 (mean average precision) of model_1 : {(total_yes_model_1/(total_yes_model_1+total_no_model_1))*100}")


MAP@10 (mean average precision) of model_1 : 66.49122807017544


# MAP@10 (mean average precision) of model_2

In [108]:
total_yes_model_2 = (model_2_query_response['Match'] == 'YES').sum()
total_no_model_2 = (model_2_query_response['Match'] == 'NO').sum()

print(f"MAP@10 (mean average precision) of model_2 : {(total_yes_model_2/(total_yes_model_2+total_no_model_2))*100}")


MAP@10 (mean average precision) of model_2 : 63.68421052631579


# MAP@10 (mean average precision) of model_3

In [109]:
total_yes_model_3 = (model_3_query_response['Match'] == 'YES').sum()
total_no_model_3 = (model_3_query_response['Match'] == 'NO').sum()

print(f"MAP@10 (mean average precision) of model_3 : {(total_yes_model_3/(total_yes_model_3+total_no_model_3))*100}")


MAP@10 (mean average precision) of model_3 : 65.6140350877193


# MAP@10 (mean average precision) of model_4

In [110]:
total_yes_model_4 = (model_4_query_response['Match'] == 'YES').sum()
total_no_model_4 = (model_4_query_response['Match'] == 'NO').sum()


print(f"MAP@10 (mean average precision) of model_4 : {(total_yes_model_4/(total_yes_model_4+total_no_model_4))*100}")


MAP@10 (mean average precision) of model_4 : 65.6140350877193
