In [1]:
import torch
from transformers import (
    AutoTokenizer,
    pipeline,
    BitsAndBytesConfig
)
from datasets import load_dataset
from peft import AutoPeftModelForCausalLM
import pandas as pd
from tqdm import tqdm

# Local checkpoint directory; the tokenizer and the PEFT model are both loaded from it.
model_id = './code-mistral-7b-text-to-python'

# 4-bit NF4 weights with double quantization; matmuls are computed in bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.padding_side = 'right'

model = AutoPeftModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    torch_dtype=torch.bfloat16,
    device_map='auto',
)

# Text-generation pipeline wrapping the quantized model and its tokenizer.
pipe = pipeline(task='text-generation', model=model, tokenizer=tokenizer)

# Evaluation records are read from a local JSON file.
dataset = load_dataset('json', split='train', data_files='test_CRM_data.json')

results = []

# Fall back to EOS when the tokenizer defines no pad token, so generation is
# never handed pad_token_id=None.
pad_token_id = pipe.tokenizer.pad_token_id
if pad_token_id is None:
    pad_token_id = pipe.tokenizer.eos_token_id

for data in tqdm(dataset, total=len(dataset)):
    messages = data['messages']

    # The first two messages form the prompt; the third is kept as the
    # reference answer. (Presumably system / user / assistant - confirm
    # against the dataset.)
    prompt = pipe.tokenizer.apply_chat_template(
        messages[:2],
        tokenize=False,
        add_generation_prompt=True
    )

    # Greedy decoding (do_sample=False) keeps the evaluation deterministic.
    # temperature / top_k / top_p are intentionally not passed: they only take
    # effect when sampling is enabled and otherwise just trigger warnings.
    # return_full_text=False makes the pipeline return only the completion, so
    # the prompt no longer has to be sliced off by character count.
    outputs = pipe(
        prompt,
        max_new_tokens=256,
        do_sample=False,
        return_full_text=False,
        eos_token_id=pipe.tokenizer.eos_token_id,
        pad_token_id=pad_token_id
    )

    results.append({
        'Query': messages[1]['content'],
        'Original_Answer': messages[2]['content'],
        'Generated_Answer': outputs[0]['generated_text'].strip(),
    })

# One row per evaluated example: query, reference answer, generated answer.
df = pd.DataFrame(results)
df.head()

  from .autonotebook import tqdm as notebook_tqdm
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Loading checkpoint shards: 100%|██████████| 2/2 [00:40<00:00, 20.34s/it]
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FuyuForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM',

Unnamed: 0,Query,Original_Answer,Generated_Answer
0,Can you identify any patterns in transaction v...,"def transaction_volume_by_city_per_month(df, c...","def transaction_volume_patterns(df, customer_d..."
1,Compute Lexical Document Similarity,"def lexical_document_similarity(df, customer_d...","def lexical_document_similarity(df, customer_d..."
2,How do the prices of related products affect t...,"def analyze_product_prices(df, customer_df, pr...","def analyze_related_product_prices(df, custome..."
3,How do the scores of transactions involving re...,"def compare_transaction_scores(df, customer_df...","def compare_scores(df, customer_df, product_df..."
4,Could you please recommend top 5 related produ...,"def related_product_recommendations(df, produc...","def related_product_recommendations(df, produc..."


In [2]:
# Write the current `df` to CSV without the index column. A later cell reads
# this file back with pd.read_csv('evaluation_results.csv') and expects the
# 'Query' and 'Generated_Answer' columns, so `df` must still be the
# evaluation-results table when this cell runs.
df.to_csv('evaluation_results.csv', index=False)

In [3]:
import pandas as pd
import random
from faker import Faker
fake = Faker()

# Seed both random sources so ids, cities, categories and scores are the same
# on every run. Timestamps are still drawn relative to "now", so their
# absolute values move with the wall clock.
SEED = 42
random.seed(SEED)
Faker.seed(SEED)

# Cities that customers are randomly assigned to
european_cities = [
    'London', 'Paris'
]

# Generate sample data for orders: 15 orders spread over users 1-5 and items 1-3
orders_data = [
    {
        'order_id': f'order{i}',
        'user_id': f'user{random.randint(1, 5)}',
        'item_id': f'item{random.randint(1, 3)}',
        'timestamp': fake.date_time_between(start_date='-5d', end_date='now'),
        'score': round(random.uniform(0.5, 1.0), 2),
    }
    for i in range(1, 16)
]
orig_df = pd.DataFrame(orders_data)

# Generate sample data for customers: one row per user id
customers_data = [
    {'user_id': f'user{i}', 'customer_city': random.choice(european_cities)}
    for i in range(1, 6)
]
# Ids are unique by construction; drop_duplicates is kept as a defensive no-op.
orig_customer_df = pd.DataFrame(customers_data).drop_duplicates(subset=['user_id'])

# Generate sample data for products: one row per item id
products_data = [
    {'item_id': f'item{i}', 'product_category': fake.word()}
    for i in range(1, 6)
]
orig_product_df = pd.DataFrame(products_data).drop_duplicates(subset=['item_id'])

# Displaying sample data
print("Orders Data:")
print(orig_df)
print("\nCustomer Data:")
print(orig_customer_df)
print("\nProduct Data:")
print(orig_product_df)

Orders Data:
   order_id user_id item_id                  timestamp  score
0    order1   user3   item2 2024-04-15 00:01:39.892902   0.81
1    order2   user1   item3 2024-04-15 08:33:46.314329   0.94
2    order3   user1   item1 2024-04-18 07:21:41.384627   0.94
3    order4   user5   item2 2024-04-14 06:19:19.341374   0.68
4    order5   user1   item1 2024-04-17 01:25:24.630368   0.76
5    order6   user1   item1 2024-04-17 17:26:43.045747   0.55
6    order7   user4   item2 2024-04-14 22:40:38.961091   0.66
7    order8   user5   item2 2024-04-14 01:53:34.338920   0.51
8    order9   user4   item3 2024-04-16 06:33:52.408665   0.90
9   order10   user3   item3 2024-04-15 16:45:59.637230   0.73
10  order11   user3   item1 2024-04-13 23:16:57.922618   0.51
11  order12   user3   item3 2024-04-17 12:34:45.631818   0.54
12  order13   user3   item1 2024-04-13 14:02:47.139630   0.86
13  order14   user5   item3 2024-04-17 07:07:45.039326   0.60
14  order15   user2   item3 2024-04-17 12:34:47.674488   

In [4]:
def data_copy(orig_df, orig_customer_df, orig_product_df):
    """Return independent deep copies of the three input frames.

    The result is a ``(df, customer_df, product_df)`` tuple in the same order
    as the arguments, so callers can mutate the copies without touching the
    originals.
    """
    originals = (orig_df, orig_customer_df, orig_product_df)
    return tuple(frame.copy(deep=True) for frame in originals)

# Take working copies so the orig_* frames stay untouched.
df, customer_df, product_df = data_copy(orig_df, orig_customer_df, orig_product_df)

# Show each working table under its heading.
for heading, table in (
    ("Orders Data:", df),
    ("\nCustomer Data:", customer_df),
    ("\nProduct Data:", product_df),
):
    print(heading)
    print(table)

Orders Data:
   order_id user_id item_id                  timestamp  score
0    order1   user3   item2 2024-04-15 00:01:39.892902   0.81
1    order2   user1   item3 2024-04-15 08:33:46.314329   0.94
2    order3   user1   item1 2024-04-18 07:21:41.384627   0.94
3    order4   user5   item2 2024-04-14 06:19:19.341374   0.68
4    order5   user1   item1 2024-04-17 01:25:24.630368   0.76
5    order6   user1   item1 2024-04-17 17:26:43.045747   0.55
6    order7   user4   item2 2024-04-14 22:40:38.961091   0.66
7    order8   user5   item2 2024-04-14 01:53:34.338920   0.51
8    order9   user4   item3 2024-04-16 06:33:52.408665   0.90
9   order10   user3   item3 2024-04-15 16:45:59.637230   0.73
10  order11   user3   item1 2024-04-13 23:16:57.922618   0.51
11  order12   user3   item3 2024-04-17 12:34:45.631818   0.54
12  order13   user3   item1 2024-04-13 14:02:47.139630   0.86
13  order14   user5   item3 2024-04-17 07:07:45.039326   0.60
14  order15   user2   item3 2024-04-17 12:34:47.674488   

In [16]:
import pandas as pd
import ast
import astor
import inspect

def print_red(text):
    """Print ``text`` between the ANSI escape ``\\033[91m`` and the reset ``\\033[00m``."""
    colour_on, colour_off = "\033[91m", "\033[00m"
    print(f"{colour_on} {text}{colour_off}")

def print_func(query_out, solution, exception_out=None):
    """Print a query, its solution and, when given, an error message in red.

    Args:
        query_out: Text describing the query.
        solution: Text of the solution being reported.
        exception_out: Optional error text. When it is ``None`` (the default)
            nothing is printed for it; previously a literal ``None`` was
            emitted in red.
    """
    print_red(query_out)
    print_red(solution)
    if exception_out is not None:
        print_red(exception_out)

def parse_function_definition(definition_string):
    """Parse Python source and describe the function it defines.

    The module-level statements of ``definition_string`` are scanned for
    ``def`` nodes. When several are present, the last one wins, and the
    returned name, arguments and body all describe that same function.
    (Previously the body accumulated statements from every function while
    name and arguments came from the last one only.)

    Args:
        definition_string: Python source code, normally one function definition.

    Returns:
        A 4-tuple ``(function_name, arguments, body, function_definition)``:
        - ``function_name``: name of the selected function, or ``None`` if the
          source contains no top-level ``def``.
        - ``arguments``: names of its plain positional parameters
          (``node.args.args``); positional-only, keyword-only and ``*``/``**``
          parameters are not included.
        - ``body``: its statements rendered by ``astor.to_source``, stripped
          and joined with newlines.
        - ``function_definition``: ``definition_string`` prefixed with a fixed
          block of imports so it can be executed on its own.

    Raises:
        SyntaxError: if ``definition_string`` is not valid Python.
    """
    function_name = None
    arguments = []
    body = []

    for node in ast.parse(definition_string).body:
        if isinstance(node, ast.FunctionDef):
            function_name = node.name
            arguments = [arg.arg for arg in node.args.args]
            # Rebuilt per function so the body always matches name/arguments.
            body = [astor.to_source(stmt).strip() for stmt in node.body]

    # Imports made available to the executed code. The emitted text is a
    # leading newline, one import per line, then the definition itself.
    preamble = (
        "import pandas as pd",
        "import numpy as np",
        "from numpy.linalg import LinAlgError",
        "from datetime import datetime, timedelta",
        "from collections import defaultdict, Counter",
        "from itertools import combinations",
        "from scipy.sparse import csr_matrix",
        "from scipy.stats import zscore",
        "from sklearn.preprocessing import StandardScaler, LabelEncoder",
        "from sklearn.cluster import KMeans",
        "from sklearn.model_selection import train_test_split",
        "from sklearn.linear_model import LogisticRegression, LinearRegression",
        "from sklearn.metrics.pairwise import cosine_similarity",
        "from sklearn.feature_extraction.text import TfidfVectorizer",
        "from sklearn.decomposition import TruncatedSVD",
        "from sklearn.metrics import accuracy_score, classification_report, mean_squared_error, r2_score",
        "from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor",
        "from mlxtend.frequent_patterns import apriori",
        "from mlxtend.frequent_patterns import association_rules",
        "from surprise import Reader, Dataset",
        "from surprise.prediction_algorithms import SVD",
        "from matplotlib import pyplot as plt",
        "from statsmodels.tsa.statespace.sarimax import SARIMAX",
        "from statsmodels.tools.sm_exceptions import ConvergenceWarning",
        "from statsmodels.tsa.seasonal import seasonal_decompose",
        "import warnings",
    )
    function_definition = "\n" + "\n".join(preamble) + "\n" + definition_string

    return function_name, arguments, '\n'.join(body), function_definition

def compare_results(idx, query, original_answer):
    """Execute one answer's function definition and print what it returns.

    Despite the name, nothing is compared: the answer text is unescaped,
    prefixed with imports by ``parse_function_definition``, executed, and the
    function it defines is called. Each argument is filled from the
    parameter's default when it has one, otherwise from the notebook global of
    the same name (``df``, ``customer_df``, ``product_df``, ...). Any failure
    is reported in red instead of being raised, so one bad answer does not stop
    the caller's loop.

    Args:
        idx: Row number, used only in the printed header.
        query: Natural-language query, used only in the printed header.
        original_answer: Source text of the function to run. A non-string value
            (for example NaN from an empty CSV cell) is reported as an error.

    Returns:
        None. All output goes to stdout.
    """
    query_out = f"Query - {idx}: {query}"
    print(query_out)

    # pandas reads an empty CSV cell back as NaN (a float). Without this guard
    # the .replace() calls below would raise outside the try block and abort
    # the caller's loop.
    if not isinstance(original_answer, str):
        print_red(f"Error:\n {original_answer} \n answer is not a string")
        return

    # Undo the escaping left in the stored answer: doubled backslashes,
    # literal "\n" sequences, trailing backslashes and escaped single quotes.
    original_definition_string = original_answer.replace("\\\\", "\\")
    original_definition_string = original_definition_string.strip().replace("\\n", "\n").rstrip('\\')
    original_definition_string = original_definition_string.replace("\\'", "'")

    exception_out = None
    try:
        original_function_name, original_arguments, _, original_definition_string = parse_function_definition(original_definition_string)
        if original_function_name is None:
            # Clearer than the KeyError(None) the namespace lookup would raise.
            raise ValueError("no top-level function definition found")
        print(original_definition_string)

        globals_ = {'df': df, 'customer_df': customer_df, 'product_df': product_df}
        # SECURITY: this executes model-generated code with the kernel's full
        # privileges. Only run it on trusted outputs or inside a sandbox.
        exec(original_definition_string, globals_)
        original_parsed_function = globals_[original_function_name]

        # Identity check on purpose: `!=` would call the default value's own
        # __ne__, which for array-like defaults does not return a plain bool.
        original_signature = inspect.signature(original_parsed_function)
        original_default_values = {
            param.name: param.default
            for param in original_signature.parameters.values()
            if param.default is not inspect.Parameter.empty
        }
        original_args = tuple(
            original_default_values[arg] if arg in original_default_values else globals()[arg]
            for arg in original_arguments
        )

        original_result = original_parsed_function(*original_args)
        print(original_result)
    except Exception as e:
        exception_out = f"Error:\n {original_definition_string} \n {str(e)}"

    if exception_out is not None:
        print_red(exception_out)

# Run every generated answer from the saved results against fresh data copies.
evaluation_rows = pd.read_csv('evaluation_results.csv')

for idx, row in evaluation_rows.iterrows():
    # compare_results reads df / customer_df / product_df from the notebook
    # globals, so rebind them to new copies before each answer runs.
    df, customer_df, product_df = data_copy(orig_df, orig_customer_df, orig_product_df)
    compare_results(idx, row['Query'], row['Generated_Answer'])

print("Done!")

Query - 0: Can you identify any patterns in transaction volume by customer city per month?
[91m Error:
 def transaction_volume_patterns(df, customer_df):
    merged_df = pd.merge(df, customer_df, on='user_id', how='left')
    merged_df['month'] = merged_df['timestamp'].dt.month
    merged_df['year'] = merged_df['timestamp'].dt.year
    transaction_volume_patterns = merged_df.groupby(['customer_city','month', 'year']].size().reset_index(name='transaction_count')
    return transaction_volume_patterns
 
 closing parenthesis ']' does not match opening parenthesis '(' (<unknown>, line 5)[00m
Query - 1: Compute Lexical Document Similarity

import pandas as pd
import numpy as np
from numpy.linalg import LinAlgError
from datetime import datetime, timedelta
from collections import defaultdict, Counter
from itertools import combinations
from scipy.sparse import csr_matrix
from scipy.stats import zscore
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.cluster import 