In [None]:
from gpt4all import GPT4All
import os
import glob
import pandas as pd
import gspread
from gspread_dataframe import set_with_dataframe
from google.oauth2.service_account import Credentials


In [14]:
# Function to analyze conversations for commercial intent using GPT-4
def analyze_conversation(conversation):
    # Send the conversation text to GPT-4 and get the response
    model = GPT4All("Meta-Llama-3-8B-Instruct.Q4_0.gguf") # downloads / loads a 4.66GB LLM
    with model.chat_session():
        response = model.generate(f"I am a market researcher and you are a consumer behavior specialist. I need you to make accurate inferences about whether the LLM prompt\
                                    I give at the end of these instructions is related to a commercial intention. In case you're uncertain about potential commercial intent, please answer conservatively,\
                                    because I most prefer to avoid false classifications of commercial intent. \
                                    Follow the instructions specified further carefully and do not give any other information as a note or explanations.\
                                    Instructions - \
                                    1. If you think the prompt has no commercial intent, only answer the following: \
                                        Commercial intent : No\
                                    2. If you think the prompt has a commercial intent, then answer the following in the given format and order only :\
                                    Commercial intent : Yes\
                                    Commercial Intent Type : Give only one phrase indicating the type of commercial intent (for example: intent to make a purchase, intent to sell, searching for a product, searching for a service, \
                                    searching for a brand, searching for physical store, problem that a product could help with)\
                                    Relevant Industry:  Give the Industry Name that corresponds to the commercial intent of the prompt (choose only from the following : retail, consumer packaged goods, financial services, \
                                    computing products and consumer electronics, healthcare and pharma, automotive, telecom, travel and tourism, entertainment and media, none) \
                                    Confidence level :  Provide a confidence score between 0-100% to indicate how confident you are that this prompt reflects a commercial intent.\
                                    End of instructions.\
                                    The prompt is as follows: {conversation}")
    # Parse GPT-4's response (assuming it returns intent and industry classification)
    #print(response)
    return response

In [None]:
import re
import glob

# Load all conversations
sampled_dfs = []
data_file = 'cleaned_wildchat_data.csv'
df_convo = pd.read_csv(data_file)
N = 10  # adjust sample size to analyse conversations
sampled_df = df_convo.sample(n=N, random_state=None)
sampled_dfs.append(sampled_df)

english_convo = pd.concat(sampled_dfs)
sample_convos = english_convo['conversation'].head(10)
sample_convos

734     make this paper longer and flow more smoothly ...
1878    Algorithmic Art - Benchmark 2\n\nInstructions:...
1318    "Wormholes connect across space and time, not ...
39      My 6 year old pug who has never had any seriou...
1509    Matt and his sister, Tricia, are stopping at R...
3122    Improve the prose and increase the clarity of ...
2918    ØµØ­Ø­ You can make a payment using your PayPa...
1379    rewrite the below more formally\n\nHello Chad,...
372                Order to cash belongs to which domain?
438     Strange Enderwoman: "Oh come on, where'd I put...
Name: conversation, dtype: object

In [None]:

commercial_intent = []
commercial_intent_type = []
industry_classification = []
response_list = []
confidence_level = []
query_list = []
columns_to_save = ['conversation', 'CommercialIntent', 'CommercialIntentType', 'IndustryClassification', 'ConfidenceLevel']

# Function to select the first two sentences
def select_first_two_sentences(text):
    text = text.replace('\n', '')
    #Find the first two occurrences of sentence-ending punctuation
    match = re.search(r'([A-Za-z0-9][^.!?]*[.!?])\s+([A-Za-z0-9][^.!?]*[.!?])\s+([A-Za-z0-9][^.!?]*[.!?])', text)
    if match:
        if match.group(3):
            return match.group(1) + match.group(2) + match.group(3)
        elif match.group(2):
            return match.group(1) + match.group(2)
        else:
            return match.group(1)
    else:
        return text

english_convo.loc[:,'conversation'] = english_convo['conversation'].apply(select_first_two_sentences)
sample_convos = english_convo['conversation'].head(N) #ADJUST SAMPLE SIZE

# Iterate through the conversations and analyze each one
for conversation in sample_convos:
    query = conversation
    print(query)
    result = analyze_conversation(conversation)  # Replace this with actual model interaction
    print(result)  # Print the model's response for reference
    convos = pd.DataFrame()
        # Parse the result to extract intent and industry classification
    try:
        output = result.split("\n")
        #print(output)
        if "" in output:
            output.remove("") 
        print(output)
        if len(output)<4:
            commercial_intent.append(output[0].split(":")[1].strip())
            commercial_intent_type.append('None')
            industry_classification.append('None')
            confidence_level.append('None')  
        else:
            commercial_intent.append(output[0].split(":")[1].strip())
            commercial_intent_type.append(output[1].split(":")[1].strip())
            industry_classification.append(output[2].split(":")[1].strip())
            confidence_level.append(output[3].split(":")[1].strip())  
        query_list.append(query)

        convos['conversation'] = query_list
        convos['CommercialIntent'] = commercial_intent
        convos['CommercialIntentType'] = commercial_intent_type
        convos['IndustryClassification'] = industry_classification
        convos['ConfidenceLevel'] = confidence_level
        
        # Select only the relevant columns for the final DataFrame
        df_final = convos.loc[:, columns_to_save]
        
        # Store the entire response in a separate list
        response_list.append(result)
        SERVICE_ACCOUNT_FILE = 'path to google service account file'
        SCOPES = ['https://www.googleapis.com/auth/spreadsheets', 'https://www.googleapis.com/auth/drive']
        creds = Credentials.from_service_account_file(SERVICE_ACCOUNT_FILE, scopes=SCOPES)

        # Authorize gspread client
        client = gspread.authorize(creds)

        # Open the Google Sheet by its name or ID
        sheet = client.open('GPT').worksheet('Sheet5')
        #sheet.clear()

        # Write the entire DataFrame to the sheet, starting from the top-left corner (A1)
        x = set_with_dataframe(sheet, df_final)
    except:
        pass


make this paper longer and flow more smoothly between ideas "The Industrial Revolution, a period characterized by the transition from manual labor to machine-based manufacturing, transformed human societies worldwide beginning in the 18th and extending into the 19th centuries.Scholars have noted that the Industrial Revolution has become a complex historical phenomenon that had a profound impact on social, economic, and cultural landscapes worldwide.In this regard, this paper aims to discuss how this era has laid a foundation for the modern global economy and continues to shape contemporary society through ongoing technological advancements, the growth of capitalism, globalization, and environmental sustainability/urbanization concerns.
Commercial intent : No
Confidence level: 100%
['Commercial intent : No', 'Confidence level: 100%']
Algorithmic Art - Benchmark 2Instructions: You will present the individual work you've done to the rest of your group on 10/26 or 10/27.You will explain th