# Installation of Libraries & User Defined Functions

In [1]:
#Install Necessary Libraries
#!pip install -- upgrade pip --q
!pip install openai --q
!pip install openpyxl --q
#!pip install pandas --q
#!pip install numpy --q

In [2]:
#Import necessary modules and set default settings as required
import pandas as pd
pd.set_option('display.max_columns', None)
import numpy as np
from warnings import filterwarnings
filterwarnings("ignore")
import openai
import json
import time
from IPython.display import clear_output
import threading
import math
import sys
import os
import openpyxl

In [3]:
#Never truncate rows when a DataFrame / Series is displayed (None = no row limit)
pd.options.display.max_rows = None

# OpenAI API Key

In [4]:
#Initializing openai api key
#The key is read from the OPENAI_API_KEY environment variable so that no secret is stored in the notebook.
#NOTE(review): a key was previously committed in this cell - it should be treated as leaked and revoked.
openai.api_key = os.environ.get("OPENAI_API_KEY")
if not openai.api_key:
    raise RuntimeError("OPENAI_API_KEY environment variable is not set; export it before running this notebook.")


# UDF

## UDF for review word count & buckets tagging

In [5]:
def word_count(comment):
    """Return the number of whitespace-separated words in ``comment``.

    Parameters
    ----------
    comment : str
        Review text. Any non-string value (for example ``None`` or a float
        ``NaN`` produced by a missing cell) is treated as having no words.

    Returns
    -------
    int
        Word count; 0 for empty, whitespace-only or non-string input.
    """
    # Missing values are not strings and have no .split(); count them as 0
    # instead of raising AttributeError in the middle of a DataFrame.apply.
    if not isinstance(comment, str):
        return 0
    return len(comment.split())

#Function to categorize word count into buckets
def categorize_word_count(count):
    """Map a word count to its bucket label.

    The buckets are checked from the smallest upper bound upwards and the
    first bound that is >= ``count`` wins; anything above 100 is '>100'.
    Counts below 1 (including 0) fall into the first bucket, '1-4'.
    """
    upper_bounds_and_labels = (
        (4, '1-4'),
        (15, '5-15'),
        (30, '16-30'),
        (60, '31-60'),
        (100, '61-100'),
    )
    for upper_bound, label in upper_bounds_and_labels:
        if count <= upper_bound:
            return label
    return '>100'

## Function to assign values based on the presence of the topic in positive or negative keys

In [6]:
def assign_value(topic, positive, negative):
    """Score a topic as 1 (positive), -1 (negative) or 0 (not mentioned).

    Membership is tested with Python's ``in`` operator, so when ``positive`` /
    ``negative`` are strings this is a substring test, and when they are lists
    it is an element test. ``positive`` is checked first and wins if the topic
    appears in both; ``negative`` is not inspected in that case.
    """
    if topic in positive:
        return 1
    if topic in negative:
        return -1
    return 0

## UDF for Dynamic output

In [7]:
#Function for dynamic message
def print_dynamic_message(full_counter,key_counter,subcounter, full_total, key_total,subtotal, start_time, stop_event): #new addition - subcounter & subtotal
    """Keep redrawing a three-level progress report until ``stop_event`` is set.

    Each refresh clears the cell output, prints the overall / key / inner-loop
    progress plus the time elapsed since ``start_time`` (a ``time.time()``
    value), then prints up to three dots, half a second apart.

    ``full_counter`` and ``subcounter`` are read through index 0 on every
    refresh; ``key_counter`` is printed exactly as it was passed in.
    ``stop_event`` only needs an ``is_set()`` method.
    """
    while True:
        if stop_event.is_set():
            return
        clear_output(wait=True)
        seconds_elapsed = time.time() - start_time
        clock = time.strftime('%H:%M:%S', time.gmtime(seconds_elapsed))
        print(f"Overall - Executing {full_counter[0]} of {full_total}")
        print(f"Key - Executing {key_counter} of {key_total}")
        print(f"Loop within Key - Executing {subcounter[0]} of {int(subtotal)}", end=' ')
        print(f"\nElapsed time: {clock}")
        # Dot animation: stop early as soon as the caller signals completion
        dots_shown = 0
        while dots_shown < 3 and not stop_event.is_set():
            print('.', end='', flush=True)
            time.sleep(0.5)
            dots_shown += 1

In [8]:
#Function for dynamic message
def print_dynamic_message_keyword(counter, total, stop_event):
    """Keep redrawing a single-level progress line until ``stop_event`` is set.

    Each refresh clears the cell output, prints ``counter[0]`` against
    ``int(total)``, then prints up to three dots, half a second apart.
    ``stop_event`` only needs an ``is_set()`` method.
    """
    while True:
        if stop_event.is_set():
            return
        clear_output(wait=True)
        print(f"Executing loop {counter[0]} out of {int(total)}", end=' ')
        # Dot animation: stop early as soon as the caller signals completion
        dots_shown = 0
        while dots_shown < 3 and not stop_event.is_set():
            print('.', end='', flush=True)
            time.sleep(0.5)
            dots_shown += 1

## Function to query the OpenAI API

In [9]:
def query_openai(prompt, model='gpt-4o-2024-08-06', temperature=0, max_tokens=4095):
    """Send ``prompt`` as a single user message to the OpenAI chat completions API.

    Parameters
    ----------
    prompt : str
        Text sent as the only message, with role "user".
    model : str, optional
        Model identifier passed to the API. Defaults to the snapshot this
        notebook was written against.
    temperature : float, optional
        Sampling temperature passed to the API (default 0).
    max_tokens : int, optional
        Completion token limit passed to the API (default 4095).

    Returns
    -------
    tuple
        ``(content, prompt_tokens, completion_tokens)`` - the text of the
        first choice followed by the token usage reported in the response.
    """
    response = openai.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=temperature,
        max_tokens=max_tokens,
    )
    usage = response.usage
    content = response.choices[0].message.content

    return content, usage.prompt_tokens, usage.completion_tokens

#gpt-4o-2024-05-13

## UDF-Sentiment Scoring

In [10]:
#Function to initialize the Sentiment Scoring context with OpenAI
#        - 'Others': Others refer to customer comments on topics other than 'Product Quality & OLD Gold Jewellery Exchange'.

def score_sentiments_jul(reviews_df):
    """Ask the model to tag every review with its positive and negative topics.

    One ``{"review", "reviewer"}`` record is built per row from the
    'review_text' and 'Name' columns, the records are JSON-encoded and appended
    to the fixed instruction prompt, and the prompt is sent through
    ``query_openai``.

    Returns the three values produced by ``query_openai``:
    ``(api_response, input_token, output_token)``. The response text is
    returned as received; it is not parsed here.
    """
    review_records = [
        {"review": row['review_text'], "reviewer": row['Name']}
        for _, row in reviews_df.iterrows()
    ]
    reviews_json = json.dumps(review_records)

    prompt = f"""
    You have been given a list of Commentor Names & Customer comments for analysis.
    Instructions:
    1. Read & analyze each of the comment for sentiment('positive' or 'negative').  
    2. Categorize each of the comment into below defined topics.
        Topic & Definitions:
        - 'Trust':Trust refers to customers' trust in the brand.
        - 'Store Experience': Store Experience refers to the customers' feeling on the overall shopping experience.
        - 'Store Staff': Store Staff refers to customers feeling on how the sales person or staff interacts with the customer when providing assistance before, during, and after a purchase. A Good Store staff means he has good Knowledge on products & process and is able to explain clearly, very polite & hospitable, makes the customer feel heard, valued, and satisfied with the service & hospitality provided.
        - 'Product Design': Product Design refers to the presence of any of these words 'DESIGN','CRAFTSMANSHIP','WORKMANSHIP' in the comment. The comment may also describe these in the product.
        - 'Product Variety': Product Variety refers to the range of different products or collections available. A good variety means the customer feels there are many options to choose from.
        - 'Discount': Discount refers to offers or deals. A comment on good Discount refers to customers' feeling that they have received a good deal or offer through a sale or discount.
        - 'Making Charge': Making charge refers to the customers comments on whether they feel making charges are reasonable, high or low.
        - 'Price': Price refers to the overall pricing of the products. Customer comments on whether the products are affordable, expensive, or value for money.        
        - 'Product Quality': Product Quality refers to customers comments on Quality, Strength, Durability & Reliability of the product. The comment must include one of the words 'Quality', 'Strength', 'Durability' or 'Reliable' with reference to product quality to qualify as comment on Product Quality. 
        - 'OLD Gold Jewellery Exchange': OLD Gold Jewellery Exchange refers to customers comments on Old Gold Jewellery Exchange Policy, ease of exchanging their old gold & the value they get out of it. The comment must include one of the words 'Exchange', 'Exchange Policy' or 'old gold exchange' to qualify as comment on OLD Gold Jewellery Exchange. 
    3. Present the analysis for each of the comments ONLY in JSON format as shown in the below example. The output must strictly adhere to the JSON format provided in the example. Any additional text, explanations, summaries, or interpretations outside of this format is not required and should be omitted. 
        Example:
            {{
              "Commentor Name": 
              [
                {{
                  "positive": "Trust,Store Experience,Store Staff,Product Quality",
                  "negative": "Discount,Price"
                }}
              ],
              "Commentor Name": 
              [
                {{
                  "positive": "OLD Gold Jewellery Exchange,Trust,Store Staff",
                  "negative": "Product Quality"
                }}
              ]
            }}

    Customer Comments: 
    {reviews_json}
"""
    # Token counts come from the API usage figures reported by query_openai
    raw_reply, tokens_in, tokens_out = query_openai(prompt)

    return raw_reply, tokens_in, tokens_out

## Function to assign values based on the presence of the topic in positive or negative keys

In [11]:
def assign_value(topic, positive, negative):
    """Return 1 if ``topic`` is in ``positive``, -1 if it is in ``negative``, else 0.

    NOTE: this redefines the ``assign_value`` declared earlier in the notebook
    with the same behaviour; the later definition is the one in effect.
    ``positive`` is tested first, and ``negative`` is only inspected when the
    topic is not found in ``positive``.
    """
    return 1 if topic in positive else (-1 if topic in negative else 0)

## UDF for Keywords extraction

In [12]:
def positive_keywords(reviews,topic):
    """Ask the model for the top positive keywords and phrases about ``topic``.

    ``topic`` and ``reviews`` are interpolated into the instruction prompt
    as-is, and the prompt is sent through ``query_openai``.

    Returns ``(api_response, input_token_count, output_token_count)`` as
    produced by ``query_openai``; the response text is not parsed here.
    """
    instructions = f"""
    You have been provided with a list of customer comments of a jewellery shop that has a positive sentiment for the topic {topic}. 
    
    Your task is to follow the steps as below:
    1. Read through each of the comments.  
    2. Check the relevancy of each of the comments with the {topic}.
    3. Identify the Positive keywords & Positive phrases with relevance to {topic} in each of the comments.
    4. Consolidate all the Positive keywords & Positive phrases.
    5. Strictly give only the top 5 Positive keywords & the top 5 Positive phrases along with their frequencies.
    
    Strictly follow the instructions as below while executing the task:
    a. The keywords & phrases that you are listing down should be strictly relevant to the {topic} & should be in the same keywords as in customer comments.
        For example: if the topic is 'Product Quality', relevant keywords might include 'durable', 'good quality', 'quality product','excellent quality' but not  'beautiful designs','Exquisite Workmanship', 'great service', 'good service', 'nice collection', 'friendly staff' or 'quick service', as these are related to Design, collection & Customer Service, not product quality.
    b. Note that keywords can be either a single word or a combination of two words.
        For example: 'Trustable', 'Reliable', 'Durable' are single word keywords while 'Good Quality', 'excellent variety', 'wide range', 'good designs' are two word keywords.  
    c. The order of keywords & phrases that are listed must be in the descending order of their frequencies.
    d. Strictly omit keywords & phrases that are not relevant to the {topic}.
    e. Strictly ensure the keywords are mapped to keywords & phrases are mapped to phrases.
    f. Strictly ensure that the keywords must not be mapped in phrases & phrases must not be mapped in keywords.
    g. If there are no relevant positive keywords & phrases in comments for {topic}, give the output as "No relevant positive keywords/ phrases".
    h. Strictly follow the below structure for the output. An example output is also given for your reference.
    
        Structure:
        {{
            "{topic}":  
            [
            {{
              "keywords": "<str>",
              "phrases": "<str>"
            }}
            ]
        }}


        Example Output:
        {{
            "Trust": 
            [
            {{
              "keywords": "Genuine Service :5, Trust :4 , Reliable Option : 3 ,Honest :3",
              "phrases": "Trustworthy place to buy :1, Our all time trusted place :1, Most trustworthy brand name :1"
            }}
            ]
        }}

    The output should strictly be only in a JSON format.
    Strictly start the output with {{.
    Strictly do not start with ```json & end with  ``` in the output you give.
    Strictly do not start with```json.
    Strictly ensure that your output can be decoded by json.loads().

Remember, The ultimate goal is to provide the business team with what delights the customer in our {topic} through positive keywords & phrases along with their frequencies in the comments.
This helps them improve and better meet the needs of their customers.

Here are the customer comments to work with:
{reviews}
"""

    # Token counts are the usage figures reported by query_openai
    raw_reply, tokens_in, tokens_out = query_openai(instructions)

    return raw_reply, tokens_in, tokens_out

In [13]:
def negative_keywords(reviews,topic):
    """Ask the model for the top negative keywords and phrases about ``topic``.

    ``topic`` and ``reviews`` are interpolated into the instruction prompt
    as-is, and the prompt is sent through ``query_openai``.

    Returns ``(api_response, input_token_count, output_token_count)`` as
    produced by ``query_openai``; the response text is not parsed here.
    """
    instructions = f"""
    You have been provided with a list of customer comments of a jewellery shop that has a negative sentiment for the topic {topic}. 
    
    Your task is to follow the steps as below:
    1. Read through each of the comments.  
    2. Check the relevancy of each of the comments with the {topic}.
    3. Identify the negative keywords & negative phrases with relevance to {topic} in each of the comments.
    4. Consolidate all the negative keywords & negative phrases.
    5. Strictly give only the top 5 negative keywords & the top 5 negative phrases along with their frequencies.
    
    Strictly follow the instructions as below while executing the task:
    a. The keywords & phrases that you are listing down should be strictly relevant to the {topic} & should be in the same keywords as in customer comments.
        For example, if the topic is 'Product Quality', relevant words might include 'broke immediately', 'very weak', 'poor quality','bad quality' but not  'poor designs','rude service', 'bad service', 'low collection', 'unfriendly staff' or 'slow service', as these are related to Design, product variety & Customer Service, not product quality.
    b. Note that keywords can be either a single word or a combination of two words.
        For example: 'unhappy', 'painful', 'waiting' are single word keywords while 'poor designs', 'very weak', 'bad quality', 'rude service' are two word keywords.  
    c. The order of keywords & phrases that are listed must be in the descending order of their frequencies.
    d. Strictly omit keywords & phrases that are not relevant to the {topic}.
    e. Strictly ensure the keywords are mapped to keywords & phrases are mapped to phrases.
    f. Strictly ensure that the keywords must not be mapped in phrases & phrases must not be mapped in keywords.
    g. If there are no relevant negative keywords & phrases in comments for {topic}, give the output as "No relevant negative keywords/ phrases".
    h. Strictly follow the below structure for the output. An example output is also given for your reference.
    
        Structure:
        {{
            "{topic}":  
            [
            {{
              "keywords": "<str>",
              "phrases": "<str>"
            }}
            ]
        }}


        Example Output:
        {{
            "Customer Service": 
            [
            {{
              "keywords": "Poor Service :5, rude behaviour :4 , unfriendly staff : 3 , didnt respond well :3, lousy behaviour:1",
              "phrases": "The staff didnt bother to ask us what we wanted  :2, Didn't have product knowledge  :1"
            }}
            ]
        }}

    The output should strictly be only in a JSON format.
    Strictly start the output with {{.
    Strictly do not start with ```json & end with  ``` in the output you give.
    Strictly do not start with```json.
    Strictly ensure that your output can be decoded by json.loads().

Remember, The ultimate goal is to provide the business team with what pain points of the customer in our {topic} through negative words & phrases along with their frequencies in the comments.
This helps them improve and better meet the needs of their customers.

Here are the customer comments to work with:
{reviews}
"""

    # Token counts are the usage figures reported by query_openai
    raw_reply, tokens_in, tokens_out = query_openai(instructions)

    return raw_reply, tokens_in, tokens_out

## UDF for Combining Keywords

In [14]:
#Function to combine keywords
def combine_keywords(cell_content):
    """Ask the model to merge near-synonymous keywords and sum their frequencies.

    ``cell_content`` (keywords with frequencies) is interpolated into the
    prompt as-is and the prompt is sent through ``query_openai``.

    Returns ``(api_response, input_token_count, output_token_count)`` as
    produced by ``query_openai``; the response text is not parsed here.
    """
    grouping_request = f"""
    Given the following keywords and their frequencies:
    
    {cell_content}

    Please perform the following tasks:
    1. Group together keywords that has an overall similar meanings(those with closely related meanings, e.g., "Trusted", "Trustworthy", "Reliable") under a common term.
    2. Correct any spelling mistakes (e.g., lowercase variations, incorrect spellings) but ensure that the meaning is not changed.
    3. Sum up the frequencies of the grouped keywords.
    4. There can be upto 10 grouped keywords in the descending order of the summed up frequncies.
    
    Strictly follow the below structure for the output. Dont give any additional explanation
    
        Structure:
        {{
        <str>:frequency, <str>:frequency 
        }}
    """

    raw_reply, tokens_in, tokens_out = query_openai(grouping_request)
    return raw_reply, tokens_in, tokens_out


In [15]:
#Function to combine phrases
def combine_phrases(cell_content):
    """Ask the model to merge similar phrases and sum their frequencies.

    ``cell_content`` (phrases with frequencies) is interpolated into the
    prompt as-is and the prompt is sent through ``query_openai``.

    Returns ``(api_response, input_token_count, output_token_count)`` as
    produced by ``query_openai``; the response text is not parsed here.
    """
    grouping_request = f"""
    Given the following phrases and their frequencies:
    
    {cell_content}

    Please perform the following tasks:
    1. Group together similar phrases that convey the same or closely related meaning (e.g., "most trustworthy people" and "very trustworthy").
    2. Correct any minor spelling or phrasing issues (e.g., capitalization, common typos), ensuring that the original meaning of the phrase is not changed.
    3. Sum up the frequencies of the grouped phrases.
    4. There can be upto 10 grouped phrases in the descending order of the summed up frequncies.  

    Strictly follow the below structure for the output. Dont give any additional explanation
    
        Structure:
        {{
        <str>:frequency, <str>:frequency 
        }}




"""
    raw_reply, tokens_in, tokens_out = query_openai(grouping_request)
    return raw_reply, tokens_in, tokens_out

# Data Reading & Manipulation

## Reading data scraped from APIFY

In [16]:
#Define the folder path
folder_path = 'scraped_data/uae_us'

#List of required columns
required_columns = [
                    "address", "name", "publishedAtDate", "reviewsCount", "stars", 
                    "text", "textTranslated", "title", "totalScore","url"
                    ]

#Initialize an empty list to store dataframes
dfs = []

#Loop through all files in the folder.
#os.listdir() returns entries in arbitrary order, so sort them to make the row order of the
#combined frame reproducible from one run / machine to the next.
for file in sorted(os.listdir(folder_path)):
    file_path = os.path.join(folder_path, file)

    #Check if it's a file and has a valid extension (CSV or Excel)
    if os.path.isfile(file_path) and file.endswith(('.csv', '.xlsx')):
        try:
            #Read CSV or Excel, keeping only the required columns that the file actually has
            if file.endswith('.csv'):
                df = pd.read_csv(file_path, usecols=lambda col: col in required_columns, encoding='utf-8')
            elif file.endswith('.xlsx'):
                df = pd.read_excel(file_path, usecols=lambda col: col in required_columns, engine='openpyxl')
            
            #Append to list if it's not empty
            if not df.empty:
                dfs.append(df)
        
        except Exception as e:
            #A file that cannot be read is reported and skipped; the remaining files are still loaded
            print(f"Error reading {file}: {e}")

#Combine all dataframes.
#Stop here when nothing was loaded: every later cell needs combined_df_competitors, so failing now
#with a clear message is better than a NameError further down.
if not dfs:
    raise ValueError("No valid files found or no data in the specified columns.")

combined_df_competitors = pd.concat(dfs, ignore_index=True)
print("Combined DataFrame shape:", combined_df_competitors.shape)

Combined DataFrame shape: (8481, 10)


In [17]:
#Drop rows in which every column is missing (how='all'); rows with at least one value are kept
combined_df_competitors = combined_df_competitors.dropna(how='all')
print(combined_df_competitors.shape)

(8479, 10)


In [18]:
#Preview the first row of the combined frame
combined_df_competitors.head(1)

Unnamed: 0,address,reviewsCount,title,totalScore,url,name,publishedAtDate,stars,text,textTranslated
0,"One Garden State Plaza, Paramus, NJ 07652",103.0,Tiffany & Co.,4.1,https://www.google.com/maps/search/?api=1&quer...,,,,,


In [19]:
#combining title & address columns to create a primary key to map store name
#A row whose title or address is missing gets NaN as its key (concatenation with NaN yields NaN)
combined_df_competitors['title-address'] = combined_df_competitors['title']+"-"+combined_df_competitors['address'] 

#Write the frame, including the new key column, to temp.xlsx in the working directory
#presumably used to inspect the keys while building the store-name mapping - TODO confirm
combined_df_competitors.to_excel("temp.xlsx", index=False)

In [20]:
store_name_mapping = {
"Arakkal Gold and Diamonds LLC - Meena Bazar - Bur Dubai (Branch 3)-Meena Bazar - Al Fahidi St - Bur Dubai - Al Fahidi - Dubai - United Arab Emirates": "Arakkal Gold and Diamonds LLC - Meena Bazar - Bur Dubai (Branch 3)",
"Bhima Jewellers - Al Karama - Dubai-karama Center - Shop No. 16 16th St - Al Karama - Dubai - United Arab Emirates": "Bhima Jewellers - Al Karama",
"Bhindi Jewelers-1070 Oak Tree Rd, Decatur, GA 30033": "Bhindi Jewellers-Decatur, GA",
"Evermark Jewelry-3170 Peachtree Pkwy, Johns Creek, GA 30024": "Evermark Jewelry-Johns Creek, GA",
"Jared Jewelers-1016 Illinois Rte 59, Aurora, IL 60504": "Jared-Aurora, IL",
"Jared Jewelers-1504 Randall Rd, Algonquin, IL 60102": "Jared-Algonquin, IL",
"Jared Jewelers-15341 LaGrange Rd, Orland Park, IL 60462": "Jared-Orland Park, IL",
"Jared Jewelers-1700 Woodfield Rd, Schaumburg, IL 60173, United States": "Jared-Schaumburg, IL",
"Jared Jewelers-2370 Fountain Square Dr, Lombard, IL 60148": "Jared-Lombard, IL",
"Jared Jewelers-567 E Townline Rd, Vernon Hills, IL 60061, United States": "Jared-Vernon Hills, IL",
"Jared Jewelers-693 E Boughton Rd, Bolingbrook, IL 60440": "Jared-Bolingbrook, IL",
"Joyalukkas Jewellery Al Barsha-4684+QX2 Lulu Hypermarket - Al Barsha Rd - Al Barsha - Al Barsha 1 - Dubai - United Arab Emirates": "Joyalukkas Jewellery - Al Barsha",
"Joyalukkas Jewellery Karama-Karama Centre, Kuwait Road - Dubai - United Arab Emirates": "Joyalukkas Jewellery - Al Karama",
"Joyalukkas Jewellery-3155 Peachtree Pkwy, Suwanee, GA 30024": "Joyalukkas Jewellery-Suwanee, GA",
"Joyalukkas Jewellery-7055 Preston Rd, Frisco, TX 75034": "Joyalukkas Jewellery-Frisco, TX",
"Joyalukkas Jewellery-Al Fahidi St - Bur Dubai - Al Fahidi - Dubai - United Arab Emirates": "Joyalukkas Jewellery - Al Fahidi st - Al Fahidi",
"Joyalukkas Jewellery-Dalma Plaza - Hamdan Bin Mohammed St - Abu Dhabi - United Arab Emirates": "Joyalukkas Jewellery - Dalma Plaza - Abu Dhabi",
"Joyalukkas Jewellery-Madinat Zayed Shopping Centre - Sultan Bin Zayed The First St - Zone 1 - Abu Dhabi - United Arab Emirates": "Joyalukkas Jewellery - Madinat Zayed Shopping Centre - Abu Dhabi",
"Joyalukkas Jewellery-Shabia - Musaffah - Abu Dhabi - United Arab Emirates": "Joyalukkas Jewellery - Shabia - Abu Dhabi",
"Joyalukkas Jewelry-2642 W Devon Ave, Chicago, IL 60659": "Joyalukkas Jewellery-Chicago, IL",
"Joyalukkas Jewelry-5901 Hillcroft Ave Suite C7-A, Houston, TX 77036, United States": "Joyalukkas Jewellery-Houston, TX",
"Kanz Jewels Meena Bazaar-19 50B St - Al Fahidi - Dubai - United Arab Emirates": "Kanz Jewellers",
"Malabar Gold & Diamonds - Iselin - New Jersey-1348 Oak Tree Rd, Iselin, NJ 08830": "Malabar Gold & Diamonds-Iselin, NJ",
"Malabar Gold & Diamonds - Silicon Central Mall-Central - Ground Floor, Shop No C03 & C04 - Dubai Silicon Oasis - Dubai - United Arab Emirates": "Malabar Gold & Diamonds - Silicon Oasis Central",
"Malabar Gold and Diamonds - Al Barsha - Dubai-Level 2, Lulu Hypermarket, Al Barsha - Dubai - United Arab Emirates": "Malabar Gold and Diamonds - Al Barsha - Dubai",
"Malabar Gold and Diamonds - Al Fahidi Street - Bur Dubai (Branch 1)-BurDubai - Al Fahidi St - Al Souq Al Kabeer - Dubai - United Arab Emirates": "Malabar Gold and Diamonds - Al Fahidi Street - Bur Dubai (Branch 1)",
"Malabar Gold and Diamonds - Al Karama - Dubai-Karama Center, karama Center - Shop no: B66 - Kuwait St - Al Karama - Dubai - United Arab Emirates": "Malabar Gold and Diamonds - Al Karama - Dubai",
"Malabar Gold and Diamonds - Al Wahda Mall - Abu Dhabi-Ground Floor, Al Wahda Mall - Hazza ' Bin Zayed The First St - Al Nahyan - Zone 1 - Abu Dhabi - United Arab Emirates": "Malabar Gold and Diamonds - Al Wahda Mall - Abu Dhabi",
"Malabar Gold and Diamonds - Dalma Mall - Abu Dhabi-Dalma Mall - First Floor - Abu Dhabi Industrial City - ICAD I - Abu Dhabi - United Arab Emirates": "Malabar Gold and Diamonds - Dalma Mall - Abu Dhabi",
"Malabar Gold and Diamonds - Hamdan Street ( Branch 1)-Omeir Bin Yousaf Building, Behind New UAE Exchange - Hamdan Bin Mohammed St - Abu Dhabi - United Arab Emirates": "Malabar Gold and Diamonds - Hamdan Street ( Branch 1)",
"Malabar Gold and Diamonds - Hamdan Street (Branch 2)-3 Hamdan Bin Mohammed St - opp. ahalia hospital - Al Danah - Zone 1 - Abu Dhabi - United Arab Emirates": "Malabar Gold and Diamonds - Hamdan Street (Branch 2)",
"Malabar Gold and Diamonds - Lulu Hypermarket - Madinat Zayed-Lulu Hypermarket, Madinat Zayed Shopping Centre - Abu Dhabi - United Arab Emirates": "Malabar Gold and Diamonds - Lulu Hypermarket - Madinat Zayed",
"Malabar Gold and Diamonds - Meena Bazar - Dubai-Meena Bazar Cosmos Lane, Near Dubai Museum - Dubai - United Arab Emirates": "Malabar Gold and Diamonds - Meena Bazar - Dubai",
"Malabar Gold and Diamonds - Shabia Musaffah-G Floor,Building # C125,ME-11 Mussafah Shabia Opp Al Ansari Exchange 106916 - Abu Dhabi - United Arab Emirates": "Malabar Gold and Diamonds - Shabia Musaffah",
"Malabar Gold and Diamonds - Souq Al Kabeer Building - Bur Dubai (Branch 2)-Shop No: - 01 Al Fahidi St - opp. Habib Bank AG Zurich - Bur Dubai - Al Souq Al Kabeer - Dubai - United Arab Emirates": "Malabar Gold and Diamonds - Souq Al Kabeer Building - Bur Dubai (Branch 2)",
"Malabar Gold and Diamonds Chicago-2652 W Devon Ave, Chicago, IL 60659": "Malabar Gold & Diamonds-Chicago, IL",
"Malabar Gold and Diamonds- Dallas-5811 Preston Rd, Frisco, TX 75034": "Malabar Gold & Diamonds-Frisco, TX",
"Malabar Gold and Diamonds Naperville-1568 Ogden Ave, Naperville, IL 60540": "Malabar Gold & Diamonds-Naperville, IL",
"Malani Jewelers-300 Central Expy, Richardson, TX 75080": "Malani Jewellers-Richardson, TX",
"May Jewelers-8032 Leesburg Pike, Tysons, VA 22182, United States": "May Jewelers-Vienna, VA",
"Meena Jewellers-Meena Bazar Cosmos Lane - 14 50B St - near Dubai Museum - Al Fahidi - Dubai - United Arab Emirates": "Meena Jewellers - Meena Bazar",
"Mint Jewels | Sell Gold in Dubai | Buy Gold Bars in Dubai-karama Center - 22 Kuwait St - Al Karama - Dubai - United Arab Emirates": "Mint Jewels - Al Karama",
"Sona Jewelers-6 Marconi Ave, Iselin, NJ 08830, United States": "Sona Jewelers-Iselin, NJ",
"Tiffany & Co.-1 American Dream Wy E201, East Rutherford, NJ 07073, United States": "Tiffany & Co-East Rutherford, NJ",
"Tiffany & Co.-101 Riverside Square Mall Suite 184A, Hackensack, NJ 07601": "Tiffany & Co-Hackensack, NJ",
"Tiffany & Co.-105 Broad St, Red Bank, NJ 07701": "Tiffany & Co-Red Bank, NJ",
"Tiffany & Co.-1158 Northbrook Ct, Northbrook, IL 60062": "Tiffany & Co-Northbrook, IL",
"Tiffany & Co.-1200 Morris Tpke, Short Hills, NJ 07078": "Tiffany & Co-Short Hills, NJ",
"Tiffany & Co.-4999 Old Orchard Shopping Ctr, Skokie, IL 60077": "Tiffany & Co-Skokie, IL",
"Tiffany & Co.-730 Michigan Ave, Chicago, IL 60611": "Tiffany & Co-Chicago, IL",
"Tiffany & Co.-8045 Leesburg Pike, Vienna, VA 22182": "Tiffany & Co-Vienna, VA",
"Tiffany & Co.-9200 Stony Point Pkwy, Richmond, VA 23235, United States": "Tiffany & Co-Richmond, VA",
"Tiffany & Co.-One Garden State Plaza, Paramus, NJ 07652": "Tiffany & Co-Paramus, NJ",
"VBJ (Vummidi Bangaru Jewellers)-7100 Stonebrook Pkwy, Frisco, TX 75034": "VBJ Jewellers-Frisco, TX"
}

#Create a new column 'store_name' by mapping the values of 'title-address' using the dictionary
#Keys that are not present in store_name_mapping come back as NaN
combined_df_competitors['store_name'] = combined_df_competitors['title-address'].map(store_name_mapping)


In [21]:
#Preview the first row to confirm the 'title-address' and 'store_name' columns were added
combined_df_competitors.head(1)

Unnamed: 0,address,reviewsCount,title,totalScore,url,name,publishedAtDate,stars,text,textTranslated,title-address,store_name
0,"One Garden State Plaza, Paramus, NJ 07652",103.0,Tiffany & Co.,4.1,https://www.google.com/maps/search/?api=1&quer...,,,,,,"Tiffany & Co.-One Garden State Plaza, Paramus,...","Tiffany & Co-Paramus, NJ"


In [22]:
#Validate the store-name mapping.
#Comparing unique counts alone is not sufficient: a key that is missing from the dictionary maps to NaN,
#and NaN itself counts as one unique value, so a single unmapped key would still pass the count check.
unmapped_keys = combined_df_competitors.loc[combined_df_competitors['store_name'].isna(), 'title-address'].unique().tolist()
unique_key_count = combined_df_competitors['title-address'].nunique(dropna=False)
unique_store_count = combined_df_competitors['store_name'].nunique(dropna=False)

if not unmapped_keys and unique_key_count == unique_store_count:
    print("Data is mapped correctly")
else:
    print("Revisit Store Name Mapping")
    if unmapped_keys:
        #List the offending keys so the dictionary can be completed
        print("Unmapped title-address keys:", unmapped_keys)

Data is mapped correctly


In [23]:
#List the distinct mapped store names, in order of first appearance
combined_df_competitors['store_name'].unique().tolist()

['Tiffany & Co-Paramus, NJ',
 'Joyalukkas Jewellery - Shabia - Abu Dhabi',
 'Malabar Gold and Diamonds - Lulu Hypermarket - Madinat Zayed',
 'Malabar Gold & Diamonds-Naperville, IL',
 'Sona Jewelers-Iselin, NJ',
 'Malabar Gold and Diamonds - Dalma Mall - Abu Dhabi',
 'Malabar Gold & Diamonds-Chicago, IL',
 'Joyalukkas Jewellery - Madinat Zayed Shopping Centre - Abu Dhabi',
 'Tiffany & Co-Northbrook, IL',
 'Jared-Lombard, IL',
 'Bhima Jewellers - Al Karama',
 'Malabar Gold and Diamonds - Al Barsha - Dubai',
 'Malabar Gold and Diamonds - Hamdan Street ( Branch 1)',
 'Jared-Schaumburg, IL',
 'Tiffany & Co-Chicago, IL',
 'Tiffany & Co-Richmond, VA',
 'Malabar Gold and Diamonds - Meena Bazar - Dubai',
 'Tiffany & Co-Skokie, IL',
 'Joyalukkas Jewellery-Chicago, IL',
 'Mint Jewels - Al Karama',
 'Malabar Gold and Diamonds - Al Fahidi Street - Bur Dubai (Branch 1)',
 'May Jewelers-Vienna, VA',
 'Tiffany & Co-Red Bank, NJ',
 'Kanz Jewellers',
 'Malabar Gold & Diamonds - Silicon Oasis Central',


In [24]:
#to_excel does not create missing folders, so make sure the output directory exists first
os.makedirs('temp', exist_ok=True)
combined_df_competitors.to_excel('temp/combined_df_competitors.xlsx', index=False)

In [25]:
#Keep only the columns used downstream; .copy() detaches the result from the wider frame
columns_required = [
    'name',
    'publishedAtDate',
    'stars',
    'text',
    'textTranslated',
    'totalScore',
    'store_name',
]
combined_df_competitors = combined_df_competitors.loc[:, columns_required].copy()

In [26]:
#Confirm which columns remain after the selection
combined_df_competitors.columns.tolist()

['name',
 'publishedAtDate',
 'stars',
 'text',
 'textTranslated',
 'totalScore',
 'store_name']

In [27]:
#Rows without a star rating; shown here for inspection before they are removed
rows_to_drop = combined_df_competitors.loc[lambda frame: frame['stars'].isna()]
rows_to_drop


Unnamed: 0,name,publishedAtDate,stars,text,textTranslated,totalScore,store_name
0,,,,,,4.1,"Tiffany & Co-Paramus, NJ"
4424,,,,,,5.0,"Evermark Jewelry-Johns Creek, GA"


In [28]:
#Number of rows per store, largest first
combined_df_competitors['store_name'].value_counts()

store_name
Joyalukkas Jewellery - Al Fahidi st - Al Fahidi                               1102
Malabar Gold and Diamonds - Hamdan Street (Branch 2)                           789
Malabar Gold and Diamonds - Lulu Hypermarket - Madinat Zayed                   701
Malabar Gold and Diamonds - Al Karama - Dubai                                  511
Malabar Gold and Diamonds - Dalma Mall - Abu Dhabi                             421
Joyalukkas Jewellery - Al Karama                                               412
Malabar Gold & Diamonds-Iselin, NJ                                             360
Malabar Gold & Diamonds-Naperville, IL                                         352
Malabar Gold and Diamonds - Meena Bazar - Dubai                                337
Malabar Gold & Diamonds-Frisco, TX                                             333
Malabar Gold and Diamonds - Al Barsha - Dubai                                  321
Malabar Gold & Diamonds-Chicago, IL                                         

In [29]:
#Dropping records having Null Values in stars 
#Only the 'stars' column is checked; missing values in other columns do not cause a row to be dropped
combined_df_competitors = combined_df_competitors.dropna(subset=['stars'])

In [30]:
#Row count after removing the records without a star rating
len(combined_df_competitors)

8477

### Competitor Data Manipulation

In [31]:
#Creating review_text column
#Use the translated text when it is present and not blank, otherwise fall back to the original text.
#Vectorized instead of a row-wise apply: it is faster and does not call .strip() on non-string values,
#which the previous lambda did for any non-null, non-string entry in textTranslated.
translated_text = combined_df_competitors['textTranslated']
has_translation = translated_text.notna() & translated_text.astype(str).str.strip().ne('')
combined_df_competitors['review_text'] = translated_text.where(has_translation, combined_df_competitors['text'])

In [32]:
#Parse 'publishedAtDate' into datetimes and derive the calendar year and month from the parsed values.
#Within a single assign(), later callables see the columns produced by earlier ones.
combined_df_competitors = combined_df_competitors.assign(
    publishedAtDate=lambda frame: pd.to_datetime(frame['publishedAtDate']),
    year=lambda frame: frame['publishedAtDate'].dt.year,
    month=lambda frame: frame['publishedAtDate'].dt.month,
)


In [33]:
#Earliest and latest review date per store, formatted as 'MMM-YYYY'.
#The raw min/max timestamps are only intermediate and are dropped from the summary.
store_date_summary = (
    combined_df_competitors
    .groupby('store_name')['publishedAtDate']
    .agg(['min', 'max'])
    .reset_index()
    .assign(
        min_date=lambda frame: frame['min'].dt.strftime('%b-%Y'),
        max_date=lambda frame: frame['max'].dt.strftime('%b-%Y'),
    )
    .drop(columns=['min', 'max'])
)

#Display the distinct first-review months across stores
store_date_summary['min_date'].unique()

array(['May-2025', 'Apr-2025', 'Jun-2025'], dtype=object)

#### Filtering reviews published after Dec 31, 2024

#Define the cutoff as a timezone-aware (UTC) timestamp at the very start of 2025
#NOTE(review): the comparison below assumes 'publishedAtDate' is tz-aware - confirm
cutoff_date = pd.Timestamp('2025-01-01', tz='UTC')

#Keep only records published after December 31, 2024. The comparison is inclusive
#so a review stamped exactly 2025-01-01 00:00:00 UTC is kept rather than dropped.
combined_df_competitors = combined_df_competitors[combined_df_competitors['publishedAtDate'] >= cutoff_date].copy()



In [34]:
#Remove the two raw text columns; rebinding the name avoids an in-place mutation
combined_df_competitors = combined_df_competitors.drop(columns=['text', 'textTranslated'])

In [35]:
#Competitor rows get the literal string 'NA' as their store code placeholder
#NOTE(review): pandas readers treat the text 'NA' as missing by default when a
#saved file is loaded back - confirm downstream readers expect that
combined_df_competitors = combined_df_competitors.assign(store_code='NA')

In [36]:
#Competitor column name -> standard column name used in the rest of the code
std_column_names_comp = {
    'name': 'Name',
    'review_text': 'review_text',
    'publishedAtDate': 'Published At Date',
    'stars': 'Stars',
    'store_name': 'Store Name',
    'totalScore': 'Total Score',
    'store_code': 'Store Code Cleaned',
    'year': 'year',
    'month': 'month',
}

#Apply the standard names (returns a renamed frame instead of mutating in place)
combined_df_competitors = combined_df_competitors.rename(columns=std_column_names_comp)


In [37]:
#Number of distinct competitor store names (a missing name would count as one value)
len(combined_df_competitors['Store Name'].unique())

52

In [38]:
#Row count after the column clean-up steps
combined_df_competitors.shape[0]

8477

In [39]:
#Strip the timezone so the timestamps are timezone-naive before export
competitor_dates_naive = combined_df_competitors['Published At Date'].dt.tz_localize(None)
combined_df_competitors['Published At Date'] = competitor_dates_naive

#Write the cleaned competitor reviews to Excel
combined_df_competitors.to_excel(
    "temp/raw_file_combined_df_competitors.xlsx",
    index=False,
)

print("File saved successfully!")

File saved successfully!


In [40]:
#Preview the first two competitor rows
combined_df_competitors.iloc[:2]

Unnamed: 0,Name,Published At Date,Stars,Total Score,Store Name,review_text,year,month,Store Code Cleaned
2,Suyel Khan,2025-06-09 10:54:58.711,5.0,4.6,Joyalukkas Jewellery - Shabia - Abu Dhabi,Thank for jeeshan❤️,2025,6,
3,Hidayat Afridi,2025-06-09 08:08:12.093,5.0,4.6,Joyalukkas Jewellery - Shabia - Abu Dhabi,,2025,6,


## Reading Tanishq Data.

<span style = "color:darkgreen">**Prerequisites before reading in python notebook**</span>

This code works where all the data are combined

#Alternative loader: reads every sheet of the workbook and stacks them into one DataFrame
file_path = 'scraped_data/tanishq/scraped_data_tanishq.xlsx'

#Load all the sheets into a dictionary of DataFrames
sheets_dict = pd.read_excel(file_path, sheet_name=None)

#Combine all the DataFrames into a single DataFrame
combined_df_tq = pd.concat(sheets_dict.values(), ignore_index=True)


In [41]:
#Load only the required Tanishq review columns and give them their working names
#(columns not listed in the rename keep their original name)
combined_df_tq = pd.read_excel(
    'scraped_data/tanishq/scraped_data_tanishq.xlsx',
    usecols=['Screenname', 'Chatter', 'Post Created Date', 'Ratings', 'Location'],
).rename(
    columns={
        'Chatter': 'Content',
        'Post Created Date': 'Date',
        'Location': 'Business Location',
    }
)

combined_df_tq.head(2)

Unnamed: 0,Screenname,Content,Date,Ratings,Business Location
0,Santhoshkumar Sathian,So happy with good services specially Mrs chai...,2024-04-09 17:55:36,5,"Lulu ,Sharjah Central, A-06, Ground Floor Shei..."
1,Kusum Devrani,Exceptional service from Tanishq and their sta...,2024-04-12 20:07:01,5,"G-01, TAJ DUBAI, BURJ-KHALIFA STREET, BUSINESS..."


### Tanishq Data Manipulation

In [42]:
#Confirm the renamed Tanishq columns
combined_df_tq.columns

Index(['Screenname', 'Content', 'Date', 'Ratings', 'Business Location'], dtype='object')

In [43]:
#Work on an independent (deep) copy so the loaded frame stays untouched
working_combined_tq = combined_df_tq.copy(deep=True)

In [44]:
#Snapshot of the Tanishq working frame before any mapping is applied
working_combined_tq.to_excel('temp/combined_tq.xlsx',index=False)

In [45]:
#Number of distinct business addresses in the Tanishq data
len(working_combined_tq["Business Location"].unique())

23

In [46]:
#Raw 'Business Location' address -> display store name.
#Keys must match the address text exactly (some keys contain the HTML entity
#'&amp;' and one contains Arabic text); an address missing from this
#dictionary is mapped to NaN by Series.map.
location_to_store_name = {
                            "1429 Oak Tree Rd" : "Tanishq-New Jersey, NJ",
                            "2335 Post Oak Blvd" : "Tanishq-Houston, TX",
                            "2809 Preston Rd #1200" : "Tanishq-Frisco, TX",
                            "3406 El Camino Real" : "Tanishq-Santa Clara, CA",
                            "4300 E New York St" : "Tanishq-Chicago, IL",
                            "580 Peachtree Pkwy" : "Tanishq-Atlanta, GA",
                            "7325 166th Avenue Northeast suite F155" : "Tanishq-Redmond Seattle, WA",
                            "Al Dana Tower, Rolla Square, Al Ghuwair Al Gharb" : "Tanishq Jewellers-Rolla, SH",
                            "C-19&amp;20, Ground Floor Silicon Central, Silicon Oasis Dubai" : "Tanishq Jewellers-Silicon Central, DB",
                            "Dubai Gold souk, AL Hind Plaza 1 - Deira - Al Ras" : "Tanishq Jewellers-Gold Souk, DB",
                            "F33 A, Oman Avenues Mall, Baushar" : "Tanishq Jewellers-Avenues Mall, OM",
                            "G-01, TAJ DUBAI, BURJ-KHALIFA STREET, BUSINESS BAY" : "Tanishq Jewellers-Taj, DB",
                            "Ground Floor, Al Wahda Mall - Hazza ' Bin Zayed The First St - Al Nahyan -  Zone 1" : "Mia-Al Wahda Mall, AD",
                            "Johara Al Mana WLL, Lulu Hypermarket , Shop No 6 Lulu, D Ring" : "Tanishq Jewellers-Lulu Hypermarket, QA",
                            "Lulu ,Sharjah Central, A-06, Ground Floor Sheikh Rashid Bin Saqr Al Qasimi St - Halwan Suburb - Samnan - Sharjah" : "Tanishq Jewellers-Sharjah Central, SH",
                            "Shop 20, Ground Floor, Lulu Hypermarket شارع أم سقيم البرشاء" : "Tanishq Jewellers-Al Barsha, DB",
                            "Shop No 17-20, Ground Floor Karama Center Shopping Mall, Al Kuwait Street" : "Tanishq Jewellers-Al Karama, DB",
                            "Shop No:1, Plot No:312-294 Al Souq Al Kabeer, Fahidi Road Bur Dubai" : "Tanishq Jewellers-Al Fahidi, DB",
                            "Shop Number 1 &amp; 2 Ground Floor, UW Mall 39 11B St - Al Mankhool" : "Tanishq Jewellers-UW Mall Al Mankhool, DB",
                            "Showroom No. 3, Plot 312/177, Al Souq Al Kabeer, Meena Bazar, Cosmos Lane, Near Dubai Museum Bur Dubai- 90320" : "Tanishq Jewellers-Meena Bazar, DB",
                            "Showroom No:3, Plot No:C-7 Sector No:E-8, Al Saman Tower" : "Tanishq Jewellers-Hamdan Bin Mohammed Street, AD",
                            "Tanishq Jewellery Johara Al Mana WLL Doha Festival City, Store GE138, Bawabat Al Shamal" : "Tanishq Jewellers-Festival City, QA",
                            "Unit 1025, Level 1, Burjuman Center, Opposite Starbucks, Khalid Bin Al Waleed Street," : "Mia-Burjuman, DB"
                        }

#Map the 'location' to 'Store Name'
working_combined_tq['store_name'] = working_combined_tq['Business Location'].map(location_to_store_name)



In [47]:
#Number of distinct mapped store names (NaN from an unmapped address counts as one value)
len(working_combined_tq['store_name'].unique())

23

In [48]:
#Review volume per Tanishq store, largest first
working_combined_tq['store_name'].value_counts()

store_name
Tanishq Jewellers-Al Fahidi, DB                     1781
Tanishq Jewellers-Al Barsha, DB                     1029
Tanishq Jewellers-Gold Souk, DB                     1016
Tanishq-Frisco, TX                                   778
Tanishq Jewellers-Meena Bazar, DB                    683
Tanishq Jewellers-Silicon Central, DB                681
Tanishq Jewellers-Al Karama, DB                      488
Mia-Burjuman, DB                                     408
Tanishq Jewellers-Hamdan Bin Mohammed Street, AD     390
Tanishq-Atlanta, GA                                  373
Tanishq Jewellers-Sharjah Central, SH                319
Tanishq-Houston, TX                                  248
Tanishq-New Jersey, NJ                               231
Tanishq Jewellers-Rolla, SH                          223
Tanishq-Chicago, IL                                  203
Tanishq Jewellers-Taj, DB                            154
Tanishq Jewellers-Lulu Hypermarket, QA               145
Tanishq-Redmond Seat

In [49]:
#Raw 'Business Location' address -> short store code.
#Uses the same address keys as location_to_store_name; keys must match the
#address text exactly, and an address missing here is mapped to NaN by Series.map.
location_to_store_code = {
                            "1429 Oak Tree Rd" : "XNJ",
                            "2335 Post Oak Blvd" : "XTH",
                            "2809 Preston Rd #1200" : "XTD",
                            "3406 El Camino Real" : "XBA",
                            "4300 E New York St" : "XCG",
                            "580 Peachtree Pkwy" : "XAC",
                            "7325 166th Avenue Northeast suite F155" : "XWS",
                            "Al Dana Tower, Rolla Square, Al Ghuwair Al Gharb" : "XSR",
                            "C-19&amp;20, Ground Floor Silicon Central, Silicon Oasis Dubai" : "XDS",
                            "Dubai Gold souk, AL Hind Plaza 1 - Deira - Al Ras" : "XDG",
                            "F33 A, Oman Avenues Mall, Baushar" : "XOM",
                            "G-01, TAJ DUBAI, BURJ-KHALIFA STREET, BUSINESS BAY" : "XDT",
                            "Ground Floor, Al Wahda Mall - Hazza ' Bin Zayed The First St - Al Nahyan -  Zone 1" : "XAW",
                            "Johara Al Mana WLL, Lulu Hypermarket , Shop No 6 Lulu, D Ring" : "XQD",
                            "Lulu ,Sharjah Central, A-06, Ground Floor Sheikh Rashid Bin Saqr Al Qasimi St - Halwan Suburb - Samnan - Sharjah" : "XSL",
                            "Shop 20, Ground Floor, Lulu Hypermarket شارع أم سقيم البرشاء" : "XDB",
                            "Shop No 17-20, Ground Floor Karama Center Shopping Mall, Al Kuwait Street" : "XDK",
                            "Shop No:1, Plot No:312-294 Al Souq Al Kabeer, Fahidi Road Bur Dubai" : "XDF",
                            "Shop Number 1 &amp; 2 Ground Floor, UW Mall 39 11B St - Al Mankhool" : "XDX",
                            "Showroom No. 3, Plot 312/177, Al Souq Al Kabeer, Meena Bazar, Cosmos Lane, Near Dubai Museum Bur Dubai- 90320" : "XDM",
                            "Showroom No:3, Plot No:C-7 Sector No:E-8, Al Saman Tower" : "XAH",
                            "Tanishq Jewellery Johara Al Mana WLL Doha Festival City, Store GE138, Bawabat Al Shamal" : "XQF",
                            "Unit 1025, Level 1, Burjuman Center, Opposite Starbucks, Khalid Bin Al Waleed Street," : "XDJ"
                            }

#Map the 'location' to 'Store Code'
working_combined_tq['store_code'] = working_combined_tq['Business Location'].map(location_to_store_code)


In [50]:
#Number of distinct mapped store codes (NaN from an unmapped address counts as one value)
len(working_combined_tq['store_code'].unique())

23

In [51]:
#Validate the address -> store name / store code mappings.
#Series.unique() keeps NaN as a value, so comparing len(unique()) can report a
#match even when an address failed to map. Compare the non-null distinct counts
#and require every row to be mapped instead.
n_locations = working_combined_tq["Business Location"].nunique()

store_names_ok = (
    working_combined_tq['store_name'].notna().all()
    and working_combined_tq['store_name'].nunique() == n_locations
)
store_codes_ok = (
    working_combined_tq['store_code'].notna().all()
    and working_combined_tq['store_code'].nunique() == n_locations
)

if store_names_ok:
    print("Address & Store Name number matches")
    if store_codes_ok:
        print("Store code also matches")
    else:
        print("Store code number doesnt match")
else:
    print("Store Name doesnt match")

Address & Store Name number matches
Store code also matches


In [52]:
#List the addresses that did not receive a store code (empty list when all mapped)
missing_code_mask = working_combined_tq['store_code'].isna()
nan_store_code_locations = (
    working_combined_tq.loc[missing_code_mask, 'Business Location']
    .unique()
    .tolist()
)
print(nan_store_code_locations)

[]


In [53]:
#Creating the dictionary for avg_rating: store code -> overall store rating.
#NOTE(review): these values are hard-coded and their source is not shown in
#this notebook - confirm they are current before re-running.
store_code_to_avg_rating = {
                            "XSL" : 4.9,
                            "XDT" : 4.9,
                            "XDS" : 4.9,
                            "XDF" : 5,
                            "XDK" : 4.9,
                            "XDM" : 4.9,
                            "XDJ" : 4.9,
                            "XDB" : 4.9,
                            "XAH" : 5,
                            "XQD" : 4.9,
                            "XOM" : 4.7,
                            "XQF" : 4.9,
                            "XDG" : 5,
                            "XSR" : 5,
                            "XDX" : 4.9,
                            "XAW" : 5,
                            "XTD" : 4.9,
                            "XCG" : 4.8,
                            "XNJ" : 4.7,
                            "XTH" : 4.8,
                            "XAC" : 4.9,
                            "XWS" : 4.8,
                            "XBA" : 4.5
                            }

#Attach the store-level rating to every review row via its store code
working_combined_tq['Total Score'] = working_combined_tq['store_code'].map(store_code_to_avg_rating)

In [54]:
#Distinct store ratings assigned; a NaN here would indicate a store code missing from the rating dictionary
working_combined_tq['Total Score'].unique().tolist()

[4.9, 5.0, 4.7, 4.8, 4.5]

In [55]:
#Parse 'Date' once, then derive the calendar year and month from it
tq_review_dates = pd.to_datetime(working_combined_tq['Date'])

working_combined_tq['Date'] = tq_review_dates
working_combined_tq['year'] = tq_review_dates.dt.year
working_combined_tq['month'] = tq_review_dates.dt.month

In [56]:
#Remove the raw address column; rebinding the name avoids an in-place mutation
working_combined_tq = working_combined_tq.drop(columns=['Business Location'])

In [57]:
#Columns available before renaming to the standard names
working_combined_tq.columns

Index(['Screenname', 'Content', 'Date', 'Ratings', 'store_name', 'store_code',
       'Total Score', 'year', 'month'],
      dtype='object')

In [58]:
#Tanishq column name -> standard column name used in the rest of the code
std_column_names_tq = {
    'Screenname': 'Name',
    'Content': 'review_text',
    'Date': 'Published At Date',
    'Ratings': 'Stars',
    'store_name': 'Store Name',
    'Total Score': 'Total Score',
    'store_code': 'Store Code Cleaned',
    'year': 'year',
    'month': 'month',
}

#Apply the standard names (returns a renamed frame instead of mutating in place)
working_combined_tq = working_combined_tq.rename(columns=std_column_names_tq)


In [59]:
#Average 'Total Score' per store code
working_combined_tq.groupby('Store Code Cleaned')['Total Score'].agg('mean')

Store Code Cleaned
XAC    4.9
XAH    5.0
XAW    5.0
XBA    4.5
XCG    4.8
XDB    4.9
XDF    5.0
XDG    5.0
XDJ    4.9
XDK    4.9
XDM    4.9
XDS    4.9
XDT    4.9
XDX    4.9
XNJ    4.7
XOM    4.7
XQD    4.9
XQF    4.9
XSL    4.9
XSR    5.0
XTD    4.9
XTH    4.8
XWS    4.8
Name: Total Score, dtype: float64

In [60]:
#Strip any timezone so the timestamps are timezone-naive before export
tq_dates_naive = working_combined_tq['Published At Date'].dt.tz_localize(None)
working_combined_tq['Published At Date'] = tq_dates_naive

#Write the cleaned Tanishq reviews to Excel
working_combined_tq.to_excel(
    "temp/raw_file_combined_tq.xlsx",
    index=False,
)

print("File saved successfully!")

File saved successfully!


## Combining Tanishq & Competitor data

In [61]:
#Tanishq columns after standardisation (to compare against the competitor frame)
working_combined_tq.columns

Index(['Name', 'review_text', 'Published At Date', 'Stars', 'Store Name',
       'Store Code Cleaned', 'Total Score', 'year', 'month'],
      dtype='object')

In [62]:
#Competitor columns after standardisation (to compare against the Tanishq frame)
combined_df_competitors.columns

Index(['Name', 'Published At Date', 'Stars', 'Total Score', 'Store Name',
       'review_text', 'year', 'month', 'Store Code Cleaned'],
      dtype='object')

In [63]:
#Stack Tanishq and competitor reviews into one frame with a fresh 0..n-1 index.
#pd.concat aligns columns by name, so the differing column order of the two
#inputs does not matter.
combined_df = pd.concat([working_combined_tq, combined_df_competitors], ignore_index=True)

In [64]:
#Column order used from here on
desired_order = [
    'Store Name',
    'Name',
    'Published At Date',
    'Stars',
    'Total Score',
    'year',
    'month',
    'review_text',
    'Store Code Cleaned',
]

#Select the columns in that order (raises KeyError if one is missing)
combined_df = combined_df.loc[:, desired_order]

In [65]:
#Preview the first combined row
combined_df.iloc[:1]

Unnamed: 0,Store Name,Name,Published At Date,Stars,Total Score,year,month,review_text,Store Code Cleaned
0,"Tanishq Jewellers-Sharjah Central, SH",Santhoshkumar Sathian,2024-04-09 17:55:36,5.0,4.9,2024,4,So happy with good services specially Mrs chai...,XSL


In [66]:
#No timezone handling is needed here: 'Published At Date' was already made
#timezone-naive in both source frames before they were concatenated.

#Now save to Excel
combined_df.to_excel("temp/raw_file_combined.xlsx", index=False)

print("File saved successfully!")

File saved successfully!


# Data Pre Processing

> Certain stores are not considered because no usable review data is available for them.

In [67]:
#Count rows with a missing store name. Only the last expression of a cell is
#displayed, so the previously discarded unique() call has been removed.
combined_df['Store Name'].isnull().sum()

0

In [68]:
#Count rows with a missing store code. Only the last expression of a cell is
#displayed, so the previously discarded unique() call has been removed.
combined_df['Store Code Cleaned'].isnull().sum()

0

In [69]:
#Number of distinct stores across Tanishq and competitors (NaN excluded)
combined_df['Store Name'].nunique()


75

In [70]:
#Full list of store names; the values in `mappings` must match these strings exactly
combined_df['Store Name'].unique().tolist()


['Tanishq Jewellers-Sharjah Central, SH',
 'Tanishq Jewellers-Taj, DB',
 'Tanishq Jewellers-Silicon Central, DB',
 'Tanishq Jewellers-Al Fahidi, DB',
 'Tanishq Jewellers-Al Karama, DB',
 'Tanishq Jewellers-Meena Bazar, DB',
 'Mia-Burjuman, DB',
 'Tanishq Jewellers-Al Barsha, DB',
 'Tanishq Jewellers-Hamdan Bin Mohammed Street, AD',
 'Tanishq Jewellers-Lulu Hypermarket, QA',
 'Tanishq Jewellers-Avenues Mall, OM',
 'Tanishq Jewellers-Festival City, QA',
 'Tanishq Jewellers-Gold Souk, DB',
 'Tanishq Jewellers-Rolla, SH',
 'Tanishq Jewellers-UW Mall Al Mankhool, DB',
 'Mia-Al Wahda Mall, AD',
 'Tanishq-Frisco, TX',
 'Tanishq-Chicago, IL',
 'Tanishq-New Jersey, NJ',
 'Tanishq-Houston, TX',
 'Tanishq-Atlanta, GA',
 'Tanishq-Redmond Seattle, WA',
 'Tanishq-Santa Clara, CA',
 'Joyalukkas Jewellery - Shabia - Abu Dhabi',
 'Malabar Gold and Diamonds - Lulu Hypermarket - Madinat Zayed',
 'Malabar Gold & Diamonds-Naperville, IL',
 'Sona Jewelers-Iselin, NJ',
 'Malabar Gold and Diamonds - Dalma Mal

In [71]:
#Short dataframe key -> exact 'Store Name' value.
#The keys become the names of the per-store dataframes built in the next cell;
#each value must match a 'Store Name' in combined_df exactly, otherwise that
#key ends up with an empty dataframe.
mappings = {"agd_mb" : "Arakkal Gold and Diamonds LLC - Meena Bazar - Bur Dubai (Branch 3)",
            "bhi_ak" : "Bhima Jewellers - Al Karama",
            "bhi_dec_ga" : "Bhindi Jewellers-Decatur, GA",
            "eve_joh_ga" : "Evermark Jewelry-Johns Creek, GA",
            "jar_alg_il" : "Jared-Algonquin, IL",
            "jar_aur_il" : "Jared-Aurora, IL",
            "jar_bol_il" : "Jared-Bolingbrook, IL",
            "jar_lom_il" : "Jared-Lombard, IL",
            "jar_orl_il" : "Jared-Orland Park, IL",
            "jar_sch_il" : "Jared-Schaumburg, IL",
            "jar_ver_il" : "Jared-Vernon Hills, IL",
            "joy_ab" : "Joyalukkas Jewellery - Al Barsha",
            "joy_ak" : "Joyalukkas Jewellery - Al Karama",
            "joy_chi_il" : "Joyalukkas Jewellery-Chicago, IL",
            "joy_dm_ad" : "Joyalukkas Jewellery - Dalma Plaza - Abu Dhabi",
            "joy_fri_tx" : "Joyalukkas Jewellery-Frisco, TX",
            "joy_hou_tx" : "Joyalukkas Jewellery-Houston, TX",
            "joy_mz_ad" : "Joyalukkas Jewellery - Madinat Zayed Shopping Centre - Abu Dhabi",
            "joy_sh_ad" : "Joyalukkas Jewellery - Shabia - Abu Dhabi",
            "joy_st_af" : "Joyalukkas Jewellery - Al Fahidi st - Al Fahidi",
            "joy_suw_ga" : "Joyalukkas Jewellery-Suwanee, GA",
            "kan_mb" : "Kanz Jewellers",
            "mal_ab" : "Malabar Gold and Diamonds - Al Barsha - Dubai",
            "mal_ak" : "Malabar Gold and Diamonds - Al Karama - Dubai",
            "mal_aw_ad" : "Malabar Gold and Diamonds - Al Wahda Mall - Abu Dhabi",
            "mal_b1_ad" : "Malabar Gold and Diamonds - Hamdan Street ( Branch 1)",
            "mal_b1_af" : "Malabar Gold and Diamonds - Al Fahidi Street - Bur Dubai (Branch 1)",
            "mal_b2_ad" : "Malabar Gold and Diamonds - Hamdan Street (Branch 2)",
            "mal_b2_af" : "Malabar Gold and Diamonds - Souq Al Kabeer Building - Bur Dubai (Branch 2)",
            "mal_chi_il" : "Malabar Gold & Diamonds-Chicago, IL",
            "mal_dm_ad" : "Malabar Gold and Diamonds - Dalma Mall - Abu Dhabi",
            "mal_fri_tx" : "Malabar Gold & Diamonds-Frisco, TX",
            "mal_ise_nj" : "Malabar Gold & Diamonds-Iselin, NJ",
            "mal_lu_ad" : "Malabar Gold and Diamonds - Lulu Hypermarket - Madinat Zayed",
            "mal_mb" : "Malabar Gold and Diamonds - Meena Bazar - Dubai",
            "mal_nap_il" : "Malabar Gold & Diamonds-Naperville, IL",
            "mal_ric_tx" : "Malani Jewellers-Richardson, TX",
            "mal_sc" : "Malabar Gold & Diamonds - Silicon Oasis Central",
            "mal_sh_ad" : "Malabar Gold and Diamonds - Shabia Musaffah",
            "may_vie_va" : "May Jewelers-Vienna, VA",
            "mia_awm_ad" : "Mia-Al Wahda Mall, AD",
            "mia_bur_db" : "Mia-Burjuman, DB",
            "min_ak" : "Mint Jewels - Al Karama",
            "mna_mb" : "Meena Jewellers - Meena Bazar",
            "son_ise_nj" : "Sona Jewelers-Iselin, NJ",
            "tan_am_om" : "Tanishq Jewellers-Avenues Mall, OM",
            "tan_atl_ga" : "Tanishq-Atlanta, GA",
            "tan_bar_db" : "Tanishq Jewellers-Al Barsha, DB",
            "tan_chi_il" : "Tanishq-Chicago, IL",
            "tan_fah_db" : "Tanishq Jewellers-Al Fahidi, DB",
            "tan_fc_qa" : "Tanishq Jewellers-Festival City, QA",
            "tan_fri_tx" : "Tanishq-Frisco, TX",
            "tan_gs_db" : "Tanishq Jewellers-Gold Souk, DB",
            "tan_ham_ad" : "Tanishq Jewellers-Hamdan Bin Mohammed Street, AD",
            "tan_hou_tx" : "Tanishq-Houston, TX",
            "tan_kar_db" : "Tanishq Jewellers-Al Karama, DB",
            "tan_lul_qa" : "Tanishq Jewellers-Lulu Hypermarket, QA",
            "tan_mank_db" : "Tanishq Jewellers-UW Mall Al Mankhool, DB",
            "tan_mee_db" : "Tanishq Jewellers-Meena Bazar, DB",
            "tan_new_nj" : "Tanishq-New Jersey, NJ",
            "tan_rol_sh" : "Tanishq Jewellers-Rolla, SH",
            "tan_rse_wa" : "Tanishq-Redmond Seattle, WA",
            "tan_sc_ca" : "Tanishq-Santa Clara, CA",
            "tan_sc_sh" : "Tanishq Jewellers-Sharjah Central, SH",
            "tan_sil_db" : "Tanishq Jewellers-Silicon Central, DB",
            "tan_taj_db" : "Tanishq Jewellers-Taj, DB",
            "tif_chi_il" : "Tiffany & Co-Chicago, IL",
            "tif_eas_nj" : "Tiffany & Co-East Rutherford, NJ",
            "tif_hac_nj" : "Tiffany & Co-Hackensack, NJ",
            "tif_nor_il" : "Tiffany & Co-Northbrook, IL",
            "tif_par_nj" : "Tiffany & Co-Paramus, NJ",
            "tif_red_nj" : "Tiffany & Co-Red Bank, NJ",
            "tif_ric_va" : "Tiffany & Co-Richmond, VA",
            "tif_sho_nj" : "Tiffany & Co-Short Hills, NJ",
            "tif_sko_il" : "Tiffany & Co-Skokie, IL",
            "tif_vie_va" : "Tiffany & Co-Vienna, VA",
            "vbj_fri_tx" : "VBJ Jewellers-Frisco, TX"
}

In [72]:
#Build one dataframe per store key: the rows of combined_df whose 'Store Name'
#equals the mapped title, re-indexed from 0. A title with no matching rows
#produces an empty dataframe.
dataframes = {
    df_name: combined_df.loc[combined_df['Store Name'] == title].reset_index(drop=True)
    for df_name, title in mappings.items()
}

In [73]:
#One entry per key in `mappings`, including keys whose store has no rows in combined_df
len(dataframes)

77

In [74]:
#Split every per-store dataframe by whether 'review_text' is present.
#Keys are suffixed with "_nonnull" / "_null" and each result is re-indexed from 0.
nonnull_dataframes = {
    f"{store_key}_nonnull": store_df.loc[store_df['review_text'].notnull()].reset_index(drop=True)
    for store_key, store_df in dataframes.items()
}

null_dataframes = {
    f"{store_key}_null": store_df.loc[store_df['review_text'].isnull()].reset_index(drop=True)
    for store_key, store_df in dataframes.items()
}

In [75]:
#Add the review word count and its length bucket to every non-null review frame.
#The frames are updated in place, so nonnull_dataframes keeps the same keys.
for nonnull_df in nonnull_dataframes.values():
    nonnull_df['word_count'] = nonnull_df['review_text'].apply(word_count)
    nonnull_df['count_buckets'] = nonnull_df['word_count'].apply(categorize_word_count)

In [76]:
# Define bucket ranges
buckets = ['1-4', '5-15', '16-30', '31-60', '61-100', '>100']

# Store keys that each get their own empty "<key>_nonnull_buckets" dictionary.
_bucket_store_keys = (
    "agd_mb bhi_ak bhi_dec_ga "
    "jar_alg_il jar_aur_il jar_bol_il jar_lom_il jar_orl_il jar_sch_il jar_ver_il "
    "joy_ab joy_st_af joy_ak joy_dm_ad joy_mz_ad joy_sh_ad "
    "joy_chi_il joy_fri_tx joy_hou_tx joy_suw_ga "
    "kan_mb "
    "mal_sc mal_chi_il mal_fri_tx mal_ise_nj mal_nap_il mal_ab mal_b1_af mal_ak "
    "mal_aw_ad mal_dm_ad mal_b1_ad mal_b2_ad mal_lu_ad mal_mb mal_sh_ad mal_b2_af "
    "mal_ric_tx may_vie_va mna_mb mia_awm_ad mia_bur_db min_ak son_ise_nj "
    "tan_bar_db tan_fah_db tan_kar_db tan_am_om tan_fc_qa tan_gs_db tan_ham_ad "
    "tan_lul_qa tan_mee_db tan_rol_sh tan_sc_sh tan_sil_db tan_taj_db tan_mank_db "
    "tan_atl_ga tan_chi_il tan_fri_tx tan_hou_tx tan_new_nj tan_rse_wa tan_sc_ca "
    "tif_chi_il tif_eas_nj tif_hac_nj tif_nor_il tif_red_nj tif_ric_va "
    "tif_sho_nj tif_sko_il tif_vie_va "
    "vbj_fri_tx tif_par_nj eve_joh_ga"
).split()

# Create the variables by name so that code referring to e.g.
# agd_mb_nonnull_buckets keeps working; every key gets a distinct empty dict.
for _store_key in _bucket_store_keys:
    globals()[f"{_store_key}_nonnull_buckets"] = {}
del _store_key


In [77]:
#Define bucket ranges
buckets = ['1-4', '5-15', '16-30', '31-60', '61-100', '>100']

#Single source of truth for the stores processed by this cell.
#For each prefix the cell reads nonnull_dataframes['<prefix>_nonnull'] and writes into
#the module-level dict '<prefix>_nonnull_buckets'.
NONNULL_STORE_PREFIXES = [
    'agd_mb', 'bhi_ak', 'bhi_dec_ga',
    'jar_alg_il', 'jar_aur_il', 'jar_bol_il', 'jar_lom_il', 'jar_orl_il',
    'jar_sch_il', 'jar_ver_il',
    'joy_ab', 'joy_st_af', 'joy_ak', 'joy_dm_ad', 'joy_mz_ad', 'joy_sh_ad',
    'joy_chi_il', 'joy_fri_tx', 'joy_hou_tx', 'joy_suw_ga',
    'kan_mb',
    'mal_sc', 'mal_chi_il', 'mal_fri_tx', 'mal_ise_nj', 'mal_nap_il',
    'mal_ab', 'mal_b1_af', 'mal_ak', 'mal_aw_ad', 'mal_dm_ad',
    'mal_b1_ad', 'mal_b2_ad', 'mal_lu_ad', 'mal_mb', 'mal_sh_ad',
    'mal_b2_af', 'mal_ric_tx',
    'may_vie_va', 'mna_mb', 'mia_awm_ad', 'mia_bur_db', 'min_ak',
    'son_ise_nj',
    'tan_bar_db', 'tan_fah_db', 'tan_kar_db', 'tan_am_om', 'tan_fc_qa',
    'tan_gs_db', 'tan_ham_ad', 'tan_lul_qa', 'tan_mee_db', 'tan_rol_sh',
    'tan_sc_sh', 'tan_sil_db', 'tan_taj_db', 'tan_mank_db', 'tan_atl_ga',
    'tan_chi_il', 'tan_fri_tx', 'tan_hou_tx', 'tan_new_nj', 'tan_rse_wa',
    'tan_sc_ca',
    'tif_chi_il', 'tif_eas_nj', 'tif_hac_nj', 'tif_nor_il', 'tif_red_nj',
    'tif_ric_va', 'tif_sho_nj', 'tif_sko_il', 'tif_vie_va',
    'vbj_fri_tx', 'tif_par_nj', 'eve_joh_ga',
]


def _register_bucket(bucket, store_prefixes, frames, namespace):
    """Filter every store's frame down to one bucket and file it in that store's registry.

    For each prefix, the rows of frames['<prefix>_nonnull'] whose 'count_buckets'
    value equals `bucket` are selected, re-indexed from 0, and stored under the key
    '<prefix>_nonnull_<bucket>' in namespace['<prefix>_nonnull_buckets'].
    A '>' in the bucket label is written as 'greater_' in the key
    (so '>100' is stored as '<prefix>_nonnull_greater_100').

    Parameters
    ----------
    bucket : str
        Label compared against the 'count_buckets' column, e.g. '5-15' or '>100'.
    store_prefixes : iterable of str
        Store identifiers such as 'agd_mb'.
    frames : mapping
        Maps '<prefix>_nonnull' to that store's DataFrame.
    namespace : dict
        Namespace that holds the '<prefix>_nonnull_buckets' dicts; the cell passes globals().

    Raises
    ------
    KeyError
        If a '<prefix>_nonnull' frame, its 'count_buckets' column, or the
        '<prefix>_nonnull_buckets' dict is missing.
    """
    #Computed once per bucket instead of once per store
    bucket_suffix = bucket.replace(">", "greater_")

    for store in store_prefixes:
        store_df = frames[f'{store}_nonnull']
        filtered_df = store_df[store_df['count_buckets'] == bucket].reset_index(drop=True)

        #The unrolled version of this cell left '<prefix>_filtered_df' behind as a module-level
        #name; it is still written in case a later cell reads it.
        #NOTE(review): confirm nothing downstream uses these names, then drop this line.
        namespace[f'{store}_filtered_df'] = filtered_df

        namespace[f'{store}_nonnull_buckets'][f'{store}_nonnull_{bucket_suffix}'] = filtered_df


#Loop through the bucket ranges and create DataFrames
for bucket in buckets:
    _register_bucket(bucket, NONNULL_STORE_PREFIXES, nonnull_dataframes, globals())


In [78]:
#Per-store lists of bucket DataFrames for stores agd_mb ... may_vie_va.
#dataframes_<store> holds that store's six frames from '<store>_nonnull_buckets',
#ordered 1-4, 5-15, 16-30, 31-60, 61-100, greater_100.
globals().update({
    f'dataframes_{store}': [
        globals()[f'{store}_nonnull_buckets'][f'{store}_nonnull_{suffix}']
        for suffix in ('1-4', '5-15', '16-30', '31-60', '61-100', 'greater_100')
    ]
    for store in (
        'agd_mb', 'bhi_ak', 'bhi_dec_ga',
        'jar_alg_il', 'jar_aur_il', 'jar_bol_il', 'jar_lom_il', 'jar_orl_il',
        'jar_sch_il', 'jar_ver_il',
        'joy_ab', 'joy_st_af', 'joy_ak', 'joy_dm_ad', 'joy_mz_ad', 'joy_sh_ad',
        'joy_chi_il', 'joy_fri_tx', 'joy_hou_tx', 'joy_suw_ga',
        'kan_mb',
        'mal_sc', 'mal_chi_il', 'mal_fri_tx', 'mal_ise_nj', 'mal_nap_il',
        'mal_ab', 'mal_b1_af', 'mal_ak', 'mal_aw_ad', 'mal_dm_ad',
        'mal_b1_ad', 'mal_b2_ad', 'mal_lu_ad', 'mal_mb', 'mal_sh_ad',
        'mal_b2_af', 'mal_ric_tx',
        'may_vie_va',
    )
})
dataframes_mna_mb = [mna_mb_nonnull_buckets['mna_mb_nonnull_1-4'],mna_mb_nonnull_buckets['mna_mb_nonnull_5-15'],mna_mb_nonnull_buckets['mna_mb_nonnull_16-30'],mna_mb_nonnull_buckets['mna_mb_nonnull_31-60'],mna_mb_nonnull_buckets['mna_mb_nonnull_61-100'],mna_mb_nonnull_buckets['mna_mb_nonnull_greater_100']]
dataframes_mia_awm_ad = [mia_awm_ad_nonnull_buckets['mia_awm_ad_nonnull_1-4'],mia_awm_ad_nonnull_buckets['mia_awm_ad_nonnull_5-15'],mia_awm_ad_nonnull_buckets['mia_awm_ad_nonnull_16-30'],mia_awm_ad_nonnull_buckets['mia_awm_ad_nonnull_31-60'],mia_awm_ad_nonnull_buckets['mia_awm_ad_nonnull_61-100'],mia_awm_ad_nonnull_buckets['mia_awm_ad_nonnull_greater_100']]
dataframes_mia_bur_db = [mia_bur_db_nonnull_buckets['mia_bur_db_nonnull_1-4'],mia_bur_db_nonnull_buckets['mia_bur_db_nonnull_5-15'],mia_bur_db_nonnull_buckets['mia_bur_db_nonnull_16-30'],mia_bur_db_nonnull_buckets['mia_bur_db_nonnull_31-60'],mia_bur_db_nonnull_buckets['mia_bur_db_nonnull_61-100'],mia_bur_db_nonnull_buckets['mia_bur_db_nonnull_greater_100']]
dataframes_min_ak = [min_ak_nonnull_buckets['min_ak_nonnull_1-4'],min_ak_nonnull_buckets['min_ak_nonnull_5-15'],min_ak_nonnull_buckets['min_ak_nonnull_16-30'],min_ak_nonnull_buckets['min_ak_nonnull_31-60'],min_ak_nonnull_buckets['min_ak_nonnull_61-100'],min_ak_nonnull_buckets['min_ak_nonnull_greater_100']]
dataframes_son_ise_nj = [son_ise_nj_nonnull_buckets['son_ise_nj_nonnull_1-4'],son_ise_nj_nonnull_buckets['son_ise_nj_nonnull_5-15'],son_ise_nj_nonnull_buckets['son_ise_nj_nonnull_16-30'],son_ise_nj_nonnull_buckets['son_ise_nj_nonnull_31-60'],son_ise_nj_nonnull_buckets['son_ise_nj_nonnull_61-100'],son_ise_nj_nonnull_buckets['son_ise_nj_nonnull_greater_100']]
dataframes_tan_bar_db = [tan_bar_db_nonnull_buckets['tan_bar_db_nonnull_1-4'],tan_bar_db_nonnull_buckets['tan_bar_db_nonnull_5-15'],tan_bar_db_nonnull_buckets['tan_bar_db_nonnull_16-30'],tan_bar_db_nonnull_buckets['tan_bar_db_nonnull_31-60'],tan_bar_db_nonnull_buckets['tan_bar_db_nonnull_61-100'],tan_bar_db_nonnull_buckets['tan_bar_db_nonnull_greater_100']]
dataframes_tan_fah_db = [tan_fah_db_nonnull_buckets['tan_fah_db_nonnull_1-4'],tan_fah_db_nonnull_buckets['tan_fah_db_nonnull_5-15'],tan_fah_db_nonnull_buckets['tan_fah_db_nonnull_16-30'],tan_fah_db_nonnull_buckets['tan_fah_db_nonnull_31-60'],tan_fah_db_nonnull_buckets['tan_fah_db_nonnull_61-100'],tan_fah_db_nonnull_buckets['tan_fah_db_nonnull_greater_100']]
dataframes_tan_kar_db = [tan_kar_db_nonnull_buckets['tan_kar_db_nonnull_1-4'],tan_kar_db_nonnull_buckets['tan_kar_db_nonnull_5-15'],tan_kar_db_nonnull_buckets['tan_kar_db_nonnull_16-30'],tan_kar_db_nonnull_buckets['tan_kar_db_nonnull_31-60'],tan_kar_db_nonnull_buckets['tan_kar_db_nonnull_61-100'],tan_kar_db_nonnull_buckets['tan_kar_db_nonnull_greater_100']]
dataframes_tan_am_om = [tan_am_om_nonnull_buckets['tan_am_om_nonnull_1-4'],tan_am_om_nonnull_buckets['tan_am_om_nonnull_5-15'],tan_am_om_nonnull_buckets['tan_am_om_nonnull_16-30'],tan_am_om_nonnull_buckets['tan_am_om_nonnull_31-60'],tan_am_om_nonnull_buckets['tan_am_om_nonnull_61-100'],tan_am_om_nonnull_buckets['tan_am_om_nonnull_greater_100']]
dataframes_tan_fc_qa = [tan_fc_qa_nonnull_buckets['tan_fc_qa_nonnull_1-4'],tan_fc_qa_nonnull_buckets['tan_fc_qa_nonnull_5-15'],tan_fc_qa_nonnull_buckets['tan_fc_qa_nonnull_16-30'],tan_fc_qa_nonnull_buckets['tan_fc_qa_nonnull_31-60'],tan_fc_qa_nonnull_buckets['tan_fc_qa_nonnull_61-100'],tan_fc_qa_nonnull_buckets['tan_fc_qa_nonnull_greater_100']]
dataframes_tan_gs_db = [tan_gs_db_nonnull_buckets['tan_gs_db_nonnull_1-4'],tan_gs_db_nonnull_buckets['tan_gs_db_nonnull_5-15'],tan_gs_db_nonnull_buckets['tan_gs_db_nonnull_16-30'],tan_gs_db_nonnull_buckets['tan_gs_db_nonnull_31-60'],tan_gs_db_nonnull_buckets['tan_gs_db_nonnull_61-100'],tan_gs_db_nonnull_buckets['tan_gs_db_nonnull_greater_100']]
dataframes_tan_ham_ad = [tan_ham_ad_nonnull_buckets['tan_ham_ad_nonnull_1-4'],tan_ham_ad_nonnull_buckets['tan_ham_ad_nonnull_5-15'],tan_ham_ad_nonnull_buckets['tan_ham_ad_nonnull_16-30'],tan_ham_ad_nonnull_buckets['tan_ham_ad_nonnull_31-60'],tan_ham_ad_nonnull_buckets['tan_ham_ad_nonnull_61-100'],tan_ham_ad_nonnull_buckets['tan_ham_ad_nonnull_greater_100']]
dataframes_tan_lul_qa = [tan_lul_qa_nonnull_buckets['tan_lul_qa_nonnull_1-4'],tan_lul_qa_nonnull_buckets['tan_lul_qa_nonnull_5-15'],tan_lul_qa_nonnull_buckets['tan_lul_qa_nonnull_16-30'],tan_lul_qa_nonnull_buckets['tan_lul_qa_nonnull_31-60'],tan_lul_qa_nonnull_buckets['tan_lul_qa_nonnull_61-100'],tan_lul_qa_nonnull_buckets['tan_lul_qa_nonnull_greater_100']]
dataframes_tan_mee_db = [tan_mee_db_nonnull_buckets['tan_mee_db_nonnull_1-4'],tan_mee_db_nonnull_buckets['tan_mee_db_nonnull_5-15'],tan_mee_db_nonnull_buckets['tan_mee_db_nonnull_16-30'],tan_mee_db_nonnull_buckets['tan_mee_db_nonnull_31-60'],tan_mee_db_nonnull_buckets['tan_mee_db_nonnull_61-100'],tan_mee_db_nonnull_buckets['tan_mee_db_nonnull_greater_100']]
dataframes_tan_rol_sh = [tan_rol_sh_nonnull_buckets['tan_rol_sh_nonnull_1-4'],tan_rol_sh_nonnull_buckets['tan_rol_sh_nonnull_5-15'],tan_rol_sh_nonnull_buckets['tan_rol_sh_nonnull_16-30'],tan_rol_sh_nonnull_buckets['tan_rol_sh_nonnull_31-60'],tan_rol_sh_nonnull_buckets['tan_rol_sh_nonnull_61-100'],tan_rol_sh_nonnull_buckets['tan_rol_sh_nonnull_greater_100']]
dataframes_tan_sc_sh = [tan_sc_sh_nonnull_buckets['tan_sc_sh_nonnull_1-4'],tan_sc_sh_nonnull_buckets['tan_sc_sh_nonnull_5-15'],tan_sc_sh_nonnull_buckets['tan_sc_sh_nonnull_16-30'],tan_sc_sh_nonnull_buckets['tan_sc_sh_nonnull_31-60'],tan_sc_sh_nonnull_buckets['tan_sc_sh_nonnull_61-100'],tan_sc_sh_nonnull_buckets['tan_sc_sh_nonnull_greater_100']]
dataframes_tan_sil_db = [tan_sil_db_nonnull_buckets['tan_sil_db_nonnull_1-4'],tan_sil_db_nonnull_buckets['tan_sil_db_nonnull_5-15'],tan_sil_db_nonnull_buckets['tan_sil_db_nonnull_16-30'],tan_sil_db_nonnull_buckets['tan_sil_db_nonnull_31-60'],tan_sil_db_nonnull_buckets['tan_sil_db_nonnull_61-100'],tan_sil_db_nonnull_buckets['tan_sil_db_nonnull_greater_100']]
dataframes_tan_taj_db = [tan_taj_db_nonnull_buckets['tan_taj_db_nonnull_1-4'],tan_taj_db_nonnull_buckets['tan_taj_db_nonnull_5-15'],tan_taj_db_nonnull_buckets['tan_taj_db_nonnull_16-30'],tan_taj_db_nonnull_buckets['tan_taj_db_nonnull_31-60'],tan_taj_db_nonnull_buckets['tan_taj_db_nonnull_61-100'],tan_taj_db_nonnull_buckets['tan_taj_db_nonnull_greater_100']]
dataframes_tan_mank_db = [tan_mank_db_nonnull_buckets['tan_mank_db_nonnull_1-4'],tan_mank_db_nonnull_buckets['tan_mank_db_nonnull_5-15'],tan_mank_db_nonnull_buckets['tan_mank_db_nonnull_16-30'],tan_mank_db_nonnull_buckets['tan_mank_db_nonnull_31-60'],tan_mank_db_nonnull_buckets['tan_mank_db_nonnull_61-100'],tan_mank_db_nonnull_buckets['tan_mank_db_nonnull_greater_100']]
dataframes_tan_atl_ga = [tan_atl_ga_nonnull_buckets['tan_atl_ga_nonnull_1-4'],tan_atl_ga_nonnull_buckets['tan_atl_ga_nonnull_5-15'],tan_atl_ga_nonnull_buckets['tan_atl_ga_nonnull_16-30'],tan_atl_ga_nonnull_buckets['tan_atl_ga_nonnull_31-60'],tan_atl_ga_nonnull_buckets['tan_atl_ga_nonnull_61-100'],tan_atl_ga_nonnull_buckets['tan_atl_ga_nonnull_greater_100']]
dataframes_tan_chi_il = [tan_chi_il_nonnull_buckets['tan_chi_il_nonnull_1-4'],tan_chi_il_nonnull_buckets['tan_chi_il_nonnull_5-15'],tan_chi_il_nonnull_buckets['tan_chi_il_nonnull_16-30'],tan_chi_il_nonnull_buckets['tan_chi_il_nonnull_31-60'],tan_chi_il_nonnull_buckets['tan_chi_il_nonnull_61-100'],tan_chi_il_nonnull_buckets['tan_chi_il_nonnull_greater_100']]
dataframes_tan_fri_tx = [tan_fri_tx_nonnull_buckets['tan_fri_tx_nonnull_1-4'],tan_fri_tx_nonnull_buckets['tan_fri_tx_nonnull_5-15'],tan_fri_tx_nonnull_buckets['tan_fri_tx_nonnull_16-30'],tan_fri_tx_nonnull_buckets['tan_fri_tx_nonnull_31-60'],tan_fri_tx_nonnull_buckets['tan_fri_tx_nonnull_61-100'],tan_fri_tx_nonnull_buckets['tan_fri_tx_nonnull_greater_100']]
dataframes_tan_hou_tx = [tan_hou_tx_nonnull_buckets['tan_hou_tx_nonnull_1-4'],tan_hou_tx_nonnull_buckets['tan_hou_tx_nonnull_5-15'],tan_hou_tx_nonnull_buckets['tan_hou_tx_nonnull_16-30'],tan_hou_tx_nonnull_buckets['tan_hou_tx_nonnull_31-60'],tan_hou_tx_nonnull_buckets['tan_hou_tx_nonnull_61-100'],tan_hou_tx_nonnull_buckets['tan_hou_tx_nonnull_greater_100']]
dataframes_tan_new_nj = [tan_new_nj_nonnull_buckets['tan_new_nj_nonnull_1-4'],tan_new_nj_nonnull_buckets['tan_new_nj_nonnull_5-15'],tan_new_nj_nonnull_buckets['tan_new_nj_nonnull_16-30'],tan_new_nj_nonnull_buckets['tan_new_nj_nonnull_31-60'],tan_new_nj_nonnull_buckets['tan_new_nj_nonnull_61-100'],tan_new_nj_nonnull_buckets['tan_new_nj_nonnull_greater_100']]
dataframes_tan_rse_wa = [tan_rse_wa_nonnull_buckets['tan_rse_wa_nonnull_1-4'],tan_rse_wa_nonnull_buckets['tan_rse_wa_nonnull_5-15'],tan_rse_wa_nonnull_buckets['tan_rse_wa_nonnull_16-30'],tan_rse_wa_nonnull_buckets['tan_rse_wa_nonnull_31-60'],tan_rse_wa_nonnull_buckets['tan_rse_wa_nonnull_61-100'],tan_rse_wa_nonnull_buckets['tan_rse_wa_nonnull_greater_100']]
dataframes_tan_sc_ca = [tan_sc_ca_nonnull_buckets['tan_sc_ca_nonnull_1-4'],tan_sc_ca_nonnull_buckets['tan_sc_ca_nonnull_5-15'],tan_sc_ca_nonnull_buckets['tan_sc_ca_nonnull_16-30'],tan_sc_ca_nonnull_buckets['tan_sc_ca_nonnull_31-60'],tan_sc_ca_nonnull_buckets['tan_sc_ca_nonnull_61-100'],tan_sc_ca_nonnull_buckets['tan_sc_ca_nonnull_greater_100']]
dataframes_tif_chi_il = [tif_chi_il_nonnull_buckets['tif_chi_il_nonnull_1-4'],tif_chi_il_nonnull_buckets['tif_chi_il_nonnull_5-15'],tif_chi_il_nonnull_buckets['tif_chi_il_nonnull_16-30'],tif_chi_il_nonnull_buckets['tif_chi_il_nonnull_31-60'],tif_chi_il_nonnull_buckets['tif_chi_il_nonnull_61-100'],tif_chi_il_nonnull_buckets['tif_chi_il_nonnull_greater_100']]
dataframes_tif_eas_nj = [tif_eas_nj_nonnull_buckets['tif_eas_nj_nonnull_1-4'],tif_eas_nj_nonnull_buckets['tif_eas_nj_nonnull_5-15'],tif_eas_nj_nonnull_buckets['tif_eas_nj_nonnull_16-30'],tif_eas_nj_nonnull_buckets['tif_eas_nj_nonnull_31-60'],tif_eas_nj_nonnull_buckets['tif_eas_nj_nonnull_61-100'],tif_eas_nj_nonnull_buckets['tif_eas_nj_nonnull_greater_100']]
dataframes_tif_hac_nj = [tif_hac_nj_nonnull_buckets['tif_hac_nj_nonnull_1-4'],tif_hac_nj_nonnull_buckets['tif_hac_nj_nonnull_5-15'],tif_hac_nj_nonnull_buckets['tif_hac_nj_nonnull_16-30'],tif_hac_nj_nonnull_buckets['tif_hac_nj_nonnull_31-60'],tif_hac_nj_nonnull_buckets['tif_hac_nj_nonnull_61-100'],tif_hac_nj_nonnull_buckets['tif_hac_nj_nonnull_greater_100']]
dataframes_tif_nor_il = [tif_nor_il_nonnull_buckets['tif_nor_il_nonnull_1-4'],tif_nor_il_nonnull_buckets['tif_nor_il_nonnull_5-15'],tif_nor_il_nonnull_buckets['tif_nor_il_nonnull_16-30'],tif_nor_il_nonnull_buckets['tif_nor_il_nonnull_31-60'],tif_nor_il_nonnull_buckets['tif_nor_il_nonnull_61-100'],tif_nor_il_nonnull_buckets['tif_nor_il_nonnull_greater_100']]
dataframes_tif_red_nj = [tif_red_nj_nonnull_buckets['tif_red_nj_nonnull_1-4'],tif_red_nj_nonnull_buckets['tif_red_nj_nonnull_5-15'],tif_red_nj_nonnull_buckets['tif_red_nj_nonnull_16-30'],tif_red_nj_nonnull_buckets['tif_red_nj_nonnull_31-60'],tif_red_nj_nonnull_buckets['tif_red_nj_nonnull_61-100'],tif_red_nj_nonnull_buckets['tif_red_nj_nonnull_greater_100']]
dataframes_tif_ric_va = [tif_ric_va_nonnull_buckets['tif_ric_va_nonnull_1-4'],tif_ric_va_nonnull_buckets['tif_ric_va_nonnull_5-15'],tif_ric_va_nonnull_buckets['tif_ric_va_nonnull_16-30'],tif_ric_va_nonnull_buckets['tif_ric_va_nonnull_31-60'],tif_ric_va_nonnull_buckets['tif_ric_va_nonnull_61-100'],tif_ric_va_nonnull_buckets['tif_ric_va_nonnull_greater_100']]
dataframes_tif_sho_nj = [tif_sho_nj_nonnull_buckets['tif_sho_nj_nonnull_1-4'],tif_sho_nj_nonnull_buckets['tif_sho_nj_nonnull_5-15'],tif_sho_nj_nonnull_buckets['tif_sho_nj_nonnull_16-30'],tif_sho_nj_nonnull_buckets['tif_sho_nj_nonnull_31-60'],tif_sho_nj_nonnull_buckets['tif_sho_nj_nonnull_61-100'],tif_sho_nj_nonnull_buckets['tif_sho_nj_nonnull_greater_100']]
dataframes_tif_sko_il = [tif_sko_il_nonnull_buckets['tif_sko_il_nonnull_1-4'],tif_sko_il_nonnull_buckets['tif_sko_il_nonnull_5-15'],tif_sko_il_nonnull_buckets['tif_sko_il_nonnull_16-30'],tif_sko_il_nonnull_buckets['tif_sko_il_nonnull_31-60'],tif_sko_il_nonnull_buckets['tif_sko_il_nonnull_61-100'],tif_sko_il_nonnull_buckets['tif_sko_il_nonnull_greater_100']]
dataframes_tif_vie_va = [tif_vie_va_nonnull_buckets['tif_vie_va_nonnull_1-4'],tif_vie_va_nonnull_buckets['tif_vie_va_nonnull_5-15'],tif_vie_va_nonnull_buckets['tif_vie_va_nonnull_16-30'],tif_vie_va_nonnull_buckets['tif_vie_va_nonnull_31-60'],tif_vie_va_nonnull_buckets['tif_vie_va_nonnull_61-100'],tif_vie_va_nonnull_buckets['tif_vie_va_nonnull_greater_100']]
dataframes_vbj_fri_tx = [vbj_fri_tx_nonnull_buckets['vbj_fri_tx_nonnull_1-4'],vbj_fri_tx_nonnull_buckets['vbj_fri_tx_nonnull_5-15'],vbj_fri_tx_nonnull_buckets['vbj_fri_tx_nonnull_16-30'],vbj_fri_tx_nonnull_buckets['vbj_fri_tx_nonnull_31-60'],vbj_fri_tx_nonnull_buckets['vbj_fri_tx_nonnull_61-100'],vbj_fri_tx_nonnull_buckets['vbj_fri_tx_nonnull_greater_100']]
dataframes_tif_par_nj = [tif_par_nj_nonnull_buckets['tif_par_nj_nonnull_1-4'],tif_par_nj_nonnull_buckets['tif_par_nj_nonnull_5-15'],tif_par_nj_nonnull_buckets['tif_par_nj_nonnull_16-30'],tif_par_nj_nonnull_buckets['tif_par_nj_nonnull_31-60'],tif_par_nj_nonnull_buckets['tif_par_nj_nonnull_61-100'],tif_par_nj_nonnull_buckets['tif_par_nj_nonnull_greater_100']]
dataframes_eve_joh_ga = [eve_joh_ga_nonnull_buckets['eve_joh_ga_nonnull_1-4'],eve_joh_ga_nonnull_buckets['eve_joh_ga_nonnull_5-15'],eve_joh_ga_nonnull_buckets['eve_joh_ga_nonnull_16-30'],eve_joh_ga_nonnull_buckets['eve_joh_ga_nonnull_31-60'],eve_joh_ga_nonnull_buckets['eve_joh_ga_nonnull_61-100'],eve_joh_ga_nonnull_buckets['eve_joh_ga_nonnull_greater_100']]


In [79]:
# Stack each store's list of bucket DataFrames (`dataframes_<store>`) into one
# `combined_df_<store>` frame, rows in the same order as the list.
# `ignore_index=True` already gives the result a fresh 0..n-1 index, so the additional
# `.reset_index(drop=True)` used previously was redundant and is dropped.
# NOTE: the `combined_df_<store>` names are created dynamically through globals().
_COMBINED_STORE_CODES = [
    'agd_mb', 'bhi_ak', 'bhi_dec_ga', 'jar_alg_il', 'jar_aur_il',
    'jar_bol_il', 'jar_lom_il', 'jar_orl_il', 'jar_sch_il', 'jar_ver_il',
    'joy_ab', 'joy_st_af', 'joy_ak', 'joy_dm_ad', 'joy_mz_ad',
    'joy_sh_ad', 'joy_chi_il', 'joy_fri_tx', 'joy_hou_tx', 'joy_suw_ga',
    'kan_mb', 'mal_sc', 'mal_chi_il', 'mal_fri_tx', 'mal_ise_nj',
    'mal_nap_il', 'mal_ab', 'mal_b1_af', 'mal_ak', 'mal_aw_ad',
    'mal_dm_ad', 'mal_b1_ad', 'mal_b2_ad', 'mal_lu_ad', 'mal_mb',
    'mal_sh_ad', 'mal_b2_af', 'mal_ric_tx', 'may_vie_va', 'mna_mb',
    'mia_awm_ad', 'mia_bur_db', 'min_ak', 'son_ise_nj', 'tan_bar_db',
    'tan_fah_db', 'tan_kar_db', 'tan_am_om', 'tan_fc_qa', 'tan_gs_db',
    'tan_ham_ad', 'tan_lul_qa', 'tan_mee_db', 'tan_rol_sh', 'tan_sc_sh',
    'tan_sil_db', 'tan_taj_db', 'tan_mank_db', 'tan_atl_ga', 'tan_chi_il',
    'tan_fri_tx', 'tan_hou_tx', 'tan_new_nj', 'tan_rse_wa', 'tan_sc_ca',
    'tif_chi_il', 'tif_eas_nj', 'tif_hac_nj', 'tif_nor_il', 'tif_red_nj',
    'tif_ric_va', 'tif_sho_nj', 'tif_sko_il', 'tif_vie_va', 'vbj_fri_tx',
    'tif_par_nj', 'eve_joh_ga',
]
for _store_code in _COMBINED_STORE_CODES:
    globals()[f'combined_df_{_store_code}'] = pd.concat(
        globals()[f'dataframes_{_store_code}'], ignore_index=True
    )


# Sentiment Scoring for Null reviews

In [80]:
# Sentiment topic columns; the next cell adds each of them, with a default value of 0,
# to every DataFrame in 'null_dataframes'.
column_names = [
    "Trust",
    "Store Experience",
    "Store Staff",
    "Product Design",
    "Product Variety",
    "Discount",
    "Making Charge",
    "Price",
    "Product Quality",
    "OLD Gold Jewellery Exchange",
]

In [81]:
# Prepare every DataFrame held in the 'null_dataframes' dictionary in a single pass:
#   1. copy the 'Name' column into a new 'Commentor Name' column;
#   2. add one column per entry of 'column_names', filled with 0.
# The frames are modified in place, so the dictionary keeps pointing at the same objects.
for null_df in null_dataframes.values():
    null_df['Commentor Name'] = null_df['Name']
    for topic_column in column_names:
        null_df[topic_column] = 0

# Sentiment Scoring for reviews

## Competitors

### bhi_ak

In [82]:
# Score the bhi_ak review buckets with `score_sentiments_jul`, one request per batch of
# rows, while a background thread (`print_dynamic_message`) renders live progress.
# Results: raw responses in `bhi_ak_nonnull_api`, token totals in
# `input_tokens_bhi_ak_nonnull` / `output_tokens_bhi_ak_nonnull`.
batch_size = 25  # rows passed to score_sentiments_jul per call
batch_counter = [0]  # one-item list: shared by reference with the progress thread
# Batches over ALL buckets in the dict, so the total always matches the loop below.
total_batches = sum(
    math.ceil(len(bucket_df) / batch_size)
    for bucket_df in bhi_ak_nonnull_buckets.values()
)
key_counter = 0
total_keys = len(bhi_ak_nonnull_buckets)
bhi_ak_nonnull_api = []
input_tokens_bhi_ak_nonnull = 0
output_tokens_bhi_ak_nonnull = 0
start_time_bhi_ak = time.time()

for key in bhi_ak_nonnull_buckets.keys():
    key_counter += 1
    current_df = bhi_ak_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]  # batches done within the current bucket
    input_tokens = 0
    output_tokens = 0

    # Threading setup
    total_iterations = math.ceil(len(current_df) / batch_size)
    stop_event = threading.Event()
    message_thread = threading.Thread(target=print_dynamic_message, args=(batch_counter,key_counter,counter,total_batches,total_keys, total_iterations,start_time_bhi_ak, stop_event))
    message_thread.start()

    try:
        for start in range(0, len(current_df), batch_size):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + batch_size
            result, it, ot = score_sentiments_jul(current_df[start:end])
            bhi_ak_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        # Always stop the progress thread, even when a scoring call raises; otherwise it
        # would keep looping (and clearing the cell output) until the kernel is restarted.
        stop_event.set()
        message_thread.join()

    input_tokens_bhi_ak_nonnull += input_tokens
    output_tokens_bhi_ak_nonnull += output_tokens
    clear_output(wait=True)

overall_execution_time_bhi_ak = time.time() - start_time_bhi_ak
formatted_time_bhi_ak = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_bhi_ak))
# Cost estimate with hard-coded rates: 0.01 per 1,000 input tokens and 0.03 per 1,000
# output tokens. NOTE(review): currency and model pricing are not shown here - confirm.
input_token_cost_bhi_ak = round((0.01/1000) * input_tokens_bhi_ak_nonnull, 2)
output_token_cost_bhi_ak = round((0.03/1000) * output_tokens_bhi_ak_nonnull, 2)
total_cost_bhi_ak = round(input_token_cost_bhi_ak + output_token_cost_bhi_ak, 2)

# batch_counter is a one-item list; print the number itself rather than "[n]".
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_bhi_ak}")
print(f"Total Input Tokens - {input_tokens_bhi_ak_nonnull}")
print(f"Total Input Cost = {input_token_cost_bhi_ak}")
print(f"Total Output Tokens - {output_tokens_bhi_ak_nonnull}")
print(f"Total Output Cost = {output_token_cost_bhi_ak}")
print(f"Total Cost = {total_cost_bhi_ak}")

Executed [5] Iterations
Total Execution Time: 00:00:19
Total Input Tokens - 4067
Total Input Cost = 0.04
Total Output Tokens - 732
Total Output Cost = 0.02
Total Cost = 0.06


In [83]:
# Show the first element collected in bhi_ak_nonnull_api (the result of the first scoring call)
bhi_ak_nonnull_api[0]

'```json\n{\n  "Binu Pillai": [\n    {\n      "positive": "Store Staff",\n      "negative": ""\n    }\n  ],\n  "Sajani Manikandan": [\n    {\n      "positive": "Store Staff",\n      "negative": ""\n    }\n  ],\n  "ameen sb": [\n    {\n      "positive": "Product Variety",\n      "negative": ""\n    }\n  ],\n  "Parthibarajan s": [\n    {\n      "positive": "",\n      "negative": ""\n    }\n  ],\n  "Jacob Plackan": [\n    {\n      "positive": "Store Staff",\n      "negative": ""\n    }\n  ]\n}\n```'

In [84]:
# Clean every raw response in bhi_ak_nonnull_api:
#   - drop the Markdown code-fence markers ("```json" and "```");
#   - replace any "[]" with a one-item list holding an empty positive/negative pair.
# The cleaned strings are then stored as the rows of a one-column DataFrame.
_empty_feedback_json = "[{\"positive\": \"\", \"negative\": \"\"}]"
bhi_ak_nonnull_api_cleaned = []
for raw_response in bhi_ak_nonnull_api:
    unfenced_response = raw_response.replace("```json", "").replace("```", "")
    bhi_ak_nonnull_api_cleaned.append(unfenced_response.replace("[]", _empty_feedback_json))
bhi_ak_nonnull_api_cleaned_df = pd.DataFrame(bhi_ak_nonnull_api_cleaned)

# Topic names looked up in the parsed responses by the following cell.
column_names2 = [
    'Trust',
    'Store Experience',
    'Store Staff',
    'Product Design',
    'Product Variety',
    'Discount',
    'Making Charge',
    'Price',
    'Product Quality',
    'OLD Gold Jewellery Exchange',
]


In [85]:
#Initialize an empty list to store the processed data (one dict per commentor)
processed_data_bhi_ak_nonnull = []

#Iterate over each row in the DataFrame; each row holds one cleaned JSON response
for index, row in bhi_ak_nonnull_api_cleaned_df.iterrows():
    try:
        #Assuming the JSON string is in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Iterate over each key-value pair in the dictionary
        for commentor_name, feedback_list in data.items():
            #Create a dictionary for each commentor with default values as 0
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #Check if there is any feedback for the commentor
            if feedback_list:
                #Since there could be multiple feedback entries for a single commentor, we aggregate them
                for feedback in feedback_list:
                    #Topics arrive comma-separated; strip the blanks around each one so that
                    #"Store Staff, Price" matches both topics. `or ""` tolerates a null value.
                    positive = [t.strip() for t in (feedback.get("positive") or "").split(',')]
                    negative = [t.strip() for t in (feedback.get("negative") or "").split(',')]

                    for topic in column_names2:
                        #Aggregate the value: 1 for positive, -1 for negative, 0 for not mentioned
                        commentor_data[topic] += assign_value(topic, positive, negative)

                #Finalize the values to be within -1, 0, or 1
                for topic in column_names2:
                    commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_bhi_ak_nonnull.append(commentor_data)
    except Exception as exc:
        #Skip rows that cannot be parsed, but report why. `Exception` (not a bare except)
        #lets KeyboardInterrupt / SystemExit through so the cell can still be interrupted.
        #NOTE: a skipped row yields fewer records than reviews, which matters to any later
        #step that pairs these records with the reviews by position.
        print(f"Invalid JSON in row {index}: {exc}")

#Persist the cleaned responses and load them back from the saved workbook
bhi_ak_nonnull_api_cleaned_df.to_excel("bhi_ak_nonnull_api_cleaned_df.xlsx",index=False)

bhi_ak_nonnull_api_cleaned_df = pd.read_excel("bhi_ak_nonnull_api_cleaned_df.xlsx")

bhi_ak_nonnull_api_cleaned_df

In [86]:
#Column order for the sentiment frame: commentor first, then the ten topics
ordered_columns = ["Commentor Name",
                   "Trust",
                   "Store Experience",
                   "Store Staff",
                   "Product Design",
                   "Product Variety",
                   "Discount",
                   "Making Charge",
                   "Price",
                   "Product Quality",
                   "OLD Gold Jewellery Exchange"]
#Create a DataFrame from the processed data, already in the desired column order.
#Passing `columns=` also works when no record was produced (the result is an empty frame
#with these columns), whereas selecting the columns afterwards raised a KeyError.
bhi_ak_nonnull_sen_df = pd.DataFrame(processed_data_bhi_ak_nonnull, columns=ordered_columns)

#The two frames are joined side by side BY ROW POSITION, so they must have the same number
#of rows in the same order. Warn instead of silently producing misaligned / NaN-padded rows.
if len(combined_df_bhi_ak) != len(bhi_ak_nonnull_sen_df):
    print(f"WARNING: {len(combined_df_bhi_ak)} reviews but {len(bhi_ak_nonnull_sen_df)} scored records for bhi_ak - "
          "rows will be misaligned; compare 'Name' with 'Commentor Name' in the output.")

bhi_ak_nonnull_merged_df = pd.concat([combined_df_bhi_ak, bhi_ak_nonnull_sen_df], axis=1)

#Append the rows of null_dataframes['bhi_ak_null'] below the scored reviews
bhi_ak_final_sen_df = pd.concat([bhi_ak_nonnull_merged_df,null_dataframes['bhi_ak_null']], ignore_index=True)

#Work on a copy so the date formatting below does not touch bhi_ak_final_sen_df
bhi_ak_final_sen_df_copy = bhi_ak_final_sen_df.copy()
#Keep only the first 10 characters of the date's string form
bhi_ak_final_sen_df_copy["Published At Date"] = bhi_ak_final_sen_df_copy["Published At Date"].astype(str).str[:10]

#Make sure the output folder exists; to_excel does not create missing directories
os.makedirs("sentiment_raw_output", exist_ok=True)
bhi_ak_final_sen_df_copy.to_excel("sentiment_raw_output/bhi_ak_final_sen_df_jul.xlsx",index=False)


In [87]:
# Preview the first rows of the exported bhi_ak frame.
bhi_ak_final_sen_df_copy.head()

Unnamed: 0,Store Name,Name,Published At Date,Stars,Total Score,year,month,review_text,Store Code Cleaned,word_count,count_buckets,Commentor Name,Trust,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,OLD Gold Jewellery Exchange
0,Bhima Jewellers - Al Karama,Binu Pillai,2025-05-29,4.0,4.7,2025,5,Excellent customer care.,,3.0,1-4,Binu Pillai,0,0,1,0,0,0,0,0,0,0
1,Bhima Jewellers - Al Karama,Sajani Manikandan,2025-05-23,5.0,4.7,2025,5,Excellent customer service.,,3.0,1-4,Sajani Manikandan,0,0,1,0,0,0,0,0,0,0
2,Bhima Jewellers - Al Karama,ameen sb,2025-05-20,5.0,4.7,2025,5,Good collections,,2.0,1-4,ameen sb,0,0,0,0,1,0,0,0,0,0
3,Bhima Jewellers - Al Karama,Parthibarajan s,2025-05-15,5.0,4.7,2025,5,Good,,1.0,1-4,Parthibarajan s,0,0,0,0,0,0,0,0,0,0
4,Bhima Jewellers - Al Karama,Jacob Plackan,2025-05-04,5.0,4.7,2025,5,Good Service by Nijin,,4.0,1-4,Jacob Plackan,0,0,1,0,0,0,0,0,0,0


### joy_ab

In [88]:
# Score the non-null joy_ab reviews bucket by bucket: each bucket is sent to
# score_sentiments_jul in slices of BATCH_SIZE rows while a background thread
# refreshes a progress message. Token usage is accumulated for a cost summary.
BATCH_SIZE = 25  # rows passed to score_sentiments_jul per call

# One-element list so the progress thread reads the live value via [0].
batch_counter = [0]
# Count batches over every bucket the loop below visits, rather than a
# hard-coded list of bucket keys.
total_batches = sum(
    math.ceil(len(bucket_df) / BATCH_SIZE)
    for bucket_df in joy_ab_nonnull_buckets.values()
)
key_counter = 0
total_keys = len(joy_ab_nonnull_buckets.keys())
joy_ab_nonnull_api = []
input_tokens_joy_ab_nonnull = 0
output_tokens_joy_ab_nonnull = 0
start_time_joy_ab = time.time()

for key in joy_ab_nonnull_buckets.keys():
    key_counter += 1
    current_df = joy_ab_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    # Threading setup
    total_iterations = math.ceil(len(current_df) / BATCH_SIZE)
    stop_event = threading.Event()
    message_thread = threading.Thread(target=print_dynamic_message, args=(batch_counter,key_counter,counter,total_batches,total_keys, total_iterations,start_time_joy_ab, stop_event))
    message_thread.start()

    try:
        for start in range(0, len(current_df), BATCH_SIZE):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + BATCH_SIZE
            result, it, ot = score_sentiments_jul(current_df[start:end])
            joy_ab_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        # Always stop the progress thread, even when a scoring call raises;
        # otherwise it keeps clearing and reprinting the cell output.
        stop_event.set()
        message_thread.join()
    input_tokens_joy_ab_nonnull += input_tokens
    output_tokens_joy_ab_nonnull += output_tokens
    clear_output(wait=True)

overall_execution_time_joy_ab = time.time() - start_time_joy_ab
formatted_time_joy_ab = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_joy_ab))
# Rates are applied per 1,000 tokens.
# NOTE(review): confirm 0.01 (input) and 0.03 (output) against current model pricing.
input_token_cost_joy_ab = round((0.01/1000) * input_tokens_joy_ab_nonnull, 2)
output_token_cost_joy_ab = round((0.03/1000) * output_tokens_joy_ab_nonnull, 2)
total_cost_joy_ab = round(input_token_cost_joy_ab + output_token_cost_joy_ab, 2)

# batch_counter is a list; print its value, not the list repr ("[6]").
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_joy_ab}")
print(f"Total Input Tokens - {input_tokens_joy_ab_nonnull}")
print(f"Total Input Cost = {input_token_cost_joy_ab}")
print(f"Total Output Tokens - {output_tokens_joy_ab_nonnull}")
print(f"Total Output Cost = {output_token_cost_joy_ab}")
print(f"Total Cost = {total_cost_joy_ab}")

Executed [6] Iterations
Total Execution Time: 00:00:28
Total Input Tokens - 5655
Total Input Cost = 0.06
Total Output Tokens - 1688
Total Output Cost = 0.05
Total Cost = 0.11


In [89]:
# Topic labels used as the per-topic keys when the responses are parsed.
column_names2 = [
    'Trust', 'Store Experience', 'Store Staff',
    'Product Design', 'Product Variety',
    'Discount', 'Making Charge', 'Price',
    'Product Quality', 'OLD Gold Jewellery Exchange',
]

# Drop the Markdown code-fence markers from every raw joy_ab response and
# swap each "[]" for an explicit entry with empty positive/negative strings.
joy_ab_nonnull_api_cleaned = [
    raw_response.replace("```json", "").replace("```", "").replace(
        "[]", '[{"positive": "", "negative": ""}]'
    )
    for raw_response in joy_ab_nonnull_api
]

# One cleaned response per row.
joy_ab_nonnull_api_cleaned_df = pd.DataFrame(joy_ab_nonnull_api_cleaned)

In [90]:
# Parse each cleaned joy_ab API response (a JSON object keyed by commentor
# name) into one dict of per-topic scores per commentor.
processed_data_joy_ab_nonnull = []

#Iterate over each row in the DataFrame
for index, row in joy_ab_nonnull_api_cleaned_df.iterrows():
    try:
        #Assuming the JSON string is in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Iterate over each key-value pair in the dictionary
        for commentor_name, feedback_list in data.items():
            #Create a dictionary for each commentor with default values as 0
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #Check if there is any feedback for the commentor
            if feedback_list:
                #Since there could be multiple feedback entries for a single commentor, we aggregate them
                for feedback in feedback_list:
                    # Strip whitespace around each topic so "Trust, Price"
                    # matches both "Trust" and "Price".
                    positive = [t.strip() for t in feedback.get("positive", "").split(',')]
                    negative = [t.strip() for t in feedback.get("negative", "").split(',')]

                    for topic in column_names2:
                        #Aggregate the value: 1 for positive, -1 for negative, 0 for not mentioned
                        commentor_data[topic] += assign_value(topic, positive, negative)

                #Finalize the values to be within -1, 0, or 1
                for topic in column_names2:
                    commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_joy_ab_nonnull.append(commentor_data)
    except (ValueError, TypeError, AttributeError) as err:
        # Malformed JSON (ValueError) or an unexpected response shape
        # (TypeError/AttributeError): skip the rest of this response but report
        # the real cause. KeyboardInterrupt is no longer swallowed.
        # NOTE: commentors appended before the failure are kept, so the result
        # can contain fewer rows than there were reviews.
        print(f"Invalid JSON in row {index}: {err}")

# Write the cleaned responses to Excel, then rebind the name to the reloaded file.
joy_ab_nonnull_api_cleaned_df.to_excel("joy_ab_nonnull_api_cleaned_df.xlsx",index=False)

joy_ab_nonnull_api_cleaned_df = pd.read_excel("joy_ab_nonnull_api_cleaned_df.xlsx")

joy_ab_nonnull_api_cleaned_df

In [None]:
# Combine the parsed joy_ab sentiment scores with the review metadata, append
# the null-review rows, and export the result to Excel.

# Output column order for the sentiment frame.
ordered_columns = ["Commentor Name",
                   "Trust",
                   "Store Experience",
                   "Store Staff",
                   "Product Design",
                   "Product Variety",
                   "Discount",
                   "Making Charge",
                   "Price",
                   "Product Quality",
                   "OLD Gold Jewellery Exchange"]

# Passing columns= selects and orders the columns in one step, and still
# produces the expected (empty) columns when no rows were parsed; indexing an
# empty frame with ordered_columns would raise KeyError instead.
joy_ab_nonnull_sen_df = pd.DataFrame(processed_data_joy_ab_nonnull, columns=ordered_columns)

# The column-wise concat below pairs rows by index label. If the two frames
# differ in length, some reviews end up without (or with the wrong) scores,
# so surface that instead of letting it pass silently.
if len(combined_df_joy_ab) != len(joy_ab_nonnull_sen_df):
    print(f"Warning: combined_df_joy_ab has {len(combined_df_joy_ab)} rows but "
          f"joy_ab_nonnull_sen_df has {len(joy_ab_nonnull_sen_df)}; merged rows may be misaligned")

joy_ab_nonnull_merged_df = pd.concat([combined_df_joy_ab, joy_ab_nonnull_sen_df], axis=1)

# Stack the rows held in null_dataframes['joy_ab_null'] underneath the scored rows.
joy_ab_final_sen_df = pd.concat([joy_ab_nonnull_merged_df,null_dataframes['joy_ab_null']], ignore_index=True)

# Work on a copy so the date truncation does not touch joy_ab_final_sen_df.
joy_ab_final_sen_df_copy = joy_ab_final_sen_df.copy()
# Keep only the first 10 characters of the stringified date.
joy_ab_final_sen_df_copy["Published At Date"] = joy_ab_final_sen_df_copy["Published At Date"].astype(str).str[:10]

# to_excel does not create missing directories, so make sure the target exists.
os.makedirs("sentiment_raw_output", exist_ok=True)
joy_ab_final_sen_df_copy.to_excel("sentiment_raw_output/joy_ab_final_sen_df_jul.xlsx",index=False)



In [92]:
# NOTE(review): this repeats the export performed by the last line of the
# preceding cell; running it simply rewrites the same file.
joy_ab_final_sen_df_copy.to_excel("sentiment_raw_output/joy_ab_final_sen_df_jul.xlsx",index=False)


### joy_st_af

In [93]:
# Score the non-null joy_st_af reviews bucket by bucket: each bucket is sent
# to score_sentiments_jul in slices of BATCH_SIZE rows while a background
# thread refreshes a progress message. Token usage is accumulated for a cost
# summary.
BATCH_SIZE = 25  # rows passed to score_sentiments_jul per call

# One-element list so the progress thread reads the live value via [0].
batch_counter = [0]
# Count batches over every bucket the loop below visits, rather than a
# hard-coded list of bucket keys.
total_batches = sum(
    math.ceil(len(bucket_df) / BATCH_SIZE)
    for bucket_df in joy_st_af_nonnull_buckets.values()
)
key_counter = 0
total_keys = len(joy_st_af_nonnull_buckets.keys())
joy_st_af_nonnull_api = []
input_tokens_joy_st_af_nonnull = 0
output_tokens_joy_st_af_nonnull = 0
start_time_joy_st_af = time.time()

for key in joy_st_af_nonnull_buckets.keys():
    key_counter += 1
    current_df = joy_st_af_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    # Threading setup
    total_iterations = math.ceil(len(current_df) / BATCH_SIZE)
    stop_event = threading.Event()
    message_thread = threading.Thread(target=print_dynamic_message, args=(batch_counter,key_counter,counter,total_batches,total_keys, total_iterations,start_time_joy_st_af, stop_event))
    message_thread.start()

    try:
        for start in range(0, len(current_df), BATCH_SIZE):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + BATCH_SIZE
            result, it, ot = score_sentiments_jul(current_df[start:end])
            joy_st_af_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        # Always stop the progress thread, even when a scoring call raises;
        # otherwise it keeps clearing and reprinting the cell output.
        stop_event.set()
        message_thread.join()
    input_tokens_joy_st_af_nonnull += input_tokens
    output_tokens_joy_st_af_nonnull += output_tokens
    clear_output(wait=True)

overall_execution_time_joy_st_af = time.time() - start_time_joy_st_af
formatted_time_joy_st_af = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_joy_st_af))
# Rates are applied per 1,000 tokens.
# NOTE(review): confirm 0.01 (input) and 0.03 (output) against current model pricing.
input_token_cost_joy_st_af = round((0.01/1000) * input_tokens_joy_st_af_nonnull, 2)
output_token_cost_joy_st_af = round((0.03/1000) * output_tokens_joy_st_af_nonnull, 2)
total_cost_joy_st_af = round(input_token_cost_joy_st_af + output_token_cost_joy_st_af, 2)

# batch_counter is a list; print its value, not the list repr ("[21]").
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_joy_st_af}")
print(f"Total Input Tokens - {input_tokens_joy_st_af_nonnull}")
print(f"Total Input Cost = {input_token_cost_joy_st_af}")
print(f"Total Output Tokens - {output_tokens_joy_st_af_nonnull}")
print(f"Total Output Cost = {output_token_cost_joy_st_af}")
print(f"Total Cost = {total_cost_joy_st_af}")

Executed [21] Iterations
Total Execution Time: 00:02:59
Total Input Tokens - 26492
Total Input Cost = 0.26
Total Output Tokens - 12945
Total Output Cost = 0.39
Total Cost = 0.65


In [94]:
# Topic labels used as the per-topic keys when the responses are parsed.
column_names2 = [
    'Trust', 'Store Experience', 'Store Staff',
    'Product Design', 'Product Variety',
    'Discount', 'Making Charge', 'Price',
    'Product Quality', 'OLD Gold Jewellery Exchange',
]

# Drop the Markdown code-fence markers from every raw joy_st_af response and
# swap each "[]" for an explicit entry with empty positive/negative strings.
joy_st_af_nonnull_api_cleaned = [
    raw_response.replace("```json", "").replace("```", "").replace(
        "[]", '[{"positive": "", "negative": ""}]'
    )
    for raw_response in joy_st_af_nonnull_api
]

# One cleaned response per row.
joy_st_af_nonnull_api_cleaned_df = pd.DataFrame(joy_st_af_nonnull_api_cleaned)

In [95]:
# Parse each cleaned joy_st_af API response (a JSON object keyed by commentor
# name) into one dict of per-topic scores per commentor.
processed_data_joy_st_af_nonnull = []

#Iterate over each row in the DataFrame
for index, row in joy_st_af_nonnull_api_cleaned_df.iterrows():
    try:
        #Assuming the JSON string is in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Iterate over each key-value pair in the dictionary
        for commentor_name, feedback_list in data.items():
            #Create a dictionary for each commentor with default values as 0
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #Check if there is any feedback for the commentor
            if feedback_list:
                #Since there could be multiple feedback entries for a single commentor, we aggregate them
                for feedback in feedback_list:
                    # Strip whitespace around each topic so "Trust, Price"
                    # matches both "Trust" and "Price".
                    positive = [t.strip() for t in feedback.get("positive", "").split(',')]
                    negative = [t.strip() for t in feedback.get("negative", "").split(',')]

                    for topic in column_names2:
                        #Aggregate the value: 1 for positive, -1 for negative, 0 for not mentioned
                        commentor_data[topic] += assign_value(topic, positive, negative)

                #Finalize the values to be within -1, 0, or 1
                for topic in column_names2:
                    commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_joy_st_af_nonnull.append(commentor_data)
    except (ValueError, TypeError, AttributeError) as err:
        # Malformed JSON (ValueError) or an unexpected response shape
        # (TypeError/AttributeError): skip the rest of this response but report
        # the real cause. KeyboardInterrupt is no longer swallowed.
        # NOTE: commentors appended before the failure are kept, so the result
        # can contain fewer rows than there were reviews.
        print(f"Invalid JSON in row {index}: {err}")

# Write the cleaned responses to Excel, then rebind the name to the reloaded file.
joy_st_af_nonnull_api_cleaned_df.to_excel("joy_st_af_nonnull_api_cleaned_df.xlsx",index=False)

joy_st_af_nonnull_api_cleaned_df = pd.read_excel("joy_st_af_nonnull_api_cleaned_df.xlsx")

joy_st_af_nonnull_api_cleaned_df

In [96]:
# Combine the parsed joy_st_af sentiment scores with the review metadata,
# append the null-review rows, and export the result to Excel.

# Output column order for the sentiment frame.
ordered_columns = ["Commentor Name",
                   "Trust",
                   "Store Experience",
                   "Store Staff",
                   "Product Design",
                   "Product Variety",
                   "Discount",
                   "Making Charge",
                   "Price",
                   "Product Quality",
                   "OLD Gold Jewellery Exchange"]

# Passing columns= selects and orders the columns in one step, and still
# produces the expected (empty) columns when no rows were parsed; indexing an
# empty frame with ordered_columns would raise KeyError instead.
joy_st_af_nonnull_sen_df = pd.DataFrame(processed_data_joy_st_af_nonnull, columns=ordered_columns)

# The column-wise concat below pairs rows by index label. If the two frames
# differ in length, some reviews end up without (or with the wrong) scores,
# so surface that instead of letting it pass silently.
if len(combined_df_joy_st_af) != len(joy_st_af_nonnull_sen_df):
    print(f"Warning: combined_df_joy_st_af has {len(combined_df_joy_st_af)} rows but "
          f"joy_st_af_nonnull_sen_df has {len(joy_st_af_nonnull_sen_df)}; merged rows may be misaligned")

joy_st_af_nonnull_merged_df = pd.concat([combined_df_joy_st_af, joy_st_af_nonnull_sen_df], axis=1)

# Stack the rows held in null_dataframes['joy_st_af_null'] underneath the scored rows.
joy_st_af_final_sen_df = pd.concat([joy_st_af_nonnull_merged_df,null_dataframes['joy_st_af_null']], ignore_index=True)

# Work on a copy so the date truncation does not touch joy_st_af_final_sen_df.
joy_st_af_final_sen_df_copy = joy_st_af_final_sen_df.copy()
# Keep only the first 10 characters of the stringified date.
joy_st_af_final_sen_df_copy["Published At Date"] = joy_st_af_final_sen_df_copy["Published At Date"].astype(str).str[:10]

# to_excel does not create missing directories, so make sure the target exists.
os.makedirs("sentiment_raw_output", exist_ok=True)
joy_st_af_final_sen_df_copy.to_excel("sentiment_raw_output/joy_st_af_final_sen_df_jul.xlsx",index=False)



### joy_dm_ad

In [97]:
# Score the non-null joy_dm_ad reviews bucket by bucket: each bucket is sent
# to score_sentiments_jul in slices of BATCH_SIZE rows while a background
# thread refreshes a progress message. Token usage is accumulated for a cost
# summary.
BATCH_SIZE = 25  # rows passed to score_sentiments_jul per call

# One-element list so the progress thread reads the live value via [0].
batch_counter = [0]
# Count batches over every bucket the loop below visits, rather than a
# hard-coded list of bucket keys.
total_batches = sum(
    math.ceil(len(bucket_df) / BATCH_SIZE)
    for bucket_df in joy_dm_ad_nonnull_buckets.values()
)
key_counter = 0
total_keys = len(joy_dm_ad_nonnull_buckets.keys())
joy_dm_ad_nonnull_api = []
input_tokens_joy_dm_ad_nonnull = 0
output_tokens_joy_dm_ad_nonnull = 0
start_time_joy_dm_ad = time.time()

for key in joy_dm_ad_nonnull_buckets.keys():
    key_counter += 1
    current_df = joy_dm_ad_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    # Threading setup
    total_iterations = math.ceil(len(current_df) / BATCH_SIZE)
    stop_event = threading.Event()
    message_thread = threading.Thread(target=print_dynamic_message, args=(batch_counter,key_counter,counter,total_batches,total_keys, total_iterations,start_time_joy_dm_ad, stop_event))
    message_thread.start()

    try:
        for start in range(0, len(current_df), BATCH_SIZE):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + BATCH_SIZE
            result, it, ot = score_sentiments_jul(current_df[start:end])
            joy_dm_ad_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        # Always stop the progress thread, even when a scoring call raises;
        # otherwise it keeps clearing and reprinting the cell output.
        stop_event.set()
        message_thread.join()
    input_tokens_joy_dm_ad_nonnull += input_tokens
    output_tokens_joy_dm_ad_nonnull += output_tokens
    clear_output(wait=True)

overall_execution_time_joy_dm_ad = time.time() - start_time_joy_dm_ad
formatted_time_joy_dm_ad = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_joy_dm_ad))
# Rates are applied per 1,000 tokens.
# NOTE(review): confirm 0.01 (input) and 0.03 (output) against current model pricing.
input_token_cost_joy_dm_ad = round((0.01/1000) * input_tokens_joy_dm_ad_nonnull, 2)
output_token_cost_joy_dm_ad = round((0.03/1000) * output_tokens_joy_dm_ad_nonnull, 2)
total_cost_joy_dm_ad = round(input_token_cost_joy_dm_ad + output_token_cost_joy_dm_ad, 2)

# batch_counter is a list; print its value, not the list repr ("[4]").
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_joy_dm_ad}")
print(f"Total Input Tokens - {input_tokens_joy_dm_ad_nonnull}")
print(f"Total Input Cost = {input_token_cost_joy_dm_ad}")
print(f"Total Output Tokens - {output_tokens_joy_dm_ad_nonnull}")
print(f"Total Output Cost = {output_token_cost_joy_dm_ad}")
print(f"Total Cost = {total_cost_joy_dm_ad}")

Executed [4] Iterations
Total Execution Time: 00:00:19
Total Input Tokens - 3677
Total Input Cost = 0.04
Total Output Tokens - 1013
Total Output Cost = 0.03
Total Cost = 0.07


In [98]:
# Topic labels used as the per-topic keys when the responses are parsed.
column_names2 = [
    'Trust', 'Store Experience', 'Store Staff',
    'Product Design', 'Product Variety',
    'Discount', 'Making Charge', 'Price',
    'Product Quality', 'OLD Gold Jewellery Exchange',
]

# Drop the Markdown code-fence markers from every raw joy_dm_ad response and
# swap each "[]" for an explicit entry with empty positive/negative strings.
joy_dm_ad_nonnull_api_cleaned = [
    raw_response.replace("```json", "").replace("```", "").replace(
        "[]", '[{"positive": "", "negative": ""}]'
    )
    for raw_response in joy_dm_ad_nonnull_api
]

# One cleaned response per row.
joy_dm_ad_nonnull_api_cleaned_df = pd.DataFrame(joy_dm_ad_nonnull_api_cleaned)

In [99]:
# Parse each cleaned joy_dm_ad API response (a JSON object keyed by commentor
# name) into one dict of per-topic scores per commentor.
processed_data_joy_dm_ad_nonnull = []

#Iterate over each row in the DataFrame
for index, row in joy_dm_ad_nonnull_api_cleaned_df.iterrows():
    try:
        #Assuming the JSON string is in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Iterate over each key-value pair in the dictionary
        for commentor_name, feedback_list in data.items():
            #Create a dictionary for each commentor with default values as 0
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #Check if there is any feedback for the commentor
            if feedback_list:
                #Since there could be multiple feedback entries for a single commentor, we aggregate them
                for feedback in feedback_list:
                    # Strip whitespace around each topic so "Trust, Price"
                    # matches both "Trust" and "Price".
                    positive = [t.strip() for t in feedback.get("positive", "").split(',')]
                    negative = [t.strip() for t in feedback.get("negative", "").split(',')]

                    for topic in column_names2:
                        #Aggregate the value: 1 for positive, -1 for negative, 0 for not mentioned
                        commentor_data[topic] += assign_value(topic, positive, negative)

                #Finalize the values to be within -1, 0, or 1
                for topic in column_names2:
                    commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_joy_dm_ad_nonnull.append(commentor_data)
    except (ValueError, TypeError, AttributeError) as err:
        # Malformed JSON (ValueError) or an unexpected response shape
        # (TypeError/AttributeError): skip the rest of this response but report
        # the real cause. KeyboardInterrupt is no longer swallowed.
        # NOTE: commentors appended before the failure are kept, so the result
        # can contain fewer rows than there were reviews.
        print(f"Invalid JSON in row {index}: {err}")

# Write the cleaned responses to Excel, then rebind the name to the reloaded file.
joy_dm_ad_nonnull_api_cleaned_df.to_excel("joy_dm_ad_nonnull_api_cleaned_df.xlsx",index=False)

joy_dm_ad_nonnull_api_cleaned_df = pd.read_excel("joy_dm_ad_nonnull_api_cleaned_df.xlsx")

joy_dm_ad_nonnull_api_cleaned_df

In [100]:
# Combine the parsed joy_dm_ad sentiment scores with the review metadata,
# append the null-review rows, and export the result to Excel.

# Output column order for the sentiment frame.
ordered_columns = ["Commentor Name",
                   "Trust",
                   "Store Experience",
                   "Store Staff",
                   "Product Design",
                   "Product Variety",
                   "Discount",
                   "Making Charge",
                   "Price",
                   "Product Quality",
                   "OLD Gold Jewellery Exchange"]

# Passing columns= selects and orders the columns in one step, and still
# produces the expected (empty) columns when no rows were parsed; indexing an
# empty frame with ordered_columns would raise KeyError instead.
joy_dm_ad_nonnull_sen_df = pd.DataFrame(processed_data_joy_dm_ad_nonnull, columns=ordered_columns)

# The column-wise concat below pairs rows by index label. If the two frames
# differ in length, some reviews end up without (or with the wrong) scores,
# so surface that instead of letting it pass silently.
if len(combined_df_joy_dm_ad) != len(joy_dm_ad_nonnull_sen_df):
    print(f"Warning: combined_df_joy_dm_ad has {len(combined_df_joy_dm_ad)} rows but "
          f"joy_dm_ad_nonnull_sen_df has {len(joy_dm_ad_nonnull_sen_df)}; merged rows may be misaligned")

joy_dm_ad_nonnull_merged_df = pd.concat([combined_df_joy_dm_ad, joy_dm_ad_nonnull_sen_df], axis=1)

# Stack the rows held in null_dataframes['joy_dm_ad_null'] underneath the scored rows.
joy_dm_ad_final_sen_df = pd.concat([joy_dm_ad_nonnull_merged_df,null_dataframes['joy_dm_ad_null']], ignore_index=True)

# Work on a copy so the date truncation does not touch joy_dm_ad_final_sen_df.
joy_dm_ad_final_sen_df_copy = joy_dm_ad_final_sen_df.copy()
# Keep only the first 10 characters of the stringified date.
joy_dm_ad_final_sen_df_copy["Published At Date"] = joy_dm_ad_final_sen_df_copy["Published At Date"].astype(str).str[:10]

# to_excel does not create missing directories, so make sure the target exists.
os.makedirs("sentiment_raw_output", exist_ok=True)
joy_dm_ad_final_sen_df_copy.to_excel("sentiment_raw_output/joy_dm_ad_final_sen_df_jul.xlsx",index=False)



### joy_mz_ad

In [101]:
# Score the non-null joy_mz_ad reviews bucket by bucket: each bucket is sent
# to score_sentiments_jul in slices of BATCH_SIZE rows while a background
# thread refreshes a progress message. Token usage is accumulated for a cost
# summary.
BATCH_SIZE = 25  # rows passed to score_sentiments_jul per call

# One-element list so the progress thread reads the live value via [0].
batch_counter = [0]
# Count batches over every bucket the loop below visits, rather than a
# hard-coded list of bucket keys.
total_batches = sum(
    math.ceil(len(bucket_df) / BATCH_SIZE)
    for bucket_df in joy_mz_ad_nonnull_buckets.values()
)
key_counter = 0
total_keys = len(joy_mz_ad_nonnull_buckets.keys())
joy_mz_ad_nonnull_api = []
input_tokens_joy_mz_ad_nonnull = 0
output_tokens_joy_mz_ad_nonnull = 0
start_time_joy_mz_ad = time.time()

for key in joy_mz_ad_nonnull_buckets.keys():
    key_counter += 1
    current_df = joy_mz_ad_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    # Threading setup
    total_iterations = math.ceil(len(current_df) / BATCH_SIZE)
    stop_event = threading.Event()
    message_thread = threading.Thread(target=print_dynamic_message, args=(batch_counter,key_counter,counter,total_batches,total_keys, total_iterations,start_time_joy_mz_ad, stop_event))
    message_thread.start()

    try:
        for start in range(0, len(current_df), BATCH_SIZE):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + BATCH_SIZE
            result, it, ot = score_sentiments_jul(current_df[start:end])
            joy_mz_ad_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        # Always stop the progress thread, even when a scoring call raises;
        # otherwise it keeps clearing and reprinting the cell output.
        stop_event.set()
        message_thread.join()
    input_tokens_joy_mz_ad_nonnull += input_tokens
    output_tokens_joy_mz_ad_nonnull += output_tokens
    clear_output(wait=True)

overall_execution_time_joy_mz_ad = time.time() - start_time_joy_mz_ad
formatted_time_joy_mz_ad = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_joy_mz_ad))
# Rates are applied per 1,000 tokens.
# NOTE(review): confirm 0.01 (input) and 0.03 (output) against current model pricing.
input_token_cost_joy_mz_ad = round((0.01/1000) * input_tokens_joy_mz_ad_nonnull, 2)
output_token_cost_joy_mz_ad = round((0.03/1000) * output_tokens_joy_mz_ad_nonnull, 2)
total_cost_joy_mz_ad = round(input_token_cost_joy_mz_ad + output_token_cost_joy_mz_ad, 2)

# batch_counter is a list; print its value, not the list repr ("[7]").
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_joy_mz_ad}")
print(f"Total Input Tokens - {input_tokens_joy_mz_ad_nonnull}")
print(f"Total Input Cost = {input_token_cost_joy_mz_ad}")
print(f"Total Output Tokens - {output_tokens_joy_mz_ad_nonnull}")
print(f"Total Output Cost = {output_token_cost_joy_mz_ad}")
print(f"Total Cost = {total_cost_joy_mz_ad}")

Executed [7] Iterations
Total Execution Time: 00:00:45
Total Input Tokens - 8120
Total Input Cost = 0.08
Total Output Tokens - 3190
Total Output Cost = 0.1
Total Cost = 0.18


In [102]:
# Topic labels used as the per-topic keys when the responses are parsed.
column_names2 = [
    'Trust', 'Store Experience', 'Store Staff',
    'Product Design', 'Product Variety',
    'Discount', 'Making Charge', 'Price',
    'Product Quality', 'OLD Gold Jewellery Exchange',
]

# Drop the Markdown code-fence markers from every raw joy_mz_ad response and
# swap each "[]" for an explicit entry with empty positive/negative strings.
joy_mz_ad_nonnull_api_cleaned = [
    raw_response.replace("```json", "").replace("```", "").replace(
        "[]", '[{"positive": "", "negative": ""}]'
    )
    for raw_response in joy_mz_ad_nonnull_api
]

# One cleaned response per row.
joy_mz_ad_nonnull_api_cleaned_df = pd.DataFrame(joy_mz_ad_nonnull_api_cleaned)

In [103]:
# Parse each cleaned joy_mz_ad API response (a JSON object keyed by commentor
# name) into one dict of per-topic scores per commentor.
processed_data_joy_mz_ad_nonnull = []

#Iterate over each row in the DataFrame
for index, row in joy_mz_ad_nonnull_api_cleaned_df.iterrows():
    try:
        #Assuming the JSON string is in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Iterate over each key-value pair in the dictionary
        for commentor_name, feedback_list in data.items():
            #Create a dictionary for each commentor with default values as 0
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #Check if there is any feedback for the commentor
            if feedback_list:
                #Since there could be multiple feedback entries for a single commentor, we aggregate them
                for feedback in feedback_list:
                    # Strip whitespace around each topic so "Trust, Price"
                    # matches both "Trust" and "Price".
                    positive = [t.strip() for t in feedback.get("positive", "").split(',')]
                    negative = [t.strip() for t in feedback.get("negative", "").split(',')]

                    for topic in column_names2:
                        #Aggregate the value: 1 for positive, -1 for negative, 0 for not mentioned
                        commentor_data[topic] += assign_value(topic, positive, negative)

                #Finalize the values to be within -1, 0, or 1
                for topic in column_names2:
                    commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_joy_mz_ad_nonnull.append(commentor_data)
    except (ValueError, TypeError, AttributeError) as err:
        # Malformed JSON (ValueError) or an unexpected response shape
        # (TypeError/AttributeError): skip the rest of this response but report
        # the real cause. KeyboardInterrupt is no longer swallowed.
        # NOTE: commentors appended before the failure are kept, so the result
        # can contain fewer rows than there were reviews.
        print(f"Invalid JSON in row {index}: {err}")

# Write the cleaned responses to Excel, then rebind the name to the reloaded file.
joy_mz_ad_nonnull_api_cleaned_df.to_excel("joy_mz_ad_nonnull_api_cleaned_df.xlsx",index=False)

joy_mz_ad_nonnull_api_cleaned_df = pd.read_excel("joy_mz_ad_nonnull_api_cleaned_df.xlsx")

joy_mz_ad_nonnull_api_cleaned_df

In [104]:
# Combine the parsed joy_mz_ad sentiment scores with the review metadata,
# append the null-review rows, and export the result to Excel.

# Output column order for the sentiment frame.
ordered_columns = ["Commentor Name",
                   "Trust",
                   "Store Experience",
                   "Store Staff",
                   "Product Design",
                   "Product Variety",
                   "Discount",
                   "Making Charge",
                   "Price",
                   "Product Quality",
                   "OLD Gold Jewellery Exchange"]

# Passing columns= selects and orders the columns in one step, and still
# produces the expected (empty) columns when no rows were parsed; indexing an
# empty frame with ordered_columns would raise KeyError instead.
joy_mz_ad_nonnull_sen_df = pd.DataFrame(processed_data_joy_mz_ad_nonnull, columns=ordered_columns)

# The column-wise concat below pairs rows by index label. If the two frames
# differ in length, some reviews end up without (or with the wrong) scores,
# so surface that instead of letting it pass silently.
if len(combined_df_joy_mz_ad) != len(joy_mz_ad_nonnull_sen_df):
    print(f"Warning: combined_df_joy_mz_ad has {len(combined_df_joy_mz_ad)} rows but "
          f"joy_mz_ad_nonnull_sen_df has {len(joy_mz_ad_nonnull_sen_df)}; merged rows may be misaligned")

joy_mz_ad_nonnull_merged_df = pd.concat([combined_df_joy_mz_ad, joy_mz_ad_nonnull_sen_df], axis=1)

# Stack the rows held in null_dataframes['joy_mz_ad_null'] underneath the scored rows.
joy_mz_ad_final_sen_df = pd.concat([joy_mz_ad_nonnull_merged_df,null_dataframes['joy_mz_ad_null']], ignore_index=True)

# Work on a copy so the date truncation does not touch joy_mz_ad_final_sen_df.
joy_mz_ad_final_sen_df_copy = joy_mz_ad_final_sen_df.copy()
# Keep only the first 10 characters of the stringified date.
joy_mz_ad_final_sen_df_copy["Published At Date"] = joy_mz_ad_final_sen_df_copy["Published At Date"].astype(str).str[:10]

# to_excel does not create missing directories, so make sure the target exists.
os.makedirs("sentiment_raw_output", exist_ok=True)
joy_mz_ad_final_sen_df_copy.to_excel("sentiment_raw_output/joy_mz_ad_final_sen_df_jul.xlsx",index=False)


### joy_sh_ad

In [105]:
# Score the non-null joy_sh_ad reviews bucket by bucket: each bucket is sent
# to score_sentiments_jul in slices of BATCH_SIZE rows while a background
# thread refreshes a progress message. Token usage is accumulated for a cost
# summary.
BATCH_SIZE = 25  # rows passed to score_sentiments_jul per call

# One-element list so the progress thread reads the live value via [0].
batch_counter = [0]
# Count batches over every bucket the loop below visits, rather than a
# hard-coded list of bucket keys.
total_batches = sum(
    math.ceil(len(bucket_df) / BATCH_SIZE)
    for bucket_df in joy_sh_ad_nonnull_buckets.values()
)
key_counter = 0
total_keys = len(joy_sh_ad_nonnull_buckets.keys())
joy_sh_ad_nonnull_api = []
input_tokens_joy_sh_ad_nonnull = 0
output_tokens_joy_sh_ad_nonnull = 0
start_time_joy_sh_ad = time.time()

for key in joy_sh_ad_nonnull_buckets.keys():
    key_counter += 1
    current_df = joy_sh_ad_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    # Threading setup
    total_iterations = math.ceil(len(current_df) / BATCH_SIZE)
    stop_event = threading.Event()
    message_thread = threading.Thread(target=print_dynamic_message, args=(batch_counter,key_counter,counter,total_batches,total_keys, total_iterations,start_time_joy_sh_ad, stop_event))
    message_thread.start()

    try:
        for start in range(0, len(current_df), BATCH_SIZE):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + BATCH_SIZE
            result, it, ot = score_sentiments_jul(current_df[start:end])
            joy_sh_ad_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        # Always stop the progress thread, even when a scoring call raises;
        # otherwise it keeps clearing and reprinting the cell output.
        stop_event.set()
        message_thread.join()
    input_tokens_joy_sh_ad_nonnull += input_tokens
    output_tokens_joy_sh_ad_nonnull += output_tokens
    clear_output(wait=True)

overall_execution_time_joy_sh_ad = time.time() - start_time_joy_sh_ad
formatted_time_joy_sh_ad = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_joy_sh_ad))
# Rates are applied per 1,000 tokens.
# NOTE(review): confirm 0.01 (input) and 0.03 (output) against current model pricing.
input_token_cost_joy_sh_ad = round((0.01/1000) * input_tokens_joy_sh_ad_nonnull, 2)
output_token_cost_joy_sh_ad = round((0.03/1000) * output_tokens_joy_sh_ad_nonnull, 2)
total_cost_joy_sh_ad = round(input_token_cost_joy_sh_ad + output_token_cost_joy_sh_ad, 2)

# batch_counter is a list; print its value, not the list repr ("[5]").
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_joy_sh_ad}")
print(f"Total Input Tokens - {input_tokens_joy_sh_ad_nonnull}")
print(f"Total Input Cost = {input_token_cost_joy_sh_ad}")
print(f"Total Output Tokens - {output_tokens_joy_sh_ad_nonnull}")
print(f"Total Output Cost = {output_token_cost_joy_sh_ad}")
print(f"Total Cost = {total_cost_joy_sh_ad}")

Executed [5] Iterations
Total Execution Time: 00:00:22
Total Input Tokens - 4715
Total Input Cost = 0.05
Total Output Tokens - 1346
Total Output Cost = 0.04
Total Cost = 0.09


In [106]:
# Clean the raw joy_sh_ad API responses: drop the Markdown code-fence markers,
# then swap every empty "[]" for a single feedback entry with blank
# positive/negative strings. The cleaned strings go into a one-column DataFrame.
joy_sh_ad_nonnull_api_cleaned = [
    unfenced.replace("[]", '[{"positive": "", "negative": ""}]')
    for unfenced in (
        response.replace("```json", "").replace("```", "")
        for response in joy_sh_ad_nonnull_api
    )
]
joy_sh_ad_nonnull_api_cleaned_df = pd.DataFrame(joy_sh_ad_nonnull_api_cleaned)

# Topics scored for every commentor
column_names2 = [
    'Trust',
    'Store Experience',
    'Store Staff',
    'Product Design',
    'Product Variety',
    'Discount',
    'Making Charge',
    'Price',
    'Product Quality',
    'OLD Gold Jewellery Exchange',
]

In [107]:
# Parse each cleaned API response (a JSON object mapping commentor name -> list of
# feedback dicts) into one row per commentor holding a -1/0/1 score per topic.
processed_data_joy_sh_ad_nonnull = []

#Iterate over each row in the DataFrame
for index, row in joy_sh_ad_nonnull_api_cleaned_df.iterrows():
    try:
        #Assuming the JSON string is in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Iterate over each key-value pair in the dictionary
        for commentor_name, feedback_list in data.items():
            #Create a dictionary for each commentor with default values as 0
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #Check if there is any feedback for the commentor
            if feedback_list:
                #Since there could be multiple feedback entries for a single commentor, we aggregate them
                for feedback in feedback_list:
                    # Strip each topic so "Trust, Price" still matches "Price";
                    # `or ""` keeps a JSON null from aborting the whole response.
                    positive = [t.strip() for t in (feedback.get("positive") or "").split(',')]
                    negative = [t.strip() for t in (feedback.get("negative") or "").split(',')]

                    for topic in column_names2:
                        #Aggregate the value: 1 for positive, -1 for negative, 0 for not mentioned
                        commentor_data[topic] += assign_value(topic, positive, negative)

                #Finalize the values to be within -1, 0, or 1
                for topic in column_names2:
                    commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_joy_sh_ad_nonnull.append(commentor_data)
    except Exception as exc:
        # Best-effort: report and skip responses that cannot be parsed. `Exception`
        # (not a bare except) lets KeyboardInterrupt through. A skipped response adds
        # no commentor rows, which matters for the later position-based concat.
        print(f"Invalid JSON in row {index}: {exc}")

joy_sh_ad_nonnull_api_cleaned_df.to_excel("joy_sh_ad_nonnull_api_cleaned_df.xlsx",index=False)

joy_sh_ad_nonnull_api_cleaned_df = pd.read_excel("joy_sh_ad_nonnull_api_cleaned_df.xlsx")

joy_sh_ad_nonnull_api_cleaned_df

In [108]:
# Build the per-commentor sentiment frame for joy_sh_ad, attach it to the review
# rows, append the null-review rows and export the result to Excel.
ordered_columns = ["Commentor Name",
                   "Trust",
                   "Store Experience",
                   "Store Staff",
                   "Product Design",
                   "Product Variety",
                   "Discount",
                   "Making Charge",
                   "Price",
                   "Product Quality",
                   "OLD Gold Jewellery Exchange"]
# reindex (instead of [] selection) also works when no rows were parsed at all
joy_sh_ad_nonnull_sen_df = pd.DataFrame(processed_data_joy_sh_ad_nonnull).reindex(columns=ordered_columns)

# concat(axis=1) pairs rows by index label, not by commentor name, so a skipped or
# short API response shifts every later score onto the wrong review.
# NOTE(review): assumes combined_df_joy_sh_ad is in the order the reviews were sent — confirm.
if len(joy_sh_ad_nonnull_sen_df) != len(combined_df_joy_sh_ad):
    print(f"WARNING: {len(joy_sh_ad_nonnull_sen_df)} sentiment rows vs {len(combined_df_joy_sh_ad)} joy_sh_ad review rows; rows may be misaligned")
joy_sh_ad_nonnull_merged_df = pd.concat([combined_df_joy_sh_ad, joy_sh_ad_nonnull_sen_df], axis=1)

joy_sh_ad_final_sen_df = pd.concat([joy_sh_ad_nonnull_merged_df,null_dataframes['joy_sh_ad_null']], ignore_index=True)

joy_sh_ad_final_sen_df_copy = joy_sh_ad_final_sen_df.copy()
# Keep only the first 10 characters of the stringified date
joy_sh_ad_final_sen_df_copy["Published At Date"] = joy_sh_ad_final_sen_df_copy["Published At Date"].astype(str).str[:10]

# Make sure the output folder exists before writing
os.makedirs("sentiment_raw_output", exist_ok=True)
joy_sh_ad_final_sen_df_copy.to_excel("sentiment_raw_output/joy_sh_ad_final_sen_df_jul.xlsx",index=False)


### mal_sc

In [109]:
# Score every mal_sc non-null review bucket with score_sentiments_jul, 25 rows per
# call, keeping each raw response and adding up the token counts returned with it.
rows_per_call = 25
batch_counter = [0]  # one-element list: mutated here, read as full_counter[0] by print_dynamic_message
# Derive the total from the same keys the loop below visits, so the progress
# denominator always matches the number of calls actually made.
total_batches = sum(
    math.ceil(len(mal_sc_nonnull_buckets[bucket_key]) / rows_per_call)
    for bucket_key in mal_sc_nonnull_buckets.keys()
)
key_counter = 0
total_keys = len(mal_sc_nonnull_buckets.keys())
mal_sc_nonnull_api = []
input_tokens_mal_sc_nonnull = 0
output_tokens_mal_sc_nonnull = 0
start_time_mal_sc = time.time()

for key in mal_sc_nonnull_buckets.keys():
    key_counter += 1
    current_df = mal_sc_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    # Progress display runs on a background thread until stop_event is set
    total_iterations = math.ceil(len(current_df) / rows_per_call)
    stop_event = threading.Event()
    message_thread = threading.Thread(
        target=print_dynamic_message,
        args=(batch_counter, key_counter, counter, total_batches, total_keys,
              total_iterations, start_time_mal_sc, stop_event),
    )
    message_thread.start()

    try:
        for start in range(0, len(current_df), rows_per_call):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + rows_per_call
            result, it, ot = score_sentiments_jul(current_df[start:end])
            mal_sc_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        # Stop the progress thread even when a call raises; otherwise it would
        # keep looping on stop_event and clearing the cell output.
        stop_event.set()
        message_thread.join()
    input_tokens_mal_sc_nonnull += input_tokens
    output_tokens_mal_sc_nonnull += output_tokens
    clear_output(wait=True)

overall_execution_time_mal_sc = time.time() - start_time_mal_sc
formatted_time_mal_sc = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_mal_sc))
# Hard-coded rates: 0.01 per 1000 input tokens, 0.03 per 1000 output tokens
input_token_cost_mal_sc = round((0.01/1000) * input_tokens_mal_sc_nonnull, 2)
output_token_cost_mal_sc = round((0.03/1000) * output_tokens_mal_sc_nonnull, 2)
total_cost_mal_sc = round(input_token_cost_mal_sc + output_token_cost_mal_sc, 2)

# batch_counter is a one-element list; print the number it holds rather than the list repr
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_mal_sc}")
print(f"Total Input Tokens - {input_tokens_mal_sc_nonnull}")
print(f"Total Input Cost = {input_token_cost_mal_sc}")
print(f"Total Output Tokens - {output_tokens_mal_sc_nonnull}")
print(f"Total Output Cost = {output_token_cost_mal_sc}")
print(f"Total Cost = {total_cost_mal_sc}")

Executed [7] Iterations
Total Execution Time: 00:00:31
Total Input Tokens - 7985
Total Input Cost = 0.08
Total Output Tokens - 2657
Total Output Cost = 0.08
Total Cost = 0.16


In [110]:
# Clean the raw mal_sc API responses: drop the Markdown code-fence markers,
# then swap every empty "[]" for a single feedback entry with blank
# positive/negative strings. The cleaned strings go into a one-column DataFrame.
mal_sc_nonnull_api_cleaned = [
    unfenced.replace("[]", '[{"positive": "", "negative": ""}]')
    for unfenced in (
        response.replace("```json", "").replace("```", "")
        for response in mal_sc_nonnull_api
    )
]
mal_sc_nonnull_api_cleaned_df = pd.DataFrame(mal_sc_nonnull_api_cleaned)

# Topics scored for every commentor
column_names2 = [
    'Trust',
    'Store Experience',
    'Store Staff',
    'Product Design',
    'Product Variety',
    'Discount',
    'Making Charge',
    'Price',
    'Product Quality',
    'OLD Gold Jewellery Exchange',
]

In [111]:
# Parse each cleaned API response (a JSON object mapping commentor name -> list of
# feedback dicts) into one row per commentor holding a -1/0/1 score per topic.
processed_data_mal_sc_nonnull = []

#Iterate over each row in the DataFrame
for index, row in mal_sc_nonnull_api_cleaned_df.iterrows():
    try:
        #Assuming the JSON string is in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Iterate over each key-value pair in the dictionary
        for commentor_name, feedback_list in data.items():
            #Create a dictionary for each commentor with default values as 0
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #Check if there is any feedback for the commentor
            if feedback_list:
                #Since there could be multiple feedback entries for a single commentor, we aggregate them
                for feedback in feedback_list:
                    # Strip each topic so "Trust, Price" still matches "Price";
                    # `or ""` keeps a JSON null from aborting the whole response.
                    positive = [t.strip() for t in (feedback.get("positive") or "").split(',')]
                    negative = [t.strip() for t in (feedback.get("negative") or "").split(',')]

                    for topic in column_names2:
                        #Aggregate the value: 1 for positive, -1 for negative, 0 for not mentioned
                        commentor_data[topic] += assign_value(topic, positive, negative)

                #Finalize the values to be within -1, 0, or 1
                for topic in column_names2:
                    commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_mal_sc_nonnull.append(commentor_data)
    except Exception as exc:
        # Best-effort: report and skip responses that cannot be parsed. `Exception`
        # (not a bare except) lets KeyboardInterrupt through. A skipped response adds
        # no commentor rows, which matters for the later position-based concat.
        print(f"Invalid JSON in row {index}: {exc}")

mal_sc_nonnull_api_cleaned_df.to_excel("mal_sc_nonnull_api_cleaned_df.xlsx",index=False)

mal_sc_nonnull_api_cleaned_df = pd.read_excel("mal_sc_nonnull_api_cleaned_df.xlsx")

mal_sc_nonnull_api_cleaned_df

In [112]:
# Build the per-commentor sentiment frame for mal_sc, attach it to the review
# rows, append the null-review rows and export the result to Excel.
ordered_columns = ["Commentor Name",
                   "Trust",
                   "Store Experience",
                   "Store Staff",
                   "Product Design",
                   "Product Variety",
                   "Discount",
                   "Making Charge",
                   "Price",
                   "Product Quality",
                   "OLD Gold Jewellery Exchange"]
# reindex (instead of [] selection) also works when no rows were parsed at all
mal_sc_nonnull_sen_df = pd.DataFrame(processed_data_mal_sc_nonnull).reindex(columns=ordered_columns)

# concat(axis=1) pairs rows by index label, not by commentor name, so a skipped or
# short API response shifts every later score onto the wrong review.
# NOTE(review): assumes combined_df_mal_sc is in the order the reviews were sent — confirm.
if len(mal_sc_nonnull_sen_df) != len(combined_df_mal_sc):
    print(f"WARNING: {len(mal_sc_nonnull_sen_df)} sentiment rows vs {len(combined_df_mal_sc)} mal_sc review rows; rows may be misaligned")
mal_sc_nonnull_merged_df = pd.concat([combined_df_mal_sc, mal_sc_nonnull_sen_df], axis=1)

mal_sc_final_sen_df = pd.concat([mal_sc_nonnull_merged_df,null_dataframes['mal_sc_null']], ignore_index=True)

mal_sc_final_sen_df_copy = mal_sc_final_sen_df.copy()
# Keep only the first 10 characters of the stringified date
mal_sc_final_sen_df_copy["Published At Date"] = mal_sc_final_sen_df_copy["Published At Date"].astype(str).str[:10]

# Make sure the output folder exists before writing
os.makedirs("sentiment_raw_output", exist_ok=True)
mal_sc_final_sen_df_copy.to_excel("sentiment_raw_output/mal_sc_final_sen_df_jul.xlsx",index=False)


### mal_ab

In [113]:
# Score every mal_ab non-null review bucket with score_sentiments_jul, 25 rows per
# call, keeping each raw response and adding up the token counts returned with it.
rows_per_call = 25
batch_counter = [0]  # one-element list: mutated here, read as full_counter[0] by print_dynamic_message
# Derive the total from the same keys the loop below visits, so the progress
# denominator always matches the number of calls actually made.
total_batches = sum(
    math.ceil(len(mal_ab_nonnull_buckets[bucket_key]) / rows_per_call)
    for bucket_key in mal_ab_nonnull_buckets.keys()
)
key_counter = 0
total_keys = len(mal_ab_nonnull_buckets.keys())
mal_ab_nonnull_api = []
input_tokens_mal_ab_nonnull = 0
output_tokens_mal_ab_nonnull = 0
start_time_mal_ab = time.time()

for key in mal_ab_nonnull_buckets.keys():
    key_counter += 1
    current_df = mal_ab_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    # Progress display runs on a background thread until stop_event is set
    total_iterations = math.ceil(len(current_df) / rows_per_call)
    stop_event = threading.Event()
    message_thread = threading.Thread(
        target=print_dynamic_message,
        args=(batch_counter, key_counter, counter, total_batches, total_keys,
              total_iterations, start_time_mal_ab, stop_event),
    )
    message_thread.start()

    try:
        for start in range(0, len(current_df), rows_per_call):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + rows_per_call
            result, it, ot = score_sentiments_jul(current_df[start:end])
            mal_ab_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        # Stop the progress thread even when a call raises; otherwise it would
        # keep looping on stop_event and clearing the cell output.
        stop_event.set()
        message_thread.join()
    input_tokens_mal_ab_nonnull += input_tokens
    output_tokens_mal_ab_nonnull += output_tokens
    clear_output(wait=True)

overall_execution_time_mal_ab = time.time() - start_time_mal_ab
formatted_time_mal_ab = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_mal_ab))
# Hard-coded rates: 0.01 per 1000 input tokens, 0.03 per 1000 output tokens
input_token_cost_mal_ab = round((0.01/1000) * input_tokens_mal_ab_nonnull, 2)
output_token_cost_mal_ab = round((0.03/1000) * output_tokens_mal_ab_nonnull, 2)
total_cost_mal_ab = round(input_token_cost_mal_ab + output_token_cost_mal_ab, 2)

# batch_counter is a one-element list; print the number it holds rather than the list repr
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_mal_ab}")
print(f"Total Input Tokens - {input_tokens_mal_ab_nonnull}")
print(f"Total Input Cost = {input_token_cost_mal_ab}")
print(f"Total Output Tokens - {output_tokens_mal_ab_nonnull}")
print(f"Total Output Cost = {output_token_cost_mal_ab}")
print(f"Total Cost = {total_cost_mal_ab}")

Executed [15] Iterations
Total Execution Time: 00:01:52
Total Input Tokens - 20099
Total Input Cost = 0.2
Total Output Tokens - 8928
Total Output Cost = 0.27
Total Cost = 0.47


In [114]:
# Clean the raw mal_ab API responses: drop the Markdown code-fence markers,
# then swap every empty "[]" for a single feedback entry with blank
# positive/negative strings. The cleaned strings go into a one-column DataFrame.
mal_ab_nonnull_api_cleaned = [
    unfenced.replace("[]", '[{"positive": "", "negative": ""}]')
    for unfenced in (
        response.replace("```json", "").replace("```", "")
        for response in mal_ab_nonnull_api
    )
]
mal_ab_nonnull_api_cleaned_df = pd.DataFrame(mal_ab_nonnull_api_cleaned)

# Topics scored for every commentor
column_names2 = [
    'Trust',
    'Store Experience',
    'Store Staff',
    'Product Design',
    'Product Variety',
    'Discount',
    'Making Charge',
    'Price',
    'Product Quality',
    'OLD Gold Jewellery Exchange',
]

In [115]:
# Parse each cleaned API response (a JSON object mapping commentor name -> list of
# feedback dicts) into one row per commentor holding a -1/0/1 score per topic.
processed_data_mal_ab_nonnull = []

#Iterate over each row in the DataFrame
for index, row in mal_ab_nonnull_api_cleaned_df.iterrows():
    try:
        #Assuming the JSON string is in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Iterate over each key-value pair in the dictionary
        for commentor_name, feedback_list in data.items():
            #Create a dictionary for each commentor with default values as 0
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #Check if there is any feedback for the commentor
            if feedback_list:
                #Since there could be multiple feedback entries for a single commentor, we aggregate them
                for feedback in feedback_list:
                    # Strip each topic so "Trust, Price" still matches "Price";
                    # `or ""` keeps a JSON null from aborting the whole response.
                    positive = [t.strip() for t in (feedback.get("positive") or "").split(',')]
                    negative = [t.strip() for t in (feedback.get("negative") or "").split(',')]

                    for topic in column_names2:
                        #Aggregate the value: 1 for positive, -1 for negative, 0 for not mentioned
                        commentor_data[topic] += assign_value(topic, positive, negative)

                #Finalize the values to be within -1, 0, or 1
                for topic in column_names2:
                    commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_mal_ab_nonnull.append(commentor_data)
    except Exception as exc:
        # Best-effort: report and skip responses that cannot be parsed. `Exception`
        # (not a bare except) lets KeyboardInterrupt through. A skipped response adds
        # no commentor rows, which matters for the later position-based concat.
        print(f"Invalid JSON in row {index}: {exc}")

mal_ab_nonnull_api_cleaned_df.to_excel("mal_ab_nonnull_api_cleaned_df.xlsx",index=False)

mal_ab_nonnull_api_cleaned_df = pd.read_excel("mal_ab_nonnull_api_cleaned_df.xlsx")

mal_ab_nonnull_api_cleaned_df

In [116]:
# Build the per-commentor sentiment frame for mal_ab, attach it to the review
# rows, append the null-review rows and export the result to Excel.
ordered_columns = ["Commentor Name",
                   "Trust",
                   "Store Experience",
                   "Store Staff",
                   "Product Design",
                   "Product Variety",
                   "Discount",
                   "Making Charge",
                   "Price",
                   "Product Quality",
                   "OLD Gold Jewellery Exchange"]
# reindex (instead of [] selection) also works when no rows were parsed at all
mal_ab_nonnull_sen_df = pd.DataFrame(processed_data_mal_ab_nonnull).reindex(columns=ordered_columns)

# concat(axis=1) pairs rows by index label, not by commentor name, so a skipped or
# short API response shifts every later score onto the wrong review.
# NOTE(review): assumes combined_df_mal_ab is in the order the reviews were sent — confirm.
if len(mal_ab_nonnull_sen_df) != len(combined_df_mal_ab):
    print(f"WARNING: {len(mal_ab_nonnull_sen_df)} sentiment rows vs {len(combined_df_mal_ab)} mal_ab review rows; rows may be misaligned")
mal_ab_nonnull_merged_df = pd.concat([combined_df_mal_ab, mal_ab_nonnull_sen_df], axis=1)

mal_ab_final_sen_df = pd.concat([mal_ab_nonnull_merged_df,null_dataframes['mal_ab_null']], ignore_index=True)

mal_ab_final_sen_df_copy = mal_ab_final_sen_df.copy()
# Keep only the first 10 characters of the stringified date
mal_ab_final_sen_df_copy["Published At Date"] = mal_ab_final_sen_df_copy["Published At Date"].astype(str).str[:10]

# Make sure the output folder exists before writing
os.makedirs("sentiment_raw_output", exist_ok=True)
mal_ab_final_sen_df_copy.to_excel("sentiment_raw_output/mal_ab_final_sen_df_jul.xlsx",index=False)


### mal_b1_af

In [117]:
# Score every mal_b1_af non-null review bucket with score_sentiments_jul, 25 rows per
# call, keeping each raw response and adding up the token counts returned with it.
rows_per_call = 25
batch_counter = [0]  # one-element list: mutated here, read as full_counter[0] by print_dynamic_message
# Derive the total from the same keys the loop below visits, so the progress
# denominator always matches the number of calls actually made.
total_batches = sum(
    math.ceil(len(mal_b1_af_nonnull_buckets[bucket_key]) / rows_per_call)
    for bucket_key in mal_b1_af_nonnull_buckets.keys()
)
key_counter = 0
total_keys = len(mal_b1_af_nonnull_buckets.keys())
mal_b1_af_nonnull_api = []
input_tokens_mal_b1_af_nonnull = 0
output_tokens_mal_b1_af_nonnull = 0
start_time_mal_b1_af = time.time()

for key in mal_b1_af_nonnull_buckets.keys():
    key_counter += 1
    current_df = mal_b1_af_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    # Progress display runs on a background thread until stop_event is set
    total_iterations = math.ceil(len(current_df) / rows_per_call)
    stop_event = threading.Event()
    message_thread = threading.Thread(
        target=print_dynamic_message,
        args=(batch_counter, key_counter, counter, total_batches, total_keys,
              total_iterations, start_time_mal_b1_af, stop_event),
    )
    message_thread.start()

    try:
        for start in range(0, len(current_df), rows_per_call):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + rows_per_call
            result, it, ot = score_sentiments_jul(current_df[start:end])
            mal_b1_af_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        # Stop the progress thread even when a call raises; otherwise it would
        # keep looping on stop_event and clearing the cell output.
        stop_event.set()
        message_thread.join()
    input_tokens_mal_b1_af_nonnull += input_tokens
    output_tokens_mal_b1_af_nonnull += output_tokens
    clear_output(wait=True)

overall_execution_time_mal_b1_af = time.time() - start_time_mal_b1_af
formatted_time_mal_b1_af = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_mal_b1_af))
# Hard-coded rates: 0.01 per 1000 input tokens, 0.03 per 1000 output tokens
input_token_cost_mal_b1_af = round((0.01/1000) * input_tokens_mal_b1_af_nonnull, 2)
output_token_cost_mal_b1_af = round((0.03/1000) * output_tokens_mal_b1_af_nonnull, 2)
total_cost_mal_b1_af = round(input_token_cost_mal_b1_af + output_token_cost_mal_b1_af, 2)

# batch_counter is a one-element list; print the number it holds rather than the list repr
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_mal_b1_af}")
print(f"Total Input Tokens - {input_tokens_mal_b1_af_nonnull}")
print(f"Total Input Cost = {input_token_cost_mal_b1_af}")
print(f"Total Output Tokens - {output_tokens_mal_b1_af_nonnull}")
print(f"Total Output Cost = {output_token_cost_mal_b1_af}")
print(f"Total Cost = {total_cost_mal_b1_af}")

Executed [13] Iterations
Total Execution Time: 00:01:19
Total Input Tokens - 14895
Total Input Cost = 0.15
Total Output Tokens - 6938
Total Output Cost = 0.21
Total Cost = 0.36


In [118]:
# Clean the raw mal_b1_af API responses: drop the Markdown code-fence markers,
# then swap every empty "[]" for a single feedback entry with blank
# positive/negative strings. The cleaned strings go into a one-column DataFrame.
mal_b1_af_nonnull_api_cleaned = [
    unfenced.replace("[]", '[{"positive": "", "negative": ""}]')
    for unfenced in (
        response.replace("```json", "").replace("```", "")
        for response in mal_b1_af_nonnull_api
    )
]
mal_b1_af_nonnull_api_cleaned_df = pd.DataFrame(mal_b1_af_nonnull_api_cleaned)

# Topics scored for every commentor
column_names2 = [
    'Trust',
    'Store Experience',
    'Store Staff',
    'Product Design',
    'Product Variety',
    'Discount',
    'Making Charge',
    'Price',
    'Product Quality',
    'OLD Gold Jewellery Exchange',
]

In [119]:
# Parse each cleaned API response (a JSON object mapping commentor name -> list of
# feedback dicts) into one row per commentor holding a -1/0/1 score per topic.
processed_data_mal_b1_af_nonnull = []

#Iterate over each row in the DataFrame
for index, row in mal_b1_af_nonnull_api_cleaned_df.iterrows():
    try:
        #Assuming the JSON string is in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Iterate over each key-value pair in the dictionary
        for commentor_name, feedback_list in data.items():
            #Create a dictionary for each commentor with default values as 0
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #Check if there is any feedback for the commentor
            if feedback_list:
                #Since there could be multiple feedback entries for a single commentor, we aggregate them
                for feedback in feedback_list:
                    # Strip each topic so "Trust, Price" still matches "Price";
                    # `or ""` keeps a JSON null from aborting the whole response.
                    positive = [t.strip() for t in (feedback.get("positive") or "").split(',')]
                    negative = [t.strip() for t in (feedback.get("negative") or "").split(',')]

                    for topic in column_names2:
                        #Aggregate the value: 1 for positive, -1 for negative, 0 for not mentioned
                        commentor_data[topic] += assign_value(topic, positive, negative)

                #Finalize the values to be within -1, 0, or 1
                for topic in column_names2:
                    commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_mal_b1_af_nonnull.append(commentor_data)
    except Exception as exc:
        # Best-effort: report and skip responses that cannot be parsed. `Exception`
        # (not a bare except) lets KeyboardInterrupt through. A skipped response adds
        # no commentor rows, which matters for the later position-based concat.
        print(f"Invalid JSON in row {index}: {exc}")

mal_b1_af_nonnull_api_cleaned_df.to_excel("mal_b1_af_nonnull_api_cleaned_df.xlsx",index=False)

mal_b1_af_nonnull_api_cleaned_df = pd.read_excel("mal_b1_af_nonnull_api_cleaned_df.xlsx")

mal_b1_af_nonnull_api_cleaned_df

In [120]:
# Build the per-commentor sentiment frame for mal_b1_af, attach it to the review
# rows, append the null-review rows and export the result to Excel.
ordered_columns = ["Commentor Name",
                   "Trust",
                   "Store Experience",
                   "Store Staff",
                   "Product Design",
                   "Product Variety",
                   "Discount",
                   "Making Charge",
                   "Price",
                   "Product Quality",
                   "OLD Gold Jewellery Exchange"]
# reindex (instead of [] selection) also works when no rows were parsed at all
mal_b1_af_nonnull_sen_df = pd.DataFrame(processed_data_mal_b1_af_nonnull).reindex(columns=ordered_columns)

# concat(axis=1) pairs rows by index label, not by commentor name, so a skipped or
# short API response shifts every later score onto the wrong review.
# NOTE(review): assumes combined_df_mal_b1_af is in the order the reviews were sent — confirm.
if len(mal_b1_af_nonnull_sen_df) != len(combined_df_mal_b1_af):
    print(f"WARNING: {len(mal_b1_af_nonnull_sen_df)} sentiment rows vs {len(combined_df_mal_b1_af)} mal_b1_af review rows; rows may be misaligned")
mal_b1_af_nonnull_merged_df = pd.concat([combined_df_mal_b1_af, mal_b1_af_nonnull_sen_df], axis=1)

mal_b1_af_final_sen_df = pd.concat([mal_b1_af_nonnull_merged_df,null_dataframes['mal_b1_af_null']], ignore_index=True)

mal_b1_af_final_sen_df_copy = mal_b1_af_final_sen_df.copy()
# Keep only the first 10 characters of the stringified date
mal_b1_af_final_sen_df_copy["Published At Date"] = mal_b1_af_final_sen_df_copy["Published At Date"].astype(str).str[:10]

# Make sure the output folder exists before writing
os.makedirs("sentiment_raw_output", exist_ok=True)
mal_b1_af_final_sen_df_copy.to_excel("sentiment_raw_output/mal_b1_af_final_sen_df_jul.xlsx",index=False)


### mal_ak

In [121]:
# Score every mal_ak non-null review bucket with score_sentiments_jul, 25 rows per
# call, keeping each raw response and adding up the token counts returned with it.
rows_per_call = 25
batch_counter = [0]  # one-element list: mutated here, read as full_counter[0] by print_dynamic_message
# Derive the total from the same keys the loop below visits, so the progress
# denominator always matches the number of calls actually made.
total_batches = sum(
    math.ceil(len(mal_ak_nonnull_buckets[bucket_key]) / rows_per_call)
    for bucket_key in mal_ak_nonnull_buckets.keys()
)
key_counter = 0
total_keys = len(mal_ak_nonnull_buckets.keys())
mal_ak_nonnull_api = []
input_tokens_mal_ak_nonnull = 0
output_tokens_mal_ak_nonnull = 0
start_time_mal_ak = time.time()

for key in mal_ak_nonnull_buckets.keys():
    key_counter += 1
    current_df = mal_ak_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    # Progress display runs on a background thread until stop_event is set
    total_iterations = math.ceil(len(current_df) / rows_per_call)
    stop_event = threading.Event()
    message_thread = threading.Thread(
        target=print_dynamic_message,
        args=(batch_counter, key_counter, counter, total_batches, total_keys,
              total_iterations, start_time_mal_ak, stop_event),
    )
    message_thread.start()

    try:
        for start in range(0, len(current_df), rows_per_call):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + rows_per_call
            result, it, ot = score_sentiments_jul(current_df[start:end])
            mal_ak_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        # Stop the progress thread even when a call raises; otherwise it would
        # keep looping on stop_event and clearing the cell output.
        stop_event.set()
        message_thread.join()
    input_tokens_mal_ak_nonnull += input_tokens
    output_tokens_mal_ak_nonnull += output_tokens
    clear_output(wait=True)

overall_execution_time_mal_ak = time.time() - start_time_mal_ak
formatted_time_mal_ak = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_mal_ak))
# Hard-coded rates: 0.01 per 1000 input tokens, 0.03 per 1000 output tokens
input_token_cost_mal_ak = round((0.01/1000) * input_tokens_mal_ak_nonnull, 2)
output_token_cost_mal_ak = round((0.03/1000) * output_tokens_mal_ak_nonnull, 2)
total_cost_mal_ak = round(input_token_cost_mal_ak + output_token_cost_mal_ak, 2)

# batch_counter is a one-element list; print the number it holds rather than the list repr
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_mal_ak}")
print(f"Total Input Tokens - {input_tokens_mal_ak_nonnull}")
print(f"Total Input Cost = {input_token_cost_mal_ak}")
print(f"Total Output Tokens - {output_tokens_mal_ak_nonnull}")
print(f"Total Output Cost = {output_token_cost_mal_ak}")
print(f"Total Cost = {total_cost_mal_ak}")

Executed [24] Iterations
Total Execution Time: 00:02:56
Total Input Tokens - 31930
Total Input Cost = 0.32
Total Output Tokens - 14028
Total Output Cost = 0.42
Total Cost = 0.74


In [122]:
# Clean the raw mal_ak API responses: drop the Markdown code-fence markers,
# then swap every empty "[]" for a single feedback entry with blank
# positive/negative strings. The cleaned strings go into a one-column DataFrame.
mal_ak_nonnull_api_cleaned = [
    unfenced.replace("[]", '[{"positive": "", "negative": ""}]')
    for unfenced in (
        response.replace("```json", "").replace("```", "")
        for response in mal_ak_nonnull_api
    )
]
mal_ak_nonnull_api_cleaned_df = pd.DataFrame(mal_ak_nonnull_api_cleaned)

# Topics scored for every commentor
column_names2 = [
    'Trust',
    'Store Experience',
    'Store Staff',
    'Product Design',
    'Product Variety',
    'Discount',
    'Making Charge',
    'Price',
    'Product Quality',
    'OLD Gold Jewellery Exchange',
]

In [123]:
# Parse each cleaned API response (a JSON object mapping commentor name -> list of
# feedback dicts) into one row per commentor holding a -1/0/1 score per topic.
processed_data_mal_ak_nonnull = []

#Iterate over each row in the DataFrame
for index, row in mal_ak_nonnull_api_cleaned_df.iterrows():
    try:
        #Assuming the JSON string is in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Iterate over each key-value pair in the dictionary
        for commentor_name, feedback_list in data.items():
            #Create a dictionary for each commentor with default values as 0
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #Check if there is any feedback for the commentor
            if feedback_list:
                #Since there could be multiple feedback entries for a single commentor, we aggregate them
                for feedback in feedback_list:
                    # Strip each topic so "Trust, Price" still matches "Price";
                    # `or ""` keeps a JSON null from aborting the whole response.
                    positive = [t.strip() for t in (feedback.get("positive") or "").split(',')]
                    negative = [t.strip() for t in (feedback.get("negative") or "").split(',')]

                    for topic in column_names2:
                        #Aggregate the value: 1 for positive, -1 for negative, 0 for not mentioned
                        commentor_data[topic] += assign_value(topic, positive, negative)

                #Finalize the values to be within -1, 0, or 1
                for topic in column_names2:
                    commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_mal_ak_nonnull.append(commentor_data)
    except Exception as exc:
        # Best-effort: report and skip responses that cannot be parsed. `Exception`
        # (not a bare except) lets KeyboardInterrupt through. A skipped response adds
        # no commentor rows, which matters for the later position-based concat.
        print(f"Invalid JSON in row {index}: {exc}")

mal_ak_nonnull_api_cleaned_df.to_excel("mal_ak_nonnull_api_cleaned_df.xlsx",index=False)

mal_ak_nonnull_api_cleaned_df = pd.read_excel("mal_ak_nonnull_api_cleaned_df.xlsx")

mal_ak_nonnull_api_cleaned_df

In [124]:
# Build the per-commentor sentiment frame for mal_ak, attach it to the review
# rows, append the null-review rows and export the result to Excel.
ordered_columns = ["Commentor Name",
                   "Trust",
                   "Store Experience",
                   "Store Staff",
                   "Product Design",
                   "Product Variety",
                   "Discount",
                   "Making Charge",
                   "Price",
                   "Product Quality",
                   "OLD Gold Jewellery Exchange"]
# reindex (instead of [] selection) also works when no rows were parsed at all
mal_ak_nonnull_sen_df = pd.DataFrame(processed_data_mal_ak_nonnull).reindex(columns=ordered_columns)

# concat(axis=1) pairs rows by index label, not by commentor name, so a skipped or
# short API response shifts every later score onto the wrong review.
# NOTE(review): assumes combined_df_mal_ak is in the order the reviews were sent — confirm.
if len(mal_ak_nonnull_sen_df) != len(combined_df_mal_ak):
    print(f"WARNING: {len(mal_ak_nonnull_sen_df)} sentiment rows vs {len(combined_df_mal_ak)} mal_ak review rows; rows may be misaligned")
mal_ak_nonnull_merged_df = pd.concat([combined_df_mal_ak, mal_ak_nonnull_sen_df], axis=1)

mal_ak_final_sen_df = pd.concat([mal_ak_nonnull_merged_df,null_dataframes['mal_ak_null']], ignore_index=True)

mal_ak_final_sen_df_copy = mal_ak_final_sen_df.copy()
# Keep only the first 10 characters of the stringified date
mal_ak_final_sen_df_copy["Published At Date"] = mal_ak_final_sen_df_copy["Published At Date"].astype(str).str[:10]

# Make sure the output folder exists before writing
os.makedirs("sentiment_raw_output", exist_ok=True)
mal_ak_final_sen_df_copy.to_excel("sentiment_raw_output/mal_ak_final_sen_df_jul.xlsx",index=False)


### mal_aw_ad

In [125]:
# Score the non-null `mal_aw_ad` reviews bucket by bucket: each bucket is sent to
# score_sentiments_jul in slices of `rows_per_request` rows while a helper thread
# (print_dynamic_message) renders live progress in the cell output.
rows_per_request = 25
batch_counter = [0]  # one-element list so the progress thread sees in-place updates
# Number of API calls the loop below will make, summed over every bucket it visits.
total_batches = sum(
    math.ceil(len(bucket_df) / rows_per_request)
    for bucket_df in mal_aw_ad_nonnull_buckets.values()
)
key_counter = 0
total_keys = len(mal_aw_ad_nonnull_buckets.keys())
mal_aw_ad_nonnull_api = []  # first value returned by each score_sentiments_jul call, in call order
input_tokens_mal_aw_ad_nonnull = 0
output_tokens_mal_aw_ad_nonnull = 0
start_time_mal_aw_ad = time.time()

for key in mal_aw_ad_nonnull_buckets.keys():
    key_counter += 1
    current_df = mal_aw_ad_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]  # per-bucket batch counter shared with the progress thread
    input_tokens = 0
    output_tokens = 0

    # Threading setup
    total_iterations = math.ceil(len(current_df) / rows_per_request)
    stop_event = threading.Event()
    message_thread = threading.Thread(
        target=print_dynamic_message,
        args=(batch_counter, key_counter, counter, total_batches, total_keys,
              total_iterations, start_time_mal_aw_ad, stop_event),
    )
    message_thread.start()

    try:
        for start in range(0, len(current_df), rows_per_request):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + rows_per_request
            result, it, ot = score_sentiments_jul(current_df[start:end])
            mal_aw_ad_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        # Always stop the progress thread, even when an API call raises;
        # otherwise it keeps clearing and reprinting the cell output.
        stop_event.set()
        message_thread.join()
    input_tokens_mal_aw_ad_nonnull += input_tokens
    output_tokens_mal_aw_ad_nonnull += output_tokens
    clear_output(wait=True)

overall_execution_time_mal_aw_ad = time.time() - start_time_mal_aw_ad
formatted_time_mal_aw_ad = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_mal_aw_ad))
# Cost estimate: 0.01 per 1,000 input tokens and 0.03 per 1,000 output tokens.
input_token_cost_mal_aw_ad = round((0.01/1000) * input_tokens_mal_aw_ad_nonnull, 2)
output_token_cost_mal_aw_ad = round((0.03/1000) * output_tokens_mal_aw_ad_nonnull, 2)
total_cost_mal_aw_ad = round(input_token_cost_mal_aw_ad + output_token_cost_mal_aw_ad, 2)

# batch_counter is a one-element list; print the count itself rather than the list repr.
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_mal_aw_ad}")
print(f"Total Input Tokens - {input_tokens_mal_aw_ad_nonnull}")
print(f"Total Input Cost = {input_token_cost_mal_aw_ad}")
print(f"Total Output Tokens - {output_tokens_mal_aw_ad_nonnull}")
print(f"Total Output Cost = {output_token_cost_mal_aw_ad}")
print(f"Total Cost = {total_cost_mal_aw_ad}")

Executed [8] Iterations
Total Execution Time: 00:00:33
Total Input Tokens - 8215
Total Input Cost = 0.08
Total Output Tokens - 3052
Total Output Cost = 0.09
Total Cost = 0.17


In [126]:
#Strip the Markdown code fences from every API response and replace each empty "[]"
#with a single entry whose positive/negative values are blank
empty_feedback = '[{"positive": "", "negative": ""}]'
mal_aw_ad_nonnull_api_cleaned = []
for raw_response in mal_aw_ad_nonnull_api:
    unfenced = raw_response.replace("```json", "").replace("```", "")
    mal_aw_ad_nonnull_api_cleaned.append(unfenced.replace("[]", empty_feedback))

#One DataFrame row per cleaned response
mal_aw_ad_nonnull_api_cleaned_df = pd.DataFrame(mal_aw_ad_nonnull_api_cleaned)

#Sentiment topics that become the score columns
column_names2 = [
    'Trust', 'Store Experience', 'Store Staff', 'Product Design', 'Product Variety',
    'Discount', 'Making Charge', 'Price', 'Product Quality', 'OLD Gold Jewellery Exchange',
]

In [127]:
#Initialize an empty list to store the processed data (one dict per commentor)
processed_data_mal_aw_ad_nonnull = []

#Iterate over each row in the DataFrame (one cleaned API response per row)
for index, row in mal_aw_ad_nonnull_api_cleaned_df.iterrows():
    try:
        #Assuming the JSON string is in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Each key is a commentor name mapped to a list of feedback entries
        for commentor_name, feedback_list in data.items():
            #Start every topic at 0 (not mentioned)
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #Check if there is any feedback for the commentor
            if feedback_list:
                #A commentor may have several feedback entries; sum them per topic
                for feedback in feedback_list:
                    #Strip whitespace so "Trust, Price" still matches the topic "Price"
                    positive = [t.strip() for t in feedback.get("positive", "").split(',')]
                    negative = [t.strip() for t in feedback.get("negative", "").split(',')]

                    for topic in column_names2:
                        #1 for positive, -1 for negative, 0 for not mentioned
                        commentor_data[topic] += assign_value(topic, positive, negative)

                #Clamp the aggregated score to -1, 0 or 1
                for topic in column_names2:
                    commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_mal_aw_ad_nonnull.append(commentor_data)
    except Exception as exc:
        #Skip responses that are not valid JSON or not in the expected shape, but report why.
        #Catching Exception (not a bare except) lets KeyboardInterrupt/SystemExit propagate.
        #NOTE(review): a skipped response yields fewer rows here; the later axis=1 concat with
        #combined_df_mal_aw_ad pairs rows by index label, so scores may end up shifted - verify
        print(f"Invalid JSON in row {index}: {exc}")

#Persist the cleaned responses to Excel, reload them from that file and display the result
mal_aw_ad_nonnull_api_cleaned_df.to_excel("mal_aw_ad_nonnull_api_cleaned_df.xlsx",index=False)

mal_aw_ad_nonnull_api_cleaned_df = pd.read_excel("mal_aw_ad_nonnull_api_cleaned_df.xlsx")

mal_aw_ad_nonnull_api_cleaned_df

In [128]:
#Column layout of the sentiment frame: commentor name first, then the ten topic scores
ordered_columns = [
    "Commentor Name",
    "Trust", "Store Experience", "Store Staff", "Product Design", "Product Variety",
    "Discount", "Making Charge", "Price", "Product Quality", "OLD Gold Jewellery Exchange",
]
#Build the sentiment frame from the processed records and apply that column order
mal_aw_ad_nonnull_sen_df = pd.DataFrame(processed_data_mal_aw_ad_nonnull).loc[:, ordered_columns]

#Place the sentiment columns next to the review columns (axis=1 aligns rows on the index)
mal_aw_ad_nonnull_merged_df = pd.concat(
    [combined_df_mal_aw_ad, mal_aw_ad_nonnull_sen_df],
    axis=1,
)

#Stack the rows of null_dataframes['mal_aw_ad_null'] underneath and renumber the index
mal_aw_ad_final_sen_df = pd.concat(
    [mal_aw_ad_nonnull_merged_df, null_dataframes['mal_aw_ad_null']],
    ignore_index=True,
)

#Work on a copy and keep only the first 10 characters of the stringified date
mal_aw_ad_final_sen_df_copy = mal_aw_ad_final_sen_df.copy()
published_as_text = mal_aw_ad_final_sen_df_copy["Published At Date"].astype(str)
mal_aw_ad_final_sen_df_copy["Published At Date"] = published_as_text.str[:10]

#Write the final frame to Excel without the index column
mal_aw_ad_final_sen_df_copy.to_excel("sentiment_raw_output/mal_aw_ad_final_sen_df_jul.xlsx", index=False)


### mal_dm_ad

In [129]:
# Score the non-null `mal_dm_ad` reviews bucket by bucket: each bucket is sent to
# score_sentiments_jul in slices of `rows_per_request` rows while a helper thread
# (print_dynamic_message) renders live progress in the cell output.
rows_per_request = 25
batch_counter = [0]  # one-element list so the progress thread sees in-place updates
# Number of API calls the loop below will make, summed over every bucket it visits.
total_batches = sum(
    math.ceil(len(bucket_df) / rows_per_request)
    for bucket_df in mal_dm_ad_nonnull_buckets.values()
)
key_counter = 0
total_keys = len(mal_dm_ad_nonnull_buckets.keys())
mal_dm_ad_nonnull_api = []  # first value returned by each score_sentiments_jul call, in call order
input_tokens_mal_dm_ad_nonnull = 0
output_tokens_mal_dm_ad_nonnull = 0
start_time_mal_dm_ad = time.time()

for key in mal_dm_ad_nonnull_buckets.keys():
    key_counter += 1
    current_df = mal_dm_ad_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]  # per-bucket batch counter shared with the progress thread
    input_tokens = 0
    output_tokens = 0

    # Threading setup
    total_iterations = math.ceil(len(current_df) / rows_per_request)
    stop_event = threading.Event()
    message_thread = threading.Thread(
        target=print_dynamic_message,
        args=(batch_counter, key_counter, counter, total_batches, total_keys,
              total_iterations, start_time_mal_dm_ad, stop_event),
    )
    message_thread.start()

    try:
        for start in range(0, len(current_df), rows_per_request):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + rows_per_request
            result, it, ot = score_sentiments_jul(current_df[start:end])
            mal_dm_ad_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        # Always stop the progress thread, even when an API call raises;
        # otherwise it keeps clearing and reprinting the cell output.
        stop_event.set()
        message_thread.join()
    input_tokens_mal_dm_ad_nonnull += input_tokens
    output_tokens_mal_dm_ad_nonnull += output_tokens
    clear_output(wait=True)

overall_execution_time_mal_dm_ad = time.time() - start_time_mal_dm_ad
formatted_time_mal_dm_ad = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_mal_dm_ad))
# Cost estimate: 0.01 per 1,000 input tokens and 0.03 per 1,000 output tokens.
input_token_cost_mal_dm_ad = round((0.01/1000) * input_tokens_mal_dm_ad_nonnull, 2)
output_token_cost_mal_dm_ad = round((0.03/1000) * output_tokens_mal_dm_ad_nonnull, 2)
total_cost_mal_dm_ad = round(input_token_cost_mal_dm_ad + output_token_cost_mal_dm_ad, 2)

# batch_counter is a one-element list; print the count itself rather than the list repr.
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_mal_dm_ad}")
print(f"Total Input Tokens - {input_tokens_mal_dm_ad_nonnull}")
print(f"Total Input Cost = {input_token_cost_mal_dm_ad}")
print(f"Total Output Tokens - {output_tokens_mal_dm_ad_nonnull}")
print(f"Total Output Cost = {output_token_cost_mal_dm_ad}")
print(f"Total Cost = {total_cost_mal_dm_ad}")

Executed [20] Iterations
Total Execution Time: 00:02:11
Total Input Tokens - 22903
Total Input Cost = 0.23
Total Output Tokens - 11351
Total Output Cost = 0.34
Total Cost = 0.57


In [130]:
#Strip the Markdown code fences from every API response and replace each empty "[]"
#with a single entry whose positive/negative values are blank
empty_feedback = '[{"positive": "", "negative": ""}]'
mal_dm_ad_nonnull_api_cleaned = []
for raw_response in mal_dm_ad_nonnull_api:
    unfenced = raw_response.replace("```json", "").replace("```", "")
    mal_dm_ad_nonnull_api_cleaned.append(unfenced.replace("[]", empty_feedback))

#One DataFrame row per cleaned response
mal_dm_ad_nonnull_api_cleaned_df = pd.DataFrame(mal_dm_ad_nonnull_api_cleaned)

#Sentiment topics that become the score columns
column_names2 = [
    'Trust', 'Store Experience', 'Store Staff', 'Product Design', 'Product Variety',
    'Discount', 'Making Charge', 'Price', 'Product Quality', 'OLD Gold Jewellery Exchange',
]

In [131]:
#Initialize an empty list to store the processed data (one dict per commentor)
processed_data_mal_dm_ad_nonnull = []

#Iterate over each row in the DataFrame (one cleaned API response per row)
for index, row in mal_dm_ad_nonnull_api_cleaned_df.iterrows():
    try:
        #Assuming the JSON string is in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Each key is a commentor name mapped to a list of feedback entries
        for commentor_name, feedback_list in data.items():
            #Start every topic at 0 (not mentioned)
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #Check if there is any feedback for the commentor
            if feedback_list:
                #A commentor may have several feedback entries; sum them per topic
                for feedback in feedback_list:
                    #Strip whitespace so "Trust, Price" still matches the topic "Price"
                    positive = [t.strip() for t in feedback.get("positive", "").split(',')]
                    negative = [t.strip() for t in feedback.get("negative", "").split(',')]

                    for topic in column_names2:
                        #1 for positive, -1 for negative, 0 for not mentioned
                        commentor_data[topic] += assign_value(topic, positive, negative)

                #Clamp the aggregated score to -1, 0 or 1
                for topic in column_names2:
                    commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_mal_dm_ad_nonnull.append(commentor_data)
    except Exception as exc:
        #Skip responses that are not valid JSON or not in the expected shape, but report why.
        #Catching Exception (not a bare except) lets KeyboardInterrupt/SystemExit propagate.
        #NOTE(review): a skipped response yields fewer rows here; the later axis=1 concat with
        #combined_df_mal_dm_ad pairs rows by index label, so scores may end up shifted - verify
        print(f"Invalid JSON in row {index}: {exc}")

#Persist the cleaned responses to Excel, reload them from that file and display the result
mal_dm_ad_nonnull_api_cleaned_df.to_excel("mal_dm_ad_nonnull_api_cleaned_df.xlsx",index=False)

mal_dm_ad_nonnull_api_cleaned_df = pd.read_excel("mal_dm_ad_nonnull_api_cleaned_df.xlsx")

mal_dm_ad_nonnull_api_cleaned_df

In [132]:
#Column layout of the sentiment frame: commentor name first, then the ten topic scores
ordered_columns = [
    "Commentor Name",
    "Trust", "Store Experience", "Store Staff", "Product Design", "Product Variety",
    "Discount", "Making Charge", "Price", "Product Quality", "OLD Gold Jewellery Exchange",
]
#Build the sentiment frame from the processed records and apply that column order
mal_dm_ad_nonnull_sen_df = pd.DataFrame(processed_data_mal_dm_ad_nonnull).loc[:, ordered_columns]

#Place the sentiment columns next to the review columns (axis=1 aligns rows on the index)
mal_dm_ad_nonnull_merged_df = pd.concat(
    [combined_df_mal_dm_ad, mal_dm_ad_nonnull_sen_df],
    axis=1,
)

#Stack the rows of null_dataframes['mal_dm_ad_null'] underneath and renumber the index
mal_dm_ad_final_sen_df = pd.concat(
    [mal_dm_ad_nonnull_merged_df, null_dataframes['mal_dm_ad_null']],
    ignore_index=True,
)

#Work on a copy and keep only the first 10 characters of the stringified date
mal_dm_ad_final_sen_df_copy = mal_dm_ad_final_sen_df.copy()
published_as_text = mal_dm_ad_final_sen_df_copy["Published At Date"].astype(str)
mal_dm_ad_final_sen_df_copy["Published At Date"] = published_as_text.str[:10]

#Write the final frame to Excel without the index column
mal_dm_ad_final_sen_df_copy.to_excel("sentiment_raw_output/mal_dm_ad_final_sen_df_jul.xlsx", index=False)


### mal_b1_ad

In [133]:
# Score the non-null `mal_b1_ad` reviews bucket by bucket: each bucket is sent to
# score_sentiments_jul in slices of `rows_per_request` rows while a helper thread
# (print_dynamic_message) renders live progress in the cell output.
rows_per_request = 25
batch_counter = [0]  # one-element list so the progress thread sees in-place updates
# Number of API calls the loop below will make, summed over every bucket it visits.
total_batches = sum(
    math.ceil(len(bucket_df) / rows_per_request)
    for bucket_df in mal_b1_ad_nonnull_buckets.values()
)
key_counter = 0
total_keys = len(mal_b1_ad_nonnull_buckets.keys())
mal_b1_ad_nonnull_api = []  # first value returned by each score_sentiments_jul call, in call order
input_tokens_mal_b1_ad_nonnull = 0
output_tokens_mal_b1_ad_nonnull = 0
start_time_mal_b1_ad = time.time()

for key in mal_b1_ad_nonnull_buckets.keys():
    key_counter += 1
    current_df = mal_b1_ad_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]  # per-bucket batch counter shared with the progress thread
    input_tokens = 0
    output_tokens = 0

    # Threading setup
    total_iterations = math.ceil(len(current_df) / rows_per_request)
    stop_event = threading.Event()
    message_thread = threading.Thread(
        target=print_dynamic_message,
        args=(batch_counter, key_counter, counter, total_batches, total_keys,
              total_iterations, start_time_mal_b1_ad, stop_event),
    )
    message_thread.start()

    try:
        for start in range(0, len(current_df), rows_per_request):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + rows_per_request
            result, it, ot = score_sentiments_jul(current_df[start:end])
            mal_b1_ad_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        # Always stop the progress thread, even when an API call raises;
        # otherwise it keeps clearing and reprinting the cell output.
        stop_event.set()
        message_thread.join()
    input_tokens_mal_b1_ad_nonnull += input_tokens
    output_tokens_mal_b1_ad_nonnull += output_tokens
    clear_output(wait=True)

overall_execution_time_mal_b1_ad = time.time() - start_time_mal_b1_ad
formatted_time_mal_b1_ad = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_mal_b1_ad))
# Cost estimate: 0.01 per 1,000 input tokens and 0.03 per 1,000 output tokens.
input_token_cost_mal_b1_ad = round((0.01/1000) * input_tokens_mal_b1_ad_nonnull, 2)
output_token_cost_mal_b1_ad = round((0.03/1000) * output_tokens_mal_b1_ad_nonnull, 2)
total_cost_mal_b1_ad = round(input_token_cost_mal_b1_ad + output_token_cost_mal_b1_ad, 2)

# batch_counter is a one-element list; print the count itself rather than the list repr.
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_mal_b1_ad}")
print(f"Total Input Tokens - {input_tokens_mal_b1_ad_nonnull}")
print(f"Total Input Cost = {input_token_cost_mal_b1_ad}")
print(f"Total Output Tokens - {output_tokens_mal_b1_ad_nonnull}")
print(f"Total Output Cost = {output_token_cost_mal_b1_ad}")
print(f"Total Cost = {total_cost_mal_b1_ad}")

Executed [13] Iterations
Total Execution Time: 00:01:25
Total Input Tokens - 14273
Total Input Cost = 0.14
Total Output Tokens - 6555
Total Output Cost = 0.2
Total Cost = 0.34


In [134]:
#Strip the Markdown code fences from every API response and replace each empty "[]"
#with a single entry whose positive/negative values are blank
empty_feedback = '[{"positive": "", "negative": ""}]'
mal_b1_ad_nonnull_api_cleaned = []
for raw_response in mal_b1_ad_nonnull_api:
    unfenced = raw_response.replace("```json", "").replace("```", "")
    mal_b1_ad_nonnull_api_cleaned.append(unfenced.replace("[]", empty_feedback))

#One DataFrame row per cleaned response
mal_b1_ad_nonnull_api_cleaned_df = pd.DataFrame(mal_b1_ad_nonnull_api_cleaned)

#Sentiment topics that become the score columns
column_names2 = [
    'Trust', 'Store Experience', 'Store Staff', 'Product Design', 'Product Variety',
    'Discount', 'Making Charge', 'Price', 'Product Quality', 'OLD Gold Jewellery Exchange',
]

In [135]:
#Initialize an empty list to store the processed data (one dict per commentor)
processed_data_mal_b1_ad_nonnull = []

#Iterate over each row in the DataFrame (one cleaned API response per row)
for index, row in mal_b1_ad_nonnull_api_cleaned_df.iterrows():
    try:
        #Assuming the JSON string is in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Each key is a commentor name mapped to a list of feedback entries
        for commentor_name, feedback_list in data.items():
            #Start every topic at 0 (not mentioned)
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #Check if there is any feedback for the commentor
            if feedback_list:
                #A commentor may have several feedback entries; sum them per topic
                for feedback in feedback_list:
                    #Strip whitespace so "Trust, Price" still matches the topic "Price"
                    positive = [t.strip() for t in feedback.get("positive", "").split(',')]
                    negative = [t.strip() for t in feedback.get("negative", "").split(',')]

                    for topic in column_names2:
                        #1 for positive, -1 for negative, 0 for not mentioned
                        commentor_data[topic] += assign_value(topic, positive, negative)

                #Clamp the aggregated score to -1, 0 or 1
                for topic in column_names2:
                    commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_mal_b1_ad_nonnull.append(commentor_data)
    except Exception as exc:
        #Skip responses that are not valid JSON or not in the expected shape, but report why.
        #Catching Exception (not a bare except) lets KeyboardInterrupt/SystemExit propagate.
        #NOTE(review): a skipped response yields fewer rows here; the later axis=1 concat with
        #combined_df_mal_b1_ad pairs rows by index label, so scores may end up shifted - verify
        print(f"Invalid JSON in row {index}: {exc}")

#Persist the cleaned responses to Excel, reload them from that file and display the result
mal_b1_ad_nonnull_api_cleaned_df.to_excel("mal_b1_ad_nonnull_api_cleaned_df.xlsx",index=False)

mal_b1_ad_nonnull_api_cleaned_df = pd.read_excel("mal_b1_ad_nonnull_api_cleaned_df.xlsx")

mal_b1_ad_nonnull_api_cleaned_df

In [136]:
#Column layout of the sentiment frame: commentor name first, then the ten topic scores
ordered_columns = [
    "Commentor Name",
    "Trust", "Store Experience", "Store Staff", "Product Design", "Product Variety",
    "Discount", "Making Charge", "Price", "Product Quality", "OLD Gold Jewellery Exchange",
]
#Build the sentiment frame from the processed records and apply that column order
mal_b1_ad_nonnull_sen_df = pd.DataFrame(processed_data_mal_b1_ad_nonnull).loc[:, ordered_columns]

#Place the sentiment columns next to the review columns (axis=1 aligns rows on the index)
mal_b1_ad_nonnull_merged_df = pd.concat(
    [combined_df_mal_b1_ad, mal_b1_ad_nonnull_sen_df],
    axis=1,
)

#Stack the rows of null_dataframes['mal_b1_ad_null'] underneath and renumber the index
mal_b1_ad_final_sen_df = pd.concat(
    [mal_b1_ad_nonnull_merged_df, null_dataframes['mal_b1_ad_null']],
    ignore_index=True,
)

#Work on a copy and keep only the first 10 characters of the stringified date
mal_b1_ad_final_sen_df_copy = mal_b1_ad_final_sen_df.copy()
published_as_text = mal_b1_ad_final_sen_df_copy["Published At Date"].astype(str)
mal_b1_ad_final_sen_df_copy["Published At Date"] = published_as_text.str[:10]

#Write the final frame to Excel without the index column
mal_b1_ad_final_sen_df_copy.to_excel("sentiment_raw_output/mal_b1_ad_final_sen_df_jul.xlsx", index=False)


### mal_b2_ad

In [137]:
# Score the non-null `mal_b2_ad` reviews bucket by bucket: each bucket is sent to
# score_sentiments_jul in slices of `rows_per_request` rows while a helper thread
# (print_dynamic_message) renders live progress in the cell output.
rows_per_request = 25
batch_counter = [0]  # one-element list so the progress thread sees in-place updates
# Number of API calls the loop below will make, summed over every bucket it visits.
total_batches = sum(
    math.ceil(len(bucket_df) / rows_per_request)
    for bucket_df in mal_b2_ad_nonnull_buckets.values()
)
key_counter = 0
total_keys = len(mal_b2_ad_nonnull_buckets.keys())
mal_b2_ad_nonnull_api = []  # first value returned by each score_sentiments_jul call, in call order
input_tokens_mal_b2_ad_nonnull = 0
output_tokens_mal_b2_ad_nonnull = 0
start_time_mal_b2_ad = time.time()

for key in mal_b2_ad_nonnull_buckets.keys():
    key_counter += 1
    current_df = mal_b2_ad_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]  # per-bucket batch counter shared with the progress thread
    input_tokens = 0
    output_tokens = 0

    # Threading setup
    total_iterations = math.ceil(len(current_df) / rows_per_request)
    stop_event = threading.Event()
    message_thread = threading.Thread(
        target=print_dynamic_message,
        args=(batch_counter, key_counter, counter, total_batches, total_keys,
              total_iterations, start_time_mal_b2_ad, stop_event),
    )
    message_thread.start()

    try:
        for start in range(0, len(current_df), rows_per_request):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + rows_per_request
            result, it, ot = score_sentiments_jul(current_df[start:end])
            mal_b2_ad_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        # Always stop the progress thread, even when an API call raises;
        # otherwise it keeps clearing and reprinting the cell output.
        stop_event.set()
        message_thread.join()
    input_tokens_mal_b2_ad_nonnull += input_tokens
    output_tokens_mal_b2_ad_nonnull += output_tokens
    clear_output(wait=True)

overall_execution_time_mal_b2_ad = time.time() - start_time_mal_b2_ad
formatted_time_mal_b2_ad = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_mal_b2_ad))
# Cost estimate: 0.01 per 1,000 input tokens and 0.03 per 1,000 output tokens.
input_token_cost_mal_b2_ad = round((0.01/1000) * input_tokens_mal_b2_ad_nonnull, 2)
output_token_cost_mal_b2_ad = round((0.03/1000) * output_tokens_mal_b2_ad_nonnull, 2)
total_cost_mal_b2_ad = round(input_token_cost_mal_b2_ad + output_token_cost_mal_b2_ad, 2)

# batch_counter is a one-element list; print the count itself rather than the list repr.
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_mal_b2_ad}")
print(f"Total Input Tokens - {input_tokens_mal_b2_ad_nonnull}")
print(f"Total Input Cost = {input_token_cost_mal_b2_ad}")
print(f"Total Output Tokens - {output_tokens_mal_b2_ad_nonnull}")
print(f"Total Output Cost = {output_token_cost_mal_b2_ad}")
print(f"Total Cost = {total_cost_mal_b2_ad}")

Executed [33] Iterations
Total Execution Time: 00:04:21
Total Input Tokens - 44785
Total Input Cost = 0.45
Total Output Tokens - 21419
Total Output Cost = 0.64
Total Cost = 1.09


In [138]:
#Strip the Markdown code fences from every API response and replace each empty "[]"
#with a single entry whose positive/negative values are blank
empty_feedback = '[{"positive": "", "negative": ""}]'
mal_b2_ad_nonnull_api_cleaned = []
for raw_response in mal_b2_ad_nonnull_api:
    unfenced = raw_response.replace("```json", "").replace("```", "")
    mal_b2_ad_nonnull_api_cleaned.append(unfenced.replace("[]", empty_feedback))

#One DataFrame row per cleaned response
mal_b2_ad_nonnull_api_cleaned_df = pd.DataFrame(mal_b2_ad_nonnull_api_cleaned)

#Sentiment topics that become the score columns
column_names2 = [
    'Trust', 'Store Experience', 'Store Staff', 'Product Design', 'Product Variety',
    'Discount', 'Making Charge', 'Price', 'Product Quality', 'OLD Gold Jewellery Exchange',
]

In [139]:
#Initialize an empty list to store the processed data (one dict per commentor)
processed_data_mal_b2_ad_nonnull = []

#Iterate over each row in the DataFrame (one cleaned API response per row)
for index, row in mal_b2_ad_nonnull_api_cleaned_df.iterrows():
    try:
        #Assuming the JSON string is in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Each key is a commentor name mapped to a list of feedback entries
        for commentor_name, feedback_list in data.items():
            #Start every topic at 0 (not mentioned)
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #Check if there is any feedback for the commentor
            if feedback_list:
                #A commentor may have several feedback entries; sum them per topic
                for feedback in feedback_list:
                    #Strip whitespace so "Trust, Price" still matches the topic "Price"
                    positive = [t.strip() for t in feedback.get("positive", "").split(',')]
                    negative = [t.strip() for t in feedback.get("negative", "").split(',')]

                    for topic in column_names2:
                        #1 for positive, -1 for negative, 0 for not mentioned
                        commentor_data[topic] += assign_value(topic, positive, negative)

                #Clamp the aggregated score to -1, 0 or 1
                for topic in column_names2:
                    commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_mal_b2_ad_nonnull.append(commentor_data)
    except Exception as exc:
        #Skip responses that are not valid JSON or not in the expected shape, but report why.
        #Catching Exception (not a bare except) lets KeyboardInterrupt/SystemExit propagate.
        #NOTE(review): a skipped response yields fewer rows here; the later axis=1 concat with
        #combined_df_mal_b2_ad pairs rows by index label, so scores may end up shifted - verify
        print(f"Invalid JSON in row {index}: {exc}")

#Persist the cleaned responses to Excel, reload them from that file and display the result
mal_b2_ad_nonnull_api_cleaned_df.to_excel("mal_b2_ad_nonnull_api_cleaned_df.xlsx",index=False)

mal_b2_ad_nonnull_api_cleaned_df = pd.read_excel("mal_b2_ad_nonnull_api_cleaned_df.xlsx")

mal_b2_ad_nonnull_api_cleaned_df

In [140]:
#Column layout of the sentiment frame: commentor name first, then the ten topic scores
ordered_columns = [
    "Commentor Name",
    "Trust", "Store Experience", "Store Staff", "Product Design", "Product Variety",
    "Discount", "Making Charge", "Price", "Product Quality", "OLD Gold Jewellery Exchange",
]
#Build the sentiment frame from the processed records and apply that column order
mal_b2_ad_nonnull_sen_df = pd.DataFrame(processed_data_mal_b2_ad_nonnull).loc[:, ordered_columns]

#Place the sentiment columns next to the review columns (axis=1 aligns rows on the index)
mal_b2_ad_nonnull_merged_df = pd.concat(
    [combined_df_mal_b2_ad, mal_b2_ad_nonnull_sen_df],
    axis=1,
)

#Stack the rows of null_dataframes['mal_b2_ad_null'] underneath and renumber the index
mal_b2_ad_final_sen_df = pd.concat(
    [mal_b2_ad_nonnull_merged_df, null_dataframes['mal_b2_ad_null']],
    ignore_index=True,
)

#Work on a copy and keep only the first 10 characters of the stringified date
mal_b2_ad_final_sen_df_copy = mal_b2_ad_final_sen_df.copy()
published_as_text = mal_b2_ad_final_sen_df_copy["Published At Date"].astype(str)
mal_b2_ad_final_sen_df_copy["Published At Date"] = published_as_text.str[:10]

#Write the final frame to Excel without the index column
mal_b2_ad_final_sen_df_copy.to_excel("sentiment_raw_output/mal_b2_ad_final_sen_df_jul.xlsx", index=False)


### mal_lu_ad

In [141]:
# Score the non-null `mal_lu_ad` reviews bucket by bucket: each bucket is sent to
# score_sentiments_jul in slices of `rows_per_request` rows while a helper thread
# (print_dynamic_message) renders live progress in the cell output.
rows_per_request = 25
batch_counter = [0]  # one-element list so the progress thread sees in-place updates
# Number of API calls the loop below will make, summed over every bucket it visits.
total_batches = sum(
    math.ceil(len(bucket_df) / rows_per_request)
    for bucket_df in mal_lu_ad_nonnull_buckets.values()
)
key_counter = 0
total_keys = len(mal_lu_ad_nonnull_buckets.keys())
mal_lu_ad_nonnull_api = []  # first value returned by each score_sentiments_jul call, in call order
input_tokens_mal_lu_ad_nonnull = 0
output_tokens_mal_lu_ad_nonnull = 0
start_time_mal_lu_ad = time.time()

for key in mal_lu_ad_nonnull_buckets.keys():
    key_counter += 1
    current_df = mal_lu_ad_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]  # per-bucket batch counter shared with the progress thread
    input_tokens = 0
    output_tokens = 0

    # Threading setup
    total_iterations = math.ceil(len(current_df) / rows_per_request)
    stop_event = threading.Event()
    message_thread = threading.Thread(
        target=print_dynamic_message,
        args=(batch_counter, key_counter, counter, total_batches, total_keys,
              total_iterations, start_time_mal_lu_ad, stop_event),
    )
    message_thread.start()

    try:
        for start in range(0, len(current_df), rows_per_request):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + rows_per_request
            result, it, ot = score_sentiments_jul(current_df[start:end])
            mal_lu_ad_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        # Always stop the progress thread, even when an API call raises;
        # otherwise it keeps clearing and reprinting the cell output.
        stop_event.set()
        message_thread.join()
    input_tokens_mal_lu_ad_nonnull += input_tokens
    output_tokens_mal_lu_ad_nonnull += output_tokens
    clear_output(wait=True)

overall_execution_time_mal_lu_ad = time.time() - start_time_mal_lu_ad
formatted_time_mal_lu_ad = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_mal_lu_ad))
# Cost estimate: 0.01 per 1,000 input tokens and 0.03 per 1,000 output tokens.
input_token_cost_mal_lu_ad = round((0.01/1000) * input_tokens_mal_lu_ad_nonnull, 2)
output_token_cost_mal_lu_ad = round((0.03/1000) * output_tokens_mal_lu_ad_nonnull, 2)
total_cost_mal_lu_ad = round(input_token_cost_mal_lu_ad + output_token_cost_mal_lu_ad, 2)

# batch_counter is a one-element list; print the count itself rather than the list repr.
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_mal_lu_ad}")
print(f"Total Input Tokens - {input_tokens_mal_lu_ad_nonnull}")
print(f"Total Input Cost = {input_token_cost_mal_lu_ad}")
print(f"Total Output Tokens - {output_tokens_mal_lu_ad_nonnull}")
print(f"Total Output Cost = {output_token_cost_mal_lu_ad}")
print(f"Total Cost = {total_cost_mal_lu_ad}")

Executed [30] Iterations
Total Execution Time: 00:03:53
Total Input Tokens - 36066
Total Input Cost = 0.36
Total Output Tokens - 17852
Total Output Cost = 0.54
Total Cost = 0.9


In [142]:
#Strip the Markdown code fences from every API response and replace each empty "[]"
#with a single entry whose positive/negative values are blank
empty_feedback = '[{"positive": "", "negative": ""}]'
mal_lu_ad_nonnull_api_cleaned = []
for raw_response in mal_lu_ad_nonnull_api:
    unfenced = raw_response.replace("```json", "").replace("```", "")
    mal_lu_ad_nonnull_api_cleaned.append(unfenced.replace("[]", empty_feedback))

#One DataFrame row per cleaned response
mal_lu_ad_nonnull_api_cleaned_df = pd.DataFrame(mal_lu_ad_nonnull_api_cleaned)

#Sentiment topics that become the score columns
column_names2 = [
    'Trust', 'Store Experience', 'Store Staff', 'Product Design', 'Product Variety',
    'Discount', 'Making Charge', 'Price', 'Product Quality', 'OLD Gold Jewellery Exchange',
]

In [143]:
#Initialize an empty list to store the processed data (one dict per commentor)
processed_data_mal_lu_ad_nonnull = []

#Iterate over each row in the DataFrame (one cleaned API response per row)
for index, row in mal_lu_ad_nonnull_api_cleaned_df.iterrows():
    try:
        #Assuming the JSON string is in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Each key is a commentor name mapped to a list of feedback entries
        for commentor_name, feedback_list in data.items():
            #Start every topic at 0 (not mentioned)
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #Check if there is any feedback for the commentor
            if feedback_list:
                #A commentor may have several feedback entries; sum them per topic
                for feedback in feedback_list:
                    #Strip whitespace so "Trust, Price" still matches the topic "Price"
                    positive = [t.strip() for t in feedback.get("positive", "").split(',')]
                    negative = [t.strip() for t in feedback.get("negative", "").split(',')]

                    for topic in column_names2:
                        #1 for positive, -1 for negative, 0 for not mentioned
                        commentor_data[topic] += assign_value(topic, positive, negative)

                #Clamp the aggregated score to -1, 0 or 1
                for topic in column_names2:
                    commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_mal_lu_ad_nonnull.append(commentor_data)
    except Exception as exc:
        #Skip responses that are not valid JSON or not in the expected shape, but report why.
        #Catching Exception (not a bare except) lets KeyboardInterrupt/SystemExit propagate.
        #NOTE(review): a skipped response yields fewer rows here than there are reviews; if a
        #later step pairs these rows with the review rows by index, scores may shift - verify
        print(f"Invalid JSON in row {index}: {exc}")

#Persist the cleaned responses to Excel, reload them from that file and display the result
mal_lu_ad_nonnull_api_cleaned_df.to_excel("mal_lu_ad_nonnull_api_cleaned_df.xlsx",index=False)

mal_lu_ad_nonnull_api_cleaned_df = pd.read_excel("mal_lu_ad_nonnull_api_cleaned_df.xlsx")

mal_lu_ad_nonnull_api_cleaned_df

In [144]:
#Build the mal_lu_ad sentiment table, attach it to the review rows, append the
#null-review rows and export the result to Excel.
ordered_columns = ["Commentor Name",
                   "Trust",
                   "Store Experience",
                   "Store Staff",
                   "Product Design",
                   "Product Variety",
                   "Discount",
                   "Making Charge",
                   "Price",
                   "Product Quality",
                   "OLD Gold Jewellery Exchange"]
#columns= orders the columns and keeps them present even when nothing was parsed
mal_lu_ad_nonnull_sen_df = pd.DataFrame(processed_data_mal_lu_ad_nonnull, columns=ordered_columns)

#The column-wise concat pairs rows by index label, so differing row counts
#mean the scores no longer line up with their reviews.
#NOTE(review): assumes combined_df_mal_lu_ad has a default 0..n-1 index in the
#same order as the scored batches - confirm.
if len(mal_lu_ad_nonnull_sen_df) != len(combined_df_mal_lu_ad):
    print(f"WARNING: {len(combined_df_mal_lu_ad)} review rows but {len(mal_lu_ad_nonnull_sen_df)} sentiment rows for mal_lu_ad")

mal_lu_ad_nonnull_merged_df = pd.concat([combined_df_mal_lu_ad, mal_lu_ad_nonnull_sen_df], axis=1)

mal_lu_ad_final_sen_df = pd.concat([mal_lu_ad_nonnull_merged_df,null_dataframes['mal_lu_ad_null']], ignore_index=True)

#Export copy keeps only the first 10 characters of the date's string form
mal_lu_ad_final_sen_df_copy = mal_lu_ad_final_sen_df.copy()
mal_lu_ad_final_sen_df_copy["Published At Date"] = mal_lu_ad_final_sen_df_copy["Published At Date"].astype(str).str[:10]

#Make sure the output folder exists so the export cannot fail after the paid API run
os.makedirs("sentiment_raw_output", exist_ok=True)
mal_lu_ad_final_sen_df_copy.to_excel("sentiment_raw_output/mal_lu_ad_final_sen_df_jul.xlsx",index=False)


### mal_mb

In [145]:
#Score every non-null mal_mb review with score_sentiments_jul, bucket by bucket
#in batches of batch_size rows, collecting the raw replies and the token usage.
batch_size = 25  #rows sent per score_sentiments_jul call
batch_counter = [0]  #one-element list so the progress thread reads live updates
#Count batches over the same buckets the loop below walks
total_batches = sum(math.ceil(len(bucket_df) / batch_size) for bucket_df in mal_mb_nonnull_buckets.values())
key_counter = 0
total_keys = len(mal_mb_nonnull_buckets)
mal_mb_nonnull_api = []
input_tokens_mal_mb_nonnull = 0
output_tokens_mal_mb_nonnull = 0
start_time_mal_mb = time.time()

for key in mal_mb_nonnull_buckets.keys():
    key_counter += 1
    current_df = mal_mb_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    #Background thread that keeps reprinting progress until stop_event is set
    total_iterations = math.ceil(len(current_df) / batch_size)
    stop_event = threading.Event()
    message_thread = threading.Thread(target=print_dynamic_message, args=(batch_counter,key_counter,counter,total_batches,total_keys, total_iterations,start_time_mal_mb, stop_event))
    message_thread.start()

    try:
        for start in range(0, len(current_df), batch_size):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + batch_size
            result, it, ot = score_sentiments_jul(current_df[start:end])
            mal_mb_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        #Always stop the progress thread and keep the token tally, even if a
        #scoring call raises; otherwise the thread would keep printing forever
        stop_event.set()
        message_thread.join()
        input_tokens_mal_mb_nonnull += input_tokens
        output_tokens_mal_mb_nonnull += output_tokens
    clear_output(wait=True)

overall_execution_time_mal_mb = time.time() - start_time_mal_mb
formatted_time_mal_mb = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_mal_mb))
#Per-1000-token rates used here: 0.01 for input, 0.03 for output
input_token_cost_mal_mb = round((0.01/1000) * input_tokens_mal_mb_nonnull, 2)
output_token_cost_mal_mb = round((0.03/1000) * output_tokens_mal_mb_nonnull, 2)
#Round the total once from the unrounded costs instead of summing rounded parts
total_cost_mal_mb = round((0.01/1000) * input_tokens_mal_mb_nonnull + (0.03/1000) * output_tokens_mal_mb_nonnull, 2)

#Print the counter value, not the one-element list that holds it
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_mal_mb}")
print(f"Total Input Tokens - {input_tokens_mal_mb_nonnull}")
print(f"Total Input Cost = {input_token_cost_mal_mb}")
print(f"Total Output Tokens - {output_tokens_mal_mb_nonnull}")
print(f"Total Output Cost = {output_token_cost_mal_mb}")
print(f"Total Cost = {total_cost_mal_mb}")

Executed [16] Iterations
Total Execution Time: 00:02:15
Total Input Tokens - 20601
Total Input Cost = 0.21
Total Output Tokens - 9156
Total Output Cost = 0.27
Total Cost = 0.48


In [146]:
#Clean every raw reply in mal_mb_nonnull_api: drop the markdown code-fence
#markers and swap each literal "[]" for a one-element list holding blank
#positive/negative strings. The cleaned strings are then loaded into a DataFrame.
mal_mb_nonnull_api_cleaned = [
    reply.replace("```json", "").replace("```", "").replace(
        "[]", '[{"positive": "", "negative": ""}]'
    )
    for reply in mal_mb_nonnull_api
]
mal_mb_nonnull_api_cleaned_df = pd.DataFrame(data=mal_mb_nonnull_api_cleaned)

#Sentiment topics; each becomes one score column
column_names2 = [
    "Trust",
    "Store Experience",
    "Store Staff",
    "Product Design",
    "Product Variety",
    "Discount",
    "Making Charge",
    "Price",
    "Product Quality",
    "OLD Gold Jewellery Exchange",
]

In [147]:
#Turn each cleaned API reply (a JSON object mapping commentor name to a list
#of feedback entries) into one row of per-topic scores for mal_mb.
processed_data_mal_mb_nonnull = []

#Iterate over each reply in the DataFrame
for index, row in mal_mb_nonnull_api_cleaned_df.iterrows():
    try:
        #The JSON string is expected in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        for commentor_name, feedback_list in data.items():
            #Every topic starts at 0 (not mentioned)
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #A commentor may have several feedback entries; their scores are summed
            for feedback in feedback_list or []:
                #Strip each label so "Trust, Price" matches both column names;
                #a null value is treated as "no topics"
                positive = [label.strip() for label in (feedback.get("positive") or "").split(',')]
                negative = [label.strip() for label in (feedback.get("negative") or "").split(',')]

                for topic in column_names2:
                    #1 for positive, -1 for negative, 0 for not mentioned
                    commentor_data[topic] += assign_value(topic, positive, negative)

            #Clamp the summed score to -1, 0 or 1
            for topic in column_names2:
                commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            processed_data_mal_mb_nonnull.append(commentor_data)
    except Exception as exc:
        #Best effort: report and skip the reply. Unlike a bare except this lets
        #KeyboardInterrupt through and shows what actually went wrong.
        #NOTE(review): a skipped reply leaves fewer score rows than reviews;
        #anything pairing these rows with reviews by position should check counts.
        print(f"Invalid JSON in row {index}: {exc!r}")

#Persist the cleaned replies and reload them from disk
mal_mb_nonnull_api_cleaned_df.to_excel("mal_mb_nonnull_api_cleaned_df.xlsx",index=False)

mal_mb_nonnull_api_cleaned_df = pd.read_excel("mal_mb_nonnull_api_cleaned_df.xlsx")

mal_mb_nonnull_api_cleaned_df

In [148]:
#Build the mal_mb sentiment table, attach it to the review rows, append the
#null-review rows and export the result to Excel.
ordered_columns = ["Commentor Name",
                   "Trust",
                   "Store Experience",
                   "Store Staff",
                   "Product Design",
                   "Product Variety",
                   "Discount",
                   "Making Charge",
                   "Price",
                   "Product Quality",
                   "OLD Gold Jewellery Exchange"]
#columns= orders the columns and keeps them present even when nothing was parsed
mal_mb_nonnull_sen_df = pd.DataFrame(processed_data_mal_mb_nonnull, columns=ordered_columns)

#The column-wise concat pairs rows by index label, so differing row counts
#mean the scores no longer line up with their reviews.
#NOTE(review): assumes combined_df_mal_mb has a default 0..n-1 index in the
#same order as the scored batches - confirm.
if len(mal_mb_nonnull_sen_df) != len(combined_df_mal_mb):
    print(f"WARNING: {len(combined_df_mal_mb)} review rows but {len(mal_mb_nonnull_sen_df)} sentiment rows for mal_mb")

mal_mb_nonnull_merged_df = pd.concat([combined_df_mal_mb, mal_mb_nonnull_sen_df], axis=1)

mal_mb_final_sen_df = pd.concat([mal_mb_nonnull_merged_df,null_dataframes['mal_mb_null']], ignore_index=True)

#Export copy keeps only the first 10 characters of the date's string form
mal_mb_final_sen_df_copy = mal_mb_final_sen_df.copy()
mal_mb_final_sen_df_copy["Published At Date"] = mal_mb_final_sen_df_copy["Published At Date"].astype(str).str[:10]

#Make sure the output folder exists so the export cannot fail after the paid API run
os.makedirs("sentiment_raw_output", exist_ok=True)
mal_mb_final_sen_df_copy.to_excel("sentiment_raw_output/mal_mb_final_sen_df_jul.xlsx",index=False)


### mal_sh_ad

In [149]:
#Score every non-null mal_sh_ad review with score_sentiments_jul, bucket by bucket
#in batches of batch_size rows, collecting the raw replies and the token usage.
batch_size = 25  #rows sent per score_sentiments_jul call
batch_counter = [0]  #one-element list so the progress thread reads live updates
#Count batches over the same buckets the loop below walks
total_batches = sum(math.ceil(len(bucket_df) / batch_size) for bucket_df in mal_sh_ad_nonnull_buckets.values())
key_counter = 0
total_keys = len(mal_sh_ad_nonnull_buckets)
mal_sh_ad_nonnull_api = []
input_tokens_mal_sh_ad_nonnull = 0
output_tokens_mal_sh_ad_nonnull = 0
start_time_mal_sh_ad = time.time()

for key in mal_sh_ad_nonnull_buckets.keys():
    key_counter += 1
    current_df = mal_sh_ad_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    #Background thread that keeps reprinting progress until stop_event is set
    total_iterations = math.ceil(len(current_df) / batch_size)
    stop_event = threading.Event()
    message_thread = threading.Thread(target=print_dynamic_message, args=(batch_counter,key_counter,counter,total_batches,total_keys, total_iterations,start_time_mal_sh_ad, stop_event))
    message_thread.start()

    try:
        for start in range(0, len(current_df), batch_size):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + batch_size
            result, it, ot = score_sentiments_jul(current_df[start:end])
            mal_sh_ad_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        #Always stop the progress thread and keep the token tally, even if a
        #scoring call raises; otherwise the thread would keep printing forever
        stop_event.set()
        message_thread.join()
        input_tokens_mal_sh_ad_nonnull += input_tokens
        output_tokens_mal_sh_ad_nonnull += output_tokens
    clear_output(wait=True)

overall_execution_time_mal_sh_ad = time.time() - start_time_mal_sh_ad
formatted_time_mal_sh_ad = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_mal_sh_ad))
#Per-1000-token rates used here: 0.01 for input, 0.03 for output
input_token_cost_mal_sh_ad = round((0.01/1000) * input_tokens_mal_sh_ad_nonnull, 2)
output_token_cost_mal_sh_ad = round((0.03/1000) * output_tokens_mal_sh_ad_nonnull, 2)
#Round the total once from the unrounded costs instead of summing rounded parts
total_cost_mal_sh_ad = round((0.01/1000) * input_tokens_mal_sh_ad_nonnull + (0.03/1000) * output_tokens_mal_sh_ad_nonnull, 2)

#Print the counter value, not the one-element list that holds it
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_mal_sh_ad}")
print(f"Total Input Tokens - {input_tokens_mal_sh_ad_nonnull}")
print(f"Total Input Cost = {input_token_cost_mal_sh_ad}")
print(f"Total Output Tokens - {output_tokens_mal_sh_ad_nonnull}")
print(f"Total Output Cost = {output_token_cost_mal_sh_ad}")
print(f"Total Cost = {total_cost_mal_sh_ad}")

Executed [12] Iterations
Total Execution Time: 00:01:25
Total Input Tokens - 12307
Total Input Cost = 0.12
Total Output Tokens - 5528
Total Output Cost = 0.17
Total Cost = 0.29


In [150]:
#Clean every raw reply in mal_sh_ad_nonnull_api: drop the markdown code-fence
#markers and swap each literal "[]" for a one-element list holding blank
#positive/negative strings. The cleaned strings are then loaded into a DataFrame.
mal_sh_ad_nonnull_api_cleaned = [
    reply.replace("```json", "").replace("```", "").replace(
        "[]", '[{"positive": "", "negative": ""}]'
    )
    for reply in mal_sh_ad_nonnull_api
]
mal_sh_ad_nonnull_api_cleaned_df = pd.DataFrame(data=mal_sh_ad_nonnull_api_cleaned)

#Sentiment topics; each becomes one score column
column_names2 = [
    "Trust",
    "Store Experience",
    "Store Staff",
    "Product Design",
    "Product Variety",
    "Discount",
    "Making Charge",
    "Price",
    "Product Quality",
    "OLD Gold Jewellery Exchange",
]

In [151]:
#Turn each cleaned API reply (a JSON object mapping commentor name to a list
#of feedback entries) into one row of per-topic scores for mal_sh_ad.
processed_data_mal_sh_ad_nonnull = []

#Iterate over each reply in the DataFrame
for index, row in mal_sh_ad_nonnull_api_cleaned_df.iterrows():
    try:
        #The JSON string is expected in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        for commentor_name, feedback_list in data.items():
            #Every topic starts at 0 (not mentioned)
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #A commentor may have several feedback entries; their scores are summed
            for feedback in feedback_list or []:
                #Strip each label so "Trust, Price" matches both column names;
                #a null value is treated as "no topics"
                positive = [label.strip() for label in (feedback.get("positive") or "").split(',')]
                negative = [label.strip() for label in (feedback.get("negative") or "").split(',')]

                for topic in column_names2:
                    #1 for positive, -1 for negative, 0 for not mentioned
                    commentor_data[topic] += assign_value(topic, positive, negative)

            #Clamp the summed score to -1, 0 or 1
            for topic in column_names2:
                commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            processed_data_mal_sh_ad_nonnull.append(commentor_data)
    except Exception as exc:
        #Best effort: report and skip the reply. Unlike a bare except this lets
        #KeyboardInterrupt through and shows what actually went wrong.
        #NOTE(review): a skipped reply leaves fewer score rows than reviews;
        #anything pairing these rows with reviews by position should check counts.
        print(f"Invalid JSON in row {index}: {exc!r}")

#Persist the cleaned replies and reload them from disk
mal_sh_ad_nonnull_api_cleaned_df.to_excel("mal_sh_ad_nonnull_api_cleaned_df.xlsx",index=False)

mal_sh_ad_nonnull_api_cleaned_df = pd.read_excel("mal_sh_ad_nonnull_api_cleaned_df.xlsx")

mal_sh_ad_nonnull_api_cleaned_df

In [152]:
#Build the mal_sh_ad sentiment table, attach it to the review rows, append the
#null-review rows and export the result to Excel.
ordered_columns = ["Commentor Name",
                   "Trust",
                   "Store Experience",
                   "Store Staff",
                   "Product Design",
                   "Product Variety",
                   "Discount",
                   "Making Charge",
                   "Price",
                   "Product Quality",
                   "OLD Gold Jewellery Exchange"]
#columns= orders the columns and keeps them present even when nothing was parsed
mal_sh_ad_nonnull_sen_df = pd.DataFrame(processed_data_mal_sh_ad_nonnull, columns=ordered_columns)

#The column-wise concat pairs rows by index label, so differing row counts
#mean the scores no longer line up with their reviews.
#NOTE(review): assumes combined_df_mal_sh_ad has a default 0..n-1 index in the
#same order as the scored batches - confirm.
if len(mal_sh_ad_nonnull_sen_df) != len(combined_df_mal_sh_ad):
    print(f"WARNING: {len(combined_df_mal_sh_ad)} review rows but {len(mal_sh_ad_nonnull_sen_df)} sentiment rows for mal_sh_ad")

mal_sh_ad_nonnull_merged_df = pd.concat([combined_df_mal_sh_ad, mal_sh_ad_nonnull_sen_df], axis=1)

mal_sh_ad_final_sen_df = pd.concat([mal_sh_ad_nonnull_merged_df,null_dataframes['mal_sh_ad_null']], ignore_index=True)

#Export copy keeps only the first 10 characters of the date's string form
mal_sh_ad_final_sen_df_copy = mal_sh_ad_final_sen_df.copy()
mal_sh_ad_final_sen_df_copy["Published At Date"] = mal_sh_ad_final_sen_df_copy["Published At Date"].astype(str).str[:10]

#Make sure the output folder exists so the export cannot fail after the paid API run
os.makedirs("sentiment_raw_output", exist_ok=True)
mal_sh_ad_final_sen_df_copy.to_excel("sentiment_raw_output/mal_sh_ad_final_sen_df_jul.xlsx",index=False)


### mal_b2_af

In [153]:
#Score every non-null mal_b2_af review with score_sentiments_jul, bucket by bucket
#in batches of batch_size rows, collecting the raw replies and the token usage.
batch_size = 25  #rows sent per score_sentiments_jul call
batch_counter = [0]  #one-element list so the progress thread reads live updates
#Count batches over the same buckets the loop below walks
total_batches = sum(math.ceil(len(bucket_df) / batch_size) for bucket_df in mal_b2_af_nonnull_buckets.values())
key_counter = 0
total_keys = len(mal_b2_af_nonnull_buckets)
mal_b2_af_nonnull_api = []
input_tokens_mal_b2_af_nonnull = 0
output_tokens_mal_b2_af_nonnull = 0
start_time_mal_b2_af = time.time()

for key in mal_b2_af_nonnull_buckets.keys():
    key_counter += 1
    current_df = mal_b2_af_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    #Background thread that keeps reprinting progress until stop_event is set
    total_iterations = math.ceil(len(current_df) / batch_size)
    stop_event = threading.Event()
    message_thread = threading.Thread(target=print_dynamic_message, args=(batch_counter,key_counter,counter,total_batches,total_keys, total_iterations,start_time_mal_b2_af, stop_event))
    message_thread.start()

    try:
        for start in range(0, len(current_df), batch_size):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + batch_size
            result, it, ot = score_sentiments_jul(current_df[start:end])
            mal_b2_af_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        #Always stop the progress thread and keep the token tally, even if a
        #scoring call raises; otherwise the thread would keep printing forever
        stop_event.set()
        message_thread.join()
        input_tokens_mal_b2_af_nonnull += input_tokens
        output_tokens_mal_b2_af_nonnull += output_tokens
    clear_output(wait=True)

overall_execution_time_mal_b2_af = time.time() - start_time_mal_b2_af
formatted_time_mal_b2_af = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_mal_b2_af))
#Per-1000-token rates used here: 0.01 for input, 0.03 for output
input_token_cost_mal_b2_af = round((0.01/1000) * input_tokens_mal_b2_af_nonnull, 2)
output_token_cost_mal_b2_af = round((0.03/1000) * output_tokens_mal_b2_af_nonnull, 2)
#Round the total once from the unrounded costs instead of summing rounded parts
total_cost_mal_b2_af = round((0.01/1000) * input_tokens_mal_b2_af_nonnull + (0.03/1000) * output_tokens_mal_b2_af_nonnull, 2)

#Print the counter value, not the one-element list that holds it
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_mal_b2_af}")
print(f"Total Input Tokens - {input_tokens_mal_b2_af_nonnull}")
print(f"Total Input Cost = {input_token_cost_mal_b2_af}")
print(f"Total Output Tokens - {output_tokens_mal_b2_af_nonnull}")
print(f"Total Output Cost = {output_token_cost_mal_b2_af}")
print(f"Total Cost = {total_cost_mal_b2_af}")

Executed [10] Iterations
Total Execution Time: 00:00:59
Total Input Tokens - 11020
Total Input Cost = 0.11
Total Output Tokens - 4405
Total Output Cost = 0.13
Total Cost = 0.24


In [154]:
#Clean every raw reply in mal_b2_af_nonnull_api: drop the markdown code-fence
#markers and swap each literal "[]" for a one-element list holding blank
#positive/negative strings. The cleaned strings are then loaded into a DataFrame.
mal_b2_af_nonnull_api_cleaned = [
    reply.replace("```json", "").replace("```", "").replace(
        "[]", '[{"positive": "", "negative": ""}]'
    )
    for reply in mal_b2_af_nonnull_api
]
mal_b2_af_nonnull_api_cleaned_df = pd.DataFrame(data=mal_b2_af_nonnull_api_cleaned)

#Sentiment topics; each becomes one score column
column_names2 = [
    "Trust",
    "Store Experience",
    "Store Staff",
    "Product Design",
    "Product Variety",
    "Discount",
    "Making Charge",
    "Price",
    "Product Quality",
    "OLD Gold Jewellery Exchange",
]

In [155]:
#Turn each cleaned API reply (a JSON object mapping commentor name to a list
#of feedback entries) into one row of per-topic scores for mal_b2_af.
processed_data_mal_b2_af_nonnull = []

#Iterate over each reply in the DataFrame
for index, row in mal_b2_af_nonnull_api_cleaned_df.iterrows():
    try:
        #The JSON string is expected in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        for commentor_name, feedback_list in data.items():
            #Every topic starts at 0 (not mentioned)
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #A commentor may have several feedback entries; their scores are summed
            for feedback in feedback_list or []:
                #Strip each label so "Trust, Price" matches both column names;
                #a null value is treated as "no topics"
                positive = [label.strip() for label in (feedback.get("positive") or "").split(',')]
                negative = [label.strip() for label in (feedback.get("negative") or "").split(',')]

                for topic in column_names2:
                    #1 for positive, -1 for negative, 0 for not mentioned
                    commentor_data[topic] += assign_value(topic, positive, negative)

            #Clamp the summed score to -1, 0 or 1
            for topic in column_names2:
                commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            processed_data_mal_b2_af_nonnull.append(commentor_data)
    except Exception as exc:
        #Best effort: report and skip the reply. Unlike a bare except this lets
        #KeyboardInterrupt through and shows what actually went wrong.
        #NOTE(review): a skipped reply leaves fewer score rows than reviews;
        #anything pairing these rows with reviews by position should check counts.
        print(f"Invalid JSON in row {index}: {exc!r}")

#Persist the cleaned replies and reload them from disk
mal_b2_af_nonnull_api_cleaned_df.to_excel("mal_b2_af_nonnull_api_cleaned_df.xlsx",index=False)

mal_b2_af_nonnull_api_cleaned_df = pd.read_excel("mal_b2_af_nonnull_api_cleaned_df.xlsx")

mal_b2_af_nonnull_api_cleaned_df

In [156]:
#Build the mal_b2_af sentiment table, attach it to the review rows, append the
#null-review rows and export the result to Excel.
ordered_columns = ["Commentor Name",
                   "Trust",
                   "Store Experience",
                   "Store Staff",
                   "Product Design",
                   "Product Variety",
                   "Discount",
                   "Making Charge",
                   "Price",
                   "Product Quality",
                   "OLD Gold Jewellery Exchange"]
#columns= orders the columns and keeps them present even when nothing was parsed
mal_b2_af_nonnull_sen_df = pd.DataFrame(processed_data_mal_b2_af_nonnull, columns=ordered_columns)

#The column-wise concat pairs rows by index label, so differing row counts
#mean the scores no longer line up with their reviews.
#NOTE(review): assumes combined_df_mal_b2_af has a default 0..n-1 index in the
#same order as the scored batches - confirm.
if len(mal_b2_af_nonnull_sen_df) != len(combined_df_mal_b2_af):
    print(f"WARNING: {len(combined_df_mal_b2_af)} review rows but {len(mal_b2_af_nonnull_sen_df)} sentiment rows for mal_b2_af")

mal_b2_af_nonnull_merged_df = pd.concat([combined_df_mal_b2_af, mal_b2_af_nonnull_sen_df], axis=1)

mal_b2_af_final_sen_df = pd.concat([mal_b2_af_nonnull_merged_df,null_dataframes['mal_b2_af_null']], ignore_index=True)

#Export copy keeps only the first 10 characters of the date's string form
mal_b2_af_final_sen_df_copy = mal_b2_af_final_sen_df.copy()
mal_b2_af_final_sen_df_copy["Published At Date"] = mal_b2_af_final_sen_df_copy["Published At Date"].astype(str).str[:10]

#Make sure the output folder exists so the export cannot fail after the paid API run
os.makedirs("sentiment_raw_output", exist_ok=True)
mal_b2_af_final_sen_df_copy.to_excel("sentiment_raw_output/mal_b2_af_final_sen_df_jul.xlsx",index=False)



### mna_mb

In [157]:
#Score every non-null mna_mb review with score_sentiments_jul, bucket by bucket
#in batches of batch_size rows, collecting the raw replies and the token usage.
batch_size = 25  #rows sent per score_sentiments_jul call
batch_counter = [0]  #one-element list so the progress thread reads live updates
#Count batches over the same buckets the loop below walks
total_batches = sum(math.ceil(len(bucket_df) / batch_size) for bucket_df in mna_mb_nonnull_buckets.values())
key_counter = 0
total_keys = len(mna_mb_nonnull_buckets)
mna_mb_nonnull_api = []
input_tokens_mna_mb_nonnull = 0
output_tokens_mna_mb_nonnull = 0
start_time_mna_mb = time.time()

for key in mna_mb_nonnull_buckets.keys():
    key_counter += 1
    current_df = mna_mb_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    #Background thread that keeps reprinting progress until stop_event is set
    total_iterations = math.ceil(len(current_df) / batch_size)
    stop_event = threading.Event()
    message_thread = threading.Thread(target=print_dynamic_message, args=(batch_counter,key_counter,counter,total_batches,total_keys, total_iterations,start_time_mna_mb, stop_event))
    message_thread.start()

    try:
        for start in range(0, len(current_df), batch_size):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + batch_size
            result, it, ot = score_sentiments_jul(current_df[start:end])
            mna_mb_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        #Always stop the progress thread and keep the token tally, even if a
        #scoring call raises; otherwise the thread would keep printing forever
        stop_event.set()
        message_thread.join()
        input_tokens_mna_mb_nonnull += input_tokens
        output_tokens_mna_mb_nonnull += output_tokens
    clear_output(wait=True)

overall_execution_time_mna_mb = time.time() - start_time_mna_mb
formatted_time_mna_mb = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_mna_mb))
#Per-1000-token rates used here: 0.01 for input, 0.03 for output
input_token_cost_mna_mb = round((0.01/1000) * input_tokens_mna_mb_nonnull, 2)
output_token_cost_mna_mb = round((0.03/1000) * output_tokens_mna_mb_nonnull, 2)
#Round the total once from the unrounded costs instead of summing rounded parts
total_cost_mna_mb = round((0.01/1000) * input_tokens_mna_mb_nonnull + (0.03/1000) * output_tokens_mna_mb_nonnull, 2)

#Print the counter value, not the one-element list that holds it
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_mna_mb}")
print(f"Total Input Tokens - {input_tokens_mna_mb_nonnull}")
print(f"Total Input Cost = {input_token_cost_mna_mb}")
print(f"Total Output Tokens - {output_tokens_mna_mb_nonnull}")
print(f"Total Output Cost = {output_token_cost_mna_mb}")
print(f"Total Cost = {total_cost_mna_mb}")

Executed [3] Iterations
Total Execution Time: 00:00:05
Total Input Tokens - 2102
Total Input Cost = 0.02
Total Output Tokens - 114
Total Output Cost = 0.0
Total Cost = 0.02


In [158]:
#Clean every raw reply in mna_mb_nonnull_api: drop the markdown code-fence
#markers and swap each literal "[]" for a one-element list holding blank
#positive/negative strings. The cleaned strings are then loaded into a DataFrame.
mna_mb_nonnull_api_cleaned = [
    reply.replace("```json", "").replace("```", "").replace(
        "[]", '[{"positive": "", "negative": ""}]'
    )
    for reply in mna_mb_nonnull_api
]
mna_mb_nonnull_api_cleaned_df = pd.DataFrame(data=mna_mb_nonnull_api_cleaned)

#Sentiment topics; each becomes one score column
column_names2 = [
    "Trust",
    "Store Experience",
    "Store Staff",
    "Product Design",
    "Product Variety",
    "Discount",
    "Making Charge",
    "Price",
    "Product Quality",
    "OLD Gold Jewellery Exchange",
]

In [159]:
#Turn each cleaned API reply (a JSON object mapping commentor name to a list
#of feedback entries) into one row of per-topic scores for mna_mb.
processed_data_mna_mb_nonnull = []

#Iterate over each reply in the DataFrame
for index, row in mna_mb_nonnull_api_cleaned_df.iterrows():
    try:
        #The JSON string is expected in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        for commentor_name, feedback_list in data.items():
            #Every topic starts at 0 (not mentioned)
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #A commentor may have several feedback entries; their scores are summed
            for feedback in feedback_list or []:
                #Strip each label so "Trust, Price" matches both column names;
                #a null value is treated as "no topics"
                positive = [label.strip() for label in (feedback.get("positive") or "").split(',')]
                negative = [label.strip() for label in (feedback.get("negative") or "").split(',')]

                for topic in column_names2:
                    #1 for positive, -1 for negative, 0 for not mentioned
                    commentor_data[topic] += assign_value(topic, positive, negative)

            #Clamp the summed score to -1, 0 or 1
            for topic in column_names2:
                commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            processed_data_mna_mb_nonnull.append(commentor_data)
    except Exception as exc:
        #Best effort: report and skip the reply. Unlike a bare except this lets
        #KeyboardInterrupt through and shows what actually went wrong.
        #NOTE(review): a skipped reply leaves fewer score rows than reviews;
        #anything pairing these rows with reviews by position should check counts.
        print(f"Invalid JSON in row {index}: {exc!r}")

#Persist the cleaned replies and reload them from disk
mna_mb_nonnull_api_cleaned_df.to_excel("mna_mb_nonnull_api_cleaned_df.xlsx",index=False)

mna_mb_nonnull_api_cleaned_df = pd.read_excel("mna_mb_nonnull_api_cleaned_df.xlsx")

mna_mb_nonnull_api_cleaned_df

In [160]:
#Build the mna_mb sentiment table, attach it to the review rows, append the
#null-review rows and export the result to Excel.
ordered_columns = ["Commentor Name",
                   "Trust",
                   "Store Experience",
                   "Store Staff",
                   "Product Design",
                   "Product Variety",
                   "Discount",
                   "Making Charge",
                   "Price",
                   "Product Quality",
                   "OLD Gold Jewellery Exchange"]
#columns= orders the columns and keeps them present even when nothing was parsed
mna_mb_nonnull_sen_df = pd.DataFrame(processed_data_mna_mb_nonnull, columns=ordered_columns)

#The column-wise concat pairs rows by index label, so differing row counts
#mean the scores no longer line up with their reviews.
#NOTE(review): assumes combined_df_mna_mb has a default 0..n-1 index in the
#same order as the scored batches - confirm.
if len(mna_mb_nonnull_sen_df) != len(combined_df_mna_mb):
    print(f"WARNING: {len(combined_df_mna_mb)} review rows but {len(mna_mb_nonnull_sen_df)} sentiment rows for mna_mb")

mna_mb_nonnull_merged_df = pd.concat([combined_df_mna_mb, mna_mb_nonnull_sen_df], axis=1)

mna_mb_final_sen_df = pd.concat([mna_mb_nonnull_merged_df,null_dataframes['mna_mb_null']], ignore_index=True)

#Export copy keeps only the first 10 characters of the date's string form
mna_mb_final_sen_df_copy = mna_mb_final_sen_df.copy()
mna_mb_final_sen_df_copy["Published At Date"] = mna_mb_final_sen_df_copy["Published At Date"].astype(str).str[:10]

#Make sure the output folder exists so the export cannot fail after the paid API run
os.makedirs("sentiment_raw_output", exist_ok=True)
mna_mb_final_sen_df_copy.to_excel("sentiment_raw_output/mna_mb_final_sen_df_jul.xlsx",index=False)



### min_ak

In [161]:
#Score every non-null min_ak review with score_sentiments_jul, bucket by bucket
#in batches of batch_size rows, collecting the raw replies and the token usage.
batch_size = 25  #rows sent per score_sentiments_jul call
batch_counter = [0]  #one-element list so the progress thread reads live updates
#Count batches over the same buckets the loop below walks
total_batches = sum(math.ceil(len(bucket_df) / batch_size) for bucket_df in min_ak_nonnull_buckets.values())
key_counter = 0
total_keys = len(min_ak_nonnull_buckets)
min_ak_nonnull_api = []
input_tokens_min_ak_nonnull = 0
output_tokens_min_ak_nonnull = 0
start_time_min_ak = time.time()

for key in min_ak_nonnull_buckets.keys():
    key_counter += 1
    current_df = min_ak_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    #Background thread that keeps reprinting progress until stop_event is set
    total_iterations = math.ceil(len(current_df) / batch_size)
    stop_event = threading.Event()
    message_thread = threading.Thread(target=print_dynamic_message, args=(batch_counter,key_counter,counter,total_batches,total_keys, total_iterations,start_time_min_ak, stop_event))
    message_thread.start()

    try:
        for start in range(0, len(current_df), batch_size):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + batch_size
            result, it, ot = score_sentiments_jul(current_df[start:end])
            min_ak_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        #Always stop the progress thread and keep the token tally, even if a
        #scoring call raises; otherwise the thread would keep printing forever
        stop_event.set()
        message_thread.join()
        input_tokens_min_ak_nonnull += input_tokens
        output_tokens_min_ak_nonnull += output_tokens
    clear_output(wait=True)

overall_execution_time_min_ak = time.time() - start_time_min_ak
formatted_time_min_ak = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_min_ak))
#Per-1000-token rates used here: 0.01 for input, 0.03 for output
input_token_cost_min_ak = round((0.01/1000) * input_tokens_min_ak_nonnull, 2)
output_token_cost_min_ak = round((0.03/1000) * output_tokens_min_ak_nonnull, 2)
#Round the total once from the unrounded costs instead of summing rounded parts
total_cost_min_ak = round((0.01/1000) * input_tokens_min_ak_nonnull + (0.03/1000) * output_tokens_min_ak_nonnull, 2)

#Print the counter value, not the one-element list that holds it
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_min_ak}")
print(f"Total Input Tokens - {input_tokens_min_ak_nonnull}")
print(f"Total Input Cost = {input_token_cost_min_ak}")
print(f"Total Output Tokens - {output_tokens_min_ak_nonnull}")
print(f"Total Output Cost = {output_token_cost_min_ak}")
print(f"Total Cost = {total_cost_min_ak}")

Executed [11] Iterations
Total Execution Time: 00:01:18
Total Input Tokens - 13299
Total Input Cost = 0.13
Total Output Tokens - 5885
Total Output Cost = 0.18
Total Cost = 0.31


In [162]:
#Step 1: drop the Markdown code-fence markers ("```json" and "```") from every reply in min_ak_nonnull_api
min_ak_unfenced_replies = [
    raw_reply.replace("```json", "").replace("```", "")
    for raw_reply in min_ak_nonnull_api
]
#Step 2: swap every occurrence of "[]" for a placeholder feedback entry with empty topic strings
min_ak_nonnull_api_cleaned = [
    unfenced_reply.replace("[]", "[{\"positive\": \"\", \"negative\": \"\"}]")
    for unfenced_reply in min_ak_unfenced_replies
]
#Step 3: one cleaned reply per DataFrame row (single column)
min_ak_nonnull_api_cleaned_df = pd.DataFrame(min_ak_nonnull_api_cleaned)

#Topic columns that get a sentiment score for every commentor
column_names2 = [
    'Trust', 'Store Experience', 'Store Staff',
    'Product Design', 'Product Variety',
    'Discount', 'Making Charge', 'Price',
    'Product Quality', 'OLD Gold Jewellery Exchange',
]

In [163]:
#Turn each cleaned reply (one JSON object per row) into one record per commentor
processed_data_min_ak_nonnull = []

#Iterate over each row in the DataFrame
for index, row in min_ak_nonnull_api_cleaned_df.iterrows():
    try:
        #The JSON string is expected in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Each key is a commentor name, each value that commentor's list of feedback entries
        for commentor_name, feedback_list in data.items():
            #Start every topic at 0 (not mentioned)
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #A commentor may have several feedback entries; sum them per topic
            for feedback in feedback_list or []:
                #Strip whitespace around each topic so "Trust, Price" still matches 'Price',
                #and treat a missing or null value as "no topics" instead of failing on .split
                positive = [t.strip() for t in (feedback.get("positive") or "").split(',')]
                negative = [t.strip() for t in (feedback.get("negative") or "").split(',')]

                for topic in column_names2:
                    #1 for positive, -1 for negative, 0 for not mentioned
                    commentor_data[topic] += assign_value(topic, positive, negative)

            #Clamp the summed score back into -1, 0 or 1
            for topic in column_names2:
                commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_min_ak_nonnull.append(commentor_data)
    except Exception as exc:
        #Best effort: skip the rest of this row but report the cause. Catching Exception
        #(not a bare except) lets KeyboardInterrupt/SystemExit stop the cell as expected.
        print(f"Invalid JSON in row {index}: {exc!r}")

#Persist the cleaned replies, then reload them from disk (the reloaded frame replaces the in-memory one)
min_ak_nonnull_api_cleaned_df.to_excel("min_ak_nonnull_api_cleaned_df.xlsx",index=False)

min_ak_nonnull_api_cleaned_df = pd.read_excel("min_ak_nonnull_api_cleaned_df.xlsx")

min_ak_nonnull_api_cleaned_df

In [164]:
#Column order for the per-commentor sentiment table
ordered_columns = ["Commentor Name",
                   "Trust",
                   "Store Experience",
                   "Store Staff",
                   "Product Design",
                   "Product Variety",
                   "Discount",
                   "Making Charge",
                   "Price",
                   "Product Quality",
                   "OLD Gold Jewellery Exchange"]
#Build the table from the processed records. reindex (rather than df[ordered_columns]) gives the
#same result when records exist and still yields the expected columns when none were parsed,
#where plain column selection raises KeyError.
min_ak_nonnull_sen_df = pd.DataFrame(processed_data_min_ak_nonnull).reindex(columns=ordered_columns)

#concat(axis=1) aligns on index labels, so a row-count mismatch (e.g. a reply skipped while
#parsing) leaves NaN-padded rows in the merged frame; surface it instead of staying silent
if len(min_ak_nonnull_sen_df) != len(combined_df_min_ak):
    print(f"WARNING: combined_df_min_ak has {len(combined_df_min_ak)} rows but min_ak_nonnull_sen_df has {len(min_ak_nonnull_sen_df)}; merged rows will not line up")

min_ak_nonnull_merged_df = pd.concat([combined_df_min_ak, min_ak_nonnull_sen_df], axis=1)

#Append the rows held in null_dataframes['min_ak_null'] below the scored rows
min_ak_final_sen_df = pd.concat([min_ak_nonnull_merged_df,null_dataframes['min_ak_null']], ignore_index=True)

#Work on a copy and keep only the first 10 characters of the stringified publish date
min_ak_final_sen_df_copy = min_ak_final_sen_df.copy()
min_ak_final_sen_df_copy["Published At Date"] = min_ak_final_sen_df_copy["Published At Date"].astype(str).str[:10]

#Make sure the output folder exists so the export also works on a fresh checkout
os.makedirs("sentiment_raw_output", exist_ok=True)
min_ak_final_sen_df_copy.to_excel("sentiment_raw_output/min_ak_final_sen_df_jul.xlsx",index=False)



### joy_ak

In [165]:
#Score every joy_ak non-null bucket in fixed-size slices while a background thread
#(print_dynamic_message) shows live progress, then print time, token and cost totals
BATCH_SIZE = 25  #rows passed to score_sentiments_jul per call
batch_counter = [0]  #one-element list so the progress thread sees in-place updates
#Count batches over the buckets the loop below actually iterates, instead of six hard-coded
#key names that raise KeyError when a bucket is absent
total_batches = sum(math.ceil(len(bucket_df) / BATCH_SIZE) for bucket_df in joy_ak_nonnull_buckets.values())
key_counter = 0
total_keys=len(joy_ak_nonnull_buckets.keys())
joy_ak_nonnull_api = []
input_tokens_joy_ak_nonnull=0
output_tokens_joy_ak_nonnull=0
start_time_joy_ak = time.time()

for key in joy_ak_nonnull_buckets.keys():
    key_counter+=1
    current_df = joy_ak_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    # Threading setup
    total_iterations = math.ceil(len(current_df) / BATCH_SIZE)
    stop_event = threading.Event()
    message_thread = threading.Thread(target=print_dynamic_message, args=(batch_counter,key_counter,counter,total_batches,total_keys, total_iterations,start_time_joy_ak, stop_event))
    message_thread.start()

    try:
        for start in range(0, len(current_df), BATCH_SIZE):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + BATCH_SIZE
            result, it, ot = score_sentiments_jul(current_df[start:end])
            joy_ak_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        # Always stop the progress thread, even if scoring raises; otherwise it keeps
        # looping and clearing the cell output after the failure
        stop_event.set()
        message_thread.join()
    input_tokens_joy_ak_nonnull+=input_tokens
    output_tokens_joy_ak_nonnull+=output_tokens
    clear_output(wait=True)

overall_execution_time_joy_ak = time.time() - start_time_joy_ak
formatted_time_joy_ak = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_joy_ak))
#Cost estimate uses the rates hard-coded here: 0.01 per 1000 input tokens, 0.03 per 1000 output tokens
input_token_cost_joy_ak = round((0.01/1000) * input_tokens_joy_ak_nonnull, 2)
output_token_cost_joy_ak = round((0.03/1000) * output_tokens_joy_ak_nonnull, 2)
total_cost_joy_ak = round(input_token_cost_joy_ak + output_token_cost_joy_ak, 2)

#Print the count held in the list, not the list repr ("[13]")
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_joy_ak}")
print(f"Total Input Tokens - {input_tokens_joy_ak_nonnull}")
print(f"Total Input Cost = {input_token_cost_joy_ak}")
print(f"Total Output Tokens - {output_tokens_joy_ak_nonnull}")
print(f"Total Output Cost = {output_token_cost_joy_ak}")
print(f"Total Cost = {total_cost_joy_ak}")

Executed [13] Iterations
Total Execution Time: 00:01:28
Total Input Tokens - 14322
Total Input Cost = 0.14
Total Output Tokens - 6359
Total Output Cost = 0.19
Total Cost = 0.33


In [166]:
#Step 1: drop the Markdown code-fence markers ("```json" and "```") from every reply in joy_ak_nonnull_api
joy_ak_unfenced_replies = [
    raw_reply.replace("```json", "").replace("```", "")
    for raw_reply in joy_ak_nonnull_api
]
#Step 2: swap every occurrence of "[]" for a placeholder feedback entry with empty topic strings
joy_ak_nonnull_api_cleaned = [
    unfenced_reply.replace("[]", "[{\"positive\": \"\", \"negative\": \"\"}]")
    for unfenced_reply in joy_ak_unfenced_replies
]
#Step 3: one cleaned reply per DataFrame row (single column)
joy_ak_nonnull_api_cleaned_df = pd.DataFrame(joy_ak_nonnull_api_cleaned)

#Topic columns that get a sentiment score for every commentor
column_names2 = [
    'Trust', 'Store Experience', 'Store Staff',
    'Product Design', 'Product Variety',
    'Discount', 'Making Charge', 'Price',
    'Product Quality', 'OLD Gold Jewellery Exchange',
]

In [167]:
#Turn each cleaned reply (one JSON object per row) into one record per commentor
processed_data_joy_ak_nonnull = []

#Iterate over each row in the DataFrame
for index, row in joy_ak_nonnull_api_cleaned_df.iterrows():
    try:
        #The JSON string is expected in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Each key is a commentor name, each value that commentor's list of feedback entries
        for commentor_name, feedback_list in data.items():
            #Start every topic at 0 (not mentioned)
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #A commentor may have several feedback entries; sum them per topic
            for feedback in feedback_list or []:
                #Strip whitespace around each topic so "Trust, Price" still matches 'Price',
                #and treat a missing or null value as "no topics" instead of failing on .split
                positive = [t.strip() for t in (feedback.get("positive") or "").split(',')]
                negative = [t.strip() for t in (feedback.get("negative") or "").split(',')]

                for topic in column_names2:
                    #1 for positive, -1 for negative, 0 for not mentioned
                    commentor_data[topic] += assign_value(topic, positive, negative)

            #Clamp the summed score back into -1, 0 or 1
            for topic in column_names2:
                commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_joy_ak_nonnull.append(commentor_data)
    except Exception as exc:
        #Best effort: skip the rest of this row but report the cause. Catching Exception
        #(not a bare except) lets KeyboardInterrupt/SystemExit stop the cell as expected.
        print(f"Invalid JSON in row {index}: {exc!r}")

#Persist the cleaned replies, then reload them from disk (the reloaded frame replaces the in-memory one)
joy_ak_nonnull_api_cleaned_df.to_excel("joy_ak_nonnull_api_cleaned_df.xlsx",index=False)

joy_ak_nonnull_api_cleaned_df = pd.read_excel("joy_ak_nonnull_api_cleaned_df.xlsx")

joy_ak_nonnull_api_cleaned_df

In [168]:
#Column order for the per-commentor sentiment table
ordered_columns = ["Commentor Name",
                   "Trust",
                   "Store Experience",
                   "Store Staff",
                   "Product Design",
                   "Product Variety",
                   "Discount",
                   "Making Charge",
                   "Price",
                   "Product Quality",
                   "OLD Gold Jewellery Exchange"]
#Build the table from the processed records. reindex (rather than df[ordered_columns]) gives the
#same result when records exist and still yields the expected columns when none were parsed,
#where plain column selection raises KeyError.
joy_ak_nonnull_sen_df = pd.DataFrame(processed_data_joy_ak_nonnull).reindex(columns=ordered_columns)

#concat(axis=1) aligns on index labels, so a row-count mismatch (e.g. a reply skipped while
#parsing) leaves NaN-padded rows in the merged frame; surface it instead of staying silent
if len(joy_ak_nonnull_sen_df) != len(combined_df_joy_ak):
    print(f"WARNING: combined_df_joy_ak has {len(combined_df_joy_ak)} rows but joy_ak_nonnull_sen_df has {len(joy_ak_nonnull_sen_df)}; merged rows will not line up")

joy_ak_nonnull_merged_df = pd.concat([combined_df_joy_ak, joy_ak_nonnull_sen_df], axis=1)

#Append the rows held in null_dataframes['joy_ak_null'] below the scored rows
joy_ak_final_sen_df = pd.concat([joy_ak_nonnull_merged_df,null_dataframes['joy_ak_null']], ignore_index=True)

#Work on a copy and keep only the first 10 characters of the stringified publish date
joy_ak_final_sen_df_copy = joy_ak_final_sen_df.copy()
joy_ak_final_sen_df_copy["Published At Date"] = joy_ak_final_sen_df_copy["Published At Date"].astype(str).str[:10]

#Make sure the output folder exists so the export also works on a fresh checkout
os.makedirs("sentiment_raw_output", exist_ok=True)
joy_ak_final_sen_df_copy.to_excel("sentiment_raw_output/joy_ak_final_sen_df_jul.xlsx",index=False)



### kan_mb

In [169]:
#Score every kan_mb non-null bucket in fixed-size slices while a background thread
#(print_dynamic_message) shows live progress, then print time, token and cost totals
BATCH_SIZE = 25  #rows passed to score_sentiments_jul per call
batch_counter = [0]  #one-element list so the progress thread sees in-place updates
#Count batches over the buckets the loop below actually iterates, instead of six hard-coded
#key names that raise KeyError when a bucket is absent
total_batches = sum(math.ceil(len(bucket_df) / BATCH_SIZE) for bucket_df in kan_mb_nonnull_buckets.values())
key_counter = 0
total_keys=len(kan_mb_nonnull_buckets.keys())
kan_mb_nonnull_api = []
input_tokens_kan_mb_nonnull=0
output_tokens_kan_mb_nonnull=0
start_time_kan_mb = time.time()

for key in kan_mb_nonnull_buckets.keys():
    key_counter+=1
    current_df = kan_mb_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    # Threading setup
    total_iterations = math.ceil(len(current_df) / BATCH_SIZE)
    stop_event = threading.Event()
    message_thread = threading.Thread(target=print_dynamic_message, args=(batch_counter,key_counter,counter,total_batches,total_keys, total_iterations,start_time_kan_mb, stop_event))
    message_thread.start()

    try:
        for start in range(0, len(current_df), BATCH_SIZE):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + BATCH_SIZE
            result, it, ot = score_sentiments_jul(current_df[start:end])
            kan_mb_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        # Always stop the progress thread, even if scoring raises; otherwise it keeps
        # looping and clearing the cell output after the failure
        stop_event.set()
        message_thread.join()
    input_tokens_kan_mb_nonnull+=input_tokens
    output_tokens_kan_mb_nonnull+=output_tokens
    clear_output(wait=True)

overall_execution_time_kan_mb = time.time() - start_time_kan_mb
formatted_time_kan_mb = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_kan_mb))
#Cost estimate uses the rates hard-coded here: 0.01 per 1000 input tokens, 0.03 per 1000 output tokens
input_token_cost_kan_mb = round((0.01/1000) * input_tokens_kan_mb_nonnull, 2)
output_token_cost_kan_mb = round((0.03/1000) * output_tokens_kan_mb_nonnull, 2)
total_cost_kan_mb = round(input_token_cost_kan_mb + output_token_cost_kan_mb, 2)

#Print the count held in the list, not the list repr ("[4]")
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_kan_mb}")
print(f"Total Input Tokens - {input_tokens_kan_mb_nonnull}")
print(f"Total Input Cost = {input_token_cost_kan_mb}")
print(f"Total Output Tokens - {output_tokens_kan_mb_nonnull}")
print(f"Total Output Cost = {output_token_cost_kan_mb}")
print(f"Total Cost = {total_cost_kan_mb}")

Executed [4] Iterations
Total Execution Time: 00:00:10
Total Input Tokens - 3139
Total Input Cost = 0.03
Total Output Tokens - 501
Total Output Cost = 0.02
Total Cost = 0.05


In [170]:
#Step 1: drop the Markdown code-fence markers ("```json" and "```") from every reply in kan_mb_nonnull_api
kan_mb_unfenced_replies = [
    raw_reply.replace("```json", "").replace("```", "")
    for raw_reply in kan_mb_nonnull_api
]
#Step 2: swap every occurrence of "[]" for a placeholder feedback entry with empty topic strings
kan_mb_nonnull_api_cleaned = [
    unfenced_reply.replace("[]", "[{\"positive\": \"\", \"negative\": \"\"}]")
    for unfenced_reply in kan_mb_unfenced_replies
]
#Step 3: one cleaned reply per DataFrame row (single column)
kan_mb_nonnull_api_cleaned_df = pd.DataFrame(kan_mb_nonnull_api_cleaned)

#Topic columns that get a sentiment score for every commentor
column_names2 = [
    'Trust', 'Store Experience', 'Store Staff',
    'Product Design', 'Product Variety',
    'Discount', 'Making Charge', 'Price',
    'Product Quality', 'OLD Gold Jewellery Exchange',
]

In [171]:
#Turn each cleaned reply (one JSON object per row) into one record per commentor
processed_data_kan_mb_nonnull = []

#Iterate over each row in the DataFrame
for index, row in kan_mb_nonnull_api_cleaned_df.iterrows():
    try:
        #The JSON string is expected in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Each key is a commentor name, each value that commentor's list of feedback entries
        for commentor_name, feedback_list in data.items():
            #Start every topic at 0 (not mentioned)
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #A commentor may have several feedback entries; sum them per topic
            for feedback in feedback_list or []:
                #Strip whitespace around each topic so "Trust, Price" still matches 'Price',
                #and treat a missing or null value as "no topics" instead of failing on .split
                positive = [t.strip() for t in (feedback.get("positive") or "").split(',')]
                negative = [t.strip() for t in (feedback.get("negative") or "").split(',')]

                for topic in column_names2:
                    #1 for positive, -1 for negative, 0 for not mentioned
                    commentor_data[topic] += assign_value(topic, positive, negative)

            #Clamp the summed score back into -1, 0 or 1
            for topic in column_names2:
                commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_kan_mb_nonnull.append(commentor_data)
    except Exception as exc:
        #Best effort: skip the rest of this row but report the cause. Catching Exception
        #(not a bare except) lets KeyboardInterrupt/SystemExit stop the cell as expected.
        print(f"Invalid JSON in row {index}: {exc!r}")

#Persist the cleaned replies, then reload them from disk (the reloaded frame replaces the in-memory one)
kan_mb_nonnull_api_cleaned_df.to_excel("kan_mb_nonnull_api_cleaned_df.xlsx",index=False)

kan_mb_nonnull_api_cleaned_df = pd.read_excel("kan_mb_nonnull_api_cleaned_df.xlsx")

kan_mb_nonnull_api_cleaned_df

In [172]:
#Column order for the per-commentor sentiment table
ordered_columns = ["Commentor Name",
                   "Trust",
                   "Store Experience",
                   "Store Staff",
                   "Product Design",
                   "Product Variety",
                   "Discount",
                   "Making Charge",
                   "Price",
                   "Product Quality",
                   "OLD Gold Jewellery Exchange"]
#Build the table from the processed records. reindex (rather than df[ordered_columns]) gives the
#same result when records exist and still yields the expected columns when none were parsed,
#where plain column selection raises KeyError.
kan_mb_nonnull_sen_df = pd.DataFrame(processed_data_kan_mb_nonnull).reindex(columns=ordered_columns)

#concat(axis=1) aligns on index labels, so a row-count mismatch (e.g. a reply skipped while
#parsing) leaves NaN-padded rows in the merged frame; surface it instead of staying silent
if len(kan_mb_nonnull_sen_df) != len(combined_df_kan_mb):
    print(f"WARNING: combined_df_kan_mb has {len(combined_df_kan_mb)} rows but kan_mb_nonnull_sen_df has {len(kan_mb_nonnull_sen_df)}; merged rows will not line up")

kan_mb_nonnull_merged_df = pd.concat([combined_df_kan_mb, kan_mb_nonnull_sen_df], axis=1)

#Append the rows held in null_dataframes['kan_mb_null'] below the scored rows
kan_mb_final_sen_df = pd.concat([kan_mb_nonnull_merged_df,null_dataframes['kan_mb_null']], ignore_index=True)

#Work on a copy and keep only the first 10 characters of the stringified publish date
kan_mb_final_sen_df_copy = kan_mb_final_sen_df.copy()
kan_mb_final_sen_df_copy["Published At Date"] = kan_mb_final_sen_df_copy["Published At Date"].astype(str).str[:10]

#Make sure the output folder exists so the export also works on a fresh checkout
os.makedirs("sentiment_raw_output", exist_ok=True)
kan_mb_final_sen_df_copy.to_excel("sentiment_raw_output/kan_mb_final_sen_df_jul.xlsx",index=False)



### agd_mb

In [173]:
#Score every agd_mb non-null bucket in fixed-size slices while a background thread
#(print_dynamic_message) shows live progress, then print time, token and cost totals
BATCH_SIZE = 25  #rows passed to score_sentiments_jul per call
batch_counter = [0]  #one-element list so the progress thread sees in-place updates
#Count batches over the buckets the loop below actually iterates, instead of six hard-coded
#key names that raise KeyError when a bucket is absent
total_batches = sum(math.ceil(len(bucket_df) / BATCH_SIZE) for bucket_df in agd_mb_nonnull_buckets.values())
key_counter = 0
total_keys=len(agd_mb_nonnull_buckets.keys())
agd_mb_nonnull_api = []
input_tokens_agd_mb_nonnull=0
output_tokens_agd_mb_nonnull=0
start_time_agd_mb = time.time()

for key in agd_mb_nonnull_buckets.keys():
    key_counter+=1
    current_df = agd_mb_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    # Threading setup
    total_iterations = math.ceil(len(current_df) / BATCH_SIZE)
    stop_event = threading.Event()
    message_thread = threading.Thread(target=print_dynamic_message, args=(batch_counter,key_counter,counter,total_batches,total_keys, total_iterations,start_time_agd_mb, stop_event))
    message_thread.start()

    try:
        for start in range(0, len(current_df), BATCH_SIZE):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + BATCH_SIZE
            result, it, ot = score_sentiments_jul(current_df[start:end])
            agd_mb_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        # Always stop the progress thread, even if scoring raises; otherwise it keeps
        # looping and clearing the cell output after the failure
        stop_event.set()
        message_thread.join()
    input_tokens_agd_mb_nonnull+=input_tokens
    output_tokens_agd_mb_nonnull+=output_tokens
    clear_output(wait=True)

overall_execution_time_agd_mb = time.time() - start_time_agd_mb
formatted_time_agd_mb = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_agd_mb))
#Cost estimate uses the rates hard-coded here: 0.01 per 1000 input tokens, 0.03 per 1000 output tokens
input_token_cost_agd_mb = round((0.01/1000) * input_tokens_agd_mb_nonnull, 2)
output_token_cost_agd_mb = round((0.03/1000) * output_tokens_agd_mb_nonnull, 2)
total_cost_agd_mb = round(input_token_cost_agd_mb + output_token_cost_agd_mb, 2)

#Print the count held in the list, not the list repr ("[1]")
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_agd_mb}")
print(f"Total Input Tokens - {input_tokens_agd_mb_nonnull}")
print(f"Total Input Cost = {input_token_cost_agd_mb}")
print(f"Total Output Tokens - {output_tokens_agd_mb_nonnull}")
print(f"Total Output Cost = {output_token_cost_agd_mb}")
print(f"Total Cost = {total_cost_agd_mb}")

Executed [1] Iterations
Total Execution Time: 00:00:01
Total Input Tokens - 670
Total Input Cost = 0.01
Total Output Tokens - 38
Total Output Cost = 0.0
Total Cost = 0.01


In [174]:
#Step 1: drop the Markdown code-fence markers ("```json" and "```") from every reply in agd_mb_nonnull_api
agd_mb_unfenced_replies = [
    raw_reply.replace("```json", "").replace("```", "")
    for raw_reply in agd_mb_nonnull_api
]
#Step 2: swap every occurrence of "[]" for a placeholder feedback entry with empty topic strings
agd_mb_nonnull_api_cleaned = [
    unfenced_reply.replace("[]", "[{\"positive\": \"\", \"negative\": \"\"}]")
    for unfenced_reply in agd_mb_unfenced_replies
]
#Step 3: one cleaned reply per DataFrame row (single column)
agd_mb_nonnull_api_cleaned_df = pd.DataFrame(agd_mb_nonnull_api_cleaned)

#Topic columns that get a sentiment score for every commentor
column_names2 = [
    'Trust', 'Store Experience', 'Store Staff',
    'Product Design', 'Product Variety',
    'Discount', 'Making Charge', 'Price',
    'Product Quality', 'OLD Gold Jewellery Exchange',
]

In [175]:
#Turn each cleaned reply (one JSON object per row) into one record per commentor
processed_data_agd_mb_nonnull = []

#Iterate over each row in the DataFrame
for index, row in agd_mb_nonnull_api_cleaned_df.iterrows():
    try:
        #The JSON string is expected in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Each key is a commentor name, each value that commentor's list of feedback entries
        for commentor_name, feedback_list in data.items():
            #Start every topic at 0 (not mentioned)
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #A commentor may have several feedback entries; sum them per topic
            for feedback in feedback_list or []:
                #Strip whitespace around each topic so "Trust, Price" still matches 'Price',
                #and treat a missing or null value as "no topics" instead of failing on .split
                positive = [t.strip() for t in (feedback.get("positive") or "").split(',')]
                negative = [t.strip() for t in (feedback.get("negative") or "").split(',')]

                for topic in column_names2:
                    #1 for positive, -1 for negative, 0 for not mentioned
                    commentor_data[topic] += assign_value(topic, positive, negative)

            #Clamp the summed score back into -1, 0 or 1
            for topic in column_names2:
                commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_agd_mb_nonnull.append(commentor_data)
    except Exception as exc:
        #Best effort: skip the rest of this row but report the cause. Catching Exception
        #(not a bare except) lets KeyboardInterrupt/SystemExit stop the cell as expected.
        print(f"Invalid JSON in row {index}: {exc!r}")

#Persist the cleaned replies, then reload them from disk (the reloaded frame replaces the in-memory one)
agd_mb_nonnull_api_cleaned_df.to_excel("agd_mb_nonnull_api_cleaned_df.xlsx",index=False)

agd_mb_nonnull_api_cleaned_df = pd.read_excel("agd_mb_nonnull_api_cleaned_df.xlsx")

agd_mb_nonnull_api_cleaned_df

In [176]:
#Column order for the per-commentor sentiment table
ordered_columns = ["Commentor Name",
                   "Trust",
                   "Store Experience",
                   "Store Staff",
                   "Product Design",
                   "Product Variety",
                   "Discount",
                   "Making Charge",
                   "Price",
                   "Product Quality",
                   "OLD Gold Jewellery Exchange"]
#Build the table from the processed records. reindex (rather than df[ordered_columns]) gives the
#same result when records exist and still yields the expected columns when none were parsed,
#where plain column selection raises KeyError.
agd_mb_nonnull_sen_df = pd.DataFrame(processed_data_agd_mb_nonnull).reindex(columns=ordered_columns)

#concat(axis=1) aligns on index labels, so a row-count mismatch (e.g. a reply skipped while
#parsing) leaves NaN-padded rows in the merged frame; surface it instead of staying silent
if len(agd_mb_nonnull_sen_df) != len(combined_df_agd_mb):
    print(f"WARNING: combined_df_agd_mb has {len(combined_df_agd_mb)} rows but agd_mb_nonnull_sen_df has {len(agd_mb_nonnull_sen_df)}; merged rows will not line up")

agd_mb_nonnull_merged_df = pd.concat([combined_df_agd_mb, agd_mb_nonnull_sen_df], axis=1)

#Append the rows held in null_dataframes['agd_mb_null'] below the scored rows
agd_mb_final_sen_df = pd.concat([agd_mb_nonnull_merged_df,null_dataframes['agd_mb_null']], ignore_index=True)

#Work on a copy and keep only the first 10 characters of the stringified publish date
agd_mb_final_sen_df_copy = agd_mb_final_sen_df.copy()
agd_mb_final_sen_df_copy["Published At Date"] = agd_mb_final_sen_df_copy["Published At Date"].astype(str).str[:10]

#Make sure the output folder exists so the export also works on a fresh checkout
os.makedirs("sentiment_raw_output", exist_ok=True)
agd_mb_final_sen_df_copy.to_excel("sentiment_raw_output/agd_mb_final_sen_df_jul.xlsx",index=False)



### bhi_dec_ga

In [177]:
#Score every bhi_dec_ga non-null bucket in fixed-size slices while a background thread
#(print_dynamic_message) shows live progress, then print time, token and cost totals
BATCH_SIZE = 25  #rows passed to score_sentiments_jul per call
batch_counter = [0]  #one-element list so the progress thread sees in-place updates
#Count batches over the buckets the loop below actually iterates, instead of six hard-coded
#key names that raise KeyError when a bucket is absent
total_batches = sum(math.ceil(len(bucket_df) / BATCH_SIZE) for bucket_df in bhi_dec_ga_nonnull_buckets.values())
key_counter = 0
total_keys=len(bhi_dec_ga_nonnull_buckets.keys())
bhi_dec_ga_nonnull_api = []
input_tokens_bhi_dec_ga_nonnull=0
output_tokens_bhi_dec_ga_nonnull=0
start_time_bhi_dec_ga = time.time()

for key in bhi_dec_ga_nonnull_buckets.keys():
    key_counter+=1
    current_df = bhi_dec_ga_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    # Threading setup
    total_iterations = math.ceil(len(current_df) / BATCH_SIZE)
    stop_event = threading.Event()
    message_thread = threading.Thread(target=print_dynamic_message, args=(batch_counter,key_counter,counter,total_batches,total_keys, total_iterations,start_time_bhi_dec_ga, stop_event))
    message_thread.start()

    try:
        for start in range(0, len(current_df), BATCH_SIZE):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + BATCH_SIZE
            result, it, ot = score_sentiments_jul(current_df[start:end])
            bhi_dec_ga_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        # Always stop the progress thread, even if scoring raises; otherwise it keeps
        # looping and clearing the cell output after the failure
        stop_event.set()
        message_thread.join()
    input_tokens_bhi_dec_ga_nonnull+=input_tokens
    output_tokens_bhi_dec_ga_nonnull+=output_tokens
    clear_output(wait=True)

overall_execution_time_bhi_dec_ga = time.time() - start_time_bhi_dec_ga
formatted_time_bhi_dec_ga = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_bhi_dec_ga))
#Cost estimate uses the rates hard-coded here: 0.01 per 1000 input tokens, 0.03 per 1000 output tokens
input_token_cost_bhi_dec_ga = round((0.01/1000) * input_tokens_bhi_dec_ga_nonnull, 2)
output_token_cost_bhi_dec_ga = round((0.03/1000) * output_tokens_bhi_dec_ga_nonnull, 2)
total_cost_bhi_dec_ga = round(input_token_cost_bhi_dec_ga + output_token_cost_bhi_dec_ga, 2)

#Print the count held in the list, not the list repr ("[2]")
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_bhi_dec_ga}")
print(f"Total Input Tokens - {input_tokens_bhi_dec_ga_nonnull}")
print(f"Total Input Cost = {input_token_cost_bhi_dec_ga}")
print(f"Total Output Tokens - {output_tokens_bhi_dec_ga_nonnull}")
print(f"Total Output Cost = {output_token_cost_bhi_dec_ga}")
print(f"Total Cost = {total_cost_bhi_dec_ga}")

Executed [2] Iterations
Total Execution Time: 00:00:03
Total Input Tokens - 1409
Total Input Cost = 0.01
Total Output Tokens - 72
Total Output Cost = 0.0
Total Cost = 0.01


In [178]:
#Step 1: drop the Markdown code-fence markers ("```json" and "```") from every reply in bhi_dec_ga_nonnull_api
bhi_dec_ga_unfenced_replies = [
    raw_reply.replace("```json", "").replace("```", "")
    for raw_reply in bhi_dec_ga_nonnull_api
]
#Step 2: swap every occurrence of "[]" for a placeholder feedback entry with empty topic strings
bhi_dec_ga_nonnull_api_cleaned = [
    unfenced_reply.replace("[]", "[{\"positive\": \"\", \"negative\": \"\"}]")
    for unfenced_reply in bhi_dec_ga_unfenced_replies
]
#Step 3: one cleaned reply per DataFrame row (single column)
bhi_dec_ga_nonnull_api_cleaned_df = pd.DataFrame(bhi_dec_ga_nonnull_api_cleaned)

#Topic columns that get a sentiment score for every commentor
column_names2 = [
    'Trust', 'Store Experience', 'Store Staff',
    'Product Design', 'Product Variety',
    'Discount', 'Making Charge', 'Price',
    'Product Quality', 'OLD Gold Jewellery Exchange',
]

In [179]:
#Turn each cleaned reply (one JSON object per row) into one record per commentor
processed_data_bhi_dec_ga_nonnull = []

#Iterate over each row in the DataFrame
for index, row in bhi_dec_ga_nonnull_api_cleaned_df.iterrows():
    try:
        #The JSON string is expected in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Each key is a commentor name, each value that commentor's list of feedback entries
        for commentor_name, feedback_list in data.items():
            #Start every topic at 0 (not mentioned)
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #A commentor may have several feedback entries; sum them per topic
            for feedback in feedback_list or []:
                #Strip whitespace around each topic so "Trust, Price" still matches 'Price',
                #and treat a missing or null value as "no topics" instead of failing on .split
                positive = [t.strip() for t in (feedback.get("positive") or "").split(',')]
                negative = [t.strip() for t in (feedback.get("negative") or "").split(',')]

                for topic in column_names2:
                    #1 for positive, -1 for negative, 0 for not mentioned
                    commentor_data[topic] += assign_value(topic, positive, negative)

            #Clamp the summed score back into -1, 0 or 1
            for topic in column_names2:
                commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_bhi_dec_ga_nonnull.append(commentor_data)
    except Exception as exc:
        #Best effort: skip the rest of this row but report the cause. Catching Exception
        #(not a bare except) lets KeyboardInterrupt/SystemExit stop the cell as expected.
        print(f"Invalid JSON in row {index}: {exc!r}")

#Persist the cleaned replies, then reload them from disk (the reloaded frame replaces the in-memory one)
bhi_dec_ga_nonnull_api_cleaned_df.to_excel("bhi_dec_ga_nonnull_api_cleaned_df.xlsx",index=False)

bhi_dec_ga_nonnull_api_cleaned_df = pd.read_excel("bhi_dec_ga_nonnull_api_cleaned_df.xlsx")

bhi_dec_ga_nonnull_api_cleaned_df

In [180]:
#Column order for the per-commentor sentiment table
ordered_columns = ["Commentor Name",
                   "Trust",
                   "Store Experience",
                   "Store Staff",
                   "Product Design",
                   "Product Variety",
                   "Discount",
                   "Making Charge",
                   "Price",
                   "Product Quality",
                   "OLD Gold Jewellery Exchange"]
#Build the table from the processed records. reindex (rather than df[ordered_columns]) gives the
#same result when records exist and still yields the expected columns when none were parsed,
#where plain column selection raises KeyError.
bhi_dec_ga_nonnull_sen_df = pd.DataFrame(processed_data_bhi_dec_ga_nonnull).reindex(columns=ordered_columns)

#concat(axis=1) aligns on index labels, so a row-count mismatch (e.g. a reply skipped while
#parsing) leaves NaN-padded rows in the merged frame; surface it instead of staying silent
if len(bhi_dec_ga_nonnull_sen_df) != len(combined_df_bhi_dec_ga):
    print(f"WARNING: combined_df_bhi_dec_ga has {len(combined_df_bhi_dec_ga)} rows but bhi_dec_ga_nonnull_sen_df has {len(bhi_dec_ga_nonnull_sen_df)}; merged rows will not line up")

bhi_dec_ga_nonnull_merged_df = pd.concat([combined_df_bhi_dec_ga, bhi_dec_ga_nonnull_sen_df], axis=1)

#Append the rows held in null_dataframes['bhi_dec_ga_null'] below the scored rows
bhi_dec_ga_final_sen_df = pd.concat([bhi_dec_ga_nonnull_merged_df,null_dataframes['bhi_dec_ga_null']], ignore_index=True)

#Work on a copy and keep only the first 10 characters of the stringified publish date
bhi_dec_ga_final_sen_df_copy = bhi_dec_ga_final_sen_df.copy()
bhi_dec_ga_final_sen_df_copy["Published At Date"] = bhi_dec_ga_final_sen_df_copy["Published At Date"].astype(str).str[:10]

#Make sure the output folder exists so the export also works on a fresh checkout
os.makedirs("sentiment_raw_output", exist_ok=True)
bhi_dec_ga_final_sen_df_copy.to_excel("sentiment_raw_output/bhi_dec_ga_final_sen_df_jul.xlsx",index=False)



### eve_joh_ga

In [181]:
# Send every eve_joh_ga non-null review bucket through score_sentiments_jul in
# fixed-size batches, collecting the raw responses and token counts, then print a
# time / token / cost summary.
batch_size = 25  # rows passed to each score_sentiments_jul call
batch_counter = [0]  # list so print_dynamic_message (which reads element 0) sees updates
# Count batches over the same buckets the loop below iterates, not a fixed key list
total_batches = sum(
    math.ceil(len(bucket_df) / batch_size)
    for bucket_df in eve_joh_ga_nonnull_buckets.values()
)
key_counter = 0
total_keys = len(eve_joh_ga_nonnull_buckets.keys())
eve_joh_ga_nonnull_api = []
input_tokens_eve_joh_ga_nonnull = 0
output_tokens_eve_joh_ga_nonnull = 0
start_time_eve_joh_ga = time.time()

for key in eve_joh_ga_nonnull_buckets.keys():
    key_counter += 1
    current_df = eve_joh_ga_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    # Threading setup: a background thread prints progress for this bucket
    total_iterations = math.ceil(len(current_df) / batch_size)
    stop_event = threading.Event()
    message_thread = threading.Thread(
        target=print_dynamic_message,
        args=(batch_counter, key_counter, counter, total_batches, total_keys,
              total_iterations, start_time_eve_joh_ga, stop_event),
    )
    message_thread.start()

    try:
        for start in range(0, len(current_df), batch_size):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + batch_size
            result, it, ot = score_sentiments_jul(current_df[start:end])
            eve_joh_ga_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        # Always stop the progress thread, even when an API call raises; it loops
        # until stop_event is set and would otherwise keep rewriting the output.
        stop_event.set()
        message_thread.join()
    input_tokens_eve_joh_ga_nonnull += input_tokens
    output_tokens_eve_joh_ga_nonnull += output_tokens
    clear_output(wait=True)

overall_execution_time_eve_joh_ga = time.time() - start_time_eve_joh_ga
formatted_time_eve_joh_ga = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_eve_joh_ga))
# Hard-coded rates: 0.01 per 1000 input tokens, 0.03 per 1000 output tokens
input_token_cost_eve_joh_ga = round((0.01/1000) * input_tokens_eve_joh_ga_nonnull, 2)
output_token_cost_eve_joh_ga = round((0.03/1000) * output_tokens_eve_joh_ga_nonnull, 2)
total_cost_eve_joh_ga = round(input_token_cost_eve_joh_ga + output_token_cost_eve_joh_ga, 2)

# Print the count itself (batch_counter[0]), not the one-element list
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_eve_joh_ga}")
print(f"Total Input Tokens - {input_tokens_eve_joh_ga_nonnull}")
print(f"Total Input Cost = {input_token_cost_eve_joh_ga}")
print(f"Total Output Tokens - {output_tokens_eve_joh_ga_nonnull}")
print(f"Total Output Cost = {output_token_cost_eve_joh_ga}")
print(f"Total Cost = {total_cost_eve_joh_ga}")

Executed [0] Iterations
Total Execution Time: 00:00:00
Total Input Tokens - 0
Total Input Cost = 0.0
Total Output Tokens - 0
Total Output Cost = 0.0
Total Cost = 0.0


In [182]:
#Strip the "```json" / "```" fences from every raw eve_joh_ga API response, swap each
#"[]" for a placeholder feedback entry, then load the cleaned strings into a DataFrame
empty_feedback_placeholder = '[{"positive": "", "negative": ""}]'
eve_joh_ga_nonnull_api_cleaned = []
for raw_response in eve_joh_ga_nonnull_api:
    without_fences = raw_response.replace("```json", "").replace("```", "")
    eve_joh_ga_nonnull_api_cleaned.append(without_fences.replace("[]", empty_feedback_placeholder))
eve_joh_ga_nonnull_api_cleaned_df = pd.DataFrame(eve_joh_ga_nonnull_api_cleaned)

#Topic names used as score columns when the responses are parsed
column_names2 = [
    'Trust',
    'Store Experience',
    'Store Staff',
    'Product Design',
    'Product Variety',
    'Discount',
    'Making Charge',
    'Price',
    'Product Quality',
    'OLD Gold Jewellery Exchange',
]

In [183]:
#Parse each cleaned API response (a JSON object mapping commentor name -> list of
#feedback dicts) into one record per commentor holding a -1/0/1 score per topic.
processed_data_eve_joh_ga_nonnull = []

#Iterate over each row in the DataFrame
for index, row in eve_joh_ga_nonnull_api_cleaned_df.iterrows():
    try:
        #Assuming the JSON string is in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Iterate over each key-value pair in the dictionary
        for commentor_name, feedback_list in data.items():
            #Create a dictionary for each commentor with default values as 0
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #Check if there is any feedback for the commentor
            if feedback_list:
                #Since there could be multiple feedback entries for a single commentor, we aggregate them
                for feedback in feedback_list:
                    #Strip whitespace so "Trust, Price" still matches the topic names exactly
                    positive = [t.strip() for t in feedback.get("positive", "").split(',')]
                    negative = [t.strip() for t in feedback.get("negative", "").split(',')]

                    for topic in column_names2:
                        #Aggregate the value: 1 for positive, -1 for negative, 0 for not mentioned
                        commentor_data[topic] += assign_value(topic, positive, negative)

                #Finalize the values to be within -1, 0, or 1
                for topic in column_names2:
                    commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_eve_joh_ga_nonnull.append(commentor_data)
    except Exception as err:
        #Skip rows that cannot be parsed, but report why. Exception (not a bare except)
        #lets KeyboardInterrupt / SystemExit propagate.
        #NOTE(review): commentors appended before the failure stay in the list, so the
        #record count may no longer match the source reviews - confirm downstream join
        print(f"Invalid JSON in row {index}: {err!r}")

#Persist the cleaned responses, then rebind the name to the frame read back from disk
eve_joh_ga_nonnull_api_cleaned_df.to_excel("eve_joh_ga_nonnull_api_cleaned_df.xlsx",index=False)

eve_joh_ga_nonnull_api_cleaned_df = pd.read_excel("eve_joh_ga_nonnull_api_cleaned_df.xlsx")

eve_joh_ga_nonnull_api_cleaned_df

In [184]:
#Display the parsed per-commentor records for inspection (bare last expression of the cell)
processed_data_eve_joh_ga_nonnull

[]

In [None]:
#Column order for the sentiment frame: commentor name followed by the topic scores
ordered_columns = ["Commentor Name",
                   "Trust",
                    "Store Experience",
                    "Store Staff",
                    "Product Design",
                    "Product Variety",
                    "Discount",
                    "Making Charge",
                    "Price",
                    "Product Quality",
                    "OLD Gold Jewellery Exchange"]
#Create a DataFrame from the processed data in that column order.
#reindex (rather than [...] selection) so an empty processed list still yields a frame
#with the expected columns instead of raising KeyError.
eve_joh_ga_nonnull_sen_df = pd.DataFrame(processed_data_eve_joh_ga_nonnull).reindex(columns=ordered_columns)

#Attach the sentiment columns side by side with the source reviews.
#NOTE(review): axis=1 aligns on index labels - assumes combined_df_eve_joh_ga has a default 0..n-1 index; confirm
eve_joh_ga_nonnull_merged_df = pd.concat([combined_df_eve_joh_ga, eve_joh_ga_nonnull_sen_df], axis=1)

#Append the rows held in null_dataframes['eve_joh_ga_null'] underneath, renumbering the index
eve_joh_ga_final_sen_df = pd.concat([eve_joh_ga_nonnull_merged_df,null_dataframes['eve_joh_ga_null']], ignore_index=True)

#Work on a copy and keep only the first 10 characters of the date's string form
eve_joh_ga_final_sen_df_copy = eve_joh_ga_final_sen_df.copy()
eve_joh_ga_final_sen_df_copy["Published At Date"] = eve_joh_ga_final_sen_df_copy["Published At Date"].astype(str).str[:10]

eve_joh_ga_final_sen_df_copy.to_excel("sentiment_raw_output/eve_joh_ga_final_sen_df_jul.xlsx",index=False)



### jar_bol_il

In [185]:
# Send every jar_bol_il non-null review bucket through score_sentiments_jul in
# fixed-size batches, collecting the raw responses and token counts, then print a
# time / token / cost summary.
batch_size = 25  # rows passed to each score_sentiments_jul call
batch_counter = [0]  # list so print_dynamic_message (which reads element 0) sees updates
# Count batches over the same buckets the loop below iterates, not a fixed key list
total_batches = sum(
    math.ceil(len(bucket_df) / batch_size)
    for bucket_df in jar_bol_il_nonnull_buckets.values()
)
key_counter = 0
total_keys = len(jar_bol_il_nonnull_buckets.keys())
jar_bol_il_nonnull_api = []
input_tokens_jar_bol_il_nonnull = 0
output_tokens_jar_bol_il_nonnull = 0
start_time_jar_bol_il = time.time()

for key in jar_bol_il_nonnull_buckets.keys():
    key_counter += 1
    current_df = jar_bol_il_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    # Threading setup: a background thread prints progress for this bucket
    total_iterations = math.ceil(len(current_df) / batch_size)
    stop_event = threading.Event()
    message_thread = threading.Thread(
        target=print_dynamic_message,
        args=(batch_counter, key_counter, counter, total_batches, total_keys,
              total_iterations, start_time_jar_bol_il, stop_event),
    )
    message_thread.start()

    try:
        for start in range(0, len(current_df), batch_size):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + batch_size
            result, it, ot = score_sentiments_jul(current_df[start:end])
            jar_bol_il_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        # Always stop the progress thread, even when an API call raises; it loops
        # until stop_event is set and would otherwise keep rewriting the output.
        stop_event.set()
        message_thread.join()
    input_tokens_jar_bol_il_nonnull += input_tokens
    output_tokens_jar_bol_il_nonnull += output_tokens
    clear_output(wait=True)

overall_execution_time_jar_bol_il = time.time() - start_time_jar_bol_il
formatted_time_jar_bol_il = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_jar_bol_il))
# Hard-coded rates: 0.01 per 1000 input tokens, 0.03 per 1000 output tokens
input_token_cost_jar_bol_il = round((0.01/1000) * input_tokens_jar_bol_il_nonnull, 2)
output_token_cost_jar_bol_il = round((0.03/1000) * output_tokens_jar_bol_il_nonnull, 2)
total_cost_jar_bol_il = round(input_token_cost_jar_bol_il + output_token_cost_jar_bol_il, 2)

# Print the count itself (batch_counter[0]), not the one-element list
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_jar_bol_il}")
print(f"Total Input Tokens - {input_tokens_jar_bol_il_nonnull}")
print(f"Total Input Cost = {input_token_cost_jar_bol_il}")
print(f"Total Output Tokens - {output_tokens_jar_bol_il_nonnull}")
print(f"Total Output Cost = {output_token_cost_jar_bol_il}")
print(f"Total Cost = {total_cost_jar_bol_il}")

Executed [4] Iterations
Total Execution Time: 00:00:08
Total Input Tokens - 3195
Total Input Cost = 0.03
Total Output Tokens - 285
Total Output Cost = 0.01
Total Cost = 0.04


In [186]:
#Strip the "```json" / "```" fences from every raw jar_bol_il API response, swap each
#"[]" for a placeholder feedback entry, then load the cleaned strings into a DataFrame
empty_feedback_placeholder = '[{"positive": "", "negative": ""}]'
jar_bol_il_nonnull_api_cleaned = []
for raw_response in jar_bol_il_nonnull_api:
    without_fences = raw_response.replace("```json", "").replace("```", "")
    jar_bol_il_nonnull_api_cleaned.append(without_fences.replace("[]", empty_feedback_placeholder))
jar_bol_il_nonnull_api_cleaned_df = pd.DataFrame(jar_bol_il_nonnull_api_cleaned)

#Topic names used as score columns when the responses are parsed
column_names2 = [
    'Trust',
    'Store Experience',
    'Store Staff',
    'Product Design',
    'Product Variety',
    'Discount',
    'Making Charge',
    'Price',
    'Product Quality',
    'OLD Gold Jewellery Exchange',
]

In [187]:
#Parse each cleaned API response (a JSON object mapping commentor name -> list of
#feedback dicts) into one record per commentor holding a -1/0/1 score per topic.
processed_data_jar_bol_il_nonnull = []

#Iterate over each row in the DataFrame
for index, row in jar_bol_il_nonnull_api_cleaned_df.iterrows():
    try:
        #Assuming the JSON string is in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Iterate over each key-value pair in the dictionary
        for commentor_name, feedback_list in data.items():
            #Create a dictionary for each commentor with default values as 0
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #Check if there is any feedback for the commentor
            if feedback_list:
                #Since there could be multiple feedback entries for a single commentor, we aggregate them
                for feedback in feedback_list:
                    #Strip whitespace so "Trust, Price" still matches the topic names exactly
                    positive = [t.strip() for t in feedback.get("positive", "").split(',')]
                    negative = [t.strip() for t in feedback.get("negative", "").split(',')]

                    for topic in column_names2:
                        #Aggregate the value: 1 for positive, -1 for negative, 0 for not mentioned
                        commentor_data[topic] += assign_value(topic, positive, negative)

                #Finalize the values to be within -1, 0, or 1
                for topic in column_names2:
                    commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_jar_bol_il_nonnull.append(commentor_data)
    except Exception as err:
        #Skip rows that cannot be parsed, but report why. Exception (not a bare except)
        #lets KeyboardInterrupt / SystemExit propagate.
        #NOTE(review): commentors appended before the failure stay in the list, so the
        #record count may no longer match the source reviews - confirm downstream join
        print(f"Invalid JSON in row {index}: {err!r}")

#Persist the cleaned responses, then rebind the name to the frame read back from disk
jar_bol_il_nonnull_api_cleaned_df.to_excel("jar_bol_il_nonnull_api_cleaned_df.xlsx",index=False)

jar_bol_il_nonnull_api_cleaned_df = pd.read_excel("jar_bol_il_nonnull_api_cleaned_df.xlsx")

jar_bol_il_nonnull_api_cleaned_df

In [188]:
#Column order for the sentiment frame: commentor name followed by the topic scores
ordered_columns = ["Commentor Name",
                   "Trust",
                    "Store Experience",
                    "Store Staff",
                    "Product Design",
                    "Product Variety",
                    "Discount",
                    "Making Charge",
                    "Price",
                    "Product Quality",
                    "OLD Gold Jewellery Exchange"]
#Create a DataFrame from the processed data in that column order.
#reindex (rather than [...] selection) so an empty processed list still yields a frame
#with the expected columns instead of raising KeyError.
jar_bol_il_nonnull_sen_df = pd.DataFrame(processed_data_jar_bol_il_nonnull).reindex(columns=ordered_columns)

#Attach the sentiment columns side by side with the source reviews.
#NOTE(review): axis=1 aligns on index labels - assumes combined_df_jar_bol_il has a default 0..n-1 index; confirm
jar_bol_il_nonnull_merged_df = pd.concat([combined_df_jar_bol_il, jar_bol_il_nonnull_sen_df], axis=1)

#Append the rows held in null_dataframes['jar_bol_il_null'] underneath, renumbering the index
jar_bol_il_final_sen_df = pd.concat([jar_bol_il_nonnull_merged_df,null_dataframes['jar_bol_il_null']], ignore_index=True)

#Work on a copy and keep only the first 10 characters of the date's string form
jar_bol_il_final_sen_df_copy = jar_bol_il_final_sen_df.copy()
jar_bol_il_final_sen_df_copy["Published At Date"] = jar_bol_il_final_sen_df_copy["Published At Date"].astype(str).str[:10]

jar_bol_il_final_sen_df_copy.to_excel("sentiment_raw_output/jar_bol_il_final_sen_df_jul.xlsx",index=False)



### jar_ver_il

In [189]:
# Send every jar_ver_il non-null review bucket through score_sentiments_jul in
# fixed-size batches, collecting the raw responses and token counts, then print a
# time / token / cost summary.
batch_size = 25  # rows passed to each score_sentiments_jul call
batch_counter = [0]  # list so print_dynamic_message (which reads element 0) sees updates
# Count batches over the same buckets the loop below iterates, not a fixed key list
total_batches = sum(
    math.ceil(len(bucket_df) / batch_size)
    for bucket_df in jar_ver_il_nonnull_buckets.values()
)
key_counter = 0
total_keys = len(jar_ver_il_nonnull_buckets.keys())
jar_ver_il_nonnull_api = []
input_tokens_jar_ver_il_nonnull = 0
output_tokens_jar_ver_il_nonnull = 0
start_time_jar_ver_il = time.time()

for key in jar_ver_il_nonnull_buckets.keys():
    key_counter += 1
    current_df = jar_ver_il_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    # Threading setup: a background thread prints progress for this bucket
    total_iterations = math.ceil(len(current_df) / batch_size)
    stop_event = threading.Event()
    message_thread = threading.Thread(
        target=print_dynamic_message,
        args=(batch_counter, key_counter, counter, total_batches, total_keys,
              total_iterations, start_time_jar_ver_il, stop_event),
    )
    message_thread.start()

    try:
        for start in range(0, len(current_df), batch_size):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + batch_size
            result, it, ot = score_sentiments_jul(current_df[start:end])
            jar_ver_il_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        # Always stop the progress thread, even when an API call raises; it loops
        # until stop_event is set and would otherwise keep rewriting the output.
        stop_event.set()
        message_thread.join()
    input_tokens_jar_ver_il_nonnull += input_tokens
    output_tokens_jar_ver_il_nonnull += output_tokens
    clear_output(wait=True)

overall_execution_time_jar_ver_il = time.time() - start_time_jar_ver_il
formatted_time_jar_ver_il = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_jar_ver_il))
# Hard-coded rates: 0.01 per 1000 input tokens, 0.03 per 1000 output tokens
input_token_cost_jar_ver_il = round((0.01/1000) * input_tokens_jar_ver_il_nonnull, 2)
output_token_cost_jar_ver_il = round((0.03/1000) * output_tokens_jar_ver_il_nonnull, 2)
total_cost_jar_ver_il = round(input_token_cost_jar_ver_il + output_token_cost_jar_ver_il, 2)

# Print the count itself (batch_counter[0]), not the one-element list
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_jar_ver_il}")
print(f"Total Input Tokens - {input_tokens_jar_ver_il_nonnull}")
print(f"Total Input Cost = {input_token_cost_jar_ver_il}")
print(f"Total Output Tokens - {output_tokens_jar_ver_il_nonnull}")
print(f"Total Output Cost = {output_token_cost_jar_ver_il}")
print(f"Total Cost = {total_cost_jar_ver_il}")

Executed [5] Iterations
Total Execution Time: 00:00:08
Total Input Tokens - 3732
Total Input Cost = 0.04
Total Output Tokens - 179
Total Output Cost = 0.01
Total Cost = 0.05


In [190]:
#Strip the "```json" / "```" fences from every raw jar_ver_il API response, swap each
#"[]" for a placeholder feedback entry, then load the cleaned strings into a DataFrame
empty_feedback_placeholder = '[{"positive": "", "negative": ""}]'
jar_ver_il_nonnull_api_cleaned = []
for raw_response in jar_ver_il_nonnull_api:
    without_fences = raw_response.replace("```json", "").replace("```", "")
    jar_ver_il_nonnull_api_cleaned.append(without_fences.replace("[]", empty_feedback_placeholder))
jar_ver_il_nonnull_api_cleaned_df = pd.DataFrame(jar_ver_il_nonnull_api_cleaned)

#Topic names used as score columns when the responses are parsed
column_names2 = [
    'Trust',
    'Store Experience',
    'Store Staff',
    'Product Design',
    'Product Variety',
    'Discount',
    'Making Charge',
    'Price',
    'Product Quality',
    'OLD Gold Jewellery Exchange',
]

In [191]:
#Parse each cleaned API response (a JSON object mapping commentor name -> list of
#feedback dicts) into one record per commentor holding a -1/0/1 score per topic.
processed_data_jar_ver_il_nonnull = []

#Iterate over each row in the DataFrame
for index, row in jar_ver_il_nonnull_api_cleaned_df.iterrows():
    try:
        #Assuming the JSON string is in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Iterate over each key-value pair in the dictionary
        for commentor_name, feedback_list in data.items():
            #Create a dictionary for each commentor with default values as 0
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #Check if there is any feedback for the commentor
            if feedback_list:
                #Since there could be multiple feedback entries for a single commentor, we aggregate them
                for feedback in feedback_list:
                    #Strip whitespace so "Trust, Price" still matches the topic names exactly
                    positive = [t.strip() for t in feedback.get("positive", "").split(',')]
                    negative = [t.strip() for t in feedback.get("negative", "").split(',')]

                    for topic in column_names2:
                        #Aggregate the value: 1 for positive, -1 for negative, 0 for not mentioned
                        commentor_data[topic] += assign_value(topic, positive, negative)

                #Finalize the values to be within -1, 0, or 1
                for topic in column_names2:
                    commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_jar_ver_il_nonnull.append(commentor_data)
    except Exception as err:
        #Skip rows that cannot be parsed, but report why. Exception (not a bare except)
        #lets KeyboardInterrupt / SystemExit propagate.
        #NOTE(review): commentors appended before the failure stay in the list, so the
        #record count may no longer match the source reviews - confirm downstream join
        print(f"Invalid JSON in row {index}: {err!r}")

#Persist the cleaned responses, then rebind the name to the frame read back from disk
jar_ver_il_nonnull_api_cleaned_df.to_excel("jar_ver_il_nonnull_api_cleaned_df.xlsx",index=False)

jar_ver_il_nonnull_api_cleaned_df = pd.read_excel("jar_ver_il_nonnull_api_cleaned_df.xlsx")

jar_ver_il_nonnull_api_cleaned_df

In [192]:
#Column order for the sentiment frame: commentor name followed by the topic scores
ordered_columns = ["Commentor Name",
                   "Trust",
                    "Store Experience",
                    "Store Staff",
                    "Product Design",
                    "Product Variety",
                    "Discount",
                    "Making Charge",
                    "Price",
                    "Product Quality",
                    "OLD Gold Jewellery Exchange"]
#Create a DataFrame from the processed data in that column order.
#reindex (rather than [...] selection) so an empty processed list still yields a frame
#with the expected columns instead of raising KeyError.
jar_ver_il_nonnull_sen_df = pd.DataFrame(processed_data_jar_ver_il_nonnull).reindex(columns=ordered_columns)

#Attach the sentiment columns side by side with the source reviews.
#NOTE(review): axis=1 aligns on index labels - assumes combined_df_jar_ver_il has a default 0..n-1 index; confirm
jar_ver_il_nonnull_merged_df = pd.concat([combined_df_jar_ver_il, jar_ver_il_nonnull_sen_df], axis=1)

#Append the rows held in null_dataframes['jar_ver_il_null'] underneath, renumbering the index
jar_ver_il_final_sen_df = pd.concat([jar_ver_il_nonnull_merged_df,null_dataframes['jar_ver_il_null']], ignore_index=True)

#Work on a copy and keep only the first 10 characters of the date's string form
jar_ver_il_final_sen_df_copy = jar_ver_il_final_sen_df.copy()
jar_ver_il_final_sen_df_copy["Published At Date"] = jar_ver_il_final_sen_df_copy["Published At Date"].astype(str).str[:10]

jar_ver_il_final_sen_df_copy.to_excel("sentiment_raw_output/jar_ver_il_final_sen_df_jul.xlsx",index=False)



### jar_lom_il

In [193]:
# Send every jar_lom_il non-null review bucket through score_sentiments_jul in
# fixed-size batches, collecting the raw responses and token counts, then print a
# time / token / cost summary.
batch_size = 25  # rows passed to each score_sentiments_jul call
batch_counter = [0]  # list so print_dynamic_message (which reads element 0) sees updates
# Count batches over the same buckets the loop below iterates, not a fixed key list
total_batches = sum(
    math.ceil(len(bucket_df) / batch_size)
    for bucket_df in jar_lom_il_nonnull_buckets.values()
)
key_counter = 0
total_keys = len(jar_lom_il_nonnull_buckets.keys())
jar_lom_il_nonnull_api = []
input_tokens_jar_lom_il_nonnull = 0
output_tokens_jar_lom_il_nonnull = 0
start_time_jar_lom_il = time.time()

for key in jar_lom_il_nonnull_buckets.keys():
    key_counter += 1
    current_df = jar_lom_il_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    # Threading setup: a background thread prints progress for this bucket
    total_iterations = math.ceil(len(current_df) / batch_size)
    stop_event = threading.Event()
    message_thread = threading.Thread(
        target=print_dynamic_message,
        args=(batch_counter, key_counter, counter, total_batches, total_keys,
              total_iterations, start_time_jar_lom_il, stop_event),
    )
    message_thread.start()

    try:
        for start in range(0, len(current_df), batch_size):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + batch_size
            result, it, ot = score_sentiments_jul(current_df[start:end])
            jar_lom_il_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        # Always stop the progress thread, even when an API call raises; it loops
        # until stop_event is set and would otherwise keep rewriting the output.
        stop_event.set()
        message_thread.join()
    input_tokens_jar_lom_il_nonnull += input_tokens
    output_tokens_jar_lom_il_nonnull += output_tokens
    clear_output(wait=True)

overall_execution_time_jar_lom_il = time.time() - start_time_jar_lom_il
formatted_time_jar_lom_il = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_jar_lom_il))
# Hard-coded rates: 0.01 per 1000 input tokens, 0.03 per 1000 output tokens
input_token_cost_jar_lom_il = round((0.01/1000) * input_tokens_jar_lom_il_nonnull, 2)
output_token_cost_jar_lom_il = round((0.03/1000) * output_tokens_jar_lom_il_nonnull, 2)
total_cost_jar_lom_il = round(input_token_cost_jar_lom_il + output_token_cost_jar_lom_il, 2)

# Print the count itself (batch_counter[0]), not the one-element list
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_jar_lom_il}")
print(f"Total Input Tokens - {input_tokens_jar_lom_il_nonnull}")
print(f"Total Input Cost = {input_token_cost_jar_lom_il}")
print(f"Total Output Tokens - {output_tokens_jar_lom_il_nonnull}")
print(f"Total Output Cost = {output_token_cost_jar_lom_il}")
print(f"Total Cost = {total_cost_jar_lom_il}")

Executed [2] Iterations
Total Execution Time: 00:00:03
Total Input Tokens - 1581
Total Input Cost = 0.02
Total Output Tokens - 105
Total Output Cost = 0.0
Total Cost = 0.02


In [194]:
#Strip the "```json" / "```" fences from every raw jar_lom_il API response, swap each
#"[]" for a placeholder feedback entry, then load the cleaned strings into a DataFrame
empty_feedback_placeholder = '[{"positive": "", "negative": ""}]'
jar_lom_il_nonnull_api_cleaned = []
for raw_response in jar_lom_il_nonnull_api:
    without_fences = raw_response.replace("```json", "").replace("```", "")
    jar_lom_il_nonnull_api_cleaned.append(without_fences.replace("[]", empty_feedback_placeholder))
jar_lom_il_nonnull_api_cleaned_df = pd.DataFrame(jar_lom_il_nonnull_api_cleaned)

#Topic names used as score columns when the responses are parsed
column_names2 = [
    'Trust',
    'Store Experience',
    'Store Staff',
    'Product Design',
    'Product Variety',
    'Discount',
    'Making Charge',
    'Price',
    'Product Quality',
    'OLD Gold Jewellery Exchange',
]

In [195]:
#Parse each cleaned API response (a JSON object mapping commentor name -> list of
#feedback dicts) into one record per commentor holding a -1/0/1 score per topic.
processed_data_jar_lom_il_nonnull = []

#Iterate over each row in the DataFrame
for index, row in jar_lom_il_nonnull_api_cleaned_df.iterrows():
    try:
        #Assuming the JSON string is in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Iterate over each key-value pair in the dictionary
        for commentor_name, feedback_list in data.items():
            #Create a dictionary for each commentor with default values as 0
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #Check if there is any feedback for the commentor
            if feedback_list:
                #Since there could be multiple feedback entries for a single commentor, we aggregate them
                for feedback in feedback_list:
                    #Strip whitespace so "Trust, Price" still matches the topic names exactly
                    positive = [t.strip() for t in feedback.get("positive", "").split(',')]
                    negative = [t.strip() for t in feedback.get("negative", "").split(',')]

                    for topic in column_names2:
                        #Aggregate the value: 1 for positive, -1 for negative, 0 for not mentioned
                        commentor_data[topic] += assign_value(topic, positive, negative)

                #Finalize the values to be within -1, 0, or 1
                for topic in column_names2:
                    commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_jar_lom_il_nonnull.append(commentor_data)
    except Exception as err:
        #Skip rows that cannot be parsed, but report why. Exception (not a bare except)
        #lets KeyboardInterrupt / SystemExit propagate.
        #NOTE(review): commentors appended before the failure stay in the list, so the
        #record count may no longer match the source reviews - confirm downstream join
        print(f"Invalid JSON in row {index}: {err!r}")

#Persist the cleaned responses, then rebind the name to the frame read back from disk
jar_lom_il_nonnull_api_cleaned_df.to_excel("jar_lom_il_nonnull_api_cleaned_df.xlsx",index=False)

jar_lom_il_nonnull_api_cleaned_df = pd.read_excel("jar_lom_il_nonnull_api_cleaned_df.xlsx")

jar_lom_il_nonnull_api_cleaned_df

In [196]:
#Column order for the sentiment frame: commentor name followed by the topic scores
ordered_columns = ["Commentor Name",
                   "Trust",
                    "Store Experience",
                    "Store Staff",
                    "Product Design",
                    "Product Variety",
                    "Discount",
                    "Making Charge",
                    "Price",
                    "Product Quality",
                    "OLD Gold Jewellery Exchange"]
#Create a DataFrame from the processed data in that column order.
#reindex (rather than [...] selection) so an empty processed list still yields a frame
#with the expected columns instead of raising KeyError.
jar_lom_il_nonnull_sen_df = pd.DataFrame(processed_data_jar_lom_il_nonnull).reindex(columns=ordered_columns)

#Attach the sentiment columns side by side with the source reviews.
#NOTE(review): axis=1 aligns on index labels - assumes combined_df_jar_lom_il has a default 0..n-1 index; confirm
jar_lom_il_nonnull_merged_df = pd.concat([combined_df_jar_lom_il, jar_lom_il_nonnull_sen_df], axis=1)

#Append the rows held in null_dataframes['jar_lom_il_null'] underneath, renumbering the index
jar_lom_il_final_sen_df = pd.concat([jar_lom_il_nonnull_merged_df,null_dataframes['jar_lom_il_null']], ignore_index=True)

#Work on a copy and keep only the first 10 characters of the date's string form
jar_lom_il_final_sen_df_copy = jar_lom_il_final_sen_df.copy()
jar_lom_il_final_sen_df_copy["Published At Date"] = jar_lom_il_final_sen_df_copy["Published At Date"].astype(str).str[:10]

jar_lom_il_final_sen_df_copy.to_excel("sentiment_raw_output/jar_lom_il_final_sen_df_jul.xlsx",index=False)



### jar_orl_il

In [197]:
# Send every jar_orl_il non-null review bucket through score_sentiments_jul in
# fixed-size batches, collecting the raw responses and token counts, then print a
# time / token / cost summary.
batch_size = 25  # rows passed to each score_sentiments_jul call
batch_counter = [0]  # list so print_dynamic_message (which reads element 0) sees updates
# Count batches over the same buckets the loop below iterates, not a fixed key list
total_batches = sum(
    math.ceil(len(bucket_df) / batch_size)
    for bucket_df in jar_orl_il_nonnull_buckets.values()
)
key_counter = 0
total_keys = len(jar_orl_il_nonnull_buckets.keys())
jar_orl_il_nonnull_api = []
input_tokens_jar_orl_il_nonnull = 0
output_tokens_jar_orl_il_nonnull = 0
start_time_jar_orl_il = time.time()

for key in jar_orl_il_nonnull_buckets.keys():
    key_counter += 1
    current_df = jar_orl_il_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    # Threading setup: a background thread prints progress for this bucket
    total_iterations = math.ceil(len(current_df) / batch_size)
    stop_event = threading.Event()
    message_thread = threading.Thread(
        target=print_dynamic_message,
        args=(batch_counter, key_counter, counter, total_batches, total_keys,
              total_iterations, start_time_jar_orl_il, stop_event),
    )
    message_thread.start()

    try:
        for start in range(0, len(current_df), batch_size):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + batch_size
            result, it, ot = score_sentiments_jul(current_df[start:end])
            jar_orl_il_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        # Always stop the progress thread, even when an API call raises; it loops
        # until stop_event is set and would otherwise keep rewriting the output.
        stop_event.set()
        message_thread.join()
    input_tokens_jar_orl_il_nonnull += input_tokens
    output_tokens_jar_orl_il_nonnull += output_tokens
    clear_output(wait=True)

overall_execution_time_jar_orl_il = time.time() - start_time_jar_orl_il
formatted_time_jar_orl_il = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_jar_orl_il))
# Hard-coded rates: 0.01 per 1000 input tokens, 0.03 per 1000 output tokens
input_token_cost_jar_orl_il = round((0.01/1000) * input_tokens_jar_orl_il_nonnull, 2)
output_token_cost_jar_orl_il = round((0.03/1000) * output_tokens_jar_orl_il_nonnull, 2)
total_cost_jar_orl_il = round(input_token_cost_jar_orl_il + output_token_cost_jar_orl_il, 2)

# Print the count itself (batch_counter[0]), not the one-element list
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_jar_orl_il}")
print(f"Total Input Tokens - {input_tokens_jar_orl_il_nonnull}")
print(f"Total Input Cost = {input_token_cost_jar_orl_il}")
print(f"Total Output Tokens - {output_tokens_jar_orl_il_nonnull}")
print(f"Total Output Cost = {output_token_cost_jar_orl_il}")
print(f"Total Cost = {total_cost_jar_orl_il}")

Executed [1] Iterations
Total Execution Time: 00:00:02
Total Input Tokens - 759
Total Input Cost = 0.01
Total Output Tokens - 92
Total Output Cost = 0.0
Total Cost = 0.01


In [198]:
#Strip the "```json" / "```" fences from every raw jar_orl_il API response, swap each
#"[]" for a placeholder feedback entry, then load the cleaned strings into a DataFrame
empty_feedback_placeholder = '[{"positive": "", "negative": ""}]'
jar_orl_il_nonnull_api_cleaned = []
for raw_response in jar_orl_il_nonnull_api:
    without_fences = raw_response.replace("```json", "").replace("```", "")
    jar_orl_il_nonnull_api_cleaned.append(without_fences.replace("[]", empty_feedback_placeholder))
jar_orl_il_nonnull_api_cleaned_df = pd.DataFrame(jar_orl_il_nonnull_api_cleaned)

#Topic names used as score columns when the responses are parsed
column_names2 = [
    'Trust',
    'Store Experience',
    'Store Staff',
    'Product Design',
    'Product Variety',
    'Discount',
    'Making Charge',
    'Price',
    'Product Quality',
    'OLD Gold Jewellery Exchange',
]

In [199]:
#Initialize an empty list to store the processed data (one dict per commentor)
processed_data_jar_orl_il_nonnull = []

#Iterate over each row in the DataFrame
for index, row in jar_orl_il_nonnull_api_cleaned_df.iterrows():
    try:
        #Assuming the JSON string is in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Iterate over each key-value pair in the dictionary
        for commentor_name, feedback_list in data.items():
            #Create a dictionary for each commentor with default values as 0
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #Check if there is any feedback for the commentor
            if feedback_list:
                #Since there could be multiple feedback entries for a single commentor, we aggregate them
                for feedback in feedback_list:
                    #Strip whitespace so "Trust, Price" matches both topics;
                    #a missing or null value is treated as "no topics"
                    positive = [t.strip() for t in (feedback.get("positive") or "").split(',')]
                    negative = [t.strip() for t in (feedback.get("negative") or "").split(',')]

                    for topic in column_names2:
                        #Aggregate the value: 1 for positive, -1 for negative, 0 for not mentioned
                        commentor_data[topic] += assign_value(topic, positive, negative)

                #Finalize the values to be within -1, 0, or 1
                for topic in column_names2:
                    commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_jar_orl_il_nonnull.append(commentor_data)
    except Exception as exc:
        #Skip rows that cannot be parsed, but report why (and let Ctrl-C through)
        print(f"Invalid JSON in row {index}: {exc!r}")

#Persist the cleaned responses to Excel, then reload them from that file
jar_orl_il_nonnull_api_cleaned_df.to_excel("jar_orl_il_nonnull_api_cleaned_df.xlsx",index=False)

jar_orl_il_nonnull_api_cleaned_df = pd.read_excel("jar_orl_il_nonnull_api_cleaned_df.xlsx")

jar_orl_il_nonnull_api_cleaned_df

In [200]:
#Column order for the sentiment table
ordered_columns = ["Commentor Name",
                   "Trust",
                   "Store Experience",
                   "Store Staff",
                   "Product Design",
                   "Product Variety",
                   "Discount",
                   "Making Charge",
                   "Price",
                   "Product Quality",
                   "OLD Gold Jewellery Exchange"]
#Create a DataFrame from the processed data, already in the desired column order.
#Passing columns= keeps this working when nothing was parsed (df[ordered_columns] would raise KeyError).
jar_orl_il_nonnull_sen_df = pd.DataFrame(processed_data_jar_orl_il_nonnull, columns=ordered_columns)

#The column-wise concat below pairs rows by index, so unequal row counts mean misaligned sentiments
if len(combined_df_jar_orl_il) != len(jar_orl_il_nonnull_sen_df):
    print(f"Warning: jar_orl_il has {len(combined_df_jar_orl_il)} reviews but {len(jar_orl_il_nonnull_sen_df)} sentiment rows")

jar_orl_il_nonnull_merged_df = pd.concat([combined_df_jar_orl_il, jar_orl_il_nonnull_sen_df], axis=1)

#Append the null-review rows underneath the scored rows
jar_orl_il_final_sen_df = pd.concat([jar_orl_il_nonnull_merged_df,null_dataframes['jar_orl_il_null']], ignore_index=True)

#Keep only the first 10 characters of the stringified date in the exported copy
jar_orl_il_final_sen_df_copy = jar_orl_il_final_sen_df.copy()
jar_orl_il_final_sen_df_copy["Published At Date"] = jar_orl_il_final_sen_df_copy["Published At Date"].astype(str).str[:10]

#Make sure the output folder exists before writing
os.makedirs("sentiment_raw_output", exist_ok=True)
jar_orl_il_final_sen_df_copy.to_excel("sentiment_raw_output/jar_orl_il_final_sen_df_jul.xlsx",index=False)



### jar_aur_il

In [201]:
# Score every jar_aur_il review bucket through the API in fixed-size batches,
# accumulating token usage while a background thread prints live progress.
batch_size = 25
batch_counter = [0]  # one-element list: mutated here, read by the progress thread
# Derived from whichever buckets exist so the total always matches the loop below
total_batches = sum(
    math.ceil(len(bucket_df) / batch_size)
    for bucket_df in jar_aur_il_nonnull_buckets.values()
)
key_counter = 0
total_keys=len(jar_aur_il_nonnull_buckets.keys())
jar_aur_il_nonnull_api = []
input_tokens_jar_aur_il_nonnull=0
output_tokens_jar_aur_il_nonnull=0
start_time_jar_aur_il = time.time()

for key in jar_aur_il_nonnull_buckets.keys():
    key_counter+=1
    current_df = jar_aur_il_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    # Threading setup
    total_iterations = math.ceil(len(current_df) / batch_size)
    stop_event = threading.Event()
    message_thread = threading.Thread(target=print_dynamic_message, args=(batch_counter,key_counter,counter,total_batches,total_keys, total_iterations,start_time_jar_aur_il, stop_event))
    message_thread.start()

    try:
        for start in range(0, len(current_df), batch_size):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + batch_size
            result, it, ot = score_sentiments_jul(current_df[start:end])
            jar_aur_il_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        # Always stop the dynamic message thread, even when an API call raises,
        # so it does not keep running and clearing the cell output
        stop_event.set()
        message_thread.join()
    input_tokens_jar_aur_il_nonnull+=input_tokens
    output_tokens_jar_aur_il_nonnull+=output_tokens
    clear_output(wait=True)

overall_execution_time_jar_aur_il = time.time() - start_time_jar_aur_il
formatted_time_jar_aur_il = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_jar_aur_il))
# Per-1K-token rates for input and output (NOTE(review): confirm against current pricing)
input_token_cost_jar_aur_il = round((0.01/1000) * input_tokens_jar_aur_il_nonnull, 2)
output_token_cost_jar_aur_il = round((0.03/1000) * output_tokens_jar_aur_il_nonnull, 2)
total_cost_jar_aur_il = round(input_token_cost_jar_aur_il + output_token_cost_jar_aur_il, 2)

# Print the count itself rather than the list wrapper ("[n]")
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_jar_aur_il}")
print(f"Total Input Tokens - {input_tokens_jar_aur_il_nonnull}")
print(f"Total Input Cost = {input_token_cost_jar_aur_il}")
print(f"Total Output Tokens - {output_tokens_jar_aur_il_nonnull}")
print(f"Total Output Cost = {output_token_cost_jar_aur_il}")
print(f"Total Cost = {total_cost_jar_aur_il}")

Executed [5] Iterations
Total Execution Time: 00:00:08
Total Input Tokens - 3687
Total Input Cost = 0.04
Total Output Tokens - 209
Total Output Cost = 0.01
Total Cost = 0.05


In [202]:
#Strip the markdown code fences ("```json" / "```") from every API response and
#swap an empty list "[]" for a placeholder entry with blank positive/negative fields
jar_aur_il_nonnull_api_cleaned = list(map(
    lambda response: response.replace("```json", "")
                             .replace("```", "")
                             .replace("[]", "[{\"positive\": \"\", \"negative\": \"\"}]"),
    jar_aur_il_nonnull_api,
))
#Wrap the cleaned responses in a DataFrame (one response per row)
jar_aur_il_nonnull_api_cleaned_df = pd.DataFrame(data=jar_aur_il_nonnull_api_cleaned)
#Sentiment topics that get one score column each
column_names2 = [
    'Trust', 'Store Experience', 'Store Staff', 'Product Design', 'Product Variety',
    'Discount', 'Making Charge', 'Price', 'Product Quality', 'OLD Gold Jewellery Exchange',
]

In [203]:
#Initialize an empty list to store the processed data (one dict per commentor)
processed_data_jar_aur_il_nonnull = []

#Iterate over each row in the DataFrame
for index, row in jar_aur_il_nonnull_api_cleaned_df.iterrows():
    try:
        #Assuming the JSON string is in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Iterate over each key-value pair in the dictionary
        for commentor_name, feedback_list in data.items():
            #Create a dictionary for each commentor with default values as 0
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #Check if there is any feedback for the commentor
            if feedback_list:
                #Since there could be multiple feedback entries for a single commentor, we aggregate them
                for feedback in feedback_list:
                    #Strip whitespace so "Trust, Price" matches both topics;
                    #a missing or null value is treated as "no topics"
                    positive = [t.strip() for t in (feedback.get("positive") or "").split(',')]
                    negative = [t.strip() for t in (feedback.get("negative") or "").split(',')]

                    for topic in column_names2:
                        #Aggregate the value: 1 for positive, -1 for negative, 0 for not mentioned
                        commentor_data[topic] += assign_value(topic, positive, negative)

                #Finalize the values to be within -1, 0, or 1
                for topic in column_names2:
                    commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_jar_aur_il_nonnull.append(commentor_data)
    except Exception as exc:
        #Skip rows that cannot be parsed, but report why (and let Ctrl-C through)
        print(f"Invalid JSON in row {index}: {exc!r}")

#Persist the cleaned responses to Excel, then reload them from that file
jar_aur_il_nonnull_api_cleaned_df.to_excel("jar_aur_il_nonnull_api_cleaned_df.xlsx",index=False)

jar_aur_il_nonnull_api_cleaned_df = pd.read_excel("jar_aur_il_nonnull_api_cleaned_df.xlsx")

jar_aur_il_nonnull_api_cleaned_df

In [204]:
#Column order for the sentiment table
ordered_columns = ["Commentor Name",
                   "Trust",
                   "Store Experience",
                   "Store Staff",
                   "Product Design",
                   "Product Variety",
                   "Discount",
                   "Making Charge",
                   "Price",
                   "Product Quality",
                   "OLD Gold Jewellery Exchange"]
#Create a DataFrame from the processed data, already in the desired column order.
#Passing columns= keeps this working when nothing was parsed (df[ordered_columns] would raise KeyError).
jar_aur_il_nonnull_sen_df = pd.DataFrame(processed_data_jar_aur_il_nonnull, columns=ordered_columns)

#The column-wise concat below pairs rows by index, so unequal row counts mean misaligned sentiments
if len(combined_df_jar_aur_il) != len(jar_aur_il_nonnull_sen_df):
    print(f"Warning: jar_aur_il has {len(combined_df_jar_aur_il)} reviews but {len(jar_aur_il_nonnull_sen_df)} sentiment rows")

jar_aur_il_nonnull_merged_df = pd.concat([combined_df_jar_aur_il, jar_aur_il_nonnull_sen_df], axis=1)

#Append the null-review rows underneath the scored rows
jar_aur_il_final_sen_df = pd.concat([jar_aur_il_nonnull_merged_df,null_dataframes['jar_aur_il_null']], ignore_index=True)

#Keep only the first 10 characters of the stringified date in the exported copy
jar_aur_il_final_sen_df_copy = jar_aur_il_final_sen_df.copy()
jar_aur_il_final_sen_df_copy["Published At Date"] = jar_aur_il_final_sen_df_copy["Published At Date"].astype(str).str[:10]

#Make sure the output folder exists before writing
os.makedirs("sentiment_raw_output", exist_ok=True)
jar_aur_il_final_sen_df_copy.to_excel("sentiment_raw_output/jar_aur_il_final_sen_df_jul.xlsx",index=False)


### jar_alg_il

In [205]:
# Score every jar_alg_il review bucket through the API in fixed-size batches,
# accumulating token usage while a background thread prints live progress.
batch_size = 25
batch_counter = [0]  # one-element list: mutated here, read by the progress thread
# Derived from whichever buckets exist so the total always matches the loop below
total_batches = sum(
    math.ceil(len(bucket_df) / batch_size)
    for bucket_df in jar_alg_il_nonnull_buckets.values()
)
key_counter = 0
total_keys=len(jar_alg_il_nonnull_buckets.keys())
jar_alg_il_nonnull_api = []
input_tokens_jar_alg_il_nonnull=0
output_tokens_jar_alg_il_nonnull=0
start_time_jar_alg_il = time.time()

for key in jar_alg_il_nonnull_buckets.keys():
    key_counter+=1
    current_df = jar_alg_il_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    # Threading setup
    total_iterations = math.ceil(len(current_df) / batch_size)
    stop_event = threading.Event()
    message_thread = threading.Thread(target=print_dynamic_message, args=(batch_counter,key_counter,counter,total_batches,total_keys, total_iterations,start_time_jar_alg_il, stop_event))
    message_thread.start()

    try:
        for start in range(0, len(current_df), batch_size):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + batch_size
            result, it, ot = score_sentiments_jul(current_df[start:end])
            jar_alg_il_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        # Always stop the dynamic message thread, even when an API call raises,
        # so it does not keep running and clearing the cell output
        stop_event.set()
        message_thread.join()
    input_tokens_jar_alg_il_nonnull+=input_tokens
    output_tokens_jar_alg_il_nonnull+=output_tokens
    clear_output(wait=True)

overall_execution_time_jar_alg_il = time.time() - start_time_jar_alg_il
formatted_time_jar_alg_il = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_jar_alg_il))
# Per-1K-token rates for input and output (NOTE(review): confirm against current pricing)
input_token_cost_jar_alg_il = round((0.01/1000) * input_tokens_jar_alg_il_nonnull, 2)
output_token_cost_jar_alg_il = round((0.03/1000) * output_tokens_jar_alg_il_nonnull, 2)
total_cost_jar_alg_il = round(input_token_cost_jar_alg_il + output_token_cost_jar_alg_il, 2)

# Print the count itself rather than the list wrapper ("[n]")
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_jar_alg_il}")
print(f"Total Input Tokens - {input_tokens_jar_alg_il_nonnull}")
print(f"Total Input Cost = {input_token_cost_jar_alg_il}")
print(f"Total Output Tokens - {output_tokens_jar_alg_il_nonnull}")
print(f"Total Output Cost = {output_token_cost_jar_alg_il}")
print(f"Total Cost = {total_cost_jar_alg_il}")

Executed [6] Iterations
Total Execution Time: 00:00:13
Total Input Tokens - 5143
Total Input Cost = 0.05
Total Output Tokens - 612
Total Output Cost = 0.02
Total Cost = 0.07


In [206]:
#Strip the markdown code fences ("```json" / "```") from every API response and
#swap an empty list "[]" for a placeholder entry with blank positive/negative fields
jar_alg_il_nonnull_api_cleaned = list(map(
    lambda response: response.replace("```json", "")
                             .replace("```", "")
                             .replace("[]", "[{\"positive\": \"\", \"negative\": \"\"}]"),
    jar_alg_il_nonnull_api,
))
#Wrap the cleaned responses in a DataFrame (one response per row)
jar_alg_il_nonnull_api_cleaned_df = pd.DataFrame(data=jar_alg_il_nonnull_api_cleaned)
#Sentiment topics that get one score column each
column_names2 = [
    'Trust', 'Store Experience', 'Store Staff', 'Product Design', 'Product Variety',
    'Discount', 'Making Charge', 'Price', 'Product Quality', 'OLD Gold Jewellery Exchange',
]

In [207]:
#Initialize an empty list to store the processed data (one dict per commentor)
processed_data_jar_alg_il_nonnull = []

#Iterate over each row in the DataFrame
for index, row in jar_alg_il_nonnull_api_cleaned_df.iterrows():
    try:
        #Assuming the JSON string is in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Iterate over each key-value pair in the dictionary
        for commentor_name, feedback_list in data.items():
            #Create a dictionary for each commentor with default values as 0
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #Check if there is any feedback for the commentor
            if feedback_list:
                #Since there could be multiple feedback entries for a single commentor, we aggregate them
                for feedback in feedback_list:
                    #Strip whitespace so "Trust, Price" matches both topics;
                    #a missing or null value is treated as "no topics"
                    positive = [t.strip() for t in (feedback.get("positive") or "").split(',')]
                    negative = [t.strip() for t in (feedback.get("negative") or "").split(',')]

                    for topic in column_names2:
                        #Aggregate the value: 1 for positive, -1 for negative, 0 for not mentioned
                        commentor_data[topic] += assign_value(topic, positive, negative)

                #Finalize the values to be within -1, 0, or 1
                for topic in column_names2:
                    commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_jar_alg_il_nonnull.append(commentor_data)
    except Exception as exc:
        #Skip rows that cannot be parsed, but report why (and let Ctrl-C through)
        print(f"Invalid JSON in row {index}: {exc!r}")

#Persist the cleaned responses to Excel, then reload them from that file
jar_alg_il_nonnull_api_cleaned_df.to_excel("jar_alg_il_nonnull_api_cleaned_df.xlsx",index=False)

jar_alg_il_nonnull_api_cleaned_df = pd.read_excel("jar_alg_il_nonnull_api_cleaned_df.xlsx")

jar_alg_il_nonnull_api_cleaned_df

In [208]:
#Column order for the sentiment table
ordered_columns = ["Commentor Name",
                   "Trust",
                   "Store Experience",
                   "Store Staff",
                   "Product Design",
                   "Product Variety",
                   "Discount",
                   "Making Charge",
                   "Price",
                   "Product Quality",
                   "OLD Gold Jewellery Exchange"]
#Create a DataFrame from the processed data, already in the desired column order.
#Passing columns= keeps this working when nothing was parsed (df[ordered_columns] would raise KeyError).
jar_alg_il_nonnull_sen_df = pd.DataFrame(processed_data_jar_alg_il_nonnull, columns=ordered_columns)

#The column-wise concat below pairs rows by index, so unequal row counts mean misaligned sentiments
if len(combined_df_jar_alg_il) != len(jar_alg_il_nonnull_sen_df):
    print(f"Warning: jar_alg_il has {len(combined_df_jar_alg_il)} reviews but {len(jar_alg_il_nonnull_sen_df)} sentiment rows")

jar_alg_il_nonnull_merged_df = pd.concat([combined_df_jar_alg_il, jar_alg_il_nonnull_sen_df], axis=1)

#Append the null-review rows underneath the scored rows
jar_alg_il_final_sen_df = pd.concat([jar_alg_il_nonnull_merged_df,null_dataframes['jar_alg_il_null']], ignore_index=True)

#Keep only the first 10 characters of the stringified date in the exported copy
jar_alg_il_final_sen_df_copy = jar_alg_il_final_sen_df.copy()
jar_alg_il_final_sen_df_copy["Published At Date"] = jar_alg_il_final_sen_df_copy["Published At Date"].astype(str).str[:10]

#Make sure the output folder exists before writing
os.makedirs("sentiment_raw_output", exist_ok=True)
jar_alg_il_final_sen_df_copy.to_excel("sentiment_raw_output/jar_alg_il_final_sen_df_jul.xlsx",index=False)



### jar_sch_il

In [209]:
# Score every jar_sch_il review bucket through the API in fixed-size batches,
# accumulating token usage while a background thread prints live progress.
batch_size = 25
batch_counter = [0]  # one-element list: mutated here, read by the progress thread
# Derived from whichever buckets exist so the total always matches the loop below
total_batches = sum(
    math.ceil(len(bucket_df) / batch_size)
    for bucket_df in jar_sch_il_nonnull_buckets.values()
)
key_counter = 0
total_keys=len(jar_sch_il_nonnull_buckets.keys())
jar_sch_il_nonnull_api = []
input_tokens_jar_sch_il_nonnull=0
output_tokens_jar_sch_il_nonnull=0
start_time_jar_sch_il = time.time()

for key in jar_sch_il_nonnull_buckets.keys():
    key_counter+=1
    current_df = jar_sch_il_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    # Threading setup
    total_iterations = math.ceil(len(current_df) / batch_size)
    stop_event = threading.Event()
    message_thread = threading.Thread(target=print_dynamic_message, args=(batch_counter,key_counter,counter,total_batches,total_keys, total_iterations,start_time_jar_sch_il, stop_event))
    message_thread.start()

    try:
        for start in range(0, len(current_df), batch_size):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + batch_size
            result, it, ot = score_sentiments_jul(current_df[start:end])
            jar_sch_il_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        # Always stop the dynamic message thread, even when an API call raises,
        # so it does not keep running and clearing the cell output
        stop_event.set()
        message_thread.join()
    input_tokens_jar_sch_il_nonnull+=input_tokens
    output_tokens_jar_sch_il_nonnull+=output_tokens
    clear_output(wait=True)

overall_execution_time_jar_sch_il = time.time() - start_time_jar_sch_il
formatted_time_jar_sch_il = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_jar_sch_il))
# Per-1K-token rates for input and output (NOTE(review): confirm against current pricing)
input_token_cost_jar_sch_il = round((0.01/1000) * input_tokens_jar_sch_il_nonnull, 2)
output_token_cost_jar_sch_il = round((0.03/1000) * output_tokens_jar_sch_il_nonnull, 2)
total_cost_jar_sch_il = round(input_token_cost_jar_sch_il + output_token_cost_jar_sch_il, 2)

# Print the count itself rather than the list wrapper ("[n]")
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_jar_sch_il}")
print(f"Total Input Tokens - {input_tokens_jar_sch_il_nonnull}")
print(f"Total Input Cost = {input_token_cost_jar_sch_il}")
print(f"Total Output Tokens - {output_tokens_jar_sch_il_nonnull}")
print(f"Total Output Cost = {output_token_cost_jar_sch_il}")
print(f"Total Cost = {total_cost_jar_sch_il}")

Executed [5] Iterations
Total Execution Time: 00:00:10
Total Input Tokens - 4180
Total Input Cost = 0.04
Total Output Tokens - 408
Total Output Cost = 0.01
Total Cost = 0.05


In [210]:
#Strip the markdown code fences ("```json" / "```") from every API response and
#swap an empty list "[]" for a placeholder entry with blank positive/negative fields
jar_sch_il_nonnull_api_cleaned = list(map(
    lambda response: response.replace("```json", "")
                             .replace("```", "")
                             .replace("[]", "[{\"positive\": \"\", \"negative\": \"\"}]"),
    jar_sch_il_nonnull_api,
))
#Wrap the cleaned responses in a DataFrame (one response per row)
jar_sch_il_nonnull_api_cleaned_df = pd.DataFrame(data=jar_sch_il_nonnull_api_cleaned)
#Sentiment topics that get one score column each
column_names2 = [
    'Trust', 'Store Experience', 'Store Staff', 'Product Design', 'Product Variety',
    'Discount', 'Making Charge', 'Price', 'Product Quality', 'OLD Gold Jewellery Exchange',
]

In [211]:
#Initialize an empty list to store the processed data (one dict per commentor)
processed_data_jar_sch_il_nonnull = []

#Iterate over each row in the DataFrame
for index, row in jar_sch_il_nonnull_api_cleaned_df.iterrows():
    try:
        #Assuming the JSON string is in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Iterate over each key-value pair in the dictionary
        for commentor_name, feedback_list in data.items():
            #Create a dictionary for each commentor with default values as 0
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #Check if there is any feedback for the commentor
            if feedback_list:
                #Since there could be multiple feedback entries for a single commentor, we aggregate them
                for feedback in feedback_list:
                    #Strip whitespace so "Trust, Price" matches both topics;
                    #a missing or null value is treated as "no topics"
                    positive = [t.strip() for t in (feedback.get("positive") or "").split(',')]
                    negative = [t.strip() for t in (feedback.get("negative") or "").split(',')]

                    for topic in column_names2:
                        #Aggregate the value: 1 for positive, -1 for negative, 0 for not mentioned
                        commentor_data[topic] += assign_value(topic, positive, negative)

                #Finalize the values to be within -1, 0, or 1
                for topic in column_names2:
                    commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_jar_sch_il_nonnull.append(commentor_data)
    except Exception as exc:
        #Skip rows that cannot be parsed, but report why (and let Ctrl-C through)
        print(f"Invalid JSON in row {index}: {exc!r}")

#Persist the cleaned responses to Excel, then reload them from that file
jar_sch_il_nonnull_api_cleaned_df.to_excel("jar_sch_il_nonnull_api_cleaned_df.xlsx",index=False)

jar_sch_il_nonnull_api_cleaned_df = pd.read_excel("jar_sch_il_nonnull_api_cleaned_df.xlsx")

jar_sch_il_nonnull_api_cleaned_df

In [212]:
#Column order for the sentiment table
ordered_columns = ["Commentor Name",
                   "Trust",
                   "Store Experience",
                   "Store Staff",
                   "Product Design",
                   "Product Variety",
                   "Discount",
                   "Making Charge",
                   "Price",
                   "Product Quality",
                   "OLD Gold Jewellery Exchange"]
#Create a DataFrame from the processed data, already in the desired column order.
#Passing columns= keeps this working when nothing was parsed (df[ordered_columns] would raise KeyError).
jar_sch_il_nonnull_sen_df = pd.DataFrame(processed_data_jar_sch_il_nonnull, columns=ordered_columns)

#The column-wise concat below pairs rows by index, so unequal row counts mean misaligned sentiments
if len(combined_df_jar_sch_il) != len(jar_sch_il_nonnull_sen_df):
    print(f"Warning: jar_sch_il has {len(combined_df_jar_sch_il)} reviews but {len(jar_sch_il_nonnull_sen_df)} sentiment rows")

jar_sch_il_nonnull_merged_df = pd.concat([combined_df_jar_sch_il, jar_sch_il_nonnull_sen_df], axis=1)

#Append the null-review rows underneath the scored rows
jar_sch_il_final_sen_df = pd.concat([jar_sch_il_nonnull_merged_df,null_dataframes['jar_sch_il_null']], ignore_index=True)

#Keep only the first 10 characters of the stringified date in the exported copy
jar_sch_il_final_sen_df_copy = jar_sch_il_final_sen_df.copy()
jar_sch_il_final_sen_df_copy["Published At Date"] = jar_sch_il_final_sen_df_copy["Published At Date"].astype(str).str[:10]

#Make sure the output folder exists before writing
os.makedirs("sentiment_raw_output", exist_ok=True)
jar_sch_il_final_sen_df_copy.to_excel("sentiment_raw_output/jar_sch_il_final_sen_df_jul.xlsx",index=False)



### joy_suw_ga

In [213]:
# Score every joy_suw_ga review bucket through the API in fixed-size batches,
# accumulating token usage while a background thread prints live progress.
batch_size = 25
batch_counter = [0]  # one-element list: mutated here, read by the progress thread
# Derived from whichever buckets exist so the total always matches the loop below
total_batches = sum(
    math.ceil(len(bucket_df) / batch_size)
    for bucket_df in joy_suw_ga_nonnull_buckets.values()
)
key_counter = 0
total_keys=len(joy_suw_ga_nonnull_buckets.keys())
joy_suw_ga_nonnull_api = []
input_tokens_joy_suw_ga_nonnull=0
output_tokens_joy_suw_ga_nonnull=0
start_time_joy_suw_ga = time.time()

for key in joy_suw_ga_nonnull_buckets.keys():
    key_counter+=1
    current_df = joy_suw_ga_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    # Threading setup
    total_iterations = math.ceil(len(current_df) / batch_size)
    stop_event = threading.Event()
    message_thread = threading.Thread(target=print_dynamic_message, args=(batch_counter,key_counter,counter,total_batches,total_keys, total_iterations,start_time_joy_suw_ga, stop_event))
    message_thread.start()

    try:
        for start in range(0, len(current_df), batch_size):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + batch_size
            result, it, ot = score_sentiments_jul(current_df[start:end])
            joy_suw_ga_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        # Always stop the dynamic message thread, even when an API call raises,
        # so it does not keep running and clearing the cell output
        stop_event.set()
        message_thread.join()
    input_tokens_joy_suw_ga_nonnull+=input_tokens
    output_tokens_joy_suw_ga_nonnull+=output_tokens
    clear_output(wait=True)

overall_execution_time_joy_suw_ga = time.time() - start_time_joy_suw_ga
formatted_time_joy_suw_ga = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_joy_suw_ga))
# Per-1K-token rates for input and output (NOTE(review): confirm against current pricing)
input_token_cost_joy_suw_ga = round((0.01/1000) * input_tokens_joy_suw_ga_nonnull, 2)
output_token_cost_joy_suw_ga = round((0.03/1000) * output_tokens_joy_suw_ga_nonnull, 2)
total_cost_joy_suw_ga = round(input_token_cost_joy_suw_ga + output_token_cost_joy_suw_ga, 2)

# Print the count itself rather than the list wrapper ("[n]")
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_joy_suw_ga}")
print(f"Total Input Tokens - {input_tokens_joy_suw_ga_nonnull}")
print(f"Total Input Cost = {input_token_cost_joy_suw_ga}")
print(f"Total Output Tokens - {output_tokens_joy_suw_ga_nonnull}")
print(f"Total Output Cost = {output_token_cost_joy_suw_ga}")
print(f"Total Cost = {total_cost_joy_suw_ga}")

Executed [10] Iterations
Total Execution Time: 00:01:12
Total Input Tokens - 15228
Total Input Cost = 0.15
Total Output Tokens - 5391
Total Output Cost = 0.16
Total Cost = 0.31


In [214]:
#Strip the markdown code fences ("```json" / "```") from every API response and
#swap an empty list "[]" for a placeholder entry with blank positive/negative fields
joy_suw_ga_nonnull_api_cleaned = list(map(
    lambda response: response.replace("```json", "")
                             .replace("```", "")
                             .replace("[]", "[{\"positive\": \"\", \"negative\": \"\"}]"),
    joy_suw_ga_nonnull_api,
))
#Wrap the cleaned responses in a DataFrame (one response per row)
joy_suw_ga_nonnull_api_cleaned_df = pd.DataFrame(data=joy_suw_ga_nonnull_api_cleaned)
#Sentiment topics that get one score column each
column_names2 = [
    'Trust', 'Store Experience', 'Store Staff', 'Product Design', 'Product Variety',
    'Discount', 'Making Charge', 'Price', 'Product Quality', 'OLD Gold Jewellery Exchange',
]

In [215]:
#Initialize an empty list to store the processed data (one dict per commentor)
processed_data_joy_suw_ga_nonnull = []

#Iterate over each row in the DataFrame
for index, row in joy_suw_ga_nonnull_api_cleaned_df.iterrows():
    try:
        #Assuming the JSON string is in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Iterate over each key-value pair in the dictionary
        for commentor_name, feedback_list in data.items():
            #Create a dictionary for each commentor with default values as 0
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #Check if there is any feedback for the commentor
            if feedback_list:
                #Since there could be multiple feedback entries for a single commentor, we aggregate them
                for feedback in feedback_list:
                    #Strip whitespace so "Trust, Price" matches both topics;
                    #a missing or null value is treated as "no topics"
                    positive = [t.strip() for t in (feedback.get("positive") or "").split(',')]
                    negative = [t.strip() for t in (feedback.get("negative") or "").split(',')]

                    for topic in column_names2:
                        #Aggregate the value: 1 for positive, -1 for negative, 0 for not mentioned
                        commentor_data[topic] += assign_value(topic, positive, negative)

                #Finalize the values to be within -1, 0, or 1
                for topic in column_names2:
                    commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_joy_suw_ga_nonnull.append(commentor_data)
    except Exception as exc:
        #Skip rows that cannot be parsed, but report why (and let Ctrl-C through)
        print(f"Invalid JSON in row {index}: {exc!r}")

#Persist the cleaned responses to Excel, then reload them from that file
joy_suw_ga_nonnull_api_cleaned_df.to_excel("joy_suw_ga_nonnull_api_cleaned_df.xlsx",index=False)

joy_suw_ga_nonnull_api_cleaned_df = pd.read_excel("joy_suw_ga_nonnull_api_cleaned_df.xlsx")

joy_suw_ga_nonnull_api_cleaned_df

In [216]:
#Column order for the sentiment table
ordered_columns = ["Commentor Name",
                   "Trust",
                   "Store Experience",
                   "Store Staff",
                   "Product Design",
                   "Product Variety",
                   "Discount",
                   "Making Charge",
                   "Price",
                   "Product Quality",
                   "OLD Gold Jewellery Exchange"]
#Create a DataFrame from the processed data, already in the desired column order.
#Passing columns= keeps this working when nothing was parsed (df[ordered_columns] would raise KeyError).
joy_suw_ga_nonnull_sen_df = pd.DataFrame(processed_data_joy_suw_ga_nonnull, columns=ordered_columns)

#The column-wise concat below pairs rows by index, so unequal row counts mean misaligned sentiments
if len(combined_df_joy_suw_ga) != len(joy_suw_ga_nonnull_sen_df):
    print(f"Warning: joy_suw_ga has {len(combined_df_joy_suw_ga)} reviews but {len(joy_suw_ga_nonnull_sen_df)} sentiment rows")

joy_suw_ga_nonnull_merged_df = pd.concat([combined_df_joy_suw_ga, joy_suw_ga_nonnull_sen_df], axis=1)

#Append the null-review rows underneath the scored rows
joy_suw_ga_final_sen_df = pd.concat([joy_suw_ga_nonnull_merged_df,null_dataframes['joy_suw_ga_null']], ignore_index=True)

#Keep only the first 10 characters of the stringified date in the exported copy
joy_suw_ga_final_sen_df_copy = joy_suw_ga_final_sen_df.copy()
joy_suw_ga_final_sen_df_copy["Published At Date"] = joy_suw_ga_final_sen_df_copy["Published At Date"].astype(str).str[:10]

#Make sure the output folder exists before writing
os.makedirs("sentiment_raw_output", exist_ok=True)
joy_suw_ga_final_sen_df_copy.to_excel("sentiment_raw_output/joy_suw_ga_final_sen_df_jul.xlsx",index=False)



### joy_chi_il

In [217]:
#Send every joy_chi_il non-null review bucket to score_sentiments_jul in fixed-size batches while a
#background thread prints live progress; token usage and estimated cost are totalled at the end.
batch_size_joy_chi_il = 25  #rows sent per score_sentiments_jul call
batch_counter = [0]  #one-element list so the progress thread can read the live count
#Derive the batch total from the buckets themselves so it always matches the loop below
total_batches = sum(math.ceil(len(bucket_df) / batch_size_joy_chi_il) for bucket_df in joy_chi_il_nonnull_buckets.values())
key_counter = 0
total_keys = len(joy_chi_il_nonnull_buckets)
joy_chi_il_nonnull_api = []
input_tokens_joy_chi_il_nonnull = 0
output_tokens_joy_chi_il_nonnull = 0
start_time_joy_chi_il = time.time()

for key in joy_chi_il_nonnull_buckets.keys():
    key_counter += 1
    current_df = joy_chi_il_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    # Threading setup
    total_iterations = math.ceil(len(current_df) / batch_size_joy_chi_il)
    stop_event = threading.Event()
    message_thread = threading.Thread(target=print_dynamic_message, args=(batch_counter,key_counter,counter,total_batches,total_keys, total_iterations,start_time_joy_chi_il, stop_event))
    message_thread.start()

    try:
        for start in range(0, len(current_df), batch_size_joy_chi_il):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + batch_size_joy_chi_il
            result, it, ot = score_sentiments_jul(current_df[start:end])
            joy_chi_il_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        # Always stop the progress thread, even when an API call raises;
        # otherwise it would keep looping and clearing the cell output
        stop_event.set()
        message_thread.join()
    input_tokens_joy_chi_il_nonnull += input_tokens
    output_tokens_joy_chi_il_nonnull += output_tokens
    clear_output(wait=True)

overall_execution_time_joy_chi_il = time.time() - start_time_joy_chi_il
formatted_time_joy_chi_il = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_joy_chi_il))
#Cost estimate: 0.01 per 1,000 input tokens and 0.03 per 1,000 output tokens, rounded to 2 decimals
input_token_cost_joy_chi_il = round((0.01/1000) * input_tokens_joy_chi_il_nonnull, 2)
output_token_cost_joy_chi_il = round((0.03/1000) * output_tokens_joy_chi_il_nonnull, 2)
total_cost_joy_chi_il = round(input_token_cost_joy_chi_il + output_token_cost_joy_chi_il, 2)

#Print the count itself rather than the one-element list that holds it
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_joy_chi_il}")
print(f"Total Input Tokens - {input_tokens_joy_chi_il_nonnull}")
print(f"Total Input Cost = {input_token_cost_joy_chi_il}")
print(f"Total Output Tokens - {output_tokens_joy_chi_il_nonnull}")
print(f"Total Output Cost = {output_token_cost_joy_chi_il}")
print(f"Total Cost = {total_cost_joy_chi_il}")

Executed [8] Iterations
Total Execution Time: 00:00:36
Total Input Tokens - 8285
Total Input Cost = 0.08
Total Output Tokens - 2489
Total Output Cost = 0.07
Total Cost = 0.15


In [218]:
#Drop the "```json" / "```" fence markers from every raw API reply and replace each "[]" with a
#single blank positive/negative entry, then load the cleaned replies into a DataFrame
joy_chi_il_nonnull_api_cleaned = list(map(
    lambda raw_reply: raw_reply.replace("```json", "")
                               .replace("```", "")
                               .replace("[]", "[{\"positive\": \"\", \"negative\": \"\"}]"),
    joy_chi_il_nonnull_api,
))
#Topic names used as the sentiment columns
column_names2 = [
    'Trust',
    'Store Experience',
    'Store Staff',
    'Product Design',
    'Product Variety',
    'Discount',
    'Making Charge',
    'Price',
    'Product Quality',
    'OLD Gold Jewellery Exchange',
]
joy_chi_il_nonnull_api_cleaned_df = pd.DataFrame(joy_chi_il_nonnull_api_cleaned)

In [219]:
#Turn each cleaned API reply (a JSON object of commentor -> feedback entries) into one record per
#commentor holding -1 / 0 / 1 for every topic in column_names2
processed_data_joy_chi_il_nonnull = []

#Iterate over each row in the DataFrame
for index, row in joy_chi_il_nonnull_api_cleaned_df.iterrows():
    try:
        #Assuming the JSON string is in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Iterate over each key-value pair in the dictionary
        for commentor_name, feedback_list in data.items():
            #Create a dictionary for each commentor with default values as 0
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #Check if there is any feedback for the commentor
            if feedback_list:
                #Since there could be multiple feedback entries for a single commentor, we aggregate them
                for feedback in feedback_list:
                    #Strip whitespace so "Trust, Price" yields "Price" rather than " Price";
                    #a missing or null value is treated as "no topics"
                    positive = [name.strip() for name in (feedback.get("positive") or "").split(',')]
                    negative = [name.strip() for name in (feedback.get("negative") or "").split(',')]

                    for topic in column_names2:
                        #Aggregate the value: 1 for positive, -1 for negative, 0 for not mentioned
                        commentor_data[topic] += assign_value(topic, positive, negative)

                #Finalize the values to be within -1, 0, or 1
                for topic in column_names2:
                    commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_joy_chi_il_nonnull.append(commentor_data)
    except Exception as exc:
        #Skip rows that cannot be parsed, reporting the cause; KeyboardInterrupt is no longer swallowed
        #NOTE(review): commentors appended before a mid-row failure are kept, and skipped rows make
        #the result shorter than the reviews sent - confirm the later merge still lines up
        print(f"Invalid JSON in row {index}: {exc!r}")

#Write the cleaned API responses to Excel, then reload them from that file
joy_chi_il_nonnull_api_cleaned_df.to_excel("joy_chi_il_nonnull_api_cleaned_df.xlsx",index=False)

joy_chi_il_nonnull_api_cleaned_df = pd.read_excel("joy_chi_il_nonnull_api_cleaned_df.xlsx")

joy_chi_il_nonnull_api_cleaned_df

In [220]:
#Column order for the sentiment frame: commentor name first, then one column per topic
ordered_columns = ["Commentor Name",
                   "Trust",
                   "Store Experience",
                   "Store Staff",
                   "Product Design",
                   "Product Variety",
                   "Discount",
                   "Making Charge",
                   "Price",
                   "Product Quality",
                   "OLD Gold Jewellery Exchange"]
#Build the frame directly in the target column order; unlike selecting the columns afterwards,
#this also works when no rows were processed (empty frame instead of a KeyError)
joy_chi_il_nonnull_sen_df = pd.DataFrame(processed_data_joy_chi_il_nonnull, columns=ordered_columns)

#The axis=1 concat pairs rows by index label, so a row-count mismatch (e.g. API rows skipped as
#invalid JSON) would silently attach sentiments to the wrong reviews or leave NaN rows
if len(combined_df_joy_chi_il) != len(joy_chi_il_nonnull_sen_df):
    print(f"WARNING: combined_df_joy_chi_il has {len(combined_df_joy_chi_il)} rows but joy_chi_il_nonnull_sen_df has {len(joy_chi_il_nonnull_sen_df)}")
joy_chi_il_nonnull_merged_df = pd.concat([combined_df_joy_chi_il, joy_chi_il_nonnull_sen_df], axis=1)

#Stack the scored rows on top of the rows held in null_dataframes['joy_chi_il_null']
joy_chi_il_final_sen_df = pd.concat([joy_chi_il_nonnull_merged_df,null_dataframes['joy_chi_il_null']], ignore_index=True)

#Work on a copy and keep only the first 10 characters of the stringified publish date
joy_chi_il_final_sen_df_copy = joy_chi_il_final_sen_df.copy()
joy_chi_il_final_sen_df_copy["Published At Date"] = joy_chi_il_final_sen_df_copy["Published At Date"].astype(str).str[:10]

#Make sure the output folder exists so the export does not fail on a fresh checkout
os.makedirs("sentiment_raw_output", exist_ok=True)
joy_chi_il_final_sen_df_copy.to_excel("sentiment_raw_output/joy_chi_il_final_sen_df_jul.xlsx",index=False)



### joy_hou_tx

In [221]:
#Send every joy_hou_tx non-null review bucket to score_sentiments_jul in fixed-size batches while a
#background thread prints live progress; token usage and estimated cost are totalled at the end.
batch_size_joy_hou_tx = 25  #rows sent per score_sentiments_jul call
batch_counter = [0]  #one-element list so the progress thread can read the live count
#Derive the batch total from the buckets themselves so it always matches the loop below
total_batches = sum(math.ceil(len(bucket_df) / batch_size_joy_hou_tx) for bucket_df in joy_hou_tx_nonnull_buckets.values())
key_counter = 0
total_keys = len(joy_hou_tx_nonnull_buckets)
joy_hou_tx_nonnull_api = []
input_tokens_joy_hou_tx_nonnull = 0
output_tokens_joy_hou_tx_nonnull = 0
start_time_joy_hou_tx = time.time()

for key in joy_hou_tx_nonnull_buckets.keys():
    key_counter += 1
    current_df = joy_hou_tx_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    # Threading setup
    total_iterations = math.ceil(len(current_df) / batch_size_joy_hou_tx)
    stop_event = threading.Event()
    message_thread = threading.Thread(target=print_dynamic_message, args=(batch_counter,key_counter,counter,total_batches,total_keys, total_iterations,start_time_joy_hou_tx, stop_event))
    message_thread.start()

    try:
        for start in range(0, len(current_df), batch_size_joy_hou_tx):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + batch_size_joy_hou_tx
            result, it, ot = score_sentiments_jul(current_df[start:end])
            joy_hou_tx_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        # Always stop the progress thread, even when an API call raises;
        # otherwise it would keep looping and clearing the cell output
        stop_event.set()
        message_thread.join()
    input_tokens_joy_hou_tx_nonnull += input_tokens
    output_tokens_joy_hou_tx_nonnull += output_tokens
    clear_output(wait=True)

overall_execution_time_joy_hou_tx = time.time() - start_time_joy_hou_tx
formatted_time_joy_hou_tx = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_joy_hou_tx))
#Cost estimate: 0.01 per 1,000 input tokens and 0.03 per 1,000 output tokens, rounded to 2 decimals
input_token_cost_joy_hou_tx = round((0.01/1000) * input_tokens_joy_hou_tx_nonnull, 2)
output_token_cost_joy_hou_tx = round((0.03/1000) * output_tokens_joy_hou_tx_nonnull, 2)
total_cost_joy_hou_tx = round(input_token_cost_joy_hou_tx + output_token_cost_joy_hou_tx, 2)

#Print the count itself rather than the one-element list that holds it
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_joy_hou_tx}")
print(f"Total Input Tokens - {input_tokens_joy_hou_tx_nonnull}")
print(f"Total Input Cost = {input_token_cost_joy_hou_tx}")
print(f"Total Output Tokens - {output_tokens_joy_hou_tx_nonnull}")
print(f"Total Output Cost = {output_token_cost_joy_hou_tx}")
print(f"Total Cost = {total_cost_joy_hou_tx}")

Executed [8] Iterations
Total Execution Time: 00:00:39
Total Input Tokens - 8945
Total Input Cost = 0.09
Total Output Tokens - 2825
Total Output Cost = 0.08
Total Cost = 0.17


In [222]:
#Drop the "```json" / "```" fence markers from every raw API reply and replace each "[]" with a
#single blank positive/negative entry, then load the cleaned replies into a DataFrame
joy_hou_tx_nonnull_api_cleaned = list(map(
    lambda raw_reply: raw_reply.replace("```json", "")
                               .replace("```", "")
                               .replace("[]", "[{\"positive\": \"\", \"negative\": \"\"}]"),
    joy_hou_tx_nonnull_api,
))
#Topic names used as the sentiment columns
column_names2 = [
    'Trust',
    'Store Experience',
    'Store Staff',
    'Product Design',
    'Product Variety',
    'Discount',
    'Making Charge',
    'Price',
    'Product Quality',
    'OLD Gold Jewellery Exchange',
]
joy_hou_tx_nonnull_api_cleaned_df = pd.DataFrame(joy_hou_tx_nonnull_api_cleaned)

In [223]:
#Turn each cleaned API reply (a JSON object of commentor -> feedback entries) into one record per
#commentor holding -1 / 0 / 1 for every topic in column_names2
processed_data_joy_hou_tx_nonnull = []

#Iterate over each row in the DataFrame
for index, row in joy_hou_tx_nonnull_api_cleaned_df.iterrows():
    try:
        #Assuming the JSON string is in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Iterate over each key-value pair in the dictionary
        for commentor_name, feedback_list in data.items():
            #Create a dictionary for each commentor with default values as 0
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #Check if there is any feedback for the commentor
            if feedback_list:
                #Since there could be multiple feedback entries for a single commentor, we aggregate them
                for feedback in feedback_list:
                    #Strip whitespace so "Trust, Price" yields "Price" rather than " Price";
                    #a missing or null value is treated as "no topics"
                    positive = [name.strip() for name in (feedback.get("positive") or "").split(',')]
                    negative = [name.strip() for name in (feedback.get("negative") or "").split(',')]

                    for topic in column_names2:
                        #Aggregate the value: 1 for positive, -1 for negative, 0 for not mentioned
                        commentor_data[topic] += assign_value(topic, positive, negative)

                #Finalize the values to be within -1, 0, or 1
                for topic in column_names2:
                    commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_joy_hou_tx_nonnull.append(commentor_data)
    except Exception as exc:
        #Skip rows that cannot be parsed, reporting the cause; KeyboardInterrupt is no longer swallowed
        #NOTE(review): commentors appended before a mid-row failure are kept, and skipped rows make
        #the result shorter than the reviews sent - confirm the later merge still lines up
        print(f"Invalid JSON in row {index}: {exc!r}")

#Write the cleaned API responses to Excel, then reload them from that file
joy_hou_tx_nonnull_api_cleaned_df.to_excel("joy_hou_tx_nonnull_api_cleaned_df.xlsx",index=False)

joy_hou_tx_nonnull_api_cleaned_df = pd.read_excel("joy_hou_tx_nonnull_api_cleaned_df.xlsx")

joy_hou_tx_nonnull_api_cleaned_df

In [224]:
#Column order for the sentiment frame: commentor name first, then one column per topic
ordered_columns = ["Commentor Name",
                   "Trust",
                   "Store Experience",
                   "Store Staff",
                   "Product Design",
                   "Product Variety",
                   "Discount",
                   "Making Charge",
                   "Price",
                   "Product Quality",
                   "OLD Gold Jewellery Exchange"]
#Build the frame directly in the target column order; unlike selecting the columns afterwards,
#this also works when no rows were processed (empty frame instead of a KeyError)
joy_hou_tx_nonnull_sen_df = pd.DataFrame(processed_data_joy_hou_tx_nonnull, columns=ordered_columns)

#The axis=1 concat pairs rows by index label, so a row-count mismatch (e.g. API rows skipped as
#invalid JSON) would silently attach sentiments to the wrong reviews or leave NaN rows
if len(combined_df_joy_hou_tx) != len(joy_hou_tx_nonnull_sen_df):
    print(f"WARNING: combined_df_joy_hou_tx has {len(combined_df_joy_hou_tx)} rows but joy_hou_tx_nonnull_sen_df has {len(joy_hou_tx_nonnull_sen_df)}")
joy_hou_tx_nonnull_merged_df = pd.concat([combined_df_joy_hou_tx, joy_hou_tx_nonnull_sen_df], axis=1)

#Stack the scored rows on top of the rows held in null_dataframes['joy_hou_tx_null']
joy_hou_tx_final_sen_df = pd.concat([joy_hou_tx_nonnull_merged_df,null_dataframes['joy_hou_tx_null']], ignore_index=True)

#Work on a copy and keep only the first 10 characters of the stringified publish date
joy_hou_tx_final_sen_df_copy = joy_hou_tx_final_sen_df.copy()
joy_hou_tx_final_sen_df_copy["Published At Date"] = joy_hou_tx_final_sen_df_copy["Published At Date"].astype(str).str[:10]

#Make sure the output folder exists so the export does not fail on a fresh checkout
os.makedirs("sentiment_raw_output", exist_ok=True)
joy_hou_tx_final_sen_df_copy.to_excel("sentiment_raw_output/joy_hou_tx_final_sen_df_jul.xlsx",index=False)



### joy_fri_tx

In [225]:
#Send every joy_fri_tx non-null review bucket to score_sentiments_jul in fixed-size batches while a
#background thread prints live progress; token usage and estimated cost are totalled at the end.
batch_size_joy_fri_tx = 25  #rows sent per score_sentiments_jul call
batch_counter = [0]  #one-element list so the progress thread can read the live count
#Derive the batch total from the buckets themselves so it always matches the loop below
total_batches = sum(math.ceil(len(bucket_df) / batch_size_joy_fri_tx) for bucket_df in joy_fri_tx_nonnull_buckets.values())
key_counter = 0
total_keys = len(joy_fri_tx_nonnull_buckets)
joy_fri_tx_nonnull_api = []
input_tokens_joy_fri_tx_nonnull = 0
output_tokens_joy_fri_tx_nonnull = 0
start_time_joy_fri_tx = time.time()

for key in joy_fri_tx_nonnull_buckets.keys():
    key_counter += 1
    current_df = joy_fri_tx_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    # Threading setup
    total_iterations = math.ceil(len(current_df) / batch_size_joy_fri_tx)
    stop_event = threading.Event()
    message_thread = threading.Thread(target=print_dynamic_message, args=(batch_counter,key_counter,counter,total_batches,total_keys, total_iterations,start_time_joy_fri_tx, stop_event))
    message_thread.start()

    try:
        for start in range(0, len(current_df), batch_size_joy_fri_tx):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + batch_size_joy_fri_tx
            result, it, ot = score_sentiments_jul(current_df[start:end])
            joy_fri_tx_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        # Always stop the progress thread, even when an API call raises;
        # otherwise it would keep looping and clearing the cell output
        stop_event.set()
        message_thread.join()
    input_tokens_joy_fri_tx_nonnull += input_tokens
    output_tokens_joy_fri_tx_nonnull += output_tokens
    clear_output(wait=True)

overall_execution_time_joy_fri_tx = time.time() - start_time_joy_fri_tx
formatted_time_joy_fri_tx = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_joy_fri_tx))
#Cost estimate: 0.01 per 1,000 input tokens and 0.03 per 1,000 output tokens, rounded to 2 decimals
input_token_cost_joy_fri_tx = round((0.01/1000) * input_tokens_joy_fri_tx_nonnull, 2)
output_token_cost_joy_fri_tx = round((0.03/1000) * output_tokens_joy_fri_tx_nonnull, 2)
total_cost_joy_fri_tx = round(input_token_cost_joy_fri_tx + output_token_cost_joy_fri_tx, 2)

#Print the count itself rather than the one-element list that holds it
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_joy_fri_tx}")
print(f"Total Input Tokens - {input_tokens_joy_fri_tx_nonnull}")
print(f"Total Input Cost = {input_token_cost_joy_fri_tx}")
print(f"Total Output Tokens - {output_tokens_joy_fri_tx_nonnull}")
print(f"Total Output Cost = {output_token_cost_joy_fri_tx}")
print(f"Total Cost = {total_cost_joy_fri_tx}")

Executed [12] Iterations
Total Execution Time: 00:01:19
Total Input Tokens - 16908
Total Input Cost = 0.17
Total Output Tokens - 6217
Total Output Cost = 0.19
Total Cost = 0.36


In [226]:
#Drop the "```json" / "```" fence markers from every raw API reply and replace each "[]" with a
#single blank positive/negative entry, then load the cleaned replies into a DataFrame
joy_fri_tx_nonnull_api_cleaned = list(map(
    lambda raw_reply: raw_reply.replace("```json", "")
                               .replace("```", "")
                               .replace("[]", "[{\"positive\": \"\", \"negative\": \"\"}]"),
    joy_fri_tx_nonnull_api,
))
#Topic names used as the sentiment columns
column_names2 = [
    'Trust',
    'Store Experience',
    'Store Staff',
    'Product Design',
    'Product Variety',
    'Discount',
    'Making Charge',
    'Price',
    'Product Quality',
    'OLD Gold Jewellery Exchange',
]
joy_fri_tx_nonnull_api_cleaned_df = pd.DataFrame(joy_fri_tx_nonnull_api_cleaned)

In [227]:
#Turn each cleaned API reply (a JSON object of commentor -> feedback entries) into one record per
#commentor holding -1 / 0 / 1 for every topic in column_names2
processed_data_joy_fri_tx_nonnull = []

#Iterate over each row in the DataFrame
for index, row in joy_fri_tx_nonnull_api_cleaned_df.iterrows():
    try:
        #Assuming the JSON string is in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Iterate over each key-value pair in the dictionary
        for commentor_name, feedback_list in data.items():
            #Create a dictionary for each commentor with default values as 0
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #Check if there is any feedback for the commentor
            if feedback_list:
                #Since there could be multiple feedback entries for a single commentor, we aggregate them
                for feedback in feedback_list:
                    #Strip whitespace so "Trust, Price" yields "Price" rather than " Price";
                    #a missing or null value is treated as "no topics"
                    positive = [name.strip() for name in (feedback.get("positive") or "").split(',')]
                    negative = [name.strip() for name in (feedback.get("negative") or "").split(',')]

                    for topic in column_names2:
                        #Aggregate the value: 1 for positive, -1 for negative, 0 for not mentioned
                        commentor_data[topic] += assign_value(topic, positive, negative)

                #Finalize the values to be within -1, 0, or 1
                for topic in column_names2:
                    commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_joy_fri_tx_nonnull.append(commentor_data)
    except Exception as exc:
        #Skip rows that cannot be parsed, reporting the cause; KeyboardInterrupt is no longer swallowed
        #NOTE(review): commentors appended before a mid-row failure are kept, and skipped rows make
        #the result shorter than the reviews sent - confirm the later merge still lines up
        print(f"Invalid JSON in row {index}: {exc!r}")

#Write the cleaned API responses to Excel, then reload them from that file
joy_fri_tx_nonnull_api_cleaned_df.to_excel("joy_fri_tx_nonnull_api_cleaned_df.xlsx",index=False)

joy_fri_tx_nonnull_api_cleaned_df = pd.read_excel("joy_fri_tx_nonnull_api_cleaned_df.xlsx")

joy_fri_tx_nonnull_api_cleaned_df

In [228]:
#Column order for the sentiment frame: commentor name first, then one column per topic
ordered_columns = ["Commentor Name",
                   "Trust",
                   "Store Experience",
                   "Store Staff",
                   "Product Design",
                   "Product Variety",
                   "Discount",
                   "Making Charge",
                   "Price",
                   "Product Quality",
                   "OLD Gold Jewellery Exchange"]
#Build the frame directly in the target column order; unlike selecting the columns afterwards,
#this also works when no rows were processed (empty frame instead of a KeyError)
joy_fri_tx_nonnull_sen_df = pd.DataFrame(processed_data_joy_fri_tx_nonnull, columns=ordered_columns)

#The axis=1 concat pairs rows by index label, so a row-count mismatch (e.g. API rows skipped as
#invalid JSON) would silently attach sentiments to the wrong reviews or leave NaN rows
if len(combined_df_joy_fri_tx) != len(joy_fri_tx_nonnull_sen_df):
    print(f"WARNING: combined_df_joy_fri_tx has {len(combined_df_joy_fri_tx)} rows but joy_fri_tx_nonnull_sen_df has {len(joy_fri_tx_nonnull_sen_df)}")
joy_fri_tx_nonnull_merged_df = pd.concat([combined_df_joy_fri_tx, joy_fri_tx_nonnull_sen_df], axis=1)

#Stack the scored rows on top of the rows held in null_dataframes['joy_fri_tx_null']
joy_fri_tx_final_sen_df = pd.concat([joy_fri_tx_nonnull_merged_df,null_dataframes['joy_fri_tx_null']], ignore_index=True)

#Work on a copy and keep only the first 10 characters of the stringified publish date
joy_fri_tx_final_sen_df_copy = joy_fri_tx_final_sen_df.copy()
joy_fri_tx_final_sen_df_copy["Published At Date"] = joy_fri_tx_final_sen_df_copy["Published At Date"].astype(str).str[:10]

#Make sure the output folder exists so the export does not fail on a fresh checkout
os.makedirs("sentiment_raw_output", exist_ok=True)
joy_fri_tx_final_sen_df_copy.to_excel("sentiment_raw_output/joy_fri_tx_final_sen_df_jul.xlsx",index=False)



### mal_chi_il

In [229]:
#Send every mal_chi_il non-null review bucket to score_sentiments_jul in fixed-size batches while a
#background thread prints live progress; token usage and estimated cost are totalled at the end.
batch_size_mal_chi_il = 25  #rows sent per score_sentiments_jul call
batch_counter = [0]  #one-element list so the progress thread can read the live count
#Derive the batch total from the buckets themselves so it always matches the loop below
total_batches = sum(math.ceil(len(bucket_df) / batch_size_mal_chi_il) for bucket_df in mal_chi_il_nonnull_buckets.values())
key_counter = 0
total_keys = len(mal_chi_il_nonnull_buckets)
mal_chi_il_nonnull_api = []
input_tokens_mal_chi_il_nonnull = 0
output_tokens_mal_chi_il_nonnull = 0
start_time_mal_chi_il = time.time()

for key in mal_chi_il_nonnull_buckets.keys():
    key_counter += 1
    current_df = mal_chi_il_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    # Threading setup
    total_iterations = math.ceil(len(current_df) / batch_size_mal_chi_il)
    stop_event = threading.Event()
    message_thread = threading.Thread(target=print_dynamic_message, args=(batch_counter,key_counter,counter,total_batches,total_keys, total_iterations,start_time_mal_chi_il, stop_event))
    message_thread.start()

    try:
        for start in range(0, len(current_df), batch_size_mal_chi_il):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + batch_size_mal_chi_il
            result, it, ot = score_sentiments_jul(current_df[start:end])
            mal_chi_il_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        # Always stop the progress thread, even when an API call raises;
        # otherwise it would keep looping and clearing the cell output
        stop_event.set()
        message_thread.join()
    input_tokens_mal_chi_il_nonnull += input_tokens
    output_tokens_mal_chi_il_nonnull += output_tokens
    clear_output(wait=True)

overall_execution_time_mal_chi_il = time.time() - start_time_mal_chi_il
formatted_time_mal_chi_il = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_mal_chi_il))
#Cost estimate: 0.01 per 1,000 input tokens and 0.03 per 1,000 output tokens, rounded to 2 decimals
input_token_cost_mal_chi_il = round((0.01/1000) * input_tokens_mal_chi_il_nonnull, 2)
output_token_cost_mal_chi_il = round((0.03/1000) * output_tokens_mal_chi_il_nonnull, 2)
total_cost_mal_chi_il = round(input_token_cost_mal_chi_il + output_token_cost_mal_chi_il, 2)

#Print the count itself rather than the one-element list that holds it
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_mal_chi_il}")
print(f"Total Input Tokens - {input_tokens_mal_chi_il_nonnull}")
print(f"Total Input Cost = {input_token_cost_mal_chi_il}")
print(f"Total Output Tokens - {output_tokens_mal_chi_il_nonnull}")
print(f"Total Output Cost = {output_token_cost_mal_chi_il}")
print(f"Total Cost = {total_cost_mal_chi_il}")

Executed [13] Iterations
Total Execution Time: 00:01:22
Total Input Tokens - 18124
Total Input Cost = 0.18
Total Output Tokens - 7052
Total Output Cost = 0.21
Total Cost = 0.39


In [230]:
#Drop the "```json" / "```" fence markers from every raw API reply and replace each "[]" with a
#single blank positive/negative entry, then load the cleaned replies into a DataFrame
mal_chi_il_nonnull_api_cleaned = list(map(
    lambda raw_reply: raw_reply.replace("```json", "")
                               .replace("```", "")
                               .replace("[]", "[{\"positive\": \"\", \"negative\": \"\"}]"),
    mal_chi_il_nonnull_api,
))
#Topic names used as the sentiment columns
column_names2 = [
    'Trust',
    'Store Experience',
    'Store Staff',
    'Product Design',
    'Product Variety',
    'Discount',
    'Making Charge',
    'Price',
    'Product Quality',
    'OLD Gold Jewellery Exchange',
]
mal_chi_il_nonnull_api_cleaned_df = pd.DataFrame(mal_chi_il_nonnull_api_cleaned)

In [231]:
#Turn each cleaned API reply (a JSON object of commentor -> feedback entries) into one record per
#commentor holding -1 / 0 / 1 for every topic in column_names2
processed_data_mal_chi_il_nonnull = []

#Iterate over each row in the DataFrame
for index, row in mal_chi_il_nonnull_api_cleaned_df.iterrows():
    try:
        #Assuming the JSON string is in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Iterate over each key-value pair in the dictionary
        for commentor_name, feedback_list in data.items():
            #Create a dictionary for each commentor with default values as 0
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #Check if there is any feedback for the commentor
            if feedback_list:
                #Since there could be multiple feedback entries for a single commentor, we aggregate them
                for feedback in feedback_list:
                    #Strip whitespace so "Trust, Price" yields "Price" rather than " Price";
                    #a missing or null value is treated as "no topics"
                    positive = [name.strip() for name in (feedback.get("positive") or "").split(',')]
                    negative = [name.strip() for name in (feedback.get("negative") or "").split(',')]

                    for topic in column_names2:
                        #Aggregate the value: 1 for positive, -1 for negative, 0 for not mentioned
                        commentor_data[topic] += assign_value(topic, positive, negative)

                #Finalize the values to be within -1, 0, or 1
                for topic in column_names2:
                    commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_mal_chi_il_nonnull.append(commentor_data)
    except Exception as exc:
        #Skip rows that cannot be parsed, reporting the cause; KeyboardInterrupt is no longer swallowed
        #NOTE(review): commentors appended before a mid-row failure are kept, and skipped rows make
        #the result shorter than the reviews sent - confirm the later merge still lines up
        print(f"Invalid JSON in row {index}: {exc!r}")

#Write the cleaned API responses to Excel, then reload them from that file
mal_chi_il_nonnull_api_cleaned_df.to_excel("mal_chi_il_nonnull_api_cleaned_df.xlsx",index=False)

mal_chi_il_nonnull_api_cleaned_df = pd.read_excel("mal_chi_il_nonnull_api_cleaned_df.xlsx")

mal_chi_il_nonnull_api_cleaned_df

In [232]:
#Column order for the sentiment frame: commentor name first, then one column per topic
ordered_columns = ["Commentor Name",
                   "Trust",
                   "Store Experience",
                   "Store Staff",
                   "Product Design",
                   "Product Variety",
                   "Discount",
                   "Making Charge",
                   "Price",
                   "Product Quality",
                   "OLD Gold Jewellery Exchange"]
#Build the frame directly in the target column order; unlike selecting the columns afterwards,
#this also works when no rows were processed (empty frame instead of a KeyError)
mal_chi_il_nonnull_sen_df = pd.DataFrame(processed_data_mal_chi_il_nonnull, columns=ordered_columns)

#The axis=1 concat pairs rows by index label, so a row-count mismatch (e.g. API rows skipped as
#invalid JSON) would silently attach sentiments to the wrong reviews or leave NaN rows
if len(combined_df_mal_chi_il) != len(mal_chi_il_nonnull_sen_df):
    print(f"WARNING: combined_df_mal_chi_il has {len(combined_df_mal_chi_il)} rows but mal_chi_il_nonnull_sen_df has {len(mal_chi_il_nonnull_sen_df)}")
mal_chi_il_nonnull_merged_df = pd.concat([combined_df_mal_chi_il, mal_chi_il_nonnull_sen_df], axis=1)

#Stack the scored rows on top of the rows held in null_dataframes['mal_chi_il_null']
mal_chi_il_final_sen_df = pd.concat([mal_chi_il_nonnull_merged_df,null_dataframes['mal_chi_il_null']], ignore_index=True)

#Work on a copy and keep only the first 10 characters of the stringified publish date
mal_chi_il_final_sen_df_copy = mal_chi_il_final_sen_df.copy()
mal_chi_il_final_sen_df_copy["Published At Date"] = mal_chi_il_final_sen_df_copy["Published At Date"].astype(str).str[:10]

#Make sure the output folder exists so the export does not fail on a fresh checkout
os.makedirs("sentiment_raw_output", exist_ok=True)
mal_chi_il_final_sen_df_copy.to_excel("sentiment_raw_output/mal_chi_il_final_sen_df_jul.xlsx",index=False)



### mal_nap_il

In [233]:
#Send every mal_nap_il non-null review bucket to score_sentiments_jul in fixed-size batches while a
#background thread prints live progress; token usage and estimated cost are totalled at the end.
batch_size_mal_nap_il = 25  #rows sent per score_sentiments_jul call
batch_counter = [0]  #one-element list so the progress thread can read the live count
#Derive the batch total from the buckets themselves so it always matches the loop below
total_batches = sum(math.ceil(len(bucket_df) / batch_size_mal_nap_il) for bucket_df in mal_nap_il_nonnull_buckets.values())
key_counter = 0
total_keys = len(mal_nap_il_nonnull_buckets)
mal_nap_il_nonnull_api = []
input_tokens_mal_nap_il_nonnull = 0
output_tokens_mal_nap_il_nonnull = 0
start_time_mal_nap_il = time.time()

for key in mal_nap_il_nonnull_buckets.keys():
    key_counter += 1
    current_df = mal_nap_il_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    # Threading setup
    total_iterations = math.ceil(len(current_df) / batch_size_mal_nap_il)
    stop_event = threading.Event()
    message_thread = threading.Thread(target=print_dynamic_message, args=(batch_counter,key_counter,counter,total_batches,total_keys, total_iterations,start_time_mal_nap_il, stop_event))
    message_thread.start()

    try:
        for start in range(0, len(current_df), batch_size_mal_nap_il):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + batch_size_mal_nap_il
            result, it, ot = score_sentiments_jul(current_df[start:end])
            mal_nap_il_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        # Always stop the progress thread, even when an API call raises;
        # otherwise it would keep looping and clearing the cell output
        stop_event.set()
        message_thread.join()
    input_tokens_mal_nap_il_nonnull += input_tokens
    output_tokens_mal_nap_il_nonnull += output_tokens
    clear_output(wait=True)

overall_execution_time_mal_nap_il = time.time() - start_time_mal_nap_il
formatted_time_mal_nap_il = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_mal_nap_il))
#Cost estimate: 0.01 per 1,000 input tokens and 0.03 per 1,000 output tokens, rounded to 2 decimals
input_token_cost_mal_nap_il = round((0.01/1000) * input_tokens_mal_nap_il_nonnull, 2)
output_token_cost_mal_nap_il = round((0.03/1000) * output_tokens_mal_nap_il_nonnull, 2)
total_cost_mal_nap_il = round(input_token_cost_mal_nap_il + output_token_cost_mal_nap_il, 2)

#Print the count itself rather than the one-element list that holds it
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_mal_nap_il}")
print(f"Total Input Tokens - {input_tokens_mal_nap_il_nonnull}")
print(f"Total Input Cost = {input_token_cost_mal_nap_il}")
print(f"Total Output Tokens - {output_tokens_mal_nap_il_nonnull}")
print(f"Total Output Cost = {output_token_cost_mal_nap_il}")
print(f"Total Cost = {total_cost_mal_nap_il}")

Executed [17] Iterations
Total Execution Time: 00:01:52
Total Input Tokens - 23167
Total Input Cost = 0.23
Total Output Tokens - 9785
Total Output Cost = 0.29
Total Cost = 0.52


In [234]:
#Drop the "```json" / "```" fence markers from every raw API reply and replace each "[]" with a
#single blank positive/negative entry, then load the cleaned replies into a DataFrame
mal_nap_il_nonnull_api_cleaned = list(map(
    lambda raw_reply: raw_reply.replace("```json", "")
                               .replace("```", "")
                               .replace("[]", "[{\"positive\": \"\", \"negative\": \"\"}]"),
    mal_nap_il_nonnull_api,
))
#Topic names used as the sentiment columns
column_names2 = [
    'Trust',
    'Store Experience',
    'Store Staff',
    'Product Design',
    'Product Variety',
    'Discount',
    'Making Charge',
    'Price',
    'Product Quality',
    'OLD Gold Jewellery Exchange',
]
mal_nap_il_nonnull_api_cleaned_df = pd.DataFrame(mal_nap_il_nonnull_api_cleaned)

In [235]:
#Turn every cleaned API response (one JSON object per row) into one record per commentor
processed_data_mal_nap_il_nonnull = []

#Iterate over each row in the DataFrame
for index, row in mal_nap_il_nonnull_api_cleaned_df.iterrows():
    try:
        #Assuming the JSON string is in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Each key is a commentor name mapped to a list of feedback entries
        for commentor_name, feedback_list in data.items():
            #Every topic starts at 0 (not mentioned)
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #A commentor may have several feedback entries; an empty list leaves all topics at 0
            for feedback in feedback_list or []:
                #Topics arrive as comma-separated text. Strip each one so "Trust, Price"
                #matches both topic names, and treat a JSON null like an empty string.
                positive = [t.strip() for t in (feedback.get("positive") or "").split(',')]
                negative = [t.strip() for t in (feedback.get("negative") or "").split(',')]

                for topic in column_names2:
                    #Aggregate the value: 1 for positive, -1 for negative, 0 for not mentioned
                    commentor_data[topic] += assign_value(topic, positive, negative)

            #Finalize the aggregated values to be within -1, 0, or 1
            for topic in column_names2:
                commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_mal_nap_il_nonnull.append(commentor_data)
    except Exception as err:
        #Skip the rest of a row that cannot be parsed, but say why. Catching Exception
        #(not a bare except) lets KeyboardInterrupt still stop the cell.
        print(f"Invalid JSON in row {index}: {type(err).__name__}: {err}")

#Round-trip the cleaned responses through Excel, then display the reloaded frame
mal_nap_il_nonnull_api_cleaned_df.to_excel("mal_nap_il_nonnull_api_cleaned_df.xlsx",index=False)

mal_nap_il_nonnull_api_cleaned_df = pd.read_excel("mal_nap_il_nonnull_api_cleaned_df.xlsx")

mal_nap_il_nonnull_api_cleaned_df

In [236]:
#Column order for the sentiment frame: commentor first, then the ten topics
ordered_columns = ["Commentor Name",
                   "Trust",
                   "Store Experience",
                   "Store Staff",
                   "Product Design",
                   "Product Variety",
                   "Discount",
                   "Making Charge",
                   "Price",
                   "Product Quality",
                   "OLD Gold Jewellery Exchange"]
#Create the DataFrame from the processed data directly in that order. Passing columns=
#(rather than indexing afterwards) also yields an empty, correctly labelled frame when
#no commentor was parsed, where indexing would raise KeyError.
mal_nap_il_nonnull_sen_df = pd.DataFrame(processed_data_mal_nap_il_nonnull, columns=ordered_columns)

#Put the sentiment columns beside the review rows.
#NOTE(review): concat(axis=1) aligns on index labels - this assumes combined_df_mal_nap_il
#has a 0..n-1 index in the same order as the parsed commentors; confirm upstream.
mal_nap_il_nonnull_merged_df = pd.concat([combined_df_mal_nap_il, mal_nap_il_nonnull_sen_df], axis=1)

#Stack the rows held under the 'mal_nap_il_null' key below the scored ones
mal_nap_il_final_sen_df = pd.concat([mal_nap_il_nonnull_merged_df,null_dataframes['mal_nap_il_null']], ignore_index=True)

#Export a copy whose date column keeps only the first 10 characters of its string form
mal_nap_il_final_sen_df_copy = mal_nap_il_final_sen_df.copy()
mal_nap_il_final_sen_df_copy["Published At Date"] = mal_nap_il_final_sen_df_copy["Published At Date"].astype(str).str[:10]

mal_nap_il_final_sen_df_copy.to_excel("sentiment_raw_output/mal_nap_il_final_sen_df_jul.xlsx",index=False)



### mal_ise_nj

In [237]:
#Score every word-count bucket of mal_ise_nj through the API, 25 reviews per call
batch_counter = [0]  #one-element list, mutated in place so the progress thread reads live values
#Batches needed overall: ceil(rows/25) for every bucket the loop below visits
total_batches = sum(math.ceil(len(bucket_rows)/25) for bucket_rows in mal_ise_nj_nonnull_buckets.values())
key_counter = 0
total_keys=len(mal_ise_nj_nonnull_buckets.keys())
mal_ise_nj_nonnull_api = []
input_tokens_mal_ise_nj_nonnull=0
output_tokens_mal_ise_nj_nonnull=0
start_time_mal_ise_nj = time.time()

for key in mal_ise_nj_nonnull_buckets.keys():
    key_counter+=1
    current_df = mal_ise_nj_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0
        
    # Threading setup: a fresh progress thread and stop flag for each bucket
    total_iterations = math.ceil(len(current_df) / 25)
    stop_event = threading.Event()
    message_thread = threading.Thread(target=print_dynamic_message, args=(batch_counter,key_counter,counter,total_batches,total_keys, total_iterations,start_time_mal_ise_nj, stop_event))
    message_thread.start()

    try:
        for start in range(0, len(current_df), 25):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + 25
            #Presumably returns (response text, input token count, output token count) - confirm
            result, it, ot = score_sentiments_jul(current_df[start:end])
            mal_ise_nj_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        # Stopping the dynamic message thread even when a batch raises: it loops until
        # stop_event is set, so it would otherwise keep clearing the cell output
        stop_event.set()
        message_thread.join()
    input_tokens_mal_ise_nj_nonnull+=input_tokens
    output_tokens_mal_ise_nj_nonnull+=output_tokens
    clear_output(wait=True)
    
#Run summary: elapsed time plus token cost (0.01 per 1000 input, 0.03 per 1000 output tokens)
overall_execution_time_mal_ise_nj = time.time() - start_time_mal_ise_nj
formatted_time_mal_ise_nj = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_mal_ise_nj))
input_token_cost_mal_ise_nj = round((0.01/1000) * input_tokens_mal_ise_nj_nonnull, 2)
output_token_cost_mal_ise_nj = round((0.03/1000) * output_tokens_mal_ise_nj_nonnull, 2)
total_cost_mal_ise_nj = round(input_token_cost_mal_ise_nj + output_token_cost_mal_ise_nj, 2)    
    
print(f"Executed {batch_counter} Iterations")
print(f"Total Execution Time: {formatted_time_mal_ise_nj}")
print(f"Total Input Tokens - {input_tokens_mal_ise_nj_nonnull}")
print(f"Total Input Cost = {input_token_cost_mal_ise_nj}")
print(f"Total Output Tokens - {output_tokens_mal_ise_nj_nonnull}")
print(f"Total Output Cost = {output_token_cost_mal_ise_nj}")
print(f"Total Cost = {total_cost_mal_ise_nj}")

Executed [18] Iterations
Total Execution Time: 00:02:07
Total Input Tokens - 24135
Total Input Cost = 0.24
Total Output Tokens - 10058
Total Output Cost = 0.3
Total Cost = 0.54


In [238]:
#Remove "```json" and "```" from each string in mal_ise_nj_nonnull_api & convert to DataFrame
mal_ise_nj_nonnull_api_cleaned = [
    #Drop the markdown code fences, then give empty feedback lists one blank entry
    raw_response.replace("```json", "").replace("```", "").replace(
        "[]", '[{"positive": "", "negative": ""}]'
    )
    for raw_response in mal_ise_nj_nonnull_api
]
#One row per element of the cleaned list, text held in a single column
mal_ise_nj_nonnull_api_cleaned_df = pd.DataFrame(mal_ise_nj_nonnull_api_cleaned)
#Topic names looked up in each commentor's positive/negative feedback
column_names2 = [
    'Trust', 'Store Experience', 'Store Staff', 'Product Design', 'Product Variety',
    'Discount', 'Making Charge', 'Price', 'Product Quality', 'OLD Gold Jewellery Exchange',
]

In [239]:
#Turn every cleaned API response (one JSON object per row) into one record per commentor
processed_data_mal_ise_nj_nonnull = []

#Iterate over each row in the DataFrame
for index, row in mal_ise_nj_nonnull_api_cleaned_df.iterrows():
    try:
        #Assuming the JSON string is in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Each key is a commentor name mapped to a list of feedback entries
        for commentor_name, feedback_list in data.items():
            #Every topic starts at 0 (not mentioned)
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #A commentor may have several feedback entries; an empty list leaves all topics at 0
            for feedback in feedback_list or []:
                #Topics arrive as comma-separated text. Strip each one so "Trust, Price"
                #matches both topic names, and treat a JSON null like an empty string.
                positive = [t.strip() for t in (feedback.get("positive") or "").split(',')]
                negative = [t.strip() for t in (feedback.get("negative") or "").split(',')]

                for topic in column_names2:
                    #Aggregate the value: 1 for positive, -1 for negative, 0 for not mentioned
                    commentor_data[topic] += assign_value(topic, positive, negative)

            #Finalize the aggregated values to be within -1, 0, or 1
            for topic in column_names2:
                commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_mal_ise_nj_nonnull.append(commentor_data)
    except Exception as err:
        #Skip the rest of a row that cannot be parsed, but say why. Catching Exception
        #(not a bare except) lets KeyboardInterrupt still stop the cell.
        print(f"Invalid JSON in row {index}: {type(err).__name__}: {err}")

#Round-trip the cleaned responses through Excel, then display the reloaded frame
mal_ise_nj_nonnull_api_cleaned_df.to_excel("mal_ise_nj_nonnull_api_cleaned_df.xlsx",index=False)

mal_ise_nj_nonnull_api_cleaned_df = pd.read_excel("mal_ise_nj_nonnull_api_cleaned_df.xlsx")

mal_ise_nj_nonnull_api_cleaned_df

In [240]:
#Column order for the sentiment frame: commentor first, then the ten topics
ordered_columns = ["Commentor Name",
                   "Trust",
                   "Store Experience",
                   "Store Staff",
                   "Product Design",
                   "Product Variety",
                   "Discount",
                   "Making Charge",
                   "Price",
                   "Product Quality",
                   "OLD Gold Jewellery Exchange"]
#Create the DataFrame from the processed data directly in that order. Passing columns=
#(rather than indexing afterwards) also yields an empty, correctly labelled frame when
#no commentor was parsed, where indexing would raise KeyError.
mal_ise_nj_nonnull_sen_df = pd.DataFrame(processed_data_mal_ise_nj_nonnull, columns=ordered_columns)

#Put the sentiment columns beside the review rows.
#NOTE(review): concat(axis=1) aligns on index labels - this assumes combined_df_mal_ise_nj
#has a 0..n-1 index in the same order as the parsed commentors; confirm upstream.
mal_ise_nj_nonnull_merged_df = pd.concat([combined_df_mal_ise_nj, mal_ise_nj_nonnull_sen_df], axis=1)

#Stack the rows held under the 'mal_ise_nj_null' key below the scored ones
mal_ise_nj_final_sen_df = pd.concat([mal_ise_nj_nonnull_merged_df,null_dataframes['mal_ise_nj_null']], ignore_index=True)

#Export a copy whose date column keeps only the first 10 characters of its string form
mal_ise_nj_final_sen_df_copy = mal_ise_nj_final_sen_df.copy()
mal_ise_nj_final_sen_df_copy["Published At Date"] = mal_ise_nj_final_sen_df_copy["Published At Date"].astype(str).str[:10]

mal_ise_nj_final_sen_df_copy.to_excel("sentiment_raw_output/mal_ise_nj_final_sen_df_jul.xlsx",index=False)


### mal_fri_tx

In [241]:
#Score every word-count bucket of mal_fri_tx through the API, 25 reviews per call
batch_counter = [0]  #one-element list, mutated in place so the progress thread reads live values
#Batches needed overall: ceil(rows/25) for every bucket the loop below visits
total_batches = sum(math.ceil(len(bucket_rows)/25) for bucket_rows in mal_fri_tx_nonnull_buckets.values())
key_counter = 0
total_keys=len(mal_fri_tx_nonnull_buckets.keys())
mal_fri_tx_nonnull_api = []
input_tokens_mal_fri_tx_nonnull=0
output_tokens_mal_fri_tx_nonnull=0
start_time_mal_fri_tx = time.time()

for key in mal_fri_tx_nonnull_buckets.keys():
    key_counter+=1
    current_df = mal_fri_tx_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0
        
    # Threading setup: a fresh progress thread and stop flag for each bucket
    total_iterations = math.ceil(len(current_df) / 25)
    stop_event = threading.Event()
    message_thread = threading.Thread(target=print_dynamic_message, args=(batch_counter,key_counter,counter,total_batches,total_keys, total_iterations,start_time_mal_fri_tx, stop_event))
    message_thread.start()

    try:
        for start in range(0, len(current_df), 25):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + 25
            #Presumably returns (response text, input token count, output token count) - confirm
            result, it, ot = score_sentiments_jul(current_df[start:end])
            mal_fri_tx_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        # Stopping the dynamic message thread even when a batch raises: it loops until
        # stop_event is set, so it would otherwise keep clearing the cell output
        stop_event.set()
        message_thread.join()
    input_tokens_mal_fri_tx_nonnull+=input_tokens
    output_tokens_mal_fri_tx_nonnull+=output_tokens
    clear_output(wait=True)
    
#Run summary: elapsed time plus token cost (0.01 per 1000 input, 0.03 per 1000 output tokens)
overall_execution_time_mal_fri_tx = time.time() - start_time_mal_fri_tx
formatted_time_mal_fri_tx = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_mal_fri_tx))
input_token_cost_mal_fri_tx = round((0.01/1000) * input_tokens_mal_fri_tx_nonnull, 2)
output_token_cost_mal_fri_tx = round((0.03/1000) * output_tokens_mal_fri_tx_nonnull, 2)
total_cost_mal_fri_tx = round(input_token_cost_mal_fri_tx + output_token_cost_mal_fri_tx, 2)    
    
print(f"Executed {batch_counter} Iterations")
print(f"Total Execution Time: {formatted_time_mal_fri_tx}")
print(f"Total Input Tokens - {input_tokens_mal_fri_tx_nonnull}")
print(f"Total Input Cost = {input_token_cost_mal_fri_tx}")
print(f"Total Output Tokens - {output_tokens_mal_fri_tx_nonnull}")
print(f"Total Output Cost = {output_token_cost_mal_fri_tx}")
print(f"Total Cost = {total_cost_mal_fri_tx}")

Executed [15] Iterations
Total Execution Time: 00:01:54
Total Input Tokens - 21931
Total Input Cost = 0.22
Total Output Tokens - 8889
Total Output Cost = 0.27
Total Cost = 0.49


In [242]:
#Remove "```json" and "```" from each string in mal_fri_tx_nonnull_api & convert to DataFrame
mal_fri_tx_nonnull_api_cleaned = [
    #Drop the markdown code fences, then give empty feedback lists one blank entry
    raw_response.replace("```json", "").replace("```", "").replace(
        "[]", '[{"positive": "", "negative": ""}]'
    )
    for raw_response in mal_fri_tx_nonnull_api
]
#One row per element of the cleaned list, text held in a single column
mal_fri_tx_nonnull_api_cleaned_df = pd.DataFrame(mal_fri_tx_nonnull_api_cleaned)
#Topic names looked up in each commentor's positive/negative feedback
column_names2 = [
    'Trust', 'Store Experience', 'Store Staff', 'Product Design', 'Product Variety',
    'Discount', 'Making Charge', 'Price', 'Product Quality', 'OLD Gold Jewellery Exchange',
]

In [243]:
#Turn every cleaned API response (one JSON object per row) into one record per commentor
processed_data_mal_fri_tx_nonnull = []

#Iterate over each row in the DataFrame
for index, row in mal_fri_tx_nonnull_api_cleaned_df.iterrows():
    try:
        #Assuming the JSON string is in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Each key is a commentor name mapped to a list of feedback entries
        for commentor_name, feedback_list in data.items():
            #Every topic starts at 0 (not mentioned)
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #A commentor may have several feedback entries; an empty list leaves all topics at 0
            for feedback in feedback_list or []:
                #Topics arrive as comma-separated text. Strip each one so "Trust, Price"
                #matches both topic names, and treat a JSON null like an empty string.
                positive = [t.strip() for t in (feedback.get("positive") or "").split(',')]
                negative = [t.strip() for t in (feedback.get("negative") or "").split(',')]

                for topic in column_names2:
                    #Aggregate the value: 1 for positive, -1 for negative, 0 for not mentioned
                    commentor_data[topic] += assign_value(topic, positive, negative)

            #Finalize the aggregated values to be within -1, 0, or 1
            for topic in column_names2:
                commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_mal_fri_tx_nonnull.append(commentor_data)
    except Exception as err:
        #Skip the rest of a row that cannot be parsed, but say why. Catching Exception
        #(not a bare except) lets KeyboardInterrupt still stop the cell.
        print(f"Invalid JSON in row {index}: {type(err).__name__}: {err}")

#Round-trip the cleaned responses through Excel, then display the reloaded frame
mal_fri_tx_nonnull_api_cleaned_df.to_excel("mal_fri_tx_nonnull_api_cleaned_df.xlsx",index=False)

mal_fri_tx_nonnull_api_cleaned_df = pd.read_excel("mal_fri_tx_nonnull_api_cleaned_df.xlsx")

mal_fri_tx_nonnull_api_cleaned_df

In [244]:
#Column order for the sentiment frame: commentor first, then the ten topics
ordered_columns = ["Commentor Name",
                   "Trust",
                   "Store Experience",
                   "Store Staff",
                   "Product Design",
                   "Product Variety",
                   "Discount",
                   "Making Charge",
                   "Price",
                   "Product Quality",
                   "OLD Gold Jewellery Exchange"]
#Create the DataFrame from the processed data directly in that order. Passing columns=
#(rather than indexing afterwards) also yields an empty, correctly labelled frame when
#no commentor was parsed, where indexing would raise KeyError.
mal_fri_tx_nonnull_sen_df = pd.DataFrame(processed_data_mal_fri_tx_nonnull, columns=ordered_columns)

#Put the sentiment columns beside the review rows.
#NOTE(review): concat(axis=1) aligns on index labels - this assumes combined_df_mal_fri_tx
#has a 0..n-1 index in the same order as the parsed commentors; confirm upstream.
mal_fri_tx_nonnull_merged_df = pd.concat([combined_df_mal_fri_tx, mal_fri_tx_nonnull_sen_df], axis=1)

#Stack the rows held under the 'mal_fri_tx_null' key below the scored ones
mal_fri_tx_final_sen_df = pd.concat([mal_fri_tx_nonnull_merged_df,null_dataframes['mal_fri_tx_null']], ignore_index=True)

#Export a copy whose date column keeps only the first 10 characters of its string form
mal_fri_tx_final_sen_df_copy = mal_fri_tx_final_sen_df.copy()
mal_fri_tx_final_sen_df_copy["Published At Date"] = mal_fri_tx_final_sen_df_copy["Published At Date"].astype(str).str[:10]

mal_fri_tx_final_sen_df_copy.to_excel("sentiment_raw_output/mal_fri_tx_final_sen_df_jul.xlsx",index=False)


### mal_ric_tx

In [245]:
#Score every word-count bucket of mal_ric_tx through the API, 25 reviews per call
batch_counter = [0]  #one-element list, mutated in place so the progress thread reads live values
#Batches needed overall: ceil(rows/25) for every bucket the loop below visits
total_batches = sum(math.ceil(len(bucket_rows)/25) for bucket_rows in mal_ric_tx_nonnull_buckets.values())
key_counter = 0
total_keys=len(mal_ric_tx_nonnull_buckets.keys())
mal_ric_tx_nonnull_api = []
input_tokens_mal_ric_tx_nonnull=0
output_tokens_mal_ric_tx_nonnull=0
start_time_mal_ric_tx = time.time()

for key in mal_ric_tx_nonnull_buckets.keys():
    key_counter+=1
    current_df = mal_ric_tx_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0
        
    # Threading setup: a fresh progress thread and stop flag for each bucket
    total_iterations = math.ceil(len(current_df) / 25)
    stop_event = threading.Event()
    message_thread = threading.Thread(target=print_dynamic_message, args=(batch_counter,key_counter,counter,total_batches,total_keys, total_iterations,start_time_mal_ric_tx, stop_event))
    message_thread.start()

    try:
        for start in range(0, len(current_df), 25):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + 25
            #Presumably returns (response text, input token count, output token count) - confirm
            result, it, ot = score_sentiments_jul(current_df[start:end])
            mal_ric_tx_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        # Stopping the dynamic message thread even when a batch raises: it loops until
        # stop_event is set, so it would otherwise keep clearing the cell output
        stop_event.set()
        message_thread.join()
    input_tokens_mal_ric_tx_nonnull+=input_tokens
    output_tokens_mal_ric_tx_nonnull+=output_tokens
    clear_output(wait=True)
    
#Run summary: elapsed time plus token cost (0.01 per 1000 input, 0.03 per 1000 output tokens)
overall_execution_time_mal_ric_tx = time.time() - start_time_mal_ric_tx
formatted_time_mal_ric_tx = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_mal_ric_tx))
input_token_cost_mal_ric_tx = round((0.01/1000) * input_tokens_mal_ric_tx_nonnull, 2)
output_token_cost_mal_ric_tx = round((0.03/1000) * output_tokens_mal_ric_tx_nonnull, 2)
total_cost_mal_ric_tx = round(input_token_cost_mal_ric_tx + output_token_cost_mal_ric_tx, 2)    
    
print(f"Executed {batch_counter} Iterations")
print(f"Total Execution Time: {formatted_time_mal_ric_tx}")
print(f"Total Input Tokens - {input_tokens_mal_ric_tx_nonnull}")
print(f"Total Input Cost = {input_token_cost_mal_ric_tx}")
print(f"Total Output Tokens - {output_tokens_mal_ric_tx_nonnull}")
print(f"Total Output Cost = {output_token_cost_mal_ric_tx}")
print(f"Total Cost = {total_cost_mal_ric_tx}")

Executed [6] Iterations
Total Execution Time: 00:00:27
Total Input Tokens - 6737
Total Input Cost = 0.07
Total Output Tokens - 1834
Total Output Cost = 0.06
Total Cost = 0.13


In [246]:
#Remove "```json" and "```" from each string in mal_ric_tx_nonnull_api & convert to DataFrame
mal_ric_tx_nonnull_api_cleaned = [
    #Drop the markdown code fences, then give empty feedback lists one blank entry
    raw_response.replace("```json", "").replace("```", "").replace(
        "[]", '[{"positive": "", "negative": ""}]'
    )
    for raw_response in mal_ric_tx_nonnull_api
]
#One row per element of the cleaned list, text held in a single column
mal_ric_tx_nonnull_api_cleaned_df = pd.DataFrame(mal_ric_tx_nonnull_api_cleaned)
#Topic names looked up in each commentor's positive/negative feedback
column_names2 = [
    'Trust', 'Store Experience', 'Store Staff', 'Product Design', 'Product Variety',
    'Discount', 'Making Charge', 'Price', 'Product Quality', 'OLD Gold Jewellery Exchange',
]

In [247]:
#Turn every cleaned API response (one JSON object per row) into one record per commentor
processed_data_mal_ric_tx_nonnull = []

#Iterate over each row in the DataFrame
for index, row in mal_ric_tx_nonnull_api_cleaned_df.iterrows():
    try:
        #Assuming the JSON string is in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Each key is a commentor name mapped to a list of feedback entries
        for commentor_name, feedback_list in data.items():
            #Every topic starts at 0 (not mentioned)
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #A commentor may have several feedback entries; an empty list leaves all topics at 0
            for feedback in feedback_list or []:
                #Topics arrive as comma-separated text. Strip each one so "Trust, Price"
                #matches both topic names, and treat a JSON null like an empty string.
                positive = [t.strip() for t in (feedback.get("positive") or "").split(',')]
                negative = [t.strip() for t in (feedback.get("negative") or "").split(',')]

                for topic in column_names2:
                    #Aggregate the value: 1 for positive, -1 for negative, 0 for not mentioned
                    commentor_data[topic] += assign_value(topic, positive, negative)

            #Finalize the aggregated values to be within -1, 0, or 1
            for topic in column_names2:
                commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_mal_ric_tx_nonnull.append(commentor_data)
    except Exception as err:
        #Skip the rest of a row that cannot be parsed, but say why. Catching Exception
        #(not a bare except) lets KeyboardInterrupt still stop the cell.
        print(f"Invalid JSON in row {index}: {type(err).__name__}: {err}")

#Round-trip the cleaned responses through Excel, then display the reloaded frame
mal_ric_tx_nonnull_api_cleaned_df.to_excel("mal_ric_tx_nonnull_api_cleaned_df.xlsx",index=False)

mal_ric_tx_nonnull_api_cleaned_df = pd.read_excel("mal_ric_tx_nonnull_api_cleaned_df.xlsx")

mal_ric_tx_nonnull_api_cleaned_df

In [248]:
#Column order for the sentiment frame: commentor first, then the ten topics
ordered_columns = ["Commentor Name",
                   "Trust",
                   "Store Experience",
                   "Store Staff",
                   "Product Design",
                   "Product Variety",
                   "Discount",
                   "Making Charge",
                   "Price",
                   "Product Quality",
                   "OLD Gold Jewellery Exchange"]
#Create the DataFrame from the processed data directly in that order. Passing columns=
#(rather than indexing afterwards) also yields an empty, correctly labelled frame when
#no commentor was parsed, where indexing would raise KeyError.
mal_ric_tx_nonnull_sen_df = pd.DataFrame(processed_data_mal_ric_tx_nonnull, columns=ordered_columns)

#Put the sentiment columns beside the review rows.
#NOTE(review): concat(axis=1) aligns on index labels - this assumes combined_df_mal_ric_tx
#has a 0..n-1 index in the same order as the parsed commentors; confirm upstream.
mal_ric_tx_nonnull_merged_df = pd.concat([combined_df_mal_ric_tx, mal_ric_tx_nonnull_sen_df], axis=1)

#Stack the rows held under the 'mal_ric_tx_null' key below the scored ones
mal_ric_tx_final_sen_df = pd.concat([mal_ric_tx_nonnull_merged_df,null_dataframes['mal_ric_tx_null']], ignore_index=True)

#Export a copy whose date column keeps only the first 10 characters of its string form
mal_ric_tx_final_sen_df_copy = mal_ric_tx_final_sen_df.copy()
mal_ric_tx_final_sen_df_copy["Published At Date"] = mal_ric_tx_final_sen_df_copy["Published At Date"].astype(str).str[:10]

mal_ric_tx_final_sen_df_copy.to_excel("sentiment_raw_output/mal_ric_tx_final_sen_df_jul.xlsx",index=False)


### may_vie_va

In [249]:
#Score every word-count bucket of may_vie_va through the API, 25 reviews per call
batch_counter = [0]  #one-element list, mutated in place so the progress thread reads live values
#Batches needed overall: ceil(rows/25) for every bucket the loop below visits
total_batches = sum(math.ceil(len(bucket_rows)/25) for bucket_rows in may_vie_va_nonnull_buckets.values())
key_counter = 0
total_keys=len(may_vie_va_nonnull_buckets.keys())
may_vie_va_nonnull_api = []
input_tokens_may_vie_va_nonnull=0
output_tokens_may_vie_va_nonnull=0
start_time_may_vie_va = time.time()

for key in may_vie_va_nonnull_buckets.keys():
    key_counter+=1
    current_df = may_vie_va_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0
        
    # Threading setup: a fresh progress thread and stop flag for each bucket
    total_iterations = math.ceil(len(current_df) / 25)
    stop_event = threading.Event()
    message_thread = threading.Thread(target=print_dynamic_message, args=(batch_counter,key_counter,counter,total_batches,total_keys, total_iterations,start_time_may_vie_va, stop_event))
    message_thread.start()

    try:
        for start in range(0, len(current_df), 25):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + 25
            #Presumably returns (response text, input token count, output token count) - confirm
            result, it, ot = score_sentiments_jul(current_df[start:end])
            may_vie_va_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        # Stopping the dynamic message thread even when a batch raises: it loops until
        # stop_event is set, so it would otherwise keep clearing the cell output
        stop_event.set()
        message_thread.join()
    input_tokens_may_vie_va_nonnull+=input_tokens
    output_tokens_may_vie_va_nonnull+=output_tokens
    clear_output(wait=True)
    
#Run summary: elapsed time plus token cost (0.01 per 1000 input, 0.03 per 1000 output tokens)
overall_execution_time_may_vie_va = time.time() - start_time_may_vie_va
formatted_time_may_vie_va = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_may_vie_va))
input_token_cost_may_vie_va = round((0.01/1000) * input_tokens_may_vie_va_nonnull, 2)
output_token_cost_may_vie_va = round((0.03/1000) * output_tokens_may_vie_va_nonnull, 2)
total_cost_may_vie_va = round(input_token_cost_may_vie_va + output_token_cost_may_vie_va, 2)    
    
print(f"Executed {batch_counter} Iterations")
print(f"Total Execution Time: {formatted_time_may_vie_va}")
print(f"Total Input Tokens - {input_tokens_may_vie_va_nonnull}")
print(f"Total Input Cost = {input_token_cost_may_vie_va}")
print(f"Total Output Tokens - {output_tokens_may_vie_va_nonnull}")
print(f"Total Output Cost = {output_token_cost_may_vie_va}")
print(f"Total Cost = {total_cost_may_vie_va}")

Executed [1] Iterations
Total Execution Time: 00:00:01
Total Input Tokens - 658
Total Input Cost = 0.01
Total Output Tokens - 35
Total Output Cost = 0.0
Total Cost = 0.01


In [250]:
#Remove "```json" and "```" from each string in may_vie_va_nonnull_api & convert to DataFrame
may_vie_va_nonnull_api_cleaned = [
    #Drop the markdown code fences, then give empty feedback lists one blank entry
    raw_response.replace("```json", "").replace("```", "").replace(
        "[]", '[{"positive": "", "negative": ""}]'
    )
    for raw_response in may_vie_va_nonnull_api
]
#One row per element of the cleaned list, text held in a single column
may_vie_va_nonnull_api_cleaned_df = pd.DataFrame(may_vie_va_nonnull_api_cleaned)
#Topic names looked up in each commentor's positive/negative feedback
column_names2 = [
    'Trust', 'Store Experience', 'Store Staff', 'Product Design', 'Product Variety',
    'Discount', 'Making Charge', 'Price', 'Product Quality', 'OLD Gold Jewellery Exchange',
]

In [251]:
#Turn every cleaned API response (one JSON object per row) into one record per commentor
processed_data_may_vie_va_nonnull = []

#Iterate over each row in the DataFrame
for index, row in may_vie_va_nonnull_api_cleaned_df.iterrows():
    try:
        #Assuming the JSON string is in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Each key is a commentor name mapped to a list of feedback entries
        for commentor_name, feedback_list in data.items():
            #Every topic starts at 0 (not mentioned)
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #A commentor may have several feedback entries; an empty list leaves all topics at 0
            for feedback in feedback_list or []:
                #Topics arrive as comma-separated text. Strip each one so "Trust, Price"
                #matches both topic names, and treat a JSON null like an empty string.
                positive = [t.strip() for t in (feedback.get("positive") or "").split(',')]
                negative = [t.strip() for t in (feedback.get("negative") or "").split(',')]

                for topic in column_names2:
                    #Aggregate the value: 1 for positive, -1 for negative, 0 for not mentioned
                    commentor_data[topic] += assign_value(topic, positive, negative)

            #Finalize the aggregated values to be within -1, 0, or 1
            for topic in column_names2:
                commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_may_vie_va_nonnull.append(commentor_data)
    except Exception as err:
        #Skip the rest of a row that cannot be parsed, but say why. Catching Exception
        #(not a bare except) lets KeyboardInterrupt still stop the cell.
        print(f"Invalid JSON in row {index}: {type(err).__name__}: {err}")

#Round-trip the cleaned responses through Excel, then display the reloaded frame
may_vie_va_nonnull_api_cleaned_df.to_excel("may_vie_va_nonnull_api_cleaned_df.xlsx",index=False)

may_vie_va_nonnull_api_cleaned_df = pd.read_excel("may_vie_va_nonnull_api_cleaned_df.xlsx")

may_vie_va_nonnull_api_cleaned_df

In [252]:
#Column order for the sentiment frame: commentor first, then the ten topics
ordered_columns = ["Commentor Name",
                   "Trust",
                   "Store Experience",
                   "Store Staff",
                   "Product Design",
                   "Product Variety",
                   "Discount",
                   "Making Charge",
                   "Price",
                   "Product Quality",
                   "OLD Gold Jewellery Exchange"]
#Create the DataFrame from the processed data directly in that order. Passing columns=
#(rather than indexing afterwards) also yields an empty, correctly labelled frame when
#no commentor was parsed, where indexing would raise KeyError.
may_vie_va_nonnull_sen_df = pd.DataFrame(processed_data_may_vie_va_nonnull, columns=ordered_columns)

#Put the sentiment columns beside the review rows.
#NOTE(review): concat(axis=1) aligns on index labels - this assumes combined_df_may_vie_va
#has a 0..n-1 index in the same order as the parsed commentors; confirm upstream.
may_vie_va_nonnull_merged_df = pd.concat([combined_df_may_vie_va, may_vie_va_nonnull_sen_df], axis=1)

#Stack the rows held under the 'may_vie_va_null' key below the scored ones
may_vie_va_final_sen_df = pd.concat([may_vie_va_nonnull_merged_df,null_dataframes['may_vie_va_null']], ignore_index=True)

#Export a copy whose date column keeps only the first 10 characters of its string form
may_vie_va_final_sen_df_copy = may_vie_va_final_sen_df.copy()
may_vie_va_final_sen_df_copy["Published At Date"] = may_vie_va_final_sen_df_copy["Published At Date"].astype(str).str[:10]

may_vie_va_final_sen_df_copy.to_excel("sentiment_raw_output/may_vie_va_final_sen_df_jul.xlsx",index=False)


### son_ise_nj

In [253]:
#Score every non-null review bucket for son_ise_nj in batches, tracking progress, token usage and cost.
batch_size = 25  #number of reviews sent to score_sentiments_jul per call
batch_counter = [0]  #one-element list so the progress thread sees updates made here
#Count batches over the same buckets the loop below iterates, so the progress total always matches the work done
total_batches = sum(math.ceil(len(bucket_df) / batch_size) for bucket_df in son_ise_nj_nonnull_buckets.values())
key_counter = 0
total_keys = len(son_ise_nj_nonnull_buckets)
son_ise_nj_nonnull_api = []
input_tokens_son_ise_nj_nonnull = 0
output_tokens_son_ise_nj_nonnull = 0
start_time_son_ise_nj = time.time()

for key in son_ise_nj_nonnull_buckets.keys():
    key_counter += 1
    current_df = son_ise_nj_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    # Threading setup
    total_iterations = math.ceil(len(current_df) / batch_size)
    stop_event = threading.Event()
    message_thread = threading.Thread(
        target=print_dynamic_message,
        args=(batch_counter, key_counter, counter, total_batches, total_keys,
              total_iterations, start_time_son_ise_nj, stop_event),
    )
    message_thread.start()

    try:
        for start in range(0, len(current_df), batch_size):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + batch_size
            result, it, ot = score_sentiments_jul(current_df[start:end])
            son_ise_nj_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        #Always stop the progress thread, even if an API call raises;
        #otherwise it would keep looping and clearing the cell output
        stop_event.set()
        message_thread.join()
    input_tokens_son_ise_nj_nonnull += input_tokens
    output_tokens_son_ise_nj_nonnull += output_tokens
    clear_output(wait=True)

overall_execution_time_son_ise_nj = time.time() - start_time_son_ise_nj
formatted_time_son_ise_nj = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_son_ise_nj))
#Rates applied: 0.01 per 1000 input tokens and 0.03 per 1000 output tokens
input_token_cost_son_ise_nj = round((0.01/1000) * input_tokens_son_ise_nj_nonnull, 2)
output_token_cost_son_ise_nj = round((0.03/1000) * output_tokens_son_ise_nj_nonnull, 2)
total_cost_son_ise_nj = round(input_token_cost_son_ise_nj + output_token_cost_son_ise_nj, 2)

#Print the count itself rather than the one-element list wrapping it
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_son_ise_nj}")
print(f"Total Input Tokens - {input_tokens_son_ise_nj_nonnull}")
print(f"Total Input Cost = {input_token_cost_son_ise_nj}")
print(f"Total Output Tokens - {output_tokens_son_ise_nj_nonnull}")
print(f"Total Output Cost = {output_token_cost_son_ise_nj}")
print(f"Total Cost = {total_cost_son_ise_nj}")

Executed [4] Iterations
Total Execution Time: 00:00:07
Total Input Tokens - 2836
Total Input Cost = 0.03
Total Output Tokens - 190
Total Output Cost = 0.01
Total Cost = 0.04


In [254]:
#Strip the Markdown code fences ("```json" and "```") from every raw API response and
#swap each literal "[]" for a one-element list holding blank positive/negative strings
son_ise_nj_nonnull_api_cleaned = []
for raw_response in son_ise_nj_nonnull_api:
    unfenced_response = raw_response.replace("```json", "").replace("```", "")
    son_ise_nj_nonnull_api_cleaned.append(
        unfenced_response.replace("[]", "[{\"positive\": \"\", \"negative\": \"\"}]")
    )

#One row per API response
son_ise_nj_nonnull_api_cleaned_df = pd.DataFrame(son_ise_nj_nonnull_api_cleaned)

#Sentiment topics scored for every commentor
column_names2 = [
    'Trust',
    'Store Experience',
    'Store Staff',
    'Product Design',
    'Product Variety',
    'Discount',
    'Making Charge',
    'Price',
    'Product Quality',
    'OLD Gold Jewellery Exchange',
]

In [255]:
#Turn each JSON API response into one record of topic sentiment scores per commentor
processed_data_son_ise_nj_nonnull = []

#Iterate over each row in the DataFrame
for index, row in son_ise_nj_nonnull_api_cleaned_df.iterrows():
    try:
        #Assuming the JSON string is in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Each key is a commentor name mapped to a list of feedback entries
        for commentor_name, feedback_list in data.items():
            #Every topic starts at 0 (not mentioned)
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #A commentor can have several feedback entries; an empty/missing list leaves all topics at 0
            for feedback in feedback_list or []:
                #Strip whitespace so a value like "Trust, Price" still matches the topic "Price"
                positive = [item.strip() for item in feedback.get("positive", "").split(',')]
                negative = [item.strip() for item in feedback.get("negative", "").split(',')]

                for topic in column_names2:
                    #Aggregate the value: 1 for positive, -1 for negative, 0 for not mentioned
                    commentor_data[topic] += assign_value(topic, positive, negative)

            #Clamp the aggregated scores to the range -1..1
            for topic in column_names2:
                commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_son_ise_nj_nonnull.append(commentor_data)
    except Exception as exc:
        #Skip rows that cannot be parsed, but report the actual error; unlike a bare except this
        #no longer swallows KeyboardInterrupt/SystemExit.
        #NOTE(review): commentors appended before a failure within the same row are kept, the rest of that row is lost
        print(f"Invalid JSON in row {index} ({type(exc).__name__}: {exc})")

#Persist the cleaned API responses to Excel, then load them back from that same file
son_ise_nj_nonnull_api_cleaned_df.to_excel("son_ise_nj_nonnull_api_cleaned_df.xlsx",index=False)

son_ise_nj_nonnull_api_cleaned_df = pd.read_excel("son_ise_nj_nonnull_api_cleaned_df.xlsx")

son_ise_nj_nonnull_api_cleaned_df

In [256]:
#Build the per-commentor sentiment table for son_ise_nj, attach it to the review data,
#append the null-review rows and export the result to Excel.
ordered_columns = ["Commentor Name",
                   "Trust",
                   "Store Experience",
                   "Store Staff",
                   "Product Design",
                   "Product Variety",
                   "Discount",
                   "Making Charge",
                   "Price",
                   "Product Quality",
                   "OLD Gold Jewellery Exchange"]
#reindex (instead of plain column selection) also works when no rows were parsed:
#it yields an empty frame with the expected columns rather than raising KeyError
son_ise_nj_nonnull_sen_df = pd.DataFrame(processed_data_son_ise_nj_nonnull).reindex(columns=ordered_columns)

#The column-wise concat pairs rows by index label, so differing row counts leave unmatched rows filled with NaN.
#print is used because warnings are globally silenced in this notebook.
#NOTE(review): this also presumes both frames share the same index and row order - confirm upstream.
if len(combined_df_son_ise_nj) != len(son_ise_nj_nonnull_sen_df):
    print(f"WARNING: combined_df_son_ise_nj has {len(combined_df_son_ise_nj)} rows but "
          f"{len(son_ise_nj_nonnull_sen_df)} sentiment rows were parsed; unmatched rows will contain NaN")
son_ise_nj_nonnull_merged_df = pd.concat([combined_df_son_ise_nj, son_ise_nj_nonnull_sen_df], axis=1)

#Stack the reviews that had no text underneath the scored ones
son_ise_nj_final_sen_df = pd.concat([son_ise_nj_nonnull_merged_df,null_dataframes['son_ise_nj_null']], ignore_index=True)

#Work on a copy and keep only the first 10 characters of the stringified publish date
son_ise_nj_final_sen_df_copy = son_ise_nj_final_sen_df.copy()
son_ise_nj_final_sen_df_copy["Published At Date"] = son_ise_nj_final_sen_df_copy["Published At Date"].astype(str).str[:10]

#Make sure the output folder exists so the export does not fail on a fresh checkout
os.makedirs("sentiment_raw_output", exist_ok=True)
son_ise_nj_final_sen_df_copy.to_excel("sentiment_raw_output/son_ise_nj_final_sen_df_jul.xlsx",index=False)


### tif_chi_il

In [257]:
#Score every non-null review bucket for tif_chi_il in batches, tracking progress, token usage and cost.
batch_size = 25  #number of reviews sent to score_sentiments_jul per call
batch_counter = [0]  #one-element list so the progress thread sees updates made here
#Count batches over the same buckets the loop below iterates, so the progress total always matches the work done
total_batches = sum(math.ceil(len(bucket_df) / batch_size) for bucket_df in tif_chi_il_nonnull_buckets.values())
key_counter = 0
total_keys = len(tif_chi_il_nonnull_buckets)
tif_chi_il_nonnull_api = []
input_tokens_tif_chi_il_nonnull = 0
output_tokens_tif_chi_il_nonnull = 0
start_time_tif_chi_il = time.time()

for key in tif_chi_il_nonnull_buckets.keys():
    key_counter += 1
    current_df = tif_chi_il_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    # Threading setup
    total_iterations = math.ceil(len(current_df) / batch_size)
    stop_event = threading.Event()
    message_thread = threading.Thread(
        target=print_dynamic_message,
        args=(batch_counter, key_counter, counter, total_batches, total_keys,
              total_iterations, start_time_tif_chi_il, stop_event),
    )
    message_thread.start()

    try:
        for start in range(0, len(current_df), batch_size):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + batch_size
            result, it, ot = score_sentiments_jul(current_df[start:end])
            tif_chi_il_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        #Always stop the progress thread, even if an API call raises;
        #otherwise it would keep looping and clearing the cell output
        stop_event.set()
        message_thread.join()
    input_tokens_tif_chi_il_nonnull += input_tokens
    output_tokens_tif_chi_il_nonnull += output_tokens
    clear_output(wait=True)

overall_execution_time_tif_chi_il = time.time() - start_time_tif_chi_il
formatted_time_tif_chi_il = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_tif_chi_il))
#Rates applied: 0.01 per 1000 input tokens and 0.03 per 1000 output tokens
input_token_cost_tif_chi_il = round((0.01/1000) * input_tokens_tif_chi_il_nonnull, 2)
output_token_cost_tif_chi_il = round((0.03/1000) * output_tokens_tif_chi_il_nonnull, 2)
total_cost_tif_chi_il = round(input_token_cost_tif_chi_il + output_token_cost_tif_chi_il, 2)

#Print the count itself rather than the one-element list wrapping it
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_tif_chi_il}")
print(f"Total Input Tokens - {input_tokens_tif_chi_il_nonnull}")
print(f"Total Input Cost = {input_token_cost_tif_chi_il}")
print(f"Total Output Tokens - {output_tokens_tif_chi_il_nonnull}")
print(f"Total Output Cost = {output_token_cost_tif_chi_il}")
print(f"Total Cost = {total_cost_tif_chi_il}")

Executed [5] Iterations
Total Execution Time: 00:00:09
Total Input Tokens - 3661
Total Input Cost = 0.04
Total Output Tokens - 207
Total Output Cost = 0.01
Total Cost = 0.05


In [258]:
#Strip the Markdown code fences ("```json" and "```") from every raw API response and
#swap each literal "[]" for a one-element list holding blank positive/negative strings
tif_chi_il_nonnull_api_cleaned = []
for raw_response in tif_chi_il_nonnull_api:
    unfenced_response = raw_response.replace("```json", "").replace("```", "")
    tif_chi_il_nonnull_api_cleaned.append(
        unfenced_response.replace("[]", "[{\"positive\": \"\", \"negative\": \"\"}]")
    )

#One row per API response
tif_chi_il_nonnull_api_cleaned_df = pd.DataFrame(tif_chi_il_nonnull_api_cleaned)

#Sentiment topics scored for every commentor
column_names2 = [
    'Trust',
    'Store Experience',
    'Store Staff',
    'Product Design',
    'Product Variety',
    'Discount',
    'Making Charge',
    'Price',
    'Product Quality',
    'OLD Gold Jewellery Exchange',
]

In [259]:
#Turn each JSON API response into one record of topic sentiment scores per commentor
processed_data_tif_chi_il_nonnull = []

#Iterate over each row in the DataFrame
for index, row in tif_chi_il_nonnull_api_cleaned_df.iterrows():
    try:
        #Assuming the JSON string is in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Each key is a commentor name mapped to a list of feedback entries
        for commentor_name, feedback_list in data.items():
            #Every topic starts at 0 (not mentioned)
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #A commentor can have several feedback entries; an empty/missing list leaves all topics at 0
            for feedback in feedback_list or []:
                #Strip whitespace so a value like "Trust, Price" still matches the topic "Price"
                positive = [item.strip() for item in feedback.get("positive", "").split(',')]
                negative = [item.strip() for item in feedback.get("negative", "").split(',')]

                for topic in column_names2:
                    #Aggregate the value: 1 for positive, -1 for negative, 0 for not mentioned
                    commentor_data[topic] += assign_value(topic, positive, negative)

            #Clamp the aggregated scores to the range -1..1
            for topic in column_names2:
                commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_tif_chi_il_nonnull.append(commentor_data)
    except Exception as exc:
        #Skip rows that cannot be parsed, but report the actual error; unlike a bare except this
        #no longer swallows KeyboardInterrupt/SystemExit.
        #NOTE(review): commentors appended before a failure within the same row are kept, the rest of that row is lost
        print(f"Invalid JSON in row {index} ({type(exc).__name__}: {exc})")

#Persist the cleaned API responses to Excel, then load them back from that same file
tif_chi_il_nonnull_api_cleaned_df.to_excel("tif_chi_il_nonnull_api_cleaned_df.xlsx",index=False)

tif_chi_il_nonnull_api_cleaned_df = pd.read_excel("tif_chi_il_nonnull_api_cleaned_df.xlsx")

tif_chi_il_nonnull_api_cleaned_df

In [260]:
#Build the per-commentor sentiment table for tif_chi_il, attach it to the review data,
#append the null-review rows and export the result to Excel.
ordered_columns = ["Commentor Name",
                   "Trust",
                   "Store Experience",
                   "Store Staff",
                   "Product Design",
                   "Product Variety",
                   "Discount",
                   "Making Charge",
                   "Price",
                   "Product Quality",
                   "OLD Gold Jewellery Exchange"]
#reindex (instead of plain column selection) also works when no rows were parsed:
#it yields an empty frame with the expected columns rather than raising KeyError
tif_chi_il_nonnull_sen_df = pd.DataFrame(processed_data_tif_chi_il_nonnull).reindex(columns=ordered_columns)

#The column-wise concat pairs rows by index label, so differing row counts leave unmatched rows filled with NaN.
#print is used because warnings are globally silenced in this notebook.
#NOTE(review): this also presumes both frames share the same index and row order - confirm upstream.
if len(combined_df_tif_chi_il) != len(tif_chi_il_nonnull_sen_df):
    print(f"WARNING: combined_df_tif_chi_il has {len(combined_df_tif_chi_il)} rows but "
          f"{len(tif_chi_il_nonnull_sen_df)} sentiment rows were parsed; unmatched rows will contain NaN")
tif_chi_il_nonnull_merged_df = pd.concat([combined_df_tif_chi_il, tif_chi_il_nonnull_sen_df], axis=1)

#Stack the reviews that had no text underneath the scored ones
tif_chi_il_final_sen_df = pd.concat([tif_chi_il_nonnull_merged_df,null_dataframes['tif_chi_il_null']], ignore_index=True)

#Work on a copy and keep only the first 10 characters of the stringified publish date
tif_chi_il_final_sen_df_copy = tif_chi_il_final_sen_df.copy()
tif_chi_il_final_sen_df_copy["Published At Date"] = tif_chi_il_final_sen_df_copy["Published At Date"].astype(str).str[:10]

#Make sure the output folder exists so the export does not fail on a fresh checkout
os.makedirs("sentiment_raw_output", exist_ok=True)
tif_chi_il_final_sen_df_copy.to_excel("sentiment_raw_output/tif_chi_il_final_sen_df_jul.xlsx",index=False)


### tif_nor_il

In [261]:
#Score every non-null review bucket for tif_nor_il in batches, tracking progress, token usage and cost.
batch_size = 25  #number of reviews sent to score_sentiments_jul per call
batch_counter = [0]  #one-element list so the progress thread sees updates made here
#Count batches over the same buckets the loop below iterates, so the progress total always matches the work done
total_batches = sum(math.ceil(len(bucket_df) / batch_size) for bucket_df in tif_nor_il_nonnull_buckets.values())
key_counter = 0
total_keys = len(tif_nor_il_nonnull_buckets)
tif_nor_il_nonnull_api = []
input_tokens_tif_nor_il_nonnull = 0
output_tokens_tif_nor_il_nonnull = 0
start_time_tif_nor_il = time.time()

for key in tif_nor_il_nonnull_buckets.keys():
    key_counter += 1
    current_df = tif_nor_il_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    # Threading setup
    total_iterations = math.ceil(len(current_df) / batch_size)
    stop_event = threading.Event()
    message_thread = threading.Thread(
        target=print_dynamic_message,
        args=(batch_counter, key_counter, counter, total_batches, total_keys,
              total_iterations, start_time_tif_nor_il, stop_event),
    )
    message_thread.start()

    try:
        for start in range(0, len(current_df), batch_size):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + batch_size
            result, it, ot = score_sentiments_jul(current_df[start:end])
            tif_nor_il_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        #Always stop the progress thread, even if an API call raises;
        #otherwise it would keep looping and clearing the cell output
        stop_event.set()
        message_thread.join()
    input_tokens_tif_nor_il_nonnull += input_tokens
    output_tokens_tif_nor_il_nonnull += output_tokens
    clear_output(wait=True)

overall_execution_time_tif_nor_il = time.time() - start_time_tif_nor_il
formatted_time_tif_nor_il = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_tif_nor_il))
#Rates applied: 0.01 per 1000 input tokens and 0.03 per 1000 output tokens
input_token_cost_tif_nor_il = round((0.01/1000) * input_tokens_tif_nor_il_nonnull, 2)
output_token_cost_tif_nor_il = round((0.03/1000) * output_tokens_tif_nor_il_nonnull, 2)
total_cost_tif_nor_il = round(input_token_cost_tif_nor_il + output_token_cost_tif_nor_il, 2)

#Print the count itself rather than the one-element list wrapping it
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_tif_nor_il}")
print(f"Total Input Tokens - {input_tokens_tif_nor_il_nonnull}")
print(f"Total Input Cost = {input_token_cost_tif_nor_il}")
print(f"Total Output Tokens - {output_tokens_tif_nor_il_nonnull}")
print(f"Total Output Cost = {output_token_cost_tif_nor_il}")
print(f"Total Cost = {total_cost_tif_nor_il}")

Executed [1] Iterations
Total Execution Time: 00:00:02
Total Input Tokens - 656
Total Input Cost = 0.01
Total Output Tokens - 33
Total Output Cost = 0.0
Total Cost = 0.01


In [262]:
#Strip the Markdown code fences ("```json" and "```") from every raw API response and
#swap each literal "[]" for a one-element list holding blank positive/negative strings
tif_nor_il_nonnull_api_cleaned = []
for raw_response in tif_nor_il_nonnull_api:
    unfenced_response = raw_response.replace("```json", "").replace("```", "")
    tif_nor_il_nonnull_api_cleaned.append(
        unfenced_response.replace("[]", "[{\"positive\": \"\", \"negative\": \"\"}]")
    )

#One row per API response
tif_nor_il_nonnull_api_cleaned_df = pd.DataFrame(tif_nor_il_nonnull_api_cleaned)

#Sentiment topics scored for every commentor
column_names2 = [
    'Trust',
    'Store Experience',
    'Store Staff',
    'Product Design',
    'Product Variety',
    'Discount',
    'Making Charge',
    'Price',
    'Product Quality',
    'OLD Gold Jewellery Exchange',
]

In [263]:
#Turn each JSON API response into one record of topic sentiment scores per commentor
processed_data_tif_nor_il_nonnull = []

#Iterate over each row in the DataFrame
for index, row in tif_nor_il_nonnull_api_cleaned_df.iterrows():
    try:
        #Assuming the JSON string is in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Each key is a commentor name mapped to a list of feedback entries
        for commentor_name, feedback_list in data.items():
            #Every topic starts at 0 (not mentioned)
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #A commentor can have several feedback entries; an empty/missing list leaves all topics at 0
            for feedback in feedback_list or []:
                #Strip whitespace so a value like "Trust, Price" still matches the topic "Price"
                positive = [item.strip() for item in feedback.get("positive", "").split(',')]
                negative = [item.strip() for item in feedback.get("negative", "").split(',')]

                for topic in column_names2:
                    #Aggregate the value: 1 for positive, -1 for negative, 0 for not mentioned
                    commentor_data[topic] += assign_value(topic, positive, negative)

            #Clamp the aggregated scores to the range -1..1
            for topic in column_names2:
                commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_tif_nor_il_nonnull.append(commentor_data)
    except Exception as exc:
        #Skip rows that cannot be parsed, but report the actual error; unlike a bare except this
        #no longer swallows KeyboardInterrupt/SystemExit.
        #NOTE(review): commentors appended before a failure within the same row are kept, the rest of that row is lost
        print(f"Invalid JSON in row {index} ({type(exc).__name__}: {exc})")

#Persist the cleaned API responses to Excel, then load them back from that same file
tif_nor_il_nonnull_api_cleaned_df.to_excel("tif_nor_il_nonnull_api_cleaned_df.xlsx",index=False)

tif_nor_il_nonnull_api_cleaned_df = pd.read_excel("tif_nor_il_nonnull_api_cleaned_df.xlsx")

tif_nor_il_nonnull_api_cleaned_df

In [264]:
#Build the per-commentor sentiment table for tif_nor_il, attach it to the review data,
#append the null-review rows and export the result to Excel.
ordered_columns = ["Commentor Name",
                   "Trust",
                   "Store Experience",
                   "Store Staff",
                   "Product Design",
                   "Product Variety",
                   "Discount",
                   "Making Charge",
                   "Price",
                   "Product Quality",
                   "OLD Gold Jewellery Exchange"]
#reindex (instead of plain column selection) also works when no rows were parsed:
#it yields an empty frame with the expected columns rather than raising KeyError
tif_nor_il_nonnull_sen_df = pd.DataFrame(processed_data_tif_nor_il_nonnull).reindex(columns=ordered_columns)

#The column-wise concat pairs rows by index label, so differing row counts leave unmatched rows filled with NaN.
#print is used because warnings are globally silenced in this notebook.
#NOTE(review): this also presumes both frames share the same index and row order - confirm upstream.
if len(combined_df_tif_nor_il) != len(tif_nor_il_nonnull_sen_df):
    print(f"WARNING: combined_df_tif_nor_il has {len(combined_df_tif_nor_il)} rows but "
          f"{len(tif_nor_il_nonnull_sen_df)} sentiment rows were parsed; unmatched rows will contain NaN")
tif_nor_il_nonnull_merged_df = pd.concat([combined_df_tif_nor_il, tif_nor_il_nonnull_sen_df], axis=1)

#Stack the reviews that had no text underneath the scored ones
tif_nor_il_final_sen_df = pd.concat([tif_nor_il_nonnull_merged_df,null_dataframes['tif_nor_il_null']], ignore_index=True)

#Work on a copy and keep only the first 10 characters of the stringified publish date
tif_nor_il_final_sen_df_copy = tif_nor_il_final_sen_df.copy()
tif_nor_il_final_sen_df_copy["Published At Date"] = tif_nor_il_final_sen_df_copy["Published At Date"].astype(str).str[:10]

#Make sure the output folder exists so the export does not fail on a fresh checkout
os.makedirs("sentiment_raw_output", exist_ok=True)
tif_nor_il_final_sen_df_copy.to_excel("sentiment_raw_output/tif_nor_il_final_sen_df_jul.xlsx",index=False)


### tif_sko_il

In [265]:
#Score every non-null review bucket for tif_sko_il in batches, tracking progress, token usage and cost.
batch_size = 25  #number of reviews sent to score_sentiments_jul per call
batch_counter = [0]  #one-element list so the progress thread sees updates made here
#Count batches over the same buckets the loop below iterates, so the progress total always matches the work done
total_batches = sum(math.ceil(len(bucket_df) / batch_size) for bucket_df in tif_sko_il_nonnull_buckets.values())
key_counter = 0
total_keys = len(tif_sko_il_nonnull_buckets)
tif_sko_il_nonnull_api = []
input_tokens_tif_sko_il_nonnull = 0
output_tokens_tif_sko_il_nonnull = 0
start_time_tif_sko_il = time.time()

for key in tif_sko_il_nonnull_buckets.keys():
    key_counter += 1
    current_df = tif_sko_il_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    # Threading setup
    total_iterations = math.ceil(len(current_df) / batch_size)
    stop_event = threading.Event()
    message_thread = threading.Thread(
        target=print_dynamic_message,
        args=(batch_counter, key_counter, counter, total_batches, total_keys,
              total_iterations, start_time_tif_sko_il, stop_event),
    )
    message_thread.start()

    try:
        for start in range(0, len(current_df), batch_size):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + batch_size
            result, it, ot = score_sentiments_jul(current_df[start:end])
            tif_sko_il_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        #Always stop the progress thread, even if an API call raises;
        #otherwise it would keep looping and clearing the cell output
        stop_event.set()
        message_thread.join()
    input_tokens_tif_sko_il_nonnull += input_tokens
    output_tokens_tif_sko_il_nonnull += output_tokens
    clear_output(wait=True)

overall_execution_time_tif_sko_il = time.time() - start_time_tif_sko_il
formatted_time_tif_sko_il = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_tif_sko_il))
#Rates applied: 0.01 per 1000 input tokens and 0.03 per 1000 output tokens
input_token_cost_tif_sko_il = round((0.01/1000) * input_tokens_tif_sko_il_nonnull, 2)
output_token_cost_tif_sko_il = round((0.03/1000) * output_tokens_tif_sko_il_nonnull, 2)
total_cost_tif_sko_il = round(input_token_cost_tif_sko_il + output_token_cost_tif_sko_il, 2)

#Print the count itself rather than the one-element list wrapping it
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_tif_sko_il}")
print(f"Total Input Tokens - {input_tokens_tif_sko_il_nonnull}")
print(f"Total Input Cost = {input_token_cost_tif_sko_il}")
print(f"Total Output Tokens - {output_tokens_tif_sko_il_nonnull}")
print(f"Total Output Cost = {output_token_cost_tif_sko_il}")
print(f"Total Cost = {total_cost_tif_sko_il}")

Executed [1] Iterations
Total Execution Time: 00:00:01
Total Input Tokens - 1348
Total Input Cost = 0.01
Total Output Tokens - 71
Total Output Cost = 0.0
Total Cost = 0.01


In [266]:
#Strip the Markdown code fences ("```json" and "```") from every raw API response and
#swap each literal "[]" for a one-element list holding blank positive/negative strings
tif_sko_il_nonnull_api_cleaned = []
for raw_response in tif_sko_il_nonnull_api:
    unfenced_response = raw_response.replace("```json", "").replace("```", "")
    tif_sko_il_nonnull_api_cleaned.append(
        unfenced_response.replace("[]", "[{\"positive\": \"\", \"negative\": \"\"}]")
    )

#One row per API response
tif_sko_il_nonnull_api_cleaned_df = pd.DataFrame(tif_sko_il_nonnull_api_cleaned)

#Sentiment topics scored for every commentor
column_names2 = [
    'Trust',
    'Store Experience',
    'Store Staff',
    'Product Design',
    'Product Variety',
    'Discount',
    'Making Charge',
    'Price',
    'Product Quality',
    'OLD Gold Jewellery Exchange',
]

In [267]:
#Turn each JSON API response into one record of topic sentiment scores per commentor
processed_data_tif_sko_il_nonnull = []

#Iterate over each row in the DataFrame
for index, row in tif_sko_il_nonnull_api_cleaned_df.iterrows():
    try:
        #Assuming the JSON string is in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Each key is a commentor name mapped to a list of feedback entries
        for commentor_name, feedback_list in data.items():
            #Every topic starts at 0 (not mentioned)
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #A commentor can have several feedback entries; an empty/missing list leaves all topics at 0
            for feedback in feedback_list or []:
                #Strip whitespace so a value like "Trust, Price" still matches the topic "Price"
                positive = [item.strip() for item in feedback.get("positive", "").split(',')]
                negative = [item.strip() for item in feedback.get("negative", "").split(',')]

                for topic in column_names2:
                    #Aggregate the value: 1 for positive, -1 for negative, 0 for not mentioned
                    commentor_data[topic] += assign_value(topic, positive, negative)

            #Clamp the aggregated scores to the range -1..1
            for topic in column_names2:
                commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_tif_sko_il_nonnull.append(commentor_data)
    except Exception as exc:
        #Skip rows that cannot be parsed, but report the actual error; unlike a bare except this
        #no longer swallows KeyboardInterrupt/SystemExit.
        #NOTE(review): commentors appended before a failure within the same row are kept, the rest of that row is lost
        print(f"Invalid JSON in row {index} ({type(exc).__name__}: {exc})")

#Persist the cleaned API responses to Excel, then load them back from that same file
tif_sko_il_nonnull_api_cleaned_df.to_excel("tif_sko_il_nonnull_api_cleaned_df.xlsx",index=False)

tif_sko_il_nonnull_api_cleaned_df = pd.read_excel("tif_sko_il_nonnull_api_cleaned_df.xlsx")

tif_sko_il_nonnull_api_cleaned_df

In [268]:
#Build the per-commentor sentiment table for tif_sko_il, attach it to the review data,
#append the null-review rows and export the result to Excel.
ordered_columns = ["Commentor Name",
                   "Trust",
                   "Store Experience",
                   "Store Staff",
                   "Product Design",
                   "Product Variety",
                   "Discount",
                   "Making Charge",
                   "Price",
                   "Product Quality",
                   "OLD Gold Jewellery Exchange"]
#reindex (instead of plain column selection) also works when no rows were parsed:
#it yields an empty frame with the expected columns rather than raising KeyError
tif_sko_il_nonnull_sen_df = pd.DataFrame(processed_data_tif_sko_il_nonnull).reindex(columns=ordered_columns)

#The column-wise concat pairs rows by index label, so differing row counts leave unmatched rows filled with NaN.
#print is used because warnings are globally silenced in this notebook.
#NOTE(review): this also presumes both frames share the same index and row order - confirm upstream.
if len(combined_df_tif_sko_il) != len(tif_sko_il_nonnull_sen_df):
    print(f"WARNING: combined_df_tif_sko_il has {len(combined_df_tif_sko_il)} rows but "
          f"{len(tif_sko_il_nonnull_sen_df)} sentiment rows were parsed; unmatched rows will contain NaN")
tif_sko_il_nonnull_merged_df = pd.concat([combined_df_tif_sko_il, tif_sko_il_nonnull_sen_df], axis=1)

#Stack the reviews that had no text underneath the scored ones
tif_sko_il_final_sen_df = pd.concat([tif_sko_il_nonnull_merged_df,null_dataframes['tif_sko_il_null']], ignore_index=True)

#Work on a copy and keep only the first 10 characters of the stringified publish date
tif_sko_il_final_sen_df_copy = tif_sko_il_final_sen_df.copy()
tif_sko_il_final_sen_df_copy["Published At Date"] = tif_sko_il_final_sen_df_copy["Published At Date"].astype(str).str[:10]

#Make sure the output folder exists so the export does not fail on a fresh checkout
os.makedirs("sentiment_raw_output", exist_ok=True)
tif_sko_il_final_sen_df_copy.to_excel("sentiment_raw_output/tif_sko_il_final_sen_df_jul.xlsx",index=False)


### tif_eas_nj

In [269]:
#Score every non-null review bucket for tif_eas_nj in batches, tracking progress, token usage and cost.
batch_size = 25  #number of reviews sent to score_sentiments_jul per call
batch_counter = [0]  #one-element list so the progress thread sees updates made here
#Count batches over the same buckets the loop below iterates, so the progress total always matches the work done
total_batches = sum(math.ceil(len(bucket_df) / batch_size) for bucket_df in tif_eas_nj_nonnull_buckets.values())
key_counter = 0
total_keys = len(tif_eas_nj_nonnull_buckets)
tif_eas_nj_nonnull_api = []
input_tokens_tif_eas_nj_nonnull = 0
output_tokens_tif_eas_nj_nonnull = 0
start_time_tif_eas_nj = time.time()

for key in tif_eas_nj_nonnull_buckets.keys():
    key_counter += 1
    current_df = tif_eas_nj_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    # Threading setup
    total_iterations = math.ceil(len(current_df) / batch_size)
    stop_event = threading.Event()
    message_thread = threading.Thread(
        target=print_dynamic_message,
        args=(batch_counter, key_counter, counter, total_batches, total_keys,
              total_iterations, start_time_tif_eas_nj, stop_event),
    )
    message_thread.start()

    try:
        for start in range(0, len(current_df), batch_size):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + batch_size
            result, it, ot = score_sentiments_jul(current_df[start:end])
            tif_eas_nj_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        #Always stop the progress thread, even if an API call raises;
        #otherwise it would keep looping and clearing the cell output
        stop_event.set()
        message_thread.join()
    input_tokens_tif_eas_nj_nonnull += input_tokens
    output_tokens_tif_eas_nj_nonnull += output_tokens
    clear_output(wait=True)

overall_execution_time_tif_eas_nj = time.time() - start_time_tif_eas_nj
formatted_time_tif_eas_nj = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_tif_eas_nj))
#Rates applied: 0.01 per 1000 input tokens and 0.03 per 1000 output tokens
input_token_cost_tif_eas_nj = round((0.01/1000) * input_tokens_tif_eas_nj_nonnull, 2)
output_token_cost_tif_eas_nj = round((0.03/1000) * output_tokens_tif_eas_nj_nonnull, 2)
total_cost_tif_eas_nj = round(input_token_cost_tif_eas_nj + output_token_cost_tif_eas_nj, 2)

#Print the count itself rather than the one-element list wrapping it
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_tif_eas_nj}")
print(f"Total Input Tokens - {input_tokens_tif_eas_nj_nonnull}")
print(f"Total Input Cost = {input_token_cost_tif_eas_nj}")
print(f"Total Output Tokens - {output_tokens_tif_eas_nj_nonnull}")
print(f"Total Output Cost = {output_token_cost_tif_eas_nj}")
print(f"Total Cost = {total_cost_tif_eas_nj}")

Executed [3] Iterations
Total Execution Time: 00:00:04
Total Input Tokens - 2050
Total Input Cost = 0.02
Total Output Tokens - 106
Total Output Cost = 0.0
Total Cost = 0.02


In [270]:
#Strip the Markdown code fences ("```json" and "```") from every raw API response and
#swap each literal "[]" for a one-element list holding blank positive/negative strings
tif_eas_nj_nonnull_api_cleaned = []
for raw_response in tif_eas_nj_nonnull_api:
    unfenced_response = raw_response.replace("```json", "").replace("```", "")
    tif_eas_nj_nonnull_api_cleaned.append(
        unfenced_response.replace("[]", "[{\"positive\": \"\", \"negative\": \"\"}]")
    )

#One row per API response
tif_eas_nj_nonnull_api_cleaned_df = pd.DataFrame(tif_eas_nj_nonnull_api_cleaned)

#Sentiment topics scored for every commentor
column_names2 = [
    'Trust',
    'Store Experience',
    'Store Staff',
    'Product Design',
    'Product Variety',
    'Discount',
    'Making Charge',
    'Price',
    'Product Quality',
    'OLD Gold Jewellery Exchange',
]

In [271]:
#Turn each cleaned API response (one JSON object per batch) into one record per commentor,
#holding a -1/0/1 sentiment score for every topic in column_names2
processed_data_tif_eas_nj_nonnull = []

#Iterate over each row in the DataFrame
for index, row in tif_eas_nj_nonnull_api_cleaned_df.iterrows():
    try:
        #The JSON string is expected in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Each key is a commentor name, each value that commentor's list of feedback entries
        for commentor_name, feedback_list in data.items():
            #Start every topic at 0 (not mentioned)
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #A commentor may have several feedback entries; their scores are summed per topic
            for feedback in feedback_list or []:
                #Strip whitespace around each comma-separated topic so "Trust, Price" matches
                #'Price' rather than ' Price'; "or ''" also tolerates explicit null values
                positive = [name.strip() for name in (feedback.get("positive") or "").split(',')]
                negative = [name.strip() for name in (feedback.get("negative") or "").split(',')]

                for topic in column_names2:
                    #1 for positive, -1 for negative, 0 for not mentioned
                    commentor_data[topic] += assign_value(topic, positive, negative)

            #Clamp the summed scores to the range -1..1
            for topic in column_names2:
                commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_tif_eas_nj_nonnull.append(commentor_data)
    except Exception as exc:
        #Best effort: report the unusable batch and continue with the next one.
        #"except Exception" (instead of a bare except) lets KeyboardInterrupt stop the cell.
        #NOTE(review): a skipped batch yields fewer sentiment rows than reviews - verify any
        #later row-wise join against the review table
        print(f"Invalid JSON in row {index}: {exc}")

#Persist the cleaned responses to Excel, then reload them from that file
tif_eas_nj_nonnull_api_cleaned_df.to_excel("tif_eas_nj_nonnull_api_cleaned_df.xlsx",index=False)

tif_eas_nj_nonnull_api_cleaned_df = pd.read_excel("tif_eas_nj_nonnull_api_cleaned_df.xlsx")

tif_eas_nj_nonnull_api_cleaned_df

In [272]:
#Column order of the per-commentor sentiment table
ordered_columns = ["Commentor Name",
                   "Trust",
                   "Store Experience",
                   "Store Staff",
                   "Product Design",
                   "Product Variety",
                   "Discount",
                   "Making Charge",
                   "Price",
                   "Product Quality",
                   "OLD Gold Jewellery Exchange"]
#Create the DataFrame directly in that order. Passing columns= also works when no records
#were processed, whereas selecting [ordered_columns] on an empty frame raises KeyError
tif_eas_nj_nonnull_sen_df = pd.DataFrame(processed_data_tif_eas_nj_nonnull, columns=ordered_columns)

#Put the sentiment columns next to the review columns (axis=1 concat aligns rows on the index)
tif_eas_nj_nonnull_merged_df = pd.concat([combined_df_tif_eas_nj, tif_eas_nj_nonnull_sen_df], axis=1)

#Append the rows of null_dataframes['tif_eas_nj_null'] below the scored rows
tif_eas_nj_final_sen_df = pd.concat([tif_eas_nj_nonnull_merged_df,null_dataframes['tif_eas_nj_null']], ignore_index=True)

#Export a copy whose "Published At Date" is cut to the first 10 characters of its string form
tif_eas_nj_final_sen_df_copy = tif_eas_nj_final_sen_df.copy()
tif_eas_nj_final_sen_df_copy["Published At Date"] = tif_eas_nj_final_sen_df_copy["Published At Date"].astype(str).str[:10]

tif_eas_nj_final_sen_df_copy.to_excel("sentiment_raw_output/tif_eas_nj_final_sen_df_jul.xlsx",index=False)


### tif_red_nj

In [273]:
#Score the tif_red_nj reviews with score_sentiments_jul, bucket by bucket in batches of
#25 rows, while a background thread prints live progress
batch_counter = [0]  #one-element list so the progress thread sees the updates made below
#Total number of batches over all buckets (each bucket is split into chunks of 25 rows);
#summing over the dict keeps this total consistent with the loop, which visits every key
total_batches = sum(math.ceil(len(bucket_df)/25) for bucket_df in tif_red_nj_nonnull_buckets.values())
key_counter = 0
total_keys=len(tif_red_nj_nonnull_buckets.keys())
tif_red_nj_nonnull_api = []
input_tokens_tif_red_nj_nonnull=0
output_tokens_tif_red_nj_nonnull=0
start_time_tif_red_nj = time.time()

for key in tif_red_nj_nonnull_buckets.keys():
    key_counter+=1
    current_df = tif_red_nj_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    # Threading setup
    total_iterations = math.ceil(len(current_df) / 25)
    stop_event = threading.Event()
    message_thread = threading.Thread(target=print_dynamic_message, args=(batch_counter,key_counter,counter,total_batches,total_keys, total_iterations,start_time_tif_red_nj, stop_event))
    message_thread.start()

    try:
        for start in range(0, len(current_df), 25):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + 25
            #Each call returns the response text plus its input and output token counts
            result, it, ot = score_sentiments_jul(current_df[start:end])
            tif_red_nj_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        # Always stop the dynamic message thread, even when a request raises;
        # otherwise it would keep running and clearing the cell output
        stop_event.set()
        message_thread.join()
    input_tokens_tif_red_nj_nonnull+=input_tokens
    output_tokens_tif_red_nj_nonnull+=output_tokens
    clear_output(wait=True)

#Run summary: elapsed time and a cost estimate of 0.01 per 1000 input tokens and
#0.03 per 1000 output tokens, each rounded to 2 decimals
overall_execution_time_tif_red_nj = time.time() - start_time_tif_red_nj
formatted_time_tif_red_nj = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_tif_red_nj))
input_token_cost_tif_red_nj = round((0.01/1000) * input_tokens_tif_red_nj_nonnull, 2)
output_token_cost_tif_red_nj = round((0.03/1000) * output_tokens_tif_red_nj_nonnull, 2)
total_cost_tif_red_nj = round(input_token_cost_tif_red_nj + output_token_cost_tif_red_nj, 2)

#batch_counter is a list, so print its single element rather than the list itself
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_tif_red_nj}")
print(f"Total Input Tokens - {input_tokens_tif_red_nj_nonnull}")
print(f"Total Input Cost = {input_token_cost_tif_red_nj}")
print(f"Total Output Tokens - {output_tokens_tif_red_nj_nonnull}")
print(f"Total Output Cost = {output_token_cost_tif_red_nj}")
print(f"Total Cost = {total_cost_tif_red_nj}")

Executed [1] Iterations
Total Execution Time: 00:00:01
Total Input Tokens - 1000
Total Input Cost = 0.01
Total Output Tokens - 41
Total Output Cost = 0.0
Total Cost = 0.01


In [274]:
#Clean every raw API response for tif_red_nj: drop the "```json" / "```" code-fence markers,
#then swap each empty JSON list for a placeholder entry with blank positive/negative fields
tif_red_nj_nonnull_api_cleaned = [
    raw_response.replace("```json", "").replace("```", "").replace(
        "[]", '[{"positive": "", "negative": ""}]'
    )
    for raw_response in tif_red_nj_nonnull_api
]
#One DataFrame row per cleaned response
tif_red_nj_nonnull_api_cleaned_df = pd.DataFrame(data=tif_red_nj_nonnull_api_cleaned)
#Topics that receive a sentiment score for every commentor
column_names2 = [
    'Trust', 'Store Experience', 'Store Staff', 'Product Design', 'Product Variety',
    'Discount', 'Making Charge', 'Price', 'Product Quality', 'OLD Gold Jewellery Exchange',
]

In [275]:
#Turn each cleaned API response (one JSON object per batch) into one record per commentor,
#holding a -1/0/1 sentiment score for every topic in column_names2
processed_data_tif_red_nj_nonnull = []

#Iterate over each row in the DataFrame
for index, row in tif_red_nj_nonnull_api_cleaned_df.iterrows():
    try:
        #The JSON string is expected in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Each key is a commentor name, each value that commentor's list of feedback entries
        for commentor_name, feedback_list in data.items():
            #Start every topic at 0 (not mentioned)
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #A commentor may have several feedback entries; their scores are summed per topic
            for feedback in feedback_list or []:
                #Strip whitespace around each comma-separated topic so "Trust, Price" matches
                #'Price' rather than ' Price'; "or ''" also tolerates explicit null values
                positive = [name.strip() for name in (feedback.get("positive") or "").split(',')]
                negative = [name.strip() for name in (feedback.get("negative") or "").split(',')]

                for topic in column_names2:
                    #1 for positive, -1 for negative, 0 for not mentioned
                    commentor_data[topic] += assign_value(topic, positive, negative)

            #Clamp the summed scores to the range -1..1
            for topic in column_names2:
                commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_tif_red_nj_nonnull.append(commentor_data)
    except Exception as exc:
        #Best effort: report the unusable batch and continue with the next one.
        #"except Exception" (instead of a bare except) lets KeyboardInterrupt stop the cell.
        #NOTE(review): a skipped batch yields fewer sentiment rows than reviews - verify any
        #later row-wise join against the review table
        print(f"Invalid JSON in row {index}: {exc}")

#Persist the cleaned responses to Excel, then reload them from that file
tif_red_nj_nonnull_api_cleaned_df.to_excel("tif_red_nj_nonnull_api_cleaned_df.xlsx",index=False)

tif_red_nj_nonnull_api_cleaned_df = pd.read_excel("tif_red_nj_nonnull_api_cleaned_df.xlsx")

tif_red_nj_nonnull_api_cleaned_df

In [276]:
#Column order of the per-commentor sentiment table
ordered_columns = ["Commentor Name",
                   "Trust",
                   "Store Experience",
                   "Store Staff",
                   "Product Design",
                   "Product Variety",
                   "Discount",
                   "Making Charge",
                   "Price",
                   "Product Quality",
                   "OLD Gold Jewellery Exchange"]
#Create the DataFrame directly in that order. Passing columns= also works when no records
#were processed, whereas selecting [ordered_columns] on an empty frame raises KeyError
tif_red_nj_nonnull_sen_df = pd.DataFrame(processed_data_tif_red_nj_nonnull, columns=ordered_columns)

#Put the sentiment columns next to the review columns (axis=1 concat aligns rows on the index)
tif_red_nj_nonnull_merged_df = pd.concat([combined_df_tif_red_nj, tif_red_nj_nonnull_sen_df], axis=1)

#Append the rows of null_dataframes['tif_red_nj_null'] below the scored rows
tif_red_nj_final_sen_df = pd.concat([tif_red_nj_nonnull_merged_df,null_dataframes['tif_red_nj_null']], ignore_index=True)

#Export a copy whose "Published At Date" is cut to the first 10 characters of its string form
tif_red_nj_final_sen_df_copy = tif_red_nj_final_sen_df.copy()
tif_red_nj_final_sen_df_copy["Published At Date"] = tif_red_nj_final_sen_df_copy["Published At Date"].astype(str).str[:10]

tif_red_nj_final_sen_df_copy.to_excel("sentiment_raw_output/tif_red_nj_final_sen_df_jul.xlsx",index=False)


### tif_hac_nj

In [277]:
#Score the tif_hac_nj reviews with score_sentiments_jul, bucket by bucket in batches of
#25 rows, while a background thread prints live progress
batch_counter = [0]  #one-element list so the progress thread sees the updates made below
#Total number of batches over all buckets (each bucket is split into chunks of 25 rows);
#summing over the dict keeps this total consistent with the loop, which visits every key
total_batches = sum(math.ceil(len(bucket_df)/25) for bucket_df in tif_hac_nj_nonnull_buckets.values())
key_counter = 0
total_keys=len(tif_hac_nj_nonnull_buckets.keys())
tif_hac_nj_nonnull_api = []
input_tokens_tif_hac_nj_nonnull=0
output_tokens_tif_hac_nj_nonnull=0
start_time_tif_hac_nj = time.time()

for key in tif_hac_nj_nonnull_buckets.keys():
    key_counter+=1
    current_df = tif_hac_nj_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    # Threading setup
    total_iterations = math.ceil(len(current_df) / 25)
    stop_event = threading.Event()
    message_thread = threading.Thread(target=print_dynamic_message, args=(batch_counter,key_counter,counter,total_batches,total_keys, total_iterations,start_time_tif_hac_nj, stop_event))
    message_thread.start()

    try:
        for start in range(0, len(current_df), 25):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + 25
            #Each call returns the response text plus its input and output token counts
            result, it, ot = score_sentiments_jul(current_df[start:end])
            tif_hac_nj_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        # Always stop the dynamic message thread, even when a request raises;
        # otherwise it would keep running and clearing the cell output
        stop_event.set()
        message_thread.join()
    input_tokens_tif_hac_nj_nonnull+=input_tokens
    output_tokens_tif_hac_nj_nonnull+=output_tokens
    clear_output(wait=True)

#Run summary: elapsed time and a cost estimate of 0.01 per 1000 input tokens and
#0.03 per 1000 output tokens, each rounded to 2 decimals
overall_execution_time_tif_hac_nj = time.time() - start_time_tif_hac_nj
formatted_time_tif_hac_nj = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_tif_hac_nj))
input_token_cost_tif_hac_nj = round((0.01/1000) * input_tokens_tif_hac_nj_nonnull, 2)
output_token_cost_tif_hac_nj = round((0.03/1000) * output_tokens_tif_hac_nj_nonnull, 2)
total_cost_tif_hac_nj = round(input_token_cost_tif_hac_nj + output_token_cost_tif_hac_nj, 2)

#batch_counter is a list, so print its single element rather than the list itself
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_tif_hac_nj}")
print(f"Total Input Tokens - {input_tokens_tif_hac_nj_nonnull}")
print(f"Total Input Cost = {input_token_cost_tif_hac_nj}")
print(f"Total Output Tokens - {output_tokens_tif_hac_nj_nonnull}")
print(f"Total Output Cost = {output_token_cost_tif_hac_nj}")
print(f"Total Cost = {total_cost_tif_hac_nj}")

Executed [2] Iterations
Total Execution Time: 00:00:04
Total Input Tokens - 1431
Total Input Cost = 0.01
Total Output Tokens - 100
Total Output Cost = 0.0
Total Cost = 0.01


In [278]:
#Clean every raw API response for tif_hac_nj: drop the "```json" / "```" code-fence markers,
#then swap each empty JSON list for a placeholder entry with blank positive/negative fields
tif_hac_nj_nonnull_api_cleaned = [
    raw_response.replace("```json", "").replace("```", "").replace(
        "[]", '[{"positive": "", "negative": ""}]'
    )
    for raw_response in tif_hac_nj_nonnull_api
]
#One DataFrame row per cleaned response
tif_hac_nj_nonnull_api_cleaned_df = pd.DataFrame(data=tif_hac_nj_nonnull_api_cleaned)
#Topics that receive a sentiment score for every commentor
column_names2 = [
    'Trust', 'Store Experience', 'Store Staff', 'Product Design', 'Product Variety',
    'Discount', 'Making Charge', 'Price', 'Product Quality', 'OLD Gold Jewellery Exchange',
]

In [279]:
#Turn each cleaned API response (one JSON object per batch) into one record per commentor,
#holding a -1/0/1 sentiment score for every topic in column_names2
processed_data_tif_hac_nj_nonnull = []

#Iterate over each row in the DataFrame
for index, row in tif_hac_nj_nonnull_api_cleaned_df.iterrows():
    try:
        #The JSON string is expected in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Each key is a commentor name, each value that commentor's list of feedback entries
        for commentor_name, feedback_list in data.items():
            #Start every topic at 0 (not mentioned)
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #A commentor may have several feedback entries; their scores are summed per topic
            for feedback in feedback_list or []:
                #Strip whitespace around each comma-separated topic so "Trust, Price" matches
                #'Price' rather than ' Price'; "or ''" also tolerates explicit null values
                positive = [name.strip() for name in (feedback.get("positive") or "").split(',')]
                negative = [name.strip() for name in (feedback.get("negative") or "").split(',')]

                for topic in column_names2:
                    #1 for positive, -1 for negative, 0 for not mentioned
                    commentor_data[topic] += assign_value(topic, positive, negative)

            #Clamp the summed scores to the range -1..1
            for topic in column_names2:
                commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_tif_hac_nj_nonnull.append(commentor_data)
    except Exception as exc:
        #Best effort: report the unusable batch and continue with the next one.
        #"except Exception" (instead of a bare except) lets KeyboardInterrupt stop the cell.
        #NOTE(review): a skipped batch yields fewer sentiment rows than reviews - verify any
        #later row-wise join against the review table
        print(f"Invalid JSON in row {index}: {exc}")

#Persist the cleaned responses to Excel, then reload them from that file
tif_hac_nj_nonnull_api_cleaned_df.to_excel("tif_hac_nj_nonnull_api_cleaned_df.xlsx",index=False)

tif_hac_nj_nonnull_api_cleaned_df = pd.read_excel("tif_hac_nj_nonnull_api_cleaned_df.xlsx")

tif_hac_nj_nonnull_api_cleaned_df

In [280]:
#Column order of the per-commentor sentiment table
ordered_columns = ["Commentor Name",
                   "Trust",
                   "Store Experience",
                   "Store Staff",
                   "Product Design",
                   "Product Variety",
                   "Discount",
                   "Making Charge",
                   "Price",
                   "Product Quality",
                   "OLD Gold Jewellery Exchange"]
#Create the DataFrame directly in that order. Passing columns= also works when no records
#were processed, whereas selecting [ordered_columns] on an empty frame raises KeyError
tif_hac_nj_nonnull_sen_df = pd.DataFrame(processed_data_tif_hac_nj_nonnull, columns=ordered_columns)

#Put the sentiment columns next to the review columns (axis=1 concat aligns rows on the index)
tif_hac_nj_nonnull_merged_df = pd.concat([combined_df_tif_hac_nj, tif_hac_nj_nonnull_sen_df], axis=1)

#Append the rows of null_dataframes['tif_hac_nj_null'] below the scored rows
tif_hac_nj_final_sen_df = pd.concat([tif_hac_nj_nonnull_merged_df,null_dataframes['tif_hac_nj_null']], ignore_index=True)

#Export a copy whose "Published At Date" is cut to the first 10 characters of its string form
tif_hac_nj_final_sen_df_copy = tif_hac_nj_final_sen_df.copy()
tif_hac_nj_final_sen_df_copy["Published At Date"] = tif_hac_nj_final_sen_df_copy["Published At Date"].astype(str).str[:10]

tif_hac_nj_final_sen_df_copy.to_excel("sentiment_raw_output/tif_hac_nj_final_sen_df_jul.xlsx",index=False)


### tif_sho_nj

In [281]:
#Score the tif_sho_nj reviews with score_sentiments_jul, bucket by bucket in batches of
#25 rows, while a background thread prints live progress
batch_counter = [0]  #one-element list so the progress thread sees the updates made below
#Total number of batches over all buckets (each bucket is split into chunks of 25 rows);
#summing over the dict keeps this total consistent with the loop, which visits every key
total_batches = sum(math.ceil(len(bucket_df)/25) for bucket_df in tif_sho_nj_nonnull_buckets.values())
key_counter = 0
total_keys=len(tif_sho_nj_nonnull_buckets.keys())
tif_sho_nj_nonnull_api = []
input_tokens_tif_sho_nj_nonnull=0
output_tokens_tif_sho_nj_nonnull=0
start_time_tif_sho_nj = time.time()

for key in tif_sho_nj_nonnull_buckets.keys():
    key_counter+=1
    current_df = tif_sho_nj_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    # Threading setup
    total_iterations = math.ceil(len(current_df) / 25)
    stop_event = threading.Event()
    message_thread = threading.Thread(target=print_dynamic_message, args=(batch_counter,key_counter,counter,total_batches,total_keys, total_iterations,start_time_tif_sho_nj, stop_event))
    message_thread.start()

    try:
        for start in range(0, len(current_df), 25):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + 25
            #Each call returns the response text plus its input and output token counts
            result, it, ot = score_sentiments_jul(current_df[start:end])
            tif_sho_nj_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        # Always stop the dynamic message thread, even when a request raises;
        # otherwise it would keep running and clearing the cell output
        stop_event.set()
        message_thread.join()
    input_tokens_tif_sho_nj_nonnull+=input_tokens
    output_tokens_tif_sho_nj_nonnull+=output_tokens
    clear_output(wait=True)

#Run summary: elapsed time and a cost estimate of 0.01 per 1000 input tokens and
#0.03 per 1000 output tokens, each rounded to 2 decimals
overall_execution_time_tif_sho_nj = time.time() - start_time_tif_sho_nj
formatted_time_tif_sho_nj = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_tif_sho_nj))
input_token_cost_tif_sho_nj = round((0.01/1000) * input_tokens_tif_sho_nj_nonnull, 2)
output_token_cost_tif_sho_nj = round((0.03/1000) * output_tokens_tif_sho_nj_nonnull, 2)
total_cost_tif_sho_nj = round(input_token_cost_tif_sho_nj + output_token_cost_tif_sho_nj, 2)

#batch_counter is a list, so print its single element rather than the list itself
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_tif_sho_nj}")
print(f"Total Input Tokens - {input_tokens_tif_sho_nj_nonnull}")
print(f"Total Input Cost = {input_token_cost_tif_sho_nj}")
print(f"Total Output Tokens - {output_tokens_tif_sho_nj_nonnull}")
print(f"Total Output Cost = {output_token_cost_tif_sho_nj}")
print(f"Total Cost = {total_cost_tif_sho_nj}")

Executed [1] Iterations
Total Execution Time: 00:00:01
Total Input Tokens - 703
Total Input Cost = 0.01
Total Output Tokens - 37
Total Output Cost = 0.0
Total Cost = 0.01


In [282]:
#Clean every raw API response for tif_sho_nj: drop the "```json" / "```" code-fence markers,
#then swap each empty JSON list for a placeholder entry with blank positive/negative fields
tif_sho_nj_nonnull_api_cleaned = [
    raw_response.replace("```json", "").replace("```", "").replace(
        "[]", '[{"positive": "", "negative": ""}]'
    )
    for raw_response in tif_sho_nj_nonnull_api
]
#One DataFrame row per cleaned response
tif_sho_nj_nonnull_api_cleaned_df = pd.DataFrame(data=tif_sho_nj_nonnull_api_cleaned)
#Topics that receive a sentiment score for every commentor
column_names2 = [
    'Trust', 'Store Experience', 'Store Staff', 'Product Design', 'Product Variety',
    'Discount', 'Making Charge', 'Price', 'Product Quality', 'OLD Gold Jewellery Exchange',
]

In [283]:
#Turn each cleaned API response (one JSON object per batch) into one record per commentor,
#holding a -1/0/1 sentiment score for every topic in column_names2
processed_data_tif_sho_nj_nonnull = []

#Iterate over each row in the DataFrame
for index, row in tif_sho_nj_nonnull_api_cleaned_df.iterrows():
    try:
        #The JSON string is expected in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Each key is a commentor name, each value that commentor's list of feedback entries
        for commentor_name, feedback_list in data.items():
            #Start every topic at 0 (not mentioned)
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #A commentor may have several feedback entries; their scores are summed per topic
            for feedback in feedback_list or []:
                #Strip whitespace around each comma-separated topic so "Trust, Price" matches
                #'Price' rather than ' Price'; "or ''" also tolerates explicit null values
                positive = [name.strip() for name in (feedback.get("positive") or "").split(',')]
                negative = [name.strip() for name in (feedback.get("negative") or "").split(',')]

                for topic in column_names2:
                    #1 for positive, -1 for negative, 0 for not mentioned
                    commentor_data[topic] += assign_value(topic, positive, negative)

            #Clamp the summed scores to the range -1..1
            for topic in column_names2:
                commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_tif_sho_nj_nonnull.append(commentor_data)
    except Exception as exc:
        #Best effort: report the unusable batch and continue with the next one.
        #"except Exception" (instead of a bare except) lets KeyboardInterrupt stop the cell.
        #NOTE(review): a skipped batch yields fewer sentiment rows than reviews - verify any
        #later row-wise join against the review table
        print(f"Invalid JSON in row {index}: {exc}")

#Persist the cleaned responses to Excel, then reload them from that file
tif_sho_nj_nonnull_api_cleaned_df.to_excel("tif_sho_nj_nonnull_api_cleaned_df.xlsx",index=False)

tif_sho_nj_nonnull_api_cleaned_df = pd.read_excel("tif_sho_nj_nonnull_api_cleaned_df.xlsx")

tif_sho_nj_nonnull_api_cleaned_df

In [284]:
#Column order of the per-commentor sentiment table
ordered_columns = ["Commentor Name",
                   "Trust",
                   "Store Experience",
                   "Store Staff",
                   "Product Design",
                   "Product Variety",
                   "Discount",
                   "Making Charge",
                   "Price",
                   "Product Quality",
                   "OLD Gold Jewellery Exchange"]
#Create the DataFrame directly in that order. Passing columns= also works when no records
#were processed, whereas selecting [ordered_columns] on an empty frame raises KeyError
tif_sho_nj_nonnull_sen_df = pd.DataFrame(processed_data_tif_sho_nj_nonnull, columns=ordered_columns)

#Put the sentiment columns next to the review columns (axis=1 concat aligns rows on the index)
tif_sho_nj_nonnull_merged_df = pd.concat([combined_df_tif_sho_nj, tif_sho_nj_nonnull_sen_df], axis=1)

#Append the rows of null_dataframes['tif_sho_nj_null'] below the scored rows
tif_sho_nj_final_sen_df = pd.concat([tif_sho_nj_nonnull_merged_df,null_dataframes['tif_sho_nj_null']], ignore_index=True)

#Export a copy whose "Published At Date" is cut to the first 10 characters of its string form
tif_sho_nj_final_sen_df_copy = tif_sho_nj_final_sen_df.copy()
tif_sho_nj_final_sen_df_copy["Published At Date"] = tif_sho_nj_final_sen_df_copy["Published At Date"].astype(str).str[:10]

tif_sho_nj_final_sen_df_copy.to_excel("sentiment_raw_output/tif_sho_nj_final_sen_df_jul.xlsx",index=False)


### tif_par_nj

In [285]:
#Score the tif_par_nj reviews with score_sentiments_jul, bucket by bucket in batches of
#25 rows, while a background thread prints live progress
batch_counter = [0]  #one-element list so the progress thread sees the updates made below
#Total number of batches over all buckets (each bucket is split into chunks of 25 rows);
#summing over the dict keeps this total consistent with the loop, which visits every key
total_batches = sum(math.ceil(len(bucket_df)/25) for bucket_df in tif_par_nj_nonnull_buckets.values())
key_counter = 0
total_keys=len(tif_par_nj_nonnull_buckets.keys())
tif_par_nj_nonnull_api = []
input_tokens_tif_par_nj_nonnull=0
output_tokens_tif_par_nj_nonnull=0
start_time_tif_par_nj = time.time()

for key in tif_par_nj_nonnull_buckets.keys():
    key_counter+=1
    current_df = tif_par_nj_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    # Threading setup
    total_iterations = math.ceil(len(current_df) / 25)
    stop_event = threading.Event()
    message_thread = threading.Thread(target=print_dynamic_message, args=(batch_counter,key_counter,counter,total_batches,total_keys, total_iterations,start_time_tif_par_nj, stop_event))
    message_thread.start()

    try:
        for start in range(0, len(current_df), 25):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + 25
            #Each call returns the response text plus its input and output token counts
            result, it, ot = score_sentiments_jul(current_df[start:end])
            tif_par_nj_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        # Always stop the dynamic message thread, even when a request raises;
        # otherwise it would keep running and clearing the cell output
        stop_event.set()
        message_thread.join()
    input_tokens_tif_par_nj_nonnull+=input_tokens
    output_tokens_tif_par_nj_nonnull+=output_tokens
    clear_output(wait=True)

#Run summary: elapsed time and a cost estimate of 0.01 per 1000 input tokens and
#0.03 per 1000 output tokens, each rounded to 2 decimals
overall_execution_time_tif_par_nj = time.time() - start_time_tif_par_nj
formatted_time_tif_par_nj = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_tif_par_nj))
input_token_cost_tif_par_nj = round((0.01/1000) * input_tokens_tif_par_nj_nonnull, 2)
output_token_cost_tif_par_nj = round((0.03/1000) * output_tokens_tif_par_nj_nonnull, 2)
total_cost_tif_par_nj = round(input_token_cost_tif_par_nj + output_token_cost_tif_par_nj, 2)

#batch_counter is a list, so print its single element rather than the list itself
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_tif_par_nj}")
print(f"Total Input Tokens - {input_tokens_tif_par_nj_nonnull}")
print(f"Total Input Cost = {input_token_cost_tif_par_nj}")
print(f"Total Output Tokens - {output_tokens_tif_par_nj_nonnull}")
print(f"Total Output Cost = {output_token_cost_tif_par_nj}")
print(f"Total Cost = {total_cost_tif_par_nj}")

Executed [0] Iterations
Total Execution Time: 00:00:00
Total Input Tokens - 0
Total Input Cost = 0.0
Total Output Tokens - 0
Total Output Cost = 0.0
Total Cost = 0.0


In [286]:
#Clean every raw API response for tif_par_nj: drop the "```json" / "```" code-fence markers,
#then swap each empty JSON list for a placeholder entry with blank positive/negative fields
tif_par_nj_nonnull_api_cleaned = [
    raw_response.replace("```json", "").replace("```", "").replace(
        "[]", '[{"positive": "", "negative": ""}]'
    )
    for raw_response in tif_par_nj_nonnull_api
]
#One DataFrame row per cleaned response
tif_par_nj_nonnull_api_cleaned_df = pd.DataFrame(data=tif_par_nj_nonnull_api_cleaned)
#Topics that receive a sentiment score for every commentor
column_names2 = [
    'Trust', 'Store Experience', 'Store Staff', 'Product Design', 'Product Variety',
    'Discount', 'Making Charge', 'Price', 'Product Quality', 'OLD Gold Jewellery Exchange',
]

In [287]:
#Turn each cleaned API response (one JSON object per batch) into one record per commentor,
#holding a -1/0/1 sentiment score for every topic in column_names2
processed_data_tif_par_nj_nonnull = []

#Iterate over each row in the DataFrame
for index, row in tif_par_nj_nonnull_api_cleaned_df.iterrows():
    try:
        #The JSON string is expected in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Each key is a commentor name, each value that commentor's list of feedback entries
        for commentor_name, feedback_list in data.items():
            #Start every topic at 0 (not mentioned)
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #A commentor may have several feedback entries; their scores are summed per topic
            for feedback in feedback_list or []:
                #Strip whitespace around each comma-separated topic so "Trust, Price" matches
                #'Price' rather than ' Price'; "or ''" also tolerates explicit null values
                positive = [name.strip() for name in (feedback.get("positive") or "").split(',')]
                negative = [name.strip() for name in (feedback.get("negative") or "").split(',')]

                for topic in column_names2:
                    #1 for positive, -1 for negative, 0 for not mentioned
                    commentor_data[topic] += assign_value(topic, positive, negative)

            #Clamp the summed scores to the range -1..1
            for topic in column_names2:
                commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_tif_par_nj_nonnull.append(commentor_data)
    except Exception as exc:
        #Best effort: report the unusable batch and continue with the next one.
        #"except Exception" (instead of a bare except) lets KeyboardInterrupt stop the cell.
        #NOTE(review): a skipped batch yields fewer sentiment rows than reviews - verify any
        #later row-wise join against the review table
        print(f"Invalid JSON in row {index}: {exc}")

#Persist the cleaned responses to Excel, then reload them from that file
tif_par_nj_nonnull_api_cleaned_df.to_excel("tif_par_nj_nonnull_api_cleaned_df.xlsx",index=False)

tif_par_nj_nonnull_api_cleaned_df = pd.read_excel("tif_par_nj_nonnull_api_cleaned_df.xlsx")

tif_par_nj_nonnull_api_cleaned_df

In [288]:
#Column order of the per-commentor sentiment table
ordered_columns = ["Commentor Name",
                   "Trust",
                   "Store Experience",
                   "Store Staff",
                   "Product Design",
                   "Product Variety",
                   "Discount",
                   "Making Charge",
                   "Price",
                   "Product Quality",
                   "OLD Gold Jewellery Exchange"]
#Create the DataFrame directly in that order. Passing columns= also works when no records
#were processed (as for this store, where zero batches were executed), whereas selecting
#[ordered_columns] on an empty frame raises KeyError
tif_par_nj_nonnull_sen_df = pd.DataFrame(processed_data_tif_par_nj_nonnull, columns=ordered_columns)

#Put the sentiment columns next to the review columns (axis=1 concat aligns rows on the index)
tif_par_nj_nonnull_merged_df = pd.concat([combined_df_tif_par_nj, tif_par_nj_nonnull_sen_df], axis=1)

#Append the rows of null_dataframes['tif_par_nj_null'] below the scored rows
tif_par_nj_final_sen_df = pd.concat([tif_par_nj_nonnull_merged_df,null_dataframes['tif_par_nj_null']], ignore_index=True)

#Export a copy whose "Published At Date" is cut to the first 10 characters of its string form
tif_par_nj_final_sen_df_copy = tif_par_nj_final_sen_df.copy()
tif_par_nj_final_sen_df_copy["Published At Date"] = tif_par_nj_final_sen_df_copy["Published At Date"].astype(str).str[:10]

tif_par_nj_final_sen_df_copy.to_excel("sentiment_raw_output/tif_par_nj_final_sen_df_jul.xlsx",index=False)


KeyError: "None of [Index(['Commentor Name', 'Trust', 'Store Experience', 'Store Staff',\n       'Product Design', 'Product Variety', 'Discount', 'Making Charge',\n       'Price', 'Product Quality', 'OLD Gold Jewellery Exchange'],\n      dtype='object')] are in the [columns]"

### tif_vie_va

In [289]:
#Score every non-null tif_vie_va review through score_sentiments_jul, one bucket at a
#time in fixed-size batches, while a helper thread prints live progress. Collects the
#raw API responses and the token usage, then prints a run summary with estimated cost.
batch_size = 25  #rows sent to score_sentiments_jul per call
batch_counter = [0]  #one-element list so the progress thread sees updates made here
#Count batches over the buckets that actually exist rather than six hard-coded keys, so
#the total always matches the loop below and a missing bucket cannot raise KeyError.
total_batches = sum(math.ceil(len(bucket_df) / batch_size) for bucket_df in tif_vie_va_nonnull_buckets.values())
key_counter = 0
total_keys = len(tif_vie_va_nonnull_buckets)
tif_vie_va_nonnull_api = []
input_tokens_tif_vie_va_nonnull = 0
output_tokens_tif_vie_va_nonnull = 0
start_time_tif_vie_va = time.time()

for key in tif_vie_va_nonnull_buckets.keys():
    key_counter += 1
    current_df = tif_vie_va_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    #Threading setup: one progress thread per bucket
    total_iterations = math.ceil(len(current_df) / batch_size)
    stop_event = threading.Event()
    message_thread = threading.Thread(target=print_dynamic_message, args=(batch_counter,key_counter,counter,total_batches,total_keys, total_iterations,start_time_tif_vie_va, stop_event))
    message_thread.start()

    try:
        for start in range(0, len(current_df), batch_size):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + batch_size
            #iloc makes the positional row slice explicit
            result, it, ot = score_sentiments_jul(current_df.iloc[start:end])
            tif_vie_va_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        #Always stop and join the progress thread, even if an API call raises; otherwise
        #print_dynamic_message keeps looping and clearing the output in the background.
        stop_event.set()
        message_thread.join()
        input_tokens_tif_vie_va_nonnull += input_tokens
        output_tokens_tif_vie_va_nonnull += output_tokens
    clear_output(wait=True)

overall_execution_time_tif_vie_va = time.time() - start_time_tif_vie_va
formatted_time_tif_vie_va = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_tif_vie_va))
#Cost estimate uses the per-1000-token rates hard-coded below (0.01 input, 0.03 output)
input_token_cost_tif_vie_va = round((0.01/1000) * input_tokens_tif_vie_va_nonnull, 2)
output_token_cost_tif_vie_va = round((0.03/1000) * output_tokens_tif_vie_va_nonnull, 2)
total_cost_tif_vie_va = round(input_token_cost_tif_vie_va + output_token_cost_tif_vie_va, 2)

#Print the count itself, not the one-element list that holds it
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_tif_vie_va}")
print(f"Total Input Tokens - {input_tokens_tif_vie_va_nonnull}")
print(f"Total Input Cost = {input_token_cost_tif_vie_va}")
print(f"Total Output Tokens - {output_tokens_tif_vie_va_nonnull}")
print(f"Total Output Cost = {output_token_cost_tif_vie_va}")
print(f"Total Cost = {total_cost_tif_vie_va}")

Executed [3] Iterations
Total Execution Time: 00:00:05
Total Input Tokens - 2129
Total Input Cost = 0.02
Total Output Tokens - 112
Total Output Cost = 0.0
Total Cost = 0.02


In [290]:
#Clean each raw tif_vie_va API response: drop the Markdown code-fence markers and
#substitute a single empty positive/negative entry for every literal "[]", then put the
#cleaned strings into a one-column DataFrame.
tif_vie_va_nonnull_api_cleaned = list(
    map(
        lambda response: response.replace("```json", "")
                                 .replace("```", "")
                                 .replace("[]", '[{"positive": "", "negative": ""}]'),
        tif_vie_va_nonnull_api,
    )
)
tif_vie_va_nonnull_api_cleaned_df = pd.DataFrame(tif_vie_va_nonnull_api_cleaned)

#Topics that receive a sentiment column in the parsing step
column_names2 = [
    "Trust",
    "Store Experience",
    "Store Staff",
    "Product Design",
    "Product Variety",
    "Discount",
    "Making Charge",
    "Price",
    "Product Quality",
    "OLD Gold Jewellery Exchange",
]

In [291]:
#Parse the cleaned tif_vie_va API responses into one record per commentor, holding a
#-1 / 0 / 1 sentiment value for every topic in column_names2.
processed_data_tif_vie_va_nonnull = []

#Each row holds the JSON text returned for one API batch
for index, row in tif_vie_va_nonnull_api_cleaned_df.iterrows():
    try:
        #Assuming the JSON string is in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Keys are commentor names; each value is iterated as a list of feedback dicts
        for commentor_name, feedback_list in data.items():
            #Start every topic at 0 (not mentioned)
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #A commentor may have several feedback entries; sum their votes per topic
            for feedback in feedback_list or []:
                #Strip whitespace so "Trust, Price" still matches "Price", and treat a
                #JSON null the same as an empty string instead of crashing on .split
                positive = [name.strip() for name in (feedback.get("positive") or "").split(',')]
                negative = [name.strip() for name in (feedback.get("negative") or "").split(',')]

                for topic in column_names2:
                    #1 for positive, -1 for negative, 0 for not mentioned
                    commentor_data[topic] += assign_value(topic, positive, negative)

            #Finalize the values to be within -1, 0, or 1
            for topic in column_names2:
                commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_tif_vie_va_nonnull.append(commentor_data)
    except (ValueError, TypeError, AttributeError) as exc:
        #Skip batches whose payload is not valid JSON (json.JSONDecodeError is a ValueError)
        #or does not have the expected shape; anything else, including KeyboardInterrupt,
        #is no longer swallowed. Commentors parsed before the failure stay in the list.
        print(f"Invalid JSON in row {index}: {type(exc).__name__}: {exc}")

#Persist the cleaned API responses and reload them from the saved workbook
tif_vie_va_nonnull_api_cleaned_df.to_excel("tif_vie_va_nonnull_api_cleaned_df.xlsx",index=False)

tif_vie_va_nonnull_api_cleaned_df = pd.read_excel("tif_vie_va_nonnull_api_cleaned_df.xlsx")

tif_vie_va_nonnull_api_cleaned_df

In [292]:
#Build the per-commentor sentiment table for tif_vie_va, attach it to the review rows,
#append the reviews that had no text, and export the result to Excel.

#Column order for the sentiment table
ordered_columns = ["Commentor Name",
                   "Trust",
                   "Store Experience",
                   "Store Staff",
                   "Product Design",
                   "Product Variety",
                   "Discount",
                   "Making Charge",
                   "Price",
                   "Product Quality",
                   "OLD Gold Jewellery Exchange"]

#reindex(columns=...) orders the columns and also creates any that are missing, so an
#empty processed_data list (e.g. every API batch failed to parse) produces an empty
#table instead of raising KeyError as plain column selection does.
tif_vie_va_nonnull_sen_df = pd.DataFrame(processed_data_tif_vie_va_nonnull).reindex(columns=ordered_columns)

#pd.concat(axis=1) aligns on the index and pads the shorter side with NaN, so a row-count
#mismatch would silently attach sentiments to the wrong reviews. Surface it.
#NOTE(review): assumes combined_df_tif_vie_va rows are in the same order as the API output - confirm.
if len(tif_vie_va_nonnull_sen_df) != len(combined_df_tif_vie_va):
    print(f"Warning: {len(tif_vie_va_nonnull_sen_df)} sentiment rows vs {len(combined_df_tif_vie_va)} review rows for tif_vie_va")

tif_vie_va_nonnull_merged_df = pd.concat([combined_df_tif_vie_va, tif_vie_va_nonnull_sen_df], axis=1)

#Append the null-review rows underneath the scored rows
tif_vie_va_final_sen_df = pd.concat([tif_vie_va_nonnull_merged_df,null_dataframes['tif_vie_va_null']], ignore_index=True)

#Work on a copy so the date truncation does not touch the merged frame
tif_vie_va_final_sen_df_copy = tif_vie_va_final_sen_df.copy()
#Keep only the first 10 characters of the stringified date
tif_vie_va_final_sen_df_copy["Published At Date"] = tif_vie_va_final_sen_df_copy["Published At Date"].astype(str).str[:10]

tif_vie_va_final_sen_df_copy.to_excel("sentiment_raw_output/tif_vie_va_final_sen_df_jul.xlsx",index=False)

### tif_ric_va

In [293]:
#Score every non-null tif_ric_va review through score_sentiments_jul, one bucket at a
#time in fixed-size batches, while a helper thread prints live progress. Collects the
#raw API responses and the token usage, then prints a run summary with estimated cost.
batch_size = 25  #rows sent to score_sentiments_jul per call
batch_counter = [0]  #one-element list so the progress thread sees updates made here
#Count batches over the buckets that actually exist rather than six hard-coded keys, so
#the total always matches the loop below and a missing bucket cannot raise KeyError.
total_batches = sum(math.ceil(len(bucket_df) / batch_size) for bucket_df in tif_ric_va_nonnull_buckets.values())
key_counter = 0
total_keys = len(tif_ric_va_nonnull_buckets)
tif_ric_va_nonnull_api = []
input_tokens_tif_ric_va_nonnull = 0
output_tokens_tif_ric_va_nonnull = 0
start_time_tif_ric_va = time.time()

for key in tif_ric_va_nonnull_buckets.keys():
    key_counter += 1
    current_df = tif_ric_va_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    #Threading setup: one progress thread per bucket
    total_iterations = math.ceil(len(current_df) / batch_size)
    stop_event = threading.Event()
    message_thread = threading.Thread(target=print_dynamic_message, args=(batch_counter,key_counter,counter,total_batches,total_keys, total_iterations,start_time_tif_ric_va, stop_event))
    message_thread.start()

    try:
        for start in range(0, len(current_df), batch_size):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + batch_size
            #iloc makes the positional row slice explicit
            result, it, ot = score_sentiments_jul(current_df.iloc[start:end])
            tif_ric_va_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        #Always stop and join the progress thread, even if an API call raises; otherwise
        #print_dynamic_message keeps looping and clearing the output in the background.
        stop_event.set()
        message_thread.join()
        input_tokens_tif_ric_va_nonnull += input_tokens
        output_tokens_tif_ric_va_nonnull += output_tokens
    clear_output(wait=True)

overall_execution_time_tif_ric_va = time.time() - start_time_tif_ric_va
formatted_time_tif_ric_va = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_tif_ric_va))
#Cost estimate uses the per-1000-token rates hard-coded below (0.01 input, 0.03 output)
input_token_cost_tif_ric_va = round((0.01/1000) * input_tokens_tif_ric_va_nonnull, 2)
output_token_cost_tif_ric_va = round((0.03/1000) * output_tokens_tif_ric_va_nonnull, 2)
total_cost_tif_ric_va = round(input_token_cost_tif_ric_va + output_token_cost_tif_ric_va, 2)

#Print the count itself, not the one-element list that holds it
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_tif_ric_va}")
print(f"Total Input Tokens - {input_tokens_tif_ric_va_nonnull}")
print(f"Total Input Cost = {input_token_cost_tif_ric_va}")
print(f"Total Output Tokens - {output_tokens_tif_ric_va_nonnull}")
print(f"Total Output Cost = {output_token_cost_tif_ric_va}")
print(f"Total Cost = {total_cost_tif_ric_va}")

Executed [1] Iterations
Total Execution Time: 00:00:01
Total Input Tokens - 680
Total Input Cost = 0.01
Total Output Tokens - 37
Total Output Cost = 0.0
Total Cost = 0.01


In [294]:
#Clean each raw tif_ric_va API response: drop the Markdown code-fence markers and
#substitute a single empty positive/negative entry for every literal "[]", then put the
#cleaned strings into a one-column DataFrame.
tif_ric_va_nonnull_api_cleaned = list(
    map(
        lambda response: response.replace("```json", "")
                                 .replace("```", "")
                                 .replace("[]", '[{"positive": "", "negative": ""}]'),
        tif_ric_va_nonnull_api,
    )
)
tif_ric_va_nonnull_api_cleaned_df = pd.DataFrame(tif_ric_va_nonnull_api_cleaned)

#Topics that receive a sentiment column in the parsing step
column_names2 = [
    "Trust",
    "Store Experience",
    "Store Staff",
    "Product Design",
    "Product Variety",
    "Discount",
    "Making Charge",
    "Price",
    "Product Quality",
    "OLD Gold Jewellery Exchange",
]

In [295]:
#Parse the cleaned tif_ric_va API responses into one record per commentor, holding a
#-1 / 0 / 1 sentiment value for every topic in column_names2.
processed_data_tif_ric_va_nonnull = []

#Each row holds the JSON text returned for one API batch
for index, row in tif_ric_va_nonnull_api_cleaned_df.iterrows():
    try:
        #Assuming the JSON string is in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Keys are commentor names; each value is iterated as a list of feedback dicts
        for commentor_name, feedback_list in data.items():
            #Start every topic at 0 (not mentioned)
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #A commentor may have several feedback entries; sum their votes per topic
            for feedback in feedback_list or []:
                #Strip whitespace so "Trust, Price" still matches "Price", and treat a
                #JSON null the same as an empty string instead of crashing on .split
                positive = [name.strip() for name in (feedback.get("positive") or "").split(',')]
                negative = [name.strip() for name in (feedback.get("negative") or "").split(',')]

                for topic in column_names2:
                    #1 for positive, -1 for negative, 0 for not mentioned
                    commentor_data[topic] += assign_value(topic, positive, negative)

            #Finalize the values to be within -1, 0, or 1
            for topic in column_names2:
                commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_tif_ric_va_nonnull.append(commentor_data)
    except (ValueError, TypeError, AttributeError) as exc:
        #Skip batches whose payload is not valid JSON (json.JSONDecodeError is a ValueError)
        #or does not have the expected shape; anything else, including KeyboardInterrupt,
        #is no longer swallowed. Commentors parsed before the failure stay in the list.
        print(f"Invalid JSON in row {index}: {type(exc).__name__}: {exc}")

#Persist the cleaned API responses and reload them from the saved workbook
tif_ric_va_nonnull_api_cleaned_df.to_excel("tif_ric_va_nonnull_api_cleaned_df.xlsx",index=False)

tif_ric_va_nonnull_api_cleaned_df = pd.read_excel("tif_ric_va_nonnull_api_cleaned_df.xlsx")

tif_ric_va_nonnull_api_cleaned_df

In [296]:
#Build the per-commentor sentiment table for tif_ric_va, attach it to the review rows,
#append the reviews that had no text, and export the result to Excel.

#Column order for the sentiment table
ordered_columns = ["Commentor Name",
                   "Trust",
                   "Store Experience",
                   "Store Staff",
                   "Product Design",
                   "Product Variety",
                   "Discount",
                   "Making Charge",
                   "Price",
                   "Product Quality",
                   "OLD Gold Jewellery Exchange"]

#reindex(columns=...) orders the columns and also creates any that are missing, so an
#empty processed_data list (e.g. every API batch failed to parse) produces an empty
#table instead of raising KeyError as plain column selection does.
tif_ric_va_nonnull_sen_df = pd.DataFrame(processed_data_tif_ric_va_nonnull).reindex(columns=ordered_columns)

#pd.concat(axis=1) aligns on the index and pads the shorter side with NaN, so a row-count
#mismatch would silently attach sentiments to the wrong reviews. Surface it.
#NOTE(review): assumes combined_df_tif_ric_va rows are in the same order as the API output - confirm.
if len(tif_ric_va_nonnull_sen_df) != len(combined_df_tif_ric_va):
    print(f"Warning: {len(tif_ric_va_nonnull_sen_df)} sentiment rows vs {len(combined_df_tif_ric_va)} review rows for tif_ric_va")

tif_ric_va_nonnull_merged_df = pd.concat([combined_df_tif_ric_va, tif_ric_va_nonnull_sen_df], axis=1)

#Append the null-review rows underneath the scored rows
tif_ric_va_final_sen_df = pd.concat([tif_ric_va_nonnull_merged_df,null_dataframes['tif_ric_va_null']], ignore_index=True)

#Work on a copy so the date truncation does not touch the merged frame
tif_ric_va_final_sen_df_copy = tif_ric_va_final_sen_df.copy()
#Keep only the first 10 characters of the stringified date
tif_ric_va_final_sen_df_copy["Published At Date"] = tif_ric_va_final_sen_df_copy["Published At Date"].astype(str).str[:10]

tif_ric_va_final_sen_df_copy.to_excel("sentiment_raw_output/tif_ric_va_final_sen_df_jul.xlsx",index=False)


### vbj_fri_tx

In [297]:
#Score every non-null vbj_fri_tx review through score_sentiments_jul, one bucket at a
#time in fixed-size batches, while a helper thread prints live progress. Collects the
#raw API responses and the token usage, then prints a run summary with estimated cost.
batch_size = 25  #rows sent to score_sentiments_jul per call
batch_counter = [0]  #one-element list so the progress thread sees updates made here
#Count batches over the buckets that actually exist rather than six hard-coded keys, so
#the total always matches the loop below and a missing bucket cannot raise KeyError.
total_batches = sum(math.ceil(len(bucket_df) / batch_size) for bucket_df in vbj_fri_tx_nonnull_buckets.values())
key_counter = 0
total_keys = len(vbj_fri_tx_nonnull_buckets)
vbj_fri_tx_nonnull_api = []
input_tokens_vbj_fri_tx_nonnull = 0
output_tokens_vbj_fri_tx_nonnull = 0
start_time_vbj_fri_tx = time.time()

for key in vbj_fri_tx_nonnull_buckets.keys():
    key_counter += 1
    current_df = vbj_fri_tx_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    #Threading setup: one progress thread per bucket
    total_iterations = math.ceil(len(current_df) / batch_size)
    stop_event = threading.Event()
    message_thread = threading.Thread(target=print_dynamic_message, args=(batch_counter,key_counter,counter,total_batches,total_keys, total_iterations,start_time_vbj_fri_tx, stop_event))
    message_thread.start()

    try:
        for start in range(0, len(current_df), batch_size):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + batch_size
            #iloc makes the positional row slice explicit
            result, it, ot = score_sentiments_jul(current_df.iloc[start:end])
            vbj_fri_tx_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        #Always stop and join the progress thread, even if an API call raises; otherwise
        #print_dynamic_message keeps looping and clearing the output in the background.
        stop_event.set()
        message_thread.join()
        input_tokens_vbj_fri_tx_nonnull += input_tokens
        output_tokens_vbj_fri_tx_nonnull += output_tokens
    clear_output(wait=True)

overall_execution_time_vbj_fri_tx = time.time() - start_time_vbj_fri_tx
formatted_time_vbj_fri_tx = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_vbj_fri_tx))
#Cost estimate uses the per-1000-token rates hard-coded below (0.01 input, 0.03 output)
input_token_cost_vbj_fri_tx = round((0.01/1000) * input_tokens_vbj_fri_tx_nonnull, 2)
output_token_cost_vbj_fri_tx = round((0.03/1000) * output_tokens_vbj_fri_tx_nonnull, 2)
total_cost_vbj_fri_tx = round(input_token_cost_vbj_fri_tx + output_token_cost_vbj_fri_tx, 2)

#Print the count itself, not the one-element list that holds it
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_vbj_fri_tx}")
print(f"Total Input Tokens - {input_tokens_vbj_fri_tx_nonnull}")
print(f"Total Input Cost = {input_token_cost_vbj_fri_tx}")
print(f"Total Output Tokens - {output_tokens_vbj_fri_tx_nonnull}")
print(f"Total Output Cost = {output_token_cost_vbj_fri_tx}")
print(f"Total Cost = {total_cost_vbj_fri_tx}")

Executed [5] Iterations
Total Execution Time: 00:00:26
Total Input Tokens - 5347
Total Input Cost = 0.05
Total Output Tokens - 1417
Total Output Cost = 0.04
Total Cost = 0.09


In [298]:
#Clean each raw vbj_fri_tx API response: drop the Markdown code-fence markers and
#substitute a single empty positive/negative entry for every literal "[]", then put the
#cleaned strings into a one-column DataFrame.
vbj_fri_tx_nonnull_api_cleaned = list(
    map(
        lambda response: response.replace("```json", "")
                                 .replace("```", "")
                                 .replace("[]", '[{"positive": "", "negative": ""}]'),
        vbj_fri_tx_nonnull_api,
    )
)
vbj_fri_tx_nonnull_api_cleaned_df = pd.DataFrame(vbj_fri_tx_nonnull_api_cleaned)

#Topics that receive a sentiment column in the parsing step
column_names2 = [
    "Trust",
    "Store Experience",
    "Store Staff",
    "Product Design",
    "Product Variety",
    "Discount",
    "Making Charge",
    "Price",
    "Product Quality",
    "OLD Gold Jewellery Exchange",
]

In [299]:
#Parse the cleaned vbj_fri_tx API responses into one record per commentor, holding a
#-1 / 0 / 1 sentiment value for every topic in column_names2.
processed_data_vbj_fri_tx_nonnull = []

#Each row holds the JSON text returned for one API batch
for index, row in vbj_fri_tx_nonnull_api_cleaned_df.iterrows():
    try:
        #Assuming the JSON string is in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Keys are commentor names; each value is iterated as a list of feedback dicts
        for commentor_name, feedback_list in data.items():
            #Start every topic at 0 (not mentioned)
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #A commentor may have several feedback entries; sum their votes per topic
            for feedback in feedback_list or []:
                #Strip whitespace so "Trust, Price" still matches "Price", and treat a
                #JSON null the same as an empty string instead of crashing on .split
                positive = [name.strip() for name in (feedback.get("positive") or "").split(',')]
                negative = [name.strip() for name in (feedback.get("negative") or "").split(',')]

                for topic in column_names2:
                    #1 for positive, -1 for negative, 0 for not mentioned
                    commentor_data[topic] += assign_value(topic, positive, negative)

            #Finalize the values to be within -1, 0, or 1
            for topic in column_names2:
                commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_vbj_fri_tx_nonnull.append(commentor_data)
    except (ValueError, TypeError, AttributeError) as exc:
        #Skip batches whose payload is not valid JSON (json.JSONDecodeError is a ValueError)
        #or does not have the expected shape; anything else, including KeyboardInterrupt,
        #is no longer swallowed. Commentors parsed before the failure stay in the list.
        print(f"Invalid JSON in row {index}: {type(exc).__name__}: {exc}")

#Persist the cleaned API responses and reload them from the saved workbook
vbj_fri_tx_nonnull_api_cleaned_df.to_excel("vbj_fri_tx_nonnull_api_cleaned_df.xlsx",index=False)

vbj_fri_tx_nonnull_api_cleaned_df = pd.read_excel("vbj_fri_tx_nonnull_api_cleaned_df.xlsx")

vbj_fri_tx_nonnull_api_cleaned_df

In [300]:
#Build the per-commentor sentiment table for vbj_fri_tx, attach it to the review rows,
#append the reviews that had no text, and export the result to Excel.

#Column order for the sentiment table
ordered_columns = ["Commentor Name",
                   "Trust",
                   "Store Experience",
                   "Store Staff",
                   "Product Design",
                   "Product Variety",
                   "Discount",
                   "Making Charge",
                   "Price",
                   "Product Quality",
                   "OLD Gold Jewellery Exchange"]

#reindex(columns=...) orders the columns and also creates any that are missing, so an
#empty processed_data list (e.g. every API batch failed to parse) produces an empty
#table instead of raising KeyError as plain column selection does.
vbj_fri_tx_nonnull_sen_df = pd.DataFrame(processed_data_vbj_fri_tx_nonnull).reindex(columns=ordered_columns)

#pd.concat(axis=1) aligns on the index and pads the shorter side with NaN, so a row-count
#mismatch would silently attach sentiments to the wrong reviews. Surface it.
#NOTE(review): assumes combined_df_vbj_fri_tx rows are in the same order as the API output - confirm.
if len(vbj_fri_tx_nonnull_sen_df) != len(combined_df_vbj_fri_tx):
    print(f"Warning: {len(vbj_fri_tx_nonnull_sen_df)} sentiment rows vs {len(combined_df_vbj_fri_tx)} review rows for vbj_fri_tx")

vbj_fri_tx_nonnull_merged_df = pd.concat([combined_df_vbj_fri_tx, vbj_fri_tx_nonnull_sen_df], axis=1)

#Append the null-review rows underneath the scored rows
vbj_fri_tx_final_sen_df = pd.concat([vbj_fri_tx_nonnull_merged_df,null_dataframes['vbj_fri_tx_null']], ignore_index=True)

#Work on a copy so the date truncation does not touch the merged frame
vbj_fri_tx_final_sen_df_copy = vbj_fri_tx_final_sen_df.copy()
#Keep only the first 10 characters of the stringified date
vbj_fri_tx_final_sen_df_copy["Published At Date"] = vbj_fri_tx_final_sen_df_copy["Published At Date"].astype(str).str[:10]

vbj_fri_tx_final_sen_df_copy.to_excel("sentiment_raw_output/vbj_fri_tx_final_sen_df_jul.xlsx",index=False)


## Tanishq

### tan_chi_il

In [301]:
#Score every non-null tan_chi_il review through score_sentiments_jul, one bucket at a
#time in fixed-size batches, while a helper thread prints live progress. Collects the
#raw API responses and the token usage, then prints a run summary with estimated cost.
batch_size = 25  #rows sent to score_sentiments_jul per call
batch_counter = [0]  #one-element list so the progress thread sees updates made here
#Count batches over the buckets that actually exist rather than six hard-coded keys, so
#the total always matches the loop below and a missing bucket cannot raise KeyError.
total_batches = sum(math.ceil(len(bucket_df) / batch_size) for bucket_df in tan_chi_il_nonnull_buckets.values())
key_counter = 0
total_keys = len(tan_chi_il_nonnull_buckets)
tan_chi_il_nonnull_api = []
input_tokens_tan_chi_il_nonnull = 0
output_tokens_tan_chi_il_nonnull = 0
start_time_tan_chi_il = time.time()

for key in tan_chi_il_nonnull_buckets.keys():
    key_counter += 1
    current_df = tan_chi_il_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    #Threading setup: one progress thread per bucket
    total_iterations = math.ceil(len(current_df) / batch_size)
    stop_event = threading.Event()
    message_thread = threading.Thread(target=print_dynamic_message, args=(batch_counter,key_counter,counter,total_batches,total_keys, total_iterations,start_time_tan_chi_il, stop_event))
    message_thread.start()

    try:
        for start in range(0, len(current_df), batch_size):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + batch_size
            #iloc makes the positional row slice explicit
            result, it, ot = score_sentiments_jul(current_df.iloc[start:end])
            tan_chi_il_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        #Always stop and join the progress thread, even if an API call raises; otherwise
        #print_dynamic_message keeps looping and clearing the output in the background.
        stop_event.set()
        message_thread.join()
        input_tokens_tan_chi_il_nonnull += input_tokens
        output_tokens_tan_chi_il_nonnull += output_tokens
    clear_output(wait=True)

overall_execution_time_tan_chi_il = time.time() - start_time_tan_chi_il
formatted_time_tan_chi_il = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_tan_chi_il))
#Cost estimate uses the per-1000-token rates hard-coded below (0.01 input, 0.03 output)
input_token_cost_tan_chi_il = round((0.01/1000) * input_tokens_tan_chi_il_nonnull, 2)
output_token_cost_tan_chi_il = round((0.03/1000) * output_tokens_tan_chi_il_nonnull, 2)
total_cost_tan_chi_il = round(input_token_cost_tan_chi_il + output_token_cost_tan_chi_il, 2)

#Print the count itself, not the one-element list that holds it
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_tan_chi_il}")
print(f"Total Input Tokens - {input_tokens_tan_chi_il_nonnull}")
print(f"Total Input Cost = {input_token_cost_tan_chi_il}")
print(f"Total Output Tokens - {output_tokens_tan_chi_il_nonnull}")
print(f"Total Output Cost = {output_token_cost_tan_chi_il}")
print(f"Total Cost = {total_cost_tan_chi_il}")

Executed [11] Iterations
Total Execution Time: 00:01:30
Total Input Tokens - 16855
Total Input Cost = 0.17
Total Output Tokens - 5987
Total Output Cost = 0.18
Total Cost = 0.35


In [302]:
#Clean each raw tan_chi_il API response: drop the Markdown code-fence markers and
#substitute a single empty positive/negative entry for every literal "[]", then put the
#cleaned strings into a one-column DataFrame.
tan_chi_il_nonnull_api_cleaned = list(
    map(
        lambda response: response.replace("```json", "")
                                 .replace("```", "")
                                 .replace("[]", '[{"positive": "", "negative": ""}]'),
        tan_chi_il_nonnull_api,
    )
)
tan_chi_il_nonnull_api_cleaned_df = pd.DataFrame(tan_chi_il_nonnull_api_cleaned)

#Topics that receive a sentiment column in the parsing step
column_names2 = [
    "Trust",
    "Store Experience",
    "Store Staff",
    "Product Design",
    "Product Variety",
    "Discount",
    "Making Charge",
    "Price",
    "Product Quality",
    "OLD Gold Jewellery Exchange",
]

In [303]:
#Parse the cleaned tan_chi_il API responses into one record per commentor, holding a
#-1 / 0 / 1 sentiment value for every topic in column_names2.
processed_data_tan_chi_il_nonnull = []

#Each row holds the JSON text returned for one API batch
for index, row in tan_chi_il_nonnull_api_cleaned_df.iterrows():
    try:
        #Assuming the JSON string is in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Keys are commentor names; each value is iterated as a list of feedback dicts
        for commentor_name, feedback_list in data.items():
            #Start every topic at 0 (not mentioned)
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #A commentor may have several feedback entries; sum their votes per topic
            for feedback in feedback_list or []:
                #Strip whitespace so "Trust, Price" still matches "Price", and treat a
                #JSON null the same as an empty string instead of crashing on .split
                positive = [name.strip() for name in (feedback.get("positive") or "").split(',')]
                negative = [name.strip() for name in (feedback.get("negative") or "").split(',')]

                for topic in column_names2:
                    #1 for positive, -1 for negative, 0 for not mentioned
                    commentor_data[topic] += assign_value(topic, positive, negative)

            #Finalize the values to be within -1, 0, or 1
            for topic in column_names2:
                commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_tan_chi_il_nonnull.append(commentor_data)
    except (ValueError, TypeError, AttributeError) as exc:
        #Skip batches whose payload is not valid JSON (json.JSONDecodeError is a ValueError)
        #or does not have the expected shape; anything else, including KeyboardInterrupt,
        #is no longer swallowed. Commentors parsed before the failure stay in the list.
        print(f"Invalid JSON in row {index}: {type(exc).__name__}: {exc}")

#Persist the cleaned API responses and reload them from the saved workbook
tan_chi_il_nonnull_api_cleaned_df.to_excel("tan_chi_il_nonnull_api_cleaned_df.xlsx",index=False)

tan_chi_il_nonnull_api_cleaned_df = pd.read_excel("tan_chi_il_nonnull_api_cleaned_df.xlsx")

tan_chi_il_nonnull_api_cleaned_df

In [304]:
#Build the per-commentor sentiment table for tan_chi_il, attach it to the review rows,
#append the reviews that had no text, and export the result to Excel.

#Column order for the sentiment table
ordered_columns = ["Commentor Name",
                   "Trust",
                   "Store Experience",
                   "Store Staff",
                   "Product Design",
                   "Product Variety",
                   "Discount",
                   "Making Charge",
                   "Price",
                   "Product Quality",
                   "OLD Gold Jewellery Exchange"]

#reindex(columns=...) orders the columns and also creates any that are missing, so an
#empty processed_data list (e.g. every API batch failed to parse) produces an empty
#table instead of raising KeyError as plain column selection does.
tan_chi_il_nonnull_sen_df = pd.DataFrame(processed_data_tan_chi_il_nonnull).reindex(columns=ordered_columns)

#pd.concat(axis=1) aligns on the index and pads the shorter side with NaN, so a row-count
#mismatch would silently attach sentiments to the wrong reviews. Surface it.
#NOTE(review): assumes combined_df_tan_chi_il rows are in the same order as the API output - confirm.
if len(tan_chi_il_nonnull_sen_df) != len(combined_df_tan_chi_il):
    print(f"Warning: {len(tan_chi_il_nonnull_sen_df)} sentiment rows vs {len(combined_df_tan_chi_il)} review rows for tan_chi_il")

tan_chi_il_nonnull_merged_df = pd.concat([combined_df_tan_chi_il, tan_chi_il_nonnull_sen_df], axis=1)

#Append the null-review rows underneath the scored rows
tan_chi_il_final_sen_df = pd.concat([tan_chi_il_nonnull_merged_df,null_dataframes['tan_chi_il_null']], ignore_index=True)

#Work on a copy so the date truncation does not touch the merged frame
tan_chi_il_final_sen_df_copy = tan_chi_il_final_sen_df.copy()
#Keep only the first 10 characters of the stringified date
tan_chi_il_final_sen_df_copy["Published At Date"] = tan_chi_il_final_sen_df_copy["Published At Date"].astype(str).str[:10]

tan_chi_il_final_sen_df_copy.to_excel("sentiment_raw_output/tan_chi_il_final_sen_df_jul.xlsx",index=False)


### tan_fri_tx

In [305]:
#Score every non-null tan_fri_tx review through score_sentiments_jul, one bucket at a
#time in fixed-size batches, while a helper thread prints live progress. Collects the
#raw API responses and the token usage, then prints a run summary with estimated cost.
batch_size = 25  #rows sent to score_sentiments_jul per call
batch_counter = [0]  #one-element list so the progress thread sees updates made here
#Count batches over the buckets that actually exist rather than six hard-coded keys, so
#the total always matches the loop below and a missing bucket cannot raise KeyError.
total_batches = sum(math.ceil(len(bucket_df) / batch_size) for bucket_df in tan_fri_tx_nonnull_buckets.values())
key_counter = 0
total_keys = len(tan_fri_tx_nonnull_buckets)
tan_fri_tx_nonnull_api = []
input_tokens_tan_fri_tx_nonnull = 0
output_tokens_tan_fri_tx_nonnull = 0
start_time_tan_fri_tx = time.time()

for key in tan_fri_tx_nonnull_buckets.keys():
    key_counter += 1
    current_df = tan_fri_tx_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    #Threading setup: one progress thread per bucket
    total_iterations = math.ceil(len(current_df) / batch_size)
    stop_event = threading.Event()
    message_thread = threading.Thread(target=print_dynamic_message, args=(batch_counter,key_counter,counter,total_batches,total_keys, total_iterations,start_time_tan_fri_tx, stop_event))
    message_thread.start()

    try:
        for start in range(0, len(current_df), batch_size):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + batch_size
            #iloc makes the positional row slice explicit
            result, it, ot = score_sentiments_jul(current_df.iloc[start:end])
            tan_fri_tx_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        #Always stop and join the progress thread, even if an API call raises; otherwise
        #print_dynamic_message keeps looping and clearing the output in the background.
        stop_event.set()
        message_thread.join()
        input_tokens_tan_fri_tx_nonnull += input_tokens
        output_tokens_tan_fri_tx_nonnull += output_tokens
    clear_output(wait=True)

overall_execution_time_tan_fri_tx = time.time() - start_time_tan_fri_tx
formatted_time_tan_fri_tx = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_tan_fri_tx))
#Cost estimate uses the per-1000-token rates hard-coded below (0.01 input, 0.03 output)
input_token_cost_tan_fri_tx = round((0.01/1000) * input_tokens_tan_fri_tx_nonnull, 2)
output_token_cost_tan_fri_tx = round((0.03/1000) * output_tokens_tan_fri_tx_nonnull, 2)
total_cost_tan_fri_tx = round(input_token_cost_tan_fri_tx + output_token_cost_tan_fri_tx, 2)

#Print the count itself, not the one-element list that holds it
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_tan_fri_tx}")
print(f"Total Input Tokens - {input_tokens_tan_fri_tx_nonnull}")
print(f"Total Input Cost = {input_token_cost_tan_fri_tx}")
print(f"Total Output Tokens - {output_tokens_tan_fri_tx_nonnull}")
print(f"Total Output Cost = {output_token_cost_tan_fri_tx}")
print(f"Total Cost = {total_cost_tan_fri_tx}")

Executed [34] Iterations
Total Execution Time: 00:05:26
Total Input Tokens - 53791
Total Input Cost = 0.54
Total Output Tokens - 22687
Total Output Cost = 0.68
Total Cost = 1.22


In [306]:
#Clean each raw tan_fri_tx API response: drop the Markdown code-fence markers and
#substitute a single empty positive/negative entry for every literal "[]", then put the
#cleaned strings into a one-column DataFrame.
tan_fri_tx_nonnull_api_cleaned = list(
    map(
        lambda response: response.replace("```json", "")
                                 .replace("```", "")
                                 .replace("[]", '[{"positive": "", "negative": ""}]'),
        tan_fri_tx_nonnull_api,
    )
)
tan_fri_tx_nonnull_api_cleaned_df = pd.DataFrame(tan_fri_tx_nonnull_api_cleaned)

#Topics that receive a sentiment column in the parsing step
column_names2 = [
    "Trust",
    "Store Experience",
    "Store Staff",
    "Product Design",
    "Product Variety",
    "Discount",
    "Making Charge",
    "Price",
    "Product Quality",
    "OLD Gold Jewellery Exchange",
]

In [307]:
#Parse each cleaned tan_fri_tx API response into one record per commentor,
#scoring every topic in column_names2 as 1 (positive), -1 (negative) or 0.
processed_data_tan_fri_tx_nonnull = []

#Iterate over each row in the DataFrame (one raw API response per row)
for index, row in tan_fri_tx_nonnull_api_cleaned_df.iterrows():
    try:
        #Assuming the JSON string is in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Each top-level key is treated as a commentor name mapped to a list of feedback dicts
        for commentor_name, feedback_list in data.items():
            #Create a dictionary for each commentor with default values as 0
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #Check if there is any feedback for the commentor
            if feedback_list:
                #Since there could be multiple feedback entries for a single commentor, we aggregate them
                for feedback in feedback_list:
                    #Strip each comma-separated topic so "Trust, Price" still matches "Price";
                    #a missing or null field is treated as "no topics" instead of raising
                    positive = [name.strip() for name in (feedback.get("positive") or "").split(',')]
                    negative = [name.strip() for name in (feedback.get("negative") or "").split(',')]

                    for topic in column_names2:
                        #Aggregate the value: 1 for positive, -1 for negative, 0 for not mentioned
                        commentor_data[topic] += assign_value(topic, positive, negative)

                #Finalize the values to be within -1, 0, or 1
                for topic in column_names2:
                    commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_tan_fri_tx_nonnull.append(commentor_data)
    except Exception as exc:
        #Skip the rest of this row, but report the actual error; catching Exception
        #(not a bare except) lets a kernel interrupt still stop the loop.
        #NOTE(review): a skipped row contributes fewer records, so the later side-by-side
        #concat with combined_df_tan_fri_tx may no longer line up - confirm nothing is
        #reported here before trusting the merged output.
        print(f"Invalid JSON in row {index}: {exc!r}")

#Persist the cleaned raw responses, then reload them from the workbook
tan_fri_tx_nonnull_api_cleaned_df.to_excel("tan_fri_tx_nonnull_api_cleaned_df.xlsx",index=False)

tan_fri_tx_nonnull_api_cleaned_df = pd.read_excel("tan_fri_tx_nonnull_api_cleaned_df.xlsx")

tan_fri_tx_nonnull_api_cleaned_df

In [308]:
# Column layout for the sentiment table: commentor name first, then the topics.
ordered_columns = [
    "Commentor Name",
    "Trust",
    "Store Experience",
    "Store Staff",
    "Product Design",
    "Product Variety",
    "Discount",
    "Making Charge",
    "Price",
    "Product Quality",
    "OLD Gold Jewellery Exchange",
]

# Turn the parsed per-commentor records into a frame and apply that layout.
tan_fri_tx_nonnull_sen_df = pd.DataFrame(processed_data_tan_fri_tx_nonnull)
tan_fri_tx_nonnull_sen_df = tan_fri_tx_nonnull_sen_df.loc[:, ordered_columns]

# Place the sentiment scores next to the source reviews.
# NOTE(review): a column-wise concat aligns on index labels; this assumes
# combined_df_tan_fri_tx has a default 0..n-1 index in the same row order as
# the parsed records - TODO confirm.
tan_fri_tx_nonnull_merged_df = pd.concat(
    [combined_df_tan_fri_tx, tan_fri_tx_nonnull_sen_df],
    axis="columns",
)

# Append the reviews that had no text (kept aside in null_dataframes).
tan_fri_tx_final_sen_df = pd.concat(
    [tan_fri_tx_nonnull_merged_df, null_dataframes['tan_fri_tx_null']],
    ignore_index=True,
)

# Work on a copy and keep only the first 10 characters of the date's string
# form (presumably the YYYY-MM-DD part - confirm against the source data).
tan_fri_tx_final_sen_df_copy = tan_fri_tx_final_sen_df.copy()
tan_fri_tx_final_sen_df_copy["Published At Date"] = (
    tan_fri_tx_final_sen_df_copy["Published At Date"].astype(str).str.slice(stop=10)
)

tan_fri_tx_final_sen_df_copy.to_excel(
    "sentiment_raw_output/tan_fri_tx_final_sen_df_jul.xlsx",
    index=False,
)


### tan_hou_tx

In [309]:
# Send every tan_hou_tx non-null review bucket to the sentiment scorer in
# batches, collecting the raw responses and token usage for this store.
batch_size = 25  # rows handed to score_sentiments_jul per call
batch_counter = [0]  # one-element list shared with the progress thread, which reads element 0
# Count batches over the same buckets the loop below walks, so the progress
# total always equals the number of scorer calls that will actually be made.
total_batches = sum(
    math.ceil(len(bucket_df) / batch_size)
    for bucket_df in tan_hou_tx_nonnull_buckets.values()
)
key_counter = 0
total_keys = len(tan_hou_tx_nonnull_buckets.keys())
tan_hou_tx_nonnull_api = []
input_tokens_tan_hou_tx_nonnull = 0
output_tokens_tan_hou_tx_nonnull = 0
start_time_tan_hou_tx = time.time()

for key in tan_hou_tx_nonnull_buckets.keys():
    key_counter += 1
    current_df = tan_hou_tx_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    # Threading setup: a background thread keeps printing progress for this bucket
    total_iterations = math.ceil(len(current_df) / batch_size)
    stop_event = threading.Event()
    message_thread = threading.Thread(
        target=print_dynamic_message,
        args=(batch_counter, key_counter, counter, total_batches, total_keys,
              total_iterations, start_time_tan_hou_tx, stop_event),
    )
    message_thread.start()

    try:
        for start in range(0, len(current_df), batch_size):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + batch_size
            # The scorer's result is unpacked as (response, input tokens, output tokens)
            result, it, ot = score_sentiments_jul(current_df[start:end])
            tan_hou_tx_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        # Always stop the progress thread, even when a scorer call raises;
        # otherwise it would keep running and clearing the cell output.
        stop_event.set()
        message_thread.join()
    input_tokens_tan_hou_tx_nonnull += input_tokens
    output_tokens_tan_hou_tx_nonnull += output_tokens
    clear_output(wait=True)

overall_execution_time_tan_hou_tx = time.time() - start_time_tan_hou_tx
formatted_time_tan_hou_tx = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_tan_hou_tx))
# Cost estimate from per-1000-token rates of 0.01 (input) and 0.03 (output);
# presumably USD for the model in use - confirm against current pricing.
input_token_cost_tan_hou_tx = round((0.01/1000) * input_tokens_tan_hou_tx_nonnull, 2)
output_token_cost_tan_hou_tx = round((0.03/1000) * output_tokens_tan_hou_tx_nonnull, 2)
total_cost_tan_hou_tx = round(input_token_cost_tan_hou_tx + output_token_cost_tan_hou_tx, 2)

# batch_counter is a one-element list, so it renders with brackets, e.g. "[13]"
print(f"Executed {batch_counter} Iterations")
print(f"Total Execution Time: {formatted_time_tan_hou_tx}")
print(f"Total Input Tokens - {input_tokens_tan_hou_tx_nonnull}")
print(f"Total Input Cost = {input_token_cost_tan_hou_tx}")
print(f"Total Output Tokens - {output_tokens_tan_hou_tx_nonnull}")
print(f"Total Output Cost = {output_token_cost_tan_hou_tx}")
print(f"Total Cost = {total_cost_tan_hou_tx}")

Executed [13] Iterations
Total Execution Time: 00:02:33
Total Input Tokens - 19995
Total Input Cost = 0.2
Total Output Tokens - 7359
Total Output Cost = 0.22
Total Cost = 0.42


In [310]:
# Strip the Markdown code fences ("```json" / "```") from every raw tan_hou_tx
# API response and load the cleaned strings into a single-column DataFrame.
tan_hou_tx_nonnull_api_cleaned = []
for raw_response in tan_hou_tx_nonnull_api:
    without_fences = raw_response.replace("```json", "").replace("```", "")
    # An empty list "[]" is swapped for one blank positive/negative entry so
    # that every commentor carries at least one feedback dict downstream.
    tan_hou_tx_nonnull_api_cleaned.append(
        without_fences.replace("[]", "[{\"positive\": \"\", \"negative\": \"\"}]")
    )

tan_hou_tx_nonnull_api_cleaned_df = pd.DataFrame(tan_hou_tx_nonnull_api_cleaned)

# Sentiment topics that become one score column each in the parsed output.
column_names2 = [
    'Trust',
    'Store Experience',
    'Store Staff',
    'Product Design',
    'Product Variety',
    'Discount',
    'Making Charge',
    'Price',
    'Product Quality',
    'OLD Gold Jewellery Exchange',
]

In [311]:
#Parse each cleaned tan_hou_tx API response into one record per commentor,
#scoring every topic in column_names2 as 1 (positive), -1 (negative) or 0.
processed_data_tan_hou_tx_nonnull = []

#Iterate over each row in the DataFrame (one raw API response per row)
for index, row in tan_hou_tx_nonnull_api_cleaned_df.iterrows():
    try:
        #Assuming the JSON string is in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Each top-level key is treated as a commentor name mapped to a list of feedback dicts
        for commentor_name, feedback_list in data.items():
            #Create a dictionary for each commentor with default values as 0
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #Check if there is any feedback for the commentor
            if feedback_list:
                #Since there could be multiple feedback entries for a single commentor, we aggregate them
                for feedback in feedback_list:
                    #Strip each comma-separated topic so "Trust, Price" still matches "Price";
                    #a missing or null field is treated as "no topics" instead of raising
                    positive = [name.strip() for name in (feedback.get("positive") or "").split(',')]
                    negative = [name.strip() for name in (feedback.get("negative") or "").split(',')]

                    for topic in column_names2:
                        #Aggregate the value: 1 for positive, -1 for negative, 0 for not mentioned
                        commentor_data[topic] += assign_value(topic, positive, negative)

                #Finalize the values to be within -1, 0, or 1
                for topic in column_names2:
                    commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_tan_hou_tx_nonnull.append(commentor_data)
    except Exception as exc:
        #Skip the rest of this row, but report the actual error; catching Exception
        #(not a bare except) lets a kernel interrupt still stop the loop.
        #NOTE(review): a skipped row contributes fewer records, so the later side-by-side
        #concat with combined_df_tan_hou_tx may no longer line up - confirm nothing is
        #reported here before trusting the merged output.
        print(f"Invalid JSON in row {index}: {exc!r}")

#Persist the cleaned raw responses, then reload them from the workbook
tan_hou_tx_nonnull_api_cleaned_df.to_excel("tan_hou_tx_nonnull_api_cleaned_df.xlsx",index=False)

tan_hou_tx_nonnull_api_cleaned_df = pd.read_excel("tan_hou_tx_nonnull_api_cleaned_df.xlsx")

tan_hou_tx_nonnull_api_cleaned_df

In [312]:
# Column layout for the sentiment table: commentor name first, then the topics.
ordered_columns = [
    "Commentor Name",
    "Trust",
    "Store Experience",
    "Store Staff",
    "Product Design",
    "Product Variety",
    "Discount",
    "Making Charge",
    "Price",
    "Product Quality",
    "OLD Gold Jewellery Exchange",
]

# Turn the parsed per-commentor records into a frame and apply that layout.
tan_hou_tx_nonnull_sen_df = pd.DataFrame(processed_data_tan_hou_tx_nonnull)
tan_hou_tx_nonnull_sen_df = tan_hou_tx_nonnull_sen_df.loc[:, ordered_columns]

# Place the sentiment scores next to the source reviews.
# NOTE(review): a column-wise concat aligns on index labels; this assumes
# combined_df_tan_hou_tx has a default 0..n-1 index in the same row order as
# the parsed records - TODO confirm.
tan_hou_tx_nonnull_merged_df = pd.concat(
    [combined_df_tan_hou_tx, tan_hou_tx_nonnull_sen_df],
    axis="columns",
)

# Append the reviews that had no text (kept aside in null_dataframes).
tan_hou_tx_final_sen_df = pd.concat(
    [tan_hou_tx_nonnull_merged_df, null_dataframes['tan_hou_tx_null']],
    ignore_index=True,
)

# Work on a copy and keep only the first 10 characters of the date's string
# form (presumably the YYYY-MM-DD part - confirm against the source data).
tan_hou_tx_final_sen_df_copy = tan_hou_tx_final_sen_df.copy()
tan_hou_tx_final_sen_df_copy["Published At Date"] = (
    tan_hou_tx_final_sen_df_copy["Published At Date"].astype(str).str.slice(stop=10)
)

tan_hou_tx_final_sen_df_copy.to_excel(
    "sentiment_raw_output/tan_hou_tx_final_sen_df_jul.xlsx",
    index=False,
)


### tan_new_nj

In [313]:
# Send every tan_new_nj non-null review bucket to the sentiment scorer in
# batches, collecting the raw responses and token usage for this store.
batch_size = 25  # rows handed to score_sentiments_jul per call
batch_counter = [0]  # one-element list shared with the progress thread, which reads element 0
# Count batches over the same buckets the loop below walks, so the progress
# total always equals the number of scorer calls that will actually be made.
total_batches = sum(
    math.ceil(len(bucket_df) / batch_size)
    for bucket_df in tan_new_nj_nonnull_buckets.values()
)
key_counter = 0
total_keys = len(tan_new_nj_nonnull_buckets.keys())
tan_new_nj_nonnull_api = []
input_tokens_tan_new_nj_nonnull = 0
output_tokens_tan_new_nj_nonnull = 0
start_time_tan_new_nj = time.time()

for key in tan_new_nj_nonnull_buckets.keys():
    key_counter += 1
    current_df = tan_new_nj_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    # Threading setup: a background thread keeps printing progress for this bucket
    total_iterations = math.ceil(len(current_df) / batch_size)
    stop_event = threading.Event()
    message_thread = threading.Thread(
        target=print_dynamic_message,
        args=(batch_counter, key_counter, counter, total_batches, total_keys,
              total_iterations, start_time_tan_new_nj, stop_event),
    )
    message_thread.start()

    try:
        for start in range(0, len(current_df), batch_size):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + batch_size
            # The scorer's result is unpacked as (response, input tokens, output tokens)
            result, it, ot = score_sentiments_jul(current_df[start:end])
            tan_new_nj_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        # Always stop the progress thread, even when a scorer call raises;
        # otherwise it would keep running and clearing the cell output.
        stop_event.set()
        message_thread.join()
    input_tokens_tan_new_nj_nonnull += input_tokens
    output_tokens_tan_new_nj_nonnull += output_tokens
    clear_output(wait=True)

overall_execution_time_tan_new_nj = time.time() - start_time_tan_new_nj
formatted_time_tan_new_nj = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_tan_new_nj))
# Cost estimate from per-1000-token rates of 0.01 (input) and 0.03 (output);
# presumably USD for the model in use - confirm against current pricing.
input_token_cost_tan_new_nj = round((0.01/1000) * input_tokens_tan_new_nj_nonnull, 2)
output_token_cost_tan_new_nj = round((0.03/1000) * output_tokens_tan_new_nj_nonnull, 2)
total_cost_tan_new_nj = round(input_token_cost_tan_new_nj + output_token_cost_tan_new_nj, 2)

# batch_counter is a one-element list, so it renders with brackets, e.g. "[11]"
print(f"Executed {batch_counter} Iterations")
print(f"Total Execution Time: {formatted_time_tan_new_nj}")
print(f"Total Input Tokens - {input_tokens_tan_new_nj_nonnull}")
print(f"Total Input Cost = {input_token_cost_tan_new_nj}")
print(f"Total Output Tokens - {output_tokens_tan_new_nj_nonnull}")
print(f"Total Output Cost = {output_token_cost_tan_new_nj}")
print(f"Total Cost = {total_cost_tan_new_nj}")

Executed [11] Iterations
Total Execution Time: 00:02:00
Total Input Tokens - 22998
Total Input Cost = 0.23
Total Output Tokens - 6838
Total Output Cost = 0.21
Total Cost = 0.44


In [314]:
# Strip the Markdown code fences ("```json" / "```") from every raw tan_new_nj
# API response and load the cleaned strings into a single-column DataFrame.
tan_new_nj_nonnull_api_cleaned = []
for raw_response in tan_new_nj_nonnull_api:
    without_fences = raw_response.replace("```json", "").replace("```", "")
    # An empty list "[]" is swapped for one blank positive/negative entry so
    # that every commentor carries at least one feedback dict downstream.
    tan_new_nj_nonnull_api_cleaned.append(
        without_fences.replace("[]", "[{\"positive\": \"\", \"negative\": \"\"}]")
    )

tan_new_nj_nonnull_api_cleaned_df = pd.DataFrame(tan_new_nj_nonnull_api_cleaned)

# Sentiment topics that become one score column each in the parsed output.
column_names2 = [
    'Trust',
    'Store Experience',
    'Store Staff',
    'Product Design',
    'Product Variety',
    'Discount',
    'Making Charge',
    'Price',
    'Product Quality',
    'OLD Gold Jewellery Exchange',
]

In [315]:
#Parse each cleaned tan_new_nj API response into one record per commentor,
#scoring every topic in column_names2 as 1 (positive), -1 (negative) or 0.
processed_data_tan_new_nj_nonnull = []

#Iterate over each row in the DataFrame (one raw API response per row)
for index, row in tan_new_nj_nonnull_api_cleaned_df.iterrows():
    try:
        #Assuming the JSON string is in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Each top-level key is treated as a commentor name mapped to a list of feedback dicts
        for commentor_name, feedback_list in data.items():
            #Create a dictionary for each commentor with default values as 0
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #Check if there is any feedback for the commentor
            if feedback_list:
                #Since there could be multiple feedback entries for a single commentor, we aggregate them
                for feedback in feedback_list:
                    #Strip each comma-separated topic so "Trust, Price" still matches "Price";
                    #a missing or null field is treated as "no topics" instead of raising
                    positive = [name.strip() for name in (feedback.get("positive") or "").split(',')]
                    negative = [name.strip() for name in (feedback.get("negative") or "").split(',')]

                    for topic in column_names2:
                        #Aggregate the value: 1 for positive, -1 for negative, 0 for not mentioned
                        commentor_data[topic] += assign_value(topic, positive, negative)

                #Finalize the values to be within -1, 0, or 1
                for topic in column_names2:
                    commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_tan_new_nj_nonnull.append(commentor_data)
    except Exception as exc:
        #Skip the rest of this row, but report the actual error; catching Exception
        #(not a bare except) lets a kernel interrupt still stop the loop.
        #NOTE(review): a skipped row contributes fewer records, so the later side-by-side
        #concat with combined_df_tan_new_nj may no longer line up - confirm nothing is
        #reported here before trusting the merged output.
        print(f"Invalid JSON in row {index}: {exc!r}")

#Persist the cleaned raw responses, then reload them from the workbook
tan_new_nj_nonnull_api_cleaned_df.to_excel("tan_new_nj_nonnull_api_cleaned_df.xlsx",index=False)

tan_new_nj_nonnull_api_cleaned_df = pd.read_excel("tan_new_nj_nonnull_api_cleaned_df.xlsx")

tan_new_nj_nonnull_api_cleaned_df

In [316]:
# Column layout for the sentiment table: commentor name first, then the topics.
ordered_columns = [
    "Commentor Name",
    "Trust",
    "Store Experience",
    "Store Staff",
    "Product Design",
    "Product Variety",
    "Discount",
    "Making Charge",
    "Price",
    "Product Quality",
    "OLD Gold Jewellery Exchange",
]

# Turn the parsed per-commentor records into a frame and apply that layout.
tan_new_nj_nonnull_sen_df = pd.DataFrame(processed_data_tan_new_nj_nonnull)
tan_new_nj_nonnull_sen_df = tan_new_nj_nonnull_sen_df.loc[:, ordered_columns]

# Place the sentiment scores next to the source reviews.
# NOTE(review): a column-wise concat aligns on index labels; this assumes
# combined_df_tan_new_nj has a default 0..n-1 index in the same row order as
# the parsed records - TODO confirm.
tan_new_nj_nonnull_merged_df = pd.concat(
    [combined_df_tan_new_nj, tan_new_nj_nonnull_sen_df],
    axis="columns",
)

# Append the reviews that had no text (kept aside in null_dataframes).
tan_new_nj_final_sen_df = pd.concat(
    [tan_new_nj_nonnull_merged_df, null_dataframes['tan_new_nj_null']],
    ignore_index=True,
)

# Work on a copy and keep only the first 10 characters of the date's string
# form (presumably the YYYY-MM-DD part - confirm against the source data).
tan_new_nj_final_sen_df_copy = tan_new_nj_final_sen_df.copy()
tan_new_nj_final_sen_df_copy["Published At Date"] = (
    tan_new_nj_final_sen_df_copy["Published At Date"].astype(str).str.slice(stop=10)
)

tan_new_nj_final_sen_df_copy.to_excel(
    "sentiment_raw_output/tan_new_nj_final_sen_df_jul.xlsx",
    index=False,
)


### tan_bar_db

In [317]:
# Send every tan_bar_db non-null review bucket to the sentiment scorer in
# batches, collecting the raw responses and token usage for this store.
batch_size = 25  # rows handed to score_sentiments_jul per call
batch_counter = [0]  # one-element list shared with the progress thread, which reads element 0
# Count batches over the same buckets the loop below walks, so the progress
# total always equals the number of scorer calls that will actually be made.
total_batches = sum(
    math.ceil(len(bucket_df) / batch_size)
    for bucket_df in tan_bar_db_nonnull_buckets.values()
)
key_counter = 0
total_keys = len(tan_bar_db_nonnull_buckets.keys())
tan_bar_db_nonnull_api = []
input_tokens_tan_bar_db_nonnull = 0
output_tokens_tan_bar_db_nonnull = 0
start_time_tan_bar_db = time.time()

for key in tan_bar_db_nonnull_buckets.keys():
    key_counter += 1
    current_df = tan_bar_db_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    # Threading setup: a background thread keeps printing progress for this bucket
    total_iterations = math.ceil(len(current_df) / batch_size)
    stop_event = threading.Event()
    message_thread = threading.Thread(
        target=print_dynamic_message,
        args=(batch_counter, key_counter, counter, total_batches, total_keys,
              total_iterations, start_time_tan_bar_db, stop_event),
    )
    message_thread.start()

    try:
        for start in range(0, len(current_df), batch_size):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + batch_size
            # The scorer's result is unpacked as (response, input tokens, output tokens)
            result, it, ot = score_sentiments_jul(current_df[start:end])
            tan_bar_db_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        # Always stop the progress thread, even when a scorer call raises;
        # otherwise it would keep running and clearing the cell output.
        stop_event.set()
        message_thread.join()
    input_tokens_tan_bar_db_nonnull += input_tokens
    output_tokens_tan_bar_db_nonnull += output_tokens
    clear_output(wait=True)

overall_execution_time_tan_bar_db = time.time() - start_time_tan_bar_db
formatted_time_tan_bar_db = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_tan_bar_db))
# Cost estimate from per-1000-token rates of 0.01 (input) and 0.03 (output);
# presumably USD for the model in use - confirm against current pricing.
input_token_cost_tan_bar_db = round((0.01/1000) * input_tokens_tan_bar_db_nonnull, 2)
output_token_cost_tan_bar_db = round((0.03/1000) * output_tokens_tan_bar_db_nonnull, 2)
total_cost_tan_bar_db = round(input_token_cost_tan_bar_db + output_token_cost_tan_bar_db, 2)

# batch_counter is a one-element list, so it renders with brackets, e.g. "[44]"
print(f"Executed {batch_counter} Iterations")
print(f"Total Execution Time: {formatted_time_tan_bar_db}")
print(f"Total Input Tokens - {input_tokens_tan_bar_db_nonnull}")
print(f"Total Input Cost = {input_token_cost_tan_bar_db}")
print(f"Total Output Tokens - {output_tokens_tan_bar_db_nonnull}")
print(f"Total Output Cost = {output_token_cost_tan_bar_db}")
print(f"Total Cost = {total_cost_tan_bar_db}")

Executed [44] Iterations
Total Execution Time: 00:08:02
Total Input Tokens - 68101
Total Input Cost = 0.68
Total Output Tokens - 30247
Total Output Cost = 0.91
Total Cost = 1.59


In [318]:
# Strip the Markdown code fences ("```json" / "```") from every raw tan_bar_db
# API response and load the cleaned strings into a single-column DataFrame.
tan_bar_db_nonnull_api_cleaned = []
for raw_response in tan_bar_db_nonnull_api:
    without_fences = raw_response.replace("```json", "").replace("```", "")
    # An empty list "[]" is swapped for one blank positive/negative entry so
    # that every commentor carries at least one feedback dict downstream.
    tan_bar_db_nonnull_api_cleaned.append(
        without_fences.replace("[]", "[{\"positive\": \"\", \"negative\": \"\"}]")
    )

tan_bar_db_nonnull_api_cleaned_df = pd.DataFrame(tan_bar_db_nonnull_api_cleaned)

# Sentiment topics that become one score column each in the parsed output.
column_names2 = [
    'Trust',
    'Store Experience',
    'Store Staff',
    'Product Design',
    'Product Variety',
    'Discount',
    'Making Charge',
    'Price',
    'Product Quality',
    'OLD Gold Jewellery Exchange',
]

In [319]:
#Parse each cleaned tan_bar_db API response into one record per commentor,
#scoring every topic in column_names2 as 1 (positive), -1 (negative) or 0.
processed_data_tan_bar_db_nonnull = []

#Iterate over each row in the DataFrame (one raw API response per row)
for index, row in tan_bar_db_nonnull_api_cleaned_df.iterrows():
    try:
        #Assuming the JSON string is in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Each top-level key is treated as a commentor name mapped to a list of feedback dicts
        for commentor_name, feedback_list in data.items():
            #Create a dictionary for each commentor with default values as 0
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #Check if there is any feedback for the commentor
            if feedback_list:
                #Since there could be multiple feedback entries for a single commentor, we aggregate them
                for feedback in feedback_list:
                    #Strip each comma-separated topic so "Trust, Price" still matches "Price";
                    #a missing or null field is treated as "no topics" instead of raising
                    positive = [name.strip() for name in (feedback.get("positive") or "").split(',')]
                    negative = [name.strip() for name in (feedback.get("negative") or "").split(',')]

                    for topic in column_names2:
                        #Aggregate the value: 1 for positive, -1 for negative, 0 for not mentioned
                        commentor_data[topic] += assign_value(topic, positive, negative)

                #Finalize the values to be within -1, 0, or 1
                for topic in column_names2:
                    commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_tan_bar_db_nonnull.append(commentor_data)
    except Exception as exc:
        #Skip the rest of this row, but report the actual error; catching Exception
        #(not a bare except) lets a kernel interrupt still stop the loop.
        #NOTE(review): a skipped row contributes fewer records, so the later side-by-side
        #concat with combined_df_tan_bar_db may no longer line up - confirm nothing is
        #reported here before trusting the merged output.
        print(f"Invalid JSON in row {index}: {exc!r}")

#Persist the cleaned raw responses, then reload them from the workbook
tan_bar_db_nonnull_api_cleaned_df.to_excel("tan_bar_db_nonnull_api_cleaned_df.xlsx",index=False)

tan_bar_db_nonnull_api_cleaned_df = pd.read_excel("tan_bar_db_nonnull_api_cleaned_df.xlsx")

tan_bar_db_nonnull_api_cleaned_df

In [320]:
# Column layout for the sentiment table: commentor name first, then the topics.
ordered_columns = [
    "Commentor Name",
    "Trust",
    "Store Experience",
    "Store Staff",
    "Product Design",
    "Product Variety",
    "Discount",
    "Making Charge",
    "Price",
    "Product Quality",
    "OLD Gold Jewellery Exchange",
]

# Turn the parsed per-commentor records into a frame and apply that layout.
tan_bar_db_nonnull_sen_df = pd.DataFrame(processed_data_tan_bar_db_nonnull)
tan_bar_db_nonnull_sen_df = tan_bar_db_nonnull_sen_df.loc[:, ordered_columns]

# Place the sentiment scores next to the source reviews.
# NOTE(review): a column-wise concat aligns on index labels; this assumes
# combined_df_tan_bar_db has a default 0..n-1 index in the same row order as
# the parsed records - TODO confirm.
tan_bar_db_nonnull_merged_df = pd.concat(
    [combined_df_tan_bar_db, tan_bar_db_nonnull_sen_df],
    axis="columns",
)

# Append the reviews that had no text (kept aside in null_dataframes).
tan_bar_db_final_sen_df = pd.concat(
    [tan_bar_db_nonnull_merged_df, null_dataframes['tan_bar_db_null']],
    ignore_index=True,
)

# Work on a copy and keep only the first 10 characters of the date's string
# form (presumably the YYYY-MM-DD part - confirm against the source data).
tan_bar_db_final_sen_df_copy = tan_bar_db_final_sen_df.copy()
tan_bar_db_final_sen_df_copy["Published At Date"] = (
    tan_bar_db_final_sen_df_copy["Published At Date"].astype(str).str.slice(stop=10)
)

tan_bar_db_final_sen_df_copy.to_excel(
    "sentiment_raw_output/tan_bar_db_final_sen_df_jul.xlsx",
    index=False,
)


### tan_fah_db

In [321]:
# Send every tan_fah_db non-null review bucket to the sentiment scorer in
# batches, collecting the raw responses and token usage for this store.
batch_size = 25  # rows handed to score_sentiments_jul per call
batch_counter = [0]  # one-element list shared with the progress thread, which reads element 0
# Count batches over the same buckets the loop below walks, so the progress
# total always equals the number of scorer calls that will actually be made.
total_batches = sum(
    math.ceil(len(bucket_df) / batch_size)
    for bucket_df in tan_fah_db_nonnull_buckets.values()
)
key_counter = 0
total_keys = len(tan_fah_db_nonnull_buckets.keys())
tan_fah_db_nonnull_api = []
input_tokens_tan_fah_db_nonnull = 0
output_tokens_tan_fah_db_nonnull = 0
start_time_tan_fah_db = time.time()

for key in tan_fah_db_nonnull_buckets.keys():
    key_counter += 1
    current_df = tan_fah_db_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    # Threading setup: a background thread keeps printing progress for this bucket
    total_iterations = math.ceil(len(current_df) / batch_size)
    stop_event = threading.Event()
    message_thread = threading.Thread(
        target=print_dynamic_message,
        args=(batch_counter, key_counter, counter, total_batches, total_keys,
              total_iterations, start_time_tan_fah_db, stop_event),
    )
    message_thread.start()

    try:
        for start in range(0, len(current_df), batch_size):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + batch_size
            # The scorer's result is unpacked as (response, input tokens, output tokens)
            result, it, ot = score_sentiments_jul(current_df[start:end])
            tan_fah_db_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        # Always stop the progress thread, even when a scorer call raises;
        # otherwise it would keep running and clearing the cell output.
        stop_event.set()
        message_thread.join()
    input_tokens_tan_fah_db_nonnull += input_tokens
    output_tokens_tan_fah_db_nonnull += output_tokens
    clear_output(wait=True)

overall_execution_time_tan_fah_db = time.time() - start_time_tan_fah_db
formatted_time_tan_fah_db = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_tan_fah_db))
# Cost estimate from per-1000-token rates of 0.01 (input) and 0.03 (output);
# presumably USD for the model in use - confirm against current pricing.
input_token_cost_tan_fah_db = round((0.01/1000) * input_tokens_tan_fah_db_nonnull, 2)
output_token_cost_tan_fah_db = round((0.03/1000) * output_tokens_tan_fah_db_nonnull, 2)
total_cost_tan_fah_db = round(input_token_cost_tan_fah_db + output_token_cost_tan_fah_db, 2)

# batch_counter is a one-element list, so it renders with brackets, e.g. "[76]"
print(f"Executed {batch_counter} Iterations")
print(f"Total Execution Time: {formatted_time_tan_fah_db}")
print(f"Total Input Tokens - {input_tokens_tan_fah_db_nonnull}")
print(f"Total Input Cost = {input_token_cost_tan_fah_db}")
print(f"Total Output Tokens - {output_tokens_tan_fah_db_nonnull}")
print(f"Total Output Cost = {output_token_cost_tan_fah_db}")
print(f"Total Cost = {total_cost_tan_fah_db}")

Executed [76] Iterations
Total Execution Time: 00:13:00
Total Input Tokens - 106933
Total Input Cost = 1.07
Total Output Tokens - 51735
Total Output Cost = 1.55
Total Cost = 2.62


In [322]:
# Strip the Markdown code fences ("```json" / "```") from every raw tan_fah_db
# API response and load the cleaned strings into a single-column DataFrame.
tan_fah_db_nonnull_api_cleaned = []
for raw_response in tan_fah_db_nonnull_api:
    without_fences = raw_response.replace("```json", "").replace("```", "")
    # An empty list "[]" is swapped for one blank positive/negative entry so
    # that every commentor carries at least one feedback dict downstream.
    tan_fah_db_nonnull_api_cleaned.append(
        without_fences.replace("[]", "[{\"positive\": \"\", \"negative\": \"\"}]")
    )

tan_fah_db_nonnull_api_cleaned_df = pd.DataFrame(tan_fah_db_nonnull_api_cleaned)

# Sentiment topics that become one score column each in the parsed output.
column_names2 = [
    'Trust',
    'Store Experience',
    'Store Staff',
    'Product Design',
    'Product Variety',
    'Discount',
    'Making Charge',
    'Price',
    'Product Quality',
    'OLD Gold Jewellery Exchange',
]

In [323]:
#Parse each cleaned tan_fah_db API response into one record per commentor,
#scoring every topic in column_names2 as 1 (positive), -1 (negative) or 0.
processed_data_tan_fah_db_nonnull = []

#Iterate over each row in the DataFrame (one raw API response per row)
for index, row in tan_fah_db_nonnull_api_cleaned_df.iterrows():
    try:
        #Assuming the JSON string is in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Each top-level key is treated as a commentor name mapped to a list of feedback dicts
        for commentor_name, feedback_list in data.items():
            #Create a dictionary for each commentor with default values as 0
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #Check if there is any feedback for the commentor
            if feedback_list:
                #Since there could be multiple feedback entries for a single commentor, we aggregate them
                for feedback in feedback_list:
                    #Strip each comma-separated topic so "Trust, Price" still matches "Price";
                    #a missing or null field is treated as "no topics" instead of raising
                    positive = [name.strip() for name in (feedback.get("positive") or "").split(',')]
                    negative = [name.strip() for name in (feedback.get("negative") or "").split(',')]

                    for topic in column_names2:
                        #Aggregate the value: 1 for positive, -1 for negative, 0 for not mentioned
                        commentor_data[topic] += assign_value(topic, positive, negative)

                #Finalize the values to be within -1, 0, or 1
                for topic in column_names2:
                    commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_tan_fah_db_nonnull.append(commentor_data)
    except Exception as exc:
        #Skip the rest of this row, but report the actual error; catching Exception
        #(not a bare except) lets a kernel interrupt still stop the loop.
        #NOTE(review): a skipped row contributes fewer records, so the later side-by-side
        #concat with combined_df_tan_fah_db may no longer line up - confirm nothing is
        #reported here before trusting the merged output.
        print(f"Invalid JSON in row {index}: {exc!r}")

#Persist the cleaned raw responses, then reload them from the workbook
tan_fah_db_nonnull_api_cleaned_df.to_excel("tan_fah_db_nonnull_api_cleaned_df.xlsx",index=False)

tan_fah_db_nonnull_api_cleaned_df = pd.read_excel("tan_fah_db_nonnull_api_cleaned_df.xlsx")

tan_fah_db_nonnull_api_cleaned_df

In [324]:
# Column layout for the sentiment table: commentor name first, then the topics.
ordered_columns = [
    "Commentor Name",
    "Trust",
    "Store Experience",
    "Store Staff",
    "Product Design",
    "Product Variety",
    "Discount",
    "Making Charge",
    "Price",
    "Product Quality",
    "OLD Gold Jewellery Exchange",
]

# Turn the parsed per-commentor records into a frame and apply that layout.
tan_fah_db_nonnull_sen_df = pd.DataFrame(processed_data_tan_fah_db_nonnull)
tan_fah_db_nonnull_sen_df = tan_fah_db_nonnull_sen_df.loc[:, ordered_columns]

# Place the sentiment scores next to the source reviews.
# NOTE(review): a column-wise concat aligns on index labels; this assumes
# combined_df_tan_fah_db has a default 0..n-1 index in the same row order as
# the parsed records - TODO confirm.
tan_fah_db_nonnull_merged_df = pd.concat(
    [combined_df_tan_fah_db, tan_fah_db_nonnull_sen_df],
    axis="columns",
)

# Append the reviews that had no text (kept aside in null_dataframes).
tan_fah_db_final_sen_df = pd.concat(
    [tan_fah_db_nonnull_merged_df, null_dataframes['tan_fah_db_null']],
    ignore_index=True,
)

# Work on a copy and keep only the first 10 characters of the date's string
# form (presumably the YYYY-MM-DD part - confirm against the source data).
tan_fah_db_final_sen_df_copy = tan_fah_db_final_sen_df.copy()
tan_fah_db_final_sen_df_copy["Published At Date"] = (
    tan_fah_db_final_sen_df_copy["Published At Date"].astype(str).str.slice(stop=10)
)

tan_fah_db_final_sen_df_copy.to_excel(
    "sentiment_raw_output/tan_fah_db_final_sen_df_jul.xlsx",
    index=False,
)


### tan_kar_db

In [325]:
#Score every non-null tan_kar_db review, one word-count bucket at a time, in batches of
#`batch_size` rows. score_sentiments_jul (defined elsewhere) returns the raw result plus
#input/output token counts; results are collected in tan_kar_db_nonnull_api and the token
#counts are summed to estimate the API cost. A background thread shows live progress.
batch_size = 25
batch_counter = [0]  #one-element list so the progress thread sees in-place updates
#Expected number of batches, derived from the buckets that actually exist so it always
#matches the loop below (a missing bucket key no longer raises KeyError)
total_batches = sum(
    math.ceil(len(bucket_df) / batch_size)
    for bucket_df in tan_kar_db_nonnull_buckets.values()
)
key_counter = 0
total_keys = len(tan_kar_db_nonnull_buckets)
tan_kar_db_nonnull_api = []
input_tokens_tan_kar_db_nonnull = 0
output_tokens_tan_kar_db_nonnull = 0
start_time_tan_kar_db = time.time()

for key in tan_kar_db_nonnull_buckets.keys():
    key_counter += 1
    current_df = tan_kar_db_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    # Threading setup
    total_iterations = math.ceil(len(current_df) / batch_size)
    stop_event = threading.Event()
    message_thread = threading.Thread(target=print_dynamic_message, args=(batch_counter,key_counter,counter,total_batches,total_keys, total_iterations,start_time_tan_kar_db, stop_event))
    message_thread.start()

    try:
        for start in range(0, len(current_df), batch_size):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + batch_size
            result, it, ot = score_sentiments_jul(current_df[start:end])
            tan_kar_db_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        # Stopping the dynamic message thread - also when a batch raises, otherwise the
        # thread would keep clearing and rewriting the cell output forever
        stop_event.set()
        message_thread.join()

    input_tokens_tan_kar_db_nonnull += input_tokens
    output_tokens_tan_kar_db_nonnull += output_tokens
    clear_output(wait=True)

overall_execution_time_tan_kar_db = time.time() - start_time_tan_kar_db
formatted_time_tan_kar_db = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_tan_kar_db))
#Cost estimate from hard-coded rates of 0.01 per 1000 input tokens and 0.03 per 1000 output tokens
input_token_cost_tan_kar_db = round((0.01/1000) * input_tokens_tan_kar_db_nonnull, 2)
output_token_cost_tan_kar_db = round((0.03/1000) * output_tokens_tan_kar_db_nonnull, 2)
total_cost_tan_kar_db = round(input_token_cost_tan_kar_db + output_token_cost_tan_kar_db, 2)

#batch_counter is a list; print its single element rather than the list repr
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_tan_kar_db}")
print(f"Total Input Tokens - {input_tokens_tan_kar_db_nonnull}")
print(f"Total Input Cost = {input_token_cost_tan_kar_db}")
print(f"Total Output Tokens - {output_tokens_tan_kar_db_nonnull}")
print(f"Total Output Cost = {output_token_cost_tan_kar_db}")
print(f"Total Cost = {total_cost_tan_kar_db}")

Executed [21] Iterations
Total Execution Time: 00:03:26
Total Input Tokens - 34017
Total Input Cost = 0.34
Total Output Tokens - 14301
Total Output Cost = 0.43
Total Cost = 0.77


In [326]:
#Clean every raw response in tan_kar_db_nonnull_api: drop the Markdown code fences and
#swap each empty list "[]" for a one-element list with blank positive/negative strings,
#then load the cleaned strings into a single-column DataFrame
tan_kar_db_nonnull_api_cleaned = []
for raw_response in tan_kar_db_nonnull_api:
    unfenced = raw_response.replace("```json", "").replace("```", "")
    tan_kar_db_nonnull_api_cleaned.append(
        unfenced.replace("[]", "[{\"positive\": \"\", \"negative\": \"\"}]")
    )
tan_kar_db_nonnull_api_cleaned_df = pd.DataFrame(tan_kar_db_nonnull_api_cleaned)

#Sentiment topics that become one score column each
column_names2 = [
    'Trust', 'Store Experience', 'Store Staff',
    'Product Design', 'Product Variety',
    'Discount', 'Making Charge', 'Price',
    'Product Quality', 'OLD Gold Jewellery Exchange',
]

In [327]:
#Initialize an empty list to store the processed data (one dict per commentor)
processed_data_tan_kar_db_nonnull = []

#Iterate over each row in the DataFrame; the first column holds one JSON response string
for index, row in tan_kar_db_nonnull_api_cleaned_df.iterrows():
    try:
        #Assuming the JSON string is in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Iterate over each key-value pair in the dictionary
        for commentor_name, feedback_list in data.items():
            #Create a dictionary for each commentor with default values as 0
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #Since there could be multiple feedback entries for a single commentor, we aggregate them
            #(an empty/missing feedback list leaves every topic at 0)
            for feedback in feedback_list or []:
                #Topics arrive comma separated; strip the whitespace around each one so that
                #"Trust, Store Staff" still matches 'Store Staff', and treat null as empty
                positive = [item.strip() for item in (feedback.get("positive") or "").split(',')]
                negative = [item.strip() for item in (feedback.get("negative") or "").split(',')]

                for topic in column_names2:
                    #Aggregate the value: 1 for positive, -1 for negative, 0 for not mentioned
                    commentor_data[topic] += assign_value(topic, positive, negative)

            #Finalize the values to be within -1, 0, or 1
            for topic in column_names2:
                commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_tan_kar_db_nonnull.append(commentor_data)
    except Exception as exc:
        #Skip rows that cannot be parsed, but say why; KeyboardInterrupt is no longer swallowed
        print(f"Invalid JSON in row {index} ({type(exc).__name__}: {exc})")

#Persist the cleaned API responses to an Excel file (index column omitted)
tan_kar_db_nonnull_api_cleaned_df.to_excel("tan_kar_db_nonnull_api_cleaned_df.xlsx",index=False)

#Reload the same file, replacing the in-memory DataFrame with what was written to disk
tan_kar_db_nonnull_api_cleaned_df = pd.read_excel("tan_kar_db_nonnull_api_cleaned_df.xlsx")

tan_kar_db_nonnull_api_cleaned_df

In [328]:
#Build the final tan_kar_db sentiment table:
#  1. turn the parsed per-commentor scores into a DataFrame with a fixed column order,
#  2. attach them column-wise to the non-null reviews,
#  3. append the reviews that had no text,
#  4. export the result to Excel.
tan_kar_db_nonnull_sen_df = pd.DataFrame(processed_data_tan_kar_db_nonnull)
#Reorder the columns
ordered_columns = ["Commentor Name",
                   "Trust",
                   "Store Experience",
                   "Store Staff",
                   "Product Design",
                   "Product Variety",
                   "Discount",
                   "Making Charge",
                   "Price",
                   "Product Quality",
                   "OLD Gold Jewellery Exchange"]
#Apply the new order to the DataFrame
tan_kar_db_nonnull_sen_df = tan_kar_db_nonnull_sen_df[ordered_columns]

#The axis=1 concat below pairs reviews and scores purely by index label. If the parsing
#step skipped a batch or the model dropped/merged a commentor, the row counts differ and
#scores end up next to the wrong review without any error - so report it loudly.
if len(tan_kar_db_nonnull_sen_df) != len(combined_df_tan_kar_db):
    print(f"WARNING: tan_kar_db has {len(combined_df_tan_kar_db)} non-null reviews but "
          f"{len(tan_kar_db_nonnull_sen_df)} parsed sentiment rows; the column-wise merge will be misaligned")

tan_kar_db_nonnull_merged_df = pd.concat([combined_df_tan_kar_db, tan_kar_db_nonnull_sen_df], axis=1)

#Append the reviews without text (taken from null_dataframes) below the scored ones
tan_kar_db_final_sen_df = pd.concat([tan_kar_db_nonnull_merged_df,null_dataframes['tan_kar_db_null']], ignore_index=True)

#Work on a copy so the un-truncated frame stays available; keep only the first 10
#characters of the date's string form (presumably YYYY-MM-DD - confirm against the data)
tan_kar_db_final_sen_df_copy = tan_kar_db_final_sen_df.copy()
tan_kar_db_final_sen_df_copy["Published At Date"] = tan_kar_db_final_sen_df_copy["Published At Date"].astype(str).str[:10]

tan_kar_db_final_sen_df_copy.to_excel("sentiment_raw_output/tan_kar_db_final_sen_df_jul.xlsx",index=False)


### tan_ham_ad

In [329]:
#Score every non-null tan_ham_ad review, one word-count bucket at a time, in batches of
#`batch_size` rows. score_sentiments_jul (defined elsewhere) returns the raw result plus
#input/output token counts; results are collected in tan_ham_ad_nonnull_api and the token
#counts are summed to estimate the API cost. A background thread shows live progress.
batch_size = 25
batch_counter = [0]  #one-element list so the progress thread sees in-place updates
#Expected number of batches, derived from the buckets that actually exist so it always
#matches the loop below (a missing bucket key no longer raises KeyError)
total_batches = sum(
    math.ceil(len(bucket_df) / batch_size)
    for bucket_df in tan_ham_ad_nonnull_buckets.values()
)
key_counter = 0
total_keys = len(tan_ham_ad_nonnull_buckets)
tan_ham_ad_nonnull_api = []
input_tokens_tan_ham_ad_nonnull = 0
output_tokens_tan_ham_ad_nonnull = 0
start_time_tan_ham_ad = time.time()

for key in tan_ham_ad_nonnull_buckets.keys():
    key_counter += 1
    current_df = tan_ham_ad_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    # Threading setup
    total_iterations = math.ceil(len(current_df) / batch_size)
    stop_event = threading.Event()
    message_thread = threading.Thread(target=print_dynamic_message, args=(batch_counter,key_counter,counter,total_batches,total_keys, total_iterations,start_time_tan_ham_ad, stop_event))
    message_thread.start()

    try:
        for start in range(0, len(current_df), batch_size):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + batch_size
            result, it, ot = score_sentiments_jul(current_df[start:end])
            tan_ham_ad_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        # Stopping the dynamic message thread - also when a batch raises, otherwise the
        # thread would keep clearing and rewriting the cell output forever
        stop_event.set()
        message_thread.join()

    input_tokens_tan_ham_ad_nonnull += input_tokens
    output_tokens_tan_ham_ad_nonnull += output_tokens
    clear_output(wait=True)

overall_execution_time_tan_ham_ad = time.time() - start_time_tan_ham_ad
formatted_time_tan_ham_ad = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_tan_ham_ad))
#Cost estimate from hard-coded rates of 0.01 per 1000 input tokens and 0.03 per 1000 output tokens
input_token_cost_tan_ham_ad = round((0.01/1000) * input_tokens_tan_ham_ad_nonnull, 2)
output_token_cost_tan_ham_ad = round((0.03/1000) * output_tokens_tan_ham_ad_nonnull, 2)
total_cost_tan_ham_ad = round(input_token_cost_tan_ham_ad + output_token_cost_tan_ham_ad, 2)

#batch_counter is a list; print its single element rather than the list repr
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_tan_ham_ad}")
print(f"Total Input Tokens - {input_tokens_tan_ham_ad_nonnull}")
print(f"Total Input Cost = {input_token_cost_tan_ham_ad}")
print(f"Total Output Tokens - {output_tokens_tan_ham_ad_nonnull}")
print(f"Total Output Cost = {output_token_cost_tan_ham_ad}")
print(f"Total Cost = {total_cost_tan_ham_ad}")

Executed [20] Iterations
Total Execution Time: 00:03:10
Total Input Tokens - 28070
Total Input Cost = 0.28
Total Output Tokens - 11357
Total Output Cost = 0.34
Total Cost = 0.62


In [330]:
#Clean every raw response in tan_ham_ad_nonnull_api: drop the Markdown code fences and
#swap each empty list "[]" for a one-element list with blank positive/negative strings,
#then load the cleaned strings into a single-column DataFrame
tan_ham_ad_nonnull_api_cleaned = []
for raw_response in tan_ham_ad_nonnull_api:
    unfenced = raw_response.replace("```json", "").replace("```", "")
    tan_ham_ad_nonnull_api_cleaned.append(
        unfenced.replace("[]", "[{\"positive\": \"\", \"negative\": \"\"}]")
    )
tan_ham_ad_nonnull_api_cleaned_df = pd.DataFrame(tan_ham_ad_nonnull_api_cleaned)

#Sentiment topics that become one score column each
column_names2 = [
    'Trust', 'Store Experience', 'Store Staff',
    'Product Design', 'Product Variety',
    'Discount', 'Making Charge', 'Price',
    'Product Quality', 'OLD Gold Jewellery Exchange',
]

In [331]:
#Initialize an empty list to store the processed data (one dict per commentor)
processed_data_tan_ham_ad_nonnull = []

#Iterate over each row in the DataFrame; the first column holds one JSON response string
for index, row in tan_ham_ad_nonnull_api_cleaned_df.iterrows():
    try:
        #Assuming the JSON string is in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Iterate over each key-value pair in the dictionary
        for commentor_name, feedback_list in data.items():
            #Create a dictionary for each commentor with default values as 0
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #Since there could be multiple feedback entries for a single commentor, we aggregate them
            #(an empty/missing feedback list leaves every topic at 0)
            for feedback in feedback_list or []:
                #Topics arrive comma separated; strip the whitespace around each one so that
                #"Trust, Store Staff" still matches 'Store Staff', and treat null as empty
                positive = [item.strip() for item in (feedback.get("positive") or "").split(',')]
                negative = [item.strip() for item in (feedback.get("negative") or "").split(',')]

                for topic in column_names2:
                    #Aggregate the value: 1 for positive, -1 for negative, 0 for not mentioned
                    commentor_data[topic] += assign_value(topic, positive, negative)

            #Finalize the values to be within -1, 0, or 1
            for topic in column_names2:
                commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_tan_ham_ad_nonnull.append(commentor_data)
    except Exception as exc:
        #Skip rows that cannot be parsed, but say why; KeyboardInterrupt is no longer swallowed
        print(f"Invalid JSON in row {index} ({type(exc).__name__}: {exc})")

#Persist the cleaned API responses to an Excel file (index column omitted)
tan_ham_ad_nonnull_api_cleaned_df.to_excel("tan_ham_ad_nonnull_api_cleaned_df.xlsx",index=False)

#Reload the same file, replacing the in-memory DataFrame with what was written to disk
tan_ham_ad_nonnull_api_cleaned_df = pd.read_excel("tan_ham_ad_nonnull_api_cleaned_df.xlsx")

tan_ham_ad_nonnull_api_cleaned_df

In [332]:
#Build the final tan_ham_ad sentiment table:
#  1. turn the parsed per-commentor scores into a DataFrame with a fixed column order,
#  2. attach them column-wise to the non-null reviews,
#  3. append the reviews that had no text,
#  4. export the result to Excel.
tan_ham_ad_nonnull_sen_df = pd.DataFrame(processed_data_tan_ham_ad_nonnull)
#Reorder the columns
ordered_columns = ["Commentor Name",
                   "Trust",
                   "Store Experience",
                   "Store Staff",
                   "Product Design",
                   "Product Variety",
                   "Discount",
                   "Making Charge",
                   "Price",
                   "Product Quality",
                   "OLD Gold Jewellery Exchange"]
#Apply the new order to the DataFrame
tan_ham_ad_nonnull_sen_df = tan_ham_ad_nonnull_sen_df[ordered_columns]

#The axis=1 concat below pairs reviews and scores purely by index label. If the parsing
#step skipped a batch or the model dropped/merged a commentor, the row counts differ and
#scores end up next to the wrong review without any error - so report it loudly.
if len(tan_ham_ad_nonnull_sen_df) != len(combined_df_tan_ham_ad):
    print(f"WARNING: tan_ham_ad has {len(combined_df_tan_ham_ad)} non-null reviews but "
          f"{len(tan_ham_ad_nonnull_sen_df)} parsed sentiment rows; the column-wise merge will be misaligned")

tan_ham_ad_nonnull_merged_df = pd.concat([combined_df_tan_ham_ad, tan_ham_ad_nonnull_sen_df], axis=1)

#Append the reviews without text (taken from null_dataframes) below the scored ones
tan_ham_ad_final_sen_df = pd.concat([tan_ham_ad_nonnull_merged_df,null_dataframes['tan_ham_ad_null']], ignore_index=True)

#Work on a copy so the un-truncated frame stays available; keep only the first 10
#characters of the date's string form (presumably YYYY-MM-DD - confirm against the data)
tan_ham_ad_final_sen_df_copy = tan_ham_ad_final_sen_df.copy()
tan_ham_ad_final_sen_df_copy["Published At Date"] = tan_ham_ad_final_sen_df_copy["Published At Date"].astype(str).str[:10]

tan_ham_ad_final_sen_df_copy.to_excel("sentiment_raw_output/tan_ham_ad_final_sen_df_jul.xlsx",index=False)


### tan_mee_db

In [333]:
#Score every non-null tan_mee_db review, one word-count bucket at a time, in batches of
#`batch_size` rows. score_sentiments_jul (defined elsewhere) returns the raw result plus
#input/output token counts; results are collected in tan_mee_db_nonnull_api and the token
#counts are summed to estimate the API cost. A background thread shows live progress.
batch_size = 25
batch_counter = [0]  #one-element list so the progress thread sees in-place updates
#Expected number of batches, derived from the buckets that actually exist so it always
#matches the loop below (a missing bucket key no longer raises KeyError)
total_batches = sum(
    math.ceil(len(bucket_df) / batch_size)
    for bucket_df in tan_mee_db_nonnull_buckets.values()
)
key_counter = 0
total_keys = len(tan_mee_db_nonnull_buckets)
tan_mee_db_nonnull_api = []
input_tokens_tan_mee_db_nonnull = 0
output_tokens_tan_mee_db_nonnull = 0
start_time_tan_mee_db = time.time()

for key in tan_mee_db_nonnull_buckets.keys():
    key_counter += 1
    current_df = tan_mee_db_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    # Threading setup
    total_iterations = math.ceil(len(current_df) / batch_size)
    stop_event = threading.Event()
    message_thread = threading.Thread(target=print_dynamic_message, args=(batch_counter,key_counter,counter,total_batches,total_keys, total_iterations,start_time_tan_mee_db, stop_event))
    message_thread.start()

    try:
        for start in range(0, len(current_df), batch_size):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + batch_size
            result, it, ot = score_sentiments_jul(current_df[start:end])
            tan_mee_db_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        # Stopping the dynamic message thread - also when a batch raises, otherwise the
        # thread would keep clearing and rewriting the cell output forever
        stop_event.set()
        message_thread.join()

    input_tokens_tan_mee_db_nonnull += input_tokens
    output_tokens_tan_mee_db_nonnull += output_tokens
    clear_output(wait=True)

overall_execution_time_tan_mee_db = time.time() - start_time_tan_mee_db
formatted_time_tan_mee_db = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_tan_mee_db))
#Cost estimate from hard-coded rates of 0.01 per 1000 input tokens and 0.03 per 1000 output tokens
input_token_cost_tan_mee_db = round((0.01/1000) * input_tokens_tan_mee_db_nonnull, 2)
output_token_cost_tan_mee_db = round((0.03/1000) * output_tokens_tan_mee_db_nonnull, 2)
total_cost_tan_mee_db = round(input_token_cost_tan_mee_db + output_token_cost_tan_mee_db, 2)

#batch_counter is a list; print its single element rather than the list repr
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_tan_mee_db}")
print(f"Total Input Tokens - {input_tokens_tan_mee_db_nonnull}")
print(f"Total Input Cost = {input_token_cost_tan_mee_db}")
print(f"Total Output Tokens - {output_tokens_tan_mee_db_nonnull}")
print(f"Total Output Cost = {output_token_cost_tan_mee_db}")
print(f"Total Cost = {total_cost_tan_mee_db}")

Executed [31] Iterations
Total Execution Time: 00:04:29
Total Input Tokens - 45905
Total Input Cost = 0.46
Total Output Tokens - 20129
Total Output Cost = 0.6
Total Cost = 1.06


In [334]:
#Clean every raw response in tan_mee_db_nonnull_api: drop the Markdown code fences and
#swap each empty list "[]" for a one-element list with blank positive/negative strings,
#then load the cleaned strings into a single-column DataFrame
tan_mee_db_nonnull_api_cleaned = []
for raw_response in tan_mee_db_nonnull_api:
    unfenced = raw_response.replace("```json", "").replace("```", "")
    tan_mee_db_nonnull_api_cleaned.append(
        unfenced.replace("[]", "[{\"positive\": \"\", \"negative\": \"\"}]")
    )
tan_mee_db_nonnull_api_cleaned_df = pd.DataFrame(tan_mee_db_nonnull_api_cleaned)

#Sentiment topics that become one score column each
column_names2 = [
    'Trust', 'Store Experience', 'Store Staff',
    'Product Design', 'Product Variety',
    'Discount', 'Making Charge', 'Price',
    'Product Quality', 'OLD Gold Jewellery Exchange',
]

In [335]:
#Initialize an empty list to store the processed data (one dict per commentor)
processed_data_tan_mee_db_nonnull = []

#Iterate over each row in the DataFrame; the first column holds one JSON response string
for index, row in tan_mee_db_nonnull_api_cleaned_df.iterrows():
    try:
        #Assuming the JSON string is in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Iterate over each key-value pair in the dictionary
        for commentor_name, feedback_list in data.items():
            #Create a dictionary for each commentor with default values as 0
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #Since there could be multiple feedback entries for a single commentor, we aggregate them
            #(an empty/missing feedback list leaves every topic at 0)
            for feedback in feedback_list or []:
                #Topics arrive comma separated; strip the whitespace around each one so that
                #"Trust, Store Staff" still matches 'Store Staff', and treat null as empty
                positive = [item.strip() for item in (feedback.get("positive") or "").split(',')]
                negative = [item.strip() for item in (feedback.get("negative") or "").split(',')]

                for topic in column_names2:
                    #Aggregate the value: 1 for positive, -1 for negative, 0 for not mentioned
                    commentor_data[topic] += assign_value(topic, positive, negative)

            #Finalize the values to be within -1, 0, or 1
            for topic in column_names2:
                commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_tan_mee_db_nonnull.append(commentor_data)
    except Exception as exc:
        #Skip rows that cannot be parsed, but say why; KeyboardInterrupt is no longer swallowed
        print(f"Invalid JSON in row {index} ({type(exc).__name__}: {exc})")

#Persist the cleaned API responses to an Excel file (index column omitted)
tan_mee_db_nonnull_api_cleaned_df.to_excel("tan_mee_db_nonnull_api_cleaned_df.xlsx",index=False)

#Reload the same file, replacing the in-memory DataFrame with what was written to disk
tan_mee_db_nonnull_api_cleaned_df = pd.read_excel("tan_mee_db_nonnull_api_cleaned_df.xlsx")

tan_mee_db_nonnull_api_cleaned_df

In [336]:
#Build the final tan_mee_db sentiment table:
#  1. turn the parsed per-commentor scores into a DataFrame with a fixed column order,
#  2. attach them column-wise to the non-null reviews,
#  3. append the reviews that had no text,
#  4. export the result to Excel.
tan_mee_db_nonnull_sen_df = pd.DataFrame(processed_data_tan_mee_db_nonnull)
#Reorder the columns
ordered_columns = ["Commentor Name",
                   "Trust",
                   "Store Experience",
                   "Store Staff",
                   "Product Design",
                   "Product Variety",
                   "Discount",
                   "Making Charge",
                   "Price",
                   "Product Quality",
                   "OLD Gold Jewellery Exchange"]
#Apply the new order to the DataFrame
tan_mee_db_nonnull_sen_df = tan_mee_db_nonnull_sen_df[ordered_columns]

#The axis=1 concat below pairs reviews and scores purely by index label. If the parsing
#step skipped a batch or the model dropped/merged a commentor, the row counts differ and
#scores end up next to the wrong review without any error - so report it loudly.
if len(tan_mee_db_nonnull_sen_df) != len(combined_df_tan_mee_db):
    print(f"WARNING: tan_mee_db has {len(combined_df_tan_mee_db)} non-null reviews but "
          f"{len(tan_mee_db_nonnull_sen_df)} parsed sentiment rows; the column-wise merge will be misaligned")

tan_mee_db_nonnull_merged_df = pd.concat([combined_df_tan_mee_db, tan_mee_db_nonnull_sen_df], axis=1)

#Append the reviews without text (taken from null_dataframes) below the scored ones
tan_mee_db_final_sen_df = pd.concat([tan_mee_db_nonnull_merged_df,null_dataframes['tan_mee_db_null']], ignore_index=True)

#Work on a copy so the un-truncated frame stays available; keep only the first 10
#characters of the date's string form (presumably YYYY-MM-DD - confirm against the data)
tan_mee_db_final_sen_df_copy = tan_mee_db_final_sen_df.copy()
tan_mee_db_final_sen_df_copy["Published At Date"] = tan_mee_db_final_sen_df_copy["Published At Date"].astype(str).str[:10]

tan_mee_db_final_sen_df_copy.to_excel("sentiment_raw_output/tan_mee_db_final_sen_df_jul.xlsx",index=False)


### tan_sil_db

In [337]:
#Score every non-null tan_sil_db review, one word-count bucket at a time, in batches of
#`batch_size` rows. score_sentiments_jul (defined elsewhere) returns the raw result plus
#input/output token counts; results are collected in tan_sil_db_nonnull_api and the token
#counts are summed to estimate the API cost. A background thread shows live progress.
batch_size = 25
batch_counter = [0]  #one-element list so the progress thread sees in-place updates
#Expected number of batches, derived from the buckets that actually exist so it always
#matches the loop below (a missing bucket key no longer raises KeyError)
total_batches = sum(
    math.ceil(len(bucket_df) / batch_size)
    for bucket_df in tan_sil_db_nonnull_buckets.values()
)
key_counter = 0
total_keys = len(tan_sil_db_nonnull_buckets)
tan_sil_db_nonnull_api = []
input_tokens_tan_sil_db_nonnull = 0
output_tokens_tan_sil_db_nonnull = 0
start_time_tan_sil_db = time.time()

for key in tan_sil_db_nonnull_buckets.keys():
    key_counter += 1
    current_df = tan_sil_db_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    # Threading setup
    total_iterations = math.ceil(len(current_df) / batch_size)
    stop_event = threading.Event()
    message_thread = threading.Thread(target=print_dynamic_message, args=(batch_counter,key_counter,counter,total_batches,total_keys, total_iterations,start_time_tan_sil_db, stop_event))
    message_thread.start()

    try:
        for start in range(0, len(current_df), batch_size):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + batch_size
            result, it, ot = score_sentiments_jul(current_df[start:end])
            tan_sil_db_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        # Stopping the dynamic message thread - also when a batch raises, otherwise the
        # thread would keep clearing and rewriting the cell output forever
        stop_event.set()
        message_thread.join()

    input_tokens_tan_sil_db_nonnull += input_tokens
    output_tokens_tan_sil_db_nonnull += output_tokens
    clear_output(wait=True)

overall_execution_time_tan_sil_db = time.time() - start_time_tan_sil_db
formatted_time_tan_sil_db = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_tan_sil_db))
#Cost estimate from hard-coded rates of 0.01 per 1000 input tokens and 0.03 per 1000 output tokens
input_token_cost_tan_sil_db = round((0.01/1000) * input_tokens_tan_sil_db_nonnull, 2)
output_token_cost_tan_sil_db = round((0.03/1000) * output_tokens_tan_sil_db_nonnull, 2)
total_cost_tan_sil_db = round(input_token_cost_tan_sil_db + output_token_cost_tan_sil_db, 2)

#batch_counter is a list; print its single element rather than the list repr
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_tan_sil_db}")
print(f"Total Input Tokens - {input_tokens_tan_sil_db_nonnull}")
print(f"Total Input Cost = {input_token_cost_tan_sil_db}")
print(f"Total Output Tokens - {output_tokens_tan_sil_db_nonnull}")
print(f"Total Output Cost = {output_token_cost_tan_sil_db}")
print(f"Total Cost = {total_cost_tan_sil_db}")

Executed [30] Iterations
Total Execution Time: 00:04:50
Total Input Tokens - 44981
Total Input Cost = 0.45
Total Output Tokens - 19770
Total Output Cost = 0.59
Total Cost = 1.04


In [338]:
#Clean every raw response in tan_sil_db_nonnull_api: drop the Markdown code fences and
#swap each empty list "[]" for a one-element list with blank positive/negative strings,
#then load the cleaned strings into a single-column DataFrame
tan_sil_db_nonnull_api_cleaned = []
for raw_response in tan_sil_db_nonnull_api:
    unfenced = raw_response.replace("```json", "").replace("```", "")
    tan_sil_db_nonnull_api_cleaned.append(
        unfenced.replace("[]", "[{\"positive\": \"\", \"negative\": \"\"}]")
    )
tan_sil_db_nonnull_api_cleaned_df = pd.DataFrame(tan_sil_db_nonnull_api_cleaned)

#Sentiment topics that become one score column each
column_names2 = [
    'Trust', 'Store Experience', 'Store Staff',
    'Product Design', 'Product Variety',
    'Discount', 'Making Charge', 'Price',
    'Product Quality', 'OLD Gold Jewellery Exchange',
]

In [339]:
#Initialize an empty list to store the processed data (one dict per commentor)
processed_data_tan_sil_db_nonnull = []

#Iterate over each row in the DataFrame; the first column holds one JSON response string
for index, row in tan_sil_db_nonnull_api_cleaned_df.iterrows():
    try:
        #Assuming the JSON string is in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Iterate over each key-value pair in the dictionary
        for commentor_name, feedback_list in data.items():
            #Create a dictionary for each commentor with default values as 0
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #Since there could be multiple feedback entries for a single commentor, we aggregate them
            #(an empty/missing feedback list leaves every topic at 0)
            for feedback in feedback_list or []:
                #Topics arrive comma separated; strip the whitespace around each one so that
                #"Trust, Store Staff" still matches 'Store Staff', and treat null as empty
                positive = [item.strip() for item in (feedback.get("positive") or "").split(',')]
                negative = [item.strip() for item in (feedback.get("negative") or "").split(',')]

                for topic in column_names2:
                    #Aggregate the value: 1 for positive, -1 for negative, 0 for not mentioned
                    commentor_data[topic] += assign_value(topic, positive, negative)

            #Finalize the values to be within -1, 0, or 1
            for topic in column_names2:
                commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_tan_sil_db_nonnull.append(commentor_data)
    except Exception as exc:
        #Skip rows that cannot be parsed, but say why; KeyboardInterrupt is no longer swallowed
        print(f"Invalid JSON in row {index} ({type(exc).__name__}: {exc})")

#Persist the cleaned API responses to an Excel file (index column omitted)
tan_sil_db_nonnull_api_cleaned_df.to_excel("tan_sil_db_nonnull_api_cleaned_df.xlsx",index=False)

#Reload the same file, replacing the in-memory DataFrame with what was written to disk
tan_sil_db_nonnull_api_cleaned_df = pd.read_excel("tan_sil_db_nonnull_api_cleaned_df.xlsx")

tan_sil_db_nonnull_api_cleaned_df

In [340]:
#Build the final tan_sil_db sentiment table:
#  1. turn the parsed per-commentor scores into a DataFrame with a fixed column order,
#  2. attach them column-wise to the non-null reviews,
#  3. append the reviews that had no text,
#  4. export the result to Excel.
tan_sil_db_nonnull_sen_df = pd.DataFrame(processed_data_tan_sil_db_nonnull)
#Reorder the columns
ordered_columns = ["Commentor Name",
                   "Trust",
                   "Store Experience",
                   "Store Staff",
                   "Product Design",
                   "Product Variety",
                   "Discount",
                   "Making Charge",
                   "Price",
                   "Product Quality",
                   "OLD Gold Jewellery Exchange"]
#Apply the new order to the DataFrame
tan_sil_db_nonnull_sen_df = tan_sil_db_nonnull_sen_df[ordered_columns]

#The axis=1 concat below pairs reviews and scores purely by index label. If the parsing
#step skipped a batch or the model dropped/merged a commentor, the row counts differ and
#scores end up next to the wrong review without any error - so report it loudly.
if len(tan_sil_db_nonnull_sen_df) != len(combined_df_tan_sil_db):
    print(f"WARNING: tan_sil_db has {len(combined_df_tan_sil_db)} non-null reviews but "
          f"{len(tan_sil_db_nonnull_sen_df)} parsed sentiment rows; the column-wise merge will be misaligned")

tan_sil_db_nonnull_merged_df = pd.concat([combined_df_tan_sil_db, tan_sil_db_nonnull_sen_df], axis=1)

#Append the reviews without text (taken from null_dataframes) below the scored ones
tan_sil_db_final_sen_df = pd.concat([tan_sil_db_nonnull_merged_df,null_dataframes['tan_sil_db_null']], ignore_index=True)

#Work on a copy so the un-truncated frame stays available; keep only the first 10
#characters of the date's string form (presumably YYYY-MM-DD - confirm against the data)
tan_sil_db_final_sen_df_copy = tan_sil_db_final_sen_df.copy()
tan_sil_db_final_sen_df_copy["Published At Date"] = tan_sil_db_final_sen_df_copy["Published At Date"].astype(str).str[:10]

tan_sil_db_final_sen_df_copy.to_excel("sentiment_raw_output/tan_sil_db_final_sen_df_jul.xlsx",index=False)


### mia_awm_ad

In [341]:
#Score every non-null mia_awm_ad review, one word-count bucket at a time, in batches of
#`batch_size` rows. score_sentiments_jul (defined elsewhere) returns the raw result plus
#input/output token counts; results are collected in mia_awm_ad_nonnull_api and the token
#counts are summed to estimate the API cost. A background thread shows live progress.
batch_size = 25
batch_counter = [0]  #one-element list so the progress thread sees in-place updates
#Expected number of batches, derived from the buckets that actually exist so it always
#matches the loop below (a missing bucket key no longer raises KeyError)
total_batches = sum(
    math.ceil(len(bucket_df) / batch_size)
    for bucket_df in mia_awm_ad_nonnull_buckets.values()
)
key_counter = 0
total_keys = len(mia_awm_ad_nonnull_buckets)
mia_awm_ad_nonnull_api = []
input_tokens_mia_awm_ad_nonnull = 0
output_tokens_mia_awm_ad_nonnull = 0
start_time_mia_awm_ad = time.time()

for key in mia_awm_ad_nonnull_buckets.keys():
    key_counter += 1
    current_df = mia_awm_ad_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    # Threading setup
    total_iterations = math.ceil(len(current_df) / batch_size)
    stop_event = threading.Event()
    message_thread = threading.Thread(target=print_dynamic_message, args=(batch_counter,key_counter,counter,total_batches,total_keys, total_iterations,start_time_mia_awm_ad, stop_event))
    message_thread.start()

    try:
        for start in range(0, len(current_df), batch_size):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + batch_size
            result, it, ot = score_sentiments_jul(current_df[start:end])
            mia_awm_ad_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        # Stopping the dynamic message thread - also when a batch raises, otherwise the
        # thread would keep clearing and rewriting the cell output forever
        stop_event.set()
        message_thread.join()

    input_tokens_mia_awm_ad_nonnull += input_tokens
    output_tokens_mia_awm_ad_nonnull += output_tokens
    clear_output(wait=True)

overall_execution_time_mia_awm_ad = time.time() - start_time_mia_awm_ad
formatted_time_mia_awm_ad = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_mia_awm_ad))
#Cost estimate from hard-coded rates of 0.01 per 1000 input tokens and 0.03 per 1000 output tokens
input_token_cost_mia_awm_ad = round((0.01/1000) * input_tokens_mia_awm_ad_nonnull, 2)
output_token_cost_mia_awm_ad = round((0.03/1000) * output_tokens_mia_awm_ad_nonnull, 2)
total_cost_mia_awm_ad = round(input_token_cost_mia_awm_ad + output_token_cost_mia_awm_ad, 2)

#batch_counter is a list; print its single element rather than the list repr
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_mia_awm_ad}")
print(f"Total Input Tokens - {input_tokens_mia_awm_ad_nonnull}")
print(f"Total Input Cost = {input_token_cost_mia_awm_ad}")
print(f"Total Output Tokens - {output_tokens_mia_awm_ad_nonnull}")
print(f"Total Output Cost = {output_token_cost_mia_awm_ad}")
print(f"Total Cost = {total_cost_mia_awm_ad}")

Executed [3] Iterations
Total Execution Time: 00:00:12
Total Input Tokens - 2606
Total Input Cost = 0.03
Total Output Tokens - 721
Total Output Cost = 0.02
Total Cost = 0.05


In [342]:
#Clean every raw response in mia_awm_ad_nonnull_api: drop the Markdown code fences and
#swap each empty list "[]" for a one-element list with blank positive/negative strings,
#then load the cleaned strings into a single-column DataFrame
mia_awm_ad_nonnull_api_cleaned = []
for raw_response in mia_awm_ad_nonnull_api:
    unfenced = raw_response.replace("```json", "").replace("```", "")
    mia_awm_ad_nonnull_api_cleaned.append(
        unfenced.replace("[]", "[{\"positive\": \"\", \"negative\": \"\"}]")
    )
mia_awm_ad_nonnull_api_cleaned_df = pd.DataFrame(mia_awm_ad_nonnull_api_cleaned)

#Sentiment topics that become one score column each
column_names2 = [
    'Trust', 'Store Experience', 'Store Staff',
    'Product Design', 'Product Variety',
    'Discount', 'Making Charge', 'Price',
    'Product Quality', 'OLD Gold Jewellery Exchange',
]

In [343]:
#Parse every cleaned response into one sentiment row per commentor
processed_data_mia_awm_ad_nonnull = []

#Iterate over each row in the DataFrame (one row holds one response string)
for index, row in mia_awm_ad_nonnull_api_cleaned_df.iterrows():
    try:
        #The JSON string is expected in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Each key is a commentor name, each value a list of feedback entries
        for commentor_name, feedback_list in data.items():
            #Start every topic at 0 (= not mentioned)
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #Check if there is any feedback for the commentor
            if feedback_list:
                #A commentor can have several feedback entries; their scores are summed
                for feedback in feedback_list:
                    #Topics are comma-separated. Strip the padding so that "Trust, Price"
                    #yields "Price" (not " Price", which would never match a topic name),
                    #and treat a null value like an empty string.
                    positive = [t.strip() for t in (feedback.get("positive") or "").split(',')]
                    negative = [t.strip() for t in (feedback.get("negative") or "").split(',')]

                    for topic in column_names2:
                        #1 for positive, -1 for negative, 0 for not mentioned
                        commentor_data[topic] += assign_value(topic, positive, negative)

                #Clamp the summed score so the final value is -1, 0 or 1
                for topic in column_names2:
                    commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_mia_awm_ad_nonnull.append(commentor_data)
    except Exception as exc:
        #Skip the remainder of a response that is not valid JSON or not shaped as expected,
        #and say why. Exception (not a bare except) lets KeyboardInterrupt stop the cell.
        #NOTE(review): a skipped response leaves fewer rows here than reviews sent, which
        #the later axis=1 concat does not detect - check the counts when this prints.
        print(f"Invalid JSON in row {index}: {exc!r}")

#Save the cleaned responses to a workbook, then reload them from that workbook
mia_awm_ad_nonnull_api_cleaned_df.to_excel("mia_awm_ad_nonnull_api_cleaned_df.xlsx",index=False)

mia_awm_ad_nonnull_api_cleaned_df = pd.read_excel("mia_awm_ad_nonnull_api_cleaned_df.xlsx")

mia_awm_ad_nonnull_api_cleaned_df

In [344]:
#Column layout of the sentiment table: commentor name first, then the ten topics
ordered_columns = [
    "Commentor Name",
    "Trust", "Store Experience", "Store Staff", "Product Design", "Product Variety",
    "Discount", "Making Charge", "Price", "Product Quality", "OLD Gold Jewellery Exchange",
]

#Build the sentiment table from the parsed rows and put its columns in that order
mia_awm_ad_nonnull_sen_df = pd.DataFrame(processed_data_mia_awm_ad_nonnull)
mia_awm_ad_nonnull_sen_df = mia_awm_ad_nonnull_sen_df[ordered_columns]

#Place the sentiment columns next to the review columns.
#NOTE(review): axis=1 pairs rows by index label - presumably combined_df_mia_awm_ad has a
#0..n-1 index and as many rows as the sentiment table; confirm.
mia_awm_ad_nonnull_merged_df = pd.concat(
    [combined_df_mia_awm_ad, mia_awm_ad_nonnull_sen_df],
    axis=1,
)

#Append the 'mia_awm_ad_null' frame underneath and renumber the index
mia_awm_ad_final_sen_df = pd.concat(
    [mia_awm_ad_nonnull_merged_df, null_dataframes['mia_awm_ad_null']],
    ignore_index=True,
)

#Export a copy in which "Published At Date" keeps only the first 10 characters of its text form
mia_awm_ad_final_sen_df_copy = mia_awm_ad_final_sen_df.copy()
mia_awm_ad_final_sen_df_copy["Published At Date"] = (
    mia_awm_ad_final_sen_df_copy["Published At Date"].astype(str).str[:10]
)
mia_awm_ad_final_sen_df_copy.to_excel(
    "sentiment_raw_output/mia_awm_ad_final_sen_df_jul.xlsx",
    index=False,
)


### mia_bur_db

In [345]:
#Score every bucket of mia_bur_db_nonnull_buckets in slices of 25 rows while a background
#thread shows progress, then report elapsed time, token usage and cost.
batch_counter = [0]  #one-element list: mutated in place so the progress thread reads the live value
#Total number of slices across all buckets; derived from the dict itself so it always
#matches the loop below, whichever bucket keys are present.
total_batches = sum(
    math.ceil(len(bucket_df) / 25) for bucket_df in mia_bur_db_nonnull_buckets.values()
)
key_counter = 0
total_keys = len(mia_bur_db_nonnull_buckets.keys())
mia_bur_db_nonnull_api = []  #result of every score_sentiments_jul call, in call order
input_tokens_mia_bur_db_nonnull = 0
output_tokens_mia_bur_db_nonnull = 0
start_time_mia_bur_db = time.time()

for key in mia_bur_db_nonnull_buckets.keys():
    key_counter += 1
    current_df = mia_bur_db_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]  #slices processed within the current bucket
    input_tokens = 0
    output_tokens = 0

    #Threading setup: a fresh progress thread per bucket, stopped through stop_event
    total_iterations = math.ceil(len(current_df) / 25)
    stop_event = threading.Event()
    message_thread = threading.Thread(target=print_dynamic_message, args=(batch_counter,key_counter,counter,total_batches,total_keys, total_iterations,start_time_mia_bur_db, stop_event))
    message_thread.start()

    try:
        for start in range(0, len(current_df), 25):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + 25
            result, it, ot = score_sentiments_jul(current_df[start:end])
            mia_bur_db_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        #Always stop the progress thread, even when a call above raises; otherwise it
        #would keep looping (and clearing the cell output) after the cell has failed.
        stop_event.set()
        message_thread.join()
        #Record the tokens consumed so far for this bucket, including on failure
        input_tokens_mia_bur_db_nonnull += input_tokens
        output_tokens_mia_bur_db_nonnull += output_tokens
    clear_output(wait=True)

overall_execution_time_mia_bur_db = time.time() - start_time_mia_bur_db
formatted_time_mia_bur_db = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_mia_bur_db))
#Cost = tokens * rate per 1000 tokens (0.01 for input, 0.03 for output), rounded to 2 decimals
input_token_cost_mia_bur_db = round((0.01/1000) * input_tokens_mia_bur_db_nonnull, 2)
output_token_cost_mia_bur_db = round((0.03/1000) * output_tokens_mia_bur_db_nonnull, 2)
total_cost_mia_bur_db = round(input_token_cost_mia_bur_db + output_token_cost_mia_bur_db, 2)

#batch_counter is a one-element list, so print its element rather than the list ("20", not "[20]")
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_mia_bur_db}")
print(f"Total Input Tokens - {input_tokens_mia_bur_db_nonnull}")
print(f"Total Input Cost = {input_token_cost_mia_bur_db}")
print(f"Total Output Tokens - {output_tokens_mia_bur_db_nonnull}")
print(f"Total Output Cost = {output_token_cost_mia_bur_db}")
print(f"Total Cost = {total_cost_mia_bur_db}")

Executed [20] Iterations
Total Execution Time: 00:02:19
Total Input Tokens - 27191
Total Input Cost = 0.27
Total Output Tokens - 11633
Total Output Cost = 0.35
Total Cost = 0.62


In [346]:
#Clean every raw response held in mia_bur_db_nonnull_api:
#  * drop the Markdown fences ("```json" and "```") around the JSON payload
#  * turn each empty list "[]" into a list holding one blank positive/negative entry
#and load the cleaned strings into a single-column DataFrame (one row per response).
mia_bur_db_nonnull_api_cleaned = list(
    map(
        lambda response: (
            response.replace("```json", "")
            .replace("```", "")
            .replace("[]", '[{"positive": "", "negative": ""}]')
        ),
        mia_bur_db_nonnull_api,
    )
)
mia_bur_db_nonnull_api_cleaned_df = pd.DataFrame(mia_bur_db_nonnull_api_cleaned)

#Topic names that become the sentiment columns
column_names2 = [
    'Trust', 'Store Experience', 'Store Staff', 'Product Design', 'Product Variety',
    'Discount', 'Making Charge', 'Price', 'Product Quality', 'OLD Gold Jewellery Exchange',
]

In [347]:
#Parse every cleaned response into one sentiment row per commentor
processed_data_mia_bur_db_nonnull = []

#Iterate over each row in the DataFrame (one row holds one response string)
for index, row in mia_bur_db_nonnull_api_cleaned_df.iterrows():
    try:
        #The JSON string is expected in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Each key is a commentor name, each value a list of feedback entries
        for commentor_name, feedback_list in data.items():
            #Start every topic at 0 (= not mentioned)
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #Check if there is any feedback for the commentor
            if feedback_list:
                #A commentor can have several feedback entries; their scores are summed
                for feedback in feedback_list:
                    #Topics are comma-separated. Strip the padding so that "Trust, Price"
                    #yields "Price" (not " Price", which would never match a topic name),
                    #and treat a null value like an empty string.
                    positive = [t.strip() for t in (feedback.get("positive") or "").split(',')]
                    negative = [t.strip() for t in (feedback.get("negative") or "").split(',')]

                    for topic in column_names2:
                        #1 for positive, -1 for negative, 0 for not mentioned
                        commentor_data[topic] += assign_value(topic, positive, negative)

                #Clamp the summed score so the final value is -1, 0 or 1
                for topic in column_names2:
                    commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_mia_bur_db_nonnull.append(commentor_data)
    except Exception as exc:
        #Skip the remainder of a response that is not valid JSON or not shaped as expected,
        #and say why. Exception (not a bare except) lets KeyboardInterrupt stop the cell.
        #NOTE(review): a skipped response leaves fewer rows here than reviews sent, which
        #the later axis=1 concat does not detect - check the counts when this prints.
        print(f"Invalid JSON in row {index}: {exc!r}")

#Save the cleaned responses to a workbook, then reload them from that workbook
mia_bur_db_nonnull_api_cleaned_df.to_excel("mia_bur_db_nonnull_api_cleaned_df.xlsx",index=False)

mia_bur_db_nonnull_api_cleaned_df = pd.read_excel("mia_bur_db_nonnull_api_cleaned_df.xlsx")

mia_bur_db_nonnull_api_cleaned_df

In [348]:
#Column layout of the sentiment table: commentor name first, then the ten topics
ordered_columns = [
    "Commentor Name",
    "Trust", "Store Experience", "Store Staff", "Product Design", "Product Variety",
    "Discount", "Making Charge", "Price", "Product Quality", "OLD Gold Jewellery Exchange",
]

#Build the sentiment table from the parsed rows and put its columns in that order
mia_bur_db_nonnull_sen_df = pd.DataFrame(processed_data_mia_bur_db_nonnull)
mia_bur_db_nonnull_sen_df = mia_bur_db_nonnull_sen_df[ordered_columns]

#Place the sentiment columns next to the review columns.
#NOTE(review): axis=1 pairs rows by index label - presumably combined_df_mia_bur_db has a
#0..n-1 index and as many rows as the sentiment table; confirm.
mia_bur_db_nonnull_merged_df = pd.concat(
    [combined_df_mia_bur_db, mia_bur_db_nonnull_sen_df],
    axis=1,
)

#Append the 'mia_bur_db_null' frame underneath and renumber the index
mia_bur_db_final_sen_df = pd.concat(
    [mia_bur_db_nonnull_merged_df, null_dataframes['mia_bur_db_null']],
    ignore_index=True,
)

#Export a copy in which "Published At Date" keeps only the first 10 characters of its text form
mia_bur_db_final_sen_df_copy = mia_bur_db_final_sen_df.copy()
mia_bur_db_final_sen_df_copy["Published At Date"] = (
    mia_bur_db_final_sen_df_copy["Published At Date"].astype(str).str[:10]
)
mia_bur_db_final_sen_df_copy.to_excel(
    "sentiment_raw_output/mia_bur_db_final_sen_df_jul.xlsx",
    index=False,
)


### tan_am_om

In [349]:
#Score every bucket of tan_am_om_nonnull_buckets in slices of 25 rows while a background
#thread shows progress, then report elapsed time, token usage and cost.
batch_counter = [0]  #one-element list: mutated in place so the progress thread reads the live value
#Total number of slices across all buckets; derived from the dict itself so it always
#matches the loop below, whichever bucket keys are present.
total_batches = sum(
    math.ceil(len(bucket_df) / 25) for bucket_df in tan_am_om_nonnull_buckets.values()
)
key_counter = 0
total_keys = len(tan_am_om_nonnull_buckets.keys())
tan_am_om_nonnull_api = []  #result of every score_sentiments_jul call, in call order
input_tokens_tan_am_om_nonnull = 0
output_tokens_tan_am_om_nonnull = 0
start_time_tan_am_om = time.time()

for key in tan_am_om_nonnull_buckets.keys():
    key_counter += 1
    current_df = tan_am_om_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]  #slices processed within the current bucket
    input_tokens = 0
    output_tokens = 0

    #Threading setup: a fresh progress thread per bucket, stopped through stop_event
    total_iterations = math.ceil(len(current_df) / 25)
    stop_event = threading.Event()
    message_thread = threading.Thread(target=print_dynamic_message, args=(batch_counter,key_counter,counter,total_batches,total_keys, total_iterations,start_time_tan_am_om, stop_event))
    message_thread.start()

    try:
        for start in range(0, len(current_df), 25):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + 25
            result, it, ot = score_sentiments_jul(current_df[start:end])
            tan_am_om_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        #Always stop the progress thread, even when a call above raises; otherwise it
        #would keep looping (and clearing the cell output) after the cell has failed.
        stop_event.set()
        message_thread.join()
        #Record the tokens consumed so far for this bucket, including on failure
        input_tokens_tan_am_om_nonnull += input_tokens
        output_tokens_tan_am_om_nonnull += output_tokens
    clear_output(wait=True)

overall_execution_time_tan_am_om = time.time() - start_time_tan_am_om
formatted_time_tan_am_om = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_tan_am_om))
#Cost = tokens * rate per 1000 tokens (0.01 for input, 0.03 for output), rounded to 2 decimals
input_token_cost_tan_am_om = round((0.01/1000) * input_tokens_tan_am_om_nonnull, 2)
output_token_cost_tan_am_om = round((0.03/1000) * output_tokens_tan_am_om_nonnull, 2)
total_cost_tan_am_om = round(input_token_cost_tan_am_om + output_token_cost_tan_am_om, 2)

#batch_counter is a one-element list, so print its element rather than the list ("6", not "[6]")
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_tan_am_om}")
print(f"Total Input Tokens - {input_tokens_tan_am_om_nonnull}")
print(f"Total Input Cost = {input_token_cost_tan_am_om}")
print(f"Total Output Tokens - {output_tokens_tan_am_om_nonnull}")
print(f"Total Output Cost = {output_token_cost_tan_am_om}")
print(f"Total Cost = {total_cost_tan_am_om}")

Executed [6] Iterations
Total Execution Time: 00:00:31
Total Input Tokens - 6374
Total Input Cost = 0.06
Total Output Tokens - 2299
Total Output Cost = 0.07
Total Cost = 0.13


In [350]:
#Clean every raw response held in tan_am_om_nonnull_api:
#  * drop the Markdown fences ("```json" and "```") around the JSON payload
#  * turn each empty list "[]" into a list holding one blank positive/negative entry
#and load the cleaned strings into a single-column DataFrame (one row per response).
tan_am_om_nonnull_api_cleaned = list(
    map(
        lambda response: (
            response.replace("```json", "")
            .replace("```", "")
            .replace("[]", '[{"positive": "", "negative": ""}]')
        ),
        tan_am_om_nonnull_api,
    )
)
tan_am_om_nonnull_api_cleaned_df = pd.DataFrame(tan_am_om_nonnull_api_cleaned)

#Topic names that become the sentiment columns
column_names2 = [
    'Trust', 'Store Experience', 'Store Staff', 'Product Design', 'Product Variety',
    'Discount', 'Making Charge', 'Price', 'Product Quality', 'OLD Gold Jewellery Exchange',
]

In [351]:
#Parse every cleaned response into one sentiment row per commentor
processed_data_tan_am_om_nonnull = []

#Iterate over each row in the DataFrame (one row holds one response string)
for index, row in tan_am_om_nonnull_api_cleaned_df.iterrows():
    try:
        #The JSON string is expected in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Each key is a commentor name, each value a list of feedback entries
        for commentor_name, feedback_list in data.items():
            #Start every topic at 0 (= not mentioned)
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #Check if there is any feedback for the commentor
            if feedback_list:
                #A commentor can have several feedback entries; their scores are summed
                for feedback in feedback_list:
                    #Topics are comma-separated. Strip the padding so that "Trust, Price"
                    #yields "Price" (not " Price", which would never match a topic name),
                    #and treat a null value like an empty string.
                    positive = [t.strip() for t in (feedback.get("positive") or "").split(',')]
                    negative = [t.strip() for t in (feedback.get("negative") or "").split(',')]

                    for topic in column_names2:
                        #1 for positive, -1 for negative, 0 for not mentioned
                        commentor_data[topic] += assign_value(topic, positive, negative)

                #Clamp the summed score so the final value is -1, 0 or 1
                for topic in column_names2:
                    commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_tan_am_om_nonnull.append(commentor_data)
    except Exception as exc:
        #Skip the remainder of a response that is not valid JSON or not shaped as expected,
        #and say why. Exception (not a bare except) lets KeyboardInterrupt stop the cell.
        #NOTE(review): a skipped response leaves fewer rows here than reviews sent, which
        #the later axis=1 concat does not detect - check the counts when this prints.
        print(f"Invalid JSON in row {index}: {exc!r}")

#Save the cleaned responses to a workbook, then reload them from that workbook
tan_am_om_nonnull_api_cleaned_df.to_excel("tan_am_om_nonnull_api_cleaned_df.xlsx",index=False)

tan_am_om_nonnull_api_cleaned_df = pd.read_excel("tan_am_om_nonnull_api_cleaned_df.xlsx")

tan_am_om_nonnull_api_cleaned_df

In [352]:
#Column layout of the sentiment table: commentor name first, then the ten topics
ordered_columns = [
    "Commentor Name",
    "Trust", "Store Experience", "Store Staff", "Product Design", "Product Variety",
    "Discount", "Making Charge", "Price", "Product Quality", "OLD Gold Jewellery Exchange",
]

#Build the sentiment table from the parsed rows and put its columns in that order
tan_am_om_nonnull_sen_df = pd.DataFrame(processed_data_tan_am_om_nonnull)
tan_am_om_nonnull_sen_df = tan_am_om_nonnull_sen_df[ordered_columns]

#Place the sentiment columns next to the review columns.
#NOTE(review): axis=1 pairs rows by index label - presumably combined_df_tan_am_om has a
#0..n-1 index and as many rows as the sentiment table; confirm.
tan_am_om_nonnull_merged_df = pd.concat(
    [combined_df_tan_am_om, tan_am_om_nonnull_sen_df],
    axis=1,
)

#Append the 'tan_am_om_null' frame underneath and renumber the index
tan_am_om_final_sen_df = pd.concat(
    [tan_am_om_nonnull_merged_df, null_dataframes['tan_am_om_null']],
    ignore_index=True,
)

#Export a copy in which "Published At Date" keeps only the first 10 characters of its text form
tan_am_om_final_sen_df_copy = tan_am_om_final_sen_df.copy()
tan_am_om_final_sen_df_copy["Published At Date"] = (
    tan_am_om_final_sen_df_copy["Published At Date"].astype(str).str[:10]
)
tan_am_om_final_sen_df_copy.to_excel(
    "sentiment_raw_output/tan_am_om_final_sen_df_jul.xlsx",
    index=False,
)


### tan_atl_ga

In [353]:
#Score every bucket of tan_atl_ga_nonnull_buckets in slices of 25 rows while a background
#thread shows progress, then report elapsed time, token usage and cost.
batch_counter = [0]  #one-element list: mutated in place so the progress thread reads the live value
#Total number of slices across all buckets; derived from the dict itself so it always
#matches the loop below, whichever bucket keys are present.
total_batches = sum(
    math.ceil(len(bucket_df) / 25) for bucket_df in tan_atl_ga_nonnull_buckets.values()
)
key_counter = 0
total_keys = len(tan_atl_ga_nonnull_buckets.keys())
tan_atl_ga_nonnull_api = []  #result of every score_sentiments_jul call, in call order
input_tokens_tan_atl_ga_nonnull = 0
output_tokens_tan_atl_ga_nonnull = 0
start_time_tan_atl_ga = time.time()

for key in tan_atl_ga_nonnull_buckets.keys():
    key_counter += 1
    current_df = tan_atl_ga_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]  #slices processed within the current bucket
    input_tokens = 0
    output_tokens = 0

    #Threading setup: a fresh progress thread per bucket, stopped through stop_event
    total_iterations = math.ceil(len(current_df) / 25)
    stop_event = threading.Event()
    message_thread = threading.Thread(target=print_dynamic_message, args=(batch_counter,key_counter,counter,total_batches,total_keys, total_iterations,start_time_tan_atl_ga, stop_event))
    message_thread.start()

    try:
        for start in range(0, len(current_df), 25):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + 25
            result, it, ot = score_sentiments_jul(current_df[start:end])
            tan_atl_ga_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        #Always stop the progress thread, even when a call above raises; otherwise it
        #would keep looping (and clearing the cell output) after the cell has failed.
        stop_event.set()
        message_thread.join()
        #Record the tokens consumed so far for this bucket, including on failure
        input_tokens_tan_atl_ga_nonnull += input_tokens
        output_tokens_tan_atl_ga_nonnull += output_tokens
    clear_output(wait=True)

overall_execution_time_tan_atl_ga = time.time() - start_time_tan_atl_ga
formatted_time_tan_atl_ga = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_tan_atl_ga))
#Cost = tokens * rate per 1000 tokens (0.01 for input, 0.03 for output), rounded to 2 decimals
input_token_cost_tan_atl_ga = round((0.01/1000) * input_tokens_tan_atl_ga_nonnull, 2)
output_token_cost_tan_atl_ga = round((0.03/1000) * output_tokens_tan_atl_ga_nonnull, 2)
total_cost_tan_atl_ga = round(input_token_cost_tan_atl_ga + output_token_cost_tan_atl_ga, 2)

#batch_counter is a one-element list, so print its element rather than the list ("18", not "[18]")
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_tan_atl_ga}")
print(f"Total Input Tokens - {input_tokens_tan_atl_ga_nonnull}")
print(f"Total Input Cost = {input_token_cost_tan_atl_ga}")
print(f"Total Output Tokens - {output_tokens_tan_atl_ga_nonnull}")
print(f"Total Output Cost = {output_token_cost_tan_atl_ga}")
print(f"Total Cost = {total_cost_tan_atl_ga}")

Executed [18] Iterations
Total Execution Time: 00:02:09
Total Input Tokens - 25389
Total Input Cost = 0.25
Total Output Tokens - 10930
Total Output Cost = 0.33
Total Cost = 0.58


In [354]:
#Clean every raw response held in tan_atl_ga_nonnull_api:
#  * drop the Markdown fences ("```json" and "```") around the JSON payload
#  * turn each empty list "[]" into a list holding one blank positive/negative entry
#and load the cleaned strings into a single-column DataFrame (one row per response).
tan_atl_ga_nonnull_api_cleaned = list(
    map(
        lambda response: (
            response.replace("```json", "")
            .replace("```", "")
            .replace("[]", '[{"positive": "", "negative": ""}]')
        ),
        tan_atl_ga_nonnull_api,
    )
)
tan_atl_ga_nonnull_api_cleaned_df = pd.DataFrame(tan_atl_ga_nonnull_api_cleaned)

#Topic names that become the sentiment columns
column_names2 = [
    'Trust', 'Store Experience', 'Store Staff', 'Product Design', 'Product Variety',
    'Discount', 'Making Charge', 'Price', 'Product Quality', 'OLD Gold Jewellery Exchange',
]

In [355]:
#Parse every cleaned response into one sentiment row per commentor
processed_data_tan_atl_ga_nonnull = []

#Iterate over each row in the DataFrame (one row holds one response string)
for index, row in tan_atl_ga_nonnull_api_cleaned_df.iterrows():
    try:
        #The JSON string is expected in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Each key is a commentor name, each value a list of feedback entries
        for commentor_name, feedback_list in data.items():
            #Start every topic at 0 (= not mentioned)
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #Check if there is any feedback for the commentor
            if feedback_list:
                #A commentor can have several feedback entries; their scores are summed
                for feedback in feedback_list:
                    #Topics are comma-separated. Strip the padding so that "Trust, Price"
                    #yields "Price" (not " Price", which would never match a topic name),
                    #and treat a null value like an empty string.
                    positive = [t.strip() for t in (feedback.get("positive") or "").split(',')]
                    negative = [t.strip() for t in (feedback.get("negative") or "").split(',')]

                    for topic in column_names2:
                        #1 for positive, -1 for negative, 0 for not mentioned
                        commentor_data[topic] += assign_value(topic, positive, negative)

                #Clamp the summed score so the final value is -1, 0 or 1
                for topic in column_names2:
                    commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_tan_atl_ga_nonnull.append(commentor_data)
    except Exception as exc:
        #Skip the remainder of a response that is not valid JSON or not shaped as expected,
        #and say why. Exception (not a bare except) lets KeyboardInterrupt stop the cell.
        #NOTE(review): a skipped response leaves fewer rows here than reviews sent, which
        #the later axis=1 concat does not detect - check the counts when this prints.
        print(f"Invalid JSON in row {index}: {exc!r}")

#Save the cleaned responses to a workbook, then reload them from that workbook
tan_atl_ga_nonnull_api_cleaned_df.to_excel("tan_atl_ga_nonnull_api_cleaned_df.xlsx",index=False)

tan_atl_ga_nonnull_api_cleaned_df = pd.read_excel("tan_atl_ga_nonnull_api_cleaned_df.xlsx")

tan_atl_ga_nonnull_api_cleaned_df

In [356]:
#Column layout of the sentiment table: commentor name first, then the ten topics
ordered_columns = [
    "Commentor Name",
    "Trust", "Store Experience", "Store Staff", "Product Design", "Product Variety",
    "Discount", "Making Charge", "Price", "Product Quality", "OLD Gold Jewellery Exchange",
]

#Build the sentiment table from the parsed rows and put its columns in that order
tan_atl_ga_nonnull_sen_df = pd.DataFrame(processed_data_tan_atl_ga_nonnull)
tan_atl_ga_nonnull_sen_df = tan_atl_ga_nonnull_sen_df[ordered_columns]

#Place the sentiment columns next to the review columns.
#NOTE(review): axis=1 pairs rows by index label - presumably combined_df_tan_atl_ga has a
#0..n-1 index and as many rows as the sentiment table; confirm.
tan_atl_ga_nonnull_merged_df = pd.concat(
    [combined_df_tan_atl_ga, tan_atl_ga_nonnull_sen_df],
    axis=1,
)

#Append the 'tan_atl_ga_null' frame underneath and renumber the index
tan_atl_ga_final_sen_df = pd.concat(
    [tan_atl_ga_nonnull_merged_df, null_dataframes['tan_atl_ga_null']],
    ignore_index=True,
)

#Export a copy in which "Published At Date" keeps only the first 10 characters of its text form
tan_atl_ga_final_sen_df_copy = tan_atl_ga_final_sen_df.copy()
tan_atl_ga_final_sen_df_copy["Published At Date"] = (
    tan_atl_ga_final_sen_df_copy["Published At Date"].astype(str).str[:10]
)
tan_atl_ga_final_sen_df_copy.to_excel(
    "sentiment_raw_output/tan_atl_ga_final_sen_df_jul.xlsx",
    index=False,
)


### tan_fc_qa

In [357]:
#Score every bucket of tan_fc_qa_nonnull_buckets in slices of 25 rows while a background
#thread shows progress, then report elapsed time, token usage and cost.
batch_counter = [0]  #one-element list: mutated in place so the progress thread reads the live value
#Total number of slices across all buckets; derived from the dict itself so it always
#matches the loop below, whichever bucket keys are present.
total_batches = sum(
    math.ceil(len(bucket_df) / 25) for bucket_df in tan_fc_qa_nonnull_buckets.values()
)
key_counter = 0
total_keys = len(tan_fc_qa_nonnull_buckets.keys())
tan_fc_qa_nonnull_api = []  #result of every score_sentiments_jul call, in call order
input_tokens_tan_fc_qa_nonnull = 0
output_tokens_tan_fc_qa_nonnull = 0
start_time_tan_fc_qa = time.time()

for key in tan_fc_qa_nonnull_buckets.keys():
    key_counter += 1
    current_df = tan_fc_qa_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]  #slices processed within the current bucket
    input_tokens = 0
    output_tokens = 0

    #Threading setup: a fresh progress thread per bucket, stopped through stop_event
    total_iterations = math.ceil(len(current_df) / 25)
    stop_event = threading.Event()
    message_thread = threading.Thread(target=print_dynamic_message, args=(batch_counter,key_counter,counter,total_batches,total_keys, total_iterations,start_time_tan_fc_qa, stop_event))
    message_thread.start()

    try:
        for start in range(0, len(current_df), 25):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + 25
            result, it, ot = score_sentiments_jul(current_df[start:end])
            tan_fc_qa_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        #Always stop the progress thread, even when a call above raises; otherwise it
        #would keep looping (and clearing the cell output) after the cell has failed.
        stop_event.set()
        message_thread.join()
        #Record the tokens consumed so far for this bucket, including on failure
        input_tokens_tan_fc_qa_nonnull += input_tokens
        output_tokens_tan_fc_qa_nonnull += output_tokens
    clear_output(wait=True)

overall_execution_time_tan_fc_qa = time.time() - start_time_tan_fc_qa
formatted_time_tan_fc_qa = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_tan_fc_qa))
#Cost = tokens * rate per 1000 tokens (0.01 for input, 0.03 for output), rounded to 2 decimals
input_token_cost_tan_fc_qa = round((0.01/1000) * input_tokens_tan_fc_qa_nonnull, 2)
output_token_cost_tan_fc_qa = round((0.03/1000) * output_tokens_tan_fc_qa_nonnull, 2)
total_cost_tan_fc_qa = round(input_token_cost_tan_fc_qa + output_token_cost_tan_fc_qa, 2)

#batch_counter is a one-element list, so print its element rather than the list ("5", not "[5]")
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_tan_fc_qa}")
print(f"Total Input Tokens - {input_tokens_tan_fc_qa_nonnull}")
print(f"Total Input Cost = {input_token_cost_tan_fc_qa}")
print(f"Total Output Tokens - {output_tokens_tan_fc_qa_nonnull}")
print(f"Total Output Cost = {output_token_cost_tan_fc_qa}")
print(f"Total Cost = {total_cost_tan_fc_qa}")

Executed [5] Iterations
Total Execution Time: 00:00:16
Total Input Tokens - 4137
Total Input Cost = 0.04
Total Output Tokens - 854
Total Output Cost = 0.03
Total Cost = 0.07


In [358]:
#Clean every raw response held in tan_fc_qa_nonnull_api:
#  * drop the Markdown fences ("```json" and "```") around the JSON payload
#  * turn each empty list "[]" into a list holding one blank positive/negative entry
#and load the cleaned strings into a single-column DataFrame (one row per response).
tan_fc_qa_nonnull_api_cleaned = list(
    map(
        lambda response: (
            response.replace("```json", "")
            .replace("```", "")
            .replace("[]", '[{"positive": "", "negative": ""}]')
        ),
        tan_fc_qa_nonnull_api,
    )
)
tan_fc_qa_nonnull_api_cleaned_df = pd.DataFrame(tan_fc_qa_nonnull_api_cleaned)

#Topic names that become the sentiment columns
column_names2 = [
    'Trust', 'Store Experience', 'Store Staff', 'Product Design', 'Product Variety',
    'Discount', 'Making Charge', 'Price', 'Product Quality', 'OLD Gold Jewellery Exchange',
]

In [359]:
#Parse every cleaned response into one sentiment row per commentor
processed_data_tan_fc_qa_nonnull = []

#Iterate over each row in the DataFrame (one row holds one response string)
for index, row in tan_fc_qa_nonnull_api_cleaned_df.iterrows():
    try:
        #The JSON string is expected in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Each key is a commentor name, each value a list of feedback entries
        for commentor_name, feedback_list in data.items():
            #Start every topic at 0 (= not mentioned)
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #Check if there is any feedback for the commentor
            if feedback_list:
                #A commentor can have several feedback entries; their scores are summed
                for feedback in feedback_list:
                    #Topics are comma-separated. Strip the padding so that "Trust, Price"
                    #yields "Price" (not " Price", which would never match a topic name),
                    #and treat a null value like an empty string.
                    positive = [t.strip() for t in (feedback.get("positive") or "").split(',')]
                    negative = [t.strip() for t in (feedback.get("negative") or "").split(',')]

                    for topic in column_names2:
                        #1 for positive, -1 for negative, 0 for not mentioned
                        commentor_data[topic] += assign_value(topic, positive, negative)

                #Clamp the summed score so the final value is -1, 0 or 1
                for topic in column_names2:
                    commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_tan_fc_qa_nonnull.append(commentor_data)
    except Exception as exc:
        #Skip the remainder of a response that is not valid JSON or not shaped as expected,
        #and say why. Exception (not a bare except) lets KeyboardInterrupt stop the cell.
        #NOTE(review): a skipped response leaves fewer rows here than reviews sent, which
        #the later axis=1 concat does not detect - check the counts when this prints.
        print(f"Invalid JSON in row {index}: {exc!r}")

#Save the cleaned responses to a workbook, then reload them from that workbook
tan_fc_qa_nonnull_api_cleaned_df.to_excel("tan_fc_qa_nonnull_api_cleaned_df.xlsx",index=False)

tan_fc_qa_nonnull_api_cleaned_df = pd.read_excel("tan_fc_qa_nonnull_api_cleaned_df.xlsx")

tan_fc_qa_nonnull_api_cleaned_df

In [360]:
#Column layout of the sentiment table: commentor name first, then the ten topics
ordered_columns = [
    "Commentor Name",
    "Trust", "Store Experience", "Store Staff", "Product Design", "Product Variety",
    "Discount", "Making Charge", "Price", "Product Quality", "OLD Gold Jewellery Exchange",
]

#Build the sentiment table from the parsed rows and put its columns in that order
tan_fc_qa_nonnull_sen_df = pd.DataFrame(processed_data_tan_fc_qa_nonnull)
tan_fc_qa_nonnull_sen_df = tan_fc_qa_nonnull_sen_df[ordered_columns]

#Place the sentiment columns next to the review columns.
#NOTE(review): axis=1 pairs rows by index label - presumably combined_df_tan_fc_qa has a
#0..n-1 index and as many rows as the sentiment table; confirm.
tan_fc_qa_nonnull_merged_df = pd.concat(
    [combined_df_tan_fc_qa, tan_fc_qa_nonnull_sen_df],
    axis=1,
)

#Append the 'tan_fc_qa_null' frame underneath and renumber the index
tan_fc_qa_final_sen_df = pd.concat(
    [tan_fc_qa_nonnull_merged_df, null_dataframes['tan_fc_qa_null']],
    ignore_index=True,
)

#Export a copy in which "Published At Date" keeps only the first 10 characters of its text form
tan_fc_qa_final_sen_df_copy = tan_fc_qa_final_sen_df.copy()
tan_fc_qa_final_sen_df_copy["Published At Date"] = (
    tan_fc_qa_final_sen_df_copy["Published At Date"].astype(str).str[:10]
)
tan_fc_qa_final_sen_df_copy.to_excel(
    "sentiment_raw_output/tan_fc_qa_final_sen_df_jul.xlsx",
    index=False,
)


### tan_gs_db

In [361]:
#Score every tan_gs_db review bucket through score_sentiments_jul in batches of 25 rows,
#while a background thread keeps a live progress message on screen.

#One-element list (not an int) so the progress thread sees the updates made by the loop below
batch_counter = [0]
#Total number of batches, derived from the dict itself so it always matches the buckets
#the loop below actually iterates over
total_batches = sum(
    math.ceil(len(bucket_df) / 25) for bucket_df in tan_gs_db_nonnull_buckets.values()
)
key_counter = 0
total_keys=len(tan_gs_db_nonnull_buckets.keys())
tan_gs_db_nonnull_api = []
input_tokens_tan_gs_db_nonnull=0
output_tokens_tan_gs_db_nonnull=0
start_time_tan_gs_db = time.time()

for key in tan_gs_db_nonnull_buckets.keys():
    key_counter+=1
    current_df = tan_gs_db_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    # Threading setup: one progress thread per bucket
    total_iterations = math.ceil(len(current_df) / 25)
    stop_event = threading.Event()
    message_thread = threading.Thread(target=print_dynamic_message, args=(batch_counter,key_counter,counter,total_batches,total_keys, total_iterations,start_time_tan_gs_db, stop_event))
    message_thread.start()

    try:
        for start in range(0, len(current_df), 25):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + 25
            #Each call returns the raw response plus its input/output token counts
            result, it, ot = score_sentiments_jul(current_df[start:end])
            tan_gs_db_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        # Always stop the dynamic message thread, even if a batch raised; otherwise it
        # would keep running (and clearing the cell output) after the failure
        stop_event.set()
        message_thread.join()

    input_tokens_tan_gs_db_nonnull+=input_tokens
    output_tokens_tan_gs_db_nonnull+=output_tokens
    clear_output(wait=True)

#Run summary: elapsed time and cost at 0.01 per 1000 input tokens / 0.03 per 1000 output tokens
overall_execution_time_tan_gs_db = time.time() - start_time_tan_gs_db
formatted_time_tan_gs_db = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_tan_gs_db))
input_token_cost_tan_gs_db = round((0.01/1000) * input_tokens_tan_gs_db_nonnull, 2)
output_token_cost_tan_gs_db = round((0.03/1000) * output_tokens_tan_gs_db_nonnull, 2)
total_cost_tan_gs_db = round(input_token_cost_tan_gs_db + output_token_cost_tan_gs_db, 2)

#NOTE: batch_counter is a one-element list, so it is printed with brackets (e.g. "[43]")
print(f"Executed {batch_counter} Iterations")
print(f"Total Execution Time: {formatted_time_tan_gs_db}")
print(f"Total Input Tokens - {input_tokens_tan_gs_db_nonnull}")
print(f"Total Input Cost = {input_token_cost_tan_gs_db}")
print(f"Total Output Tokens - {output_tokens_tan_gs_db_nonnull}")
print(f"Total Output Cost = {output_token_cost_tan_gs_db}")
print(f"Total Cost = {total_cost_tan_gs_db}")

Executed [43] Iterations
Total Execution Time: 00:07:19
Total Input Tokens - 73367
Total Input Cost = 0.73
Total Output Tokens - 30483
Total Output Cost = 0.91
Total Cost = 1.64


In [362]:
#Topic columns scored for every commentor
column_names2 = [
    'Trust',
    'Store Experience',
    'Store Staff',
    'Product Design',
    'Product Variety',
    'Discount',
    'Making Charge',
    'Price',
    'Product Quality',
    'OLD Gold Jewellery Exchange',
]

#Strip the "```json" / "```" fence markers from each API response and swap every empty
#list "[]" for a single feedback entry whose positive/negative strings are blank
tan_gs_db_nonnull_api_cleaned = list(map(
    lambda response: (
        response.replace("```json", "")
                .replace("```", "")
                .replace("[]", '[{"positive": "", "negative": ""}]')
    ),
    tan_gs_db_nonnull_api,
))

#One cleaned response per row
tan_gs_db_nonnull_api_cleaned_df = pd.DataFrame(tan_gs_db_nonnull_api_cleaned)

In [363]:
#Parse every cleaned API response into one record per commentor, scoring each topic as
#1 (positive), -1 (negative) or 0 (not mentioned, or mentions that cancel out)
processed_data_tan_gs_db_nonnull = []

#Iterate over each row in the DataFrame
for index, row in tan_gs_db_nonnull_api_cleaned_df.iterrows():
    try:
        #Assuming the JSON string is in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Each key is a commentor name mapped to a list of feedback entries
        for commentor_name, feedback_list in data.items():
            #Create a dictionary for each commentor with default values as 0
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #An empty or missing feedback list leaves every topic at 0
            for feedback in feedback_list or []:
                #Topics arrive as a comma-separated string. Strip the whitespace around each
                #name so "Trust, Price" matches both topics, and treat a null value as no topics
                positive = [name.strip() for name in (feedback.get("positive") or "").split(',')]
                negative = [name.strip() for name in (feedback.get("negative") or "").split(',')]

                for topic in column_names2:
                    #Aggregate the value: 1 for positive, -1 for negative, 0 for not mentioned
                    commentor_data[topic] += assign_value(topic, positive, negative)

            #Finalize the values to be within -1, 0, or 1
            for topic in column_names2:
                commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_tan_gs_db_nonnull.append(commentor_data)
    except (ValueError, TypeError, AttributeError):
        #Malformed response (invalid JSON or an unexpected structure): report it and move on.
        #Narrower than a bare except so KeyboardInterrupt and unrelated bugs are not swallowed
        print(f"Invalid JSON in row {index}")

#Save the cleaned responses, reload them from disk, and display the reloaded frame
tan_gs_db_nonnull_api_cleaned_df.to_excel("tan_gs_db_nonnull_api_cleaned_df.xlsx",index=False)

tan_gs_db_nonnull_api_cleaned_df = pd.read_excel("tan_gs_db_nonnull_api_cleaned_df.xlsx")

tan_gs_db_nonnull_api_cleaned_df

In [364]:
#Column order for the sentiment frame: commentor name first, then the ten topic scores
ordered_columns = [
    "Commentor Name",
    "Trust",
    "Store Experience",
    "Store Staff",
    "Product Design",
    "Product Variety",
    "Discount",
    "Making Charge",
    "Price",
    "Product Quality",
    "OLD Gold Jewellery Exchange",
]

#Build the per-commentor sentiment frame from the parsed records, in the order above
tan_gs_db_nonnull_sen_df = pd.DataFrame(processed_data_tan_gs_db_nonnull).loc[:, ordered_columns]

#Put the existing review frame and the sentiment scores side by side (rows are aligned on index)
tan_gs_db_nonnull_merged_df = pd.concat(
    objs=[combined_df_tan_gs_db, tan_gs_db_nonnull_sen_df],
    axis="columns",
)

#Stack the frame stored under 'tan_gs_db_null' underneath and renumber the index
tan_gs_db_final_sen_df = pd.concat(
    [tan_gs_db_nonnull_merged_df, null_dataframes['tan_gs_db_null']],
    ignore_index=True,
)

#Export copy: "Published At Date" is reduced to the first 10 characters of its string form,
#while tan_gs_db_final_sen_df itself is left untouched
tan_gs_db_final_sen_df_copy = tan_gs_db_final_sen_df.assign(
    **{"Published At Date": lambda frame: frame["Published At Date"].astype(str).str.slice(0, 10)}
)

tan_gs_db_final_sen_df_copy.to_excel(
    "sentiment_raw_output/tan_gs_db_final_sen_df_jul.xlsx",
    index=False,
)


### tan_lul_qa

In [365]:
#Score every tan_lul_qa review bucket through score_sentiments_jul in batches of 25 rows,
#while a background thread keeps a live progress message on screen.

#One-element list (not an int) so the progress thread sees the updates made by the loop below
batch_counter = [0]
#Total number of batches, derived from the dict itself so it always matches the buckets
#the loop below actually iterates over
total_batches = sum(
    math.ceil(len(bucket_df) / 25) for bucket_df in tan_lul_qa_nonnull_buckets.values()
)
key_counter = 0
total_keys=len(tan_lul_qa_nonnull_buckets.keys())
tan_lul_qa_nonnull_api = []
input_tokens_tan_lul_qa_nonnull=0
output_tokens_tan_lul_qa_nonnull=0
start_time_tan_lul_qa = time.time()

for key in tan_lul_qa_nonnull_buckets.keys():
    key_counter+=1
    current_df = tan_lul_qa_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    # Threading setup: one progress thread per bucket
    total_iterations = math.ceil(len(current_df) / 25)
    stop_event = threading.Event()
    message_thread = threading.Thread(target=print_dynamic_message, args=(batch_counter,key_counter,counter,total_batches,total_keys, total_iterations,start_time_tan_lul_qa, stop_event))
    message_thread.start()

    try:
        for start in range(0, len(current_df), 25):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + 25
            #Each call returns the raw response plus its input/output token counts
            result, it, ot = score_sentiments_jul(current_df[start:end])
            tan_lul_qa_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        # Always stop the dynamic message thread, even if a batch raised; otherwise it
        # would keep running (and clearing the cell output) after the failure
        stop_event.set()
        message_thread.join()

    input_tokens_tan_lul_qa_nonnull+=input_tokens
    output_tokens_tan_lul_qa_nonnull+=output_tokens
    clear_output(wait=True)

#Run summary: elapsed time and cost at 0.01 per 1000 input tokens / 0.03 per 1000 output tokens
overall_execution_time_tan_lul_qa = time.time() - start_time_tan_lul_qa
formatted_time_tan_lul_qa = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_tan_lul_qa))
input_token_cost_tan_lul_qa = round((0.01/1000) * input_tokens_tan_lul_qa_nonnull, 2)
output_token_cost_tan_lul_qa = round((0.03/1000) * output_tokens_tan_lul_qa_nonnull, 2)
total_cost_tan_lul_qa = round(input_token_cost_tan_lul_qa + output_token_cost_tan_lul_qa, 2)

#NOTE: batch_counter is a one-element list, so it is printed with brackets (e.g. "[11]")
print(f"Executed {batch_counter} Iterations")
print(f"Total Execution Time: {formatted_time_tan_lul_qa}")
print(f"Total Input Tokens - {input_tokens_tan_lul_qa_nonnull}")
print(f"Total Input Cost = {input_token_cost_tan_lul_qa}")
print(f"Total Output Tokens - {output_tokens_tan_lul_qa_nonnull}")
print(f"Total Output Cost = {output_token_cost_tan_lul_qa}")
print(f"Total Cost = {total_cost_tan_lul_qa}")

Executed [11] Iterations
Total Execution Time: 00:00:56
Total Input Tokens - 12367
Total Input Cost = 0.12
Total Output Tokens - 4177
Total Output Cost = 0.13
Total Cost = 0.25


In [366]:
#Topic columns scored for every commentor
column_names2 = [
    'Trust',
    'Store Experience',
    'Store Staff',
    'Product Design',
    'Product Variety',
    'Discount',
    'Making Charge',
    'Price',
    'Product Quality',
    'OLD Gold Jewellery Exchange',
]

#Strip the "```json" / "```" fence markers from each API response and swap every empty
#list "[]" for a single feedback entry whose positive/negative strings are blank
tan_lul_qa_nonnull_api_cleaned = list(map(
    lambda response: (
        response.replace("```json", "")
                .replace("```", "")
                .replace("[]", '[{"positive": "", "negative": ""}]')
    ),
    tan_lul_qa_nonnull_api,
))

#One cleaned response per row
tan_lul_qa_nonnull_api_cleaned_df = pd.DataFrame(tan_lul_qa_nonnull_api_cleaned)

In [367]:
#Parse every cleaned API response into one record per commentor, scoring each topic as
#1 (positive), -1 (negative) or 0 (not mentioned, or mentions that cancel out)
processed_data_tan_lul_qa_nonnull = []

#Iterate over each row in the DataFrame
for index, row in tan_lul_qa_nonnull_api_cleaned_df.iterrows():
    try:
        #Assuming the JSON string is in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Each key is a commentor name mapped to a list of feedback entries
        for commentor_name, feedback_list in data.items():
            #Create a dictionary for each commentor with default values as 0
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #An empty or missing feedback list leaves every topic at 0
            for feedback in feedback_list or []:
                #Topics arrive as a comma-separated string. Strip the whitespace around each
                #name so "Trust, Price" matches both topics, and treat a null value as no topics
                positive = [name.strip() for name in (feedback.get("positive") or "").split(',')]
                negative = [name.strip() for name in (feedback.get("negative") or "").split(',')]

                for topic in column_names2:
                    #Aggregate the value: 1 for positive, -1 for negative, 0 for not mentioned
                    commentor_data[topic] += assign_value(topic, positive, negative)

            #Finalize the values to be within -1, 0, or 1
            for topic in column_names2:
                commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_tan_lul_qa_nonnull.append(commentor_data)
    except (ValueError, TypeError, AttributeError):
        #Malformed response (invalid JSON or an unexpected structure): report it and move on.
        #Narrower than a bare except so KeyboardInterrupt and unrelated bugs are not swallowed
        print(f"Invalid JSON in row {index}")

#Save the cleaned responses, reload them from disk, and display the reloaded frame
tan_lul_qa_nonnull_api_cleaned_df.to_excel("tan_lul_qa_nonnull_api_cleaned_df.xlsx",index=False)

tan_lul_qa_nonnull_api_cleaned_df = pd.read_excel("tan_lul_qa_nonnull_api_cleaned_df.xlsx")

tan_lul_qa_nonnull_api_cleaned_df

In [368]:
#Column order for the sentiment frame: commentor name first, then the ten topic scores
ordered_columns = [
    "Commentor Name",
    "Trust",
    "Store Experience",
    "Store Staff",
    "Product Design",
    "Product Variety",
    "Discount",
    "Making Charge",
    "Price",
    "Product Quality",
    "OLD Gold Jewellery Exchange",
]

#Build the per-commentor sentiment frame from the parsed records, in the order above
tan_lul_qa_nonnull_sen_df = pd.DataFrame(processed_data_tan_lul_qa_nonnull).loc[:, ordered_columns]

#Put the existing review frame and the sentiment scores side by side (rows are aligned on index)
tan_lul_qa_nonnull_merged_df = pd.concat(
    objs=[combined_df_tan_lul_qa, tan_lul_qa_nonnull_sen_df],
    axis="columns",
)

#Stack the frame stored under 'tan_lul_qa_null' underneath and renumber the index
tan_lul_qa_final_sen_df = pd.concat(
    [tan_lul_qa_nonnull_merged_df, null_dataframes['tan_lul_qa_null']],
    ignore_index=True,
)

#Export copy: "Published At Date" is reduced to the first 10 characters of its string form,
#while tan_lul_qa_final_sen_df itself is left untouched
tan_lul_qa_final_sen_df_copy = tan_lul_qa_final_sen_df.assign(
    **{"Published At Date": lambda frame: frame["Published At Date"].astype(str).str.slice(0, 10)}
)

tan_lul_qa_final_sen_df_copy.to_excel(
    "sentiment_raw_output/tan_lul_qa_final_sen_df_jul.xlsx",
    index=False,
)


### tan_mank_db

In [369]:
#Score every tan_mank_db review bucket through score_sentiments_jul in batches of 25 rows,
#while a background thread keeps a live progress message on screen.

#One-element list (not an int) so the progress thread sees the updates made by the loop below
batch_counter = [0]
#Total number of batches, derived from the dict itself so it always matches the buckets
#the loop below actually iterates over
total_batches = sum(
    math.ceil(len(bucket_df) / 25) for bucket_df in tan_mank_db_nonnull_buckets.values()
)
key_counter = 0
total_keys=len(tan_mank_db_nonnull_buckets.keys())
tan_mank_db_nonnull_api = []
input_tokens_tan_mank_db_nonnull=0
output_tokens_tan_mank_db_nonnull=0
start_time_tan_mank_db = time.time()

for key in tan_mank_db_nonnull_buckets.keys():
    key_counter+=1
    current_df = tan_mank_db_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    # Threading setup: one progress thread per bucket
    total_iterations = math.ceil(len(current_df) / 25)
    stop_event = threading.Event()
    message_thread = threading.Thread(target=print_dynamic_message, args=(batch_counter,key_counter,counter,total_batches,total_keys, total_iterations,start_time_tan_mank_db, stop_event))
    message_thread.start()

    try:
        for start in range(0, len(current_df), 25):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + 25
            #Each call returns the raw response plus its input/output token counts
            result, it, ot = score_sentiments_jul(current_df[start:end])
            tan_mank_db_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        # Always stop the dynamic message thread, even if a batch raised; otherwise it
        # would keep running (and clearing the cell output) after the failure
        stop_event.set()
        message_thread.join()

    input_tokens_tan_mank_db_nonnull+=input_tokens
    output_tokens_tan_mank_db_nonnull+=output_tokens
    clear_output(wait=True)

#Run summary: elapsed time and cost at 0.01 per 1000 input tokens / 0.03 per 1000 output tokens
overall_execution_time_tan_mank_db = time.time() - start_time_tan_mank_db
formatted_time_tan_mank_db = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_tan_mank_db))
input_token_cost_tan_mank_db = round((0.01/1000) * input_tokens_tan_mank_db_nonnull, 2)
output_token_cost_tan_mank_db = round((0.03/1000) * output_tokens_tan_mank_db_nonnull, 2)
total_cost_tan_mank_db = round(input_token_cost_tan_mank_db + output_token_cost_tan_mank_db, 2)

#NOTE: batch_counter is a one-element list, so it is printed with brackets (e.g. "[5]")
print(f"Executed {batch_counter} Iterations")
print(f"Total Execution Time: {formatted_time_tan_mank_db}")
print(f"Total Input Tokens - {input_tokens_tan_mank_db_nonnull}")
print(f"Total Input Cost = {input_token_cost_tan_mank_db}")
print(f"Total Output Tokens - {output_tokens_tan_mank_db_nonnull}")
print(f"Total Output Cost = {output_token_cost_tan_mank_db}")
print(f"Total Cost = {total_cost_tan_mank_db}")

Executed [5] Iterations
Total Execution Time: 00:00:15
Total Input Tokens - 4649
Total Input Cost = 0.05
Total Output Tokens - 897
Total Output Cost = 0.03
Total Cost = 0.08


In [370]:
#Topic columns scored for every commentor
column_names2 = [
    'Trust',
    'Store Experience',
    'Store Staff',
    'Product Design',
    'Product Variety',
    'Discount',
    'Making Charge',
    'Price',
    'Product Quality',
    'OLD Gold Jewellery Exchange',
]

#Strip the "```json" / "```" fence markers from each API response and swap every empty
#list "[]" for a single feedback entry whose positive/negative strings are blank
tan_mank_db_nonnull_api_cleaned = list(map(
    lambda response: (
        response.replace("```json", "")
                .replace("```", "")
                .replace("[]", '[{"positive": "", "negative": ""}]')
    ),
    tan_mank_db_nonnull_api,
))

#One cleaned response per row
tan_mank_db_nonnull_api_cleaned_df = pd.DataFrame(tan_mank_db_nonnull_api_cleaned)

In [371]:
#Parse every cleaned API response into one record per commentor, scoring each topic as
#1 (positive), -1 (negative) or 0 (not mentioned, or mentions that cancel out)
processed_data_tan_mank_db_nonnull = []

#Iterate over each row in the DataFrame
for index, row in tan_mank_db_nonnull_api_cleaned_df.iterrows():
    try:
        #Assuming the JSON string is in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Each key is a commentor name mapped to a list of feedback entries
        for commentor_name, feedback_list in data.items():
            #Create a dictionary for each commentor with default values as 0
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #An empty or missing feedback list leaves every topic at 0
            for feedback in feedback_list or []:
                #Topics arrive as a comma-separated string. Strip the whitespace around each
                #name so "Trust, Price" matches both topics, and treat a null value as no topics
                positive = [name.strip() for name in (feedback.get("positive") or "").split(',')]
                negative = [name.strip() for name in (feedback.get("negative") or "").split(',')]

                for topic in column_names2:
                    #Aggregate the value: 1 for positive, -1 for negative, 0 for not mentioned
                    commentor_data[topic] += assign_value(topic, positive, negative)

            #Finalize the values to be within -1, 0, or 1
            for topic in column_names2:
                commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_tan_mank_db_nonnull.append(commentor_data)
    except (ValueError, TypeError, AttributeError):
        #Malformed response (invalid JSON or an unexpected structure): report it and move on.
        #Narrower than a bare except so KeyboardInterrupt and unrelated bugs are not swallowed
        print(f"Invalid JSON in row {index}")

#Save the cleaned responses, reload them from disk, and display the reloaded frame
tan_mank_db_nonnull_api_cleaned_df.to_excel("tan_mank_db_nonnull_api_cleaned_df.xlsx",index=False)

tan_mank_db_nonnull_api_cleaned_df = pd.read_excel("tan_mank_db_nonnull_api_cleaned_df.xlsx")

tan_mank_db_nonnull_api_cleaned_df

In [372]:
#Column order for the sentiment frame: commentor name first, then the ten topic scores
ordered_columns = [
    "Commentor Name",
    "Trust",
    "Store Experience",
    "Store Staff",
    "Product Design",
    "Product Variety",
    "Discount",
    "Making Charge",
    "Price",
    "Product Quality",
    "OLD Gold Jewellery Exchange",
]

#Build the per-commentor sentiment frame from the parsed records, in the order above
tan_mank_db_nonnull_sen_df = pd.DataFrame(processed_data_tan_mank_db_nonnull).loc[:, ordered_columns]

#Put the existing review frame and the sentiment scores side by side (rows are aligned on index)
tan_mank_db_nonnull_merged_df = pd.concat(
    objs=[combined_df_tan_mank_db, tan_mank_db_nonnull_sen_df],
    axis="columns",
)

#Stack the frame stored under 'tan_mank_db_null' underneath and renumber the index
tan_mank_db_final_sen_df = pd.concat(
    [tan_mank_db_nonnull_merged_df, null_dataframes['tan_mank_db_null']],
    ignore_index=True,
)

#Export copy: "Published At Date" is reduced to the first 10 characters of its string form,
#while tan_mank_db_final_sen_df itself is left untouched
tan_mank_db_final_sen_df_copy = tan_mank_db_final_sen_df.assign(
    **{"Published At Date": lambda frame: frame["Published At Date"].astype(str).str.slice(0, 10)}
)

tan_mank_db_final_sen_df_copy.to_excel(
    "sentiment_raw_output/tan_mank_db_final_sen_df_jul.xlsx",
    index=False,
)


### tan_rol_sh

In [373]:
#Score every tan_rol_sh review bucket through score_sentiments_jul in batches of 25 rows,
#while a background thread keeps a live progress message on screen.

#One-element list (not an int) so the progress thread sees the updates made by the loop below
batch_counter = [0]
#Total number of batches, derived from the dict itself so it always matches the buckets
#the loop below actually iterates over
total_batches = sum(
    math.ceil(len(bucket_df) / 25) for bucket_df in tan_rol_sh_nonnull_buckets.values()
)
key_counter = 0
total_keys=len(tan_rol_sh_nonnull_buckets.keys())
tan_rol_sh_nonnull_api = []
input_tokens_tan_rol_sh_nonnull=0
output_tokens_tan_rol_sh_nonnull=0
start_time_tan_rol_sh = time.time()

for key in tan_rol_sh_nonnull_buckets.keys():
    key_counter+=1
    current_df = tan_rol_sh_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    # Threading setup: one progress thread per bucket
    total_iterations = math.ceil(len(current_df) / 25)
    stop_event = threading.Event()
    message_thread = threading.Thread(target=print_dynamic_message, args=(batch_counter,key_counter,counter,total_batches,total_keys, total_iterations,start_time_tan_rol_sh, stop_event))
    message_thread.start()

    try:
        for start in range(0, len(current_df), 25):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + 25
            #Each call returns the raw response plus its input/output token counts
            result, it, ot = score_sentiments_jul(current_df[start:end])
            tan_rol_sh_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        # Always stop the dynamic message thread, even if a batch raised; otherwise it
        # would keep running (and clearing the cell output) after the failure
        stop_event.set()
        message_thread.join()

    input_tokens_tan_rol_sh_nonnull+=input_tokens
    output_tokens_tan_rol_sh_nonnull+=output_tokens
    clear_output(wait=True)

#Run summary: elapsed time and cost at 0.01 per 1000 input tokens / 0.03 per 1000 output tokens
overall_execution_time_tan_rol_sh = time.time() - start_time_tan_rol_sh
formatted_time_tan_rol_sh = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_tan_rol_sh))
input_token_cost_tan_rol_sh = round((0.01/1000) * input_tokens_tan_rol_sh_nonnull, 2)
output_token_cost_tan_rol_sh = round((0.03/1000) * output_tokens_tan_rol_sh_nonnull, 2)
total_cost_tan_rol_sh = round(input_token_cost_tan_rol_sh + output_token_cost_tan_rol_sh, 2)

#NOTE: batch_counter is a one-element list, so it is printed with brackets (e.g. "[11]")
print(f"Executed {batch_counter} Iterations")
print(f"Total Execution Time: {formatted_time_tan_rol_sh}")
print(f"Total Input Tokens - {input_tokens_tan_rol_sh_nonnull}")
print(f"Total Input Cost = {input_token_cost_tan_rol_sh}")
print(f"Total Output Tokens - {output_tokens_tan_rol_sh_nonnull}")
print(f"Total Output Cost = {output_token_cost_tan_rol_sh}")
print(f"Total Cost = {total_cost_tan_rol_sh}")

Executed [11] Iterations
Total Execution Time: 00:01:19
Total Input Tokens - 17087
Total Input Cost = 0.17
Total Output Tokens - 6736
Total Output Cost = 0.2
Total Cost = 0.37


In [374]:
#Topic columns scored for every commentor
column_names2 = [
    'Trust',
    'Store Experience',
    'Store Staff',
    'Product Design',
    'Product Variety',
    'Discount',
    'Making Charge',
    'Price',
    'Product Quality',
    'OLD Gold Jewellery Exchange',
]

#Strip the "```json" / "```" fence markers from each API response and swap every empty
#list "[]" for a single feedback entry whose positive/negative strings are blank
tan_rol_sh_nonnull_api_cleaned = list(map(
    lambda response: (
        response.replace("```json", "")
                .replace("```", "")
                .replace("[]", '[{"positive": "", "negative": ""}]')
    ),
    tan_rol_sh_nonnull_api,
))

#One cleaned response per row
tan_rol_sh_nonnull_api_cleaned_df = pd.DataFrame(tan_rol_sh_nonnull_api_cleaned)

In [375]:
#Parse every cleaned API response into one record per commentor, scoring each topic as
#1 (positive), -1 (negative) or 0 (not mentioned, or mentions that cancel out)
processed_data_tan_rol_sh_nonnull = []

#Iterate over each row in the DataFrame
for index, row in tan_rol_sh_nonnull_api_cleaned_df.iterrows():
    try:
        #Assuming the JSON string is in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Each key is a commentor name mapped to a list of feedback entries
        for commentor_name, feedback_list in data.items():
            #Create a dictionary for each commentor with default values as 0
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #An empty or missing feedback list leaves every topic at 0
            for feedback in feedback_list or []:
                #Topics arrive as a comma-separated string. Strip the whitespace around each
                #name so "Trust, Price" matches both topics, and treat a null value as no topics
                positive = [name.strip() for name in (feedback.get("positive") or "").split(',')]
                negative = [name.strip() for name in (feedback.get("negative") or "").split(',')]

                for topic in column_names2:
                    #Aggregate the value: 1 for positive, -1 for negative, 0 for not mentioned
                    commentor_data[topic] += assign_value(topic, positive, negative)

            #Finalize the values to be within -1, 0, or 1
            for topic in column_names2:
                commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_tan_rol_sh_nonnull.append(commentor_data)
    except (ValueError, TypeError, AttributeError):
        #Malformed response (invalid JSON or an unexpected structure): report it and move on.
        #Narrower than a bare except so KeyboardInterrupt and unrelated bugs are not swallowed
        print(f"Invalid JSON in row {index}")

#Save the cleaned responses, reload them from disk, and display the reloaded frame
tan_rol_sh_nonnull_api_cleaned_df.to_excel("tan_rol_sh_nonnull_api_cleaned_df.xlsx",index=False)

tan_rol_sh_nonnull_api_cleaned_df = pd.read_excel("tan_rol_sh_nonnull_api_cleaned_df.xlsx")

tan_rol_sh_nonnull_api_cleaned_df

In [376]:
#Column order for the sentiment frame: commentor name first, then the ten topic scores
ordered_columns = [
    "Commentor Name",
    "Trust",
    "Store Experience",
    "Store Staff",
    "Product Design",
    "Product Variety",
    "Discount",
    "Making Charge",
    "Price",
    "Product Quality",
    "OLD Gold Jewellery Exchange",
]

#Build the per-commentor sentiment frame from the parsed records, in the order above
tan_rol_sh_nonnull_sen_df = pd.DataFrame(processed_data_tan_rol_sh_nonnull).loc[:, ordered_columns]

#Put the existing review frame and the sentiment scores side by side (rows are aligned on index)
tan_rol_sh_nonnull_merged_df = pd.concat(
    objs=[combined_df_tan_rol_sh, tan_rol_sh_nonnull_sen_df],
    axis="columns",
)

#Stack the frame stored under 'tan_rol_sh_null' underneath and renumber the index
tan_rol_sh_final_sen_df = pd.concat(
    [tan_rol_sh_nonnull_merged_df, null_dataframes['tan_rol_sh_null']],
    ignore_index=True,
)

#Export copy: "Published At Date" is reduced to the first 10 characters of its string form,
#while tan_rol_sh_final_sen_df itself is left untouched
tan_rol_sh_final_sen_df_copy = tan_rol_sh_final_sen_df.assign(
    **{"Published At Date": lambda frame: frame["Published At Date"].astype(str).str.slice(0, 10)}
)

tan_rol_sh_final_sen_df_copy.to_excel(
    "sentiment_raw_output/tan_rol_sh_final_sen_df_jul.xlsx",
    index=False,
)


### tan_rse_wa

In [377]:
#Score every tan_rse_wa review bucket through score_sentiments_jul in batches of 25 rows,
#while a background thread keeps a live progress message on screen.

#One-element list (not an int) so the progress thread sees the updates made by the loop below
batch_counter = [0]
#Total number of batches, derived from the dict itself so it always matches the buckets
#the loop below actually iterates over
total_batches = sum(
    math.ceil(len(bucket_df) / 25) for bucket_df in tan_rse_wa_nonnull_buckets.values()
)
key_counter = 0
total_keys=len(tan_rse_wa_nonnull_buckets.keys())
tan_rse_wa_nonnull_api = []
input_tokens_tan_rse_wa_nonnull=0
output_tokens_tan_rse_wa_nonnull=0
start_time_tan_rse_wa = time.time()

for key in tan_rse_wa_nonnull_buckets.keys():
    key_counter+=1
    current_df = tan_rse_wa_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    # Threading setup: one progress thread per bucket
    total_iterations = math.ceil(len(current_df) / 25)
    stop_event = threading.Event()
    message_thread = threading.Thread(target=print_dynamic_message, args=(batch_counter,key_counter,counter,total_batches,total_keys, total_iterations,start_time_tan_rse_wa, stop_event))
    message_thread.start()

    try:
        for start in range(0, len(current_df), 25):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + 25
            #Each call returns the raw response plus its input/output token counts
            result, it, ot = score_sentiments_jul(current_df[start:end])
            tan_rse_wa_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        # Always stop the dynamic message thread, even if a batch raised; otherwise it
        # would keep running (and clearing the cell output) after the failure
        stop_event.set()
        message_thread.join()

    input_tokens_tan_rse_wa_nonnull+=input_tokens
    output_tokens_tan_rse_wa_nonnull+=output_tokens
    clear_output(wait=True)

#Run summary: elapsed time and cost at 0.01 per 1000 input tokens / 0.03 per 1000 output tokens
overall_execution_time_tan_rse_wa = time.time() - start_time_tan_rse_wa
formatted_time_tan_rse_wa = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_tan_rse_wa))
input_token_cost_tan_rse_wa = round((0.01/1000) * input_tokens_tan_rse_wa_nonnull, 2)
output_token_cost_tan_rse_wa = round((0.03/1000) * output_tokens_tan_rse_wa_nonnull, 2)
total_cost_tan_rse_wa = round(input_token_cost_tan_rse_wa + output_token_cost_tan_rse_wa, 2)

#NOTE: batch_counter is a one-element list, so it is printed with brackets (e.g. "[9]")
print(f"Executed {batch_counter} Iterations")
print(f"Total Execution Time: {formatted_time_tan_rse_wa}")
print(f"Total Input Tokens - {input_tokens_tan_rse_wa_nonnull}")
print(f"Total Input Cost = {input_token_cost_tan_rse_wa}")
print(f"Total Output Tokens - {output_tokens_tan_rse_wa_nonnull}")
print(f"Total Output Cost = {output_token_cost_tan_rse_wa}")
print(f"Total Cost = {total_cost_tan_rse_wa}")

Executed [9] Iterations
Total Execution Time: 00:00:56
Total Input Tokens - 12207
Total Input Cost = 0.12
Total Output Tokens - 4070
Total Output Cost = 0.12
Total Cost = 0.24


In [378]:
#Topic columns scored for every commentor
column_names2 = [
    'Trust',
    'Store Experience',
    'Store Staff',
    'Product Design',
    'Product Variety',
    'Discount',
    'Making Charge',
    'Price',
    'Product Quality',
    'OLD Gold Jewellery Exchange',
]

#Strip the "```json" / "```" fence markers from each API response and swap every empty
#list "[]" for a single feedback entry whose positive/negative strings are blank
tan_rse_wa_nonnull_api_cleaned = list(map(
    lambda response: (
        response.replace("```json", "")
                .replace("```", "")
                .replace("[]", '[{"positive": "", "negative": ""}]')
    ),
    tan_rse_wa_nonnull_api,
))

#One cleaned response per row
tan_rse_wa_nonnull_api_cleaned_df = pd.DataFrame(tan_rse_wa_nonnull_api_cleaned)

In [379]:
# Turn each cleaned API response into one record per commentor. Every response
# is parsed as a JSON object mapping a commentor name to a list of feedback
# entries; each entry carries comma-separated topic names under "positive"
# and "negative". A record holds a -1 / 0 / 1 score for every topic in
# column_names2 plus the commentor's name.
processed_data_tan_rse_wa_nonnull = []

#Iterate over each row in the DataFrame
for index, row in tan_rse_wa_nonnull_api_cleaned_df.iterrows():
    try:
        #Assuming the JSON string is in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Iterate over each key-value pair in the dictionary
        for commentor_name, feedback_list in data.items():
            #Create a dictionary for each commentor with default values as 0
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #A commentor may have several feedback entries; aggregate them.
            #`or []` lets a null/empty feedback list fall through as "no feedback".
            for feedback in feedback_list or []:
                #Strip each topic so "Trust, Price" matches both topics; `or ""`
                #keeps a null value from aborting the rest of the batch.
                positive = [t.strip() for t in (feedback.get("positive") or "").split(',')]
                negative = [t.strip() for t in (feedback.get("negative") or "").split(',')]

                for topic in column_names2:
                    #Aggregate the value: 1 for positive, -1 for negative, 0 for not mentioned
                    commentor_data[topic] += assign_value(topic, positive, negative)

            #Finalize the values to be within -1, 0, or 1
            for topic in column_names2:
                commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_tan_rse_wa_nonnull.append(commentor_data)
    except (ValueError, TypeError, AttributeError) as err:
        #Skip rows whose content is not the expected JSON structure
        #(json.JSONDecodeError is a ValueError). Unlike a bare except this
        #does not swallow KeyboardInterrupt, and the cause is reported.
        #NOTE(review): commentors parsed before the failure stay in the list,
        #so a failed row yields fewer records than reviews sent - verify the
        #row counts before the positional merge that follows.
        print(f"Invalid JSON in row {index}")
        print(f"    {type(err).__name__}: {err}")

tan_rse_wa_nonnull_api_cleaned_df.to_excel("tan_rse_wa_nonnull_api_cleaned_df.xlsx",index=False)

tan_rse_wa_nonnull_api_cleaned_df = pd.read_excel("tan_rse_wa_nonnull_api_cleaned_df.xlsx")

tan_rse_wa_nonnull_api_cleaned_df

In [380]:
# Column layout for the sentiment scores: commentor name first, then the topics
ordered_columns = [
    "Commentor Name",
    "Trust",
    "Store Experience",
    "Store Staff",
    "Product Design",
    "Product Variety",
    "Discount",
    "Making Charge",
    "Price",
    "Product Quality",
    "OLD Gold Jewellery Exchange",
]

# Build the score frame from the parsed records and put its columns in that order
tan_rse_wa_nonnull_sen_df = pd.DataFrame(processed_data_tan_rse_wa_nonnull)
tan_rse_wa_nonnull_sen_df = tan_rse_wa_nonnull_sen_df.loc[:, ordered_columns]

# Put the scores beside the reviews (axis=1 concat aligns rows on the index)
tan_rse_wa_nonnull_merged_df = pd.concat(
    [combined_df_tan_rse_wa, tan_rse_wa_nonnull_sen_df],
    axis=1,
)

# Stack the frame stored under 'tan_rse_wa_null' below the scored reviews
tan_rse_wa_final_sen_df = pd.concat(
    [tan_rse_wa_nonnull_merged_df, null_dataframes['tan_rse_wa_null']],
    ignore_index=True,
)

# Export a copy whose date column keeps only the first 10 characters of its string form
tan_rse_wa_final_sen_df_copy = tan_rse_wa_final_sen_df.copy()
published_as_text = tan_rse_wa_final_sen_df_copy["Published At Date"].astype(str)
tan_rse_wa_final_sen_df_copy["Published At Date"] = published_as_text.str.slice(stop=10)

tan_rse_wa_final_sen_df_copy.to_excel("sentiment_raw_output/tan_rse_wa_final_sen_df_jul.xlsx",index=False)


### tan_sc_ca

In [381]:
# Score every review in tan_sc_ca_nonnull_buckets with score_sentiments_jul,
# 25 rows per call, while a background thread prints live progress. Raw
# responses are collected in tan_sc_ca_nonnull_api and token usage is summed
# for the cost summary printed at the end.
batch_counter = [0]  # one-element list so the progress thread sees updates
# Total number of calls across all buckets. Summing over every bucket keeps
# the total consistent with the loop below, which iterates all keys.
total_batches = sum(
    math.ceil(len(bucket_df) / 25) for bucket_df in tan_sc_ca_nonnull_buckets.values()
)
key_counter = 0
total_keys = len(tan_sc_ca_nonnull_buckets.keys())
tan_sc_ca_nonnull_api = []
input_tokens_tan_sc_ca_nonnull = 0
output_tokens_tan_sc_ca_nonnull = 0
start_time_tan_sc_ca = time.time()

for key in tan_sc_ca_nonnull_buckets.keys():
    key_counter += 1
    current_df = tan_sc_ca_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    # Threading setup
    total_iterations = math.ceil(len(current_df) / 25)
    stop_event = threading.Event()
    message_thread = threading.Thread(target=print_dynamic_message, args=(batch_counter,key_counter,counter,total_batches,total_keys, total_iterations,start_time_tan_sc_ca, stop_event))
    message_thread.start()

    try:
        for start in range(0, len(current_df), 25):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + 25
            result, it, ot = score_sentiments_jul(current_df[start:end])
            tan_sc_ca_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        # Always stop the progress thread, even when a call raises; otherwise
        # it would keep clearing and reprinting the cell output indefinitely.
        stop_event.set()
        message_thread.join()
    input_tokens_tan_sc_ca_nonnull += input_tokens
    output_tokens_tan_sc_ca_nonnull += output_tokens
    clear_output(wait=True)

overall_execution_time_tan_sc_ca = time.time() - start_time_tan_sc_ca
formatted_time_tan_sc_ca = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_tan_sc_ca))
# Hard-coded rates of 0.01 (input) and 0.03 (output) per 1000 tokens
# NOTE(review): presumably USD for the model in use - confirm against current pricing
input_token_cost_tan_sc_ca = round((0.01/1000) * input_tokens_tan_sc_ca_nonnull, 2)
output_token_cost_tan_sc_ca = round((0.03/1000) * output_tokens_tan_sc_ca_nonnull, 2)
total_cost_tan_sc_ca = round(input_token_cost_tan_sc_ca + output_token_cost_tan_sc_ca, 2)

# Print the count itself rather than the one-element list (was "Executed [6] Iterations")
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_tan_sc_ca}")
print(f"Total Input Tokens - {input_tokens_tan_sc_ca_nonnull}")
print(f"Total Input Cost = {input_token_cost_tan_sc_ca}")
print(f"Total Output Tokens - {output_tokens_tan_sc_ca_nonnull}")
print(f"Total Output Cost = {output_token_cost_tan_sc_ca}")
print(f"Total Cost = {total_cost_tan_sc_ca}")

Executed [6] Iterations
Total Execution Time: 00:00:28
Total Input Tokens - 8090
Total Input Cost = 0.08
Total Output Tokens - 1981
Total Output Cost = 0.06
Total Cost = 0.14


In [382]:
# Strip the Markdown code fences ("```json" / "```") from each raw response in
# tan_sc_ca_nonnull_api, swap every literal "[]" for a placeholder feedback
# entry with empty positive/negative strings, and load the cleaned strings
# into a DataFrame.
tan_sc_ca_nonnull_api_cleaned = []
for raw_response in tan_sc_ca_nonnull_api:
    unfenced = raw_response.replace("```json", "").replace("```", "")
    tan_sc_ca_nonnull_api_cleaned.append(
        unfenced.replace("[]", "[{\"positive\": \"\", \"negative\": \"\"}]")
    )
tan_sc_ca_nonnull_api_cleaned_df = pd.DataFrame(tan_sc_ca_nonnull_api_cleaned)

# Topics that receive a sentiment score for every commentor
column_names2 = [
    'Trust',
    'Store Experience',
    'Store Staff',
    'Product Design',
    'Product Variety',
    'Discount',
    'Making Charge',
    'Price',
    'Product Quality',
    'OLD Gold Jewellery Exchange',
]

In [383]:
# Turn each cleaned API response into one record per commentor. Every response
# is parsed as a JSON object mapping a commentor name to a list of feedback
# entries; each entry carries comma-separated topic names under "positive"
# and "negative". A record holds a -1 / 0 / 1 score for every topic in
# column_names2 plus the commentor's name.
processed_data_tan_sc_ca_nonnull = []

#Iterate over each row in the DataFrame
for index, row in tan_sc_ca_nonnull_api_cleaned_df.iterrows():
    try:
        #Assuming the JSON string is in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Iterate over each key-value pair in the dictionary
        for commentor_name, feedback_list in data.items():
            #Create a dictionary for each commentor with default values as 0
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #A commentor may have several feedback entries; aggregate them.
            #`or []` lets a null/empty feedback list fall through as "no feedback".
            for feedback in feedback_list or []:
                #Strip each topic so "Trust, Price" matches both topics; `or ""`
                #keeps a null value from aborting the rest of the batch.
                positive = [t.strip() for t in (feedback.get("positive") or "").split(',')]
                negative = [t.strip() for t in (feedback.get("negative") or "").split(',')]

                for topic in column_names2:
                    #Aggregate the value: 1 for positive, -1 for negative, 0 for not mentioned
                    commentor_data[topic] += assign_value(topic, positive, negative)

            #Finalize the values to be within -1, 0, or 1
            for topic in column_names2:
                commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_tan_sc_ca_nonnull.append(commentor_data)
    except (ValueError, TypeError, AttributeError) as err:
        #Skip rows whose content is not the expected JSON structure
        #(json.JSONDecodeError is a ValueError). Unlike a bare except this
        #does not swallow KeyboardInterrupt, and the cause is reported.
        #NOTE(review): commentors parsed before the failure stay in the list,
        #so a failed row yields fewer records than reviews sent - verify the
        #row counts before the positional merge that follows.
        print(f"Invalid JSON in row {index}")
        print(f"    {type(err).__name__}: {err}")

tan_sc_ca_nonnull_api_cleaned_df.to_excel("tan_sc_ca_nonnull_api_cleaned_df.xlsx",index=False)

tan_sc_ca_nonnull_api_cleaned_df = pd.read_excel("tan_sc_ca_nonnull_api_cleaned_df.xlsx")

tan_sc_ca_nonnull_api_cleaned_df

In [384]:
# Column layout for the sentiment scores: commentor name first, then the topics
ordered_columns = [
    "Commentor Name",
    "Trust",
    "Store Experience",
    "Store Staff",
    "Product Design",
    "Product Variety",
    "Discount",
    "Making Charge",
    "Price",
    "Product Quality",
    "OLD Gold Jewellery Exchange",
]

# Build the score frame from the parsed records and put its columns in that order
tan_sc_ca_nonnull_sen_df = pd.DataFrame(processed_data_tan_sc_ca_nonnull)
tan_sc_ca_nonnull_sen_df = tan_sc_ca_nonnull_sen_df.loc[:, ordered_columns]

# Put the scores beside the reviews (axis=1 concat aligns rows on the index)
tan_sc_ca_nonnull_merged_df = pd.concat(
    [combined_df_tan_sc_ca, tan_sc_ca_nonnull_sen_df],
    axis=1,
)

# Stack the frame stored under 'tan_sc_ca_null' below the scored reviews
tan_sc_ca_final_sen_df = pd.concat(
    [tan_sc_ca_nonnull_merged_df, null_dataframes['tan_sc_ca_null']],
    ignore_index=True,
)

# Export a copy whose date column keeps only the first 10 characters of its string form
tan_sc_ca_final_sen_df_copy = tan_sc_ca_final_sen_df.copy()
published_as_text = tan_sc_ca_final_sen_df_copy["Published At Date"].astype(str)
tan_sc_ca_final_sen_df_copy["Published At Date"] = published_as_text.str.slice(stop=10)

tan_sc_ca_final_sen_df_copy.to_excel("sentiment_raw_output/tan_sc_ca_final_sen_df_jul.xlsx",index=False)


### tan_sc_sh

In [385]:
# Score every review in tan_sc_sh_nonnull_buckets with score_sentiments_jul,
# 25 rows per call, while a background thread prints live progress. Raw
# responses are collected in tan_sc_sh_nonnull_api and token usage is summed
# for the cost summary printed at the end.
batch_counter = [0]  # one-element list so the progress thread sees updates
# Total number of calls across all buckets. Summing over every bucket keeps
# the total consistent with the loop below, which iterates all keys.
total_batches = sum(
    math.ceil(len(bucket_df) / 25) for bucket_df in tan_sc_sh_nonnull_buckets.values()
)
key_counter = 0
total_keys = len(tan_sc_sh_nonnull_buckets.keys())
tan_sc_sh_nonnull_api = []
input_tokens_tan_sc_sh_nonnull = 0
output_tokens_tan_sc_sh_nonnull = 0
start_time_tan_sc_sh = time.time()

for key in tan_sc_sh_nonnull_buckets.keys():
    key_counter += 1
    current_df = tan_sc_sh_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    # Threading setup
    total_iterations = math.ceil(len(current_df) / 25)
    stop_event = threading.Event()
    message_thread = threading.Thread(target=print_dynamic_message, args=(batch_counter,key_counter,counter,total_batches,total_keys, total_iterations,start_time_tan_sc_sh, stop_event))
    message_thread.start()

    try:
        for start in range(0, len(current_df), 25):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + 25
            result, it, ot = score_sentiments_jul(current_df[start:end])
            tan_sc_sh_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        # Always stop the progress thread, even when a call raises; otherwise
        # it would keep clearing and reprinting the cell output indefinitely.
        stop_event.set()
        message_thread.join()
    input_tokens_tan_sc_sh_nonnull += input_tokens
    output_tokens_tan_sc_sh_nonnull += output_tokens
    clear_output(wait=True)

overall_execution_time_tan_sc_sh = time.time() - start_time_tan_sc_sh
formatted_time_tan_sc_sh = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_tan_sc_sh))
# Hard-coded rates of 0.01 (input) and 0.03 (output) per 1000 tokens
# NOTE(review): presumably USD for the model in use - confirm against current pricing
input_token_cost_tan_sc_sh = round((0.01/1000) * input_tokens_tan_sc_sh_nonnull, 2)
output_token_cost_tan_sc_sh = round((0.03/1000) * output_tokens_tan_sc_sh_nonnull, 2)
total_cost_tan_sc_sh = round(input_token_cost_tan_sc_sh + output_token_cost_tan_sc_sh, 2)

# Print the count itself rather than the one-element list (was "Executed [17] Iterations")
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_tan_sc_sh}")
print(f"Total Input Tokens - {input_tokens_tan_sc_sh_nonnull}")
print(f"Total Input Cost = {input_token_cost_tan_sc_sh}")
print(f"Total Output Tokens - {output_tokens_tan_sc_sh_nonnull}")
print(f"Total Output Cost = {output_token_cost_tan_sc_sh}")
print(f"Total Cost = {total_cost_tan_sc_sh}")

Executed [17] Iterations
Total Execution Time: 00:02:07
Total Input Tokens - 23068
Total Input Cost = 0.23
Total Output Tokens - 9282
Total Output Cost = 0.28
Total Cost = 0.51


In [386]:
# Strip the Markdown code fences ("```json" / "```") from each raw response in
# tan_sc_sh_nonnull_api, swap every literal "[]" for a placeholder feedback
# entry with empty positive/negative strings, and load the cleaned strings
# into a DataFrame.
tan_sc_sh_nonnull_api_cleaned = []
for raw_response in tan_sc_sh_nonnull_api:
    unfenced = raw_response.replace("```json", "").replace("```", "")
    tan_sc_sh_nonnull_api_cleaned.append(
        unfenced.replace("[]", "[{\"positive\": \"\", \"negative\": \"\"}]")
    )
tan_sc_sh_nonnull_api_cleaned_df = pd.DataFrame(tan_sc_sh_nonnull_api_cleaned)

# Topics that receive a sentiment score for every commentor
column_names2 = [
    'Trust',
    'Store Experience',
    'Store Staff',
    'Product Design',
    'Product Variety',
    'Discount',
    'Making Charge',
    'Price',
    'Product Quality',
    'OLD Gold Jewellery Exchange',
]

In [387]:
# Turn each cleaned API response into one record per commentor. Every response
# is parsed as a JSON object mapping a commentor name to a list of feedback
# entries; each entry carries comma-separated topic names under "positive"
# and "negative". A record holds a -1 / 0 / 1 score for every topic in
# column_names2 plus the commentor's name.
processed_data_tan_sc_sh_nonnull = []

#Iterate over each row in the DataFrame
for index, row in tan_sc_sh_nonnull_api_cleaned_df.iterrows():
    try:
        #Assuming the JSON string is in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Iterate over each key-value pair in the dictionary
        for commentor_name, feedback_list in data.items():
            #Create a dictionary for each commentor with default values as 0
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #A commentor may have several feedback entries; aggregate them.
            #`or []` lets a null/empty feedback list fall through as "no feedback".
            for feedback in feedback_list or []:
                #Strip each topic so "Trust, Price" matches both topics; `or ""`
                #keeps a null value from aborting the rest of the batch.
                positive = [t.strip() for t in (feedback.get("positive") or "").split(',')]
                negative = [t.strip() for t in (feedback.get("negative") or "").split(',')]

                for topic in column_names2:
                    #Aggregate the value: 1 for positive, -1 for negative, 0 for not mentioned
                    commentor_data[topic] += assign_value(topic, positive, negative)

            #Finalize the values to be within -1, 0, or 1
            for topic in column_names2:
                commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_tan_sc_sh_nonnull.append(commentor_data)
    except (ValueError, TypeError, AttributeError) as err:
        #Skip rows whose content is not the expected JSON structure
        #(json.JSONDecodeError is a ValueError). Unlike a bare except this
        #does not swallow KeyboardInterrupt, and the cause is reported.
        #NOTE(review): commentors parsed before the failure stay in the list,
        #so a failed row yields fewer records than reviews sent - verify the
        #row counts before the positional merge that follows.
        print(f"Invalid JSON in row {index}")
        print(f"    {type(err).__name__}: {err}")

tan_sc_sh_nonnull_api_cleaned_df.to_excel("tan_sc_sh_nonnull_api_cleaned_df.xlsx",index=False)

tan_sc_sh_nonnull_api_cleaned_df = pd.read_excel("tan_sc_sh_nonnull_api_cleaned_df.xlsx")

tan_sc_sh_nonnull_api_cleaned_df

In [388]:
# Column layout for the sentiment scores: commentor name first, then the topics
ordered_columns = [
    "Commentor Name",
    "Trust",
    "Store Experience",
    "Store Staff",
    "Product Design",
    "Product Variety",
    "Discount",
    "Making Charge",
    "Price",
    "Product Quality",
    "OLD Gold Jewellery Exchange",
]

# Build the score frame from the parsed records and put its columns in that order
tan_sc_sh_nonnull_sen_df = pd.DataFrame(processed_data_tan_sc_sh_nonnull)
tan_sc_sh_nonnull_sen_df = tan_sc_sh_nonnull_sen_df.loc[:, ordered_columns]

# Put the scores beside the reviews (axis=1 concat aligns rows on the index)
tan_sc_sh_nonnull_merged_df = pd.concat(
    [combined_df_tan_sc_sh, tan_sc_sh_nonnull_sen_df],
    axis=1,
)

# Stack the frame stored under 'tan_sc_sh_null' below the scored reviews
tan_sc_sh_final_sen_df = pd.concat(
    [tan_sc_sh_nonnull_merged_df, null_dataframes['tan_sc_sh_null']],
    ignore_index=True,
)

# Export a copy whose date column keeps only the first 10 characters of its string form
tan_sc_sh_final_sen_df_copy = tan_sc_sh_final_sen_df.copy()
published_as_text = tan_sc_sh_final_sen_df_copy["Published At Date"].astype(str)
tan_sc_sh_final_sen_df_copy["Published At Date"] = published_as_text.str.slice(stop=10)

tan_sc_sh_final_sen_df_copy.to_excel("sentiment_raw_output/tan_sc_sh_final_sen_df_jul.xlsx",index=False)


### tan_taj_db

In [389]:
# Score every review in tan_taj_db_nonnull_buckets with score_sentiments_jul,
# 25 rows per call, while a background thread prints live progress. Raw
# responses are collected in tan_taj_db_nonnull_api and token usage is summed
# for the cost summary printed at the end.
batch_counter = [0]  # one-element list so the progress thread sees updates
# Total number of calls across all buckets. Summing over every bucket keeps
# the total consistent with the loop below, which iterates all keys.
total_batches = sum(
    math.ceil(len(bucket_df) / 25) for bucket_df in tan_taj_db_nonnull_buckets.values()
)
key_counter = 0
total_keys = len(tan_taj_db_nonnull_buckets.keys())
tan_taj_db_nonnull_api = []
input_tokens_tan_taj_db_nonnull = 0
output_tokens_tan_taj_db_nonnull = 0
start_time_tan_taj_db = time.time()

for key in tan_taj_db_nonnull_buckets.keys():
    key_counter += 1
    current_df = tan_taj_db_nonnull_buckets[key][['Name', 'review_text']]
    counter = [0]
    input_tokens = 0
    output_tokens = 0

    # Threading setup
    total_iterations = math.ceil(len(current_df) / 25)
    stop_event = threading.Event()
    message_thread = threading.Thread(target=print_dynamic_message, args=(batch_counter,key_counter,counter,total_batches,total_keys, total_iterations,start_time_tan_taj_db, stop_event))
    message_thread.start()

    try:
        for start in range(0, len(current_df), 25):
            clear_output(wait=True)
            counter[0] += 1
            batch_counter[0] += 1
            end = start + 25
            result, it, ot = score_sentiments_jul(current_df[start:end])
            tan_taj_db_nonnull_api.append(result)
            input_tokens += it
            output_tokens += ot
    finally:
        # Always stop the progress thread, even when a call raises; otherwise
        # it would keep clearing and reprinting the cell output indefinitely.
        stop_event.set()
        message_thread.join()
    input_tokens_tan_taj_db_nonnull += input_tokens
    output_tokens_tan_taj_db_nonnull += output_tokens
    clear_output(wait=True)

overall_execution_time_tan_taj_db = time.time() - start_time_tan_taj_db
formatted_time_tan_taj_db = time.strftime('%H:%M:%S', time.gmtime(overall_execution_time_tan_taj_db))
# Hard-coded rates of 0.01 (input) and 0.03 (output) per 1000 tokens
# NOTE(review): presumably USD for the model in use - confirm against current pricing
input_token_cost_tan_taj_db = round((0.01/1000) * input_tokens_tan_taj_db_nonnull, 2)
output_token_cost_tan_taj_db = round((0.03/1000) * output_tokens_tan_taj_db_nonnull, 2)
total_cost_tan_taj_db = round(input_token_cost_tan_taj_db + output_token_cost_tan_taj_db, 2)

# Print the count itself rather than the one-element list (was "Executed [10] Iterations")
print(f"Executed {batch_counter[0]} Iterations")
print(f"Total Execution Time: {formatted_time_tan_taj_db}")
print(f"Total Input Tokens - {input_tokens_tan_taj_db_nonnull}")
print(f"Total Input Cost = {input_token_cost_tan_taj_db}")
print(f"Total Output Tokens - {output_tokens_tan_taj_db_nonnull}")
print(f"Total Output Cost = {output_token_cost_tan_taj_db}")
print(f"Total Cost = {total_cost_tan_taj_db}")

Executed [10] Iterations
Total Execution Time: 00:00:54
Total Input Tokens - 10253
Total Input Cost = 0.1
Total Output Tokens - 4116
Total Output Cost = 0.12
Total Cost = 0.22


In [390]:
# Strip the Markdown code fences ("```json" / "```") from each raw response in
# tan_taj_db_nonnull_api, swap every literal "[]" for a placeholder feedback
# entry with empty positive/negative strings, and load the cleaned strings
# into a DataFrame.
tan_taj_db_nonnull_api_cleaned = []
for raw_response in tan_taj_db_nonnull_api:
    unfenced = raw_response.replace("```json", "").replace("```", "")
    tan_taj_db_nonnull_api_cleaned.append(
        unfenced.replace("[]", "[{\"positive\": \"\", \"negative\": \"\"}]")
    )
tan_taj_db_nonnull_api_cleaned_df = pd.DataFrame(tan_taj_db_nonnull_api_cleaned)

# Topics that receive a sentiment score for every commentor
column_names2 = [
    'Trust',
    'Store Experience',
    'Store Staff',
    'Product Design',
    'Product Variety',
    'Discount',
    'Making Charge',
    'Price',
    'Product Quality',
    'OLD Gold Jewellery Exchange',
]

In [391]:
# Turn each cleaned API response into one record per commentor. Every response
# is parsed as a JSON object mapping a commentor name to a list of feedback
# entries; each entry carries comma-separated topic names under "positive"
# and "negative". A record holds a -1 / 0 / 1 score for every topic in
# column_names2 plus the commentor's name.
processed_data_tan_taj_db_nonnull = []

#Iterate over each row in the DataFrame
for index, row in tan_taj_db_nonnull_api_cleaned_df.iterrows():
    try:
        #Assuming the JSON string is in the first column
        json_string = row.iloc[0] if len(row) > 0 else ''
        data = json.loads(json_string)

        #Iterate over each key-value pair in the dictionary
        for commentor_name, feedback_list in data.items():
            #Create a dictionary for each commentor with default values as 0
            commentor_data = {col: 0 for col in column_names2}
            commentor_data["Commentor Name"] = commentor_name

            #A commentor may have several feedback entries; aggregate them.
            #`or []` lets a null/empty feedback list fall through as "no feedback".
            for feedback in feedback_list or []:
                #Strip each topic so "Trust, Price" matches both topics; `or ""`
                #keeps a null value from aborting the rest of the batch.
                positive = [t.strip() for t in (feedback.get("positive") or "").split(',')]
                negative = [t.strip() for t in (feedback.get("negative") or "").split(',')]

                for topic in column_names2:
                    #Aggregate the value: 1 for positive, -1 for negative, 0 for not mentioned
                    commentor_data[topic] += assign_value(topic, positive, negative)

            #Finalize the values to be within -1, 0, or 1
            for topic in column_names2:
                commentor_data[topic] = max(-1, min(1, commentor_data[topic]))

            #Append the commentor data to the processed data list
            processed_data_tan_taj_db_nonnull.append(commentor_data)
    except (ValueError, TypeError, AttributeError) as err:
        #Skip rows whose content is not the expected JSON structure
        #(json.JSONDecodeError is a ValueError). Unlike a bare except this
        #does not swallow KeyboardInterrupt, and the cause is reported.
        #NOTE(review): commentors parsed before the failure stay in the list,
        #so a failed row yields fewer records than reviews sent - verify the
        #row counts before the positional merge that follows.
        print(f"Invalid JSON in row {index}")
        print(f"    {type(err).__name__}: {err}")

tan_taj_db_nonnull_api_cleaned_df.to_excel("tan_taj_db_nonnull_api_cleaned_df.xlsx",index=False)

tan_taj_db_nonnull_api_cleaned_df = pd.read_excel("tan_taj_db_nonnull_api_cleaned_df.xlsx")

tan_taj_db_nonnull_api_cleaned_df

In [392]:
# Column layout for the sentiment scores: commentor name first, then the topics
ordered_columns = [
    "Commentor Name",
    "Trust",
    "Store Experience",
    "Store Staff",
    "Product Design",
    "Product Variety",
    "Discount",
    "Making Charge",
    "Price",
    "Product Quality",
    "OLD Gold Jewellery Exchange",
]

# Build the score frame from the parsed records and put its columns in that order
tan_taj_db_nonnull_sen_df = pd.DataFrame(processed_data_tan_taj_db_nonnull)
tan_taj_db_nonnull_sen_df = tan_taj_db_nonnull_sen_df.loc[:, ordered_columns]

# Put the scores beside the reviews (axis=1 concat aligns rows on the index)
tan_taj_db_nonnull_merged_df = pd.concat(
    [combined_df_tan_taj_db, tan_taj_db_nonnull_sen_df],
    axis=1,
)

# Stack the frame stored under 'tan_taj_db_null' below the scored reviews
tan_taj_db_final_sen_df = pd.concat(
    [tan_taj_db_nonnull_merged_df, null_dataframes['tan_taj_db_null']],
    ignore_index=True,
)

# Export a copy whose date column keeps only the first 10 characters of its string form
tan_taj_db_final_sen_df_copy = tan_taj_db_final_sen_df.copy()
published_as_text = tan_taj_db_final_sen_df_copy["Published At Date"].astype(str)
tan_taj_db_final_sen_df_copy["Published At Date"] = published_as_text.str.slice(stop=10)

tan_taj_db_final_sen_df_copy.to_excel("sentiment_raw_output/tan_taj_db_final_sen_df_jul.xlsx",index=False)


# Combining DataFrames & creating final data with all reviews from DAY 0

## Current Data

In [393]:
# Empty placeholder frame for the combined per-store sentiment data
combined_df_with_s_current = pd.DataFrame(data=None)

In [394]:
# Store codes whose per-store sentiment workbook is included in the combined
# data. Codes that are commented out are excluded.
store_codes_s = [
    "agd_mb",
    "bhi_ak",
    "bhi_dec_ga",
    #"eve_joh_ga",
    "jar_alg_il",
    "jar_aur_il",
    "jar_bol_il",
    "jar_lom_il",
    "jar_orl_il",
    "jar_sch_il",
    "jar_ver_il",
    "joy_ab",
    "joy_ak",
    "joy_chi_il",
    "joy_dm_ad",
    "joy_fri_tx",
    "joy_hou_tx",
    "joy_mz_ad",
    "joy_sh_ad",
    "joy_st_af",
    "joy_suw_ga",
    "kan_mb",
    "mal_ab",
    "mal_ak",
    "mal_aw_ad",
    "mal_b1_ad",
    "mal_b1_af",
    "mal_b2_ad",
    "mal_b2_af",
    "mal_chi_il",
    "mal_dm_ad",
    "mal_fri_tx",
    "mal_ise_nj",
    "mal_lu_ad",
    "mal_mb",
    "mal_nap_il",
    "mal_ric_tx",
    "mal_sc",
    "mal_sh_ad",
    "may_vie_va",
    "mia_awm_ad",
    "mia_bur_db",
    "min_ak",
    "mna_mb",
    "son_ise_nj",
    "tan_am_om",
    "tan_atl_ga",
    "tan_bar_db",
    "tan_chi_il",
    "tan_fah_db",
    "tan_fc_qa",
    "tan_fri_tx",
    "tan_gs_db",
    "tan_ham_ad",
    "tan_hou_tx",
    "tan_kar_db",
    "tan_lul_qa",
    "tan_mank_db",
    "tan_mee_db",
    "tan_new_nj",
    "tan_rol_sh",
    "tan_rse_wa",
    "tan_sc_ca",
    "tan_sc_sh",
    "tan_sil_db",
    "tan_taj_db",
    "tif_chi_il",
    "tif_eas_nj",
    "tif_hac_nj",
    "tif_nor_il",
    #"tif_par_nj",
    "tif_red_nj",
    "tif_ric_va",
    "tif_sho_nj",
    "tif_sko_il",
    "tif_vie_va",
    "vbj_fri_tx",
]

# Every workbook name is the store code followed by the same suffix
df_name_list_s = [f"{store_code}_final_sen_df_jul" for store_code in store_codes_s]


In [395]:
# Read every per-store sentiment workbook listed in df_name_list_s from
# sentiment_raw_output/checked/ and stack them into one frame.
# The frames are collected first and concatenated once: pd.concat inside the
# loop re-copies all earlier rows on every pass, and appending to the existing
# combined frame duplicated every row whenever this cell was re-run.
store_frames = []
for i in df_name_list_s:
    #Read the file into a DataFrame
    df = pd.read_excel(f"sentiment_raw_output/checked/{i}.xlsx")
    store_frames.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame
combined_df_with_s_current = (
    pd.concat(store_frames, ignore_index=True) if store_frames else pd.DataFrame()
)


In [396]:
# Display the column labels of the combined frame
combined_df_with_s_current.columns

Index(['Store Name', 'Name', 'Published At Date', 'Stars', 'Total Score',
       'year', 'month', 'review_text', 'Store Code Cleaned', 'word_count',
       'count_buckets', 'Commentor Name', 'Trust', 'Store Experience',
       'Store Staff', 'Product Design', 'Product Variety', 'Discount',
       'Making Charge', 'Price', 'Product Quality',
       'OLD Gold Jewellery Exchange'],
      dtype='object')

### Create the Country, Catchment & Grouped Store Name columns required by the front end

In [404]:
#Mapping Dictionaries
#country_mapping_dict: full 'Store Name' string -> region label ("GCC" or "USA").
#The keys are looked up with Series.map on the 'Store Name' column, so they must
#match the store names in the data exactly (spacing and punctuation included);
#a store name that is not a key here ends up as NaN in the 'Country' column.
country_mapping_dict = {
                            "Arakkal Gold and Diamonds LLC - Meena Bazar - Bur Dubai (Branch 3)" : "GCC",
                            "Bhima Jewellers - Al Karama" : "GCC",
                            "Bhindi Jewellers-Decatur, GA" : "USA",
                            "Evermark Jewelry-Johns Creek, GA" : "USA",
                            "Jared-Algonquin, IL" : "USA",
                            "Jared-Aurora, IL" : "USA",
                            "Jared-Bolingbrook, IL" : "USA",
                            "Jared-Lombard, IL" : "USA",
                            "Jared-Orland Park, IL" : "USA",
                            "Jared-Schaumburg, IL" : "USA",
                            "Jared-Vernon Hills, IL" : "USA",
                            "Joyalukkas Jewellery - Al Barsha" : "GCC",
                            "Joyalukkas Jewellery - Al Fahidi st - Al Fahidi" : "GCC",
                            "Joyalukkas Jewellery - Al Karama" : "GCC",
                            "Joyalukkas Jewellery - Dalma Plaza - Abu Dhabi" : "GCC",
                            "Joyalukkas Jewellery - Madinat Zayed Shopping Centre - Abu Dhabi" : "GCC",
                            "Joyalukkas Jewellery - Shabia - Abu Dhabi" : "GCC",
                            "Joyalukkas Jewellery-Chicago, IL" : "USA",
                            "Joyalukkas Jewellery-Frisco, TX" : "USA",
                            "Joyalukkas Jewellery-Houston, TX" : "USA",
                            "Joyalukkas Jewellery-Suwanee, GA" : "USA",
                            "Kanz Jewellers" : "GCC",
                            "Malabar Gold & Diamonds - Silicon Oasis Central" : "GCC",
                            "Malabar Gold & Diamonds-Chicago, IL" : "USA",
                            "Malabar Gold & Diamonds-Frisco, TX" : "USA",
                            "Malabar Gold & Diamonds-Iselin, NJ" : "USA",
                            "Malabar Gold & Diamonds-Naperville, IL" : "USA",
                            "Malabar Gold and Diamonds - Al Barsha - Dubai" : "GCC",
                            "Malabar Gold and Diamonds - Al Fahidi Street - Bur Dubai (Branch 1)" : "GCC",
                            "Malabar Gold and Diamonds - Al Karama - Dubai" : "GCC",
                            "Malabar Gold and Diamonds - Al Wahda Mall - Abu Dhabi" : "GCC",
                            "Malabar Gold and Diamonds - Dalma Mall - Abu Dhabi" : "GCC",
                            "Malabar Gold and Diamonds - Hamdan Street ( Branch 1)" : "GCC",
                            "Malabar Gold and Diamonds - Hamdan Street (Branch 2)" : "GCC",
                            "Malabar Gold and Diamonds - Lulu Hypermarket - Madinat Zayed" : "GCC",
                            "Malabar Gold and Diamonds - Meena Bazar - Dubai" : "GCC",
                            "Malabar Gold and Diamonds - Shabia Musaffah" : "GCC",
                            "Malabar Gold and Diamonds - Souq Al Kabeer Building - Bur Dubai (Branch 2)" : "GCC",
                            "Malani Jewellers-Richardson, TX" : "USA",
                            "May Jewelers-Vienna, VA" : "USA",
                            "Meena Jewellers - Meena Bazar" : "GCC",
                            "Mia-Al Wahda Mall, AD" : "GCC",
                            "Mia-Burjuman, DB" : "GCC",
                            "Mint Jewels - Al Karama" : "GCC",
                            "Sona Jewelers-Iselin, NJ" : "USA",
                            "Tanishq Jewellers-Al Barsha, DB" : "GCC",
                            "Tanishq Jewellers-Al Fahidi, DB" : "GCC",
                            "Tanishq Jewellers-Al Karama, DB" : "GCC",
                            "Tanishq Jewellers-Avenues Mall, OM" : "GCC",
                            "Tanishq Jewellers-Festival City, QA" : "GCC",
                            "Tanishq Jewellers-Gold Souk, DB" : "GCC",
                            "Tanishq Jewellers-Hamdan Bin Mohammed Street, AD" : "GCC",
                            "Tanishq Jewellers-Lulu Hypermarket, QA" : "GCC",
                            "Tanishq Jewellers-Meena Bazar, DB" : "GCC",
                            "Tanishq Jewellers-Rolla, SH" : "GCC",
                            "Tanishq Jewellers-Sharjah Central, SH" : "GCC",
                            "Tanishq Jewellers-Silicon Central, DB" : "GCC",
                            "Tanishq Jewellers-Taj, DB" : "GCC",
                            "Tanishq Jewellers-UW Mall Al Mankhool, DB" : "GCC",
                            "Tanishq-Atlanta, GA" : "USA",
                            "Tanishq-Chicago, IL" : "USA",
                            "Tanishq-Frisco, TX" : "USA",
                            "Tanishq-Houston, TX" : "USA",
                            "Tanishq-New Jersey, NJ" : "USA",
                            "Tanishq-Redmond Seattle, WA" : "USA",
                            "Tanishq-Santa Clara, CA" : "USA",
                            "Tiffany & Co-Chicago, IL" : "USA",
                            "Tiffany & Co-East Rutherford, NJ" : "USA",
                            "Tiffany & Co-Hackensack, NJ" : "USA",
                            "Tiffany & Co-Northbrook, IL" : "USA",
                            "Tiffany & Co-Paramus, NJ" : "USA",
                            "Tiffany & Co-Red Bank, NJ" : "USA",
                            "Tiffany & Co-Richmond, VA" : "USA",
                            "Tiffany & Co-Short Hills, NJ" : "USA",
                            "Tiffany & Co-Skokie, IL" : "USA",
                            "Tiffany & Co-Vienna, VA" : "USA",
                            "VBJ Jewellers-Frisco, TX" : "USA"
                        }

##############################
#catchment_mapping_dict: full 'Store Name' string -> catchment label used to
#group stores by area (e.g. "Chicago", "Abu Dhabi", "Meena Bazar").
#Keys are matched exactly against the 'Store Name' column via Series.map;
#a store name that is not a key here ends up as NaN in the 'Catchment' column.
#NOTE(review): the Vienna, VA and Richmond, VA stores are assigned the
#"New Jersey" catchment - confirm that this grouping is intended.
catchment_mapping_dict = {
                            "Arakkal Gold and Diamonds LLC - Meena Bazar - Bur Dubai (Branch 3)" : "Meena Bazar",
                            "Bhima Jewellers - Al Karama" : "Al Karama",
                            "Bhindi Jewellers-Decatur, GA" : "GA",
                            "Evermark Jewelry-Johns Creek, GA" : "GA",
                            "Jared-Algonquin, IL" : "Chicago",
                            "Jared-Aurora, IL" : "Chicago",
                            "Jared-Bolingbrook, IL" : "Chicago",
                            "Jared-Lombard, IL" : "Chicago",
                            "Jared-Orland Park, IL" : "Chicago",
                            "Jared-Schaumburg, IL" : "Chicago",
                            "Jared-Vernon Hills, IL" : "Chicago",
                            "Joyalukkas Jewellery - Al Barsha" : "Al Barsha",
                            "Joyalukkas Jewellery - Al Fahidi st - Al Fahidi" : "Al Fahidi",
                            "Joyalukkas Jewellery - Al Karama" : "Al Karama",
                            "Joyalukkas Jewellery - Dalma Plaza - Abu Dhabi" : "Abu Dhabi",
                            "Joyalukkas Jewellery - Madinat Zayed Shopping Centre - Abu Dhabi" : "Abu Dhabi",
                            "Joyalukkas Jewellery - Shabia - Abu Dhabi" : "Abu Dhabi",
                            "Joyalukkas Jewellery-Chicago, IL" : "Chicago",
                            "Joyalukkas Jewellery-Frisco, TX" : "Dallas",
                            "Joyalukkas Jewellery-Houston, TX" : "Houston",
                            "Joyalukkas Jewellery-Suwanee, GA" : "GA",
                            "Kanz Jewellers" : "Meena Bazar",
                            "Malabar Gold & Diamonds - Silicon Oasis Central" : "Silicon Central",
                            "Malabar Gold & Diamonds-Chicago, IL" : "Chicago",
                            "Malabar Gold & Diamonds-Frisco, TX" : "Dallas",
                            "Malabar Gold & Diamonds-Iselin, NJ" : "New Jersey",
                            "Malabar Gold & Diamonds-Naperville, IL" : "Chicago",
                            "Malabar Gold and Diamonds - Al Barsha - Dubai" : "Al Barsha",
                            "Malabar Gold and Diamonds - Al Fahidi Street - Bur Dubai (Branch 1)" : "Al Fahidi",
                            "Malabar Gold and Diamonds - Al Karama - Dubai" : "Al Karama",
                            "Malabar Gold and Diamonds - Al Wahda Mall - Abu Dhabi" : "Abu Dhabi",
                            "Malabar Gold and Diamonds - Dalma Mall - Abu Dhabi" : "Abu Dhabi",
                            "Malabar Gold and Diamonds - Hamdan Street ( Branch 1)" : "Abu Dhabi",
                            "Malabar Gold and Diamonds - Hamdan Street (Branch 2)" : "Abu Dhabi",
                            "Malabar Gold and Diamonds - Lulu Hypermarket - Madinat Zayed" : "Abu Dhabi",
                            "Malabar Gold and Diamonds - Meena Bazar - Dubai" : "Meena Bazar",
                            "Malabar Gold and Diamonds - Shabia Musaffah" : "Abu Dhabi",
                            "Malabar Gold and Diamonds - Souq Al Kabeer Building - Bur Dubai (Branch 2)" : "Al Fahidi",
                            "Malani Jewellers-Richardson, TX" : "Dallas",
                            "May Jewelers-Vienna, VA" : "New Jersey",
                            "Meena Jewellers - Meena Bazar" : "Meena Bazar",
                            "Mint Jewels - Al Karama" : "Al Karama",
                            "Sona Jewelers-Iselin, NJ" : "New Jersey",
                            "Tiffany & Co-Chicago, IL" : "Chicago",
                            "Tiffany & Co-East Rutherford, NJ" : "New Jersey",
                            "Tiffany & Co-Hackensack, NJ" : "New Jersey",
                            "Tiffany & Co-Northbrook, IL" : "Chicago",
                            "Tiffany & Co-Paramus, NJ" : "New Jersey",
                            "Tiffany & Co-Red Bank, NJ" : "New Jersey",
                            "Tiffany & Co-Richmond, VA" : "New Jersey",
                            "Tiffany & Co-Short Hills, NJ" : "New Jersey",
                            "Tiffany & Co-Skokie, IL" : "Chicago",
                            "Tiffany & Co-Vienna, VA" : "New Jersey",
                            "VBJ Jewellers-Frisco, TX" : "Dallas",
                            "Mia-Al Wahda Mall, AD" : "Abu Dhabi",
                            "Mia-Burjuman, DB" : "Burjuman",
                            "Tanishq Jewellers-Al Barsha, DB" : "Al Barsha",
                            "Tanishq Jewellers-Al Fahidi, DB" : "Al Fahidi",
                            "Tanishq Jewellers-Al Karama, DB" : "Al Karama",
                            "Tanishq Jewellers-Avenues Mall, OM" : "Oman",
                            "Tanishq Jewellers-Festival City, QA" : "Qatar",
                            "Tanishq Jewellers-Gold Souk, DB" : "Gold Souk",
                            "Tanishq Jewellers-Hamdan Bin Mohammed Street, AD" : "Abu Dhabi",
                            "Tanishq Jewellers-Lulu Hypermarket, QA" : "Qatar",
                            "Tanishq Jewellers-Meena Bazar, DB" : "Meena Bazar",
                            "Tanishq Jewellers-Rolla, SH" : "Sharjah",
                            "Tanishq Jewellers-Sharjah Central, SH" : "Sharjah",
                            "Tanishq Jewellers-Silicon Central, DB" : "Silicon Central",
                            "Tanishq Jewellers-Taj, DB" : "Taj",
                            "Tanishq Jewellers-UW Mall Al Mankhool, DB" : "Mankhool",
                            "Tanishq-Atlanta, GA" : "GA",
                            "Tanishq-Chicago, IL" : "Chicago",
                            "Tanishq-Frisco, TX" : "Dallas",
                            "Tanishq-Houston, TX" : "Houston",
                            "Tanishq-New Jersey, NJ" : "New Jersey",
                            "Tanishq-Redmond Seattle, WA" : "Seattle",
                            "Tanishq-Santa Clara, CA" : "Santa Clara"
                        }


#########################
#grp_store_mapping_dict: full 'Store Name' string -> short brand/group label
#(e.g. every "Jared-..." store maps to "Jared").
#Keys are matched exactly against the 'Store Name' column via Series.map;
#a store name that is not a key here ends up as NaN in 'Grouped Store Name'.
grp_store_mapping_dict = {
                            "Arakkal Gold and Diamonds LLC - Meena Bazar - Bur Dubai (Branch 3)" :  "Arakkal",
                            "Bhima Jewellers - Al Karama" :  "Bhima",
                            "Bhindi Jewellers-Decatur, GA" :  "Bhindi",
                            "Evermark Jewelry-Johns Creek, GA" :  "Evermark Jewelry",
                            "Jared-Algonquin, IL" :  "Jared",
                            "Jared-Aurora, IL" :  "Jared",
                            "Jared-Bolingbrook, IL" :  "Jared",
                            "Jared-Lombard, IL" :  "Jared",
                            "Jared-Orland Park, IL" :  "Jared",
                            "Jared-Schaumburg, IL" :  "Jared",
                            "Jared-Vernon Hills, IL" :  "Jared",
                            "Joyalukkas Jewellery - Al Barsha" :  "Joyalukkas",
                            "Joyalukkas Jewellery - Al Fahidi st - Al Fahidi" :  "Joyalukkas",
                            "Joyalukkas Jewellery - Al Karama" :  "Joyalukkas",
                            "Joyalukkas Jewellery - Dalma Plaza - Abu Dhabi" :  "Joyalukkas",
                            "Joyalukkas Jewellery - Madinat Zayed Shopping Centre - Abu Dhabi" :  "Joyalukkas",
                            "Joyalukkas Jewellery - Shabia - Abu Dhabi" :  "Joyalukkas",
                            "Joyalukkas Jewellery-Chicago, IL" :  "Joyalukkas",
                            "Joyalukkas Jewellery-Frisco, TX" :  "Joyalukkas",
                            "Joyalukkas Jewellery-Houston, TX" :  "Joyalukkas",
                            "Joyalukkas Jewellery-Suwanee, GA" :  "Joyalukkas",
                            "Kanz Jewellers" :  "Kanz",
                            "Malabar Gold & Diamonds - Silicon Oasis Central" :  "Malabar",
                            "Malabar Gold & Diamonds-Chicago, IL" :  "Malabar",
                            "Malabar Gold & Diamonds-Frisco, TX" :  "Malabar",
                            "Malabar Gold & Diamonds-Iselin, NJ" :  "Malabar",
                            "Malabar Gold & Diamonds-Naperville, IL" :  "Malabar",
                            "Malabar Gold and Diamonds - Al Barsha - Dubai" :  "Malabar",
                            "Malabar Gold and Diamonds - Al Fahidi Street - Bur Dubai (Branch 1)" :  "Malabar",
                            "Malabar Gold and Diamonds - Al Karama - Dubai" :  "Malabar",
                            "Malabar Gold and Diamonds - Al Wahda Mall - Abu Dhabi" :  "Malabar",
                            "Malabar Gold and Diamonds - Dalma Mall - Abu Dhabi" :  "Malabar",
                            "Malabar Gold and Diamonds - Hamdan Street ( Branch 1)" :  "Malabar",
                            "Malabar Gold and Diamonds - Hamdan Street (Branch 2)" :  "Malabar",
                            "Malabar Gold and Diamonds - Lulu Hypermarket - Madinat Zayed" :  "Malabar",
                            "Malabar Gold and Diamonds - Meena Bazar - Dubai" :  "Malabar",
                            "Malabar Gold and Diamonds - Shabia Musaffah" :  "Malabar",
                            "Malabar Gold and Diamonds - Souq Al Kabeer Building - Bur Dubai (Branch 2)" :  "Malabar",
                            "Malani Jewellers-Richardson, TX" :  "Malani Jewellers",
                            "May Jewelers-Vienna, VA" :  "May Jewelers",
                            "Meena Jewellers - Meena Bazar" :  "Meena",
                            "Mint Jewels - Al Karama" :  "Mint",
                            "Sona Jewelers-Iselin, NJ" :  "Sona",
                            "Tiffany & Co-Chicago, IL" :  "Tiffany",
                            "Tiffany & Co-East Rutherford, NJ" :  "Tiffany",
                            "Tiffany & Co-Hackensack, NJ" :  "Tiffany",
                            "Tiffany & Co-Northbrook, IL" :  "Tiffany",
                            "Tiffany & Co-Paramus, NJ" :  "Tiffany",
                            "Tiffany & Co-Red Bank, NJ" :  "Tiffany",
                            "Tiffany & Co-Richmond, VA" :  "Tiffany",
                            "Tiffany & Co-Short Hills, NJ" :  "Tiffany",
                            "Tiffany & Co-Skokie, IL" :  "Tiffany",
                            "Tiffany & Co-Vienna, VA" :  "Tiffany",
                            "VBJ Jewellers-Frisco, TX" :  "VBJ",
                            "Mia-Al Wahda Mall, AD" : "Mia",
                            "Mia-Burjuman, DB" : "Mia",
                            "Tanishq Jewellers-Al Barsha, DB" :  "Tanishq",
                            "Tanishq Jewellers-Al Fahidi, DB" :  "Tanishq",
                            "Tanishq Jewellers-Al Karama, DB" :  "Tanishq",
                            "Tanishq Jewellers-Avenues Mall, OM" :  "Tanishq",
                            "Tanishq Jewellers-Festival City, QA" :  "Tanishq",
                            "Tanishq Jewellers-Gold Souk, DB" :  "Tanishq",
                            "Tanishq Jewellers-Hamdan Bin Mohammed Street, AD" :  "Tanishq",
                            "Tanishq Jewellers-Lulu Hypermarket, QA" :  "Tanishq",
                            "Tanishq Jewellers-Meena Bazar, DB" :  "Tanishq",
                            "Tanishq Jewellers-Rolla, SH" :  "Tanishq",
                            "Tanishq Jewellers-Sharjah Central, SH" :  "Tanishq",
                            "Tanishq Jewellers-Silicon Central, DB" :  "Tanishq",
                            "Tanishq Jewellers-Taj, DB" :  "Tanishq",
                            "Tanishq Jewellers-UW Mall Al Mankhool, DB" :  "Tanishq",
                            "Tanishq-Atlanta, GA" :  "Tanishq",
                            "Tanishq-Chicago, IL" :  "Tanishq",
                            "Tanishq-Frisco, TX" :  "Tanishq",
                            "Tanishq-Houston, TX" :  "Tanishq",
                            "Tanishq-New Jersey, NJ" :  "Tanishq",
                            "Tanishq-Redmond Seattle, WA" :  "Tanishq",
                            "Tanishq-Santa Clara, CA" :  "Tanishq"
                        }

In [405]:
#Column Creations - Country, Catchment & Grouped Store Name
#Each new column is a dictionary lookup of 'Store Name'; a store name that is
#missing from the corresponding dictionary produces NaN in that column.
store_name_series = combined_df_with_s_current['Store Name']
for target_column, store_lookup in (
    ('Country', country_mapping_dict),
    ('Catchment', catchment_mapping_dict),
    ('Grouped Store Name', grp_store_mapping_dict),
):
    combined_df_with_s_current[target_column] = store_name_series.map(store_lookup)


In [406]:
#Show the column labels again to confirm the three mapped columns were added
combined_df_with_s_current.columns

Index(['Store Name', 'Name', 'Published At Date', 'Stars', 'Total Score',
       'year', 'month', 'review_text', 'Store Code Cleaned', 'word_count',
       'count_buckets', 'Commentor Name', 'Trust', 'Store Experience',
       'Store Staff', 'Product Design', 'Product Variety', 'Discount',
       'Making Charge', 'Price', 'Product Quality',
       'OLD Gold Jewellery Exchange', 'Country', 'Catchment',
       'Grouped Store Name'],
      dtype='object')

In [409]:
#Count missing values per column (a non-zero count in a mapped column would mean an unmapped store name)
combined_df_with_s_current.isnull().sum()

Store Name                        0
Name                              0
Published At Date                 0
Stars                             0
Total Score                       0
year                              0
month                             0
review_text                    1374
Store Code Cleaned             8472
word_count                     1374
count_buckets                  1374
Commentor Name                    0
Trust                             0
Store Experience                  0
Store Staff                       0
Product Design                    0
Product Variety                   0
Discount                          0
Making Charge                     0
Price                             0
Product Quality                   0
OLD Gold Jewellery Exchange       0
Country                           0
Catchment                         0
Grouped Store Name                0
dtype: int64

In [410]:
#Number of rows per non-null 'Store Code Cleaned' value, most frequent first
combined_df_with_s_current["Store Code Cleaned"].value_counts()

Store Code Cleaned
XDF    1781
XDB    1028
XDG    1015
XTD     776
XDM     682
XDS     681
XDK     488
XDJ     407
XAH     390
XAC     372
XSL     318
XTH     248
XNJ     231
XSR     222
XCG     203
XDT     152
XQD     144
XWS     133
XOM      81
XBA      63
XDX      30
XQF      29
XAW      26
Name: count, dtype: int64

In [411]:
#Rename Columns
#Pairs of (existing column label, new column label). Labels that are not
#present in the frame are ignored by DataFrame.rename.
rename_dict = dict([
    ('Name', 'Name of the Reviewer'),
    ('Published At Date', 'review_datetime_utc'),
    ('Stars', 'review_rating'),
    ('Total Score', 'Avg Rating'),
    # ('Check', 'Reviewer Name Check'),
    ('Trust', 'Customer Confidence'),
    ('OLD Gold Jewellery Exchange', 'Jewellery Exchange'),
])

#Renames in place: the existing DataFrame object is modified
combined_df_with_s_current.rename(columns=rename_dict, inplace=True)

In [412]:
#Drop unwanted columns
#Remove the two word-count helper columns from the current-period frame
helper_columns = ['word_count', 'count_buckets']
combined_df_with_s_current = combined_df_with_s_current.drop(columns=helper_columns)

In [413]:
#Show the column labels after the rename and the drop
combined_df_with_s_current.columns

Index(['Store Name', 'Name of the Reviewer', 'review_datetime_utc',
       'review_rating', 'Avg Rating', 'year', 'month', 'review_text',
       'Store Code Cleaned', 'Commentor Name', 'Customer Confidence',
       'Store Experience', 'Store Staff', 'Product Design', 'Product Variety',
       'Discount', 'Making Charge', 'Price', 'Product Quality',
       'Jewellery Exchange', 'Country', 'Catchment', 'Grouped Store Name'],
      dtype='object')

#Drop unwanted columns
#combined_df_with_s_current = combined_df_with_s_current.drop(['Unnamed: 22'], axis=1)
#The column listing printed just above contains no 'check' column, so a plain
#drop raises KeyError. errors='ignore' removes the column when it exists and
#leaves the frame unchanged when it does not.
combined_df_with_s_current = combined_df_with_s_current.drop(columns=['check'], errors='ignore')

In [414]:
#Write the mapped current-period frame to an Excel workbook; index=False leaves the row index out of the file
combined_df_with_s_current.to_excel("temp/sentiment_mapped_current.xlsx", index=False)

In [415]:
#Preview the first rows of the current-period frame
combined_df_with_s_current.head()

Unnamed: 0,Store Name,Name of the Reviewer,review_datetime_utc,review_rating,Avg Rating,year,month,review_text,Store Code Cleaned,Commentor Name,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange,Country,Catchment,Grouped Store Name
0,Arakkal Gold and Diamonds LLC - Meena Bazar - ...,AlJawharaflowers&Gifts,2025-05-04,5,5.0,2025,5,We are their customer for gold and they are ou...,,AlJawharaflowers&Gifts,1,0,0,0,0,0,0,0,0,0,GCC,Meena Bazar,Arakkal
1,Bhima Jewellers - Al Karama,Binu Pillai,2025-05-29,4,4.7,2025,5,Excellent customer care.,,Binu Pillai,0,0,1,0,0,0,0,0,0,0,GCC,Al Karama,Bhima
2,Bhima Jewellers - Al Karama,Sajani Manikandan,2025-05-23,5,4.7,2025,5,Excellent customer service.,,Sajani Manikandan,0,0,1,0,0,0,0,0,0,0,GCC,Al Karama,Bhima
3,Bhima Jewellers - Al Karama,ameen sb,2025-05-20,5,4.7,2025,5,Good collections,,ameen sb,0,0,0,0,1,0,0,0,0,0,GCC,Al Karama,Bhima
4,Bhima Jewellers - Al Karama,Parthibarajan s,2025-05-15,5,4.7,2025,5,Good,,Parthibarajan s,0,0,0,0,0,0,0,0,0,0,GCC,Al Karama,Bhima


## Past Data

In [418]:
#Load the previously combined (past) sentiment data from its Excel workbook
combined_df_with_s_past = pd.read_excel("sentiment_raw_output/old_full/combined_df_final_S_full.xlsx")

In [419]:
#List the distinct 'Avg Rating' values stored in the past data
combined_df_with_s_past['Avg Rating'].unique().tolist()

[5.0,
 4.7,
 4.6,
 4.3,
 4.4,
 4.5,
 4.9,
 4.8,
 4.2,
 4.1,
 4.562133891213389,
 4.900000000000001]

In [420]:
#Show the column labels of the past data
combined_df_with_s_past.columns

Index(['Store Name', 'Name of the Reviewer', 'review_datetime_utc',
       'review_rating', 'Avg Rating', 'year', 'month', 'review_text',
       'Store Code Cleaned', 'Commentor Name', 'Customer Confidence',
       'Store Experience', 'Store Staff', 'Product Design', 'Product Variety',
       'Discount', 'Making Charge', 'Price', 'Product Quality',
       'Jewellery Exchange', 'Country', 'Catchment', 'Grouped Store Name',
       'Total Reviews'],
      dtype='object')

In [421]:
#Drop unwanted columns (Total Reviews to be altered as per the latest)
#'Avg Rating' is removed here as well; a later cell assigns it again from the
#current-period ratings.
stale_columns = ['Avg Rating', 'Total Reviews']  #,'Reviewer Name Check'
combined_df_with_s_past = combined_df_with_s_past.drop(columns=stale_columns)

In [422]:
#List the distinct 'Avg Rating' values in the current-period data
combined_df_with_s_current['Avg Rating'].unique().tolist()

[5.0, 4.7, 4.5, 4.6, 4.9, 4.8, 4.2, 4.3, 4.4, 4.1]

In [423]:
#Store names in the current-period data that have at least one row with a missing 'Avg Rating'
current_rating_missing = combined_df_with_s_current['Avg Rating'].isna()
nan_avg_rating_stores = (
    combined_df_with_s_current.loc[current_rating_missing, 'Store Name']
    .unique()
    .tolist()
)
print(nan_avg_rating_stores)

[]


In [424]:
#Aggregate the 'Avg Rating' by 'Store Name'
#One row per store: the mean of that store's 'Avg Rating' in the current-period data
rating_map_df = (
    combined_df_with_s_current
    .groupby('Store Name')['Avg Rating']
    .mean()
    .reset_index()  # Make index a column
)

# Now build the map
rating_map = rating_map_df.set_index('Store Name')['Avg Rating'].to_dict()

# Apply the map to your target DataFrame
#A past store with no rows in the current-period data is not a key of
#rating_map, so its 'Avg Rating' becomes NaN here.
past_store_names = combined_df_with_s_past['Store Name']
combined_df_with_s_past['Avg Rating'] = past_store_names.map(rating_map)


In [425]:
#Display the store -> average rating lookup built from the current-period data
rating_map

{'Arakkal Gold and Diamonds LLC - Meena Bazar - Bur Dubai (Branch 3)': 5.0,
 'Bhima Jewellers - Al Karama': 4.7,
 'Bhindi Jewellers-Decatur, GA': 4.7,
 'Jared-Algonquin, IL': 4.7,
 'Jared-Aurora, IL': 4.5,
 'Jared-Bolingbrook, IL': 4.6,
 'Jared-Lombard, IL': 4.5,
 'Jared-Orland Park, IL': 4.5,
 'Jared-Schaumburg, IL': 4.5,
 'Jared-Vernon Hills, IL': 4.6,
 'Joyalukkas Jewellery - Al Barsha': 4.9,
 'Joyalukkas Jewellery - Al Fahidi st - Al Fahidi': 4.9,
 'Joyalukkas Jewellery - Al Karama': 4.8,
 'Joyalukkas Jewellery - Dalma Plaza - Abu Dhabi': 4.8,
 'Joyalukkas Jewellery - Madinat Zayed Shopping Centre - Abu Dhabi': 4.9,
 'Joyalukkas Jewellery - Shabia - Abu Dhabi': 4.6,
 'Joyalukkas Jewellery-Chicago, IL': 4.8,
 'Joyalukkas Jewellery-Frisco, TX': 4.9,
 'Joyalukkas Jewellery-Houston, TX': 4.7,
 'Joyalukkas Jewellery-Suwanee, GA': 4.9,
 'Kanz Jewellers': 4.8,
 'Malabar Gold & Diamonds - Silicon Oasis Central': 4.9,
 'Malabar Gold & Diamonds-Chicago, IL': 4.9,
 'Malabar Gold & Diamonds-Fr

In [426]:
#Aligning Column Orders
#Column layout the past data is reordered to before it is stacked with the
#current-period data. Selecting with this list raises KeyError if any of these
#columns is missing from the past frame.
desired_column_order = [
    'Store Name', 'Name of the Reviewer', 'review_datetime_utc',
    'review_rating', 'Avg Rating', 'year', 'month', 'review_text',
    'Store Code Cleaned', 'Commentor Name', 'Customer Confidence',
    'Store Experience', 'Store Staff', 'Product Design', 'Product Variety',
    'Discount', 'Making Charge', 'Price', 'Product Quality',
    'Jewellery Exchange', 'Country', 'Catchment', 'Grouped Store Name',
]

combined_df_with_s_past = combined_df_with_s_past.loc[:, desired_column_order]

In [428]:
#List the distinct 'Avg Rating' values now held by the past data (NaN marks stores the rating map did not cover)
combined_df_with_s_past['Avg Rating'].unique().tolist()

[5.0, 4.7, nan, 4.5, 4.6, 4.9, 4.8, 4.2, 4.3, 4.4, 4.1]

In [429]:
#Store names in the past data whose 'Avg Rating' is missing after the remap
past_rating_missing = combined_df_with_s_past['Avg Rating'].isna()
nan_avg_rating_stores = (
    combined_df_with_s_past.loc[past_rating_missing, 'Store Name']
    .unique()
    .tolist()
)
print(nan_avg_rating_stores)

['Evermark Jewelry-Johns Creek, GA', 'Tiffany & Co-Paramus, NJ']


In [430]:
#Show the past-data column labels (to compare with the current-period frame)
combined_df_with_s_past.columns

Index(['Store Name', 'Name of the Reviewer', 'review_datetime_utc',
       'review_rating', 'Avg Rating', 'year', 'month', 'review_text',
       'Store Code Cleaned', 'Commentor Name', 'Customer Confidence',
       'Store Experience', 'Store Staff', 'Product Design', 'Product Variety',
       'Discount', 'Making Charge', 'Price', 'Product Quality',
       'Jewellery Exchange', 'Country', 'Catchment', 'Grouped Store Name'],
      dtype='object')

In [431]:
#Show the current-period column labels (to compare with the past frame)
combined_df_with_s_current.columns

Index(['Store Name', 'Name of the Reviewer', 'review_datetime_utc',
       'review_rating', 'Avg Rating', 'year', 'month', 'review_text',
       'Store Code Cleaned', 'Commentor Name', 'Customer Confidence',
       'Store Experience', 'Store Staff', 'Product Design', 'Product Variety',
       'Discount', 'Making Charge', 'Price', 'Product Quality',
       'Jewellery Exchange', 'Country', 'Catchment', 'Grouped Store Name'],
      dtype='object')

## Combining past & current data

In [432]:
#Stack the past rows followed by the current-period rows into one frame.
#The earlier `combined_df_with_s = pd.DataFrame()` initialisation was removed:
#it was overwritten by the very next statement and had no effect.
#ignore_index=True renumbers the rows 0..n-1 across both inputs.
combined_df_with_s = pd.concat(
    [combined_df_with_s_past, combined_df_with_s_current],
    ignore_index=True,
)

In [433]:
#Show the column labels of the stacked past + current frame
combined_df_with_s.columns

Index(['Store Name', 'Name of the Reviewer', 'review_datetime_utc',
       'review_rating', 'Avg Rating', 'year', 'month', 'review_text',
       'Store Code Cleaned', 'Commentor Name', 'Customer Confidence',
       'Store Experience', 'Store Staff', 'Product Design', 'Product Variety',
       'Discount', 'Making Charge', 'Price', 'Product Quality',
       'Jewellery Exchange', 'Country', 'Catchment', 'Grouped Store Name'],
      dtype='object')

In [434]:
#Count missing values per column in the stacked past + current frame
combined_df_with_s.isnull().sum()

Store Name                   0
Name of the Reviewer         0
review_datetime_utc      17162
review_rating                0
Avg Rating                  70
year                         0
month                        0
review_text              23978
Store Code Cleaned      179308
Commentor Name               0
Customer Confidence          0
Store Experience             0
Store Staff                  0
Product Design               0
Product Variety              0
Discount                     0
Making Charge                0
Price                        0
Product Quality              0
Jewellery Exchange           0
Country                      0
Catchment                    0
Grouped Store Name           0
dtype: int64

## Adding 'Total Reviews' Column

In [435]:
#List every distinct store name in the stacked frame
combined_df_with_s['Store Name'].unique().tolist()

['Arakkal Gold and Diamonds LLC - Meena Bazar - Bur Dubai (Branch 3)',
 'Bhima Jewellers - Al Karama',
 'Bhindi Jewellers-Decatur, GA',
 'Evermark Jewelry-Johns Creek, GA',
 'Jared-Algonquin, IL',
 'Jared-Aurora, IL',
 'Jared-Bolingbrook, IL',
 'Jared-Lombard, IL',
 'Jared-Orland Park, IL',
 'Jared-Schaumburg, IL',
 'Jared-Vernon Hills, IL',
 'Joyalukkas Jewellery - Al Barsha',
 'Joyalukkas Jewellery - Al Fahidi st - Al Fahidi',
 'Joyalukkas Jewellery - Al Karama',
 'Joyalukkas Jewellery - Dalma Plaza - Abu Dhabi',
 'Joyalukkas Jewellery - Madinat Zayed Shopping Centre - Abu Dhabi',
 'Joyalukkas Jewellery - Shabia - Abu Dhabi',
 'Joyalukkas Jewellery-Chicago, IL',
 'Joyalukkas Jewellery-Frisco, TX',
 'Joyalukkas Jewellery-Houston, TX',
 'Joyalukkas Jewellery-Suwanee, GA',
 'Kanz Jewellers',
 'Malabar Gold & Diamonds - Silicon Oasis Central',
 'Malabar Gold & Diamonds-Chicago, IL',
 'Malabar Gold & Diamonds-Frisco, TX',
 'Malabar Gold & Diamonds-Iselin, NJ',
 'Malabar Gold & Diamonds-Na

In [436]:
# Lookup table: store name -> total number of reviews for that store.
# It is consumed with Series.map() on the 'Store Name' column, so the keys must
# match the store names exactly (including spacing and punctuation).
# NOTE(review): the counts are hard-coded; their source and snapshot date are not
# visible in this notebook section - confirm before reusing them.
store_to_total_rating_dict = {
                                "Arakkal Gold and Diamonds LLC - Meena Bazar - Bur Dubai (Branch 3)" : 1337,
                                "Bhima Jewellers - Al Karama" : 1303,
                                "Bhindi Jewellers-Decatur, GA" : 409,
                                "Evermark Jewelry-Johns Creek, GA" : 27,
                                "Jared-Algonquin, IL" : 378,
                                "Jared-Aurora, IL" : 254,
                                "Jared-Bolingbrook, IL" : 397,
                                "Jared-Lombard, IL" : 193,
                                "Jared-Orland Park, IL" : 349,
                                "Jared-Schaumburg, IL" : 531,
                                "Jared-Vernon Hills, IL" : 343,
                                "Joyalukkas Jewellery - Al Barsha" : 3090,
                                "Joyalukkas Jewellery - Al Fahidi st - Al Fahidi" : 12261,
                                "Joyalukkas Jewellery - Al Karama" : 7174,
                                "Joyalukkas Jewellery - Dalma Plaza - Abu Dhabi" : 3463,
                                "Joyalukkas Jewellery - Madinat Zayed Shopping Centre - Abu Dhabi" : 1730,
                                "Joyalukkas Jewellery - Shabia - Abu Dhabi" : 1985,
                                "Joyalukkas Jewellery-Chicago, IL" : 2780,
                                "Joyalukkas Jewellery-Frisco, TX" : 2296,
                                "Joyalukkas Jewellery-Houston, TX" : 2200,
                                "Joyalukkas Jewellery-Suwanee, GA" : 1970,
                                "Kanz Jewellers" : 1238,
                                "Malabar Gold & Diamonds - Silicon Oasis Central" : 1713,
                                "Malabar Gold & Diamonds-Chicago, IL" : 3574,
                                "Malabar Gold & Diamonds-Frisco, TX" : 3969,
                                "Malabar Gold & Diamonds-Iselin, NJ" : 7416,
                                "Malabar Gold & Diamonds-Naperville, IL" : 2027,
                                "Malabar Gold and Diamonds - Al Barsha - Dubai" : 11456,
                                "Malabar Gold and Diamonds - Al Fahidi Street - Bur Dubai (Branch 1)" : 11326,
                                "Malabar Gold and Diamonds - Al Karama - Dubai" : 9035,
                                "Malabar Gold and Diamonds - Al Wahda Mall - Abu Dhabi" : 7872,
                                "Malabar Gold and Diamonds - Dalma Mall - Abu Dhabi" : 5331,
                                "Malabar Gold and Diamonds - Hamdan Street ( Branch 1)" : 6132,
                                "Malabar Gold and Diamonds - Hamdan Street (Branch 2)" : 11562,
                                "Malabar Gold and Diamonds - Lulu Hypermarket - Madinat Zayed" : 14859,
                                "Malabar Gold and Diamonds - Meena Bazar - Dubai" : 12438,
                                "Malabar Gold and Diamonds - Shabia Musaffah" : 10887,
                                "Malabar Gold and Diamonds - Souq Al Kabeer Building - Bur Dubai (Branch 2)" : 4620,
                                "Malani Jewellers-Richardson, TX" : 6913,
                                "May Jewelers-Vienna, VA" : 164,
                                "Meena Jewellers - Meena Bazar" : 2723,
                                "Mia-Al Wahda Mall, AD" : 23,
                                "Mia-Burjuman, DB" : 619,
                                "Mint Jewels - Al Karama" : 5413,
                                "Sona Jewelers-Iselin, NJ" : 1288,
                                "Tanishq Jewellers-Al Barsha, DB" : 3267,
                                "Tanishq Jewellers-Al Fahidi, DB" : 3880,
                                "Tanishq Jewellers-Al Karama, DB" : 2049,
                                "Tanishq Jewellers-Avenues Mall, OM" : 78,
                                "Tanishq Jewellers-Festival City, QA" : 111,
                                "Tanishq Jewellers-Gold Souk, DB" : 979,
                                "Tanishq Jewellers-Hamdan Bin Mohammed Street, AD" : 2270,
                                "Tanishq Jewellers-Lulu Hypermarket, QA" : 425,
                                "Tanishq Jewellers-Meena Bazar, DB" : 3973,
                                "Tanishq Jewellers-Rolla, SH" : 270,
                                "Tanishq Jewellers-Sharjah Central, SH" : 864,
                                "Tanishq Jewellers-Silicon Central, DB" : 1538,
                                "Tanishq Jewellers-Taj, DB" : 166,
                                "Tanishq Jewellers-UW Mall Al Mankhool, DB" : 36,
                                "Tanishq-Atlanta, GA" : 377,
                                "Tanishq-Chicago, IL" : 297,
                                "Tanishq-Frisco, TX" : 1203,
                                "Tanishq-Houston, TX" : 382,
                                "Tanishq-New Jersey, NJ" : 749,
                                "Tanishq-Redmond Seattle, WA" : 173,
                                "Tanishq-Santa Clara, CA" : 89,
                                "Tiffany & Co-Chicago, IL" : 548,
                                "Tiffany & Co-East Rutherford, NJ" : 30,
                                "Tiffany & Co-Hackensack, NJ" : 73,
                                "Tiffany & Co-Northbrook, IL" : 90,
                                "Tiffany & Co-Paramus, NJ" : 103,
                                "Tiffany & Co-Red Bank, NJ" : 97,
                                "Tiffany & Co-Richmond, VA" : 88,
                                "Tiffany & Co-Short Hills, NJ" : 199,
                                "Tiffany & Co-Skokie, IL" : 87,
                                "Tiffany & Co-Vienna, VA" : 296,
                                "VBJ Jewellers-Frisco, TX" : 1268
                            }

In [437]:
#Column Creation - 'Total Reviews'
# Each row's 'Store Name' is looked up in store_to_total_rating_dict;
# a store name that is not a key of the dict is mapped to NaN (no error is raised).
combined_df_with_s['Total Reviews'] = combined_df_with_s['Store Name'].map(store_to_total_rating_dict)

In [439]:
# Sanity check: number of distinct (non-null) 'Total Reviews' values after the mapping.
combined_df_with_s['Total Reviews'].nunique()

77

In [441]:
# Count rows that are exact repeats (across all columns) of an earlier row.
combined_df_with_s.duplicated().sum()

3186

In [442]:
# Discard exact duplicate rows (first occurrence is kept) and renumber the index from 0.
combined_df_with_s = (
    combined_df_with_s
    .drop_duplicates()
    .reset_index(drop=True)
)

In [443]:
# Export the full frame to Excel (index column omitted).
combined_df_with_s.to_excel("final_sentiment_mapped/combined_df_final_S_full.xlsx", index=False)

In [445]:
# Parse the review timestamps. errors='coerce' converts any value that cannot be
# parsed into NaT instead of raising.
# NOTE(review): coerced values are lost silently - compare the non-null count of
# this column before/after the conversion to see how many rows were affected.
combined_df_with_s['review_datetime_utc'] = pd.to_datetime(combined_df_with_s['review_datetime_utc'], errors='coerce')

# Write the frame (now with a datetime column) to Parquet, index omitted.
combined_df_with_s.to_parquet("final_sentiment_mapped/combined_df_final_S_full.parquet", index=False)


In [446]:
# Read the Parquet file back and print its row count to verify the export.
check_parquet_df = pd.read_parquet("final_sentiment_mapped/combined_df_final_S_full.parquet")
print(len(check_parquet_df))

204047


In [447]:
# Row count of the in-memory frame, for comparison with the Parquet read-back.
print(len(combined_df_with_s))

204047


In [448]:
# (rows, columns) of the in-memory frame.
combined_df_with_s.shape

(204047, 24)

# Keywords & Phrases

In [449]:
# Start the keywords/phrases stage from the persisted Parquet export rather than the in-memory frame.
combined_df_with_s_current = pd.read_parquet("final_sentiment_mapped/combined_df_final_S_full.parquet")

In [450]:
# Column dtypes and non-null counts of the reloaded frame.
combined_df_with_s_current.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 204047 entries, 0 to 204046
Data columns (total 24 columns):
 #   Column                Non-Null Count   Dtype         
---  ------                --------------   -----         
 0   Store Name            204047 non-null  object        
 1   Name of the Reviewer  204047 non-null  object        
 2   review_datetime_utc   186890 non-null  datetime64[ns]
 3   review_rating         204047 non-null  int64         
 4   Avg Rating            203977 non-null  float64       
 5   year                  204047 non-null  int64         
 6   month                 204047 non-null  int64         
 7   review_text           180317 non-null  object        
 8   Store Code Cleaned    27828 non-null   object        
 9   Commentor Name        204047 non-null  object        
 10  Customer Confidence   204047 non-null  int64         
 11  Store Experience      204047 non-null  int64         
 12  Store Staff           204047 non-null  int64         
 13 

In [451]:
# Distinct review years present in the data (in order of first appearance).
combined_df_with_s_current['year'].unique().tolist()                 

[2024, 2023, 2022, 2021, 2025]

In [452]:
# Restrict the frame to reviews whose 'year' is 2024 or 2025.
combined_df_with_s_current = combined_df_with_s_current.loc[
    combined_df_with_s_current['year'].isin([2024,2025])
]

In [453]:
# Row count remaining after the year filter.
len(combined_df_with_s_current)

115594

In [454]:
# Number of reviews per remaining year.
combined_df_with_s_current['year'].value_counts()

year
2024    77913
2025    37681
Name: count, dtype: int64

In [455]:
# Export the year-filtered frame to a temp Excel file.
# NOTE(review): unlike the other exports in this notebook, index=False is not passed,
# so the (non-contiguous, post-filter) index is written as the first column - confirm this is intended.
combined_df_with_s_current.to_excel("temp/data_for_phrases.xlsx")

In [456]:
# Persist the year-filtered frame in both Excel and Parquet form (index omitted in both).
combined_df_with_s_current.to_excel("final_sentiment_mapped/combined_df_final_S.xlsx", index=False)
combined_df_with_s_current.to_parquet("final_sentiment_mapped/combined_df_final_S.parquet", index=False)

In [460]:
# Maps a short per-store dataframe key to the store's full name.
# The values are compared for equality against the 'Store Name' column when the
# per-store frames are built, so they must match that column exactly.
# Every key ends in '_final_sen_df_jul'; the leading part is the short store
# identifier used to name the per-store variables (e.g. 'bhi_ak', 'joy_ab').
keyword_mappings = {
                        "agd_mb_final_sen_df_jul" : "Arakkal Gold and Diamonds LLC - Meena Bazar - Bur Dubai (Branch 3)",
                        "bhi_ak_final_sen_df_jul" : "Bhima Jewellers - Al Karama",
                        "bhi_dec_ga_final_sen_df_jul" : "Bhindi Jewellers-Decatur, GA",
                        "eve_joh_ga_final_sen_df_jul" : "Evermark Jewelry-Johns Creek, GA",
                        "jar_alg_il_final_sen_df_jul" : "Jared-Algonquin, IL",
                        "jar_aur_il_final_sen_df_jul" : "Jared-Aurora, IL",
                        "jar_bol_il_final_sen_df_jul" : "Jared-Bolingbrook, IL",
                        "jar_lom_il_final_sen_df_jul" : "Jared-Lombard, IL",
                        "jar_orl_il_final_sen_df_jul" : "Jared-Orland Park, IL",
                        "jar_sch_il_final_sen_df_jul" : "Jared-Schaumburg, IL",
                        "jar_ver_il_final_sen_df_jul" : "Jared-Vernon Hills, IL",
                        "joy_ab_final_sen_df_jul" : "Joyalukkas Jewellery - Al Barsha",
                        "joy_ak_final_sen_df_jul" : "Joyalukkas Jewellery - Al Karama",
                        "joy_chi_il_final_sen_df_jul" : "Joyalukkas Jewellery-Chicago, IL",
                        "joy_dm_ad_final_sen_df_jul" : "Joyalukkas Jewellery - Dalma Plaza - Abu Dhabi",
                        "joy_fri_tx_final_sen_df_jul" : "Joyalukkas Jewellery-Frisco, TX",
                        "joy_hou_tx_final_sen_df_jul" : "Joyalukkas Jewellery-Houston, TX",
                        "joy_mz_ad_final_sen_df_jul" : "Joyalukkas Jewellery - Madinat Zayed Shopping Centre - Abu Dhabi",
                        "joy_sh_ad_final_sen_df_jul" : "Joyalukkas Jewellery - Shabia - Abu Dhabi",
                        "joy_st_af_final_sen_df_jul" : "Joyalukkas Jewellery - Al Fahidi st - Al Fahidi",
                        "joy_suw_ga_final_sen_df_jul" : "Joyalukkas Jewellery-Suwanee, GA",
                        "kan_mb_final_sen_df_jul" : "Kanz Jewellers",
                        "mal_ab_final_sen_df_jul" : "Malabar Gold and Diamonds - Al Barsha - Dubai",
                        "mal_ak_final_sen_df_jul" : "Malabar Gold and Diamonds - Al Karama - Dubai",
                        "mal_aw_ad_final_sen_df_jul" : "Malabar Gold and Diamonds - Al Wahda Mall - Abu Dhabi",
                        "mal_b1_ad_final_sen_df_jul" : "Malabar Gold and Diamonds - Hamdan Street ( Branch 1)",
                        "mal_b1_af_final_sen_df_jul" : "Malabar Gold and Diamonds - Al Fahidi Street - Bur Dubai (Branch 1)",
                        "mal_b2_ad_final_sen_df_jul" : "Malabar Gold and Diamonds - Hamdan Street (Branch 2)",
                        "mal_b2_af_final_sen_df_jul" : "Malabar Gold and Diamonds - Souq Al Kabeer Building - Bur Dubai (Branch 2)",
                        "mal_chi_il_final_sen_df_jul" : "Malabar Gold & Diamonds-Chicago, IL",
                        "mal_dm_ad_final_sen_df_jul" : "Malabar Gold and Diamonds - Dalma Mall - Abu Dhabi",
                        "mal_fri_tx_final_sen_df_jul" : "Malabar Gold & Diamonds-Frisco, TX",
                        "mal_ise_nj_final_sen_df_jul" : "Malabar Gold & Diamonds-Iselin, NJ",
                        "mal_lu_ad_final_sen_df_jul" : "Malabar Gold and Diamonds - Lulu Hypermarket - Madinat Zayed",
                        "mal_mb_final_sen_df_jul" : "Malabar Gold and Diamonds - Meena Bazar - Dubai",
                        "mal_nap_il_final_sen_df_jul" : "Malabar Gold & Diamonds-Naperville, IL",
                        "mal_ric_tx_final_sen_df_jul" : "Malani Jewellers-Richardson, TX",
                        "mal_sc_final_sen_df_jul" : "Malabar Gold & Diamonds - Silicon Oasis Central",
                        "mal_sh_ad_final_sen_df_jul" : "Malabar Gold and Diamonds - Shabia Musaffah",
                        "may_vie_va_final_sen_df_jul" : "May Jewelers-Vienna, VA",
                        "mia_awm_ad_final_sen_df_jul" : "Mia-Al Wahda Mall, AD",
                        "mia_bur_db_final_sen_df_jul" : "Mia-Burjuman, DB",
                        "min_ak_final_sen_df_jul" : "Mint Jewels - Al Karama",
                        "mna_mb_final_sen_df_jul" : "Meena Jewellers - Meena Bazar",
                        "son_ise_nj_final_sen_df_jul" : "Sona Jewelers-Iselin, NJ",
                        "tan_am_om_final_sen_df_jul" : "Tanishq Jewellers-Avenues Mall, OM",
                        "tan_atl_ga_final_sen_df_jul" : "Tanishq-Atlanta, GA",
                        "tan_bar_db_final_sen_df_jul" : "Tanishq Jewellers-Al Barsha, DB",
                        "tan_chi_il_final_sen_df_jul" : "Tanishq-Chicago, IL",
                        "tan_fah_db_final_sen_df_jul" : "Tanishq Jewellers-Al Fahidi, DB",
                        "tan_fc_qa_final_sen_df_jul" : "Tanishq Jewellers-Festival City, QA",
                        "tan_fri_tx_final_sen_df_jul" : "Tanishq-Frisco, TX",
                        "tan_gs_db_final_sen_df_jul" : "Tanishq Jewellers-Gold Souk, DB",
                        "tan_ham_ad_final_sen_df_jul" : "Tanishq Jewellers-Hamdan Bin Mohammed Street, AD",
                        "tan_hou_tx_final_sen_df_jul" : "Tanishq-Houston, TX",
                        "tan_kar_db_final_sen_df_jul" : "Tanishq Jewellers-Al Karama, DB",
                        "tan_lul_qa_final_sen_df_jul" : "Tanishq Jewellers-Lulu Hypermarket, QA",
                        "tan_mank_db_final_sen_df_jul" : "Tanishq Jewellers-UW Mall Al Mankhool, DB",
                        "tan_mee_db_final_sen_df_jul" : "Tanishq Jewellers-Meena Bazar, DB",
                        "tan_new_nj_final_sen_df_jul" : "Tanishq-New Jersey, NJ",
                        "tan_rol_sh_final_sen_df_jul" : "Tanishq Jewellers-Rolla, SH",
                        "tan_rse_wa_final_sen_df_jul" : "Tanishq-Redmond Seattle, WA",
                        "tan_sc_ca_final_sen_df_jul" : "Tanishq-Santa Clara, CA",
                        "tan_sc_sh_final_sen_df_jul" : "Tanishq Jewellers-Sharjah Central, SH",
                        "tan_sil_db_final_sen_df_jul" : "Tanishq Jewellers-Silicon Central, DB",
                        "tan_taj_db_final_sen_df_jul" : "Tanishq Jewellers-Taj, DB",
                        "tif_chi_il_final_sen_df_jul" : "Tiffany & Co-Chicago, IL",
                        "tif_eas_nj_final_sen_df_jul" : "Tiffany & Co-East Rutherford, NJ",
                        "tif_hac_nj_final_sen_df_jul" : "Tiffany & Co-Hackensack, NJ",
                        "tif_nor_il_final_sen_df_jul" : "Tiffany & Co-Northbrook, IL",
                        "tif_par_nj_final_sen_df_jul" : "Tiffany & Co-Paramus, NJ",
                        "tif_red_nj_final_sen_df_jul" : "Tiffany & Co-Red Bank, NJ",
                        "tif_ric_va_final_sen_df_jul" : "Tiffany & Co-Richmond, VA",
                        "tif_sho_nj_final_sen_df_jul" : "Tiffany & Co-Short Hills, NJ",
                        "tif_sko_il_final_sen_df_jul" : "Tiffany & Co-Skokie, IL",
                        "tif_vie_va_final_sen_df_jul" : "Tiffany & Co-Vienna, VA",
                        "vbj_fri_tx_final_sen_df_jul" : "VBJ Jewellers-Frisco, TX"
                    }

#Initialize an empty dictionary to store the per-store dataframes (dataframe key -> DataFrame)
keyword_dataframes = {}

#Loop through the mappings and filter combined_df_with_s_current for each store name
for df_name, title in keyword_mappings.items():
    # Rows whose 'Store Name' equals this store's name, re-indexed from 0.
    # A name with no matching rows produces an empty frame rather than an error.
    filtered_df = combined_df_with_s_current[combined_df_with_s_current['Store Name'] == title].reset_index(drop=True)
    keyword_dataframes[df_name] = filtered_df

In [461]:
# Preview the first rows of one per-store frame to check the split worked.
keyword_dataframes["bhi_ak_final_sen_df_jul"].head()

Unnamed: 0,Store Name,Name of the Reviewer,review_datetime_utc,review_rating,Avg Rating,year,month,review_text,Store Code Cleaned,Commentor Name,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange,Country,Catchment,Grouped Store Name,Total Reviews
0,Bhima Jewellers - Al Karama,Renuka Acharya,2024-06-10 14:03:38.830,5,4.7,2024,6,Very good collection and service was excellent,,Renuka Acharya,0,0,1,0,1,0,0,0,0,0,GCC,Al Karama,Bhima,1303
1,Bhima Jewellers - Al Karama,Junaid Pervaiz,2024-06-09 22:23:49.653,5,4.7,2024,6,We visited the shop 09/06/2024 evening. We wer...,,Junaid Pervaiz,0,1,1,0,0,0,0,0,0,0,GCC,Al Karama,Bhima,1303
2,Bhima Jewellers - Al Karama,Vineetha Vijayan,2024-06-08 16:45:40.125,5,4.7,2024,6,,,Vineetha Vijayan,0,0,0,0,0,0,0,0,0,0,GCC,Al Karama,Bhima,1303
3,Bhima Jewellers - Al Karama,Bency,2024-06-08 15:38:22.884,5,4.7,2024,6,,,Bency,0,0,0,0,0,0,0,0,0,0,GCC,Al Karama,Bhima,1303
4,Bhima Jewellers - Al Karama,jubin jose,2024-06-08 14:45:58.498,5,4.7,2024,6,Good,,jubin jose,0,0,0,0,0,0,0,0,0,0,GCC,Al Karama,Bhima,1303


## Positive

### bhi_ak

In [462]:
# Positive keyword/phrase extraction for store 'bhi_ak'.
# For every topic column, the reviews flagged 1 (positive) for that topic are sent
# to positive_keywords(); the returned strings and token counts are accumulated.

# Output list: one positive_keywords() result per topic that had positive comments
keyword_positive_output_bhi_ak = []
# Topic columns to iterate over
keyword_topics = ['Customer Confidence',
                  'Store Experience', 
                  'Store Staff', 
                  'Product Design', 
                  'Product Variety',
                  'Discount', 
                  'Making Charge', 
                  'Price', 
                  'Product Quality',
                  'Jewellery Exchange']

# One-element list: a mutable counter object handed to the progress thread
keyword_counter_bhi_ak=[0]
keyword_input_token_bhi_ak = 0
keyword_output_token_bhi_ak = 0
keyword_start_time_loop_bhi_ak = time.time()

#Threading setup
keyword_total_iterations = len(keyword_topics) 
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_bhi_ak, keyword_total_iterations, stop_event))
message_thread.start()

# Per-store frame looked up once instead of twice per topic
store_reviews_bhi_ak = keyword_dataframes['bhi_ak_final_sen_df_jul']

# try/finally guarantees the progress thread is stopped even when the API call
# raises; otherwise it would keep clearing/printing the cell output forever.
try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_bhi_ak[0]+=1
        # Review texts of the rows where the topic has a value of 1
        filtered_comments_bhi_ak = store_reviews_bhi_ak.loc[store_reviews_bhi_ak[topic]==1, 'review_text'].tolist()
        # Only call the API when there is at least one positive comment
        if filtered_comments_bhi_ak:
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_bhi_ak,topic)
            keyword_positive_output_bhi_ak.append(keywords)
            keyword_input_token_bhi_ak += input_tokens_loop
            keyword_output_token_bhi_ak += output_token_loop
finally:
    #Stopping the dynamic message thread
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_bhi_ak = time.time()
# Hard-coded rates: USD 0.01 per 1000 input tokens, USD 0.03 per 1000 output tokens.
# NOTE(review): confirm these match the pricing of the model actually called.
keyword_cost_input_token_bhi_ak = round((0.01/1000)*keyword_input_token_bhi_ak,2)
keyword_cost_output_token_bhi_ak = round((0.03/1000)*keyword_output_token_bhi_ak,2)
keyword_total_cost_bhi_ak = keyword_cost_input_token_bhi_ak + keyword_cost_output_token_bhi_ak
keyword_total_time_loop_bhi_ak = keyword_end_time_loop_bhi_ak - keyword_start_time_loop_bhi_ak

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_bhi_ak[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_bhi_ak,1))
print("Total Input Tokens - ", keyword_input_token_bhi_ak)
print("Total Input Cost = USD ",keyword_cost_input_token_bhi_ak)
print("Total Output Tokens - ", keyword_output_token_bhi_ak)
print("Total Output Cost = USD ",keyword_cost_output_token_bhi_ak)
print("Total Cost = USD ",round(keyword_total_cost_bhi_ak,2))

Executed  10  Iterations
Total Execution time (in secs) -  24.5
Total Input Tokens -  16005
Total Input Cost = USD  0.16
Total Output Tokens -  617
Total Output Cost = USD  0.02
Total Cost = USD  0.18


In [463]:
# Build the positive keywords/phrases summary table for store 'bhi_ak':
# one 'keywords' row and one 'phrases' row, with one column per topic.

# Output columns: identifying fields followed by one column per topic
columns = ['Store Name', 
           'Sentiment', 
           'Type', 
           'Customer Confidence',
           'Store Experience', 
           'Store Staff', 
           'Product Design', 
           'Product Variety',
           'Discount', 
           'Making Charge', 
           'Price', 
           'Product Quality',
           'Jewellery Exchange']

input_store_identifier = 'bhi_ak'
# Exact key lookup. A substring scan over the keys would return the first key that
# merely contains the identifier, which is wrong as soon as one identifier is a
# substring of another key.
store_name_bhi_ak = keyword_mappings.get(f"{input_store_identifier}_final_sen_df_jul", "Store Name Not Found")

# Collect one record per (topic, entry type) and build the frame once at the end,
# instead of growing a DataFrame with pd.concat inside the loop.
keyword_rows_bhi_ak = []
for json_str in keyword_positive_output_bhi_ak:
    # Each output is a JSON object: {topic: [{"keywords": "...", "phrases": "..."}, ...]}
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # Keywords are handled first, phrases next
            for entry_type in ('keywords', 'phrases'):
                row_data = dict.fromkeys(columns)
                row_data['Store Name'] = store_name_bhi_ak
                row_data['Sentiment'] = 'Positive'
                row_data['Type'] = entry_type
                # Normalise the comma-separated items to "a, b, c" spacing
                row_data[category] = ', '.join(item.strip() for item in content[entry_type].split(','))
                keyword_rows_bhi_ak.append(row_data)

# With no records, fall back to an empty frame that still has the expected columns
if keyword_rows_bhi_ak:
    positive_keywords_bhi_ak = pd.DataFrame(keyword_rows_bhi_ak)
else:
    positive_keywords_bhi_ak = pd.DataFrame(columns=columns)

# Consolidate rows that have the same 'Type' into a single row for each 'Store Name' and 'Sentiment'
positive_keywords_bhi_ak = positive_keywords_bhi_ak.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_bhi_ak

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,Bhima Jewellers - Al Karama,Positive,keywords,"Trustable :1, Trusted :1, Reliable :1, Trustwo...","great experience :8, good experience :7, excel...","helpful :12, friendly :10, patient :8, courteo...","designs :5, Design :3, designs :2, design :1","selection :4, collections :4, range :1","discounts :1, offers :1","reasonable :1, less :1","good pricing :1, fair :1, affordable :1, good ...","Good Quality:4, Durability:1, Quality:1",
1,Bhima Jewellers - Al Karama,Positive,phrases,Customer-centric and trustable major player :1...,"great shopping experience :3, smooth shopping ...","very helpful :5, friendly and helpful :3, very...","great designs :2, excellent designs :1, stunni...","good selection :2, great selection :2, best co...","great discounts :1, good offers :1",transparency regarding making charges :1,"right price :1, transparency and honesty in th...","Quality is the best:1, Quality and durability ...",


### joy_ab

In [464]:
# Positive keyword/phrase extraction for store 'joy_ab'.
# For every topic column, the reviews flagged 1 (positive) for that topic are sent
# to positive_keywords(); the returned strings and token counts are accumulated.

# Output list: one positive_keywords() result per topic that had positive comments
keyword_positive_output_joy_ab = []
# Topic columns to iterate over
keyword_topics = ['Customer Confidence',
                  'Store Experience', 
                  'Store Staff', 
                  'Product Design', 
                  'Product Variety',
                  'Discount', 
                  'Making Charge', 
                  'Price', 
                  'Product Quality',
                  'Jewellery Exchange']

# One-element list: a mutable counter object handed to the progress thread
keyword_counter_joy_ab=[0]
keyword_input_token_joy_ab = 0
keyword_output_token_joy_ab = 0
keyword_start_time_loop_joy_ab = time.time()

#Threading setup
keyword_total_iterations = len(keyword_topics) 
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_joy_ab, keyword_total_iterations, stop_event))
message_thread.start()

# Per-store frame looked up once instead of twice per topic
store_reviews_joy_ab = keyword_dataframes['joy_ab_final_sen_df_jul']

# try/finally guarantees the progress thread is stopped even when the API call
# raises; otherwise it would keep clearing/printing the cell output forever.
try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_joy_ab[0]+=1
        # Review texts of the rows where the topic has a value of 1
        filtered_comments_joy_ab = store_reviews_joy_ab.loc[store_reviews_joy_ab[topic]==1, 'review_text'].tolist()
        # Only call the API when there is at least one positive comment
        if filtered_comments_joy_ab:
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_joy_ab,topic)
            keyword_positive_output_joy_ab.append(keywords)
            keyword_input_token_joy_ab += input_tokens_loop
            keyword_output_token_joy_ab += output_token_loop
finally:
    #Stopping the dynamic message thread
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_joy_ab = time.time()
# Hard-coded rates: USD 0.01 per 1000 input tokens, USD 0.03 per 1000 output tokens.
# NOTE(review): confirm these match the pricing of the model actually called.
keyword_cost_input_token_joy_ab = round((0.01/1000)*keyword_input_token_joy_ab,2)
keyword_cost_output_token_joy_ab = round((0.03/1000)*keyword_output_token_joy_ab,2)
keyword_total_cost_joy_ab = keyword_cost_input_token_joy_ab + keyword_cost_output_token_joy_ab
keyword_total_time_loop_joy_ab = keyword_end_time_loop_joy_ab - keyword_start_time_loop_joy_ab

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_joy_ab[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_joy_ab,1))
print("Total Input Tokens - ", keyword_input_token_joy_ab)
print("Total Input Cost = USD ",keyword_cost_input_token_joy_ab)
print("Total Output Tokens - ", keyword_output_token_joy_ab)
print("Total Output Cost = USD ",keyword_cost_output_token_joy_ab)
print("Total Cost = USD ",round(keyword_total_cost_joy_ab,2))

Executed  10  Iterations
Total Execution time (in secs) -  31.1
Total Input Tokens -  28660
Total Input Cost = USD  0.29
Total Output Tokens -  714
Total Output Cost = USD  0.02
Total Cost = USD  0.31


In [465]:
# Build the positive keywords/phrases summary table for store 'joy_ab':
# one 'keywords' row and one 'phrases' row, with one column per topic.

# Output columns: identifying fields followed by one column per topic
columns = ['Store Name', 
           'Sentiment', 
           'Type', 
           'Customer Confidence',
           'Store Experience', 
           'Store Staff', 
           'Product Design', 
           'Product Variety',
           'Discount', 
           'Making Charge', 
           'Price', 
           'Product Quality',
           'Jewellery Exchange']

input_store_identifier = 'joy_ab'
# Exact key lookup. A substring scan over the keys would return the first key that
# merely contains the identifier, which is wrong as soon as one identifier is a
# substring of another key.
store_name_joy_ab = keyword_mappings.get(f"{input_store_identifier}_final_sen_df_jul", "Store Name Not Found")

# Collect one record per (topic, entry type) and build the frame once at the end,
# instead of growing a DataFrame with pd.concat inside the loop.
keyword_rows_joy_ab = []
for json_str in keyword_positive_output_joy_ab:
    # Each output is a JSON object: {topic: [{"keywords": "...", "phrases": "..."}, ...]}
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # Keywords are handled first, phrases next
            for entry_type in ('keywords', 'phrases'):
                row_data = dict.fromkeys(columns)
                row_data['Store Name'] = store_name_joy_ab
                row_data['Sentiment'] = 'Positive'
                row_data['Type'] = entry_type
                # Normalise the comma-separated items to "a, b, c" spacing
                row_data[category] = ', '.join(item.strip() for item in content[entry_type].split(','))
                keyword_rows_joy_ab.append(row_data)

# With no records, fall back to an empty frame that still has the expected columns
if keyword_rows_joy_ab:
    positive_keywords_joy_ab = pd.DataFrame(keyword_rows_joy_ab)
else:
    positive_keywords_joy_ab = pd.DataFrame(columns=columns)

# Consolidate rows that have the same 'Type' into a single row for each 'Store Name' and 'Sentiment'
positive_keywords_joy_ab = positive_keywords_joy_ab.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_joy_ab

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,Joyalukkas Jewellery - Al Barsha,Positive,keywords,"Reliable :2, Trust :1","good location :1, good ambience :1, pleasant a...","helpful : 45, friendly : 40, patient : 20, kno...","Good designs: 12, Nice designs: 5, Beautiful d...","Variety :3, Options :2, Choices :2, Selections...","discount :6, offer :5, offers :3, deal :2, dea...","making charges :3, making charge :1","good price :5, great prices :3, reasonable pri...","Good quality:3, Quality:2, Excellent:1",exchanging :1
1,Joyalukkas Jewellery - Al Barsha,Positive,phrases,You can fully trust the quality of their produ...,"pleasant shopping atmosphere :1, enjoyable and...","very helpful : 10, extremely friendly : 8, ver...","Plenty of designs to choose from: 1, Beautiful...","Wide range of collections :2, Large collection...","good discount :3, great discount :2, attractiv...","reasonable making charges :1, good making char...","value for money :1, prices are the best :1, ch...","Good quality of jewelry:1, Quality exceeded my...",helping us buying new jewelry :1


### joy_st_af

In [466]:
# Positive keyword/phrase extraction for store 'joy_st_af'.
# For every topic column, the reviews flagged 1 (positive) for that topic are sent
# to positive_keywords(); the returned strings and token counts are accumulated.

# Output list: one positive_keywords() result per topic that had positive comments
keyword_positive_output_joy_st_af = []
# Topic columns to iterate over
keyword_topics = ['Customer Confidence',
                  'Store Experience', 
                  'Store Staff', 
                  'Product Design', 
                  'Product Variety',
                  'Discount', 
                  'Making Charge', 
                  'Price', 
                  'Product Quality',
                  'Jewellery Exchange']

# One-element list: a mutable counter object handed to the progress thread
keyword_counter_joy_st_af=[0]
keyword_input_token_joy_st_af = 0
keyword_output_token_joy_st_af = 0
keyword_start_time_loop_joy_st_af = time.time()

#Threading setup
keyword_total_iterations = len(keyword_topics) 
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_joy_st_af, keyword_total_iterations, stop_event))
message_thread.start()

# Per-store frame looked up once instead of twice per topic
store_reviews_joy_st_af = keyword_dataframes['joy_st_af_final_sen_df_jul']

# try/finally guarantees the progress thread is stopped even when the API call
# raises; otherwise it would keep clearing/printing the cell output forever.
try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_joy_st_af[0]+=1
        # Review texts of the rows where the topic has a value of 1
        filtered_comments_joy_st_af = store_reviews_joy_st_af.loc[store_reviews_joy_st_af[topic]==1, 'review_text'].tolist()
        # Only call the API when there is at least one positive comment
        if filtered_comments_joy_st_af:
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_joy_st_af,topic)
            keyword_positive_output_joy_st_af.append(keywords)
            keyword_input_token_joy_st_af += input_tokens_loop
            keyword_output_token_joy_st_af += output_token_loop
finally:
    #Stopping the dynamic message thread
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_joy_st_af = time.time()
# Hard-coded rates: USD 0.01 per 1000 input tokens, USD 0.03 per 1000 output tokens.
# NOTE(review): confirm these match the pricing of the model actually called.
keyword_cost_input_token_joy_st_af = round((0.01/1000)*keyword_input_token_joy_st_af,2)
keyword_cost_output_token_joy_st_af = round((0.03/1000)*keyword_output_token_joy_st_af,2)
keyword_total_cost_joy_st_af = keyword_cost_input_token_joy_st_af + keyword_cost_output_token_joy_st_af
keyword_total_time_loop_joy_st_af = keyword_end_time_loop_joy_st_af - keyword_start_time_loop_joy_st_af

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_joy_st_af[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_joy_st_af,1))
print("Total Input Tokens - ", keyword_input_token_joy_st_af)
print("Total Input Cost = USD ",keyword_cost_input_token_joy_st_af)
print("Total Output Tokens - ", keyword_output_token_joy_st_af)
print("Total Output Cost = USD ",keyword_cost_output_token_joy_st_af)
print("Total Cost = USD ",round(keyword_total_cost_joy_st_af,2))

Executed  10  Iterations
Total Execution time (in secs) -  44.6
Total Input Tokens -  143319
Total Input Cost = USD  1.43
Total Output Tokens -  845
Total Output Cost = USD  0.03
Total Cost = USD  1.46


In [467]:
# Build the positive keyword/phrase summary table for store 'joy_st_af'.

# Output layout: three identifying columns followed by one column per topic
columns = ['Store Name', 
           'Sentiment', 
           'Type', 
           'Customer Confidence',
           'Store Experience', 
           'Store Staff', 
           'Product Design', 
           'Product Variety',
           'Discount', 
           'Making Charge', 
           'Price', 
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store's display name once: it is the same for every row.
# The first keyword_mappings key containing the identifier wins.
input_store_identifier = 'joy_st_af'
store_name = next(
    (name for key, name in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Collect rows as plain dicts and build the DataFrame once at the end;
# concatenating inside the loop re-copies the whole frame for every row.
keyword_rows = []
for json_str in keyword_positive_output_joy_st_af:
    # Each payload is a JSON object mapping a category to a list of
    # {'keywords': ..., 'phrases': ...} entries holding comma-separated strings
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # One row for the keywords, then one for the phrases
            for entry_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name
                row_data['Sentiment'] = 'Positive'
                row_data['Type'] = entry_type
                # Normalise the spacing around the comma-separated items
                row_data[category] = ', '.join(item.strip() for item in content[entry_type].split(','))
                keyword_rows.append(row_data)

# With no payloads, fall back to an empty frame that still has the expected columns
positive_keywords_joy_st_af = pd.DataFrame(keyword_rows) if keyword_rows else pd.DataFrame(columns=columns)

# Consolidate to one row per ('Store Name', 'Sentiment', 'Type'); the non-null
# values of every other column are joined with a single space
positive_keywords_joy_st_af = positive_keywords_joy_st_af.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_joy_st_af

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,Joyalukkas Jewellery - Al Fahidi st - Al Fahidi,Positive,keywords,"Trust :3, Reliable :3, Trustable :2, Honest :2...","Good : 100, Excellent : 80, Nice : 60, Great :...","friendly : 50, helpful : 45, polite : 30, prof...","Good designs: 45, Excellent designs: 10, Nice ...","collection : 100, variety : 10, options : 5, c...","discount : 45, offer : 30, scheme : 15, deal :...","reasonable :5, low :3, discount :3, affordable...","good price :10, best price :9, reasonable pric...","Good quality :12, Excellent quality :8, Best q...","exchange :5, exchange policy :1, exchange pric..."
1,Joyalukkas Jewellery - Al Fahidi st - Al Fahidi,Positive,phrases,"Trustworthy Jeweler :1, Reliable choice for je...","Good experience : 40, Excellent service : 30, ...","very friendly staff : 10, extremely helpful : ...","Good design of jewellery: 2, Nice design of je...","good collection : 50, nice collection : 30, ex...","good discount : 20, great discount : 10, best ...","50% off making charges :3, reasonable making c...","value for money :5, great prices :5, reasonabl...","Quality of the gold is also amazing :1, Qualit...","easy exchange :2, flexible diamond exchange po..."


### joy_dm_ad

In [468]:
# Extract positive keywords/phrases per topic for store 'joy_dm_ad'.

# Collected positive_keywords() payloads (a list), one per topic that had matching reviews
keyword_positive_output_joy_dm_ad = []
# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = ['Customer Confidence',
                  'Store Experience', 
                  'Store Staff', 
                  'Product Design', 
                  'Product Variety',
                  'Discount', 
                  'Making Charge', 
                  'Price', 
                  'Product Quality',
                  'Jewellery Exchange']

# Mutable one-element counter shared with the progress thread (passed in its args below)
keyword_counter_joy_dm_ad = [0]
keyword_input_token_joy_dm_ad = 0
keyword_output_token_joy_dm_ad = 0
keyword_start_time_loop_joy_dm_ad = time.time()

# Sentiment-tagged reviews for this store. Looked up once, and before the progress
# thread starts, so a missing key cannot leave that thread running.
sentiment_df_joy_dm_ad = keyword_dataframes['joy_dm_ad_final_sen_df_jul']

# Threading setup: background thread that reports progress until stop_event is set
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_joy_dm_ad, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_joy_dm_ad[0] += 1
        # Review texts of the rows where the topic column has a value of 1
        filtered_comments_joy_dm_ad = sentiment_df_joy_dm_ad[sentiment_df_joy_dm_ad[topic] == 1]['review_text'].tolist()
        # Only call positive_keywords when there is at least one matching review
        if filtered_comments_joy_dm_ad:
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_joy_dm_ad, topic)
            # Accumulate the returned payload and the token usage
            keyword_positive_output_joy_dm_ad.append(keywords)
            keyword_input_token_joy_dm_ad += input_tokens_loop
            keyword_output_token_joy_dm_ad += output_token_loop
finally:
    # Always signal and join the progress thread, even when a call above raised;
    # otherwise the thread would never be told to stop.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_joy_dm_ad = time.time()
# Cost estimate: USD 0.01 per 1,000 input tokens and USD 0.03 per 1,000 output tokens
keyword_cost_input_token_joy_dm_ad = round((0.01/1000)*keyword_input_token_joy_dm_ad,2)
keyword_cost_output_token_joy_dm_ad = round((0.03/1000)*keyword_output_token_joy_dm_ad,2)
keyword_total_cost_joy_dm_ad = keyword_cost_input_token_joy_dm_ad + keyword_cost_output_token_joy_dm_ad
keyword_total_time_loop_joy_dm_ad = keyword_end_time_loop_joy_dm_ad - keyword_start_time_loop_joy_dm_ad

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_joy_dm_ad[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_joy_dm_ad,1))
print("Total Input Tokens - ", keyword_input_token_joy_dm_ad)
print("Total Input Cost = USD ",keyword_cost_input_token_joy_dm_ad)
print("Total Output Tokens - ", keyword_output_token_joy_dm_ad)
print("Total Output Cost = USD ",keyword_cost_output_token_joy_dm_ad)
print("Total Cost = USD ",round(keyword_total_cost_joy_dm_ad,2))

Executed  10  Iterations
Total Execution time (in secs) -  28.0
Total Input Tokens -  39987
Total Input Cost = USD  0.4
Total Output Tokens -  801
Total Output Cost = USD  0.02
Total Cost = USD  0.42


In [469]:
# Build the positive keyword/phrase summary table for store 'joy_dm_ad'.

# Output layout: three identifying columns followed by one column per topic
columns = ['Store Name', 
           'Sentiment', 
           'Type', 
           'Customer Confidence',
           'Store Experience', 
           'Store Staff', 
           'Product Design', 
           'Product Variety',
           'Discount', 
           'Making Charge', 
           'Price', 
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store's display name once: it is the same for every row.
# The first keyword_mappings key containing the identifier wins.
input_store_identifier = 'joy_dm_ad'
store_name = next(
    (name for key, name in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Collect rows as plain dicts and build the DataFrame once at the end;
# concatenating inside the loop re-copies the whole frame for every row.
keyword_rows = []
for json_str in keyword_positive_output_joy_dm_ad:
    # Each payload is a JSON object mapping a category to a list of
    # {'keywords': ..., 'phrases': ...} entries holding comma-separated strings
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # One row for the keywords, then one for the phrases
            for entry_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name
                row_data['Sentiment'] = 'Positive'
                row_data['Type'] = entry_type
                # Normalise the spacing around the comma-separated items
                row_data[category] = ', '.join(item.strip() for item in content[entry_type].split(','))
                keyword_rows.append(row_data)

# With no payloads, fall back to an empty frame that still has the expected columns
positive_keywords_joy_dm_ad = pd.DataFrame(keyword_rows) if keyword_rows else pd.DataFrame(columns=columns)

# Consolidate to one row per ('Store Name', 'Sentiment', 'Type'); the non-null
# values of every other column are joined with a single space
positive_keywords_joy_dm_ad = positive_keywords_joy_dm_ad.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_joy_dm_ad

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,Joyalukkas Jewellery - Dalma Plaza - Abu Dhabi,Positive,keywords,"Trusted :2, Trust :2, Trustworthy :2, Reliable...","Good experience :15, Great experience :10, Exc...","helpful : 50, friendly : 45, patient : 40, pro...","Good design :5, Beautiful designs :4, Nice des...","Good collection : 50, Nice collection : 20, Ex...","discount :10, deal :5, offer :3, voucher :2, p...","affordable :1, cheap :1, worth :1, excellence ...","good price :5, best price :4, great price :3, ...","Good quality :4, Excellent quality :3, High qu...","exchange :5, value :1, choice :1"
1,Joyalukkas Jewellery - Dalma Plaza - Abu Dhabi,Positive,phrases,"Trusted shop :1, My trusted gold shop :1, Trus...","Very good experience :10, Wonderful experience...","very helpful : 20, very friendly : 18, very pa...","Beautiful designs :3, Nice designs :3, Best de...","variety of jewellery : 3, variety of designs :...","good discount :5, best discount :4, maximum di...","less making charges :2, discount on the making...","value for money :2, prices are fair :1, pricin...","Good quality jewerelly :1, Excellent quality j...","exchange process was really smooth :1, exchang..."


### joy_mz_ad

In [470]:
# Extract positive keywords/phrases per topic for store 'joy_mz_ad'.

# Collected positive_keywords() payloads (a list), one per topic that had matching reviews
keyword_positive_output_joy_mz_ad = []
# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = ['Customer Confidence',
                  'Store Experience', 
                  'Store Staff', 
                  'Product Design', 
                  'Product Variety',
                  'Discount', 
                  'Making Charge', 
                  'Price', 
                  'Product Quality',
                  'Jewellery Exchange']

# Mutable one-element counter shared with the progress thread (passed in its args below)
keyword_counter_joy_mz_ad = [0]
keyword_input_token_joy_mz_ad = 0
keyword_output_token_joy_mz_ad = 0
keyword_start_time_loop_joy_mz_ad = time.time()

# Sentiment-tagged reviews for this store. Looked up once, and before the progress
# thread starts, so a missing key cannot leave that thread running.
sentiment_df_joy_mz_ad = keyword_dataframes['joy_mz_ad_final_sen_df_jul']

# Threading setup: background thread that reports progress until stop_event is set
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_joy_mz_ad, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_joy_mz_ad[0] += 1
        # Review texts of the rows where the topic column has a value of 1
        filtered_comments_joy_mz_ad = sentiment_df_joy_mz_ad[sentiment_df_joy_mz_ad[topic] == 1]['review_text'].tolist()
        # Only call positive_keywords when there is at least one matching review
        if filtered_comments_joy_mz_ad:
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_joy_mz_ad, topic)
            # Accumulate the returned payload and the token usage
            keyword_positive_output_joy_mz_ad.append(keywords)
            keyword_input_token_joy_mz_ad += input_tokens_loop
            keyword_output_token_joy_mz_ad += output_token_loop
finally:
    # Always signal and join the progress thread, even when a call above raised;
    # otherwise the thread would never be told to stop.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_joy_mz_ad = time.time()
# Cost estimate: USD 0.01 per 1,000 input tokens and USD 0.03 per 1,000 output tokens
keyword_cost_input_token_joy_mz_ad = round((0.01/1000)*keyword_input_token_joy_mz_ad,2)
keyword_cost_output_token_joy_mz_ad = round((0.03/1000)*keyword_output_token_joy_mz_ad,2)
keyword_total_cost_joy_mz_ad = keyword_cost_input_token_joy_mz_ad + keyword_cost_output_token_joy_mz_ad
keyword_total_time_loop_joy_mz_ad = keyword_end_time_loop_joy_mz_ad - keyword_start_time_loop_joy_mz_ad

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_joy_mz_ad[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_joy_mz_ad,1))
print("Total Input Tokens - ", keyword_input_token_joy_mz_ad)
print("Total Input Cost = USD ",keyword_cost_input_token_joy_mz_ad)
print("Total Output Tokens - ", keyword_output_token_joy_mz_ad)
print("Total Output Cost = USD ",keyword_cost_output_token_joy_mz_ad)
print("Total Cost = USD ",round(keyword_total_cost_joy_mz_ad,2))

Executed  10  Iterations
Total Execution time (in secs) -  31.1
Total Input Tokens -  41020
Total Input Cost = USD  0.41
Total Output Tokens -  760
Total Output Cost = USD  0.02
Total Cost = USD  0.43


In [471]:
# Build the positive keyword/phrase summary table for store 'joy_mz_ad'.

# Output layout: three identifying columns followed by one column per topic
columns = ['Store Name', 
           'Sentiment', 
           'Type', 
           'Customer Confidence',
           'Store Experience', 
           'Store Staff', 
           'Product Design', 
           'Product Variety',
           'Discount', 
           'Making Charge', 
           'Price', 
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store's display name once: it is the same for every row.
# The first keyword_mappings key containing the identifier wins.
input_store_identifier = 'joy_mz_ad'
store_name = next(
    (name for key, name in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Collect rows as plain dicts and build the DataFrame once at the end;
# concatenating inside the loop re-copies the whole frame for every row.
keyword_rows = []
for json_str in keyword_positive_output_joy_mz_ad:
    # Each payload is a JSON object mapping a category to a list of
    # {'keywords': ..., 'phrases': ...} entries holding comma-separated strings
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # One row for the keywords, then one for the phrases
            for entry_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name
                row_data['Sentiment'] = 'Positive'
                row_data['Type'] = entry_type
                # Normalise the spacing around the comma-separated items
                row_data[category] = ', '.join(item.strip() for item in content[entry_type].split(','))
                keyword_rows.append(row_data)

# With no payloads, fall back to an empty frame that still has the expected columns
positive_keywords_joy_mz_ad = pd.DataFrame(keyword_rows) if keyword_rows else pd.DataFrame(columns=columns)

# Consolidate to one row per ('Store Name', 'Sentiment', 'Type'); the non-null
# values of every other column are joined with a single space
positive_keywords_joy_mz_ad = positive_keywords_joy_mz_ad.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_joy_mz_ad

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,Joyalukkas Jewellery - Madinat Zayed Shopping ...,Positive,keywords,"Trusted :4, Trust :2, Trusted brand :2, Trustw...","great experience : 20, good experience : 18, n...","patient : 10, helpful : 9, friendly : 8, accom...","designs :15, variety :3, collection :3, option...","collection : 45, variety : 3, selection : 3, o...","discount :10, deal :6, offers :2, discounts :2...","reasonable :1, affordable :1, low :1, less :1","good price :5, best price :4, reasonable price...","good quality :3, high-quality :3, quality :3, ...",exchange :2
1,Joyalukkas Jewellery - Madinat Zayed Shopping ...,Positive,phrases,"Trusted shop :1, Trustworthy and premium gold ...","great shopping experience : 5, pleasant experi...","very patient and helpful : 3, very friendly an...","beautiful earings :1, beautiful nose ring :1, ...","good collection : 20, nice collection : 10, gr...","good discount :5, very good discount :3, great...",50 percent deduction in the making charge :1,"best rates :1, price value very remarkable :1,...","quality exceeded my expectations :1, product q...",make the exchange smoothly :1


### joy_sh_ad

In [472]:
# Extract positive keywords/phrases per topic for store 'joy_sh_ad'.

# Collected positive_keywords() payloads (a list), one per topic that had matching reviews
keyword_positive_output_joy_sh_ad = []
# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = ['Customer Confidence',
                  'Store Experience', 
                  'Store Staff', 
                  'Product Design', 
                  'Product Variety',
                  'Discount', 
                  'Making Charge', 
                  'Price', 
                  'Product Quality',
                  'Jewellery Exchange']

# Mutable one-element counter shared with the progress thread (passed in its args below)
keyword_counter_joy_sh_ad = [0]
keyword_input_token_joy_sh_ad = 0
keyword_output_token_joy_sh_ad = 0
keyword_start_time_loop_joy_sh_ad = time.time()

# Sentiment-tagged reviews for this store. Looked up once, and before the progress
# thread starts, so a missing key cannot leave that thread running.
sentiment_df_joy_sh_ad = keyword_dataframes['joy_sh_ad_final_sen_df_jul']

# Threading setup: background thread that reports progress until stop_event is set
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_joy_sh_ad, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_joy_sh_ad[0] += 1
        # Review texts of the rows where the topic column has a value of 1
        filtered_comments_joy_sh_ad = sentiment_df_joy_sh_ad[sentiment_df_joy_sh_ad[topic] == 1]['review_text'].tolist()
        # Only call positive_keywords when there is at least one matching review
        if filtered_comments_joy_sh_ad:
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_joy_sh_ad, topic)
            # Accumulate the returned payload and the token usage
            keyword_positive_output_joy_sh_ad.append(keywords)
            keyword_input_token_joy_sh_ad += input_tokens_loop
            keyword_output_token_joy_sh_ad += output_token_loop
finally:
    # Always signal and join the progress thread, even when a call above raised;
    # otherwise the thread would never be told to stop.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_joy_sh_ad = time.time()
# Cost estimate: USD 0.01 per 1,000 input tokens and USD 0.03 per 1,000 output tokens
keyword_cost_input_token_joy_sh_ad = round((0.01/1000)*keyword_input_token_joy_sh_ad,2)
keyword_cost_output_token_joy_sh_ad = round((0.03/1000)*keyword_output_token_joy_sh_ad,2)
keyword_total_cost_joy_sh_ad = keyword_cost_input_token_joy_sh_ad + keyword_cost_output_token_joy_sh_ad
keyword_total_time_loop_joy_sh_ad = keyword_end_time_loop_joy_sh_ad - keyword_start_time_loop_joy_sh_ad

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_joy_sh_ad[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_joy_sh_ad,1))
print("Total Input Tokens - ", keyword_input_token_joy_sh_ad)
print("Total Input Cost = USD ",keyword_cost_input_token_joy_sh_ad)
print("Total Output Tokens - ", keyword_output_token_joy_sh_ad)
print("Total Output Cost = USD ",keyword_cost_output_token_joy_sh_ad)
print("Total Cost = USD ",round(keyword_total_cost_joy_sh_ad,2))

Executed  10  Iterations
Total Execution time (in secs) -  27.0
Total Input Tokens -  21157
Total Input Cost = USD  0.21
Total Output Tokens -  738
Total Output Cost = USD  0.02
Total Cost = USD  0.23


In [473]:
# Build the positive keyword/phrase summary table for store 'joy_sh_ad'.

# Output layout: three identifying columns followed by one column per topic
columns = ['Store Name', 
           'Sentiment', 
           'Type', 
           'Customer Confidence',
           'Store Experience', 
           'Store Staff', 
           'Product Design', 
           'Product Variety',
           'Discount', 
           'Making Charge', 
           'Price', 
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store's display name once: it is the same for every row.
# The first keyword_mappings key containing the identifier wins.
input_store_identifier = 'joy_sh_ad'
store_name = next(
    (name for key, name in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Collect rows as plain dicts and build the DataFrame once at the end;
# concatenating inside the loop re-copies the whole frame for every row.
keyword_rows = []
for json_str in keyword_positive_output_joy_sh_ad:
    # Each payload is a JSON object mapping a category to a list of
    # {'keywords': ..., 'phrases': ...} entries holding comma-separated strings
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # One row for the keywords, then one for the phrases
            for entry_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name
                row_data['Sentiment'] = 'Positive'
                row_data['Type'] = entry_type
                # Normalise the spacing around the comma-separated items
                row_data[category] = ', '.join(item.strip() for item in content[entry_type].split(','))
                keyword_rows.append(row_data)

# With no payloads, fall back to an empty frame that still has the expected columns
positive_keywords_joy_sh_ad = pd.DataFrame(keyword_rows) if keyword_rows else pd.DataFrame(columns=columns)

# Consolidate to one row per ('Store Name', 'Sentiment', 'Type'); the non-null
# values of every other column are joined with a single space
positive_keywords_joy_sh_ad = positive_keywords_joy_sh_ad.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_joy_sh_ad

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,Joyalukkas Jewellery - Shabia - Abu Dhabi,Positive,keywords,"Trusted :2, Regular :2, Consistent :1, Securel...","pleasant :3, smooth :3, welcoming :2, seamless...","polite :3, helpful :3, friendly :3, kind :3, s...","designs :5, design :4, beautiful :2, impressiv...","collection : 30, collections : 20, variety : 3...","discount :10, offer :6, offers :3, discounted ...","reasonable :2, low :1, genuine :1","Good price :5, Reasonable price :2, Affordable...","quality :5, good quality :2, best quality :2, ...",same value :1
1,Joyalukkas Jewellery - Shabia - Abu Dhabi,Positive,phrases,"Trusted name in jewelry :1, Regular customer f...","pleasant experience :2, smooth transaction :1,...","very friendly staff :3, very helpful and suppo...","good designs :1, impressive designs :1, beauti...","variety of collections : 3, good collection : ...","good discount :5, good offer :2, big discount ...","reasonable making charge :2, genuine making ch...","Great prices :1, Amazing prices :1, Nice price...",quality of the bracelet is simply outstanding ...,exchanged our old ring with same value :1


### mal_sc

In [474]:
# Extract positive keywords/phrases per topic for store 'mal_sc'.

# Collected positive_keywords() payloads (a list), one per topic that had matching reviews
keyword_positive_output_mal_sc = []
# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = ['Customer Confidence',
                  'Store Experience', 
                  'Store Staff', 
                  'Product Design', 
                  'Product Variety',
                  'Discount', 
                  'Making Charge', 
                  'Price', 
                  'Product Quality',
                  'Jewellery Exchange']

# Mutable one-element counter shared with the progress thread (passed in its args below)
keyword_counter_mal_sc = [0]
keyword_input_token_mal_sc = 0
keyword_output_token_mal_sc = 0
keyword_start_time_loop_mal_sc = time.time()

# Sentiment-tagged reviews for this store. Looked up once, and before the progress
# thread starts, so a missing key cannot leave that thread running.
sentiment_df_mal_sc = keyword_dataframes['mal_sc_final_sen_df_jul']

# Threading setup: background thread that reports progress until stop_event is set
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_mal_sc, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_mal_sc[0] += 1
        # Review texts of the rows where the topic column has a value of 1
        filtered_comments_mal_sc = sentiment_df_mal_sc[sentiment_df_mal_sc[topic] == 1]['review_text'].tolist()
        # Only call positive_keywords when there is at least one matching review
        if filtered_comments_mal_sc:
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_mal_sc, topic)
            # Accumulate the returned payload and the token usage
            keyword_positive_output_mal_sc.append(keywords)
            keyword_input_token_mal_sc += input_tokens_loop
            keyword_output_token_mal_sc += output_token_loop
finally:
    # Always signal and join the progress thread, even when a call above raised;
    # otherwise the thread would never be told to stop.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_mal_sc = time.time()
# Cost estimate: USD 0.01 per 1,000 input tokens and USD 0.03 per 1,000 output tokens
keyword_cost_input_token_mal_sc = round((0.01/1000)*keyword_input_token_mal_sc,2)
keyword_cost_output_token_mal_sc = round((0.03/1000)*keyword_output_token_mal_sc,2)
keyword_total_cost_mal_sc = keyword_cost_input_token_mal_sc + keyword_cost_output_token_mal_sc
keyword_total_time_loop_mal_sc = keyword_end_time_loop_mal_sc - keyword_start_time_loop_mal_sc

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_mal_sc[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_mal_sc,1))
print("Total Input Tokens - ", keyword_input_token_mal_sc)
print("Total Input Cost = USD ",keyword_cost_input_token_mal_sc)
print("Total Output Tokens - ", keyword_output_token_mal_sc)
print("Total Output Cost = USD ",keyword_cost_output_token_mal_sc)
print("Total Cost = USD ",round(keyword_total_cost_mal_sc,2))

Executed  10  Iterations
Total Execution time (in secs) -  36.1
Total Input Tokens -  42793
Total Input Cost = USD  0.43
Total Output Tokens -  765
Total Output Cost = USD  0.02
Total Cost = USD  0.45


In [475]:
# Build the positive keyword/phrase summary table for store 'mal_sc'.

# Output layout: three identifying columns followed by one column per topic
columns = ['Store Name', 
           'Sentiment', 
           'Type', 
           'Customer Confidence',
           'Store Experience', 
           'Store Staff', 
           'Product Design', 
           'Product Variety',
           'Discount', 
           'Making Charge', 
           'Price', 
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store's display name once: it is the same for every row.
# The first keyword_mappings key containing the identifier wins.
input_store_identifier = 'mal_sc'
store_name = next(
    (name for key, name in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Collect rows as plain dicts and build the DataFrame once at the end;
# concatenating inside the loop re-copies the whole frame for every row.
keyword_rows = []
for json_str in keyword_positive_output_mal_sc:
    # Each payload is a JSON object mapping a category to a list of
    # {'keywords': ..., 'phrases': ...} entries holding comma-separated strings
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # One row for the keywords, then one for the phrases
            for entry_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name
                row_data['Sentiment'] = 'Positive'
                row_data['Type'] = entry_type
                # Normalise the spacing around the comma-separated items
                row_data[category] = ', '.join(item.strip() for item in content[entry_type].split(','))
                keyword_rows.append(row_data)

# With no payloads, fall back to an empty frame that still has the expected columns
positive_keywords_mal_sc = pd.DataFrame(keyword_rows) if keyword_rows else pd.DataFrame(columns=columns)

# Consolidate to one row per ('Store Name', 'Sentiment', 'Type'); the non-null
# values of every other column are joined with a single space
positive_keywords_mal_sc = positive_keywords_mal_sc.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_mal_sc

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,Malabar Gold & Diamonds - Silicon Oasis Central,Positive,keywords,"Trust :2, Transparent :2, Knowledgeable :2, Pr...","pleasant :3, smooth :2, seamless :1, enjoyable...","helpful : 78, patient : 36, professional : 34,...","designs :15, good design :10, beautiful design...","collection : 45, selections : 5, variety : 4, ...","good discount :6, best deal :3, best offer :1,...",making charges :2,"best price :3, good price :2, reasonable rates...","good quality:3, excellent quality:2, top notch...","exchange rates :1, exchange :1"
1,Malabar Gold & Diamonds - Silicon Oasis Central,Positive,phrases,"Trust and rapport with customers :1, Completel...","wonderful experience :10, amazing experience :...","very helpful : 20, very patient : 10, very pro...","great designs :3, amazing designs :3, best des...","amazing collection : 4, good collection : 4, g...","gave me good discount :1, giving good discount...",proper making charges :1,"within our budget :2, fitted within our budget...","quality of their pieces is top notch:1, qualit...","best deal :1, good exchange rates :1, explaine..."


### mal_ab

In [476]:
# Extract positive keywords/phrases per topic for store 'mal_ab'.
# Unlike the other stores, each topic's reviews are sent to positive_keywords()
# in fixed-size batches, so one topic can contribute several payloads.

# Collected positive_keywords() payloads (a list), one per batch
keyword_positive_output_mal_ab = []
# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = [
    'Customer Confidence', 'Store Experience', 'Store Staff', 'Product Design',
    'Product Variety', 'Discount', 'Making Charge', 'Price', 
    'Product Quality', 'Jewellery Exchange'
]

# Number of reviews sent to positive_keywords() per call
keyword_batch_size = 25

# Mutable one-element counter shared with the progress thread (passed in its args below)
keyword_counter_mal_ab = [0]
keyword_input_token_mal_ab = 0
keyword_output_token_mal_ab = 0
keyword_start_time_loop_mal_ab = time.time()

# Sentiment-tagged reviews for this store. Looked up once, and before the progress
# thread starts, so a missing key cannot leave that thread running.
sentiment_df_mal_ab = keyword_dataframes['mal_ab_final_sen_df_jul']

# Threading setup: background thread that reports progress until stop_event is set
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_mal_ab, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic (the counter tracks topics, not batches)
    for topic in keyword_topics:
        keyword_counter_mal_ab[0] += 1
        # Review texts of the rows where the topic column has a value of 1
        filtered_comments_mal_ab = sentiment_df_mal_ab[sentiment_df_mal_ab[topic] == 1]['review_text'].tolist()

        # Walk the matching reviews in consecutive batches; the last batch may be
        # shorter, and an empty list yields no batches at all
        for i in range(0, len(filtered_comments_mal_ab), keyword_batch_size):
            comment_batch = filtered_comments_mal_ab[i:i + keyword_batch_size]
            keywords, input_tokens_loop, output_token_loop = positive_keywords(comment_batch, topic)
            # Accumulate the returned payload and the token usage
            keyword_positive_output_mal_ab.append(keywords)
            keyword_input_token_mal_ab += input_tokens_loop
            keyword_output_token_mal_ab += output_token_loop
finally:
    # Always signal and join the progress thread, even when a call above raised;
    # otherwise the thread would never be told to stop.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_mal_ab = time.time()
# Cost estimate: USD 0.01 per 1,000 input tokens and USD 0.03 per 1,000 output tokens
keyword_cost_input_token_mal_ab = round((0.01 / 1000) * keyword_input_token_mal_ab, 2)
keyword_cost_output_token_mal_ab = round((0.03 / 1000) * keyword_output_token_mal_ab, 2)
keyword_total_cost_mal_ab = keyword_cost_input_token_mal_ab + keyword_cost_output_token_mal_ab
keyword_total_time_loop_mal_ab = keyword_end_time_loop_mal_ab - keyword_start_time_loop_mal_ab

# Display loop performance parameters & cost
clear_output(wait=True)
print("Executed", keyword_counter_mal_ab[0], "Iterations")
print("Total Execution time (in secs) -", round(keyword_total_time_loop_mal_ab, 1))
print("Total Input Tokens -", keyword_input_token_mal_ab)
print("Total Input Cost = USD", keyword_cost_input_token_mal_ab)
print("Total Output Tokens -", keyword_output_token_mal_ab)
print("Total Output Cost = USD", keyword_cost_output_token_mal_ab)
print("Total Cost = USD", round(keyword_total_cost_mal_ab, 2))


Executed 10 Iterations
Total Execution time (in secs) - 696.7
Total Input Tokens - 348324
Total Input Cost = USD 3.48
Total Output Tokens - 21210
Total Output Cost = USD 0.64
Total Cost = USD 4.12


In [477]:
# Build the positive keyword/phrase summary table for store 'mal_ab' from the
# JSON strings collected in keyword_positive_output_mal_ab.

# Output layout: three identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store display name once (it does not depend on the JSON being parsed):
# the first keyword_mappings key that contains the identifier wins.
input_store_identifier = 'mal_ab'
store_name_mal_ab = next(
    (value for key, value in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Collect plain dict rows and build the DataFrame once at the end; calling
# pd.concat for every row re-copies the whole frame each time.
keyword_rows_mal_ab = []
for json_str in keyword_positive_output_mal_ab:
    # A malformed or empty model reply must not abort the whole cell
    if not json_str or not json_str.strip():
        print("Warning: Empty JSON string encountered.")
        continue
    try:
        data = json.loads(json_str)
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON: {e}. Skipping this entry: {json_str}")
        continue

    # Each top-level key is a category (topic); its value is iterated as a list of
    # dicts that are read for 'keywords' and 'phrases' comma-separated strings.
    for category, content_list in data.items():
        for content in content_list:
            # One row per entry type, with only the current category column filled
            for entry_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_mal_ab
                row_data['Sentiment'] = 'Positive'
                row_data['Type'] = entry_type
                row_data[category] = ', '.join(
                    part.strip() for part in content.get(entry_type, '').split(',')
                )
                keyword_rows_mal_ab.append(row_data)

# Keep the expected columns even when nothing could be parsed
if keyword_rows_mal_ab:
    positive_keywords_mal_ab = pd.DataFrame(keyword_rows_mal_ab)
else:
    positive_keywords_mal_ab = pd.DataFrame(columns=columns)

# Consolidate to a single row per (Store Name, Sentiment, Type).
# Batches are joined with ', ' so the last entry of one batch and the first entry
# of the next stay separated; empty fragments are dropped.
positive_keywords_mal_ab = (
    positive_keywords_mal_ab
    .groupby(['Store Name', 'Sentiment', 'Type'])
    .agg(lambda x: ', '.join(v for v in x.dropna() if v))
    .reset_index()
)

positive_keywords_mal_ab

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,Malabar Gold and Diamonds - Al Barsha - Dubai,Positive,keywords,"Trust :3, Reliable :1, Authentic :1, Trusted :...","good experience :5, excellent experience :4, g...","helpful :4, polite :3, courteous :1, knowledge...","designs :15, good designs :4, beautiful design...","collections :12, variety :2, options :2, array...","good discount :6, best discount :2, good deal ...","good making charge :2, reasonable making charg...","best price :4, good price :4, great price :3, ...","good quality:5, best quality:2, excellent qual...","exchange :8, exchanging :3, exchanged :2, retu..."
1,Malabar Gold and Diamonds - Al Barsha - Dubai,Positive,phrases,"Trust worthy :1, Trusted shop of buying gold :...","very good experience :3, excellent customer se...","excellent service :5, amazing support :1, very...","lot of designs :2, wonderful designs :2, pleth...","wide variety of collections :2, vast variety o...","providing good discount :1, offered discount :...","very low making charges than market :2, seriou...","best price and discounts :1, suited our tastes...","quality of the product:1, quality of gold jewe...","100% refund on exchange :1, easy exchange hass..."


### mal_b1_af

In [478]:
# Extract positive keywords/phrases per topic for store 'mal_b1_af'.
# Raw model replies (one string per batch) are accumulated in this list.
keyword_positive_output_mal_b1_af = []
# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = [
    'Customer Confidence', 'Store Experience', 'Store Staff', 'Product Design',
    'Product Variety', 'Discount', 'Making Charge', 'Price',
    'Product Quality', 'Jewellery Exchange'
]

# The counter is a one-element list so it can be mutated in place while the
# same object is shared with the progress thread.
keyword_counter_mal_b1_af = [0]
keyword_input_token_mal_b1_af = 0
keyword_output_token_mal_b1_af = 0
keyword_start_time_loop_mal_b1_af = time.time()

# Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_mal_b1_af, keyword_total_iterations, stop_event))
message_thread.start()

# try/finally guarantees the progress thread is stopped even when a request
# fails mid-loop; otherwise it would keep running after the cell errored.
try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_mal_b1_af[0] += 1
        # Reviews whose topic column equals 1
        filtered_comments_mal_b1_af = keyword_dataframes['mal_b1_af_final_sen_df_jul'][
            keyword_dataframes['mal_b1_af_final_sen_df_jul'][topic] == 1
        ]['review_text'].tolist()

        # Process the comments in batches of 25 (the last batch may be smaller);
        # an empty list simply yields no batches.
        for i in range(0, len(filtered_comments_mal_b1_af), 25):
            comment_batch = filtered_comments_mal_b1_af[i:i + 25]
            keywords, input_tokens_loop, output_token_loop = positive_keywords(comment_batch, topic)
            keyword_positive_output_mal_b1_af.append(keywords)
            keyword_input_token_mal_b1_af += input_tokens_loop
            keyword_output_token_mal_b1_af += output_token_loop
finally:
    # Stopping the dynamic message thread
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_mal_b1_af = time.time()
# The factors amount to 0.01 per 1,000 input tokens and 0.03 per 1,000 output tokens.
# NOTE(review): presumably the USD price list of the model in use — confirm it is still current.
keyword_cost_input_token_mal_b1_af = round((0.01 / 1000) * keyword_input_token_mal_b1_af, 2)
keyword_cost_output_token_mal_b1_af = round((0.03 / 1000) * keyword_output_token_mal_b1_af, 2)
keyword_total_cost_mal_b1_af = keyword_cost_input_token_mal_b1_af + keyword_cost_output_token_mal_b1_af
keyword_total_time_loop_mal_b1_af = keyword_end_time_loop_mal_b1_af - keyword_start_time_loop_mal_b1_af

# Display loop performance parameters & cost
clear_output(wait=True)
print("Executed", keyword_counter_mal_b1_af[0], "Iterations")
print("Total Execution time (in secs) -", round(keyword_total_time_loop_mal_b1_af, 1))
print("Total Input Tokens -", keyword_input_token_mal_b1_af)
print("Total Input Cost = USD", keyword_cost_input_token_mal_b1_af)
print("Total Output Tokens -", keyword_output_token_mal_b1_af)
print("Total Output Cost = USD", keyword_cost_output_token_mal_b1_af)
print("Total Cost = USD", round(keyword_total_cost_mal_b1_af, 2))


Executed 10 Iterations
Total Execution time (in secs) - 638.1
Total Input Tokens - 319980
Total Input Cost = USD 3.2
Total Output Tokens - 21670
Total Output Cost = USD 0.65
Total Cost = USD 3.85


In [479]:
# Build the positive keyword/phrase summary table for store 'mal_b1_af' from the
# JSON strings collected in keyword_positive_output_mal_b1_af.
# (json and pandas are already imported in the notebook's import cell.)

# Output layout: three identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store display name once (it does not depend on the JSON being parsed):
# the first keyword_mappings key that contains the identifier wins.
input_store_identifier = 'mal_b1_af'
store_name_mal_b1_af = next(
    (value for key, value in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Collect plain dict rows and build the DataFrame once at the end; calling
# pd.concat for every row re-copies the whole frame each time.
keyword_rows_mal_b1_af = []
for json_str in keyword_positive_output_mal_b1_af:
    if not json_str or not json_str.strip():  # Check if the string is empty
        print("Warning: Empty JSON string encountered.")
        continue  # Skip this iteration

    try:
        # Load the JSON string into a dictionary
        data = json.loads(json_str)
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON: {e}. Skipping this entry: {json_str}")
        continue  # Skip this iteration

    # Each top-level key is a category (topic); its value is iterated as a list of
    # dicts that are read for 'keywords' and 'phrases' comma-separated strings.
    for category, content_list in data.items():
        for content in content_list:
            # One row per entry type, with only the current category column filled
            for entry_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_mal_b1_af
                row_data['Sentiment'] = 'Positive'
                row_data['Type'] = entry_type
                row_data[category] = ', '.join(
                    part.strip() for part in content.get(entry_type, '').split(',')
                )
                keyword_rows_mal_b1_af.append(row_data)

# Keep the expected columns even when nothing could be parsed
if keyword_rows_mal_b1_af:
    positive_keywords_mal_b1_af = pd.DataFrame(keyword_rows_mal_b1_af)
else:
    positive_keywords_mal_b1_af = pd.DataFrame(columns=columns)

# Consolidate to a single row per (Store Name, Sentiment, Type).
# Batches are joined with ', ' so the last entry of one batch and the first entry
# of the next stay separated; empty fragments are dropped.
positive_keywords_mal_b1_af = (
    positive_keywords_mal_b1_af
    .groupby(['Store Name', 'Sentiment', 'Type'])
    .agg(lambda x: ', '.join(v for v in x.dropna() if v))
    .reset_index()
)

# Display the result
positive_keywords_mal_b1_af


Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,Malabar Gold and Diamonds - Al Fahidi Street -...,Positive,keywords,"Trust :2, Confidence :1, Assurance :1, Reliabl...","great experience :3, good experience :3, pleas...","service :15, professional :2, helpful :2, assi...","design :12, designs :8, variety :2, options :1...","variety :2, options :1 collection :24 Variety ...","best deal :7, good discount :5, best discount ...","reasonable :3, less :2, discount :1","best price :5, good price :4, competitive :1, ...","good quality:4, best quality:2, great quality:...","exchange policy:1, exchange gold:1, exchange o..."
1,Malabar Gold and Diamonds - Al Fahidi Street -...,Positive,phrases,"Trusted purchase :3, Highly recommended :3, Fe...","shopping experience was great :1, great & plea...","very good service :3, good service by :3, best...","nice design :3, best design :3, excellent desi...","wide range :1, lot of variety :1, lot of varie...","good discount on making :1, reasonable discoun...","reasonable making charges :2, best price on ma...","special price :1, better price :1, excellent p...","quality and service:5, quality of the gold is ...","comfortable to exchange:1, assisted us with th..."


### mal_ak

In [480]:
# Extract positive keywords/phrases per topic for store 'mal_ak'.
# Raw model replies (one string per batch) are accumulated in this list.
keyword_positive_output_mal_ak = []
# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = [
    'Customer Confidence', 'Store Experience', 'Store Staff', 'Product Design',
    'Product Variety', 'Discount', 'Making Charge', 'Price',
    'Product Quality', 'Jewellery Exchange'
]

# The counter is a one-element list so it can be mutated in place while the
# same object is shared with the progress thread.
keyword_counter_mal_ak = [0]
keyword_input_token_mal_ak = 0
keyword_output_token_mal_ak = 0
keyword_start_time_loop_mal_ak = time.time()

# Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_mal_ak, keyword_total_iterations, stop_event))
message_thread.start()

# try/finally guarantees the progress thread is stopped even when a request
# fails mid-loop; otherwise it would keep running after the cell errored.
try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_mal_ak[0] += 1
        # Reviews whose topic column equals 1
        filtered_comments_mal_ak = keyword_dataframes['mal_ak_final_sen_df_jul'][
            keyword_dataframes['mal_ak_final_sen_df_jul'][topic] == 1
        ]['review_text'].tolist()

        # Process the comments in batches of 25 (the last batch may be smaller);
        # an empty list simply yields no batches.
        for i in range(0, len(filtered_comments_mal_ak), 25):
            comment_batch = filtered_comments_mal_ak[i:i + 25]
            keywords, input_tokens_loop, output_token_loop = positive_keywords(comment_batch, topic)
            keyword_positive_output_mal_ak.append(keywords)
            keyword_input_token_mal_ak += input_tokens_loop
            keyword_output_token_mal_ak += output_token_loop
finally:
    # Stopping the dynamic message thread
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_mal_ak = time.time()
# The factors amount to 0.01 per 1,000 input tokens and 0.03 per 1,000 output tokens.
# NOTE(review): presumably the USD price list of the model in use — confirm it is still current.
keyword_cost_input_token_mal_ak = round((0.01 / 1000) * keyword_input_token_mal_ak, 2)
keyword_cost_output_token_mal_ak = round((0.03 / 1000) * keyword_output_token_mal_ak, 2)
keyword_total_cost_mal_ak = keyword_cost_input_token_mal_ak + keyword_cost_output_token_mal_ak
keyword_total_time_loop_mal_ak = keyword_end_time_loop_mal_ak - keyword_start_time_loop_mal_ak

# Display loop performance parameters & cost
clear_output(wait=True)
print("Executed", keyword_counter_mal_ak[0], "Iterations")
print("Total Execution time (in secs) -", round(keyword_total_time_loop_mal_ak, 1))
print("Total Input Tokens -", keyword_input_token_mal_ak)
print("Total Input Cost = USD", keyword_cost_input_token_mal_ak)
print("Total Output Tokens -", keyword_output_token_mal_ak)
print("Total Output Cost = USD", keyword_cost_output_token_mal_ak)
print("Total Cost = USD", round(keyword_total_cost_mal_ak, 2))


Executed 10 Iterations
Total Execution time (in secs) - 455.3
Total Input Tokens - 247921
Total Input Cost = USD 2.48
Total Output Tokens - 14973
Total Output Cost = USD 0.45
Total Cost = USD 2.93


In [481]:
# Build the positive keyword/phrase summary table for store 'mal_ak' from the
# JSON strings collected in keyword_positive_output_mal_ak.

# Output layout: three identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store display name once (it does not depend on the JSON being parsed):
# the first keyword_mappings key that contains the identifier wins.
input_store_identifier = 'mal_ak'
store_name_mal_ak = next(
    (value for key, value in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Collect plain dict rows and build the DataFrame once at the end; calling
# pd.concat for every row re-copies the whole frame each time.
keyword_rows_mal_ak = []
for json_str in keyword_positive_output_mal_ak:
    # A malformed or empty model reply must not abort the whole cell
    if not json_str or not json_str.strip():
        print("Warning: Empty JSON string encountered.")
        continue
    try:
        data = json.loads(json_str)
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON: {e}. Skipping this entry: {json_str}")
        continue

    # Each top-level key is a category (topic); its value is iterated as a list of
    # dicts that are read for 'keywords' and 'phrases' comma-separated strings.
    for category, content_list in data.items():
        for content in content_list:
            # One row per entry type, with only the current category column filled
            for entry_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_mal_ak
                row_data['Sentiment'] = 'Positive'
                row_data['Type'] = entry_type
                row_data[category] = ', '.join(
                    part.strip() for part in content.get(entry_type, '').split(',')
                )
                keyword_rows_mal_ak.append(row_data)

# Keep the expected columns even when nothing could be parsed
if keyword_rows_mal_ak:
    positive_keywords_mal_ak = pd.DataFrame(keyword_rows_mal_ak)
else:
    positive_keywords_mal_ak = pd.DataFrame(columns=columns)

# Consolidate to a single row per (Store Name, Sentiment, Type).
# Batches are joined with ', ' so the last entry of one batch and the first entry
# of the next stay separated; empty fragments are dropped.
positive_keywords_mal_ak = (
    positive_keywords_mal_ak
    .groupby(['Store Name', 'Sentiment', 'Type'])
    .agg(lambda x: ', '.join(v for v in x.dropna() if v))
    .reset_index()
)

positive_keywords_mal_ak

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,Malabar Gold and Diamonds - Al Karama - Dubai,Positive,keywords,"Trusted brand :1, Reliable salesperson :1 Reli...","pleasant :3, comfortable :1, amazing :1, wonde...","helpful :3, cooperative :2, patient :2, polite...","good design :5, unique design :2, attractive d...","collection :12, options :3, variety :2, choice...","discount :10, deals :3, offers :2, price :1, r...","reasonable :1, less :1, cheap :1","best price :3, good price :3, affordable price...","quality :8, good quality :3, high-quality :2, ...","exchange :11, value :3, price :2, option :1, p..."
1,Malabar Gold and Diamonds - Al Karama - Dubai,Positive,phrases,"Trusted brand :1, Reliable salesperson for any...","pleasant experience :3, amazing experience :2,...","excellent service :2, very good service :2, aw...","rare designs are available :1, unique collecti...","nice collection :3, good collection :3, excell...","good discount :5, best discount :2, bargain di...","reasonable making charge :1, reduce making cha...","best price that we can afford :1, best price b...",quality of the jewelry surpassed our expectati...,"exchange of gold :3, exchange process :2, exch..."


### mal_aw_ad

In [482]:
# Extract positive keywords/phrases per topic for store 'mal_aw_ad'.
# Raw model replies (one string per topic) are accumulated in this list.
keyword_positive_output_mal_aw_ad = []
# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# The counter is a one-element list so it can be mutated in place while the
# same object is shared with the progress thread.
keyword_counter_mal_aw_ad = [0]
keyword_input_token_mal_aw_ad = 0
keyword_output_token_mal_aw_ad = 0
keyword_start_time_loop_mal_aw_ad = time.time()

#Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_mal_aw_ad, keyword_total_iterations, stop_event))
message_thread.start()

# try/finally guarantees the progress thread is stopped even when a request
# fails mid-loop; otherwise it would keep running after the cell errored.
try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_mal_aw_ad[0] += 1
        # Reviews whose topic column equals 1
        filtered_comments_mal_aw_ad = keyword_dataframes['mal_aw_ad_final_sen_df_jul'][
            keyword_dataframes['mal_aw_ad_final_sen_df_jul'][topic] == 1
        ]['review_text'].tolist()
        # Skip topics without any matching review so no request is made for them.
        # NOTE(review): all comments of a topic go out in a single request here,
        # whereas the cells for some other stores send batches of 25 — confirm
        # this difference is intended.
        if filtered_comments_mal_aw_ad:
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_mal_aw_ad, topic)
            keyword_positive_output_mal_aw_ad.append(keywords)
            keyword_input_token_mal_aw_ad += input_tokens_loop
            keyword_output_token_mal_aw_ad += output_token_loop
finally:
    #Stopping the dynamic message thread
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_mal_aw_ad = time.time()
# The factors amount to 0.01 per 1,000 input tokens and 0.03 per 1,000 output tokens.
# NOTE(review): presumably the USD price list of the model in use — confirm it is still current.
keyword_cost_input_token_mal_aw_ad = round((0.01/1000)*keyword_input_token_mal_aw_ad, 2)
keyword_cost_output_token_mal_aw_ad = round((0.03/1000)*keyword_output_token_mal_aw_ad, 2)
keyword_total_cost_mal_aw_ad = keyword_cost_input_token_mal_aw_ad + keyword_cost_output_token_mal_aw_ad
keyword_total_time_loop_mal_aw_ad = keyword_end_time_loop_mal_aw_ad - keyword_start_time_loop_mal_aw_ad

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ", keyword_counter_mal_aw_ad[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_mal_aw_ad, 1))
print("Total Input Tokens - ", keyword_input_token_mal_aw_ad)
print("Total Input Cost = USD ", keyword_cost_input_token_mal_aw_ad)
print("Total Output Tokens - ", keyword_output_token_mal_aw_ad)
print("Total Output Cost = USD ", keyword_cost_output_token_mal_aw_ad)
print("Total Cost = USD ", round(keyword_total_cost_mal_aw_ad, 2))

Executed  10  Iterations
Total Execution time (in secs) -  36.6
Total Input Tokens -  103086
Total Input Cost = USD  1.03
Total Output Tokens -  774
Total Output Cost = USD  0.02
Total Cost = USD  1.05


In [483]:
# Build the positive keyword/phrase summary table for store 'mal_aw_ad' from the
# JSON strings collected in keyword_positive_output_mal_aw_ad.

# Output layout: three identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store display name once (it does not depend on the JSON being parsed):
# the first keyword_mappings key that contains the identifier wins.
input_store_identifier = 'mal_aw_ad'
store_name_mal_aw_ad = next(
    (value for key, value in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Collect plain dict rows and build the DataFrame once at the end; calling
# pd.concat for every row re-copies the whole frame each time.
keyword_rows_mal_aw_ad = []
for json_str in keyword_positive_output_mal_aw_ad:
    # A malformed or empty model reply must not abort the whole cell
    if not json_str or not json_str.strip():
        print("Warning: Empty JSON string encountered.")
        continue
    try:
        data = json.loads(json_str)
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON: {e}. Skipping this entry: {json_str}")
        continue

    # Each top-level key is a category (topic); its value is iterated as a list of
    # dicts that are read for 'keywords' and 'phrases' comma-separated strings.
    for category, content_list in data.items():
        for content in content_list:
            # One row per entry type, with only the current category column filled
            for entry_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_mal_aw_ad
                row_data['Sentiment'] = 'Positive'
                row_data['Type'] = entry_type
                row_data[category] = ', '.join(
                    part.strip() for part in content.get(entry_type, '').split(',')
                )
                keyword_rows_mal_aw_ad.append(row_data)

# Keep the expected columns even when nothing could be parsed
if keyword_rows_mal_aw_ad:
    positive_keywords_mal_aw_ad = pd.DataFrame(keyword_rows_mal_aw_ad)
else:
    positive_keywords_mal_aw_ad = pd.DataFrame(columns=columns)

# Consolidate to a single row per (Store Name, Sentiment, Type).
# Multiple replies for the same category are joined with ', ' so their entries
# stay separated; empty fragments are dropped.
positive_keywords_mal_aw_ad = (
    positive_keywords_mal_aw_ad
    .groupby(['Store Name', 'Sentiment', 'Type'])
    .agg(lambda x: ', '.join(v for v in x.dropna() if v))
    .reset_index()
)

positive_keywords_mal_aw_ad

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,Malabar Gold and Diamonds - Al Wahda Mall - Ab...,Positive,keywords,"trust :3, trusted :2, trustable :1, honest :1,...","good experience : 45, great experience : 40, n...","helpful : 98, friendly : 85, accommodating : 8...","designs :30, beautiful :5, unique :3, stunning...","collection : 45, variety : 8, selections : 6, ...","discount :15, offers :8, deal :6, good discoun...","good making :2, affordable :1","best price :10, good price :9, reasonable pric...","Good quality :5, High quality :5, Quality :4, ...","good price :2, best price :1, affordable rate :1"
1,Malabar Gold and Diamonds - Al Wahda Mall - Ab...,Positive,phrases,"fully trust Malabar :1, most trusted Malabar :...","very good experience : 10, amazing experience ...","very helpful : 25, very accommodating : 20, ex...","beautiful designs :3, amazing designs :3, nice...","wide variety : 4, good collection : 3, variety...","good discount :5, special discount :5, great d...","good making cost :1, good making rate :1, affo...","within my budget :3, fits your budget :2, with...","Quality are always 100% :1, The product qualit...","completely satisfied :1, extremely satisfied :..."


### mal_dm_ad

In [484]:
# Extract positive keywords/phrases per topic for store 'mal_dm_ad'.
# Raw model replies (one string per topic) are accumulated in this list.
keyword_positive_output_mal_dm_ad = []
# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# The counter is a one-element list so it can be mutated in place while the
# same object is shared with the progress thread.
keyword_counter_mal_dm_ad = [0]
keyword_input_token_mal_dm_ad = 0
keyword_output_token_mal_dm_ad = 0
keyword_start_time_loop_mal_dm_ad = time.time()

#Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_mal_dm_ad, keyword_total_iterations, stop_event))
message_thread.start()

# try/finally guarantees the progress thread is stopped even when a request
# fails mid-loop; otherwise it would keep running after the cell errored.
try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_mal_dm_ad[0] += 1
        # Reviews whose topic column equals 1
        filtered_comments_mal_dm_ad = keyword_dataframes['mal_dm_ad_final_sen_df_jul'][
            keyword_dataframes['mal_dm_ad_final_sen_df_jul'][topic] == 1
        ]['review_text'].tolist()
        # Skip topics without any matching review so no request is made for them.
        # NOTE(review): all comments of a topic go out in a single request here,
        # whereas the cells for some other stores send batches of 25 — confirm
        # this difference is intended.
        if filtered_comments_mal_dm_ad:
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_mal_dm_ad, topic)
            keyword_positive_output_mal_dm_ad.append(keywords)
            keyword_input_token_mal_dm_ad += input_tokens_loop
            keyword_output_token_mal_dm_ad += output_token_loop
finally:
    #Stopping the dynamic message thread
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_mal_dm_ad = time.time()
# The factors amount to 0.01 per 1,000 input tokens and 0.03 per 1,000 output tokens.
# NOTE(review): presumably the USD price list of the model in use — confirm it is still current.
keyword_cost_input_token_mal_dm_ad = round((0.01/1000)*keyword_input_token_mal_dm_ad, 2)
keyword_cost_output_token_mal_dm_ad = round((0.03/1000)*keyword_output_token_mal_dm_ad, 2)
keyword_total_cost_mal_dm_ad = keyword_cost_input_token_mal_dm_ad + keyword_cost_output_token_mal_dm_ad
keyword_total_time_loop_mal_dm_ad = keyword_end_time_loop_mal_dm_ad - keyword_start_time_loop_mal_dm_ad

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ", keyword_counter_mal_dm_ad[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_mal_dm_ad, 1))
print("Total Input Tokens - ", keyword_input_token_mal_dm_ad)
print("Total Input Cost = USD ", keyword_cost_input_token_mal_dm_ad)
print("Total Output Tokens - ", keyword_output_token_mal_dm_ad)
print("Total Output Cost = USD ", keyword_cost_output_token_mal_dm_ad)
print("Total Cost = USD ", round(keyword_total_cost_mal_dm_ad, 2))

Executed  10  Iterations
Total Execution time (in secs) -  26.0
Total Input Tokens -  57634
Total Input Cost = USD  0.58
Total Output Tokens -  632
Total Output Cost = USD  0.02
Total Cost = USD  0.6


In [485]:
# Build the positive keyword/phrase summary table for store 'mal_dm_ad' from the
# JSON strings collected in keyword_positive_output_mal_dm_ad.

# Output layout: three identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store display name once (it does not depend on the JSON being parsed):
# the first keyword_mappings key that contains the identifier wins.
input_store_identifier = 'mal_dm_ad'
store_name_mal_dm_ad = next(
    (value for key, value in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Collect plain dict rows and build the DataFrame once at the end; calling
# pd.concat for every row re-copies the whole frame each time.
keyword_rows_mal_dm_ad = []
for json_str in keyword_positive_output_mal_dm_ad:
    # A malformed or empty model reply must not abort the whole cell
    if not json_str or not json_str.strip():
        print("Warning: Empty JSON string encountered.")
        continue
    try:
        data = json.loads(json_str)
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON: {e}. Skipping this entry: {json_str}")
        continue

    # Each top-level key is a category (topic); its value is iterated as a list of
    # dicts that are read for 'keywords' and 'phrases' comma-separated strings.
    for category, content_list in data.items():
        for content in content_list:
            # One row per entry type, with only the current category column filled
            for entry_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_mal_dm_ad
                row_data['Sentiment'] = 'Positive'
                row_data['Type'] = entry_type
                row_data[category] = ', '.join(
                    part.strip() for part in content.get(entry_type, '').split(',')
                )
                keyword_rows_mal_dm_ad.append(row_data)

# Keep the expected columns even when nothing could be parsed
if keyword_rows_mal_dm_ad:
    positive_keywords_mal_dm_ad = pd.DataFrame(keyword_rows_mal_dm_ad)
else:
    positive_keywords_mal_dm_ad = pd.DataFrame(columns=columns)

# Consolidate to a single row per (Store Name, Sentiment, Type).
# Multiple replies for the same category are joined with ', ' so their entries
# stay separated; empty fragments are dropped.
positive_keywords_mal_dm_ad = (
    positive_keywords_mal_dm_ad
    .groupby(['Store Name', 'Sentiment', 'Type'])
    .agg(lambda x: ', '.join(v for v in x.dropna() if v))
    .reset_index()
)

positive_keywords_mal_dm_ad

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,Malabar Gold and Diamonds - Dalma Mall - Abu D...,Positive,keywords,"Trust :2, Honest :2, Trustworthy :1, Reliable ...",No relevant positive keywords/ phrases,"helpful : 50, friendly : 45, professional : 40...","Good designs :4, Awesome designs :1, Nice desi...","variety :3, options :3, selection :2, range :2...","discount :5, deal :5, offer :2, offers :1, sch...",low making :1,"reasonable :2, affordable :2, good price :2, b...","Good Quality :2, Quality :2, High-Quality :1",
1,Malabar Gold and Diamonds - Dalma Mall - Abu D...,Positive,phrases,"trusted advices :1, trustworthy, friendly :1, ...",No relevant positive keywords/ phrases,"very helpful and kind : 10, excellent customer...","best design :4, perfect design :2, unique desi...","variety of collections :1, wide collection :1,...","best possible discount :1, maximum discount :2...",making charge details very clearly :1,"reasonable price :2, affordable prices :2, goo...","Good quality golds :1, Quality of their jewelr...",


### mal_b1_ad

In [486]:
# Extract positive keywords/phrases for store 'mal_b1_ad': one positive_keywords()
# call per topic that has at least one review flagged with 1 for that topic.
# Raw results are accumulated in a list (one entry per processed topic).
keyword_positive_output_mal_b1_ad = []

# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Look the store's DataFrame up once (before the progress thread starts, so a
# missing key fails fast without leaving a thread behind)
sentiment_df_mal_b1_ad = keyword_dataframes['mal_b1_ad_final_sen_df_jul']

# One-element list: a mutable counter shared with the progress-message thread
keyword_counter_mal_b1_ad = [0]
keyword_input_token_mal_b1_ad = 0
keyword_output_token_mal_b1_ad = 0
keyword_start_time_loop_mal_b1_ad = time.time()

# Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_mal_b1_ad, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_mal_b1_ad[0] += 1
        # Reviews for which this topic has a value of 1
        filtered_comments_mal_b1_ad = sentiment_df_mal_b1_ad.loc[sentiment_df_mal_b1_ad[topic] == 1, 'review_text'].tolist()
        # Skip topics with no matching reviews (no API call, no tokens spent)
        if filtered_comments_mal_b1_ad:
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_mal_b1_ad, topic)
            keyword_positive_output_mal_b1_ad.append(keywords)
            keyword_input_token_mal_b1_ad += input_tokens_loop
            keyword_output_token_mal_b1_ad += output_token_loop
finally:
    # Always signal the progress thread and wait for it, even when a call above
    # raises; otherwise the thread would be left running after the cell fails
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_mal_b1_ad = time.time()
# Costs use 0.01 per 1000 input tokens and 0.03 per 1000 output tokens (printed as USD)
keyword_cost_input_token_mal_b1_ad = round((0.01/1000)*keyword_input_token_mal_b1_ad,2)
keyword_cost_output_token_mal_b1_ad = round((0.03/1000)*keyword_output_token_mal_b1_ad,2)
keyword_total_cost_mal_b1_ad = keyword_cost_input_token_mal_b1_ad + keyword_cost_output_token_mal_b1_ad
keyword_total_time_loop_mal_b1_ad = keyword_end_time_loop_mal_b1_ad - keyword_start_time_loop_mal_b1_ad

# Display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_mal_b1_ad[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_mal_b1_ad,1))
print("Total Input Tokens - ", keyword_input_token_mal_b1_ad)
print("Total Input Cost = USD ",keyword_cost_input_token_mal_b1_ad)
print("Total Output Tokens - ", keyword_output_token_mal_b1_ad)
print("Total Output Cost = USD ",keyword_cost_output_token_mal_b1_ad)
print("Total Cost = USD ",round(keyword_total_cost_mal_b1_ad,2))

Executed  10  Iterations
Total Execution time (in secs) -  35.6
Total Input Tokens -  47091
Total Input Cost = USD  0.47
Total Output Tokens -  760
Total Output Cost = USD  0.02
Total Cost = USD  0.49


In [487]:
# Build the positive keywords/phrases summary table for store 'mal_b1_ad'.

# Output columns: identifying fields followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store display name once (it is the same for every row): the first
# keyword_mappings entry whose key contains the identifier, else a placeholder
input_store_identifier = 'mal_b1_ad'
store_name_mal_b1_ad = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name_mal_b1_ad = value
        break

# Collect rows as plain dicts and build the DataFrame once, instead of calling
# pd.concat for every row (which re-copies the whole frame on each append)
positive_keyword_rows_mal_b1_ad = []
for json_str in keyword_positive_output_mal_b1_ad:
    # Each entry is a JSON string mapping a category to a list of items that
    # carry comma-separated 'keywords' and 'phrases' strings
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # One row per type; keywords are handled first, phrases next
            for row_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_mal_b1_ad
                row_data['Sentiment'] = 'Positive'
                row_data['Type'] = row_type
                # Trim whitespace around every comma-separated item
                row_data[category] = ', '.join(item.strip() for item in content[row_type].split(','))
                positive_keyword_rows_mal_b1_ad.append(row_data)

if positive_keyword_rows_mal_b1_ad:
    positive_keywords_mal_b1_ad = pd.DataFrame(positive_keyword_rows_mal_b1_ad)
else:
    # No extraction results: keep an empty frame that still has the expected columns
    positive_keywords_mal_b1_ad = pd.DataFrame(columns=columns)

# Consolidate rows that share the same 'Store Name', 'Sentiment' and 'Type' into a
# single row; non-null values of each topic column are joined with a space
positive_keywords_mal_b1_ad = positive_keywords_mal_b1_ad.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_mal_b1_ad

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,Malabar Gold and Diamonds - Hamdan Street ( Br...,Positive,keywords,"Reliable :1, Trust :1, Confidence :1, Assuranc...","good experience : 25, great experience : 15, w...","Good service : 300, Excellent service : 50, Ve...","designs :15, collection :5, models :1, pieces ...","collections : 15, variety : 3, models : 2, opt...","discount :7, offers :3, deal :3, deals :2, off...","reasonable :1, low :1, less :1","Good price :4, Best price :3, Reasonable price...","quality :8, good :2, outstanding :2, high :1, ...","exchanging :1, traded :1"
1,Malabar Gold and Diamonds - Hamdan Street ( Br...,Positive,phrases,No relevant positive phrases,"very good experience : 5, had a great experien...","Good customer service : 10, Very accommodating...","beautiful designs :3, nice designs :3, wonderf...","Good collection : 50, Very good collection : 1...","good discount :3, good deals :2, excellent dis...","very reasonable and very low :1, reasonably go...","Good prices :1, Beat price :1, Very good price...","high quality of the product :1, quality gold :...","buying and exchanging the jewelry :1, sell our..."


### mal_b2_ad

In [488]:
# Extract positive keywords/phrases for store 'mal_b2_ad': one positive_keywords()
# call per topic that has at least one review flagged with 1 for that topic.
# Raw results are accumulated in a list (one entry per processed topic).
keyword_positive_output_mal_b2_ad = []

# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Look the store's DataFrame up once (before the progress thread starts, so a
# missing key fails fast without leaving a thread behind)
sentiment_df_mal_b2_ad = keyword_dataframes['mal_b2_ad_final_sen_df_jul']

# One-element list: a mutable counter shared with the progress-message thread
keyword_counter_mal_b2_ad = [0]
keyword_input_token_mal_b2_ad = 0
keyword_output_token_mal_b2_ad = 0
keyword_start_time_loop_mal_b2_ad = time.time()

# Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_mal_b2_ad, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_mal_b2_ad[0] += 1
        # Reviews for which this topic has a value of 1
        filtered_comments_mal_b2_ad = sentiment_df_mal_b2_ad.loc[sentiment_df_mal_b2_ad[topic] == 1, 'review_text'].tolist()
        # Skip topics with no matching reviews (no API call, no tokens spent)
        if filtered_comments_mal_b2_ad:
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_mal_b2_ad, topic)
            keyword_positive_output_mal_b2_ad.append(keywords)
            keyword_input_token_mal_b2_ad += input_tokens_loop
            keyword_output_token_mal_b2_ad += output_token_loop
finally:
    # Always signal the progress thread and wait for it, even when a call above
    # raises; otherwise the thread would be left running after the cell fails
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_mal_b2_ad = time.time()
# Costs use 0.01 per 1000 input tokens and 0.03 per 1000 output tokens (printed as USD)
keyword_cost_input_token_mal_b2_ad = round((0.01/1000)*keyword_input_token_mal_b2_ad,2)
keyword_cost_output_token_mal_b2_ad = round((0.03/1000)*keyword_output_token_mal_b2_ad,2)
keyword_total_cost_mal_b2_ad = keyword_cost_input_token_mal_b2_ad + keyword_cost_output_token_mal_b2_ad
keyword_total_time_loop_mal_b2_ad = keyword_end_time_loop_mal_b2_ad - keyword_start_time_loop_mal_b2_ad

# Display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_mal_b2_ad[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_mal_b2_ad,1))
print("Total Input Tokens - ", keyword_input_token_mal_b2_ad)
print("Total Input Cost = USD ",keyword_cost_input_token_mal_b2_ad)
print("Total Output Tokens - ", keyword_output_token_mal_b2_ad)
print("Total Output Cost = USD ",keyword_cost_output_token_mal_b2_ad)
print("Total Cost = USD ",round(keyword_total_cost_mal_b2_ad,2))

Executed  10  Iterations
Total Execution time (in secs) -  44.1
Total Input Tokens -  122486
Total Input Cost = USD  1.22
Total Output Tokens -  815
Total Output Cost = USD  0.02
Total Cost = USD  1.24


In [489]:
# Build the positive keywords/phrases summary table for store 'mal_b2_ad'.

# Output columns: identifying fields followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store display name once (it is the same for every row): the first
# keyword_mappings entry whose key contains the identifier, else a placeholder
input_store_identifier = 'mal_b2_ad'
store_name_mal_b2_ad = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name_mal_b2_ad = value
        break

# Collect rows as plain dicts and build the DataFrame once, instead of calling
# pd.concat for every row (which re-copies the whole frame on each append)
positive_keyword_rows_mal_b2_ad = []
for json_str in keyword_positive_output_mal_b2_ad:
    # Each entry is a JSON string mapping a category to a list of items that
    # carry comma-separated 'keywords' and 'phrases' strings
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # One row per type; keywords are handled first, phrases next
            for row_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_mal_b2_ad
                row_data['Sentiment'] = 'Positive'
                row_data['Type'] = row_type
                # Trim whitespace around every comma-separated item
                row_data[category] = ', '.join(item.strip() for item in content[row_type].split(','))
                positive_keyword_rows_mal_b2_ad.append(row_data)

if positive_keyword_rows_mal_b2_ad:
    positive_keywords_mal_b2_ad = pd.DataFrame(positive_keyword_rows_mal_b2_ad)
else:
    # No extraction results: keep an empty frame that still has the expected columns
    positive_keywords_mal_b2_ad = pd.DataFrame(columns=columns)

# Consolidate rows that share the same 'Store Name', 'Sentiment' and 'Type' into a
# single row; non-null values of each topic column are joined with a space
positive_keywords_mal_b2_ad = positive_keywords_mal_b2_ad.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_mal_b2_ad

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,Malabar Gold and Diamonds - Hamdan Street (Bra...,Positive,keywords,"Trusted :3, Trust :2, Confidence :1, Secure :1...","good experience : 50, great experience : 40, n...","good service : 100, excellent service : 50, he...","design : 15, designs : 12, unique : 3, beautif...","collection : 98, variety : 15, options : 14, m...","good discount :15, best discount :8, maximum d...","reasonable making charges:3, minimum making ch...","best price :5, good price :5, reasonable price...","quality :10, good :6, pure :2, superior :1, ge...",exchange :4
1,Malabar Gold and Diamonds - Hamdan Street (Bra...,Positive,phrases,"Trusted shop :1, Trusted jewellery shop :1, Tr...","pleasant experience : 10, smooth experience : ...","very good service : 15, great service : 10, go...","amazing designs : 3, unique designs : 2, beaut...","wide variety : 3, variety of options : 3, vari...","good deal :5, great discount :4, best offer :3...","good rate on making:1, amazing deals on making...","best rates for make customers satisfied :2, go...","good quality :3, high-quality :3, quality prod...","very accommodating :1, very professional and h..."


### mal_lu_ad

In [490]:
# Extract positive keywords/phrases for store 'mal_lu_ad': one positive_keywords()
# call per topic that has at least one review flagged with 1 for that topic.
# Raw results are accumulated in a list (one entry per processed topic).
keyword_positive_output_mal_lu_ad = []

# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Look the store's DataFrame up once (before the progress thread starts, so a
# missing key fails fast without leaving a thread behind)
sentiment_df_mal_lu_ad = keyword_dataframes['mal_lu_ad_final_sen_df_jul']

# One-element list: a mutable counter shared with the progress-message thread
keyword_counter_mal_lu_ad = [0]
keyword_input_token_mal_lu_ad = 0
keyword_output_token_mal_lu_ad = 0
keyword_start_time_loop_mal_lu_ad = time.time()

# Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_mal_lu_ad, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_mal_lu_ad[0] += 1
        # Reviews for which this topic has a value of 1
        filtered_comments_mal_lu_ad = sentiment_df_mal_lu_ad.loc[sentiment_df_mal_lu_ad[topic] == 1, 'review_text'].tolist()
        # Skip topics with no matching reviews (no API call, no tokens spent)
        if filtered_comments_mal_lu_ad:
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_mal_lu_ad, topic)
            keyword_positive_output_mal_lu_ad.append(keywords)
            keyword_input_token_mal_lu_ad += input_tokens_loop
            keyword_output_token_mal_lu_ad += output_token_loop
finally:
    # Always signal the progress thread and wait for it, even when a call above
    # raises; otherwise the thread would be left running after the cell fails
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_mal_lu_ad = time.time()
# Costs use 0.01 per 1000 input tokens and 0.03 per 1000 output tokens (printed as USD)
keyword_cost_input_token_mal_lu_ad = round((0.01/1000)*keyword_input_token_mal_lu_ad,2)
keyword_cost_output_token_mal_lu_ad = round((0.03/1000)*keyword_output_token_mal_lu_ad,2)
keyword_total_cost_mal_lu_ad = keyword_cost_input_token_mal_lu_ad + keyword_cost_output_token_mal_lu_ad
keyword_total_time_loop_mal_lu_ad = keyword_end_time_loop_mal_lu_ad - keyword_start_time_loop_mal_lu_ad

# Display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_mal_lu_ad[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_mal_lu_ad,1))
print("Total Input Tokens - ", keyword_input_token_mal_lu_ad)
print("Total Input Cost = USD ",keyword_cost_input_token_mal_lu_ad)
print("Total Output Tokens - ", keyword_output_token_mal_lu_ad)
print("Total Output Cost = USD ",keyword_cost_output_token_mal_lu_ad)
print("Total Cost = USD ",round(keyword_total_cost_mal_lu_ad,2))

Executed  10  Iterations
Total Execution time (in secs) -  51.1
Total Input Tokens -  104545
Total Input Cost = USD  1.05
Total Output Tokens -  863
Total Output Cost = USD  0.03
Total Cost = USD  1.08


In [491]:
# Build the positive keywords/phrases summary table for store 'mal_lu_ad'.

# Output columns: identifying fields followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store display name once (it is the same for every row): the first
# keyword_mappings entry whose key contains the identifier, else a placeholder
input_store_identifier = 'mal_lu_ad'
store_name_mal_lu_ad = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name_mal_lu_ad = value
        break

# Collect rows as plain dicts and build the DataFrame once, instead of calling
# pd.concat for every row (which re-copies the whole frame on each append)
positive_keyword_rows_mal_lu_ad = []
for json_str in keyword_positive_output_mal_lu_ad:
    # Each entry is a JSON string mapping a category to a list of items that
    # carry comma-separated 'keywords' and 'phrases' strings
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # One row per type; keywords are handled first, phrases next
            for row_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_mal_lu_ad
                row_data['Sentiment'] = 'Positive'
                row_data['Type'] = row_type
                # Trim whitespace around every comma-separated item
                row_data[category] = ', '.join(item.strip() for item in content[row_type].split(','))
                positive_keyword_rows_mal_lu_ad.append(row_data)

if positive_keyword_rows_mal_lu_ad:
    positive_keywords_mal_lu_ad = pd.DataFrame(positive_keyword_rows_mal_lu_ad)
else:
    # No extraction results: keep an empty frame that still has the expected columns
    positive_keywords_mal_lu_ad = pd.DataFrame(columns=columns)

# Consolidate rows that share the same 'Store Name', 'Sentiment' and 'Type' into a
# single row; non-null values of each topic column are joined with a space
positive_keywords_mal_lu_ad = positive_keywords_mal_lu_ad.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_mal_lu_ad

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,Malabar Gold and Diamonds - Lulu Hypermarket -...,Positive,keywords,"trustworthy :2, reliable :2, authentic :1, tru...","Good experience : 50, Great experience : 30, W...","good service : 320, excellent service : 150, h...","Good designs: 12, Unique designs: 3, Beautiful...","Good collection : 20, Nice collection : 15, Va...","good discount :15, best discount :5, good deal...","less charges :2, best making charges :1, less ...","best price :15, good price :14, reasonable pri...","quality :10, good quality :8, excellent qualit...","exchange :5, exchanging :2"
1,Malabar Gold and Diamonds - Lulu Hypermarket -...,Positive,phrases,"always trust Malabar Gold :1, trustworthy to b...","Good customer service : 5, Great customer serv...","very helpful and kind : 10, friendly and helpf...","Beautiful designs at Malabar: 2, Nice design a...","Variety of collections : 3, Wide variety of de...","giving good discount :3, giving the best disco...",No relevant positive phrases,"Chetan giving the best price :10, Chetan givin...","quality surpassed my expectations :1, quality ...","good price :1, generous discount :1, Best poli..."


### mal_mb

In [492]:
# Extract positive keywords/phrases for store 'mal_mb'. Unlike the single-call
# cells for the other stores, the matching reviews of each topic are sent to
# positive_keywords() in fixed-size batches, so a topic can contribute several
# entries to the output list.
keyword_positive_output_mal_mb = []

# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = [
    'Customer Confidence', 'Store Experience', 'Store Staff', 'Product Design',
    'Product Variety', 'Discount', 'Making Charge', 'Price',
    'Product Quality', 'Jewellery Exchange'
]

# Number of reviews passed to positive_keywords() per call
keyword_batch_size_mal_mb = 25

# Look the store's DataFrame up once (before the progress thread starts, so a
# missing key fails fast without leaving a thread behind)
sentiment_df_mal_mb = keyword_dataframes['mal_mb_final_sen_df_jul']

# One-element list: a mutable counter shared with the progress-message thread
keyword_counter_mal_mb = [0]
keyword_input_token_mal_mb = 0
keyword_output_token_mal_mb = 0
keyword_start_time_loop_mal_mb = time.time()

# Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_mal_mb, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_mal_mb[0] += 1
        # Reviews for which this topic has a value of 1
        filtered_comments_mal_mb = sentiment_df_mal_mb.loc[sentiment_df_mal_mb[topic] == 1, 'review_text'].tolist()

        # An empty list yields no batches, so topics without matches are skipped
        for i in range(0, len(filtered_comments_mal_mb), keyword_batch_size_mal_mb):
            # Current batch (shorter than the batch size if it is the last one)
            comment_batch = filtered_comments_mal_mb[i:i + keyword_batch_size_mal_mb]
            keywords, input_tokens_loop, output_token_loop = positive_keywords(comment_batch, topic)
            keyword_positive_output_mal_mb.append(keywords)
            keyword_input_token_mal_mb += input_tokens_loop
            keyword_output_token_mal_mb += output_token_loop
finally:
    # Always signal the progress thread and wait for it, even when a call above
    # raises; otherwise the thread would be left running after the cell fails
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_mal_mb = time.time()
# Costs use 0.01 per 1000 input tokens and 0.03 per 1000 output tokens (printed as USD)
keyword_cost_input_token_mal_mb = round((0.01 / 1000) * keyword_input_token_mal_mb, 2)
keyword_cost_output_token_mal_mb = round((0.03 / 1000) * keyword_output_token_mal_mb, 2)
keyword_total_cost_mal_mb = keyword_cost_input_token_mal_mb + keyword_cost_output_token_mal_mb
keyword_total_time_loop_mal_mb = keyword_end_time_loop_mal_mb - keyword_start_time_loop_mal_mb

# Display loop performance parameters & cost
clear_output(wait=True)
print("Executed", keyword_counter_mal_mb[0], "Iterations")
print("Total Execution time (in secs) -", round(keyword_total_time_loop_mal_mb, 1))
print("Total Input Tokens -", keyword_input_token_mal_mb)
print("Total Input Cost = USD", keyword_cost_input_token_mal_mb)
print("Total Output Tokens -", keyword_output_token_mal_mb)
print("Total Output Cost = USD", keyword_cost_output_token_mal_mb)
print("Total Cost = USD", round(keyword_total_cost_mal_mb, 2))


Executed 10 Iterations
Total Execution time (in secs) - 678.1
Total Input Tokens - 363872
Total Input Cost = USD 3.64
Total Output Tokens - 22514
Total Output Cost = USD 0.68
Total Cost = USD 4.32


In [493]:
# Build the positive keywords/phrases summary table for store 'mal_mb'.

# Output columns: identifying fields followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store display name once (it is the same for every row): the first
# keyword_mappings entry whose key contains the identifier, else a placeholder
input_store_identifier = 'mal_mb'
store_name_mal_mb = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name_mal_mb = value
        break

# Collect rows as plain dicts and build the DataFrame once, instead of calling
# pd.concat for every row (which re-copies the whole frame on each append)
positive_keyword_rows_mal_mb = []
for json_str in keyword_positive_output_mal_mb:
    # Each entry is a JSON string mapping a category to a list of items that
    # carry comma-separated 'keywords' and 'phrases' strings
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # One row per type; keywords are handled first, phrases next
            for row_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_mal_mb
                row_data['Sentiment'] = 'Positive'
                row_data['Type'] = row_type
                # Trim whitespace around every comma-separated item
                row_data[category] = ', '.join(item.strip() for item in content[row_type].split(','))
                positive_keyword_rows_mal_mb.append(row_data)

if positive_keyword_rows_mal_mb:
    positive_keywords_mal_mb = pd.DataFrame(positive_keyword_rows_mal_mb)
else:
    # No extraction results: keep an empty frame that still has the expected columns
    positive_keywords_mal_mb = pd.DataFrame(columns=columns)

# Consolidate rows that share the same 'Store Name', 'Sentiment' and 'Type' into a
# single row; non-null values of each topic column are joined with a space.
# NOTE(review): keyword_positive_output_mal_mb can hold several results per topic,
# so the pieces of one cell end up separated by a bare space (no comma) and
# repeated keywords keep separate counts - confirm this is the intended format.
positive_keywords_mal_mb = positive_keywords_mal_mb.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_mal_mb

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,Malabar Gold and Diamonds - Meena Bazar - Dubai,Positive,keywords,"Trust :2, Reliable :2, Genuine :2, Loyal :1, F...","Good experience :8, Great experience :6, Nice ...","helpful :5, friendly :3, patient :3, courteous...","designs :15, good designs :5, best design :2, ...","collection :10, options :3, variety :2, select...","discount :8, deal :5, offers :3, good discount...","discount :3, reduction :1, competitive :2, low...","best price :5, reasonable prices :2, great pri...","quality :8, good quality :3, excellent :2, top...","exchange :9, value :2, process :2, deal :1, co..."
1,Malabar Gold and Diamonds - Meena Bazar - Dubai,Positive,phrases,"Trust him with any purchase :1, Trust I have w...","Great shopping experience :2, Nice shopping ex...","very helpful :2, good service :2, excellent se...","good designs and good prices :1, designs were ...","great variety of jewelry collection :1, variou...","best possible discount :2, good discount on ma...","best reduction on making charge :1, discount o...","offered us the best possible price :1, helped ...",quality of the products exceeded my expectatio...,"great value :1, excellent service :1, great de..."


### mal_sh_ad

In [494]:
# Extract positive keywords/phrases for store 'mal_sh_ad': one positive_keywords()
# call per topic that has at least one review flagged with 1 for that topic.
# Raw results are accumulated in a list (one entry per processed topic).
keyword_positive_output_mal_sh_ad = []

# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Look the store's DataFrame up once (before the progress thread starts, so a
# missing key fails fast without leaving a thread behind)
sentiment_df_mal_sh_ad = keyword_dataframes['mal_sh_ad_final_sen_df_jul']

# One-element list: a mutable counter shared with the progress-message thread
keyword_counter_mal_sh_ad = [0]
keyword_input_token_mal_sh_ad = 0
keyword_output_token_mal_sh_ad = 0
keyword_start_time_loop_mal_sh_ad = time.time()

# Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_mal_sh_ad, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_mal_sh_ad[0] += 1
        # Reviews for which this topic has a value of 1
        filtered_comments_mal_sh_ad = sentiment_df_mal_sh_ad.loc[sentiment_df_mal_sh_ad[topic] == 1, 'review_text'].tolist()
        # Skip topics with no matching reviews (no API call, no tokens spent)
        if filtered_comments_mal_sh_ad:
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_mal_sh_ad, topic)
            keyword_positive_output_mal_sh_ad.append(keywords)
            keyword_input_token_mal_sh_ad += input_tokens_loop
            keyword_output_token_mal_sh_ad += output_token_loop
finally:
    # Always signal the progress thread and wait for it, even when a call above
    # raises; otherwise the thread would be left running after the cell fails
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_mal_sh_ad = time.time()
# Costs use 0.01 per 1000 input tokens and 0.03 per 1000 output tokens (printed as USD)
keyword_cost_input_token_mal_sh_ad = round((0.01/1000)*keyword_input_token_mal_sh_ad,2)
keyword_cost_output_token_mal_sh_ad = round((0.03/1000)*keyword_output_token_mal_sh_ad,2)
keyword_total_cost_mal_sh_ad = keyword_cost_input_token_mal_sh_ad + keyword_cost_output_token_mal_sh_ad
keyword_total_time_loop_mal_sh_ad = keyword_end_time_loop_mal_sh_ad - keyword_start_time_loop_mal_sh_ad

# Display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_mal_sh_ad[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_mal_sh_ad,1))
print("Total Input Tokens - ", keyword_input_token_mal_sh_ad)
print("Total Input Cost = USD ",keyword_cost_input_token_mal_sh_ad)
print("Total Output Tokens - ", keyword_output_token_mal_sh_ad)
print("Total Output Cost = USD ",keyword_cost_output_token_mal_sh_ad)
print("Total Cost = USD ",round(keyword_total_cost_mal_sh_ad,2))

Executed  10  Iterations
Total Execution time (in secs) -  33.6
Total Input Tokens -  51326
Total Input Cost = USD  0.51
Total Output Tokens -  770
Total Output Cost = USD  0.02
Total Cost = USD  0.53


In [495]:
# Build the positive keywords/phrases summary table for store 'mal_sh_ad'.

# Output columns: identifying fields followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store display name once (it is the same for every row): the first
# keyword_mappings entry whose key contains the identifier, else a placeholder
input_store_identifier = 'mal_sh_ad'
store_name_mal_sh_ad = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name_mal_sh_ad = value
        break

# Collect rows as plain dicts and build the DataFrame once, instead of calling
# pd.concat for every row (which re-copies the whole frame on each append)
positive_keyword_rows_mal_sh_ad = []
for json_str in keyword_positive_output_mal_sh_ad:
    # Each entry is a JSON string mapping a category to a list of items that
    # carry comma-separated 'keywords' and 'phrases' strings
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # One row per type; keywords are handled first, phrases next
            for row_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_mal_sh_ad
                row_data['Sentiment'] = 'Positive'
                row_data['Type'] = row_type
                # Trim whitespace around every comma-separated item
                row_data[category] = ', '.join(item.strip() for item in content[row_type].split(','))
                positive_keyword_rows_mal_sh_ad.append(row_data)

if positive_keyword_rows_mal_sh_ad:
    positive_keywords_mal_sh_ad = pd.DataFrame(positive_keyword_rows_mal_sh_ad)
else:
    # No extraction results: keep an empty frame that still has the expected columns
    positive_keywords_mal_sh_ad = pd.DataFrame(columns=columns)

# Consolidate rows that share the same 'Store Name', 'Sentiment' and 'Type' into a
# single row; non-null values of each topic column are joined with a space
positive_keywords_mal_sh_ad = positive_keywords_mal_sh_ad.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_mal_sh_ad

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,Malabar Gold and Diamonds - Shabia Musaffah,Positive,keywords,"Trustworthy :1, Credibility :1, Transparency :...","Good experience : 45, Great experience : 20, N...","Good service : 1000, Excellent service : 500, ...","Good design :30, Nice design :10, Best design ...","collection : 45, collections : 40, varieties :...","discount :5, deal :3, sale :2, offers :1, prom...","reasonable making charges :1, best rates :1","reasonable :3, best price :3, good price :3, f...","quality :5, Durable :1","exchange policy :1, gold exchange :1"
1,Malabar Gold and Diamonds - Shabia Musaffah,Positive,phrases,"Complete credibility :1, Trustworthy and for t...","Very good experience : 10, Had a good experien...","Very good service : 50, Good customer service ...","Very good designs :15, Very nice designs :3, B...","good collection : 10, nice collection : 8, bes...","good discount :2, nice deal :1, best discount ...","reasonable making charges :1, best rates in ma...","reasonable prices :1, best price gold :1, good...","great quality :1, high quality :2, excellent q...",exchange your gold without any deductions :1


### mal_b2_af

In [496]:
# Collect positive keywords/phrases per topic for store 'mal_b2_af'.
# One positive_keywords() result is appended to this list for every topic that
# has at least one matching review; topics without matching reviews are skipped.
keyword_positive_output_mal_b2_af = []

# Topic columns to iterate over (the 'review_text' column is not a topic)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Mutable one-element list: the same object is handed to the progress thread
# and incremented in the loop below
keyword_counter_mal_b2_af = [0]
keyword_input_token_mal_b2_af = 0
keyword_output_token_mal_b2_af = 0
keyword_start_time_loop_mal_b2_af = time.time()

# Look the store's DataFrame up once, before the progress thread starts, so a
# missing key fails fast instead of leaving a background thread running
keyword_source_df_mal_b2_af = keyword_dataframes['mal_b2_af_final_sen_df_jul']

# Threading setup: print_dynamic_message_keyword is defined elsewhere in the
# notebook; presumably it reports progress until stop_event is set
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_mal_b2_af, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_mal_b2_af[0] += 1
        # Review texts of the rows where this topic column equals 1
        filtered_comments_mal_b2_af = keyword_source_df_mal_b2_af[keyword_source_df_mal_b2_af[topic] == 1]['review_text'].tolist()
        # Only call positive_keywords when there is at least one matching review
        if filtered_comments_mal_b2_af:
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_mal_b2_af, topic)
            keyword_positive_output_mal_b2_af.append(keywords)
            keyword_input_token_mal_b2_af += input_tokens_loop
            keyword_output_token_mal_b2_af += output_token_loop
finally:
    # Always stop the dynamic message thread, even if a call above raised;
    # otherwise it would never be signalled to stop
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_mal_b2_af = time.time()
# Cost model used here: USD 0.01 per 1000 input tokens, USD 0.03 per 1000 output tokens
keyword_cost_input_token_mal_b2_af = round((0.01/1000)*keyword_input_token_mal_b2_af,2)
keyword_cost_output_token_mal_b2_af = round((0.03/1000)*keyword_output_token_mal_b2_af,2)
keyword_total_cost_mal_b2_af = keyword_cost_input_token_mal_b2_af + keyword_cost_output_token_mal_b2_af
keyword_total_time_loop_mal_b2_af = keyword_end_time_loop_mal_b2_af - keyword_start_time_loop_mal_b2_af

# Display loop performance parameters & cost (replaces the progress message)
clear_output(wait=True)
print("Executed ",keyword_counter_mal_b2_af[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_mal_b2_af,1))
print("Total Input Tokens - ", keyword_input_token_mal_b2_af)
print("Total Input Cost = USD ",keyword_cost_input_token_mal_b2_af)
print("Total Output Tokens - ", keyword_output_token_mal_b2_af)
print("Total Output Cost = USD ",keyword_cost_output_token_mal_b2_af)
print("Total Cost = USD ",round(keyword_total_cost_mal_b2_af,2))

Executed  10  Iterations
Total Execution time (in secs) -  30.1
Total Input Tokens -  57161
Total Input Cost = USD  0.57
Total Output Tokens -  734
Total Output Cost = USD  0.02
Total Cost = USD  0.59


In [497]:
# Build the positive keyword/phrase summary table for store 'mal_b2_af':
# after consolidation there is one row per ('Store Name', 'Sentiment', 'Type')
# with one column per topic.

# Column layout: three identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store's display name once (it is identical for every row).
# The first keyword_mappings key that contains the identifier wins.
input_store_identifier = 'mal_b2_af'
store_name_mal_b2_af = next(
    (value for key, value in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Accumulate rows as plain dicts and build the DataFrame once at the end;
# calling pd.concat inside the loop re-copies the whole frame on every append.
keyword_rows_mal_b2_af = []
for json_str in keyword_positive_output_mal_b2_af:
    # Each entry is a JSON string; as consumed here it maps a category (topic)
    # to a list of objects carrying comma-separated 'keywords' and 'phrases' strings
    data = json.loads(json_str)
    for category, content_list in data.items():
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name_mal_b2_af
        row_data['Sentiment'] = 'Positive'
        for content in content_list:
            # Emit the 'keywords' row first, then the 'phrases' row
            for row_type in ('keywords', 'phrases'):
                row_data['Type'] = row_type
                # Strip whitespace around each comma-separated item
                row_data[category] = ', '.join(item.strip() for item in content[row_type].split(','))
                # Snapshot the dict because row_data is reused for the next row
                keyword_rows_mal_b2_af.append(dict(row_data))

# With no rows, fall back to an empty frame that still has the expected
# columns so the groupby below can find its keys
positive_keywords_mal_b2_af = (
    pd.DataFrame(keyword_rows_mal_b2_af)
    if keyword_rows_mal_b2_af
    else pd.DataFrame(columns=columns)
)

# Consolidate rows that share 'Store Name', 'Sentiment' and 'Type' into a single
# row, joining the non-null values of every topic column with a space
positive_keywords_mal_b2_af = positive_keywords_mal_b2_af.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_mal_b2_af

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,Malabar Gold and Diamonds - Souq Al Kabeer Bui...,Positive,keywords,"Trust :3, Trustable :2, Reliable :1, Honest :1...","Great : 15, Good : 10, Nice : 8, Excellent : 5...","good service : 320, very good : 120, excellent...","design : 20, designs : 15, model : 1","varieties :3, variety :3, selection :3, option...","good discount :10, best discount :4, additiona...","reasonable making charges:1, best making charge:1","good price :4, reasonable prices :3, best pric...","quality :3, good :2, premium :1, amazing :1","exchanging :2, old gold :2, new :2"
1,Malabar Gold and Diamonds - Souq Al Kabeer Bui...,Positive,phrases,"Trustable place to buy gold :1, Malabar Gold h...","Great experience : 20, Good experience : 15, N...","very good service : 30, excellent service by :...","beautiful designs : 2, good designs : 5, nice ...","lots of variety :2, wide variety :2, variety o...","good deal :8, best offer :2, amazing deal :2, ...",No relevant positive phrases,"make the most of your money :1, best pricing p...","good quality :2, premium quality :1, amazing q...","exchanging my old gold :1, exchanging old gold..."


### mna_mb

In [498]:
# Collect positive keywords/phrases per topic for store 'mna_mb'.
# One positive_keywords() result is appended to this list for every topic that
# has at least one matching review; topics without matching reviews are skipped.
keyword_positive_output_mna_mb = []

# Topic columns to iterate over (the 'review_text' column is not a topic)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Mutable one-element list: the same object is handed to the progress thread
# and incremented in the loop below
keyword_counter_mna_mb = [0]
keyword_input_token_mna_mb = 0
keyword_output_token_mna_mb = 0
keyword_start_time_loop_mna_mb = time.time()

# Look the store's DataFrame up once, before the progress thread starts, so a
# missing key fails fast instead of leaving a background thread running
keyword_source_df_mna_mb = keyword_dataframes['mna_mb_final_sen_df_jul']

# Threading setup: print_dynamic_message_keyword is defined elsewhere in the
# notebook; presumably it reports progress until stop_event is set
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_mna_mb, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_mna_mb[0] += 1
        # Review texts of the rows where this topic column equals 1
        filtered_comments_mna_mb = keyword_source_df_mna_mb[keyword_source_df_mna_mb[topic] == 1]['review_text'].tolist()
        # Only call positive_keywords when there is at least one matching review
        if filtered_comments_mna_mb:
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_mna_mb, topic)
            keyword_positive_output_mna_mb.append(keywords)
            keyword_input_token_mna_mb += input_tokens_loop
            keyword_output_token_mna_mb += output_token_loop
finally:
    # Always stop the dynamic message thread, even if a call above raised;
    # otherwise it would never be signalled to stop
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_mna_mb = time.time()
# Cost model used here: USD 0.01 per 1000 input tokens, USD 0.03 per 1000 output tokens
keyword_cost_input_token_mna_mb = round((0.01/1000)*keyword_input_token_mna_mb,2)
keyword_cost_output_token_mna_mb = round((0.03/1000)*keyword_output_token_mna_mb,2)
keyword_total_cost_mna_mb = keyword_cost_input_token_mna_mb + keyword_cost_output_token_mna_mb
keyword_total_time_loop_mna_mb = keyword_end_time_loop_mna_mb - keyword_start_time_loop_mna_mb

# Display loop performance parameters & cost (replaces the progress message)
clear_output(wait=True)
print("Executed ",keyword_counter_mna_mb[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_mna_mb,1))
print("Total Input Tokens - ", keyword_input_token_mna_mb)
print("Total Input Cost = USD ",keyword_cost_input_token_mna_mb)
print("Total Output Tokens - ", keyword_output_token_mna_mb)
print("Total Output Cost = USD ",keyword_cost_output_token_mna_mb)
print("Total Cost = USD ",round(keyword_total_cost_mna_mb,2))

Executed  10  Iterations
Total Execution time (in secs) -  33.6
Total Input Tokens -  46524
Total Input Cost = USD  0.47
Total Output Tokens -  687
Total Output Cost = USD  0.02
Total Cost = USD  0.49


In [499]:
# Build the positive keyword/phrase summary table for store 'mna_mb':
# after consolidation there is one row per ('Store Name', 'Sentiment', 'Type')
# with one column per topic.

# Column layout: three identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store's display name once (it is identical for every row).
# The first keyword_mappings key that contains the identifier wins.
input_store_identifier = 'mna_mb'
store_name_mna_mb = next(
    (value for key, value in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Accumulate rows as plain dicts and build the DataFrame once at the end;
# calling pd.concat inside the loop re-copies the whole frame on every append.
keyword_rows_mna_mb = []
for json_str in keyword_positive_output_mna_mb:
    # Each entry is a JSON string; as consumed here it maps a category (topic)
    # to a list of objects carrying comma-separated 'keywords' and 'phrases' strings
    data = json.loads(json_str)
    for category, content_list in data.items():
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name_mna_mb
        row_data['Sentiment'] = 'Positive'
        for content in content_list:
            # Emit the 'keywords' row first, then the 'phrases' row
            for row_type in ('keywords', 'phrases'):
                row_data['Type'] = row_type
                # Strip whitespace around each comma-separated item
                row_data[category] = ', '.join(item.strip() for item in content[row_type].split(','))
                # Snapshot the dict because row_data is reused for the next row
                keyword_rows_mna_mb.append(dict(row_data))

# With no rows, fall back to an empty frame that still has the expected
# columns so the groupby below can find its keys
positive_keywords_mna_mb = (
    pd.DataFrame(keyword_rows_mna_mb)
    if keyword_rows_mna_mb
    else pd.DataFrame(columns=columns)
)

# Consolidate rows that share 'Store Name', 'Sentiment' and 'Type' into a single
# row, joining the non-null values of every topic column with a space
positive_keywords_mna_mb = positive_keywords_mna_mb.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_mna_mb

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,Meena Jewellers - Meena Bazar,Positive,keywords,"trusted :3, trust :2, trustworthy :2, reliable...","professional : 10, helpful : 8, knowledgeable ...","professional : 10, helpful : 9, knowledgeable ...","unique :5, elegant :4, stylish :4, intricate :...","variety :8, collection :7, collections :5, des...","discount :3, deals :2","reasonable :3, best :3, decent :1, less :1, ve...","value : 8, reasonable : 6, transparent : 6, af...","craftsmanship :10, quality :9, authentic :3, t...",
1,Meena Jewellers - Meena Bazar,Positive,phrases,"trusted showroom :1, trust this place :1, trus...","great experience : 5, wonderful experience : 4...","very helpful staff : 3, extremely helpful and ...","best designs :5, excellent design :3, unique d...","wide variety :3, variety of designs :2, variet...","best discounts :2, great discount :2, best dea...","reasonable making charges :2, best making char...","value for money : 12, reasonable prices : 3, b...","quality of craftsmanship is excellent :3, craf...",


### min_ak

In [500]:
# Collect positive keywords/phrases per topic for store 'min_ak'.
# One positive_keywords() result is appended to this list for every topic that
# has at least one matching review; topics without matching reviews are skipped.
keyword_positive_output_min_ak = []

# Topic columns to iterate over (the 'review_text' column is not a topic)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Mutable one-element list: the same object is handed to the progress thread
# and incremented in the loop below
keyword_counter_min_ak = [0]
keyword_input_token_min_ak = 0
keyword_output_token_min_ak = 0
keyword_start_time_loop_min_ak = time.time()

# Look the store's DataFrame up once, before the progress thread starts, so a
# missing key fails fast instead of leaving a background thread running
keyword_source_df_min_ak = keyword_dataframes['min_ak_final_sen_df_jul']

# Threading setup: print_dynamic_message_keyword is defined elsewhere in the
# notebook; presumably it reports progress until stop_event is set
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_min_ak, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_min_ak[0] += 1
        # Review texts of the rows where this topic column equals 1
        filtered_comments_min_ak = keyword_source_df_min_ak[keyword_source_df_min_ak[topic] == 1]['review_text'].tolist()
        # Only call positive_keywords when there is at least one matching review
        if filtered_comments_min_ak:
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_min_ak, topic)
            keyword_positive_output_min_ak.append(keywords)
            keyword_input_token_min_ak += input_tokens_loop
            keyword_output_token_min_ak += output_token_loop
finally:
    # Always stop the dynamic message thread, even if a call above raised;
    # otherwise it would never be signalled to stop
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_min_ak = time.time()
# Cost model used here: USD 0.01 per 1000 input tokens, USD 0.03 per 1000 output tokens
keyword_cost_input_token_min_ak = round((0.01/1000)*keyword_input_token_min_ak,2)
keyword_cost_output_token_min_ak = round((0.03/1000)*keyword_output_token_min_ak,2)
keyword_total_cost_min_ak = keyword_cost_input_token_min_ak + keyword_cost_output_token_min_ak
keyword_total_time_loop_min_ak = keyword_end_time_loop_min_ak - keyword_start_time_loop_min_ak

# Display loop performance parameters & cost (replaces the progress message)
clear_output(wait=True)
print("Executed ",keyword_counter_min_ak[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_min_ak,1))
print("Total Input Tokens - ", keyword_input_token_min_ak)
print("Total Input Cost = USD ",keyword_cost_input_token_min_ak)
print("Total Output Tokens - ", keyword_output_token_min_ak)
print("Total Output Cost = USD ",keyword_cost_output_token_min_ak)
print("Total Cost = USD ",round(keyword_total_cost_min_ak,2))

Executed  10  Iterations
Total Execution time (in secs) -  36.1
Total Input Tokens -  66627
Total Input Cost = USD  0.67
Total Output Tokens -  842
Total Output Cost = USD  0.03
Total Cost = USD  0.7


In [501]:
# Build the positive keyword/phrase summary table for store 'min_ak':
# after consolidation there is one row per ('Store Name', 'Sentiment', 'Type')
# with one column per topic.

# Column layout: three identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store's display name once (it is identical for every row).
# The first keyword_mappings key that contains the identifier wins.
input_store_identifier = 'min_ak'
store_name_min_ak = next(
    (value for key, value in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Accumulate rows as plain dicts and build the DataFrame once at the end;
# calling pd.concat inside the loop re-copies the whole frame on every append.
keyword_rows_min_ak = []
for json_str in keyword_positive_output_min_ak:
    # Each entry is a JSON string; as consumed here it maps a category (topic)
    # to a list of objects carrying comma-separated 'keywords' and 'phrases' strings
    data = json.loads(json_str)
    for category, content_list in data.items():
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name_min_ak
        row_data['Sentiment'] = 'Positive'
        for content in content_list:
            # Emit the 'keywords' row first, then the 'phrases' row
            for row_type in ('keywords', 'phrases'):
                row_data['Type'] = row_type
                # Strip whitespace around each comma-separated item
                row_data[category] = ', '.join(item.strip() for item in content[row_type].split(','))
                # Snapshot the dict because row_data is reused for the next row
                keyword_rows_min_ak.append(dict(row_data))

# With no rows, fall back to an empty frame that still has the expected
# columns so the groupby below can find its keys
positive_keywords_min_ak = (
    pd.DataFrame(keyword_rows_min_ak)
    if keyword_rows_min_ak
    else pd.DataFrame(columns=columns)
)

# Consolidate rows that share 'Store Name', 'Sentiment' and 'Type' into a single
# row, joining the non-null values of every topic column with a space
positive_keywords_min_ak = positive_keywords_min_ak.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_min_ak

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,Mint Jewels - Al Karama,Positive,keywords,"Trustworthy :5, Reliable :4, Genuine :3, Hones...","smooth transaction :5, great experience :4, ea...","accommodating : 50, friendly : 45, helpful : 4...","designs :3, craftsmanship :2, collection :2, s...","variety :4, collections :3, collection :3, ran...","Good Deal :10, Best Deal :5, Amazing Deal :2, ...",low making charge :2,"best rate : 15, good price : 12, good rate : 1...","quality :8, good quality :4, top-notch :2, aut...","good rates :3, high value :2, best price :2, e..."
1,Mint Jewels - Al Karama,Positive,phrases,"Trustworthy place to buy :1, Very trusted peop...","very smooth transaction :3, great experience w...","very accommodating staff : 10, friendly and ac...","great designs :1, very good designs :1, beauti...","good collections :5, great collection :3, stun...","Best deals in the market :1, Got a good deal a...","lower making charge :1, big difference compari...","best price for selling gold : 3, good price fo...","quality of the gold is exceptional :1, top-qua...","hassle-free transaction :4, smooth transaction..."


### joy_ak

In [502]:
# Collect positive keywords/phrases per topic for store 'joy_ak'.
# One positive_keywords() result is appended to this list for every topic that
# has at least one matching review; topics without matching reviews are skipped.
keyword_positive_output_joy_ak = []

# Topic columns to iterate over (the 'review_text' column is not a topic)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Mutable one-element list: the same object is handed to the progress thread
# and incremented in the loop below
keyword_counter_joy_ak = [0]
keyword_input_token_joy_ak = 0
keyword_output_token_joy_ak = 0
keyword_start_time_loop_joy_ak = time.time()

# Look the store's DataFrame up once, before the progress thread starts, so a
# missing key fails fast instead of leaving a background thread running
keyword_source_df_joy_ak = keyword_dataframes['joy_ak_final_sen_df_jul']

# Threading setup: print_dynamic_message_keyword is defined elsewhere in the
# notebook; presumably it reports progress until stop_event is set
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_joy_ak, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_joy_ak[0] += 1
        # Review texts of the rows where this topic column equals 1
        filtered_comments_joy_ak = keyword_source_df_joy_ak[keyword_source_df_joy_ak[topic] == 1]['review_text'].tolist()
        # Only call positive_keywords when there is at least one matching review
        if filtered_comments_joy_ak:
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_joy_ak, topic)
            keyword_positive_output_joy_ak.append(keywords)
            keyword_input_token_joy_ak += input_tokens_loop
            keyword_output_token_joy_ak += output_token_loop
finally:
    # Always stop the dynamic message thread, even if a call above raised;
    # otherwise it would never be signalled to stop
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_joy_ak = time.time()
# Cost model used here: USD 0.01 per 1000 input tokens, USD 0.03 per 1000 output tokens
keyword_cost_input_token_joy_ak = round((0.01/1000)*keyword_input_token_joy_ak,2)
keyword_cost_output_token_joy_ak = round((0.03/1000)*keyword_output_token_joy_ak,2)
keyword_total_cost_joy_ak = keyword_cost_input_token_joy_ak + keyword_cost_output_token_joy_ak
keyword_total_time_loop_joy_ak = keyword_end_time_loop_joy_ak - keyword_start_time_loop_joy_ak

# Display loop performance parameters & cost (replaces the progress message)
clear_output(wait=True)
print("Executed ",keyword_counter_joy_ak[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_joy_ak,1))
print("Total Input Tokens - ", keyword_input_token_joy_ak)
print("Total Input Cost = USD ",keyword_cost_input_token_joy_ak)
print("Total Output Tokens - ", keyword_output_token_joy_ak)
print("Total Output Cost = USD ",keyword_cost_output_token_joy_ak)
print("Total Cost = USD ",round(keyword_total_cost_joy_ak,2))

Executed  10  Iterations
Total Execution time (in secs) -  35.1
Total Input Tokens -  50551
Total Input Cost = USD  0.51
Total Output Tokens -  813
Total Output Cost = USD  0.02
Total Cost = USD  0.53


In [503]:
# Build the positive keyword/phrase summary table for store 'joy_ak':
# after consolidation there is one row per ('Store Name', 'Sentiment', 'Type')
# with one column per topic.

# Column layout: three identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store's display name once (it is identical for every row).
# The first keyword_mappings key that contains the identifier wins.
input_store_identifier = 'joy_ak'
store_name_joy_ak = next(
    (value for key, value in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Accumulate rows as plain dicts and build the DataFrame once at the end;
# calling pd.concat inside the loop re-copies the whole frame on every append.
keyword_rows_joy_ak = []
for json_str in keyword_positive_output_joy_ak:
    # Each entry is a JSON string; as consumed here it maps a category (topic)
    # to a list of objects carrying comma-separated 'keywords' and 'phrases' strings
    data = json.loads(json_str)
    for category, content_list in data.items():
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name_joy_ak
        row_data['Sentiment'] = 'Positive'
        for content in content_list:
            # Emit the 'keywords' row first, then the 'phrases' row
            for row_type in ('keywords', 'phrases'):
                row_data['Type'] = row_type
                # Strip whitespace around each comma-separated item
                row_data[category] = ', '.join(item.strip() for item in content[row_type].split(','))
                # Snapshot the dict because row_data is reused for the next row
                keyword_rows_joy_ak.append(dict(row_data))

# With no rows, fall back to an empty frame that still has the expected
# columns so the groupby below can find its keys
positive_keywords_joy_ak = (
    pd.DataFrame(keyword_rows_joy_ak)
    if keyword_rows_joy_ak
    else pd.DataFrame(columns=columns)
)

# Consolidate rows that share 'Store Name', 'Sentiment' and 'Type' into a single
# row, joining the non-null values of every topic column with a space
positive_keywords_joy_ak = positive_keywords_joy_ak.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_joy_ak

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,Joyalukkas Jewellery - Al Karama,Positive,keywords,"Trust :3, Reliable :2, Trustable :1, Confident...","pleasant :3, welcoming :3, smooth :3, enjoyabl...","friendly : 50, helpful : 45, accommodating : 3...","Good designs: 15, Nice designs: 10, Excellent ...","Good collection : 30, Nice collection : 20, Wi...","discount :10, deal :8, offer :6, vouchers :1, ...","reasonable :2, low :1, discounted :1, less exp...","Good price :5, Affordable :4, Competitive pric...","Good Quality :8, High-quality :1, Best Quality...","exchange :5, purchase :2, sale :1"
1,Joyalukkas Jewellery - Al Karama,Positive,phrases,"Trustworthy and reliable brand :1, Trust in th...","pleasant experience :3, welcoming ambiance :2,...","very friendly staff : 10, helpful staff : 8, a...","Intricate designs: 2, Exquisite designs: 1, Un...","Variety of choices : 3, Wide variety of design...","good discount :5, best discount :3, great deal...","making charges are reasonable :2, low making c...","Value for money :3, Better price :2, Excellent...","Quality of the gold is impeccable :1, Quality ...","delightful experience :1, wonderful experience..."


### kan_mb

In [504]:
# Collect positive keywords/phrases per topic for store 'kan_mb'.
# One positive_keywords() result is appended to this list for every topic that
# has at least one matching review; topics without matching reviews are skipped.
keyword_positive_output_kan_mb = []

# Topic columns to iterate over (the 'review_text' column is not a topic)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Mutable one-element list: the same object is handed to the progress thread
# and incremented in the loop below
keyword_counter_kan_mb = [0]
keyword_input_token_kan_mb = 0
keyword_output_token_kan_mb = 0
keyword_start_time_loop_kan_mb = time.time()

# Look the store's DataFrame up once, before the progress thread starts, so a
# missing key fails fast instead of leaving a background thread running
keyword_source_df_kan_mb = keyword_dataframes['kan_mb_final_sen_df_jul']

# Threading setup: print_dynamic_message_keyword is defined elsewhere in the
# notebook; presumably it reports progress until stop_event is set
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_kan_mb, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_kan_mb[0] += 1
        # Review texts of the rows where this topic column equals 1
        filtered_comments_kan_mb = keyword_source_df_kan_mb[keyword_source_df_kan_mb[topic] == 1]['review_text'].tolist()
        # Only call positive_keywords when there is at least one matching review
        if filtered_comments_kan_mb:
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_kan_mb, topic)
            keyword_positive_output_kan_mb.append(keywords)
            keyword_input_token_kan_mb += input_tokens_loop
            keyword_output_token_kan_mb += output_token_loop
finally:
    # Always stop the dynamic message thread, even if a call above raised;
    # otherwise it would never be signalled to stop
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_kan_mb = time.time()
# Cost model used here: USD 0.01 per 1000 input tokens, USD 0.03 per 1000 output tokens
keyword_cost_input_token_kan_mb = round((0.01/1000)*keyword_input_token_kan_mb,2)
keyword_cost_output_token_kan_mb = round((0.03/1000)*keyword_output_token_kan_mb,2)
keyword_total_cost_kan_mb = keyword_cost_input_token_kan_mb + keyword_cost_output_token_kan_mb
keyword_total_time_loop_kan_mb = keyword_end_time_loop_kan_mb - keyword_start_time_loop_kan_mb

# Display loop performance parameters & cost (replaces the progress message)
clear_output(wait=True)
print("Executed ",keyword_counter_kan_mb[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_kan_mb,1))
print("Total Input Tokens - ", keyword_input_token_kan_mb)
print("Total Input Cost = USD ",keyword_cost_input_token_kan_mb)
print("Total Output Tokens - ", keyword_output_token_kan_mb)
print("Total Output Cost = USD ",keyword_cost_output_token_kan_mb)
print("Total Cost = USD ",round(keyword_total_cost_kan_mb,2))

Executed  10  Iterations
Total Execution time (in secs) -  23.0
Total Input Tokens -  25525
Total Input Cost = USD  0.26
Total Output Tokens -  767
Total Output Cost = USD  0.02
Total Cost = USD  0.28


In [505]:
# Build the positive keyword/phrase summary table for store 'kan_mb':
# after consolidation there is one row per ('Store Name', 'Sentiment', 'Type')
# with one column per topic.

# Column layout: three identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store's display name once (it is identical for every row).
# The first keyword_mappings key that contains the identifier wins.
input_store_identifier = 'kan_mb'
store_name_kan_mb = next(
    (value for key, value in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Accumulate rows as plain dicts and build the DataFrame once at the end;
# calling pd.concat inside the loop re-copies the whole frame on every append.
keyword_rows_kan_mb = []
for json_str in keyword_positive_output_kan_mb:
    # Each entry is a JSON string; as consumed here it maps a category (topic)
    # to a list of objects carrying comma-separated 'keywords' and 'phrases' strings
    data = json.loads(json_str)
    for category, content_list in data.items():
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name_kan_mb
        row_data['Sentiment'] = 'Positive'
        for content in content_list:
            # Emit the 'keywords' row first, then the 'phrases' row
            for row_type in ('keywords', 'phrases'):
                row_data['Type'] = row_type
                # Strip whitespace around each comma-separated item
                row_data[category] = ', '.join(item.strip() for item in content[row_type].split(','))
                # Snapshot the dict because row_data is reused for the next row
                keyword_rows_kan_mb.append(dict(row_data))

# With no rows, fall back to an empty frame that still has the expected
# columns so the groupby below can find its keys
positive_keywords_kan_mb = (
    pd.DataFrame(keyword_rows_kan_mb)
    if keyword_rows_kan_mb
    else pd.DataFrame(columns=columns)
)

# Consolidate rows that share 'Store Name', 'Sentiment' and 'Type' into a single
# row, joining the non-null values of every topic column with a space
positive_keywords_kan_mb = positive_keywords_kan_mb.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_kan_mb

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,Kanz Jewellers,Positive,keywords,"Trustworthy :1, Authenticity :1, Genuine :1","good experience : 10, great experience : 9, am...","helpful : 45, excellent : 40, good : 35, polit...","Good design :5, Best design :4, Unique design ...","variety :3, range :2, selection :2, choice :1,...","Good deal :9, Good discount :6, Great discount...","affordable :2, low :1, best :1, favorable :1","best price :5, good price :4, affordable price...","quality :3, top-notch :2, excellent :1, great :1","favorable making rates:1, comprehensive gold e..."
1,Kanz Jewellers,Positive,phrases,"Genuinely a trustworthy jeweller :1, Authentic...","pleasant shopping experience : 2, very good ex...","very helpful : 20, excellent service : 18, goo...","Good designs :3, Best designs :2, Unique & ele...","nice variety :1, wide collection :1, amazing c...","Good deal with jasvinder singh :3, Gave us a p...","affordable making charge :1, low making charge...","best value for our purchase :1, great value fo...","excellent product quality :1, top-notch qualit...","exchange my existing gold chain:1, benefiting ..."


### agd_mb

In [506]:
# Raw positive_keywords() responses for this store: one entry per topic that had matching reviews
keyword_positive_output_agd_mb = []
# Topic columns to iterate over (the 'review_text' column is deliberately not listed)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# One-element list: the same list object is handed to the progress thread and mutated in the loop
keyword_counter_agd_mb=[0]
keyword_input_token_agd_mb = 0
keyword_output_token_agd_mb = 0
keyword_start_time_loop_agd_mb = time.time()

# Look the store's frame up once instead of twice per topic
sentiment_df_agd_mb = keyword_dataframes['agd_mb_final_sen_df_jul']

#Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_agd_mb, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_agd_mb[0]+=1
        # Reviews whose column for this topic equals 1
        filtered_comments_agd_mb = sentiment_df_agd_mb.loc[sentiment_df_agd_mb[topic]==1, 'review_text'].tolist()
        # positive_keywords is not called for topics without matching reviews
        if filtered_comments_agd_mb:
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_agd_mb,topic)
            keyword_positive_output_agd_mb.append(keywords)
            keyword_input_token_agd_mb += input_tokens_loop
            keyword_output_token_agd_mb += output_token_loop
finally:
    # Stop the progress thread even when a call above raises;
    # otherwise the thread is never signalled or joined
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_agd_mb = time.time()
# Rates are hard-coded: USD 0.01 per 1,000 input tokens, USD 0.03 per 1,000 output tokens.
# Each component is rounded to cents before the two are summed.
keyword_cost_input_token_agd_mb = round((0.01/1000)*keyword_input_token_agd_mb,2)
keyword_cost_output_token_agd_mb = round((0.03/1000)*keyword_output_token_agd_mb,2)
keyword_total_cost_agd_mb = keyword_cost_input_token_agd_mb + keyword_cost_output_token_agd_mb
keyword_total_time_loop_agd_mb = keyword_end_time_loop_agd_mb - keyword_start_time_loop_agd_mb

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_agd_mb[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_agd_mb,1))
print("Total Input Tokens - ", keyword_input_token_agd_mb)
print("Total Input Cost = USD ",keyword_cost_input_token_agd_mb)
print("Total Output Tokens - ", keyword_output_token_agd_mb)
print("Total Output Cost = USD ",keyword_cost_output_token_agd_mb)
print("Total Cost = USD ",round(keyword_total_cost_agd_mb,2))

Executed  10  Iterations
Total Execution time (in secs) -  25.0
Total Input Tokens -  19168
Total Input Cost = USD  0.19
Total Output Tokens -  705
Total Output Cost = USD  0.02
Total Cost = USD  0.21


In [507]:
# Output layout: three identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# The store identifier is fixed for this cell, so resolve its display name once.
# The first keyword_mappings key containing the identifier wins.
input_store_identifier = 'agd_mb'
store_name = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name = value
        break

# Collect every row as a plain dict and build the DataFrame once at the end
# (growing a frame with pd.concat inside the loop is quadratic)
keyword_rows_agd_mb = []
for json_str in keyword_positive_output_agd_mb:
    # Each response is a JSON object mapping a category to a list of
    # {'keywords': ..., 'phrases': ...} entries
    data = json.loads(json_str)
    for category, content_list in data.items():
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name
        row_data['Sentiment'] = 'Positive'
        for content in content_list:
            # Keywords row: normalise whitespace around each comma-separated item
            row_data['Type'] = 'keywords'
            row_data[category] = ', '.join(kw.strip() for kw in content['keywords'].split(','))
            keyword_rows_agd_mb.append(dict(row_data))  # copy: row_data is reused below
            # Phrases row for the same category
            row_data['Type'] = 'phrases'
            row_data[category] = ', '.join(ph.strip() for ph in content['phrases'].split(','))
            keyword_rows_agd_mb.append(dict(row_data))

if keyword_rows_agd_mb:
    # Collapse the per-category rows into one row per (Store Name, Sentiment, Type)
    positive_keywords_agd_mb = (
        pd.DataFrame(keyword_rows_agd_mb)
        .groupby(['Store Name', 'Sentiment', 'Type'])
        .agg(lambda x: ' '.join(x.dropna()))
        .reset_index()
    )
else:
    # No responses for this store: the groupby/reset_index chain raises
    # "ValueError: cannot insert Type, already exists" on an empty frame,
    # so return an empty frame with the expected columns instead
    positive_keywords_agd_mb = pd.DataFrame(columns=columns)

positive_keywords_agd_mb

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,Arakkal Gold and Diamonds LLC - Meena Bazar - ...,Positive,keywords,"Trusted :3, Trustable :1, Genuine :1, Honesty ...","Good experience :10, Wonderful experience :5, ...","helpful :10, patient :3, knowledgeable :3, fri...","Good design :5, Unique designs :3, Best design...","collection : 30, varieties : 2, variety : 1, i...","discount :5, discounts :4, deals :2, offers :1...","reasonable :2, 0% :2","best price: 18, good price: 8, fair price: 2, ...","Good quality:3, Best quality:3, Quality:2, Exc...",
1,Arakkal Gold and Diamonds LLC - Meena Bazar - ...,Positive,phrases,"Most trustworthy people :1, Very trustworthy :...","Very good experience :3, Had a great experienc...","very helpful :5, very nice service :3, very go...","Very beautiful designs :1, Very unique designs...","Nice collection : 6, Good collection : 6, Grea...","best discount :4, good discount :2, amazing di...","0% making charge :2, reasonable making charges :1","best price: 18, good price: 8, fair price: 2, ...","Quality of the gold:1, High-quality jewelry:1,...",


### bhi_dec_ga

In [508]:
# Raw positive_keywords() responses for this store: one entry per topic that had matching reviews
keyword_positive_output_bhi_dec_ga = []
# Topic columns to iterate over (the 'review_text' column is deliberately not listed)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# One-element list: the same list object is handed to the progress thread and mutated in the loop
keyword_counter_bhi_dec_ga=[0]
keyword_input_token_bhi_dec_ga = 0
keyword_output_token_bhi_dec_ga = 0
keyword_start_time_loop_bhi_dec_ga = time.time()

# Look the store's frame up once instead of twice per topic
sentiment_df_bhi_dec_ga = keyword_dataframes['bhi_dec_ga_final_sen_df_jul']

#Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_bhi_dec_ga, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_bhi_dec_ga[0]+=1
        # Reviews whose column for this topic equals 1
        filtered_comments_bhi_dec_ga = sentiment_df_bhi_dec_ga.loc[sentiment_df_bhi_dec_ga[topic]==1, 'review_text'].tolist()
        # positive_keywords is not called for topics without matching reviews
        if filtered_comments_bhi_dec_ga:
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_bhi_dec_ga,topic)
            keyword_positive_output_bhi_dec_ga.append(keywords)
            keyword_input_token_bhi_dec_ga += input_tokens_loop
            keyword_output_token_bhi_dec_ga += output_token_loop
finally:
    # Stop the progress thread even when a call above raises;
    # otherwise the thread is never signalled or joined
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_bhi_dec_ga = time.time()
# Rates are hard-coded: USD 0.01 per 1,000 input tokens, USD 0.03 per 1,000 output tokens.
# Each component is rounded to cents before the two are summed.
keyword_cost_input_token_bhi_dec_ga = round((0.01/1000)*keyword_input_token_bhi_dec_ga,2)
keyword_cost_output_token_bhi_dec_ga = round((0.03/1000)*keyword_output_token_bhi_dec_ga,2)
keyword_total_cost_bhi_dec_ga = keyword_cost_input_token_bhi_dec_ga + keyword_cost_output_token_bhi_dec_ga
keyword_total_time_loop_bhi_dec_ga = keyword_end_time_loop_bhi_dec_ga - keyword_start_time_loop_bhi_dec_ga

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_bhi_dec_ga[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_bhi_dec_ga,1))
print("Total Input Tokens - ", keyword_input_token_bhi_dec_ga)
print("Total Input Cost = USD ",keyword_cost_input_token_bhi_dec_ga)
print("Total Output Tokens - ", keyword_output_token_bhi_dec_ga)
print("Total Output Cost = USD ",keyword_cost_output_token_bhi_dec_ga)
print("Total Cost = USD ",round(keyword_total_cost_bhi_dec_ga,2))

Executed  10  Iterations
Total Execution time (in secs) -  10.0
Total Input Tokens -  5799
Total Input Cost = USD  0.06
Total Output Tokens -  304
Total Output Cost = USD  0.01
Total Cost = USD  0.07


In [509]:
# Output layout: three identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# The store identifier is fixed for this cell, so resolve its display name once.
# The first keyword_mappings key containing the identifier wins.
input_store_identifier = 'bhi_dec_ga'
store_name = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name = value
        break

# Collect every row as a plain dict and build the DataFrame once at the end
# (growing a frame with pd.concat inside the loop is quadratic)
keyword_rows_bhi_dec_ga = []
for json_str in keyword_positive_output_bhi_dec_ga:
    # Each response is a JSON object mapping a category to a list of
    # {'keywords': ..., 'phrases': ...} entries
    data = json.loads(json_str)
    for category, content_list in data.items():
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name
        row_data['Sentiment'] = 'Positive'
        for content in content_list:
            # Keywords row: normalise whitespace around each comma-separated item
            row_data['Type'] = 'keywords'
            row_data[category] = ', '.join(kw.strip() for kw in content['keywords'].split(','))
            keyword_rows_bhi_dec_ga.append(dict(row_data))  # copy: row_data is reused below
            # Phrases row for the same category
            row_data['Type'] = 'phrases'
            row_data[category] = ', '.join(ph.strip() for ph in content['phrases'].split(','))
            keyword_rows_bhi_dec_ga.append(dict(row_data))

if keyword_rows_bhi_dec_ga:
    # Collapse the per-category rows into one row per (Store Name, Sentiment, Type)
    positive_keywords_bhi_dec_ga = (
        pd.DataFrame(keyword_rows_bhi_dec_ga)
        .groupby(['Store Name', 'Sentiment', 'Type'])
        .agg(lambda x: ' '.join(x.dropna()))
        .reset_index()
    )
else:
    # No responses for this store: the groupby/reset_index chain raises
    # "ValueError: cannot insert Type, already exists" on an empty frame,
    # so return an empty frame with the expected columns instead
    positive_keywords_bhi_dec_ga = pd.DataFrame(columns=columns)

positive_keywords_bhi_dec_ga

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Bhindi Jewellers-Decatur, GA",Positive,keywords,"trustworthy :1, reliable :1","wonderful :1, great :1, amazing :1, good :1","helpful :5, sweet :3, professional :1, excelle...",,No relevant positive keywords/ phrases,,,"good price :1, lower :1",No relevant positive keywords/ phrases,
1,"Bhindi Jewellers-Decatur, GA",Positive,phrases,Perfect place to buy gold and jewelry :1,"very good experience :1, great experience :1, ...","very helpful :3, very sweet :2, very kind :2, ...",,No relevant positive keywords/ phrases,,,able to lower the price :1,No relevant positive keywords/ phrases,


### eve_joh_ga

In [510]:
# Raw positive_keywords() responses for this store: one entry per topic that had matching reviews
keyword_positive_output_eve_joh_ga = []
# Topic columns to iterate over (the 'review_text' column is deliberately not listed)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# One-element list: the same list object is handed to the progress thread and mutated in the loop
keyword_counter_eve_joh_ga=[0]
keyword_input_token_eve_joh_ga = 0
keyword_output_token_eve_joh_ga = 0
keyword_start_time_loop_eve_joh_ga = time.time()

# Look the store's frame up once instead of twice per topic
sentiment_df_eve_joh_ga = keyword_dataframes['eve_joh_ga_final_sen_df_jul']

#Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_eve_joh_ga, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_eve_joh_ga[0]+=1
        # Reviews whose column for this topic equals 1
        filtered_comments_eve_joh_ga = sentiment_df_eve_joh_ga.loc[sentiment_df_eve_joh_ga[topic]==1, 'review_text'].tolist()
        # positive_keywords is not called for topics without matching reviews
        if filtered_comments_eve_joh_ga:
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_eve_joh_ga,topic)
            keyword_positive_output_eve_joh_ga.append(keywords)
            keyword_input_token_eve_joh_ga += input_tokens_loop
            keyword_output_token_eve_joh_ga += output_token_loop
finally:
    # Stop the progress thread even when a call above raises;
    # otherwise the thread is never signalled or joined
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_eve_joh_ga = time.time()
# Rates are hard-coded: USD 0.01 per 1,000 input tokens, USD 0.03 per 1,000 output tokens.
# Each component is rounded to cents before the two are summed.
keyword_cost_input_token_eve_joh_ga = round((0.01/1000)*keyword_input_token_eve_joh_ga,2)
keyword_cost_output_token_eve_joh_ga = round((0.03/1000)*keyword_output_token_eve_joh_ga,2)
keyword_total_cost_eve_joh_ga = keyword_cost_input_token_eve_joh_ga + keyword_cost_output_token_eve_joh_ga
keyword_total_time_loop_eve_joh_ga = keyword_end_time_loop_eve_joh_ga - keyword_start_time_loop_eve_joh_ga

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_eve_joh_ga[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_eve_joh_ga,1))
print("Total Input Tokens - ", keyword_input_token_eve_joh_ga)
print("Total Input Cost = USD ",keyword_cost_input_token_eve_joh_ga)
print("Total Output Tokens - ", keyword_output_token_eve_joh_ga)
print("Total Output Cost = USD ",keyword_cost_output_token_eve_joh_ga)
print("Total Cost = USD ",round(keyword_total_cost_eve_joh_ga,2))

Executed  10  Iterations
Total Execution time (in secs) -  0.0
Total Input Tokens -  0
Total Input Cost = USD  0.0
Total Output Tokens -  0
Total Output Cost = USD  0.0
Total Cost = USD  0.0


In [511]:
# Output layout: three identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# The store identifier is fixed for this cell, so resolve its display name once.
# The first keyword_mappings key containing the identifier wins.
input_store_identifier = 'eve_joh_ga'
store_name = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name = value
        break

# Collect every row as a plain dict and build the DataFrame once at the end
# (growing a frame with pd.concat inside the loop is quadratic)
keyword_rows_eve_joh_ga = []
for json_str in keyword_positive_output_eve_joh_ga:
    # Each response is a JSON object mapping a category to a list of
    # {'keywords': ..., 'phrases': ...} entries
    data = json.loads(json_str)
    for category, content_list in data.items():
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name
        row_data['Sentiment'] = 'Positive'
        for content in content_list:
            # Keywords row: normalise whitespace around each comma-separated item
            row_data['Type'] = 'keywords'
            row_data[category] = ', '.join(kw.strip() for kw in content['keywords'].split(','))
            keyword_rows_eve_joh_ga.append(dict(row_data))  # copy: row_data is reused below
            # Phrases row for the same category
            row_data['Type'] = 'phrases'
            row_data[category] = ', '.join(ph.strip() for ph in content['phrases'].split(','))
            keyword_rows_eve_joh_ga.append(dict(row_data))

if keyword_rows_eve_joh_ga:
    # Collapse the per-category rows into one row per (Store Name, Sentiment, Type)
    positive_keywords_eve_joh_ga = (
        pd.DataFrame(keyword_rows_eve_joh_ga)
        .groupby(['Store Name', 'Sentiment', 'Type'])
        .agg(lambda x: ' '.join(x.dropna()))
        .reset_index()
    )
else:
    # No responses for this store: the groupby/reset_index chain raises
    # "ValueError: cannot insert Type, already exists" on an empty frame,
    # so return an empty frame with the expected columns instead
    positive_keywords_eve_joh_ga = pd.DataFrame(columns=columns)

positive_keywords_eve_joh_ga

ValueError: cannot insert Type, already exists

### jar_bol_il

In [512]:
# Raw positive_keywords() responses for this store: one entry per topic that had matching reviews
keyword_positive_output_jar_bol_il = []
# Topic columns to iterate over (the 'review_text' column is deliberately not listed)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# One-element list: the same list object is handed to the progress thread and mutated in the loop
keyword_counter_jar_bol_il=[0]
keyword_input_token_jar_bol_il = 0
keyword_output_token_jar_bol_il = 0
keyword_start_time_loop_jar_bol_il = time.time()

# Look the store's frame up once instead of twice per topic
sentiment_df_jar_bol_il = keyword_dataframes['jar_bol_il_final_sen_df_jul']

#Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_jar_bol_il, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_jar_bol_il[0]+=1
        # Reviews whose column for this topic equals 1
        filtered_comments_jar_bol_il = sentiment_df_jar_bol_il.loc[sentiment_df_jar_bol_il[topic]==1, 'review_text'].tolist()
        # positive_keywords is not called for topics without matching reviews
        if filtered_comments_jar_bol_il:
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_jar_bol_il,topic)
            keyword_positive_output_jar_bol_il.append(keywords)
            keyword_input_token_jar_bol_il += input_tokens_loop
            keyword_output_token_jar_bol_il += output_token_loop
finally:
    # Stop the progress thread even when a call above raises;
    # otherwise the thread is never signalled or joined
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_jar_bol_il = time.time()
# Rates are hard-coded: USD 0.01 per 1,000 input tokens, USD 0.03 per 1,000 output tokens.
# Each component is rounded to cents before the two are summed.
keyword_cost_input_token_jar_bol_il = round((0.01/1000)*keyword_input_token_jar_bol_il,2)
keyword_cost_output_token_jar_bol_il = round((0.03/1000)*keyword_output_token_jar_bol_il,2)
keyword_total_cost_jar_bol_il = keyword_cost_input_token_jar_bol_il + keyword_cost_output_token_jar_bol_il
keyword_total_time_loop_jar_bol_il = keyword_end_time_loop_jar_bol_il - keyword_start_time_loop_jar_bol_il

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_jar_bol_il[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_jar_bol_il,1))
print("Total Input Tokens - ", keyword_input_token_jar_bol_il)
print("Total Input Cost = USD ",keyword_cost_input_token_jar_bol_il)
print("Total Output Tokens - ", keyword_output_token_jar_bol_il)
print("Total Output Cost = USD ",keyword_cost_output_token_jar_bol_il)
print("Total Cost = USD ",round(keyword_total_cost_jar_bol_il,2))

Executed  10  Iterations
Total Execution time (in secs) -  18.0
Total Input Tokens -  15498
Total Input Cost = USD  0.15
Total Output Tokens -  525
Total Output Cost = USD  0.02
Total Cost = USD  0.17


In [513]:
# Output layout: three identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# The store identifier is fixed for this cell, so resolve its display name once.
# The first keyword_mappings key containing the identifier wins.
input_store_identifier = 'jar_bol_il'
store_name = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name = value
        break

# Collect every row as a plain dict and build the DataFrame once at the end
# (growing a frame with pd.concat inside the loop is quadratic)
keyword_rows_jar_bol_il = []
for json_str in keyword_positive_output_jar_bol_il:
    # Each response is a JSON object mapping a category to a list of
    # {'keywords': ..., 'phrases': ...} entries
    data = json.loads(json_str)
    for category, content_list in data.items():
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name
        row_data['Sentiment'] = 'Positive'
        for content in content_list:
            # Keywords row: normalise whitespace around each comma-separated item
            row_data['Type'] = 'keywords'
            row_data[category] = ', '.join(kw.strip() for kw in content['keywords'].split(','))
            keyword_rows_jar_bol_il.append(dict(row_data))  # copy: row_data is reused below
            # Phrases row for the same category
            row_data['Type'] = 'phrases'
            row_data[category] = ', '.join(ph.strip() for ph in content['phrases'].split(','))
            keyword_rows_jar_bol_il.append(dict(row_data))

if keyword_rows_jar_bol_il:
    # Collapse the per-category rows into one row per (Store Name, Sentiment, Type)
    positive_keywords_jar_bol_il = (
        pd.DataFrame(keyword_rows_jar_bol_il)
        .groupby(['Store Name', 'Sentiment', 'Type'])
        .agg(lambda x: ' '.join(x.dropna()))
        .reset_index()
    )
else:
    # No responses for this store: the groupby/reset_index chain raises
    # "ValueError: cannot insert Type, already exists" on an empty frame,
    # so return an empty frame with the expected columns instead
    positive_keywords_jar_bol_il = pd.DataFrame(columns=columns)

positive_keywords_jar_bol_il

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Jared-Bolingbrook, IL",Positive,keywords,No relevant positive keywords/ phrases,"great experience :5, amazing experience :4, aw...","helpful :15, knowledgeable :10, friendly :9, p...","custom design:1, gorgeous:1, beautiful:1","selection :3, variety :1, options :1","deal :2, price :1",,"affordable :1, budget :1, fair pricing :1, pri...",No relevant positive keywords/ phrases,"trade in :1, upsize :1"
1,"Jared-Bolingbrook, IL",Positive,phrases,No relevant positive keywords/ phrases,"great customer service :4, very helpful :3, hi...","great customer service :5, amazing experience ...","fantastic job sketching:1, made it look beauti...","great selection :2, impressive selection :1, v...","amazing deals :1, best deal possible :1",,"in our budget :1, worth coming to check out :1...",No relevant positive keywords/ phrases,trade in and upsize your certified diamonds :1


### jar_ver_il

In [514]:
# Raw positive_keywords() responses for this store: one entry per topic that had matching reviews
keyword_positive_output_jar_ver_il = []
# Topic columns to iterate over (the 'review_text' column is deliberately not listed)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# One-element list: the same list object is handed to the progress thread and mutated in the loop
keyword_counter_jar_ver_il=[0]
keyword_input_token_jar_ver_il = 0
keyword_output_token_jar_ver_il = 0
keyword_start_time_loop_jar_ver_il = time.time()

# Look the store's frame up once instead of twice per topic
sentiment_df_jar_ver_il = keyword_dataframes['jar_ver_il_final_sen_df_jul']

#Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_jar_ver_il, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_jar_ver_il[0]+=1
        # Reviews whose column for this topic equals 1
        filtered_comments_jar_ver_il = sentiment_df_jar_ver_il.loc[sentiment_df_jar_ver_il[topic]==1, 'review_text'].tolist()
        # positive_keywords is not called for topics without matching reviews
        if filtered_comments_jar_ver_il:
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_jar_ver_il,topic)
            keyword_positive_output_jar_ver_il.append(keywords)
            keyword_input_token_jar_ver_il += input_tokens_loop
            keyword_output_token_jar_ver_il += output_token_loop
finally:
    # Stop the progress thread even when a call above raises;
    # otherwise the thread is never signalled or joined
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_jar_ver_il = time.time()
# Rates are hard-coded: USD 0.01 per 1,000 input tokens, USD 0.03 per 1,000 output tokens.
# Each component is rounded to cents before the two are summed.
keyword_cost_input_token_jar_ver_il = round((0.01/1000)*keyword_input_token_jar_ver_il,2)
keyword_cost_output_token_jar_ver_il = round((0.03/1000)*keyword_output_token_jar_ver_il,2)
keyword_total_cost_jar_ver_il = keyword_cost_input_token_jar_ver_il + keyword_cost_output_token_jar_ver_il
keyword_total_time_loop_jar_ver_il = keyword_end_time_loop_jar_ver_il - keyword_start_time_loop_jar_ver_il

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_jar_ver_il[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_jar_ver_il,1))
print("Total Input Tokens - ", keyword_input_token_jar_ver_il)
print("Total Input Cost = USD ",keyword_cost_input_token_jar_ver_il)
print("Total Output Tokens - ", keyword_output_token_jar_ver_il)
print("Total Output Cost = USD ",keyword_cost_output_token_jar_ver_il)
print("Total Cost = USD ",round(keyword_total_cost_jar_ver_il,2))

Executed  10  Iterations
Total Execution time (in secs) -  11.0
Total Input Tokens -  7697
Total Input Cost = USD  0.08
Total Output Tokens -  311
Total Output Cost = USD  0.01
Total Cost = USD  0.09


In [515]:
# Output layout: three identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# The store identifier is fixed for this cell, so resolve its display name once.
# The first keyword_mappings key containing the identifier wins.
input_store_identifier = 'jar_ver_il'
store_name = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name = value
        break

# Collect every row as a plain dict and build the DataFrame once at the end
# (growing a frame with pd.concat inside the loop is quadratic)
keyword_rows_jar_ver_il = []
for json_str in keyword_positive_output_jar_ver_il:
    # Each response is a JSON object mapping a category to a list of
    # {'keywords': ..., 'phrases': ...} entries
    data = json.loads(json_str)
    for category, content_list in data.items():
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name
        row_data['Sentiment'] = 'Positive'
        for content in content_list:
            # Keywords row: normalise whitespace around each comma-separated item
            row_data['Type'] = 'keywords'
            row_data[category] = ', '.join(kw.strip() for kw in content['keywords'].split(','))
            keyword_rows_jar_ver_il.append(dict(row_data))  # copy: row_data is reused below
            # Phrases row for the same category
            row_data['Type'] = 'phrases'
            row_data[category] = ', '.join(ph.strip() for ph in content['phrases'].split(','))
            keyword_rows_jar_ver_il.append(dict(row_data))

if keyword_rows_jar_ver_il:
    # Collapse the per-category rows into one row per (Store Name, Sentiment, Type)
    positive_keywords_jar_ver_il = (
        pd.DataFrame(keyword_rows_jar_ver_il)
        .groupby(['Store Name', 'Sentiment', 'Type'])
        .agg(lambda x: ' '.join(x.dropna()))
        .reset_index()
    )
else:
    # No responses for this store: the groupby/reset_index chain raises
    # "ValueError: cannot insert Type, already exists" on an empty frame,
    # so return an empty frame with the expected columns instead
    positive_keywords_jar_ver_il = pd.DataFrame(columns=columns)

positive_keywords_jar_ver_il

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Jared-Vernon Hills, IL",Positive,keywords,"Trust :2, Reliable :1, Honest :1","helpful :10, great :9, amazing :5, wonderful :...","helpful :15, knowledgeable :6, patient :5, fri...",,"options :2, selection :1",,,,"top quality:1, excellent:1",
1,"Jared-Vernon Hills, IL",Positive,phrases,"Always going to Jared :1, Highly recommend :1,...","great experience :6, wonderful experience :3, ...","very helpful :5, extremely helpful :2, very kn...",,"excellent selection to choose from :1, many op...",,,,"quality is as exceptional:1, quality of this r...",


### jar_lom_il

In [516]:
# Raw positive_keywords() responses for this store: one entry per topic that had matching reviews
keyword_positive_output_jar_lom_il = []
# Topic columns to iterate over (the 'review_text' column is deliberately not listed)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# One-element list: the same list object is handed to the progress thread and mutated in the loop
keyword_counter_jar_lom_il=[0]
keyword_input_token_jar_lom_il = 0
keyword_output_token_jar_lom_il = 0
keyword_start_time_loop_jar_lom_il = time.time()

# Look the store's frame up once instead of twice per topic
sentiment_df_jar_lom_il = keyword_dataframes['jar_lom_il_final_sen_df_jul']

#Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_jar_lom_il, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_jar_lom_il[0]+=1
        # Reviews whose column for this topic equals 1
        filtered_comments_jar_lom_il = sentiment_df_jar_lom_il.loc[sentiment_df_jar_lom_il[topic]==1, 'review_text'].tolist()
        # positive_keywords is not called for topics without matching reviews
        if filtered_comments_jar_lom_il:
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_jar_lom_il,topic)
            keyword_positive_output_jar_lom_il.append(keywords)
            keyword_input_token_jar_lom_il += input_tokens_loop
            keyword_output_token_jar_lom_il += output_token_loop
finally:
    # Stop the progress thread even when a call above raises;
    # otherwise the thread is never signalled or joined
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_jar_lom_il = time.time()
# Rates are hard-coded: USD 0.01 per 1,000 input tokens, USD 0.03 per 1,000 output tokens.
# Each component is rounded to cents before the two are summed.
keyword_cost_input_token_jar_lom_il = round((0.01/1000)*keyword_input_token_jar_lom_il,2)
keyword_cost_output_token_jar_lom_il = round((0.03/1000)*keyword_output_token_jar_lom_il,2)
keyword_total_cost_jar_lom_il = keyword_cost_input_token_jar_lom_il + keyword_cost_output_token_jar_lom_il
keyword_total_time_loop_jar_lom_il = keyword_end_time_loop_jar_lom_il - keyword_start_time_loop_jar_lom_il

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_jar_lom_il[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_jar_lom_il,1))
print("Total Input Tokens - ", keyword_input_token_jar_lom_il)
print("Total Input Cost = USD ",keyword_cost_input_token_jar_lom_il)
print("Total Output Tokens - ", keyword_output_token_jar_lom_il)
print("Total Output Cost = USD ",keyword_cost_output_token_jar_lom_il)
print("Total Cost = USD ",round(keyword_total_cost_jar_lom_il,2))

Executed  10  Iterations
Total Execution time (in secs) -  17.0
Total Input Tokens -  11461
Total Input Cost = USD  0.11
Total Output Tokens -  436
Total Output Cost = USD  0.01
Total Cost = USD  0.12


In [517]:
# Build the positive keyword/phrase summary table for store 'jar_lom_il':
# one column per topic, consolidated to one row per 'Type' (keywords / phrases).

# Output columns: identifying columns first, then one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the display name once: the first keyword_mappings key that contains the
# store identifier wins; a placeholder is used when nothing matches.
input_store_identifier = 'jar_lom_il'
store_name_jar_lom_il = next(
    (value for key, value in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Collect plain dict records and build the DataFrame once, instead of
# re-allocating the frame with pd.concat for every appended row.
records_jar_lom_il = []
for json_str in keyword_positive_output_jar_lom_il:
    # Each entry is a JSON string; it is read below as
    # {category: [{'keywords': '...', 'phrases': '...'}, ...]}
    data = json.loads(json_str)
    for category, content_list in data.items():
        # Fresh row per category: every column empty except the identifying ones
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name_jar_lom_il
        row_data['Sentiment'] = 'Positive'
        for content in content_list:
            # Keywords are handled first; entries are re-joined with ', ' after trimming
            row_data['Type'] = 'keywords'
            row_data[category] = ', '.join(kw.strip() for kw in content['keywords'].split(','))
            # Snapshot the dict: row_data is mutated again for the phrases row
            records_jar_lom_il.append(dict(row_data))
            # Phrases are handled next
            row_data['Type'] = 'phrases'
            row_data[category] = ', '.join(ph.strip() for ph in content['phrases'].split(','))
            records_jar_lom_il.append(dict(row_data))

# Keep the expected columns even when there is nothing to report
if records_jar_lom_il:
    positive_keywords_jar_lom_il = pd.DataFrame(records_jar_lom_il)
else:
    positive_keywords_jar_lom_il = pd.DataFrame(columns=columns)

# Consolidate rows that share 'Store Name', 'Sentiment' and 'Type' into a single row;
# within each topic column the non-null values are joined with a space.
positive_keywords_jar_lom_il = positive_keywords_jar_lom_il.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_jar_lom_il

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Jared-Lombard, IL",Positive,keywords,"Honesty :1, Trust :1","helpful :8, knowledgeable :4, friendly :4, wel...","helpful :10, knowledgeable :5, friendly :5, at...","cookie cutter :2, top notch :2","selections :2, options :1, choices :1, selecti...","discount :1, 25% off :1",,budget :2,,
1,"Jared-Lombard, IL",Positive,phrases,"Loyal customer for the past five years :1, Con...","great experience :5, wonderful experience :3, ...","very helpful and knowledgeable :2, helpful and...",perfect engagement ring :2,"many different options of jewelry :1, great se...","small discount on the appraisal :1, couldn’t p...",,"well within our budget :1, extremely tight tim...",,


### jar_orl_il

In [518]:
# Extract positive keywords/phrases for store 'jar_orl_il': one positive_keywords()
# call per topic that has at least one review flagged 1 for that topic.
# The results are collected in a list (one entry per topic that had reviews).
keyword_positive_output_jar_orl_il = []
# Topic columns of the sentiment frame to iterate over ('review_text' is not a topic)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Single-element list: it is handed to the progress thread and mutated in place below
keyword_counter_jar_orl_il = [0]
keyword_input_token_jar_orl_il = 0
keyword_output_token_jar_orl_il = 0
keyword_start_time_loop_jar_orl_il = time.time()

# Resolve the store's sentiment frame once, before the progress thread is started,
# so a missing key fails fast without leaving a thread running.
sentiment_df_jar_orl_il = keyword_dataframes['jar_orl_il_final_sen_df_jul']

# Threading setup: background thread that reports progress until stop_event is set
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_jar_orl_il, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_jar_orl_il[0] += 1
        # Review texts of the rows where the topic has a value of 1
        filtered_comments_jar_orl_il = sentiment_df_jar_orl_il.loc[sentiment_df_jar_orl_il[topic] == 1, 'review_text'].tolist()
        # Topics without any matching review are skipped (no call is made)
        if filtered_comments_jar_orl_il:
            # positive_keywords returns the extraction result plus the input/output token counts
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_jar_orl_il, topic)
            keyword_positive_output_jar_orl_il.append(keywords)
            keyword_input_token_jar_orl_il += input_tokens_loop
            keyword_output_token_jar_orl_il += output_token_loop
finally:
    # Always stop the progress thread, even when a call above raises;
    # otherwise it would keep running after the cell has failed.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_jar_orl_il = time.time()
# Cost in USD: 0.01 per 1,000 input tokens and 0.03 per 1,000 output tokens
keyword_cost_input_token_jar_orl_il = round((0.01/1000)*keyword_input_token_jar_orl_il,2)
keyword_cost_output_token_jar_orl_il = round((0.03/1000)*keyword_output_token_jar_orl_il,2)
keyword_total_cost_jar_orl_il = keyword_cost_input_token_jar_orl_il + keyword_cost_output_token_jar_orl_il
keyword_total_time_loop_jar_orl_il = keyword_end_time_loop_jar_orl_il - keyword_start_time_loop_jar_orl_il

# Display loop performance parameters & cost (replaces the progress output)
clear_output(wait=True)
print("Executed ",keyword_counter_jar_orl_il[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_jar_orl_il,1))
print("Total Input Tokens - ", keyword_input_token_jar_orl_il)
print("Total Input Cost = USD ",keyword_cost_input_token_jar_orl_il)
print("Total Output Tokens - ", keyword_output_token_jar_orl_il)
print("Total Output Cost = USD ",keyword_cost_output_token_jar_orl_il)
print("Total Cost = USD ",round(keyword_total_cost_jar_orl_il,2))

Executed  10  Iterations
Total Execution time (in secs) -  13.0
Total Input Tokens -  14405
Total Input Cost = USD  0.14
Total Output Tokens -  467
Total Output Cost = USD  0.01
Total Cost = USD  0.15


In [519]:
# Build the positive keyword/phrase summary table for store 'jar_orl_il':
# one column per topic, consolidated to one row per 'Type' (keywords / phrases).

# Output columns: identifying columns first, then one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the display name once: the first keyword_mappings key that contains the
# store identifier wins; a placeholder is used when nothing matches.
input_store_identifier = 'jar_orl_il'
store_name_jar_orl_il = next(
    (value for key, value in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Collect plain dict records and build the DataFrame once, instead of
# re-allocating the frame with pd.concat for every appended row.
records_jar_orl_il = []
for json_str in keyword_positive_output_jar_orl_il:
    # Each entry is a JSON string; it is read below as
    # {category: [{'keywords': '...', 'phrases': '...'}, ...]}
    data = json.loads(json_str)
    for category, content_list in data.items():
        # Fresh row per category: every column empty except the identifying ones
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name_jar_orl_il
        row_data['Sentiment'] = 'Positive'
        for content in content_list:
            # Keywords are handled first; entries are re-joined with ', ' after trimming
            row_data['Type'] = 'keywords'
            row_data[category] = ', '.join(kw.strip() for kw in content['keywords'].split(','))
            # Snapshot the dict: row_data is mutated again for the phrases row
            records_jar_orl_il.append(dict(row_data))
            # Phrases are handled next
            row_data['Type'] = 'phrases'
            row_data[category] = ', '.join(ph.strip() for ph in content['phrases'].split(','))
            records_jar_orl_il.append(dict(row_data))

# Keep the expected columns even when there is nothing to report
if records_jar_orl_il:
    positive_keywords_jar_orl_il = pd.DataFrame(records_jar_orl_il)
else:
    positive_keywords_jar_orl_il = pd.DataFrame(columns=columns)

# Consolidate rows that share 'Store Name', 'Sentiment' and 'Type' into a single row;
# within each topic column the non-null values are joined with a space.
positive_keywords_jar_orl_il = positive_keywords_jar_orl_il.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_jar_orl_il

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Jared-Orland Park, IL",Positive,keywords,"Honest :2, Trustworthy :1, Sincere :1, Knowled...","clean :2, organized :1, inviting :1, welcoming :1","helpful :10, knowledgeable :5, professional :4...",,"selection :3, variety :1",,,"budget :1, prices :1","quality :3, exceptional :1, stunning :1",trade :1
1,"Jared-Orland Park, IL",Positive,phrases,"Trustworthy opinions :1, Real deal :1, Represe...","pleasant experience :2, amazing experience :2,...","great service :3, amazing experience :3, excel...",,"great selection :1, amazing selection :1, wide...",,,"great price :1, budget price :1, beat these pr...","quality of the jewelry is exceptional :1, impr...",No relevant positive keywords/ phrases


### jar_aur_il

In [520]:
# Extract positive keywords/phrases for store 'jar_aur_il': one positive_keywords()
# call per topic that has at least one review flagged 1 for that topic.
# The results are collected in a list (one entry per topic that had reviews).
keyword_positive_output_jar_aur_il = []
# Topic columns of the sentiment frame to iterate over ('review_text' is not a topic)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Single-element list: it is handed to the progress thread and mutated in place below
keyword_counter_jar_aur_il = [0]
keyword_input_token_jar_aur_il = 0
keyword_output_token_jar_aur_il = 0
keyword_start_time_loop_jar_aur_il = time.time()

# Resolve the store's sentiment frame once, before the progress thread is started,
# so a missing key fails fast without leaving a thread running.
sentiment_df_jar_aur_il = keyword_dataframes['jar_aur_il_final_sen_df_jul']

# Threading setup: background thread that reports progress until stop_event is set
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_jar_aur_il, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_jar_aur_il[0] += 1
        # Review texts of the rows where the topic has a value of 1
        filtered_comments_jar_aur_il = sentiment_df_jar_aur_il.loc[sentiment_df_jar_aur_il[topic] == 1, 'review_text'].tolist()
        # Topics without any matching review are skipped (no call is made)
        if filtered_comments_jar_aur_il:
            # positive_keywords returns the extraction result plus the input/output token counts
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_jar_aur_il, topic)
            keyword_positive_output_jar_aur_il.append(keywords)
            keyword_input_token_jar_aur_il += input_tokens_loop
            keyword_output_token_jar_aur_il += output_token_loop
finally:
    # Always stop the progress thread, even when a call above raises;
    # otherwise it would keep running after the cell has failed.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_jar_aur_il = time.time()
# Cost in USD: 0.01 per 1,000 input tokens and 0.03 per 1,000 output tokens
keyword_cost_input_token_jar_aur_il = round((0.01/1000)*keyword_input_token_jar_aur_il,2)
keyword_cost_output_token_jar_aur_il = round((0.03/1000)*keyword_output_token_jar_aur_il,2)
keyword_total_cost_jar_aur_il = keyword_cost_input_token_jar_aur_il + keyword_cost_output_token_jar_aur_il
keyword_total_time_loop_jar_aur_il = keyword_end_time_loop_jar_aur_il - keyword_start_time_loop_jar_aur_il

# Display loop performance parameters & cost (replaces the progress output)
clear_output(wait=True)
print("Executed ",keyword_counter_jar_aur_il[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_jar_aur_il,1))
print("Total Input Tokens - ", keyword_input_token_jar_aur_il)
print("Total Input Cost = USD ",keyword_cost_input_token_jar_aur_il)
print("Total Output Tokens - ", keyword_output_token_jar_aur_il)
print("Total Output Cost = USD ",keyword_cost_output_token_jar_aur_il)
print("Total Cost = USD ",round(keyword_total_cost_jar_aur_il,2))

Executed  10  Iterations
Total Execution time (in secs) -  13.5
Total Input Tokens -  9574
Total Input Cost = USD  0.1
Total Output Tokens -  374
Total Output Cost = USD  0.01
Total Cost = USD  0.11


In [521]:
# Build the positive keyword/phrase summary table for store 'jar_aur_il':
# one column per topic, consolidated to one row per 'Type' (keywords / phrases).

# Output columns: identifying columns first, then one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the display name once: the first keyword_mappings key that contains the
# store identifier wins; a placeholder is used when nothing matches.
input_store_identifier = 'jar_aur_il'
store_name_jar_aur_il = next(
    (value for key, value in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Collect plain dict records and build the DataFrame once, instead of
# re-allocating the frame with pd.concat for every appended row.
records_jar_aur_il = []
for json_str in keyword_positive_output_jar_aur_il:
    # Each entry is a JSON string; it is read below as
    # {category: [{'keywords': '...', 'phrases': '...'}, ...]}
    data = json.loads(json_str)
    for category, content_list in data.items():
        # Fresh row per category: every column empty except the identifying ones
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name_jar_aur_il
        row_data['Sentiment'] = 'Positive'
        for content in content_list:
            # Keywords are handled first; entries are re-joined with ', ' after trimming
            row_data['Type'] = 'keywords'
            row_data[category] = ', '.join(kw.strip() for kw in content['keywords'].split(','))
            # Snapshot the dict: row_data is mutated again for the phrases row
            records_jar_aur_il.append(dict(row_data))
            # Phrases are handled next
            row_data['Type'] = 'phrases'
            row_data[category] = ', '.join(ph.strip() for ph in content['phrases'].split(','))
            records_jar_aur_il.append(dict(row_data))

# Keep the expected columns even when there is nothing to report
if records_jar_aur_il:
    positive_keywords_jar_aur_il = pd.DataFrame(records_jar_aur_il)
else:
    positive_keywords_jar_aur_il = pd.DataFrame(columns=columns)

# Consolidate rows that share 'Store Name', 'Sentiment' and 'Type' into a single row;
# within each topic column the non-null values are joined with a space.
positive_keywords_jar_aur_il = positive_keywords_jar_aur_il.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_jar_aur_il

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Jared-Aurora, IL",Positive,keywords,"Professional :1, Knowledge :1, Quality :1","helpful :5, friendly :4, welcoming :3, attenti...","helpful :10, knowledgeable :5, friendly :5, pr...",,"options :2, collection :2, selection :1",,,great price :1,"quality :2, high-quality :1, expertise :1, imp...",
1,"Jared-Aurora, IL",Positive,phrases,"Life time jewelers :1, Class A experience :1","great customer service :3, exceptional custome...","great customer service :4, exceptional custome...",,"extensive collection :1, great collection :1, ...",,,No relevant positive phrases,"The quality, atmosphere and ambiance was outst...",


### jar_alg_il

In [522]:
# Extract positive keywords/phrases for store 'jar_alg_il': one positive_keywords()
# call per topic that has at least one review flagged 1 for that topic.
# The results are collected in a list (one entry per topic that had reviews).
keyword_positive_output_jar_alg_il = []
# Topic columns of the sentiment frame to iterate over ('review_text' is not a topic)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Single-element list: it is handed to the progress thread and mutated in place below
keyword_counter_jar_alg_il = [0]
keyword_input_token_jar_alg_il = 0
keyword_output_token_jar_alg_il = 0
keyword_start_time_loop_jar_alg_il = time.time()

# Resolve the store's sentiment frame once, before the progress thread is started,
# so a missing key fails fast without leaving a thread running.
sentiment_df_jar_alg_il = keyword_dataframes['jar_alg_il_final_sen_df_jul']

# Threading setup: background thread that reports progress until stop_event is set
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_jar_alg_il, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_jar_alg_il[0] += 1
        # Review texts of the rows where the topic has a value of 1
        filtered_comments_jar_alg_il = sentiment_df_jar_alg_il.loc[sentiment_df_jar_alg_il[topic] == 1, 'review_text'].tolist()
        # Topics without any matching review are skipped (no call is made)
        if filtered_comments_jar_alg_il:
            # positive_keywords returns the extraction result plus the input/output token counts
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_jar_alg_il, topic)
            keyword_positive_output_jar_alg_il.append(keywords)
            keyword_input_token_jar_alg_il += input_tokens_loop
            keyword_output_token_jar_alg_il += output_token_loop
finally:
    # Always stop the progress thread, even when a call above raises;
    # otherwise it would keep running after the cell has failed.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_jar_alg_il = time.time()
# Cost in USD: 0.01 per 1,000 input tokens and 0.03 per 1,000 output tokens
keyword_cost_input_token_jar_alg_il = round((0.01/1000)*keyword_input_token_jar_alg_il,2)
keyword_cost_output_token_jar_alg_il = round((0.03/1000)*keyword_output_token_jar_alg_il,2)
keyword_total_cost_jar_alg_il = keyword_cost_input_token_jar_alg_il + keyword_cost_output_token_jar_alg_il
keyword_total_time_loop_jar_alg_il = keyword_end_time_loop_jar_alg_il - keyword_start_time_loop_jar_alg_il

# Display loop performance parameters & cost (replaces the progress output)
clear_output(wait=True)
print("Executed ",keyword_counter_jar_alg_il[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_jar_alg_il,1))
print("Total Input Tokens - ", keyword_input_token_jar_alg_il)
print("Total Input Cost = USD ",keyword_cost_input_token_jar_alg_il)
print("Total Output Tokens - ", keyword_output_token_jar_alg_il)
print("Total Output Cost = USD ",keyword_cost_output_token_jar_alg_il)
print("Total Cost = USD ",round(keyword_total_cost_jar_alg_il,2))

Executed  10  Iterations
Total Execution time (in secs) -  11.0
Total Input Tokens -  10892
Total Input Cost = USD  0.11
Total Output Tokens -  360
Total Output Cost = USD  0.01
Total Cost = USD  0.12


In [523]:
# Build the positive keyword/phrase summary table for store 'jar_alg_il':
# one column per topic, consolidated to one row per 'Type' (keywords / phrases).

# Output columns: identifying columns first, then one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the display name once: the first keyword_mappings key that contains the
# store identifier wins; a placeholder is used when nothing matches.
input_store_identifier = 'jar_alg_il'
store_name_jar_alg_il = next(
    (value for key, value in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Collect plain dict records and build the DataFrame once, instead of
# re-allocating the frame with pd.concat for every appended row.
records_jar_alg_il = []
for json_str in keyword_positive_output_jar_alg_il:
    # Each entry is a JSON string; it is read below as
    # {category: [{'keywords': '...', 'phrases': '...'}, ...]}
    data = json.loads(json_str)
    for category, content_list in data.items():
        # Fresh row per category: every column empty except the identifying ones
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name_jar_alg_il
        row_data['Sentiment'] = 'Positive'
        for content in content_list:
            # Keywords are handled first; entries are re-joined with ', ' after trimming
            row_data['Type'] = 'keywords'
            row_data[category] = ', '.join(kw.strip() for kw in content['keywords'].split(','))
            # Snapshot the dict: row_data is mutated again for the phrases row
            records_jar_alg_il.append(dict(row_data))
            # Phrases are handled next
            row_data['Type'] = 'phrases'
            row_data[category] = ', '.join(ph.strip() for ph in content['phrases'].split(','))
            records_jar_alg_il.append(dict(row_data))

# Keep the expected columns even when there is nothing to report
if records_jar_alg_il:
    positive_keywords_jar_alg_il = pd.DataFrame(records_jar_alg_il)
else:
    positive_keywords_jar_alg_il = pd.DataFrame(columns=columns)

# Consolidate rows that share 'Store Name', 'Sentiment' and 'Type' into a single row;
# within each topic column the non-null values are joined with a space.
positive_keywords_jar_alg_il = positive_keywords_jar_alg_il.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_jar_alg_il

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Jared-Algonquin, IL",Positive,keywords,"Knowledgeable :3, Recommended :2, Trusted :1, ...","friendly : 10, helpful : 9, welcoming : 3, kno...","helpful :15, friendly :12, knowledgeable :8, p...",,options :3,,,,"Exceptional :1, Fantastic :1, Excellent :1, Sp...",
1,"Jared-Algonquin, IL",Positive,phrases,"Shop with confidence :1, Highly recommend this...","great experience : 5, amazing staff : 3, wonde...","extremely helpful :3, very helpful :3, super f...",,"many options :2, wonderful choices :1",,,,"Exceptional quality of work :1, Quality of the...",


### jar_sch_il

In [524]:
# Extract positive keywords/phrases for store 'jar_sch_il': one positive_keywords()
# call per topic that has at least one review flagged 1 for that topic.
# The results are collected in a list (one entry per topic that had reviews).
keyword_positive_output_jar_sch_il = []
# Topic columns of the sentiment frame to iterate over ('review_text' is not a topic)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Single-element list: it is handed to the progress thread and mutated in place below
keyword_counter_jar_sch_il = [0]
keyword_input_token_jar_sch_il = 0
keyword_output_token_jar_sch_il = 0
keyword_start_time_loop_jar_sch_il = time.time()

# Resolve the store's sentiment frame once, before the progress thread is started,
# so a missing key fails fast without leaving a thread running.
sentiment_df_jar_sch_il = keyword_dataframes['jar_sch_il_final_sen_df_jul']

# Threading setup: background thread that reports progress until stop_event is set
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_jar_sch_il, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_jar_sch_il[0] += 1
        # Review texts of the rows where the topic has a value of 1
        filtered_comments_jar_sch_il = sentiment_df_jar_sch_il.loc[sentiment_df_jar_sch_il[topic] == 1, 'review_text'].tolist()
        # Topics without any matching review are skipped (no call is made)
        if filtered_comments_jar_sch_il:
            # positive_keywords returns the extraction result plus the input/output token counts
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_jar_sch_il, topic)
            keyword_positive_output_jar_sch_il.append(keywords)
            keyword_input_token_jar_sch_il += input_tokens_loop
            keyword_output_token_jar_sch_il += output_token_loop
finally:
    # Always stop the progress thread, even when a call above raises;
    # otherwise it would keep running after the cell has failed.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_jar_sch_il = time.time()
# Cost in USD: 0.01 per 1,000 input tokens and 0.03 per 1,000 output tokens
keyword_cost_input_token_jar_sch_il = round((0.01/1000)*keyword_input_token_jar_sch_il,2)
keyword_cost_output_token_jar_sch_il = round((0.03/1000)*keyword_output_token_jar_sch_il,2)
keyword_total_cost_jar_sch_il = keyword_cost_input_token_jar_sch_il + keyword_cost_output_token_jar_sch_il
keyword_total_time_loop_jar_sch_il = keyword_end_time_loop_jar_sch_il - keyword_start_time_loop_jar_sch_il

# Display loop performance parameters & cost (replaces the progress output)
clear_output(wait=True)
print("Executed ",keyword_counter_jar_sch_il[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_jar_sch_il,1))
print("Total Input Tokens - ", keyword_input_token_jar_sch_il)
print("Total Input Cost = USD ",keyword_cost_input_token_jar_sch_il)
print("Total Output Tokens - ", keyword_output_token_jar_sch_il)
print("Total Output Cost = USD ",keyword_cost_output_token_jar_sch_il)
print("Total Cost = USD ",round(keyword_total_cost_jar_sch_il,2))

Executed  10  Iterations
Total Execution time (in secs) -  16.0
Total Input Tokens -  17086
Total Input Cost = USD  0.17
Total Output Tokens -  519
Total Output Cost = USD  0.02
Total Cost = USD  0.19


In [525]:
# Build the positive keyword/phrase summary table for store 'jar_sch_il':
# one column per topic, consolidated to one row per 'Type' (keywords / phrases).

# Output columns: identifying columns first, then one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the display name once: the first keyword_mappings key that contains the
# store identifier wins; a placeholder is used when nothing matches.
input_store_identifier = 'jar_sch_il'
store_name_jar_sch_il = next(
    (value for key, value in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Collect plain dict records and build the DataFrame once, instead of
# re-allocating the frame with pd.concat for every appended row.
records_jar_sch_il = []
for json_str in keyword_positive_output_jar_sch_il:
    # Each entry is a JSON string; it is read below as
    # {category: [{'keywords': '...', 'phrases': '...'}, ...]}
    data = json.loads(json_str)
    for category, content_list in data.items():
        # Fresh row per category: every column empty except the identifying ones
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name_jar_sch_il
        row_data['Sentiment'] = 'Positive'
        for content in content_list:
            # Keywords are handled first; entries are re-joined with ', ' after trimming
            row_data['Type'] = 'keywords'
            row_data[category] = ', '.join(kw.strip() for kw in content['keywords'].split(','))
            # Snapshot the dict: row_data is mutated again for the phrases row
            records_jar_sch_il.append(dict(row_data))
            # Phrases are handled next
            row_data['Type'] = 'phrases'
            row_data[category] = ', '.join(ph.strip() for ph in content['phrases'].split(','))
            records_jar_sch_il.append(dict(row_data))

# Keep the expected columns even when there is nothing to report
if records_jar_sch_il:
    positive_keywords_jar_sch_il = pd.DataFrame(records_jar_sch_il)
else:
    positive_keywords_jar_sch_il = pd.DataFrame(columns=columns)

# Consolidate rows that share 'Store Name', 'Sentiment' and 'Type' into a single row;
# within each topic column the non-null values are joined with a space.
positive_keywords_jar_sch_il = positive_keywords_jar_sch_il.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_jar_sch_il

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Jared-Schaumburg, IL",Positive,keywords,"Knowledgeable :2, Professional :1, Honest :1","Great experience :5, Wonderful experience :4, ...","helpful : 30, knowledgeable : 8, kind : 8, pat...","custom design:2, unique vision:1, design ideas...",No relevant positive keywords/ phrases,No relevant positive keywords/ phrases,,"reasonable :1, Great Prices :1",No relevant positive keywords/ phrases,
1,"Jared-Schaumburg, IL",Positive,phrases,"Definitely found my jewelry spot :1, Made sure...","Great staff :1, Made us feel comfortable :1, M...","great to work with : 2, went out of their way ...","beautifully crafted:1, turned out so beautiful...",Great selection of diamonds and gemstones :1,No relevant positive keywords/ phrases,,reasonable price :1,No relevant positive keywords/ phrases,


### joy_suw_ga

In [526]:
# Extract positive keywords/phrases for store 'joy_suw_ga': one positive_keywords()
# call per topic that has at least one review flagged 1 for that topic.
# The results are collected in a list (one entry per topic that had reviews).
keyword_positive_output_joy_suw_ga = []
# Topic columns of the sentiment frame to iterate over ('review_text' is not a topic)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Single-element list: it is handed to the progress thread and mutated in place below
keyword_counter_joy_suw_ga = [0]
keyword_input_token_joy_suw_ga = 0
keyword_output_token_joy_suw_ga = 0
keyword_start_time_loop_joy_suw_ga = time.time()

# Resolve the store's sentiment frame once, before the progress thread is started,
# so a missing key fails fast without leaving a thread running.
sentiment_df_joy_suw_ga = keyword_dataframes['joy_suw_ga_final_sen_df_jul']

# Threading setup: background thread that reports progress until stop_event is set
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_joy_suw_ga, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_joy_suw_ga[0] += 1
        # Review texts of the rows where the topic has a value of 1
        filtered_comments_joy_suw_ga = sentiment_df_joy_suw_ga.loc[sentiment_df_joy_suw_ga[topic] == 1, 'review_text'].tolist()
        # Topics without any matching review are skipped (no call is made)
        if filtered_comments_joy_suw_ga:
            # positive_keywords returns the extraction result plus the input/output token counts
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_joy_suw_ga, topic)
            keyword_positive_output_joy_suw_ga.append(keywords)
            keyword_input_token_joy_suw_ga += input_tokens_loop
            keyword_output_token_joy_suw_ga += output_token_loop
finally:
    # Always stop the progress thread, even when a call above raises;
    # otherwise it would keep running after the cell has failed.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_joy_suw_ga = time.time()
# Cost in USD: 0.01 per 1,000 input tokens and 0.03 per 1,000 output tokens
keyword_cost_input_token_joy_suw_ga = round((0.01/1000)*keyword_input_token_joy_suw_ga,2)
keyword_cost_output_token_joy_suw_ga = round((0.03/1000)*keyword_output_token_joy_suw_ga,2)
keyword_total_cost_joy_suw_ga = keyword_cost_input_token_joy_suw_ga + keyword_cost_output_token_joy_suw_ga
keyword_total_time_loop_joy_suw_ga = keyword_end_time_loop_joy_suw_ga - keyword_start_time_loop_joy_suw_ga

# Display loop performance parameters & cost (replaces the progress output)
clear_output(wait=True)
print("Executed ",keyword_counter_joy_suw_ga[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_joy_suw_ga,1))
print("Total Input Tokens - ", keyword_input_token_joy_suw_ga)
print("Total Input Cost = USD ",keyword_cost_input_token_joy_suw_ga)
print("Total Output Tokens - ", keyword_output_token_joy_suw_ga)
print("Total Output Cost = USD ",keyword_cost_output_token_joy_suw_ga)
print("Total Cost = USD ",round(keyword_total_cost_joy_suw_ga,2))

Executed  10  Iterations
Total Execution time (in secs) -  33.6
Total Input Tokens -  110925
Total Input Cost = USD  1.11
Total Output Tokens -  801
Total Output Cost = USD  0.02
Total Cost = USD  1.13


In [527]:
# Build the positive keyword/phrase table for store 'joy_suw_ga' from the
# JSON strings collected in keyword_positive_output_joy_suw_ga.

# Column layout of the table: three identifying columns followed by the topics
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store's display name once: it is identical for every row.
# The first keyword_mappings key that contains the identifier wins.
input_store_identifier = 'joy_suw_ga'
store_name = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name = value
        break

# Collect plain dict records and build the DataFrame in one call; growing a
# DataFrame with pd.concat inside a loop re-copies all earlier rows each time.
records = []
for json_str in keyword_positive_output_joy_suw_ga:
    # Each JSON string maps a category name to a list of
    # {'keywords': ..., 'phrases': ...} entries
    data = json.loads(json_str)
    for category, content_list in data.items():
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name
        row_data['Sentiment'] = 'Positive'
        for content in content_list:
            # Keywords row: comma-separated items re-joined with stripped whitespace
            row_data['Type'] = 'keywords'
            row_data[category] = ', '.join(kw.strip() for kw in content['keywords'].split(','))
            records.append(dict(row_data))  # snapshot, row_data is reused below
            # Phrases row for the same category
            row_data['Type'] = 'phrases'
            row_data[category] = ', '.join(ph.strip() for ph in content['phrases'].split(','))
            records.append(dict(row_data))

# With no records, keep an empty frame that still carries the expected columns
positive_keywords_joy_suw_ga = pd.DataFrame(records) if records else pd.DataFrame(columns=columns)

# Consolidate rows sharing Store Name / Sentiment / Type into a single row;
# within each group the non-null values of every other column are joined with a space.
positive_keywords_joy_suw_ga = positive_keywords_joy_suw_ga.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_joy_suw_ga

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Joyalukkas Jewellery-Suwanee, GA",Positive,keywords,"Recommend :3, Reliable :2, Trust :2, Depend :1...","Great experience : 20, Good experience : 15, P...","helpful : 98, patient : 85, friendly : 75, pol...","good designs: 15, beautiful designs: 10, lates...","collection : 78, variety : 15, options : 12, r...","discount :5, deal :5, offers :2, price :2, pri...",No relevant positive keywords/ phrases,"reasonable :8, affordable :3, competitive :3, ...","quality :8, good quality :4, excellent quality...","exchange :8, exchanging :3, change :1"
1,"Joyalukkas Jewellery-Suwanee, GA",Positive,phrases,"Highly recommend this place to buy gold :1, I ...","Great shopping experience : 5, Wonderful shopp...","very helpful and patient : 10, extremely helpf...","love their designs: 2, beautiful piece of jewe...","wide variety : 5, wide range : 4, huge collect...","good discount :3, good deal :3, best deal :2, ...",No relevant positive keywords/ phrases,"best price :8, good price :7, reasonable price...","quality gold jewelries :1, quality of their pr...","exchange gold jewelry :2, helped us exchange :..."


### joy_chi_il

In [528]:
# Positive keyword extraction for store 'joy_chi_il'.
# For every topic with at least one review flagged 1, positive_keywords() is
# called and its result plus token usage are accumulated; a background thread
# reports progress while the loop runs.

# List collecting one positive_keywords() result per topic that had comments
keyword_positive_output_joy_chi_il = []
# Topic columns to iterate over (the 'review_text' column is not a topic)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# One-element list: it is handed to the progress thread and updated in place
# below (presumably the thread reads element 0 -- confirm against
# print_dynamic_message_keyword).
keyword_counter_joy_chi_il = [0]
keyword_input_token_joy_chi_il = 0
keyword_output_token_joy_chi_il = 0
keyword_start_time_loop_joy_chi_il = time.time()

# Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(
    target=print_dynamic_message_keyword,
    args=(keyword_counter_joy_chi_il, keyword_total_iterations, stop_event),
)
message_thread.start()

try:
    # The store's DataFrame is the same for every topic, so fetch it once
    sentiment_df_joy_chi_il = keyword_dataframes['joy_chi_il_final_sen_df_jul']

    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_joy_chi_il[0] += 1
        # Review texts of the rows where this topic column equals 1
        filtered_comments_joy_chi_il = sentiment_df_joy_chi_il.loc[
            sentiment_df_joy_chi_il[topic] == 1, 'review_text'
        ].tolist()
        # Only call positive_keywords when there is at least one such comment
        if filtered_comments_joy_chi_il:
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_joy_chi_il, topic)
            keyword_positive_output_joy_chi_il.append(keywords)
            keyword_input_token_joy_chi_il += input_tokens_loop
            keyword_output_token_joy_chi_il += output_token_loop
finally:
    # Always stop the progress thread, even if the loop raised; otherwise it
    # would keep running after the cell has failed.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_joy_chi_il = time.time()
# Rates hard-coded by this notebook: USD 0.01 per 1,000 input tokens and
# USD 0.03 per 1,000 output tokens.
# NOTE(review): confirm these match the model that is actually billed.
keyword_cost_input_token_joy_chi_il = round((0.01/1000)*keyword_input_token_joy_chi_il,2)
keyword_cost_output_token_joy_chi_il = round((0.03/1000)*keyword_output_token_joy_chi_il,2)
keyword_total_cost_joy_chi_il = keyword_cost_input_token_joy_chi_il + keyword_cost_output_token_joy_chi_il
keyword_total_time_loop_joy_chi_il = keyword_end_time_loop_joy_chi_il - keyword_start_time_loop_joy_chi_il

# Display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_joy_chi_il[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_joy_chi_il,1))
print("Total Input Tokens - ", keyword_input_token_joy_chi_il)
print("Total Input Cost = USD ",keyword_cost_input_token_joy_chi_il)
print("Total Output Tokens - ", keyword_output_token_joy_chi_il)
print("Total Output Cost = USD ",keyword_cost_output_token_joy_chi_il)
print("Total Cost = USD ",round(keyword_total_cost_joy_chi_il,2))

Executed  10  Iterations
Total Execution time (in secs) -  27.5
Total Input Tokens -  59389
Total Input Cost = USD  0.59
Total Output Tokens -  778
Total Output Cost = USD  0.02
Total Cost = USD  0.61


In [529]:
# Build the positive keyword/phrase table for store 'joy_chi_il' from the
# JSON strings collected in keyword_positive_output_joy_chi_il.

# Column layout of the table: three identifying columns followed by the topics
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store's display name once: it is identical for every row.
# The first keyword_mappings key that contains the identifier wins.
input_store_identifier = 'joy_chi_il'
store_name = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name = value
        break

# Collect plain dict records and build the DataFrame in one call; growing a
# DataFrame with pd.concat inside a loop re-copies all earlier rows each time.
records = []
for json_str in keyword_positive_output_joy_chi_il:
    # Each JSON string maps a category name to a list of
    # {'keywords': ..., 'phrases': ...} entries
    data = json.loads(json_str)
    for category, content_list in data.items():
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name
        row_data['Sentiment'] = 'Positive'
        for content in content_list:
            # Keywords row: comma-separated items re-joined with stripped whitespace
            row_data['Type'] = 'keywords'
            row_data[category] = ', '.join(kw.strip() for kw in content['keywords'].split(','))
            records.append(dict(row_data))  # snapshot, row_data is reused below
            # Phrases row for the same category
            row_data['Type'] = 'phrases'
            row_data[category] = ', '.join(ph.strip() for ph in content['phrases'].split(','))
            records.append(dict(row_data))

# With no records, keep an empty frame that still carries the expected columns
positive_keywords_joy_chi_il = pd.DataFrame(records) if records else pd.DataFrame(columns=columns)

# Consolidate rows sharing Store Name / Sentiment / Type into a single row;
# within each group the non-null values of every other column are joined with a space.
positive_keywords_joy_chi_il = positive_keywords_joy_chi_il.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_joy_chi_il

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Joyalukkas Jewellery-Chicago, IL",Positive,keywords,"Recommend :5, Knowledgeable :4, Trust :3, Reli...","good experience : 15, great experience : 14, w...","helpful : 50, patient : 20, friendly : 15, kno...","Good designs: 15, Great designs: 6, Nice desig...","varieties :3, variety :3, options :3, selectio...","discount :15, deal :10, offers :3, price :3, b...","discount :1, low :1","reasonable :5, affordable :4, fair :4, best :3...","high quality :4, good quality :4, best quality...","exchange rate:2, gold exchange:1"
1,"Joyalukkas Jewellery-Chicago, IL",Positive,phrases,"Highly recommend this place :2, Best place to ...","great shopping experience : 4, wonderful shopp...","very helpful : 30, very patient : 10, very fri...","variety of designs: 2, exquisite designs: 2, e...","wide variety :2, diverse selection :1, variety...","good discount :4, best discount :2, special di...","good discount on the making charge :1, very lo...","reasonable prices :3, great price :3, best pri...","quality is top-notch :1, jewelry is premium :1...","great place to exchange gold:1, smooth and eff..."


### joy_hou_tx

In [530]:
# Positive keyword extraction for store 'joy_hou_tx'.
# For every topic with at least one review flagged 1, positive_keywords() is
# called and its result plus token usage are accumulated; a background thread
# reports progress while the loop runs.

# List collecting one positive_keywords() result per topic that had comments
keyword_positive_output_joy_hou_tx = []
# Topic columns to iterate over (the 'review_text' column is not a topic)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# One-element list: it is handed to the progress thread and updated in place
# below (presumably the thread reads element 0 -- confirm against
# print_dynamic_message_keyword).
keyword_counter_joy_hou_tx = [0]
keyword_input_token_joy_hou_tx = 0
keyword_output_token_joy_hou_tx = 0
keyword_start_time_loop_joy_hou_tx = time.time()

# Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(
    target=print_dynamic_message_keyword,
    args=(keyword_counter_joy_hou_tx, keyword_total_iterations, stop_event),
)
message_thread.start()

try:
    # The store's DataFrame is the same for every topic, so fetch it once
    sentiment_df_joy_hou_tx = keyword_dataframes['joy_hou_tx_final_sen_df_jul']

    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_joy_hou_tx[0] += 1
        # Review texts of the rows where this topic column equals 1
        filtered_comments_joy_hou_tx = sentiment_df_joy_hou_tx.loc[
            sentiment_df_joy_hou_tx[topic] == 1, 'review_text'
        ].tolist()
        # Only call positive_keywords when there is at least one such comment
        if filtered_comments_joy_hou_tx:
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_joy_hou_tx, topic)
            keyword_positive_output_joy_hou_tx.append(keywords)
            keyword_input_token_joy_hou_tx += input_tokens_loop
            keyword_output_token_joy_hou_tx += output_token_loop
finally:
    # Always stop the progress thread, even if the loop raised; otherwise it
    # would keep running after the cell has failed.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_joy_hou_tx = time.time()
# Rates hard-coded by this notebook: USD 0.01 per 1,000 input tokens and
# USD 0.03 per 1,000 output tokens.
# NOTE(review): confirm these match the model that is actually billed.
keyword_cost_input_token_joy_hou_tx = round((0.01/1000)*keyword_input_token_joy_hou_tx,2)
keyword_cost_output_token_joy_hou_tx = round((0.03/1000)*keyword_output_token_joy_hou_tx,2)
keyword_total_cost_joy_hou_tx = keyword_cost_input_token_joy_hou_tx + keyword_cost_output_token_joy_hou_tx
keyword_total_time_loop_joy_hou_tx = keyword_end_time_loop_joy_hou_tx - keyword_start_time_loop_joy_hou_tx

# Display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_joy_hou_tx[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_joy_hou_tx,1))
print("Total Input Tokens - ", keyword_input_token_joy_hou_tx)
print("Total Input Cost = USD ",keyword_cost_input_token_joy_hou_tx)
print("Total Output Tokens - ", keyword_output_token_joy_hou_tx)
print("Total Output Cost = USD ",keyword_cost_output_token_joy_hou_tx)
print("Total Cost = USD ",round(keyword_total_cost_joy_hou_tx,2))

Executed  10  Iterations
Total Execution time (in secs) -  30.6
Total Input Tokens -  52675
Total Input Cost = USD  0.53
Total Output Tokens -  769
Total Output Cost = USD  0.02
Total Cost = USD  0.55


In [531]:
# Build the positive keyword/phrase table for store 'joy_hou_tx' from the
# JSON strings collected in keyword_positive_output_joy_hou_tx.

# Column layout of the table: three identifying columns followed by the topics
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store's display name once: it is identical for every row.
# The first keyword_mappings key that contains the identifier wins.
input_store_identifier = 'joy_hou_tx'
store_name = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name = value
        break

# Collect plain dict records and build the DataFrame in one call; growing a
# DataFrame with pd.concat inside a loop re-copies all earlier rows each time.
records = []
for json_str in keyword_positive_output_joy_hou_tx:
    # Each JSON string maps a category name to a list of
    # {'keywords': ..., 'phrases': ...} entries
    data = json.loads(json_str)
    for category, content_list in data.items():
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name
        row_data['Sentiment'] = 'Positive'
        for content in content_list:
            # Keywords row: comma-separated items re-joined with stripped whitespace
            row_data['Type'] = 'keywords'
            row_data[category] = ', '.join(kw.strip() for kw in content['keywords'].split(','))
            records.append(dict(row_data))  # snapshot, row_data is reused below
            # Phrases row for the same category
            row_data['Type'] = 'phrases'
            row_data[category] = ', '.join(ph.strip() for ph in content['phrases'].split(','))
            records.append(dict(row_data))

# With no records, keep an empty frame that still carries the expected columns
positive_keywords_joy_hou_tx = pd.DataFrame(records) if records else pd.DataFrame(columns=columns)

# Consolidate rows sharing Store Name / Sentiment / Type into a single row;
# within each group the non-null values of every other column are joined with a space.
positive_keywords_joy_hou_tx = positive_keywords_joy_hou_tx.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_joy_hou_tx

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Joyalukkas Jewellery-Houston, TX",Positive,keywords,"Trust :3, Reliable :2, Honest :2, Transparent ...","Great experience : 15, Wonderful experience : ...","helpful : 98, patient : 72, friendly : 68, kno...","designs :10, design :8, intricate :2, elegant ...","variety :5, models :4, options :3, designs :3,...","good deal :10, great discount :6, good discoun...",No relevant positive keywords/ phrases,"reasonable :7, good price :6, best price :5, a...","quality :6, good quality :3, great quality :2,...","exchange :10, exchanging :3, swap :1"
1,"Joyalukkas Jewellery-Houston, TX",Positive,phrases,"Trustworthy place to buy :1, Always trusted :1...","Great shopping experience : 4, Wonderful shopp...","very helpful : 45, very patient : 30, great cu...","good designs :3, awesome designs :2, amazing d...","wide variety :3, great selection :3, variety o...","great discounts :2, great deals going on :1, d...",great offer on making charges :1,"reasonable prices :4, good prices :3, best pri...","high-quality, beautiful jewelry :1, quality of...","exchange process easy :1, smooth exchange proc..."


### joy_fri_tx

In [532]:
# Positive keyword extraction for store 'joy_fri_tx'.
# For every topic with at least one review flagged 1, positive_keywords() is
# called and its result plus token usage are accumulated; a background thread
# reports progress while the loop runs.

# List collecting one positive_keywords() result per topic that had comments
keyword_positive_output_joy_fri_tx = []
# Topic columns to iterate over (the 'review_text' column is not a topic)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# One-element list: it is handed to the progress thread and updated in place
# below (presumably the thread reads element 0 -- confirm against
# print_dynamic_message_keyword).
keyword_counter_joy_fri_tx = [0]
keyword_input_token_joy_fri_tx = 0
keyword_output_token_joy_fri_tx = 0
keyword_start_time_loop_joy_fri_tx = time.time()

# Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(
    target=print_dynamic_message_keyword,
    args=(keyword_counter_joy_fri_tx, keyword_total_iterations, stop_event),
)
message_thread.start()

try:
    # The store's DataFrame is the same for every topic, so fetch it once
    sentiment_df_joy_fri_tx = keyword_dataframes['joy_fri_tx_final_sen_df_jul']

    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_joy_fri_tx[0] += 1
        # Review texts of the rows where this topic column equals 1
        filtered_comments_joy_fri_tx = sentiment_df_joy_fri_tx.loc[
            sentiment_df_joy_fri_tx[topic] == 1, 'review_text'
        ].tolist()
        # Only call positive_keywords when there is at least one such comment
        if filtered_comments_joy_fri_tx:
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_joy_fri_tx, topic)
            keyword_positive_output_joy_fri_tx.append(keywords)
            keyword_input_token_joy_fri_tx += input_tokens_loop
            keyword_output_token_joy_fri_tx += output_token_loop
finally:
    # Always stop the progress thread, even if the loop raised; otherwise it
    # would keep running after the cell has failed.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_joy_fri_tx = time.time()
# Rates hard-coded by this notebook: USD 0.01 per 1,000 input tokens and
# USD 0.03 per 1,000 output tokens.
# NOTE(review): confirm these match the model that is actually billed.
keyword_cost_input_token_joy_fri_tx = round((0.01/1000)*keyword_input_token_joy_fri_tx,2)
keyword_cost_output_token_joy_fri_tx = round((0.03/1000)*keyword_output_token_joy_fri_tx,2)
keyword_total_cost_joy_fri_tx = keyword_cost_input_token_joy_fri_tx + keyword_cost_output_token_joy_fri_tx
keyword_total_time_loop_joy_fri_tx = keyword_end_time_loop_joy_fri_tx - keyword_start_time_loop_joy_fri_tx

# Display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_joy_fri_tx[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_joy_fri_tx,1))
print("Total Input Tokens - ", keyword_input_token_joy_fri_tx)
print("Total Input Cost = USD ",keyword_cost_input_token_joy_fri_tx)
print("Total Output Tokens - ", keyword_output_token_joy_fri_tx)
print("Total Output Cost = USD ",keyword_cost_output_token_joy_fri_tx)
print("Total Cost = USD ",round(keyword_total_cost_joy_fri_tx,2))

Executed  10  Iterations
Total Execution time (in secs) -  67.6
Total Input Tokens -  130944
Total Input Cost = USD  1.31
Total Output Tokens -  818
Total Output Cost = USD  0.02
Total Cost = USD  1.33


In [533]:
# Build the positive keyword/phrase table for store 'joy_fri_tx' from the
# JSON strings collected in keyword_positive_output_joy_fri_tx.

# Column layout of the table: three identifying columns followed by the topics
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store's display name once: it is identical for every row.
# The first keyword_mappings key that contains the identifier wins.
input_store_identifier = 'joy_fri_tx'
store_name = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name = value
        break

# Collect plain dict records and build the DataFrame in one call; growing a
# DataFrame with pd.concat inside a loop re-copies all earlier rows each time.
records = []
for json_str in keyword_positive_output_joy_fri_tx:
    # Each JSON string maps a category name to a list of
    # {'keywords': ..., 'phrases': ...} entries
    data = json.loads(json_str)
    for category, content_list in data.items():
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name
        row_data['Sentiment'] = 'Positive'
        for content in content_list:
            # Keywords row: comma-separated items re-joined with stripped whitespace
            row_data['Type'] = 'keywords'
            row_data[category] = ', '.join(kw.strip() for kw in content['keywords'].split(','))
            records.append(dict(row_data))  # snapshot, row_data is reused below
            # Phrases row for the same category
            row_data['Type'] = 'phrases'
            row_data[category] = ', '.join(ph.strip() for ph in content['phrases'].split(','))
            records.append(dict(row_data))

# With no records, keep an empty frame that still carries the expected columns
positive_keywords_joy_fri_tx = pd.DataFrame(records) if records else pd.DataFrame(columns=columns)

# Consolidate rows sharing Store Name / Sentiment / Type into a single row;
# within each group the non-null values of every other column are joined with a space.
positive_keywords_joy_fri_tx = positive_keywords_joy_fri_tx.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_joy_fri_tx

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Joyalukkas Jewellery-Frisco, TX",Positive,keywords,"Trust :3, Trustworthy :2, Trusted :2, Confiden...","organized :1, clean :1, neat :1, spacious :1, ...","patient : 98, helpful : 95, friendly : 85, kno...","designs : 45, models : 8, craftsmanship : 1, v...","collection : 98, variety : 15, options : 12, m...","discount :10, deal :9, offers :4, sale :2, off :2","discount :1, deal :1, reasonable :1","reasonable :5, affordable :3, competitive :3, ...","high-quality :3, exceptional :3, quality :3, e...","good price :2, fair price :1, good rate :1, re..."
1,"Joyalukkas Jewellery-Frisco, TX",Positive,phrases,"Trustworthy store to buy :1, Most trusted bran...","pleasant experience :3, wonderful experience :...","very patient and helpful : 10, very friendly a...","beautiful designs : 3, unique designs : 3, goo...","wide range : 6, wide variety : 5, vast collect...","good discount :5, best deal :4, additional dis...","detailed breakdown of the making charges :1, m...","reasonable price :5, best price :5, good price...","quality of their jewelry is exceptional :1, qu...","credit us for trading in old gold :1, exchange..."


### mal_chi_il

In [534]:
# Positive keyword extraction for store 'mal_chi_il'.
# For every topic with at least one review flagged 1, positive_keywords() is
# called and its result plus token usage are accumulated; a background thread
# reports progress while the loop runs.

# List collecting one positive_keywords() result per topic that had comments
keyword_positive_output_mal_chi_il = []
# Topic columns to iterate over (the 'review_text' column is not a topic)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# One-element list: it is handed to the progress thread and updated in place
# below (presumably the thread reads element 0 -- confirm against
# print_dynamic_message_keyword).
keyword_counter_mal_chi_il = [0]
keyword_input_token_mal_chi_il = 0
keyword_output_token_mal_chi_il = 0
keyword_start_time_loop_mal_chi_il = time.time()

# Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(
    target=print_dynamic_message_keyword,
    args=(keyword_counter_mal_chi_il, keyword_total_iterations, stop_event),
)
message_thread.start()

try:
    # The store's DataFrame is the same for every topic, so fetch it once
    sentiment_df_mal_chi_il = keyword_dataframes['mal_chi_il_final_sen_df_jul']

    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_mal_chi_il[0] += 1
        # Review texts of the rows where this topic column equals 1
        filtered_comments_mal_chi_il = sentiment_df_mal_chi_il.loc[
            sentiment_df_mal_chi_il[topic] == 1, 'review_text'
        ].tolist()
        # Only call positive_keywords when there is at least one such comment
        if filtered_comments_mal_chi_il:
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_mal_chi_il, topic)
            keyword_positive_output_mal_chi_il.append(keywords)
            keyword_input_token_mal_chi_il += input_tokens_loop
            keyword_output_token_mal_chi_il += output_token_loop
finally:
    # Always stop the progress thread, even if the loop raised; otherwise it
    # would keep running after the cell has failed.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_mal_chi_il = time.time()
# Rates hard-coded by this notebook: USD 0.01 per 1,000 input tokens and
# USD 0.03 per 1,000 output tokens.
# NOTE(review): confirm these match the model that is actually billed.
keyword_cost_input_token_mal_chi_il = round((0.01/1000)*keyword_input_token_mal_chi_il,2)
keyword_cost_output_token_mal_chi_il = round((0.03/1000)*keyword_output_token_mal_chi_il,2)
keyword_total_cost_mal_chi_il = keyword_cost_input_token_mal_chi_il + keyword_cost_output_token_mal_chi_il
keyword_total_time_loop_mal_chi_il = keyword_end_time_loop_mal_chi_il - keyword_start_time_loop_mal_chi_il

# Display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_mal_chi_il[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_mal_chi_il,1))
print("Total Input Tokens - ", keyword_input_token_mal_chi_il)
print("Total Input Cost = USD ",keyword_cost_input_token_mal_chi_il)
print("Total Output Tokens - ", keyword_output_token_mal_chi_il)
print("Total Output Cost = USD ",keyword_cost_output_token_mal_chi_il)
print("Total Cost = USD ",round(keyword_total_cost_mal_chi_il,2))

Executed  10  Iterations
Total Execution time (in secs) -  36.6
Total Input Tokens -  77968
Total Input Cost = USD  0.78
Total Output Tokens -  823
Total Output Cost = USD  0.02
Total Cost = USD  0.8


In [535]:
# Build the positive keyword/phrase table for store 'mal_chi_il' from the
# JSON strings collected in keyword_positive_output_mal_chi_il.

# Column layout of the table: three identifying columns followed by the topics
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store's display name once: it is identical for every row.
# The first keyword_mappings key that contains the identifier wins.
input_store_identifier = 'mal_chi_il'
store_name = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name = value
        break

# Collect plain dict records and build the DataFrame in one call; growing a
# DataFrame with pd.concat inside a loop re-copies all earlier rows each time.
records = []
for json_str in keyword_positive_output_mal_chi_il:
    # Each JSON string maps a category name to a list of
    # {'keywords': ..., 'phrases': ...} entries
    data = json.loads(json_str)
    for category, content_list in data.items():
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name
        row_data['Sentiment'] = 'Positive'
        for content in content_list:
            # Keywords row: comma-separated items re-joined with stripped whitespace
            row_data['Type'] = 'keywords'
            row_data[category] = ', '.join(kw.strip() for kw in content['keywords'].split(','))
            records.append(dict(row_data))  # snapshot, row_data is reused below
            # Phrases row for the same category
            row_data['Type'] = 'phrases'
            row_data[category] = ', '.join(ph.strip() for ph in content['phrases'].split(','))
            records.append(dict(row_data))

# With no records, keep an empty frame that still carries the expected columns
positive_keywords_mal_chi_il = pd.DataFrame(records) if records else pd.DataFrame(columns=columns)

# Consolidate rows sharing Store Name / Sentiment / Type into a single row;
# within each group the non-null values of every other column are joined with a space.
positive_keywords_mal_chi_il = positive_keywords_mal_chi_il.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_mal_chi_il

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Malabar Gold & Diamonds-Chicago, IL",Positive,keywords,"Trust :3, Genuine :2, Honest :2, Reliable :1, ...","great experience : 45, good experience : 20, a...","helpful : 98, patient : 56, friendly : 54, kno...","unique :5, beautiful :4, stunning :2, exquisit...","variety :5, options :5, selection :4, collecti...","good discount: 15, great deal: 10, best discou...","reasonable :1, discount :1","good price :15, best price :12, reasonable pri...","good quality :5, high quality :3, best quality...","exchange policy :2, exchanged :2, exchanging :..."
1,"Malabar Gold & Diamonds-Chicago, IL",Positive,phrases,"Trustworthy place for gold :1, Reputation buil...","very good experience : 5, pleasant shopping ex...","very helpful and patient : 5, very friendly an...","unique designs :3, beautiful designs :3, stunn...","wide variety :2, lot of variety :2, variety of...","gave us a good discount: 8, gave us a great de...",best discount on making charges :1,"gave us a good price :5, gave us the best pric...","quality of the product :1, quality of gold is ...","entire exchange process :1, exchanging gold je..."


### mal_nap_il

In [536]:
# Positive keyword extraction for store 'mal_nap_il'.
# One result string is collected per topic that has at least one matching review;
# the list is consumed by the cell that builds positive_keywords_mal_nap_il.
keyword_positive_output_mal_nap_il = []

# Topic columns to iterate over (the 'review_text' column is deliberately excluded)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Single-element list so the progress thread can observe increments made by this cell
keyword_counter_mal_nap_il = [0]
keyword_input_token_mal_nap_il = 0
keyword_output_token_mal_nap_il = 0
keyword_start_time_loop_mal_nap_il = time.time()

# Look the store's sentiment DataFrame up once instead of twice per topic
keyword_source_df_mal_nap_il = keyword_dataframes['mal_nap_il_final_sen_df_jul']

# Threading setup: a background thread shows progress while the loop runs
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(
    target=print_dynamic_message_keyword,
    args=(keyword_counter_mal_nap_il, keyword_total_iterations, stop_event),
)
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_mal_nap_il[0] += 1
        # Keep only the review texts of rows where the topic column equals 1
        filtered_comments_mal_nap_il = keyword_source_df_mal_nap_il.loc[
            keyword_source_df_mal_nap_il[topic] == 1, 'review_text'
        ].tolist()
        # Topics without any matching review are skipped (no call is made)
        if filtered_comments_mal_nap_il:
            # positive_keywords returns three values; the 2nd and 3rd are summed as token counts
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_mal_nap_il, topic)
            keyword_positive_output_mal_nap_il.append(keywords)
            keyword_input_token_mal_nap_il += input_tokens_loop
            keyword_output_token_mal_nap_il += output_token_loop
finally:
    # Always stop the progress thread, even when a call above raises; otherwise
    # it would keep running in the background after the cell has failed.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_mal_nap_il = time.time()
# Hard-coded rates: USD 0.01 per 1,000 input tokens and USD 0.03 per 1,000 output tokens
# NOTE(review): confirm these still match the pricing of the model used by positive_keywords
keyword_cost_input_token_mal_nap_il = round((0.01/1000)*keyword_input_token_mal_nap_il,2)
keyword_cost_output_token_mal_nap_il = round((0.03/1000)*keyword_output_token_mal_nap_il,2)
# The total is the sum of the two already-rounded components
keyword_total_cost_mal_nap_il = keyword_cost_input_token_mal_nap_il + keyword_cost_output_token_mal_nap_il
keyword_total_time_loop_mal_nap_il = keyword_end_time_loop_mal_nap_il - keyword_start_time_loop_mal_nap_il

# Display loop performance parameters & cost (replaces the progress message)
clear_output(wait=True)
print("Executed ",keyword_counter_mal_nap_il[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_mal_nap_il,1))
print("Total Input Tokens - ", keyword_input_token_mal_nap_il)
print("Total Input Cost = USD ",keyword_cost_input_token_mal_nap_il)
print("Total Output Tokens - ", keyword_output_token_mal_nap_il)
print("Total Output Cost = USD ",keyword_cost_output_token_mal_nap_il)
print("Total Cost = USD ",round(keyword_total_cost_mal_nap_il,2))

Executed  10  Iterations
Total Execution time (in secs) -  34.6
Total Input Tokens -  96426
Total Input Cost = USD  0.96
Total Output Tokens -  725
Total Output Cost = USD  0.02
Total Cost = USD  0.98


In [537]:
# Build the positive keyword/phrase summary table for store 'mal_nap_il'.
# Output layout: three identifying columns followed by one column per topic.
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the display name once: the first keyword_mappings key that contains the
# store identifier wins; the placeholder is used when no key matches.
input_store_identifier = 'mal_nap_il'
store_name_mal_nap_il = next(
    (value for key, value in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Collect every row first and build the DataFrame once; growing a DataFrame with
# pd.concat inside the loop copies all previous rows on every iteration.
keyword_rows_mal_nap_il = []
for json_str in keyword_positive_output_mal_nap_il:
    # Each collected result is a JSON object mapping a category to a list of entries
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # Each entry yields two rows: its 'keywords' first, then its 'phrases'
            for row_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_mal_nap_il
                row_data['Sentiment'] = 'Positive'
                row_data['Type'] = row_type
                # Normalise spacing around the comma-separated items
                row_data[category] = ', '.join(item.strip() for item in content[row_type].split(','))
                keyword_rows_mal_nap_il.append(row_data)

if keyword_rows_mal_nap_il:
    positive_keywords_mal_nap_il = pd.DataFrame(keyword_rows_mal_nap_il)
else:
    # No results collected: keep an empty frame that still has the expected columns
    positive_keywords_mal_nap_il = pd.DataFrame(columns=columns)

# Consolidate rows that share 'Store Name', 'Sentiment' and 'Type' into a single row;
# the non-null values of each topic column are joined with a space.
positive_keywords_mal_nap_il = positive_keywords_mal_nap_il.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_mal_nap_il

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Malabar Gold & Diamonds-Naperville, IL",Positive,keywords,"Trustworthy :2, Trusted :2, Recommend :2, Reco...","pleasant : 15, helpful : 10, friendly : 8, wel...","helpful : 98, patient : 85, friendly : 75, kno...","designs : 45, collection : 10, variety : 5, un...","collection : 45, selection : 15, variety : 10,...","discount :10, deal :9, price :3, offers :1, pr...",,"reasonable :5, affordable :3, fair :2, negotia...","high quality :3, quality :3, top-notch :2, exc...","exchange :10, helpful :8, patient :3, consider..."
1,"Malabar Gold & Diamonds-Naperville, IL",Positive,phrases,"Trustworthy store :1, Trusted destination for ...","great experience : 20, wonderful experience : ...","very helpful : 40, very patient : 30, very fri...","nice designs : 3, beautiful designs : 3, great...","great collection : 10, good collection : 8, am...","good discount :5, great deal :4, best discount...",,"best price :10, good price :9, great price :8,...","quality and craftsmanship :1, high-quality gol...","helped with the exchange :2, helped me exchang..."


### mal_ise_nj

In [538]:
# Positive keyword extraction for store 'mal_ise_nj'.
# One result string is collected per topic that has at least one matching review;
# the list is consumed by the cell that builds positive_keywords_mal_ise_nj.
keyword_positive_output_mal_ise_nj = []

# Topic columns to iterate over (the 'review_text' column is deliberately excluded)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Single-element list so the progress thread can observe increments made by this cell
keyword_counter_mal_ise_nj = [0]
keyword_input_token_mal_ise_nj = 0
keyword_output_token_mal_ise_nj = 0
keyword_start_time_loop_mal_ise_nj = time.time()

# Look the store's sentiment DataFrame up once instead of twice per topic
keyword_source_df_mal_ise_nj = keyword_dataframes['mal_ise_nj_final_sen_df_jul']

# Threading setup: a background thread shows progress while the loop runs
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(
    target=print_dynamic_message_keyword,
    args=(keyword_counter_mal_ise_nj, keyword_total_iterations, stop_event),
)
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_mal_ise_nj[0] += 1
        # Keep only the review texts of rows where the topic column equals 1
        filtered_comments_mal_ise_nj = keyword_source_df_mal_ise_nj.loc[
            keyword_source_df_mal_ise_nj[topic] == 1, 'review_text'
        ].tolist()
        # Topics without any matching review are skipped (no call is made)
        if filtered_comments_mal_ise_nj:
            # positive_keywords returns three values; the 2nd and 3rd are summed as token counts
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_mal_ise_nj, topic)
            keyword_positive_output_mal_ise_nj.append(keywords)
            keyword_input_token_mal_ise_nj += input_tokens_loop
            keyword_output_token_mal_ise_nj += output_token_loop
finally:
    # Always stop the progress thread, even when a call above raises; otherwise
    # it would keep running in the background after the cell has failed.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_mal_ise_nj = time.time()
# Hard-coded rates: USD 0.01 per 1,000 input tokens and USD 0.03 per 1,000 output tokens
# NOTE(review): confirm these still match the pricing of the model used by positive_keywords
keyword_cost_input_token_mal_ise_nj = round((0.01/1000)*keyword_input_token_mal_ise_nj,2)
keyword_cost_output_token_mal_ise_nj = round((0.03/1000)*keyword_output_token_mal_ise_nj,2)
# The total is the sum of the two already-rounded components
keyword_total_cost_mal_ise_nj = keyword_cost_input_token_mal_ise_nj + keyword_cost_output_token_mal_ise_nj
keyword_total_time_loop_mal_ise_nj = keyword_end_time_loop_mal_ise_nj - keyword_start_time_loop_mal_ise_nj

# Display loop performance parameters & cost (replaces the progress message)
clear_output(wait=True)
print("Executed ",keyword_counter_mal_ise_nj[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_mal_ise_nj,1))
print("Total Input Tokens - ", keyword_input_token_mal_ise_nj)
print("Total Input Cost = USD ",keyword_cost_input_token_mal_ise_nj)
print("Total Output Tokens - ", keyword_output_token_mal_ise_nj)
print("Total Output Cost = USD ",keyword_cost_output_token_mal_ise_nj)
print("Total Cost = USD ",round(keyword_total_cost_mal_ise_nj,2))

Executed  10  Iterations
Total Execution time (in secs) -  46.6
Total Input Tokens -  108786
Total Input Cost = USD  1.09
Total Output Tokens -  818
Total Output Cost = USD  0.02
Total Cost = USD  1.11


In [539]:
# Build the positive keyword/phrase summary table for store 'mal_ise_nj'.
# Output layout: three identifying columns followed by one column per topic.
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the display name once: the first keyword_mappings key that contains the
# store identifier wins; the placeholder is used when no key matches.
input_store_identifier = 'mal_ise_nj'
store_name_mal_ise_nj = next(
    (value for key, value in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Collect every row first and build the DataFrame once; growing a DataFrame with
# pd.concat inside the loop copies all previous rows on every iteration.
keyword_rows_mal_ise_nj = []
for json_str in keyword_positive_output_mal_ise_nj:
    # Each collected result is a JSON object mapping a category to a list of entries
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # Each entry yields two rows: its 'keywords' first, then its 'phrases'
            for row_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_mal_ise_nj
                row_data['Sentiment'] = 'Positive'
                row_data['Type'] = row_type
                # Normalise spacing around the comma-separated items
                row_data[category] = ', '.join(item.strip() for item in content[row_type].split(','))
                keyword_rows_mal_ise_nj.append(row_data)

if keyword_rows_mal_ise_nj:
    positive_keywords_mal_ise_nj = pd.DataFrame(keyword_rows_mal_ise_nj)
else:
    # No results collected: keep an empty frame that still has the expected columns
    positive_keywords_mal_ise_nj = pd.DataFrame(columns=columns)

# Consolidate rows that share 'Store Name', 'Sentiment' and 'Type' into a single row;
# the non-null values of each topic column are joined with a space.
positive_keywords_mal_ise_nj = positive_keywords_mal_ise_nj.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_mal_ise_nj

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Malabar Gold & Diamonds-Iselin, NJ",Positive,keywords,"Trustworthy :1, Reliable :1, Transparency :1, ...","pleasant : 10, smooth : 8, enjoyable : 7, welc...","helpful : 150, patient : 100, friendly : 80, p...","good design :10, nice designs :8, great design...","wide selection :1, ample choices :1, good vari...","good discount: 10, great deal: 9, best deal: 6...","genuine making charge :1, good price :1","reasonable :8, best :7, good :6, great :5, fai...","quality :8, good quality :5, excellent quality...","exchange :10, refund :1, resale value :1"
1,"Malabar Gold & Diamonds-Iselin, NJ",Positive,phrases,"Trustworthy in their customer dealings :1, Kno...","great experience : 15, wonderful experience : ...","very helpful : 30, extremely helpful : 20, ver...","showing us so many stunning gold designs :1, b...","wide selection of jewelry :1, ample choices an...","gave us a good discount: 3, gave us a great de...",No relevant positive phrases,"reasonable price :5, best price :5, great pric...","quality of the jewelry is excellent :1, qualit...","great price :1, easy peasy :1, effortlessly ea..."


### mal_fri_tx

In [540]:
# Positive keyword extraction for store 'mal_fri_tx'.
# One result string is collected per topic that has at least one matching review;
# the list is consumed by the cell that builds positive_keywords_mal_fri_tx.
keyword_positive_output_mal_fri_tx = []

# Topic columns to iterate over (the 'review_text' column is deliberately excluded)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Single-element list so the progress thread can observe increments made by this cell
keyword_counter_mal_fri_tx = [0]
keyword_input_token_mal_fri_tx = 0
keyword_output_token_mal_fri_tx = 0
keyword_start_time_loop_mal_fri_tx = time.time()

# Look the store's sentiment DataFrame up once instead of twice per topic
keyword_source_df_mal_fri_tx = keyword_dataframes['mal_fri_tx_final_sen_df_jul']

# Threading setup: a background thread shows progress while the loop runs
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(
    target=print_dynamic_message_keyword,
    args=(keyword_counter_mal_fri_tx, keyword_total_iterations, stop_event),
)
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_mal_fri_tx[0] += 1
        # Keep only the review texts of rows where the topic column equals 1
        filtered_comments_mal_fri_tx = keyword_source_df_mal_fri_tx.loc[
            keyword_source_df_mal_fri_tx[topic] == 1, 'review_text'
        ].tolist()
        # Topics without any matching review are skipped (no call is made)
        if filtered_comments_mal_fri_tx:
            # positive_keywords returns three values; the 2nd and 3rd are summed as token counts
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_mal_fri_tx, topic)
            keyword_positive_output_mal_fri_tx.append(keywords)
            keyword_input_token_mal_fri_tx += input_tokens_loop
            keyword_output_token_mal_fri_tx += output_token_loop
finally:
    # Always stop the progress thread, even when a call above raises; otherwise
    # it would keep running in the background after the cell has failed.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_mal_fri_tx = time.time()
# Hard-coded rates: USD 0.01 per 1,000 input tokens and USD 0.03 per 1,000 output tokens
# NOTE(review): confirm these still match the pricing of the model used by positive_keywords
keyword_cost_input_token_mal_fri_tx = round((0.01/1000)*keyword_input_token_mal_fri_tx,2)
keyword_cost_output_token_mal_fri_tx = round((0.03/1000)*keyword_output_token_mal_fri_tx,2)
# The total is the sum of the two already-rounded components
keyword_total_cost_mal_fri_tx = keyword_cost_input_token_mal_fri_tx + keyword_cost_output_token_mal_fri_tx
keyword_total_time_loop_mal_fri_tx = keyword_end_time_loop_mal_fri_tx - keyword_start_time_loop_mal_fri_tx

# Display loop performance parameters & cost (replaces the progress message)
clear_output(wait=True)
print("Executed ",keyword_counter_mal_fri_tx[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_mal_fri_tx,1))
print("Total Input Tokens - ", keyword_input_token_mal_fri_tx)
print("Total Input Cost = USD ",keyword_cost_input_token_mal_fri_tx)
print("Total Output Tokens - ", keyword_output_token_mal_fri_tx)
print("Total Output Cost = USD ",keyword_cost_output_token_mal_fri_tx)
print("Total Cost = USD ",round(keyword_total_cost_mal_fri_tx,2))

Executed  10  Iterations
Total Execution time (in secs) -  34.6
Total Input Tokens -  113893
Total Input Cost = USD  1.14
Total Output Tokens -  833
Total Output Cost = USD  0.02
Total Cost = USD  1.16


In [541]:
# Build the positive keyword/phrase summary table for store 'mal_fri_tx'.
# Output layout: three identifying columns followed by one column per topic.
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the display name once: the first keyword_mappings key that contains the
# store identifier wins; the placeholder is used when no key matches.
input_store_identifier = 'mal_fri_tx'
store_name_mal_fri_tx = next(
    (value for key, value in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Collect every row first and build the DataFrame once; growing a DataFrame with
# pd.concat inside the loop copies all previous rows on every iteration.
keyword_rows_mal_fri_tx = []
for json_str in keyword_positive_output_mal_fri_tx:
    # Each collected result is a JSON object mapping a category to a list of entries
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # Each entry yields two rows: its 'keywords' first, then its 'phrases'
            for row_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_mal_fri_tx
                row_data['Sentiment'] = 'Positive'
                row_data['Type'] = row_type
                # Normalise spacing around the comma-separated items
                row_data[category] = ', '.join(item.strip() for item in content[row_type].split(','))
                keyword_rows_mal_fri_tx.append(row_data)

if keyword_rows_mal_fri_tx:
    positive_keywords_mal_fri_tx = pd.DataFrame(keyword_rows_mal_fri_tx)
else:
    # No results collected: keep an empty frame that still has the expected columns
    positive_keywords_mal_fri_tx = pd.DataFrame(columns=columns)

# Consolidate rows that share 'Store Name', 'Sentiment' and 'Type' into a single row;
# the non-null values of each topic column are joined with a space.
positive_keywords_mal_fri_tx = positive_keywords_mal_fri_tx.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_mal_fri_tx

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Malabar Gold & Diamonds-Frisco, TX",Positive,keywords,"Reliable :2, Trust :1, Trustworthy :1, Confide...","pleasant : 10, wonderful : 9, great : 8, nice ...","patient : 45, helpful : 40, friendly : 35, kin...","designs : 45, collections : 8, models : 5, cra...","variety : 5, collections : 4, selection : 3, o...","good discount :10, best deal :6, great discoun...",No relevant positive keywords/ phrases,"best price :10, good price :9, reasonable pric...","quality :8, exceptional :5, well-made :2, impe...","exchange :7, helpful :6, good deal :2, transpa..."
1,"Malabar Gold & Diamonds-Frisco, TX",Positive,phrases,"Trust the store and the quality :1, Reliable a...","great experience : 15, wonderful experience : ...","very patient and helpful : 10, very friendly a...","beautiful designs : 3, unique designs : 3, ama...","wide variety : 3, great collection : 3, lots o...","gave me good discount :3, got a good discount ...",helped me in reducing the price for making cha...,"prices are reasonable :2, prices were reasonab...","quality is exceptional :3, quality of the gold...","helped with my exchange :2, very helpful with ..."


### mal_ric_tx

In [542]:
# Positive keyword extraction for store 'mal_ric_tx'.
# Unlike a single call per topic, the reviews of a topic are sent in batches, so
# this list may hold several result strings for the same topic.
keyword_positive_output_mal_ric_tx = []

# Topic columns to iterate over (the 'review_text' column is deliberately excluded)
keyword_topics = [
    'Customer Confidence', 'Store Experience', 'Store Staff', 'Product Design',
    'Product Variety', 'Discount', 'Making Charge', 'Price',
    'Product Quality', 'Jewellery Exchange'
]

# Single-element list so the progress thread can observe increments made by this cell
keyword_counter_mal_ric_tx = [0]
keyword_input_token_mal_ric_tx = 0
keyword_output_token_mal_ric_tx = 0
keyword_start_time_loop_mal_ric_tx = time.time()

# Number of reviews passed to positive_keywords per call
keyword_batch_size_mal_ric_tx = 25
# Look the store's sentiment DataFrame up once instead of twice per topic
keyword_source_df_mal_ric_tx = keyword_dataframes['mal_ric_tx_final_sen_df_jul']

# Threading setup: a background thread shows progress while the loop runs
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(
    target=print_dynamic_message_keyword,
    args=(keyword_counter_mal_ric_tx, keyword_total_iterations, stop_event),
)
message_thread.start()

try:
    # Loop through each topic; the counter advances once per topic, not per batch
    for topic in keyword_topics:
        keyword_counter_mal_ric_tx[0] += 1
        # Keep only the review texts of rows where the topic column equals 1
        filtered_comments_mal_ric_tx = keyword_source_df_mal_ric_tx.loc[
            keyword_source_df_mal_ric_tx[topic] == 1, 'review_text'
        ].tolist()

        # An empty list produces no batches, so topics without reviews make no call
        for i in range(0, len(filtered_comments_mal_ric_tx), keyword_batch_size_mal_ric_tx):
            # Current batch (shorter than the batch size when it is the last one)
            comment_batch = filtered_comments_mal_ric_tx[i:i + keyword_batch_size_mal_ric_tx]
            # positive_keywords returns three values; the 2nd and 3rd are summed as token counts
            keywords, input_tokens_loop, output_token_loop = positive_keywords(comment_batch, topic)
            keyword_positive_output_mal_ric_tx.append(keywords)
            keyword_input_token_mal_ric_tx += input_tokens_loop
            keyword_output_token_mal_ric_tx += output_token_loop
finally:
    # Always stop the progress thread, even when a call above raises; otherwise
    # it would keep running in the background after the cell has failed.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_mal_ric_tx = time.time()
# Hard-coded rates: USD 0.01 per 1,000 input tokens and USD 0.03 per 1,000 output tokens
# NOTE(review): confirm these still match the pricing of the model used by positive_keywords
keyword_cost_input_token_mal_ric_tx = round((0.01 / 1000) * keyword_input_token_mal_ric_tx, 2)
keyword_cost_output_token_mal_ric_tx = round((0.03 / 1000) * keyword_output_token_mal_ric_tx, 2)
# The total is the sum of the two already-rounded components
keyword_total_cost_mal_ric_tx = keyword_cost_input_token_mal_ric_tx + keyword_cost_output_token_mal_ric_tx
keyword_total_time_loop_mal_ric_tx = keyword_end_time_loop_mal_ric_tx - keyword_start_time_loop_mal_ric_tx

# Display loop performance parameters & cost (replaces the progress message)
clear_output(wait=True)
print("Executed", keyword_counter_mal_ric_tx[0], "Iterations")
print("Total Execution time (in secs) -", round(keyword_total_time_loop_mal_ric_tx, 1))
print("Total Input Tokens -", keyword_input_token_mal_ric_tx)
print("Total Input Cost = USD", keyword_cost_input_token_mal_ric_tx)
print("Total Output Tokens -", keyword_output_token_mal_ric_tx)
print("Total Output Cost = USD", keyword_cost_output_token_mal_ric_tx)
print("Total Cost = USD", round(keyword_total_cost_mal_ric_tx, 2))


Executed 10 Iterations
Total Execution time (in secs) - 255.4
Total Input Tokens - 162920
Total Input Cost = USD 1.63
Total Output Tokens - 8309
Total Output Cost = USD 0.25
Total Cost = USD 1.88


In [543]:
# Build the positive keyword/phrase summary table for store 'mal_ric_tx'.
# Output layout: three identifying columns followed by one column per topic.
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the display name once: the first keyword_mappings key that contains the
# store identifier wins; the placeholder is used when no key matches.
input_store_identifier = 'mal_ric_tx'
store_name_mal_ric_tx = next(
    (value for key, value in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Collect every row first and build the DataFrame once; growing a DataFrame with
# pd.concat inside the loop copies all previous rows on every iteration.
keyword_rows_mal_ric_tx = []
for json_str in keyword_positive_output_mal_ric_tx:
    # Each collected result is a JSON object mapping a category to a list of entries
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # Each entry yields two rows: its 'keywords' first, then its 'phrases'
            for row_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_mal_ric_tx
                row_data['Sentiment'] = 'Positive'
                row_data['Type'] = row_type
                # Normalise spacing around the comma-separated items
                row_data[category] = ', '.join(item.strip() for item in content[row_type].split(','))
                keyword_rows_mal_ric_tx.append(row_data)

if keyword_rows_mal_ric_tx:
    positive_keywords_mal_ric_tx = pd.DataFrame(keyword_rows_mal_ric_tx)
else:
    # No results collected: keep an empty frame that still has the expected columns
    positive_keywords_mal_ric_tx = pd.DataFrame(columns=columns)

# Consolidate rows that share 'Store Name', 'Sentiment' and 'Type' into a single row;
# the non-null values of each topic column are joined with a space.
# NOTE(review): this store's results are collected per batch of reviews, so one topic
# can contribute several fragments; they are merged with only a space (no comma) and
# counts of a keyword repeated across batches are not summed - confirm this is intended.
positive_keywords_mal_ric_tx = positive_keywords_mal_ric_tx.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_mal_ric_tx

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Malani Jewellers-Richardson, TX",Positive,keywords,"Trustworthy :2, Reliable :2, Confident :1, Aut...","pleasant :3, wonderful :3, amazing :3, awesome...","helpful :6, excellent :4, friendly :4, patient...","designs :10, intricate :1, beautiful :1, elega...","collection :12, varieties :2, selection :2, op...","discount :5, deal :5, offer :2, price :2, sale...",,"best price :6, good price :4, great price :3, ...","quality :8, high quality :4, good quality :3, ...","trade :1, exchange :1, sold :1"
1,"Malani Jewellers-Richardson, TX",Positive,phrases,"Trustworthy, patient, and always smiling :1, M...","pleasant experience :3, wonderful experience :...","very helpful :3, excellent service :2, great s...","newest designs :1, intricate designs :1, beaut...","great collection :4, excellent collections :2,...","good discount :3, best deal :3, amazing deal :...",,"gave us the best price :3, got the great price...","quality of the jewelry is exceptional :1, qual...","trade out my gold :1, entire process stress-fr..."


### may_vie_va

In [544]:
# Positive keyword extraction for store 'may_vie_va'.
# One result string is collected per topic that has at least one matching review;
# the list is consumed by the cell that builds positive_keywords_may_vie_va.
keyword_positive_output_may_vie_va = []

# Topic columns to iterate over (the 'review_text' column is deliberately excluded)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Single-element list so the progress thread can observe increments made by this cell
keyword_counter_may_vie_va = [0]
keyword_input_token_may_vie_va = 0
keyword_output_token_may_vie_va = 0
keyword_start_time_loop_may_vie_va = time.time()

# Look the store's sentiment DataFrame up once instead of twice per topic
keyword_source_df_may_vie_va = keyword_dataframes['may_vie_va_final_sen_df_jul']

# Threading setup: a background thread shows progress while the loop runs
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(
    target=print_dynamic_message_keyword,
    args=(keyword_counter_may_vie_va, keyword_total_iterations, stop_event),
)
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_may_vie_va[0] += 1
        # Keep only the review texts of rows where the topic column equals 1
        filtered_comments_may_vie_va = keyword_source_df_may_vie_va.loc[
            keyword_source_df_may_vie_va[topic] == 1, 'review_text'
        ].tolist()
        # Topics without any matching review are skipped (no call is made)
        if filtered_comments_may_vie_va:
            # positive_keywords returns three values; the 2nd and 3rd are summed as token counts
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_may_vie_va, topic)
            keyword_positive_output_may_vie_va.append(keywords)
            keyword_input_token_may_vie_va += input_tokens_loop
            keyword_output_token_may_vie_va += output_token_loop
finally:
    # Always stop the progress thread, even when a call above raises; otherwise
    # it would keep running in the background after the cell has failed.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_may_vie_va = time.time()
# Hard-coded rates: USD 0.01 per 1,000 input tokens and USD 0.03 per 1,000 output tokens
# NOTE(review): confirm these still match the pricing of the model used by positive_keywords
keyword_cost_input_token_may_vie_va = round((0.01/1000)*keyword_input_token_may_vie_va,2)
keyword_cost_output_token_may_vie_va = round((0.03/1000)*keyword_output_token_may_vie_va,2)
# The total is the sum of the two already-rounded components
keyword_total_cost_may_vie_va = keyword_cost_input_token_may_vie_va + keyword_cost_output_token_may_vie_va
keyword_total_time_loop_may_vie_va = keyword_end_time_loop_may_vie_va - keyword_start_time_loop_may_vie_va

# Display loop performance parameters & cost (replaces the progress message)
clear_output(wait=True)
print("Executed ",keyword_counter_may_vie_va[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_may_vie_va,1))
print("Total Input Tokens - ", keyword_input_token_may_vie_va)
print("Total Input Cost = USD ",keyword_cost_input_token_may_vie_va)
print("Total Output Tokens - ", keyword_output_token_may_vie_va)
print("Total Output Cost = USD ",keyword_cost_output_token_may_vie_va)
print("Total Cost = USD ",round(keyword_total_cost_may_vie_va,2))

Executed  10  Iterations
Total Execution time (in secs) -  14.0
Total Input Tokens -  8808
Total Input Cost = USD  0.09
Total Output Tokens -  486
Total Output Cost = USD  0.01
Total Cost = USD  0.1


In [545]:
# Build the positive keyword/phrase summary table for store 'may_vie_va'.
# Output layout: three identifying columns followed by one column per topic.
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the display name once: the first keyword_mappings key that contains the
# store identifier wins; the placeholder is used when no key matches.
input_store_identifier = 'may_vie_va'
store_name_may_vie_va = next(
    (value for key, value in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Collect every row first and build the DataFrame once; growing a DataFrame with
# pd.concat inside the loop copies all previous rows on every iteration.
keyword_rows_may_vie_va = []
for json_str in keyword_positive_output_may_vie_va:
    # Each collected result is a JSON object mapping a category to a list of entries
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # Each entry yields two rows: its 'keywords' first, then its 'phrases'
            for row_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_may_vie_va
                row_data['Sentiment'] = 'Positive'
                row_data['Type'] = row_type
                # Normalise spacing around the comma-separated items
                row_data[category] = ', '.join(item.strip() for item in content[row_type].split(','))
                keyword_rows_may_vie_va.append(row_data)

if keyword_rows_may_vie_va:
    positive_keywords_may_vie_va = pd.DataFrame(keyword_rows_may_vie_va)
else:
    # No results collected: keep an empty frame that still has the expected columns
    positive_keywords_may_vie_va = pd.DataFrame(columns=columns)

# Consolidate rows that share 'Store Name', 'Sentiment' and 'Type' into a single row;
# the non-null values of each topic column are joined with a space.
positive_keywords_may_vie_va = positive_keywords_may_vie_va.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_may_vie_va

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"May Jewelers-Vienna, VA",Positive,keywords,"Trustworthy :1, Professional :1, Knowledgeable...","great experience :5, fantastic experience :2, ...","professional :3, helpful :3, knowledgeable :3,...","custom designs :1, handmade :1","selection :1, unique :1",deals :1,,reasonable :1,Outstanding quality :1,
1,"May Jewelers-Vienna, VA",Positive,phrases,"Trustworthy throughout the process :1, Always ...","very welcoming :1, truly welcomed :1, very kin...","great experience :3, highly recommended :2, fa...","design exactly what she wanted :1, custom desi...","access to any kind of gem :1, more subtle piec...",best deals in the area :1,,much more reasonable compared to other jeweler...,No relevant positive keywords/ phrases,


### son_ise_nj

In [546]:
# Extract positive keywords/phrases, topic by topic, for store 'son_ise_nj'.
# List of raw results returned by positive_keywords(), one entry per topic
# that has at least one positive review.
keyword_positive_output_son_ise_nj = []
# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Single-element list: mutated in place below and shared with the progress thread
keyword_counter_son_ise_nj = [0]
keyword_input_token_son_ise_nj = 0
keyword_output_token_son_ise_nj = 0
keyword_start_time_loop_son_ise_nj = time.time()

# Threading setup: background thread that reports progress while the loop runs
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_son_ise_nj, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Look the store's review frame up once instead of twice per topic
    store_reviews_son_ise_nj = keyword_dataframes['son_ise_nj_final_sen_df_jul']

    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_son_ise_nj[0] += 1
        # Keep the review texts of rows where the topic has a value of 1
        filtered_comments_son_ise_nj = store_reviews_son_ise_nj[store_reviews_son_ise_nj[topic] == 1]['review_text'].tolist()
        # Only call positive_keywords when there is at least one positive comment
        if filtered_comments_son_ise_nj:
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_son_ise_nj, topic)
            # Store the result and accumulate token usage
            keyword_positive_output_son_ise_nj.append(keywords)
            keyword_input_token_son_ise_nj += input_tokens_loop
            keyword_output_token_son_ise_nj += output_token_loop
finally:
    # Always signal and join the progress thread, even when a lookup or an
    # API call above raises; otherwise it is never told to stop.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_son_ise_nj = time.time()
# Cost = tokens * rate per 1000 tokens (0.01 for input, 0.03 for output), rounded to cents
keyword_cost_input_token_son_ise_nj = round((0.01/1000)*keyword_input_token_son_ise_nj, 2)
keyword_cost_output_token_son_ise_nj = round((0.03/1000)*keyword_output_token_son_ise_nj, 2)
keyword_total_cost_son_ise_nj = keyword_cost_input_token_son_ise_nj + keyword_cost_output_token_son_ise_nj
keyword_total_time_loop_son_ise_nj = keyword_end_time_loop_son_ise_nj - keyword_start_time_loop_son_ise_nj

# Display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_son_ise_nj[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_son_ise_nj,1))
print("Total Input Tokens - ", keyword_input_token_son_ise_nj)
print("Total Input Cost = USD ",keyword_cost_input_token_son_ise_nj)
print("Total Output Tokens - ", keyword_output_token_son_ise_nj)
print("Total Output Cost = USD ",keyword_cost_output_token_son_ise_nj)
print("Total Cost = USD ",round(keyword_total_cost_son_ise_nj,2))

Executed  10  Iterations
Total Execution time (in secs) -  22.5
Total Input Tokens -  20976
Total Input Cost = USD  0.21
Total Output Tokens -  670
Total Output Cost = USD  0.02
Total Cost = USD  0.23


In [547]:
# Build the positive keyword/phrase summary table for store 'son_ise_nj'.

# Columns of the summary table: identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store name once: value of the first keyword_mappings entry whose
# key contains the store identifier, or a placeholder when none matches.
input_store_identifier = 'son_ise_nj'
store_name_son_ise_nj = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name_son_ise_nj = value
        break

# Collect plain dict records and build the DataFrame once at the end, instead
# of growing it with pd.concat inside the loop (quadratic copying).
keyword_rows_son_ise_nj = []

# Process each JSON string
for json_str in keyword_positive_output_son_ise_nj:
    # Load the JSON string into a dictionary of {category: [content, ...]}
    data = json.loads(json_str)
    for category, content_list in data.items():
        # Fresh row template for this category
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name_son_ise_nj
        row_data['Sentiment'] = 'Positive'
        for content in content_list:
            # Keywords row: normalise whitespace around each comma-separated item
            row_data['Type'] = 'keywords'
            row_data[category] = ', '.join(kw.strip() for kw in content['keywords'].split(','))
            keyword_rows_son_ise_nj.append(dict(row_data))  # snapshot; row_data is reused below
            # Phrases row
            row_data['Type'] = 'phrases'
            row_data[category] = ', '.join(ph.strip() for ph in content['phrases'].split(','))
            keyword_rows_son_ise_nj.append(dict(row_data))

# Column order follows the record keys (the template columns first); fall back
# to an empty frame with the expected columns when nothing was extracted.
if keyword_rows_son_ise_nj:
    positive_keywords_son_ise_nj = pd.DataFrame(keyword_rows_son_ise_nj)
else:
    positive_keywords_son_ise_nj = pd.DataFrame(columns=columns)

# Consolidate to a single row per ('Store Name', 'Sentiment', 'Type'), joining
# the non-null text of every topic column with a space
positive_keywords_son_ise_nj = positive_keywords_son_ise_nj.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_son_ise_nj

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Sona Jewelers-Iselin, NJ",Positive,keywords,"Trust :5, Reliable :4, Honest :3, Dependable :...","great experience :10, wonderful experience :5,...","helpful : 50, patient : 20, knowledgeable : 15...","designs :5, good design :2, beautiful :2, stun...","variety :5, collections :5, selection :5, choi...","discount :3, deal :3",,"best price :5, good price :4, fair price :3, r...","quality :6, good :3, outstanding :2, best :2, ...",exchanging :1
1,"Sona Jewelers-Iselin, NJ",Positive,phrases,"Always come again and again :3, Our favorite s...","great shopping experience :4, pleasant experie...","very helpful : 30, great service : 25, excelle...","beautiful jewelry :2, beautiful pieces :1, ama...","wide variety of selection :1, very large colle...","good deal :2, great deal :3, best deals :1",,"best price in town :1, very very good price :1...","quality is best :1, quality exceeded my expect...",exchanging our old gold :1


### tif_chi_il

In [548]:
# Extract positive keywords/phrases, topic by topic, for store 'tif_chi_il'.
# List of raw results returned by positive_keywords(), one entry per topic
# that has at least one positive review.
keyword_positive_output_tif_chi_il = []
# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Single-element list: mutated in place below and shared with the progress thread
keyword_counter_tif_chi_il = [0]
keyword_input_token_tif_chi_il = 0
keyword_output_token_tif_chi_il = 0
keyword_start_time_loop_tif_chi_il = time.time()

# Threading setup: background thread that reports progress while the loop runs
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_tif_chi_il, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Look the store's review frame up once instead of twice per topic
    store_reviews_tif_chi_il = keyword_dataframes['tif_chi_il_final_sen_df_jul']

    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_tif_chi_il[0] += 1
        # Keep the review texts of rows where the topic has a value of 1
        filtered_comments_tif_chi_il = store_reviews_tif_chi_il[store_reviews_tif_chi_il[topic] == 1]['review_text'].tolist()
        # Only call positive_keywords when there is at least one positive comment
        if filtered_comments_tif_chi_il:
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_tif_chi_il, topic)
            # Store the result and accumulate token usage
            keyword_positive_output_tif_chi_il.append(keywords)
            keyword_input_token_tif_chi_il += input_tokens_loop
            keyword_output_token_tif_chi_il += output_token_loop
finally:
    # Always signal and join the progress thread, even when a lookup or an
    # API call above raises; otherwise it is never told to stop.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tif_chi_il = time.time()
# Cost = tokens * rate per 1000 tokens (0.01 for input, 0.03 for output), rounded to cents
keyword_cost_input_token_tif_chi_il = round((0.01/1000)*keyword_input_token_tif_chi_il, 2)
keyword_cost_output_token_tif_chi_il = round((0.03/1000)*keyword_output_token_tif_chi_il, 2)
keyword_total_cost_tif_chi_il = keyword_cost_input_token_tif_chi_il + keyword_cost_output_token_tif_chi_il
keyword_total_time_loop_tif_chi_il = keyword_end_time_loop_tif_chi_il - keyword_start_time_loop_tif_chi_il

# Display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_tif_chi_il[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tif_chi_il,1))
print("Total Input Tokens - ", keyword_input_token_tif_chi_il)
print("Total Input Cost = USD ",keyword_cost_input_token_tif_chi_il)
print("Total Output Tokens - ", keyword_output_token_tif_chi_il)
print("Total Output Cost = USD ",keyword_cost_output_token_tif_chi_il)
print("Total Cost = USD ",round(keyword_total_cost_tif_chi_il,2))

Executed  10  Iterations
Total Execution time (in secs) -  19.5
Total Input Tokens -  9192
Total Input Cost = USD  0.09
Total Output Tokens -  374
Total Output Cost = USD  0.01
Total Cost = USD  0.1


In [549]:
# Build the positive keyword/phrase summary table for store 'tif_chi_il'.

# Columns of the summary table: identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store name once: value of the first keyword_mappings entry whose
# key contains the store identifier, or a placeholder when none matches.
input_store_identifier = 'tif_chi_il'
store_name_tif_chi_il = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name_tif_chi_il = value
        break

# Collect plain dict records and build the DataFrame once at the end, instead
# of growing it with pd.concat inside the loop (quadratic copying).
keyword_rows_tif_chi_il = []

# Process each JSON string
for json_str in keyword_positive_output_tif_chi_il:
    # Load the JSON string into a dictionary of {category: [content, ...]}
    data = json.loads(json_str)
    for category, content_list in data.items():
        # Fresh row template for this category
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name_tif_chi_il
        row_data['Sentiment'] = 'Positive'
        for content in content_list:
            # Keywords row: normalise whitespace around each comma-separated item
            row_data['Type'] = 'keywords'
            row_data[category] = ', '.join(kw.strip() for kw in content['keywords'].split(','))
            keyword_rows_tif_chi_il.append(dict(row_data))  # snapshot; row_data is reused below
            # Phrases row
            row_data['Type'] = 'phrases'
            row_data[category] = ', '.join(ph.strip() for ph in content['phrases'].split(','))
            keyword_rows_tif_chi_il.append(dict(row_data))

# Column order follows the record keys (the template columns first); fall back
# to an empty frame with the expected columns when nothing was extracted.
if keyword_rows_tif_chi_il:
    positive_keywords_tif_chi_il = pd.DataFrame(keyword_rows_tif_chi_il)
else:
    positive_keywords_tif_chi_il = pd.DataFrame(columns=columns)

# Consolidate to a single row per ('Store Name', 'Sentiment', 'Type'), joining
# the non-null text of every topic column with a space
positive_keywords_tif_chi_il = positive_keywords_tif_chi_il.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_tif_chi_il

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Tiffany & Co-Chicago, IL",Positive,keywords,No relevant positive keywords/ phrases,"welcoming :2, stunning :2, breathtaking :1, go...","helpful :4, kind :3, attentive :3, knowledgeab...","beautiful :1, exquisite :1","selection :2, options :1",,,affordable :1,"Good product:1, Quality:1",
1,"Tiffany & Co-Chicago, IL",Positive,phrases,No relevant positive keywords/ phrases,"stress-free experience :1, unforgettable exper...","great customer service :2, made me feel specia...",No relevant positive phrases,"large selection :1, huge selection :1, numerou...",,,something special and affordable :1,the quality and look of my bracelet is STUNNING:1,


### tif_nor_il

In [550]:
# Extract positive keywords/phrases, topic by topic, for store 'tif_nor_il'.
# List of raw results returned by positive_keywords(), one entry per topic
# that has at least one positive review.
keyword_positive_output_tif_nor_il = []
# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Single-element list: mutated in place below and shared with the progress thread
keyword_counter_tif_nor_il = [0]
keyword_input_token_tif_nor_il = 0
keyword_output_token_tif_nor_il = 0
keyword_start_time_loop_tif_nor_il = time.time()

# Threading setup: background thread that reports progress while the loop runs
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_tif_nor_il, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Look the store's review frame up once instead of twice per topic
    store_reviews_tif_nor_il = keyword_dataframes['tif_nor_il_final_sen_df_jul']

    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_tif_nor_il[0] += 1
        # Keep the review texts of rows where the topic has a value of 1
        filtered_comments_tif_nor_il = store_reviews_tif_nor_il[store_reviews_tif_nor_il[topic] == 1]['review_text'].tolist()
        # Only call positive_keywords when there is at least one positive comment
        if filtered_comments_tif_nor_il:
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_tif_nor_il, topic)
            # Store the result and accumulate token usage
            keyword_positive_output_tif_nor_il.append(keywords)
            keyword_input_token_tif_nor_il += input_tokens_loop
            keyword_output_token_tif_nor_il += output_token_loop
finally:
    # Always signal and join the progress thread, even when a lookup or an
    # API call above raises; otherwise it is never told to stop.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tif_nor_il = time.time()
# Cost = tokens * rate per 1000 tokens (0.01 for input, 0.03 for output), rounded to cents
keyword_cost_input_token_tif_nor_il = round((0.01/1000)*keyword_input_token_tif_nor_il, 2)
keyword_cost_output_token_tif_nor_il = round((0.03/1000)*keyword_output_token_tif_nor_il, 2)
keyword_total_cost_tif_nor_il = keyword_cost_input_token_tif_nor_il + keyword_cost_output_token_tif_nor_il
keyword_total_time_loop_tif_nor_il = keyword_end_time_loop_tif_nor_il - keyword_start_time_loop_tif_nor_il

# Display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_tif_nor_il[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tif_nor_il,1))
print("Total Input Tokens - ", keyword_input_token_tif_nor_il)
print("Total Input Cost = USD ",keyword_cost_input_token_tif_nor_il)
print("Total Output Tokens - ", keyword_output_token_tif_nor_il)
print("Total Output Cost = USD ",keyword_cost_output_token_tif_nor_il)
print("Total Cost = USD ",round(keyword_total_cost_tif_nor_il,2))

Executed  10  Iterations
Total Execution time (in secs) -  5.5
Total Input Tokens -  2443
Total Input Cost = USD  0.02
Total Output Tokens -  166
Total Output Cost = USD  0.0
Total Cost = USD  0.02


In [551]:
# Build the positive keyword/phrase summary table for store 'tif_nor_il'.

# Columns of the summary table: identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store name once: value of the first keyword_mappings entry whose
# key contains the store identifier, or a placeholder when none matches.
input_store_identifier = 'tif_nor_il'
store_name_tif_nor_il = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name_tif_nor_il = value
        break

# Collect plain dict records and build the DataFrame once at the end, instead
# of growing it with pd.concat inside the loop (quadratic copying).
keyword_rows_tif_nor_il = []

# Process each JSON string
for json_str in keyword_positive_output_tif_nor_il:
    # Load the JSON string into a dictionary of {category: [content, ...]}
    data = json.loads(json_str)
    for category, content_list in data.items():
        # Fresh row template for this category
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name_tif_nor_il
        row_data['Sentiment'] = 'Positive'
        for content in content_list:
            # Keywords row: normalise whitespace around each comma-separated item
            row_data['Type'] = 'keywords'
            row_data[category] = ', '.join(kw.strip() for kw in content['keywords'].split(','))
            keyword_rows_tif_nor_il.append(dict(row_data))  # snapshot; row_data is reused below
            # Phrases row
            row_data['Type'] = 'phrases'
            row_data[category] = ', '.join(ph.strip() for ph in content['phrases'].split(','))
            keyword_rows_tif_nor_il.append(dict(row_data))

# Column order follows the record keys (the template columns first); fall back
# to an empty frame with the expected columns when nothing was extracted.
if keyword_rows_tif_nor_il:
    positive_keywords_tif_nor_il = pd.DataFrame(keyword_rows_tif_nor_il)
else:
    positive_keywords_tif_nor_il = pd.DataFrame(columns=columns)

# Consolidate to a single row per ('Store Name', 'Sentiment', 'Type'), joining
# the non-null text of every topic column with a space
positive_keywords_tif_nor_il = positive_keywords_tif_nor_il.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_tif_nor_il

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Tiffany & Co-Northbrook, IL",Positive,keywords,No relevant positive keywords/ phrases,"beautiful store :1, Easy place :1, Nice place :1","helpful :1, professional :1, amazing :1, excep...",,,,,,,
1,"Tiffany & Co-Northbrook, IL",Positive,phrases,No relevant positive keywords/ phrases,very positive experience :1,"super helpful :1, was amazing :1, very positiv...",,,,,,,


### tif_sko_il

In [552]:
# Extract positive keywords/phrases, topic by topic, for store 'tif_sko_il'.
# List of raw results returned by positive_keywords(), one entry per topic
# that has at least one positive review.
keyword_positive_output_tif_sko_il = []
# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Single-element list: mutated in place below and shared with the progress thread
keyword_counter_tif_sko_il = [0]
keyword_input_token_tif_sko_il = 0
keyword_output_token_tif_sko_il = 0
keyword_start_time_loop_tif_sko_il = time.time()

# Threading setup: background thread that reports progress while the loop runs
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_tif_sko_il, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Look the store's review frame up once instead of twice per topic
    store_reviews_tif_sko_il = keyword_dataframes['tif_sko_il_final_sen_df_jul']

    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_tif_sko_il[0] += 1
        # Keep the review texts of rows where the topic has a value of 1
        filtered_comments_tif_sko_il = store_reviews_tif_sko_il[store_reviews_tif_sko_il[topic] == 1]['review_text'].tolist()
        # Only call positive_keywords when there is at least one positive comment
        if filtered_comments_tif_sko_il:
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_tif_sko_il, topic)
            # Store the result and accumulate token usage
            keyword_positive_output_tif_sko_il.append(keywords)
            keyword_input_token_tif_sko_il += input_tokens_loop
            keyword_output_token_tif_sko_il += output_token_loop
finally:
    # Always signal and join the progress thread, even when a lookup or an
    # API call above raises; otherwise it is never told to stop.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tif_sko_il = time.time()
# Cost = tokens * rate per 1000 tokens (0.01 for input, 0.03 for output), rounded to cents
keyword_cost_input_token_tif_sko_il = round((0.01/1000)*keyword_input_token_tif_sko_il, 2)
keyword_cost_output_token_tif_sko_il = round((0.03/1000)*keyword_output_token_tif_sko_il, 2)
keyword_total_cost_tif_sko_il = keyword_cost_input_token_tif_sko_il + keyword_cost_output_token_tif_sko_il
keyword_total_time_loop_tif_sko_il = keyword_end_time_loop_tif_sko_il - keyword_start_time_loop_tif_sko_il

# Display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_tif_sko_il[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tif_sko_il,1))
print("Total Input Tokens - ", keyword_input_token_tif_sko_il)
print("Total Input Cost = USD ",keyword_cost_input_token_tif_sko_il)
print("Total Output Tokens - ", keyword_output_token_tif_sko_il)
print("Total Output Cost = USD ",keyword_cost_output_token_tif_sko_il)
print("Total Cost = USD ",round(keyword_total_cost_tif_sko_il,2))

Executed  10  Iterations
Total Execution time (in secs) -  7.0
Total Input Tokens -  4097
Total Input Cost = USD  0.04
Total Output Tokens -  221
Total Output Cost = USD  0.01
Total Cost = USD  0.05


In [553]:
# Build the positive keyword/phrase summary table for store 'tif_sko_il'.

# Columns of the summary table: identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store name once: value of the first keyword_mappings entry whose
# key contains the store identifier, or a placeholder when none matches.
input_store_identifier = 'tif_sko_il'
store_name_tif_sko_il = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name_tif_sko_il = value
        break

# Collect plain dict records and build the DataFrame once at the end, instead
# of growing it with pd.concat inside the loop (quadratic copying).
keyword_rows_tif_sko_il = []

# Process each JSON string
for json_str in keyword_positive_output_tif_sko_il:
    # Load the JSON string into a dictionary of {category: [content, ...]}
    data = json.loads(json_str)
    for category, content_list in data.items():
        # Fresh row template for this category
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name_tif_sko_il
        row_data['Sentiment'] = 'Positive'
        for content in content_list:
            # Keywords row: normalise whitespace around each comma-separated item
            row_data['Type'] = 'keywords'
            row_data[category] = ', '.join(kw.strip() for kw in content['keywords'].split(','))
            keyword_rows_tif_sko_il.append(dict(row_data))  # snapshot; row_data is reused below
            # Phrases row
            row_data['Type'] = 'phrases'
            row_data[category] = ', '.join(ph.strip() for ph in content['phrases'].split(','))
            keyword_rows_tif_sko_il.append(dict(row_data))

# Column order follows the record keys (the template columns first); fall back
# to an empty frame with the expected columns when nothing was extracted.
if keyword_rows_tif_sko_il:
    positive_keywords_tif_sko_il = pd.DataFrame(keyword_rows_tif_sko_il)
else:
    positive_keywords_tif_sko_il = pd.DataFrame(columns=columns)

# Consolidate to a single row per ('Store Name', 'Sentiment', 'Type'), joining
# the non-null text of every topic column with a space
positive_keywords_tif_sko_il = positive_keywords_tif_sko_il.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_tif_sko_il

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Tiffany & Co-Skokie, IL",Positive,keywords,,"beautiful :3, fantastic :2, nice :1, exception...","professional :1, kind :1, friendly :1, helpful :1","beautiful :3, silver :1, pearl :1",,,,,,
1,"Tiffany & Co-Skokie, IL",Positive,phrases,,"beautiful on :2, beautiful interior :1, great ...","fantastic job of customer care and service :1,...",beautiful single pearl silver hardware bracele...,,,,,,


### tif_eas_nj

In [554]:
# Extract positive keywords/phrases, topic by topic, for store 'tif_eas_nj'.
# List of raw results returned by positive_keywords(), one entry per topic
# that has at least one positive review.
keyword_positive_output_tif_eas_nj = []
# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Single-element list: mutated in place below and shared with the progress thread
keyword_counter_tif_eas_nj = [0]
keyword_input_token_tif_eas_nj = 0
keyword_output_token_tif_eas_nj = 0
keyword_start_time_loop_tif_eas_nj = time.time()

# Threading setup: background thread that reports progress while the loop runs
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_tif_eas_nj, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Look the store's review frame up once instead of twice per topic
    store_reviews_tif_eas_nj = keyword_dataframes['tif_eas_nj_final_sen_df_jul']

    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_tif_eas_nj[0] += 1
        # Keep the review texts of rows where the topic has a value of 1
        filtered_comments_tif_eas_nj = store_reviews_tif_eas_nj[store_reviews_tif_eas_nj[topic] == 1]['review_text'].tolist()
        # Only call positive_keywords when there is at least one positive comment
        if filtered_comments_tif_eas_nj:
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_tif_eas_nj, topic)
            # Store the result and accumulate token usage
            keyword_positive_output_tif_eas_nj.append(keywords)
            keyword_input_token_tif_eas_nj += input_tokens_loop
            keyword_output_token_tif_eas_nj += output_token_loop
finally:
    # Always signal and join the progress thread, even when a lookup or an
    # API call above raises; otherwise it is never told to stop.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tif_eas_nj = time.time()
# Cost = tokens * rate per 1000 tokens (0.01 for input, 0.03 for output), rounded to cents
keyword_cost_input_token_tif_eas_nj = round((0.01/1000)*keyword_input_token_tif_eas_nj, 2)
keyword_cost_output_token_tif_eas_nj = round((0.03/1000)*keyword_output_token_tif_eas_nj, 2)
keyword_total_cost_tif_eas_nj = keyword_cost_input_token_tif_eas_nj + keyword_cost_output_token_tif_eas_nj
keyword_total_time_loop_tif_eas_nj = keyword_end_time_loop_tif_eas_nj - keyword_start_time_loop_tif_eas_nj

# Display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_tif_eas_nj[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tif_eas_nj,1))
print("Total Input Tokens - ", keyword_input_token_tif_eas_nj)
print("Total Input Cost = USD ",keyword_cost_input_token_tif_eas_nj)
print("Total Output Tokens - ", keyword_output_token_tif_eas_nj)
print("Total Output Cost = USD ",keyword_cost_output_token_tif_eas_nj)
print("Total Cost = USD ",round(keyword_total_cost_tif_eas_nj,2))

Executed  10  Iterations
Total Execution time (in secs) -  11.0
Total Input Tokens -  7310
Total Input Cost = USD  0.07
Total Output Tokens -  372
Total Output Cost = USD  0.01
Total Cost = USD  0.08


In [555]:
# Build the positive keyword/phrase summary table for store 'tif_eas_nj'.

# Columns of the summary table: identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store name once: value of the first keyword_mappings entry whose
# key contains the store identifier, or a placeholder when none matches.
input_store_identifier = 'tif_eas_nj'
store_name_tif_eas_nj = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name_tif_eas_nj = value
        break

# Collect plain dict records and build the DataFrame once at the end, instead
# of growing it with pd.concat inside the loop (quadratic copying).
keyword_rows_tif_eas_nj = []

# Process each JSON string
for json_str in keyword_positive_output_tif_eas_nj:
    # Load the JSON string into a dictionary of {category: [content, ...]}
    data = json.loads(json_str)
    for category, content_list in data.items():
        # Fresh row template for this category
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name_tif_eas_nj
        row_data['Sentiment'] = 'Positive'
        for content in content_list:
            # Keywords row: normalise whitespace around each comma-separated item
            row_data['Type'] = 'keywords'
            row_data[category] = ', '.join(kw.strip() for kw in content['keywords'].split(','))
            keyword_rows_tif_eas_nj.append(dict(row_data))  # snapshot; row_data is reused below
            # Phrases row
            row_data['Type'] = 'phrases'
            row_data[category] = ', '.join(ph.strip() for ph in content['phrases'].split(','))
            keyword_rows_tif_eas_nj.append(dict(row_data))

# Column order follows the record keys (the template columns first); fall back
# to an empty frame with the expected columns when nothing was extracted.
if keyword_rows_tif_eas_nj:
    positive_keywords_tif_eas_nj = pd.DataFrame(keyword_rows_tif_eas_nj)
else:
    positive_keywords_tif_eas_nj = pd.DataFrame(columns=columns)

# Consolidate to a single row per ('Store Name', 'Sentiment', 'Type'), joining
# the non-null text of every topic column with a space
positive_keywords_tif_eas_nj = positive_keywords_tif_eas_nj.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_tif_eas_nj

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Tiffany & Co-East Rutherford, NJ",Positive,keywords,"Trust :2, Reliable :1, Faithful :1, Personal :...","helpful :4, accommodating :1, welcoming :1, fr...","helpful :5, friendly :3, accommodating :1, pol...","unique :1, beautiful :1",No relevant positive keywords/ phrases,,,reasonable :1,,
1,"Tiffany & Co-East Rutherford, NJ",Positive,phrases,"Integral part of our life milestones :1, Perso...","amazing experiences :1, great hospitality :1, ...","great hospitality :1, amazing experiences :1, ...","beautiful unique engagement rings :1, I was in...",No relevant positive keywords/ phrases,,,Price was extremely reasonable :1,,


### tif_red_nj

In [556]:
# Raw positive_keywords() results, one list entry per topic that has positive reviews
keyword_positive_output_tif_red_nj = []
# Topic columns to iterate over
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# One-element list: mutated by the loop below and handed to the progress thread via args
keyword_counter_tif_red_nj = [0]
keyword_input_token_tif_red_nj = 0
keyword_output_token_tif_red_nj = 0

# Fetch this store's frame once instead of twice per topic
sentiment_df_tif_red_nj = keyword_dataframes['tif_red_nj_final_sen_df_jul']

keyword_start_time_loop_tif_red_nj = time.time()

# Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_tif_red_nj, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_tif_red_nj[0] += 1
        # Review texts of the rows where the topic column equals 1
        filtered_comments_tif_red_nj = sentiment_df_tif_red_nj.loc[sentiment_df_tif_red_nj[topic] == 1, 'review_text'].tolist()
        # Only call positive_keywords when at least one review matched
        if filtered_comments_tif_red_nj:
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_tif_red_nj, topic)
            keyword_positive_output_tif_red_nj.append(keywords)
            keyword_input_token_tif_red_nj += input_tokens_loop
            keyword_output_token_tif_red_nj += output_token_loop
finally:
    # Always signal and join the progress thread, even when a call above raises;
    # without this the thread is never told to stop after a failure
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tif_red_nj = time.time()
# Hard-coded rates: 0.01 per 1000 input tokens, 0.03 per 1000 output tokens.
# Each component is rounded to two decimals before the two are summed.
keyword_cost_input_token_tif_red_nj = round((0.01/1000)*keyword_input_token_tif_red_nj,2)
keyword_cost_output_token_tif_red_nj = round((0.03/1000)*keyword_output_token_tif_red_nj,2)
keyword_total_cost_tif_red_nj = keyword_cost_input_token_tif_red_nj + keyword_cost_output_token_tif_red_nj
keyword_total_time_loop_tif_red_nj = keyword_end_time_loop_tif_red_nj - keyword_start_time_loop_tif_red_nj

# Display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_tif_red_nj[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tif_red_nj,1))
print("Total Input Tokens - ", keyword_input_token_tif_red_nj)
print("Total Input Cost = USD ",keyword_cost_input_token_tif_red_nj)
print("Total Output Tokens - ", keyword_output_token_tif_red_nj)
print("Total Output Cost = USD ",keyword_cost_output_token_tif_red_nj)
print("Total Cost = USD ",round(keyword_total_cost_tif_red_nj,2))

Executed  10  Iterations
Total Execution time (in secs) -  6.5
Total Input Tokens -  4101
Total Input Cost = USD  0.04
Total Output Tokens -  223
Total Output Cost = USD  0.01
Total Cost = USD  0.05


In [557]:
# Column layout of the per-store table: three identifier columns, then one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Empty frame carrying the expected columns; used as the left side of the single
# concat below so the column order is fixed by this list
positive_keywords_tif_red_nj = pd.DataFrame()
for column in columns:
    positive_keywords_tif_red_nj[column] = None

# Resolve the store name once: value of the first keyword_mappings entry whose key
# contains the identifier, or a placeholder when none matches
input_store_identifier = 'tif_red_nj'
store_name_tif_red_nj = next(
    (value for key, value in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Collect every row as a dict first and build the frame once, instead of
# re-copying the whole frame with pd.concat for each appended row
rows_tif_red_nj = []
for json_str in keyword_positive_output_tif_red_nj:
    # Each collected entry is a JSON string mapping a category to a list of
    # objects that carry comma-separated 'keywords' and 'phrases' strings
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # Keywords row first, then phrases row (same order as the output table)
            for row_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_tif_red_nj
                row_data['Sentiment'] = 'Positive'
                row_data['Type'] = row_type
                # Normalise whitespace around each comma-separated item
                row_data[category] = ', '.join(item.strip() for item in content[row_type].split(','))
                rows_tif_red_nj.append(row_data)

if rows_tif_red_nj:
    positive_keywords_tif_red_nj = pd.concat([positive_keywords_tif_red_nj, pd.DataFrame(rows_tif_red_nj)], ignore_index=True)

# Consolidate rows that share 'Store Name', 'Sentiment' and 'Type' into a single row,
# joining the non-null cell values of each remaining column with a space
positive_keywords_tif_red_nj = positive_keywords_tif_red_nj.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_tif_red_nj

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Tiffany & Co-Red Bank, NJ",Positive,keywords,,"professional :2, clean :1, friendly :1","professional :2, helpful :1, kind :1, easy goi...",No relevant positive keywords/ phrases,,,,inexpensive :1,No relevant positive keywords/ phrases,
1,"Tiffany & Co-Red Bank, NJ",Positive,phrases,,"pleasant experience :1, no rush feeling :1",most pleasant experience :1,No relevant positive keywords/ phrases,,,,inexpensive piece of jewelry :1,No relevant positive keywords/ phrases,


### tif_hac_nj

In [558]:
# Raw positive_keywords() results, one list entry per topic that has positive reviews
keyword_positive_output_tif_hac_nj = []
# Topic columns to iterate over
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# One-element list: mutated by the loop below and handed to the progress thread via args
keyword_counter_tif_hac_nj = [0]
keyword_input_token_tif_hac_nj = 0
keyword_output_token_tif_hac_nj = 0

# Fetch this store's frame once instead of twice per topic
sentiment_df_tif_hac_nj = keyword_dataframes['tif_hac_nj_final_sen_df_jul']

keyword_start_time_loop_tif_hac_nj = time.time()

# Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_tif_hac_nj, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_tif_hac_nj[0] += 1
        # Review texts of the rows where the topic column equals 1
        filtered_comments_tif_hac_nj = sentiment_df_tif_hac_nj.loc[sentiment_df_tif_hac_nj[topic] == 1, 'review_text'].tolist()
        # Only call positive_keywords when at least one review matched
        if filtered_comments_tif_hac_nj:
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_tif_hac_nj, topic)
            keyword_positive_output_tif_hac_nj.append(keywords)
            keyword_input_token_tif_hac_nj += input_tokens_loop
            keyword_output_token_tif_hac_nj += output_token_loop
finally:
    # Always signal and join the progress thread, even when a call above raises;
    # without this the thread is never told to stop after a failure
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tif_hac_nj = time.time()
# Hard-coded rates: 0.01 per 1000 input tokens, 0.03 per 1000 output tokens.
# Each component is rounded to two decimals before the two are summed.
keyword_cost_input_token_tif_hac_nj = round((0.01/1000)*keyword_input_token_tif_hac_nj,2)
keyword_cost_output_token_tif_hac_nj = round((0.03/1000)*keyword_output_token_tif_hac_nj,2)
keyword_total_cost_tif_hac_nj = keyword_cost_input_token_tif_hac_nj + keyword_cost_output_token_tif_hac_nj
keyword_total_time_loop_tif_hac_nj = keyword_end_time_loop_tif_hac_nj - keyword_start_time_loop_tif_hac_nj

# Display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_tif_hac_nj[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tif_hac_nj,1))
print("Total Input Tokens - ", keyword_input_token_tif_hac_nj)
print("Total Input Cost = USD ",keyword_cost_input_token_tif_hac_nj)
print("Total Output Tokens - ", keyword_output_token_tif_hac_nj)
print("Total Output Cost = USD ",keyword_cost_output_token_tif_hac_nj)
print("Total Cost = USD ",round(keyword_total_cost_tif_hac_nj,2))

Executed  10  Iterations
Total Execution time (in secs) -  6.0
Total Input Tokens -  2488
Total Input Cost = USD  0.02
Total Output Tokens -  193
Total Output Cost = USD  0.01
Total Cost = USD  0.03


In [559]:
# Column layout of the per-store table: three identifier columns, then one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Empty frame carrying the expected columns; used as the left side of the single
# concat below so the column order is fixed by this list
positive_keywords_tif_hac_nj = pd.DataFrame()
for column in columns:
    positive_keywords_tif_hac_nj[column] = None

# Resolve the store name once: value of the first keyword_mappings entry whose key
# contains the identifier, or a placeholder when none matches
input_store_identifier = 'tif_hac_nj'
store_name_tif_hac_nj = next(
    (value for key, value in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Collect every row as a dict first and build the frame once, instead of
# re-copying the whole frame with pd.concat for each appended row
rows_tif_hac_nj = []
for json_str in keyword_positive_output_tif_hac_nj:
    # Each collected entry is a JSON string mapping a category to a list of
    # objects that carry comma-separated 'keywords' and 'phrases' strings
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # Keywords row first, then phrases row (same order as the output table)
            for row_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_tif_hac_nj
                row_data['Sentiment'] = 'Positive'
                row_data['Type'] = row_type
                # Normalise whitespace around each comma-separated item
                row_data[category] = ', '.join(item.strip() for item in content[row_type].split(','))
                rows_tif_hac_nj.append(row_data)

if rows_tif_hac_nj:
    positive_keywords_tif_hac_nj = pd.concat([positive_keywords_tif_hac_nj, pd.DataFrame(rows_tif_hac_nj)], ignore_index=True)

# Consolidate rows that share 'Store Name', 'Sentiment' and 'Type' into a single row,
# joining the non-null cell values of each remaining column with a space
positive_keywords_tif_hac_nj = positive_keywords_tif_hac_nj.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_tif_hac_nj

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Tiffany & Co-Hackensack, NJ",Positive,keywords,,"welcoming :1, luxury :1, elegant :1","knowledgeable :1, welcoming :1, friendly :1, p...",,No relevant positive keywords/ phrases,,,,,
1,"Tiffany & Co-Hackensack, NJ",Positive,phrases,,"better experience :1, luxury retail experience...","personable and knowledgeable :1, service is to...",,No relevant positive keywords/ phrases,,,,,


### tif_sho_nj

In [560]:
# Raw positive_keywords() results, one list entry per topic that has positive reviews
keyword_positive_output_tif_sho_nj = []
# Topic columns to iterate over
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# One-element list: mutated by the loop below and handed to the progress thread via args
keyword_counter_tif_sho_nj = [0]
keyword_input_token_tif_sho_nj = 0
keyword_output_token_tif_sho_nj = 0

# Fetch this store's frame once instead of twice per topic
sentiment_df_tif_sho_nj = keyword_dataframes['tif_sho_nj_final_sen_df_jul']

keyword_start_time_loop_tif_sho_nj = time.time()

# Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_tif_sho_nj, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_tif_sho_nj[0] += 1
        # Review texts of the rows where the topic column equals 1
        filtered_comments_tif_sho_nj = sentiment_df_tif_sho_nj.loc[sentiment_df_tif_sho_nj[topic] == 1, 'review_text'].tolist()
        # Only call positive_keywords when at least one review matched
        if filtered_comments_tif_sho_nj:
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_tif_sho_nj, topic)
            keyword_positive_output_tif_sho_nj.append(keywords)
            keyword_input_token_tif_sho_nj += input_tokens_loop
            keyword_output_token_tif_sho_nj += output_token_loop
finally:
    # Always signal and join the progress thread, even when a call above raises;
    # without this the thread is never told to stop after a failure
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tif_sho_nj = time.time()
# Hard-coded rates: 0.01 per 1000 input tokens, 0.03 per 1000 output tokens.
# Each component is rounded to two decimals before the two are summed.
keyword_cost_input_token_tif_sho_nj = round((0.01/1000)*keyword_input_token_tif_sho_nj,2)
keyword_cost_output_token_tif_sho_nj = round((0.03/1000)*keyword_output_token_tif_sho_nj,2)
keyword_total_cost_tif_sho_nj = keyword_cost_input_token_tif_sho_nj + keyword_cost_output_token_tif_sho_nj
keyword_total_time_loop_tif_sho_nj = keyword_end_time_loop_tif_sho_nj - keyword_start_time_loop_tif_sho_nj

# Display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_tif_sho_nj[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tif_sho_nj,1))
print("Total Input Tokens - ", keyword_input_token_tif_sho_nj)
print("Total Input Cost = USD ",keyword_cost_input_token_tif_sho_nj)
print("Total Output Tokens - ", keyword_output_token_tif_sho_nj)
print("Total Output Cost = USD ",keyword_cost_output_token_tif_sho_nj)
print("Total Cost = USD ",round(keyword_total_cost_tif_sho_nj,2))

Executed  10  Iterations
Total Execution time (in secs) -  4.5
Total Input Tokens -  2292
Total Input Cost = USD  0.02
Total Output Tokens -  152
Total Output Cost = USD  0.0
Total Cost = USD  0.02


In [561]:
# Column layout of the per-store table: three identifier columns, then one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Empty frame carrying the expected columns; used as the left side of the single
# concat below so the column order is fixed by this list
positive_keywords_tif_sho_nj = pd.DataFrame()
for column in columns:
    positive_keywords_tif_sho_nj[column] = None

# Resolve the store name once: value of the first keyword_mappings entry whose key
# contains the identifier, or a placeholder when none matches
input_store_identifier = 'tif_sho_nj'
store_name_tif_sho_nj = next(
    (value for key, value in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Collect every row as a dict first and build the frame once, instead of
# re-copying the whole frame with pd.concat for each appended row
rows_tif_sho_nj = []
for json_str in keyword_positive_output_tif_sho_nj:
    # Each collected entry is a JSON string mapping a category to a list of
    # objects that carry comma-separated 'keywords' and 'phrases' strings
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # Keywords row first, then phrases row (same order as the output table)
            for row_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_tif_sho_nj
                row_data['Sentiment'] = 'Positive'
                row_data['Type'] = row_type
                # Normalise whitespace around each comma-separated item
                row_data[category] = ', '.join(item.strip() for item in content[row_type].split(','))
                rows_tif_sho_nj.append(row_data)

if rows_tif_sho_nj:
    positive_keywords_tif_sho_nj = pd.concat([positive_keywords_tif_sho_nj, pd.DataFrame(rows_tif_sho_nj)], ignore_index=True)

# Consolidate rows that share 'Store Name', 'Sentiment' and 'Type' into a single row,
# joining the non-null cell values of each remaining column with a space
positive_keywords_tif_sho_nj = positive_keywords_tif_sho_nj.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_tif_sho_nj

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Tiffany & Co-Short Hills, NJ",Positive,keywords,,"wonderful :3, friendly :2, amazing :1, lovely ...","friendly :3, helpful :2, attentive :1, persona...",,,,,,,
1,"Tiffany & Co-Short Hills, NJ",Positive,phrases,,"wonderful experience :1, great experience :1, ...","wonderful to work with :2, great care :1, exce...",,,,,,,


### tif_par_nj

In [562]:
# Raw positive_keywords() results, one list entry per topic that has positive reviews
keyword_positive_output_tif_par_nj = []
# Topic columns to iterate over
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# One-element list: mutated by the loop below and handed to the progress thread via args
keyword_counter_tif_par_nj = [0]
keyword_input_token_tif_par_nj = 0
keyword_output_token_tif_par_nj = 0

# Fetch this store's frame once instead of twice per topic
sentiment_df_tif_par_nj = keyword_dataframes['tif_par_nj_final_sen_df_jul']

keyword_start_time_loop_tif_par_nj = time.time()

# Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_tif_par_nj, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_tif_par_nj[0] += 1
        # Review texts of the rows where the topic column equals 1
        filtered_comments_tif_par_nj = sentiment_df_tif_par_nj.loc[sentiment_df_tif_par_nj[topic] == 1, 'review_text'].tolist()
        # Only call positive_keywords when at least one review matched
        if filtered_comments_tif_par_nj:
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_tif_par_nj, topic)
            keyword_positive_output_tif_par_nj.append(keywords)
            keyword_input_token_tif_par_nj += input_tokens_loop
            keyword_output_token_tif_par_nj += output_token_loop
finally:
    # Always signal and join the progress thread, even when a call above raises;
    # without this the thread is never told to stop after a failure
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tif_par_nj = time.time()
# Hard-coded rates: 0.01 per 1000 input tokens, 0.03 per 1000 output tokens.
# Each component is rounded to two decimals before the two are summed.
keyword_cost_input_token_tif_par_nj = round((0.01/1000)*keyword_input_token_tif_par_nj,2)
keyword_cost_output_token_tif_par_nj = round((0.03/1000)*keyword_output_token_tif_par_nj,2)
keyword_total_cost_tif_par_nj = keyword_cost_input_token_tif_par_nj + keyword_cost_output_token_tif_par_nj
keyword_total_time_loop_tif_par_nj = keyword_end_time_loop_tif_par_nj - keyword_start_time_loop_tif_par_nj

# Display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_tif_par_nj[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tif_par_nj,1))
print("Total Input Tokens - ", keyword_input_token_tif_par_nj)
print("Total Input Cost = USD ",keyword_cost_input_token_tif_par_nj)
print("Total Output Tokens - ", keyword_output_token_tif_par_nj)
print("Total Output Cost = USD ",keyword_cost_output_token_tif_par_nj)
print("Total Cost = USD ",round(keyword_total_cost_tif_par_nj,2))

Executed  10  Iterations
Total Execution time (in secs) -  5.5
Total Input Tokens -  2351
Total Input Cost = USD  0.02
Total Output Tokens -  143
Total Output Cost = USD  0.0
Total Cost = USD  0.02


In [563]:
# Column layout of the per-store table: three identifier columns, then one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Empty frame carrying the expected columns; used as the left side of the single
# concat below so the column order is fixed by this list
positive_keywords_tif_par_nj = pd.DataFrame()
for column in columns:
    positive_keywords_tif_par_nj[column] = None

# Resolve the store name once: value of the first keyword_mappings entry whose key
# contains the identifier, or a placeholder when none matches
input_store_identifier = 'tif_par_nj'
store_name_tif_par_nj = next(
    (value for key, value in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Collect every row as a dict first and build the frame once, instead of
# re-copying the whole frame with pd.concat for each appended row
rows_tif_par_nj = []
for json_str in keyword_positive_output_tif_par_nj:
    # Each collected entry is a JSON string mapping a category to a list of
    # objects that carry comma-separated 'keywords' and 'phrases' strings
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # Keywords row first, then phrases row (same order as the output table)
            for row_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_tif_par_nj
                row_data['Sentiment'] = 'Positive'
                row_data['Type'] = row_type
                # Normalise whitespace around each comma-separated item
                row_data[category] = ', '.join(item.strip() for item in content[row_type].split(','))
                rows_tif_par_nj.append(row_data)

if rows_tif_par_nj:
    positive_keywords_tif_par_nj = pd.concat([positive_keywords_tif_par_nj, pd.DataFrame(rows_tif_par_nj)], ignore_index=True)

# Consolidate rows that share 'Store Name', 'Sentiment' and 'Type' into a single row,
# joining the non-null cell values of each remaining column with a space
positive_keywords_tif_par_nj = positive_keywords_tif_par_nj.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_tif_par_nj

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Tiffany & Co-Paramus, NJ",Positive,keywords,,"friendly :2, helpful :1, huge :1, seating area...","friendly :2, helpful :1",,variety :1,,,,,
1,"Tiffany & Co-Paramus, NJ",Positive,phrases,,"super friendly :1, great variety :1, lifetime ...",super friendly :1,,great variety of items :1,,,,,


### tif_vie_va

In [564]:
# Raw positive_keywords() results, one list entry per topic that has positive reviews
keyword_positive_output_tif_vie_va = []
# Topic columns to iterate over
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# One-element list: mutated by the loop below and handed to the progress thread via args
keyword_counter_tif_vie_va = [0]
keyword_input_token_tif_vie_va = 0
keyword_output_token_tif_vie_va = 0

# Fetch this store's frame once instead of twice per topic
sentiment_df_tif_vie_va = keyword_dataframes['tif_vie_va_final_sen_df_jul']

keyword_start_time_loop_tif_vie_va = time.time()

# Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_tif_vie_va, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_tif_vie_va[0] += 1
        # Review texts of the rows where the topic column equals 1
        filtered_comments_tif_vie_va = sentiment_df_tif_vie_va.loc[sentiment_df_tif_vie_va[topic] == 1, 'review_text'].tolist()
        # Only call positive_keywords when at least one review matched
        if filtered_comments_tif_vie_va:
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_tif_vie_va, topic)
            keyword_positive_output_tif_vie_va.append(keywords)
            keyword_input_token_tif_vie_va += input_tokens_loop
            keyword_output_token_tif_vie_va += output_token_loop
finally:
    # Always signal and join the progress thread, even when a call above raises;
    # without this the thread is never told to stop after a failure
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tif_vie_va = time.time()
# Hard-coded rates: 0.01 per 1000 input tokens, 0.03 per 1000 output tokens.
# Each component is rounded to two decimals before the two are summed.
keyword_cost_input_token_tif_vie_va = round((0.01/1000)*keyword_input_token_tif_vie_va,2)
keyword_cost_output_token_tif_vie_va = round((0.03/1000)*keyword_output_token_tif_vie_va,2)
keyword_total_cost_tif_vie_va = keyword_cost_input_token_tif_vie_va + keyword_cost_output_token_tif_vie_va
keyword_total_time_loop_tif_vie_va = keyword_end_time_loop_tif_vie_va - keyword_start_time_loop_tif_vie_va

# Display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_tif_vie_va[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tif_vie_va,1))
print("Total Input Tokens - ", keyword_input_token_tif_vie_va)
print("Total Input Cost = USD ",keyword_cost_input_token_tif_vie_va)
print("Total Output Tokens - ", keyword_output_token_tif_vie_va)
print("Total Output Cost = USD ",keyword_cost_output_token_tif_vie_va)
print("Total Cost = USD ",round(keyword_total_cost_tif_vie_va,2))

Executed  10  Iterations
Total Execution time (in secs) -  8.5
Total Input Tokens -  5478
Total Input Cost = USD  0.05
Total Output Tokens -  307
Total Output Cost = USD  0.01
Total Cost = USD  0.06


In [565]:
# Column layout of the per-store table: three identifier columns, then one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Empty frame carrying the expected columns; used as the left side of the single
# concat below so the column order is fixed by this list
positive_keywords_tif_vie_va = pd.DataFrame()
for column in columns:
    positive_keywords_tif_vie_va[column] = None

# Resolve the store name once: value of the first keyword_mappings entry whose key
# contains the identifier, or a placeholder when none matches
input_store_identifier = 'tif_vie_va'
store_name_tif_vie_va = next(
    (value for key, value in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Collect every row as a dict first and build the frame once, instead of
# re-copying the whole frame with pd.concat for each appended row
rows_tif_vie_va = []
for json_str in keyword_positive_output_tif_vie_va:
    # Each collected entry is a JSON string mapping a category to a list of
    # objects that carry comma-separated 'keywords' and 'phrases' strings
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # Keywords row first, then phrases row (same order as the output table)
            for row_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_tif_vie_va
                row_data['Sentiment'] = 'Positive'
                row_data['Type'] = row_type
                # Normalise whitespace around each comma-separated item
                row_data[category] = ', '.join(item.strip() for item in content[row_type].split(','))
                rows_tif_vie_va.append(row_data)

if rows_tif_vie_va:
    positive_keywords_tif_vie_va = pd.concat([positive_keywords_tif_vie_va, pd.DataFrame(rows_tif_vie_va)], ignore_index=True)

# Consolidate rows that share 'Store Name', 'Sentiment' and 'Type' into a single row,
# joining the non-null cell values of each remaining column with a space
positive_keywords_tif_vie_va = positive_keywords_tif_vie_va.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_tif_vie_va

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Tiffany & Co-Vienna, VA",Positive,keywords,"Knowledgeable :1, Comfortable :1, Recommend :1","great experience :2, nice experience :1, very ...","knowledgeable :3, helpful :3, professional :2,...",,"collections :1, pieces :1",,,,,No relevant positive keywords/ phrases
1,"Tiffany & Co-Vienna, VA",Positive,phrases,"Most knowledgeable and most patient :1, Convin...","silky smooth process :1, pleasure to come to t...","great experience :2, excellent customer servic...",,"variety of collections :1, classic pieces :1",,,,,No relevant positive keywords/ phrases


### tif_ric_va

In [566]:
# Raw positive_keywords() results, one list entry per topic that has positive reviews
keyword_positive_output_tif_ric_va = []
# Topic columns to iterate over
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# One-element list: mutated by the loop below and handed to the progress thread via args
keyword_counter_tif_ric_va = [0]
keyword_input_token_tif_ric_va = 0
keyword_output_token_tif_ric_va = 0

# Fetch this store's frame once instead of twice per topic
sentiment_df_tif_ric_va = keyword_dataframes['tif_ric_va_final_sen_df_jul']

keyword_start_time_loop_tif_ric_va = time.time()

# Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_tif_ric_va, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_tif_ric_va[0] += 1
        # Review texts of the rows where the topic column equals 1
        filtered_comments_tif_ric_va = sentiment_df_tif_ric_va.loc[sentiment_df_tif_ric_va[topic] == 1, 'review_text'].tolist()
        # Only call positive_keywords when at least one review matched
        if filtered_comments_tif_ric_va:
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_tif_ric_va, topic)
            keyword_positive_output_tif_ric_va.append(keywords)
            keyword_input_token_tif_ric_va += input_tokens_loop
            keyword_output_token_tif_ric_va += output_token_loop
finally:
    # Always signal and join the progress thread, even when a call above raises;
    # without this the thread is never told to stop after a failure
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tif_ric_va = time.time()
# Hard-coded rates: 0.01 per 1000 input tokens, 0.03 per 1000 output tokens.
# Each component is rounded to two decimals before the two are summed.
keyword_cost_input_token_tif_ric_va = round((0.01/1000)*keyword_input_token_tif_ric_va,2)
keyword_cost_output_token_tif_ric_va = round((0.03/1000)*keyword_output_token_tif_ric_va,2)
keyword_total_cost_tif_ric_va = keyword_cost_input_token_tif_ric_va + keyword_cost_output_token_tif_ric_va
keyword_total_time_loop_tif_ric_va = keyword_end_time_loop_tif_ric_va - keyword_start_time_loop_tif_ric_va

# Display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_tif_ric_va[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tif_ric_va,1))
print("Total Input Tokens - ", keyword_input_token_tif_ric_va)
print("Total Input Cost = USD ",keyword_cost_input_token_tif_ric_va)
print("Total Output Tokens - ", keyword_output_token_tif_ric_va)
print("Total Output Cost = USD ",keyword_cost_output_token_tif_ric_va)
print("Total Cost = USD ",round(keyword_total_cost_tif_ric_va,2))

Executed  10  Iterations
Total Execution time (in secs) -  3.0
Total Input Tokens -  1492
Total Input Cost = USD  0.01
Total Output Tokens -  91
Total Output Cost = USD  0.0
Total Cost = USD  0.01


In [567]:
# Build the positive keyword/phrase summary table for store 'tif_ric_va'.

# Column layout: three identifier columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# The store name is the same for every row, so resolve it once: take the value
# of the first keyword_mappings key that contains the store identifier.
input_store_identifier = 'tif_ric_va'
positive_store_name_tif_ric_va = next(
    (value for key, value in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Rows are collected in a list and turned into a DataFrame once, instead of
# growing the DataFrame with pd.concat on every iteration (which re-copies all
# previous rows each time).
positive_keyword_rows_tif_ric_va = []
for json_str in keyword_positive_output_tif_ric_va:
    # Each stored result is a JSON string mapping a category to a list of
    # objects that carry 'keywords' and 'phrases' strings
    data = json.loads(json_str)
    for category, content_list in data.items():
        row_data = {column: None for column in columns}
        row_data['Store Name'] = positive_store_name_tif_ric_va
        row_data['Sentiment'] = 'Positive'
        for content in content_list:
            # One row for the keywords, then one for the phrases; the 'Type'
            # label is identical to the key read from the content object
            for row_type in ('keywords', 'phrases'):
                row_data['Type'] = row_type
                row_data[category] = ', '.join(part.strip() for part in content[row_type].split(','))
                # Snapshot the dict, because row_data is mutated on the next pass
                positive_keyword_rows_tif_ric_va.append(dict(row_data))

# Keep the expected columns even when there is nothing to report
if positive_keyword_rows_tif_ric_va:
    positive_keywords_tif_ric_va = pd.DataFrame(positive_keyword_rows_tif_ric_va)
else:
    positive_keywords_tif_ric_va = pd.DataFrame(columns=columns)

# Consolidate to one row per ('Store Name', 'Sentiment', 'Type'); the non-null
# values of each column are joined with a single space
positive_keywords_tif_ric_va = positive_keywords_tif_ric_va.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_tif_ric_va

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Tiffany & Co-Richmond, VA",Positive,keywords,,No relevant positive keywords/ phrases,"friendly :2, courteous :1, welcoming :1",,,,,,,
1,"Tiffany & Co-Richmond, VA",Positive,phrases,,No relevant positive keywords/ phrases,"Wonderful staff :1, Great customer service :1",,,,,,,


### vbj_fri_tx

In [568]:
# Collect the raw positive keyword/phrase extraction results for store 'vbj_fri_tx'.
# One entry is appended per topic that has at least one review flagged 1 for it.
keyword_positive_output_vbj_fri_tx = []
# Topic columns of the sentiment DataFrame to iterate over
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# One-element list: mutated in the loop below and handed to the progress thread
keyword_counter_vbj_fri_tx=[0]
keyword_input_token_vbj_fri_tx = 0
keyword_output_token_vbj_fri_tx = 0
keyword_start_time_loop_vbj_fri_tx = time.time()

# Background thread that reports progress while the topics are processed
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_vbj_fri_tx, keyword_total_iterations, stop_event))
message_thread.start()

# try/finally guarantees the stop signal is sent and the thread is joined even
# when the DataFrame lookup or a positive_keywords call raises; without it the
# progress thread would be left waiting for a stop_event that is never set.
try:
    for topic in keyword_topics:
        keyword_counter_vbj_fri_tx[0]+=1
        # Reviews whose value in this topic column equals 1
        filtered_comments_vbj_fri_tx = keyword_dataframes['vbj_fri_tx_final_sen_df_jul'][keyword_dataframes['vbj_fri_tx_final_sen_df_jul'][topic]==1]['review_text'].tolist()
        # Topics without any such review are skipped (no call is made)
        if filtered_comments_vbj_fri_tx:
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_vbj_fri_tx,topic)
            keyword_positive_output_vbj_fri_tx.append(keywords)
            keyword_input_token_vbj_fri_tx += input_tokens_loop
            keyword_output_token_vbj_fri_tx += output_token_loop
finally:
    # Stop the dynamic message thread
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_vbj_fri_tx = time.time()
# Hard-coded rates: USD 0.01 per 1000 input tokens, USD 0.03 per 1000 output tokens.
# NOTE: each component is rounded to cents before being summed, so the total
# can differ by a cent from rounding the exact sum.
keyword_cost_input_token_vbj_fri_tx = round((0.01/1000)*keyword_input_token_vbj_fri_tx,2)
keyword_cost_output_token_vbj_fri_tx = round((0.03/1000)*keyword_output_token_vbj_fri_tx,2)
keyword_total_cost_vbj_fri_tx = keyword_cost_input_token_vbj_fri_tx + keyword_cost_output_token_vbj_fri_tx
keyword_total_time_loop_vbj_fri_tx = keyword_end_time_loop_vbj_fri_tx - keyword_start_time_loop_vbj_fri_tx

# Replace the progress display with the final performance and cost summary
clear_output(wait=True)
print("Executed ",keyword_counter_vbj_fri_tx[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_vbj_fri_tx,1))
print("Total Input Tokens - ", keyword_input_token_vbj_fri_tx)
print("Total Input Cost = USD ",keyword_cost_input_token_vbj_fri_tx)
print("Total Output Tokens - ", keyword_output_token_vbj_fri_tx)
print("Total Output Cost = USD ",keyword_cost_output_token_vbj_fri_tx)
print("Total Cost = USD ",round(keyword_total_cost_vbj_fri_tx,2))

Executed  10  Iterations
Total Execution time (in secs) -  29.6
Total Input Tokens -  81468
Total Input Cost = USD  0.81
Total Output Tokens -  679
Total Output Cost = USD  0.02
Total Cost = USD  0.83


In [569]:
# Build the positive keyword/phrase summary table for store 'vbj_fri_tx'.

# Column layout: three identifier columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# The store name is the same for every row, so resolve it once: take the value
# of the first keyword_mappings key that contains the store identifier.
input_store_identifier = 'vbj_fri_tx'
positive_store_name_vbj_fri_tx = next(
    (value for key, value in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Rows are collected in a list and turned into a DataFrame once, instead of
# growing the DataFrame with pd.concat on every iteration (which re-copies all
# previous rows each time).
positive_keyword_rows_vbj_fri_tx = []
for json_str in keyword_positive_output_vbj_fri_tx:
    # Each stored result is a JSON string mapping a category to a list of
    # objects that carry 'keywords' and 'phrases' strings
    data = json.loads(json_str)
    for category, content_list in data.items():
        row_data = {column: None for column in columns}
        row_data['Store Name'] = positive_store_name_vbj_fri_tx
        row_data['Sentiment'] = 'Positive'
        for content in content_list:
            # One row for the keywords, then one for the phrases; the 'Type'
            # label is identical to the key read from the content object
            for row_type in ('keywords', 'phrases'):
                row_data['Type'] = row_type
                row_data[category] = ', '.join(part.strip() for part in content[row_type].split(','))
                # Snapshot the dict, because row_data is mutated on the next pass
                positive_keyword_rows_vbj_fri_tx.append(dict(row_data))

# Keep the expected columns even when there is nothing to report
if positive_keyword_rows_vbj_fri_tx:
    positive_keywords_vbj_fri_tx = pd.DataFrame(positive_keyword_rows_vbj_fri_tx)
else:
    positive_keywords_vbj_fri_tx = pd.DataFrame(columns=columns)

# Consolidate to one row per ('Store Name', 'Sentiment', 'Type'); the non-null
# values of each column are joined with a single space
positive_keywords_vbj_fri_tx = positive_keywords_vbj_fri_tx.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_vbj_fri_tx

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"VBJ Jewellers-Frisco, TX",Positive,keywords,"Trust :3, Reliable :2, Trustworthy :2, Confide...","great experience : 15, good experience : 10, w...","patient : 45, helpful : 40, friendly : 35, kno...","unique :3, exclusive :2, beautiful :2, stunnin...","collection : 78, variety : 8, options : 6, mod...","discount :4, deal :1",,"reasonable :5, competitive :4, transparent :4,...","quality :10, good quality :3, high quality :3,...",good rates :1
1,"VBJ Jewellers-Frisco, TX",Positive,phrases,"Trusted brand :2, Trusted place to buy :1, Tru...","great shopping experience : 4, wonderful shopp...","very patient and helpful : 10, very friendly a...","nice designs :2, unique and beautiful :1, stun...","great collection : 10, good collection : 9, ni...","amazing Aadi discount :1, good discount :1, ge...",,"reasonable rates :2, competitive prices :2, tr...","quality is fantastic :1, high-quality products...",exchange old jewelry for new :1


### tan_chi_il

In [570]:
# Collect the raw positive keyword/phrase extraction results for store 'tan_chi_il'.
# One entry is appended per topic that has at least one review flagged 1 for it.
keyword_positive_output_tan_chi_il = []
# Topic columns of the sentiment DataFrame to iterate over
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# One-element list: mutated in the loop below and handed to the progress thread
keyword_counter_tan_chi_il=[0]
keyword_input_token_tan_chi_il = 0
keyword_output_token_tan_chi_il = 0
keyword_start_time_loop_tan_chi_il = time.time()

# Background thread that reports progress while the topics are processed
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_tan_chi_il, keyword_total_iterations, stop_event))
message_thread.start()

# try/finally guarantees the stop signal is sent and the thread is joined even
# when the DataFrame lookup or a positive_keywords call raises; without it the
# progress thread would be left waiting for a stop_event that is never set.
try:
    for topic in keyword_topics:
        keyword_counter_tan_chi_il[0]+=1
        # Reviews whose value in this topic column equals 1
        filtered_comments_tan_chi_il = keyword_dataframes['tan_chi_il_final_sen_df_jul'][keyword_dataframes['tan_chi_il_final_sen_df_jul'][topic]==1]['review_text'].tolist()
        # Topics without any such review are skipped (no call is made)
        if filtered_comments_tan_chi_il:
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_tan_chi_il,topic)
            keyword_positive_output_tan_chi_il.append(keywords)
            keyword_input_token_tan_chi_il += input_tokens_loop
            keyword_output_token_tan_chi_il += output_token_loop
finally:
    # Stop the dynamic message thread
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tan_chi_il = time.time()
# Hard-coded rates: USD 0.01 per 1000 input tokens, USD 0.03 per 1000 output tokens.
# NOTE: each component is rounded to cents before being summed, so the total
# can differ by a cent from rounding the exact sum.
keyword_cost_input_token_tan_chi_il = round((0.01/1000)*keyword_input_token_tan_chi_il,2)
keyword_cost_output_token_tan_chi_il = round((0.03/1000)*keyword_output_token_tan_chi_il,2)
keyword_total_cost_tan_chi_il = keyword_cost_input_token_tan_chi_il + keyword_cost_output_token_tan_chi_il
keyword_total_time_loop_tan_chi_il = keyword_end_time_loop_tan_chi_il - keyword_start_time_loop_tan_chi_il

# Replace the progress display with the final performance and cost summary
clear_output(wait=True)
print("Executed ",keyword_counter_tan_chi_il[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tan_chi_il,1))
print("Total Input Tokens - ", keyword_input_token_tan_chi_il)
print("Total Input Cost = USD ",keyword_cost_input_token_tan_chi_il)
print("Total Output Tokens - ", keyword_output_token_tan_chi_il)
print("Total Output Cost = USD ",keyword_cost_output_token_tan_chi_il)
print("Total Cost = USD ",round(keyword_total_cost_tan_chi_il,2))

Executed  10  Iterations
Total Execution time (in secs) -  37.6
Total Input Tokens -  41898
Total Input Cost = USD  0.42
Total Output Tokens -  647
Total Output Cost = USD  0.02
Total Cost = USD  0.44


In [571]:
# Build the positive keyword/phrase summary table for store 'tan_chi_il'.

# Column layout: three identifier columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# The store name is the same for every row, so resolve it once: take the value
# of the first keyword_mappings key that contains the store identifier.
input_store_identifier = 'tan_chi_il'
positive_store_name_tan_chi_il = next(
    (value for key, value in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Rows are collected in a list and turned into a DataFrame once, instead of
# growing the DataFrame with pd.concat on every iteration (which re-copies all
# previous rows each time).
positive_keyword_rows_tan_chi_il = []
for json_str in keyword_positive_output_tan_chi_il:
    # Each stored result is a JSON string mapping a category to a list of
    # objects that carry 'keywords' and 'phrases' strings
    data = json.loads(json_str)
    for category, content_list in data.items():
        row_data = {column: None for column in columns}
        row_data['Store Name'] = positive_store_name_tan_chi_il
        row_data['Sentiment'] = 'Positive'
        for content in content_list:
            # One row for the keywords, then one for the phrases; the 'Type'
            # label is identical to the key read from the content object
            for row_type in ('keywords', 'phrases'):
                row_data['Type'] = row_type
                row_data[category] = ', '.join(part.strip() for part in content[row_type].split(','))
                # Snapshot the dict, because row_data is mutated on the next pass
                positive_keyword_rows_tan_chi_il.append(dict(row_data))

# Keep the expected columns even when there is nothing to report
if positive_keyword_rows_tan_chi_il:
    positive_keywords_tan_chi_il = pd.DataFrame(positive_keyword_rows_tan_chi_il)
else:
    positive_keywords_tan_chi_il = pd.DataFrame(columns=columns)

# Consolidate to one row per ('Store Name', 'Sentiment', 'Type'); the non-null
# values of each column are joined with a single space
positive_keywords_tan_chi_il = positive_keywords_tan_chi_il.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_tan_chi_il

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Tanishq-Chicago, IL",Positive,keywords,"trusted :2, trust :2, trustworthy :2, confiden...","pleasant : 5, welcoming : 4, grand : 2, beauti...","helpful : 50, patient : 30, friendly : 25, att...","unique :5, trendy :3, beautiful :3, awesome :2...","collection : 45, variety : 10, options : 5, ra...","discount :5, offers :3, additional discounts :...",reasonable price :2,"reasonable :4, transparent :3, affordable :2, ...",quality :3,
1,"Tanishq-Chicago, IL",Positive,phrases,"trusted brand near to purchase :1, trustworthy...","great experience : 10, wonderful experience : ...","very helpful : 15, very patient : 10, very fri...","great designs :4, unique designs :3, beautiful...","wide collection : 3, unique collections : 3, g...","get the best discount :1, securing additional ...",No relevant positive phrases,"reasonable price :4, transparent with the pric...",No relevant positive keywords/ phrases,


### tan_fri_tx

In [572]:
# Collect the raw positive keyword/phrase extraction results for store 'tan_fri_tx'.
# One entry is appended per topic that has at least one review flagged 1 for it.
keyword_positive_output_tan_fri_tx = []
# Topic columns of the sentiment DataFrame to iterate over
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# One-element list: mutated in the loop below and handed to the progress thread
keyword_counter_tan_fri_tx=[0]
keyword_input_token_tan_fri_tx = 0
keyword_output_token_tan_fri_tx = 0
keyword_start_time_loop_tan_fri_tx = time.time()

# Background thread that reports progress while the topics are processed
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_tan_fri_tx, keyword_total_iterations, stop_event))
message_thread.start()

# try/finally guarantees the stop signal is sent and the thread is joined even
# when the DataFrame lookup or a positive_keywords call raises; without it the
# progress thread would be left waiting for a stop_event that is never set.
try:
    for topic in keyword_topics:
        keyword_counter_tan_fri_tx[0]+=1
        # Reviews whose value in this topic column equals 1
        filtered_comments_tan_fri_tx = keyword_dataframes['tan_fri_tx_final_sen_df_jul'][keyword_dataframes['tan_fri_tx_final_sen_df_jul'][topic]==1]['review_text'].tolist()
        # Topics without any such review are skipped (no call is made)
        if filtered_comments_tan_fri_tx:
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_tan_fri_tx,topic)
            keyword_positive_output_tan_fri_tx.append(keywords)
            keyword_input_token_tan_fri_tx += input_tokens_loop
            keyword_output_token_tan_fri_tx += output_token_loop
finally:
    # Stop the dynamic message thread
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tan_fri_tx = time.time()
# Hard-coded rates: USD 0.01 per 1000 input tokens, USD 0.03 per 1000 output tokens.
# NOTE: each component is rounded to cents before being summed, so the total
# can differ by a cent from rounding the exact sum.
keyword_cost_input_token_tan_fri_tx = round((0.01/1000)*keyword_input_token_tan_fri_tx,2)
keyword_cost_output_token_tan_fri_tx = round((0.03/1000)*keyword_output_token_tan_fri_tx,2)
keyword_total_cost_tan_fri_tx = keyword_cost_input_token_tan_fri_tx + keyword_cost_output_token_tan_fri_tx
keyword_total_time_loop_tan_fri_tx = keyword_end_time_loop_tan_fri_tx - keyword_start_time_loop_tan_fri_tx

# Replace the progress display with the final performance and cost summary
clear_output(wait=True)
print("Executed ",keyword_counter_tan_fri_tx[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tan_fri_tx,1))
print("Total Input Tokens - ", keyword_input_token_tan_fri_tx)
print("Total Input Cost = USD ",keyword_cost_input_token_tan_fri_tx)
print("Total Output Tokens - ", keyword_output_token_tan_fri_tx)
print("Total Output Cost = USD ",keyword_cost_output_token_tan_fri_tx)
print("Total Cost = USD ",round(keyword_total_cost_tan_fri_tx,2))

Executed  10  Iterations
Total Execution time (in secs) -  44.1
Total Input Tokens -  89141
Total Input Cost = USD  0.89
Total Output Tokens -  726
Total Output Cost = USD  0.02
Total Cost = USD  0.91


In [573]:
# Build the positive keyword/phrase summary table for store 'tan_fri_tx'.

# Column layout: three identifier columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# The store name is the same for every row, so resolve it once: take the value
# of the first keyword_mappings key that contains the store identifier.
input_store_identifier = 'tan_fri_tx'
positive_store_name_tan_fri_tx = next(
    (value for key, value in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Rows are collected in a list and turned into a DataFrame once, instead of
# growing the DataFrame with pd.concat on every iteration (which re-copies all
# previous rows each time).
positive_keyword_rows_tan_fri_tx = []
for json_str in keyword_positive_output_tan_fri_tx:
    # Each stored result is a JSON string mapping a category to a list of
    # objects that carry 'keywords' and 'phrases' strings
    data = json.loads(json_str)
    for category, content_list in data.items():
        row_data = {column: None for column in columns}
        row_data['Store Name'] = positive_store_name_tan_fri_tx
        row_data['Sentiment'] = 'Positive'
        for content in content_list:
            # One row for the keywords, then one for the phrases; the 'Type'
            # label is identical to the key read from the content object
            for row_type in ('keywords', 'phrases'):
                row_data['Type'] = row_type
                row_data[category] = ', '.join(part.strip() for part in content[row_type].split(','))
                # Snapshot the dict, because row_data is mutated on the next pass
                positive_keyword_rows_tan_fri_tx.append(dict(row_data))

# Keep the expected columns even when there is nothing to report
if positive_keyword_rows_tan_fri_tx:
    positive_keywords_tan_fri_tx = pd.DataFrame(positive_keyword_rows_tan_fri_tx)
else:
    positive_keywords_tan_fri_tx = pd.DataFrame(columns=columns)

# Consolidate to one row per ('Store Name', 'Sentiment', 'Type'); the non-null
# values of each column are joined with a single space
positive_keywords_tan_fri_tx = positive_keywords_tan_fri_tx.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_tan_fri_tx

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Tanishq-Frisco, TX",Positive,keywords,"Trust :3, Reliable :2, Genuine :2, Transparent...","great experience : 50, good experience : 30, e...","patient : 45, helpful : 40, knowledgeable : 20...","unique :3, beautiful :3, stunning :2, exquisit...","varieties :5, options :4, variety :3, selectio...","discount :5, discounts :4, deal :2, promotions...",,"reasonable :3, competitive :2, affordable :2, ...","craftsmanship :5, quality :5, attention to det...","exchange :3, exchanging :2"
1,"Tanishq-Frisco, TX",Positive,phrases,"Environment of trust :2, Trustworthy and relia...","wonderful experience : 20, very good experienc...","very patient and helpful : 8, extremely helpfu...","beautiful designs :3, unique jewelry :2, stunn...","wide variety :3, lots of collections :3, great...","extra discount :2, good discounts :1, great di...",,"within our budget :3, fit our budget :1, suite...",quality of the products exceeded my expectatio...,"jewelry exchange :2, exchanging old jewelry :1..."


### tan_hou_tx

In [574]:
# Collect the raw positive keyword/phrase extraction results for store 'tan_hou_tx'.
# One entry is appended per topic that has at least one review flagged 1 for it.
keyword_positive_output_tan_hou_tx = []
# Topic columns of the sentiment DataFrame to iterate over
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# One-element list: mutated in the loop below and handed to the progress thread
keyword_counter_tan_hou_tx=[0]
keyword_input_token_tan_hou_tx = 0
keyword_output_token_tan_hou_tx = 0
keyword_start_time_loop_tan_hou_tx = time.time()

# Background thread that reports progress while the topics are processed
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_tan_hou_tx, keyword_total_iterations, stop_event))
message_thread.start()

# try/finally guarantees the stop signal is sent and the thread is joined even
# when the DataFrame lookup or a positive_keywords call raises; without it the
# progress thread would be left waiting for a stop_event that is never set.
try:
    for topic in keyword_topics:
        keyword_counter_tan_hou_tx[0]+=1
        # Reviews whose value in this topic column equals 1
        filtered_comments_tan_hou_tx = keyword_dataframes['tan_hou_tx_final_sen_df_jul'][keyword_dataframes['tan_hou_tx_final_sen_df_jul'][topic]==1]['review_text'].tolist()
        # Topics without any such review are skipped (no call is made)
        if filtered_comments_tan_hou_tx:
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_tan_hou_tx,topic)
            keyword_positive_output_tan_hou_tx.append(keywords)
            keyword_input_token_tan_hou_tx += input_tokens_loop
            keyword_output_token_tan_hou_tx += output_token_loop
finally:
    # Stop the dynamic message thread
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tan_hou_tx = time.time()
# Hard-coded rates: USD 0.01 per 1000 input tokens, USD 0.03 per 1000 output tokens.
# NOTE: each component is rounded to cents before being summed, so the total
# can differ by a cent from rounding the exact sum.
keyword_cost_input_token_tan_hou_tx = round((0.01/1000)*keyword_input_token_tan_hou_tx,2)
keyword_cost_output_token_tan_hou_tx = round((0.03/1000)*keyword_output_token_tan_hou_tx,2)
keyword_total_cost_tan_hou_tx = keyword_cost_input_token_tan_hou_tx + keyword_cost_output_token_tan_hou_tx
keyword_total_time_loop_tan_hou_tx = keyword_end_time_loop_tan_hou_tx - keyword_start_time_loop_tan_hou_tx

# Replace the progress display with the final performance and cost summary
clear_output(wait=True)
print("Executed ",keyword_counter_tan_hou_tx[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tan_hou_tx,1))
print("Total Input Tokens - ", keyword_input_token_tan_hou_tx)
print("Total Input Cost = USD ",keyword_cost_input_token_tan_hou_tx)
print("Total Output Tokens - ", keyword_output_token_tan_hou_tx)
print("Total Output Cost = USD ",keyword_cost_output_token_tan_hou_tx)
print("Total Cost = USD ",round(keyword_total_cost_tan_hou_tx,2))

Executed  10  Iterations
Total Execution time (in secs) -  24.5
Total Input Tokens -  38989
Total Input Cost = USD  0.39
Total Output Tokens -  713
Total Output Cost = USD  0.02
Total Cost = USD  0.41


In [575]:
# Build the positive keyword/phrase summary table for store 'tan_hou_tx'.

# Column layout: three identifier columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# The store name is the same for every row, so resolve it once: take the value
# of the first keyword_mappings key that contains the store identifier.
input_store_identifier = 'tan_hou_tx'
positive_store_name_tan_hou_tx = next(
    (value for key, value in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Rows are collected in a list and turned into a DataFrame once, instead of
# growing the DataFrame with pd.concat on every iteration (which re-copies all
# previous rows each time).
positive_keyword_rows_tan_hou_tx = []
for json_str in keyword_positive_output_tan_hou_tx:
    # Each stored result is a JSON string mapping a category to a list of
    # objects that carry 'keywords' and 'phrases' strings
    data = json.loads(json_str)
    for category, content_list in data.items():
        row_data = {column: None for column in columns}
        row_data['Store Name'] = positive_store_name_tan_hou_tx
        row_data['Sentiment'] = 'Positive'
        for content in content_list:
            # One row for the keywords, then one for the phrases; the 'Type'
            # label is identical to the key read from the content object
            for row_type in ('keywords', 'phrases'):
                row_data['Type'] = row_type
                row_data[category] = ', '.join(part.strip() for part in content[row_type].split(','))
                # Snapshot the dict, because row_data is mutated on the next pass
                positive_keyword_rows_tan_hou_tx.append(dict(row_data))

# Keep the expected columns even when there is nothing to report
if positive_keyword_rows_tan_hou_tx:
    positive_keywords_tan_hou_tx = pd.DataFrame(positive_keyword_rows_tan_hou_tx)
else:
    positive_keywords_tan_hou_tx = pd.DataFrame(columns=columns)

# Consolidate to one row per ('Store Name', 'Sentiment', 'Type'); the non-null
# values of each column are joined with a single space
positive_keywords_tan_hou_tx = positive_keywords_tan_hou_tx.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_tan_hou_tx

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Tanishq-Houston, TX",Positive,keywords,"Trust :4, Reliable :2, Honest :1, Authenticity...","great experience : 15, pleasant experience : 8...","patient : 25, helpful : 24, knowledgeable : 15...","unique designs: 5, beautiful design: 3, tradit...","collection : 45, variety : 12, selection : 6, ...","discount :2, offers :2, gold coin :2",,"transparent :4, reasonable :3, competitive :2,...","high quality :3, amazing quality :2, top notch...","jewelry exchange :2, Gold Exchange :2"
1,"Tanishq-Houston, TX",Positive,phrases,"Trust regarding to jewellery :3, Trustworthy p...","great store : 3, beautiful store : 2, lovely e...","very patient : 10, very helpful : 8, extremely...","diverse range of designs: 3, wide range of des...","wide variety : 3, wide range : 3, huge variety...","explained the offers and discounts clearly :2,...",,"transparent with pricing :3, reasonable prices...","commitment to quality :2, quality of the gold ...","trade in jewelry :1, worked a lot with the ‘je..."


### tan_new_nj

In [576]:
# Initialize the output list: one positive_keywords result is appended per topic that has matching reviews
keyword_positive_output_tan_new_nj = []
# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# One-element list so the counter can be mutated here while the progress thread holds a reference to it
keyword_counter_tan_new_nj = [0]
keyword_input_token_tan_new_nj = 0
keyword_output_token_tan_new_nj = 0
keyword_start_time_loop_tan_new_nj = time.time()

# Threading setup for the dynamic progress message
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_tan_new_nj, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Look the store's frame up once instead of twice per topic
    sentiment_df_tan_new_nj = keyword_dataframes['tan_new_nj_final_sen_df_jul']
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_tan_new_nj[0] += 1
        # Keep the review texts of the rows where the topic column equals 1
        filtered_comments_tan_new_nj = sentiment_df_tan_new_nj.loc[sentiment_df_tan_new_nj[topic] == 1, 'review_text'].tolist()
        # Only call positive_keywords when there is at least one matching review
        if filtered_comments_tan_new_nj:
            # The call result is unpacked as (output, input token count, output token count)
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_tan_new_nj, topic)
            keyword_positive_output_tan_new_nj.append(keywords)
            keyword_input_token_tan_new_nj += input_tokens_loop
            keyword_output_token_tan_new_nj += output_token_loop
finally:
    # Always stop the dynamic message thread, even when the loop raises,
    # otherwise it would keep running in the background after the cell fails
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tan_new_nj = time.time()
# Cost is computed from per-1000-token rates: 0.01 for input tokens, 0.03 for output tokens
keyword_cost_input_token_tan_new_nj = round((0.01/1000)*keyword_input_token_tan_new_nj, 2)
keyword_cost_output_token_tan_new_nj = round((0.03/1000)*keyword_output_token_tan_new_nj, 2)
keyword_total_cost_tan_new_nj = keyword_cost_input_token_tan_new_nj + keyword_cost_output_token_tan_new_nj
keyword_total_time_loop_tan_new_nj = keyword_end_time_loop_tan_new_nj - keyword_start_time_loop_tan_new_nj

# Display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_tan_new_nj[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tan_new_nj,1))
print("Total Input Tokens - ", keyword_input_token_tan_new_nj)
print("Total Input Cost = USD ",keyword_cost_input_token_tan_new_nj)
print("Total Output Tokens - ", keyword_output_token_tan_new_nj)
print("Total Output Cost = USD ",keyword_cost_output_token_tan_new_nj)
print("Total Cost = USD ",round(keyword_total_cost_tan_new_nj,2))

Executed  10  Iterations
Total Execution time (in secs) -  28.1
Total Input Tokens -  50312
Total Input Cost = USD  0.5
Total Output Tokens -  767
Total Output Cost = USD  0.02
Total Cost = USD  0.52


In [577]:
# Build the positive keywords/phrases summary table for this store from the
# JSON strings collected in keyword_positive_output_tan_new_nj.

# Column layout: three identifier columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store name once: the first keyword_mappings key containing the
# identifier wins, otherwise the placeholder is kept
input_store_identifier = 'tan_new_nj'
store_name_tan_new_nj = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name_tan_new_nj = value
        break

# Collect rows as plain dicts and build the DataFrame once at the end;
# calling pd.concat inside the loop re-copies the whole frame on every row
keyword_rows_tan_new_nj = []
for json_str in keyword_positive_output_tan_new_nj:
    # Each entry parses to a mapping of category -> list of items that carry
    # comma-separated 'keywords' and 'phrases' strings
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # Keywords row first, then the phrases row, for the same category
            for row_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_tan_new_nj
                row_data['Sentiment'] = 'Positive'
                row_data['Type'] = row_type
                # Normalise the spacing around each comma-separated entry
                row_data[category] = ', '.join(part.strip() for part in content[row_type].split(','))
                keyword_rows_tan_new_nj.append(row_data)

# With no rows, fall back to an empty frame that still has the expected columns
if keyword_rows_tan_new_nj:
    positive_keywords_tan_new_nj = pd.DataFrame(keyword_rows_tan_new_nj)
else:
    positive_keywords_tan_new_nj = pd.DataFrame(columns=columns)

# Consolidate rows that share 'Store Name', 'Sentiment' and 'Type' into a single row;
# each topic column is the space-joined text of its non-null values
positive_keywords_tan_new_nj = positive_keywords_tan_new_nj.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_tan_new_nj

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Tanishq-New Jersey, NJ",Positive,keywords,"Trust :3, Transparency :2, Trusted :2, Authent...","pleasant :5, wonderful :5, great :4, exception...","helpful : 50, patient : 30, polite : 10, frien...","exclusive designs: 8, beautiful design: 4, ama...","collection : 20, variety : 5, selection : 4, o...","discount :5, discount benefits :1, discount of...",,"budget :6, transparent pricing :2, best prices...","quality :4, top quality :2, high-quality :2, g...","transparent :2, seamless :2, helpful :2, excha..."
1,"Tanishq-New Jersey, NJ",Positive,phrases,"Brand you can trust :1, Trusted source for my ...","great experience :10, wonderful experience :8,...","very helpful : 15, extremely helpful : 10, ver...","exclusive jewelry designs: 2, beautiful piece ...","great collection : 8, wide range : 3, good col...","great discount :2, best discount :1, 5% discou...",,"fit my budget :2, within our budget :2, transp...",quality and transparency is 100% guaranteed :1...,gold jewelry exchange was handled seamlessly :...


### tan_bar_db

In [578]:
# Initialize the output list: one positive_keywords result is appended per topic that has matching reviews
keyword_positive_output_tan_bar_db = []
# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# One-element list so the counter can be mutated here while the progress thread holds a reference to it
keyword_counter_tan_bar_db = [0]
keyword_input_token_tan_bar_db = 0
keyword_output_token_tan_bar_db = 0
keyword_start_time_loop_tan_bar_db = time.time()

# Threading setup for the dynamic progress message
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_tan_bar_db, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Look the store's frame up once instead of twice per topic
    sentiment_df_tan_bar_db = keyword_dataframes['tan_bar_db_final_sen_df_jul']
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_tan_bar_db[0] += 1
        # Keep the review texts of the rows where the topic column equals 1
        filtered_comments_tan_bar_db = sentiment_df_tan_bar_db.loc[sentiment_df_tan_bar_db[topic] == 1, 'review_text'].tolist()
        # Only call positive_keywords when there is at least one matching review
        if filtered_comments_tan_bar_db:
            # The call result is unpacked as (output, input token count, output token count)
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_tan_bar_db, topic)
            keyword_positive_output_tan_bar_db.append(keywords)
            keyword_input_token_tan_bar_db += input_tokens_loop
            keyword_output_token_tan_bar_db += output_token_loop
finally:
    # Always stop the dynamic message thread, even when the loop raises,
    # otherwise it would keep running in the background after the cell fails
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tan_bar_db = time.time()
# Cost is computed from per-1000-token rates: 0.01 for input tokens, 0.03 for output tokens
keyword_cost_input_token_tan_bar_db = round((0.01/1000)*keyword_input_token_tan_bar_db, 2)
keyword_cost_output_token_tan_bar_db = round((0.03/1000)*keyword_output_token_tan_bar_db, 2)
keyword_total_cost_tan_bar_db = keyword_cost_input_token_tan_bar_db + keyword_cost_output_token_tan_bar_db
keyword_total_time_loop_tan_bar_db = keyword_end_time_loop_tan_bar_db - keyword_start_time_loop_tan_bar_db

# Display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_tan_bar_db[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tan_bar_db,1))
print("Total Input Tokens - ", keyword_input_token_tan_bar_db)
print("Total Input Cost = USD ",keyword_cost_input_token_tan_bar_db)
print("Total Output Tokens - ", keyword_output_token_tan_bar_db)
print("Total Output Cost = USD ",keyword_cost_output_token_tan_bar_db)
print("Total Cost = USD ",round(keyword_total_cost_tan_bar_db,2))

Executed  10  Iterations
Total Execution time (in secs) -  37.1
Total Input Tokens -  104729
Total Input Cost = USD  1.05
Total Output Tokens -  805
Total Output Cost = USD  0.02
Total Cost = USD  1.07


In [579]:
# Build the positive keywords/phrases summary table for this store from the
# JSON strings collected in keyword_positive_output_tan_bar_db.

# Column layout: three identifier columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store name once: the first keyword_mappings key containing the
# identifier wins, otherwise the placeholder is kept
input_store_identifier = 'tan_bar_db'
store_name_tan_bar_db = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name_tan_bar_db = value
        break

# Collect rows as plain dicts and build the DataFrame once at the end;
# calling pd.concat inside the loop re-copies the whole frame on every row
keyword_rows_tan_bar_db = []
for json_str in keyword_positive_output_tan_bar_db:
    # Each entry parses to a mapping of category -> list of items that carry
    # comma-separated 'keywords' and 'phrases' strings
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # Keywords row first, then the phrases row, for the same category
            for row_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_tan_bar_db
                row_data['Sentiment'] = 'Positive'
                row_data['Type'] = row_type
                # Normalise the spacing around each comma-separated entry
                row_data[category] = ', '.join(part.strip() for part in content[row_type].split(','))
                keyword_rows_tan_bar_db.append(row_data)

# With no rows, fall back to an empty frame that still has the expected columns
if keyword_rows_tan_bar_db:
    positive_keywords_tan_bar_db = pd.DataFrame(keyword_rows_tan_bar_db)
else:
    positive_keywords_tan_bar_db = pd.DataFrame(columns=columns)

# Consolidate rows that share 'Store Name', 'Sentiment' and 'Type' into a single row;
# each topic column is the space-joined text of its non-null values
positive_keywords_tan_bar_db = positive_keywords_tan_bar_db.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_tan_bar_db

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Tanishq Jewellers-Al Barsha, DB",Positive,keywords,"Trust :5, Reliable :4, Trusted :3, Authentic :...","Great : 20, Excellent : 15, Amazing : 12, Wond...","helpful : 78, patient : 45, knowledgeable : 43...","unique designs: 15, good designs: 10, beautifu...","collection : 78, collections : 60, variety : 1...","discount :10, offers :8, deal :5, schemes :2, ...",No relevant positive keywords/ phrases,"best prices: 3, good price: 3, competitive rat...","quality :8, purest :2, purity :2, craftsmanshi...","exchange offer :2, exchange process :2, exchan..."
1,"Tanishq Jewellers-Al Barsha, DB",Positive,phrases,"Trustworthy place :2, Trusted shop to buy :2, ...","Great experience : 30, Wonderful experience : ...","very helpful : 30, excellent service : 28, gre...","latest and unique designs: 1, perfect design: ...","good collection : 10, nice collection : 9, ama...","amazing discount :2, best offers :2, great dis...",No relevant positive keywords/ phrases,"amazing price: 2, within our budget: 2, right ...","excellent quality :2, high-quality jewelry :2,...","exchange offer :2, exchange process very smoot..."


### tan_fah_db

In [580]:
# Initialize the output list: one positive_keywords result is appended per topic that has matching reviews
keyword_positive_output_tan_fah_db = []
# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# One-element list so the counter can be mutated here while the progress thread holds a reference to it
keyword_counter_tan_fah_db = [0]
keyword_input_token_tan_fah_db = 0
keyword_output_token_tan_fah_db = 0
keyword_start_time_loop_tan_fah_db = time.time()

# Threading setup for the dynamic progress message
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_tan_fah_db, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Look the store's frame up once instead of twice per topic
    sentiment_df_tan_fah_db = keyword_dataframes['tan_fah_db_final_sen_df_jul']
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_tan_fah_db[0] += 1
        # Keep the review texts of the rows where the topic column equals 1
        filtered_comments_tan_fah_db = sentiment_df_tan_fah_db.loc[sentiment_df_tan_fah_db[topic] == 1, 'review_text'].tolist()
        # Only call positive_keywords when there is at least one matching review
        if filtered_comments_tan_fah_db:
            # The call result is unpacked as (output, input token count, output token count)
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_tan_fah_db, topic)
            keyword_positive_output_tan_fah_db.append(keywords)
            keyword_input_token_tan_fah_db += input_tokens_loop
            keyword_output_token_tan_fah_db += output_token_loop
finally:
    # Always stop the dynamic message thread, even when the loop raises,
    # otherwise it would keep running in the background after the cell fails
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tan_fah_db = time.time()
# Cost is computed from per-1000-token rates: 0.01 for input tokens, 0.03 for output tokens
keyword_cost_input_token_tan_fah_db = round((0.01/1000)*keyword_input_token_tan_fah_db, 2)
keyword_cost_output_token_tan_fah_db = round((0.03/1000)*keyword_output_token_tan_fah_db, 2)
keyword_total_cost_tan_fah_db = keyword_cost_input_token_tan_fah_db + keyword_cost_output_token_tan_fah_db
keyword_total_time_loop_tan_fah_db = keyword_end_time_loop_tan_fah_db - keyword_start_time_loop_tan_fah_db

# Display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_tan_fah_db[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tan_fah_db,1))
print("Total Input Tokens - ", keyword_input_token_tan_fah_db)
print("Total Input Cost = USD ",keyword_cost_input_token_tan_fah_db)
print("Total Output Tokens - ", keyword_output_token_tan_fah_db)
print("Total Output Cost = USD ",keyword_cost_output_token_tan_fah_db)
print("Total Cost = USD ",round(keyword_total_cost_tan_fah_db,2))

Executed  10  Iterations
Total Execution time (in secs) -  41.6
Total Input Tokens -  120588
Total Input Cost = USD  1.21
Total Output Tokens -  789
Total Output Cost = USD  0.02
Total Cost = USD  1.23


In [581]:
# Build the positive keywords/phrases summary table for this store from the
# JSON strings collected in keyword_positive_output_tan_fah_db.

# Column layout: three identifier columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store name once: the first keyword_mappings key containing the
# identifier wins, otherwise the placeholder is kept
input_store_identifier = 'tan_fah_db'
store_name_tan_fah_db = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name_tan_fah_db = value
        break

# Collect rows as plain dicts and build the DataFrame once at the end;
# calling pd.concat inside the loop re-copies the whole frame on every row
keyword_rows_tan_fah_db = []
for json_str in keyword_positive_output_tan_fah_db:
    # Each entry parses to a mapping of category -> list of items that carry
    # comma-separated 'keywords' and 'phrases' strings
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # Keywords row first, then the phrases row, for the same category
            for row_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_tan_fah_db
                row_data['Sentiment'] = 'Positive'
                row_data['Type'] = row_type
                # Normalise the spacing around each comma-separated entry
                row_data[category] = ', '.join(part.strip() for part in content[row_type].split(','))
                keyword_rows_tan_fah_db.append(row_data)

# With no rows, fall back to an empty frame that still has the expected columns
if keyword_rows_tan_fah_db:
    positive_keywords_tan_fah_db = pd.DataFrame(keyword_rows_tan_fah_db)
else:
    positive_keywords_tan_fah_db = pd.DataFrame(columns=columns)

# Consolidate rows that share 'Store Name', 'Sentiment' and 'Type' into a single row;
# each topic column is the space-joined text of its non-null values
positive_keywords_tan_fah_db = positive_keywords_tan_fah_db.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_tan_fah_db

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Tanishq Jewellers-Al Fahidi, DB",Positive,keywords,"Trust :5, Reliable :3, Trusted :3, Trustable :...","good experience : 20, nice experience : 15, gr...","helpful : 50, friendly : 40, patient : 30, pol...","design : 98, designs : 95, unique : 10, elegan...","variety :3, wide range :3, different ornaments...","discount : 8, deal : 7, offers : 5, discounts ...","less :3, good :2","best price :4, competitive prices :2, reasonab...","quality :10, product :8, good :5, exceptional ...","exchange policy :3, exchange offer :2, exchang..."
1,"Tanishq Jewellers-Al Fahidi, DB",Positive,phrases,"Trusted must visit :2, Most trustable brand :2...","awesome experience : 3, amazing experience : 3...","very helpful and patient : 10, extremely helpf...","nice design : 20, beautiful designs : 10, amaz...","wide range of collections :2, wide variety of ...","great discounts : 3, best deal : 3, additional...","good making charges :2, less making charges :2","transparent and competitive pricing :2, within...","quality product :3, good product :3, product q...","flexible exchange policies :1, hassle-free exp..."


### tan_kar_db

In [582]:
# Initialize the output list: one positive_keywords result is appended per topic that has matching reviews
keyword_positive_output_tan_kar_db = []
# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# One-element list so the counter can be mutated here while the progress thread holds a reference to it
keyword_counter_tan_kar_db = [0]
keyword_input_token_tan_kar_db = 0
keyword_output_token_tan_kar_db = 0
keyword_start_time_loop_tan_kar_db = time.time()

# Threading setup for the dynamic progress message
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_tan_kar_db, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Look the store's frame up once instead of twice per topic
    sentiment_df_tan_kar_db = keyword_dataframes['tan_kar_db_final_sen_df_jul']
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_tan_kar_db[0] += 1
        # Keep the review texts of the rows where the topic column equals 1
        filtered_comments_tan_kar_db = sentiment_df_tan_kar_db.loc[sentiment_df_tan_kar_db[topic] == 1, 'review_text'].tolist()
        # Only call positive_keywords when there is at least one matching review
        if filtered_comments_tan_kar_db:
            # The call result is unpacked as (output, input token count, output token count)
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_tan_kar_db, topic)
            keyword_positive_output_tan_kar_db.append(keywords)
            keyword_input_token_tan_kar_db += input_tokens_loop
            keyword_output_token_tan_kar_db += output_token_loop
finally:
    # Always stop the dynamic message thread, even when the loop raises,
    # otherwise it would keep running in the background after the cell fails
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tan_kar_db = time.time()
# Cost is computed from per-1000-token rates: 0.01 for input tokens, 0.03 for output tokens
keyword_cost_input_token_tan_kar_db = round((0.01/1000)*keyword_input_token_tan_kar_db, 2)
keyword_cost_output_token_tan_kar_db = round((0.03/1000)*keyword_output_token_tan_kar_db, 2)
keyword_total_cost_tan_kar_db = keyword_cost_input_token_tan_kar_db + keyword_cost_output_token_tan_kar_db
keyword_total_time_loop_tan_kar_db = keyword_end_time_loop_tan_kar_db - keyword_start_time_loop_tan_kar_db

# Display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_tan_kar_db[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tan_kar_db,1))
print("Total Input Tokens - ", keyword_input_token_tan_kar_db)
print("Total Input Cost = USD ",keyword_cost_input_token_tan_kar_db)
print("Total Output Tokens - ", keyword_output_token_tan_kar_db)
print("Total Output Cost = USD ",keyword_cost_output_token_tan_kar_db)
print("Total Cost = USD ",round(keyword_total_cost_tan_kar_db,2))

Executed  10  Iterations
Total Execution time (in secs) -  26.0
Total Input Tokens -  68297
Total Input Cost = USD  0.68
Total Output Tokens -  734
Total Output Cost = USD  0.02
Total Cost = USD  0.7


In [583]:
# Build the positive keywords/phrases summary table for this store from the
# JSON strings collected in keyword_positive_output_tan_kar_db.

# Column layout: three identifier columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store name once: the first keyword_mappings key containing the
# identifier wins, otherwise the placeholder is kept
input_store_identifier = 'tan_kar_db'
store_name_tan_kar_db = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name_tan_kar_db = value
        break

# Collect rows as plain dicts and build the DataFrame once at the end;
# calling pd.concat inside the loop re-copies the whole frame on every row
keyword_rows_tan_kar_db = []
for json_str in keyword_positive_output_tan_kar_db:
    # Each entry parses to a mapping of category -> list of items that carry
    # comma-separated 'keywords' and 'phrases' strings
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # Keywords row first, then the phrases row, for the same category
            for row_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_tan_kar_db
                row_data['Sentiment'] = 'Positive'
                row_data['Type'] = row_type
                # Normalise the spacing around each comma-separated entry
                row_data[category] = ', '.join(part.strip() for part in content[row_type].split(','))
                keyword_rows_tan_kar_db.append(row_data)

# With no rows, fall back to an empty frame that still has the expected columns
if keyword_rows_tan_kar_db:
    positive_keywords_tan_kar_db = pd.DataFrame(keyword_rows_tan_kar_db)
else:
    positive_keywords_tan_kar_db = pd.DataFrame(columns=columns)

# Consolidate rows that share 'Store Name', 'Sentiment' and 'Type' into a single row;
# each topic column is the space-joined text of its non-null values
positive_keywords_tan_kar_db = positive_keywords_tan_kar_db.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_tan_kar_db

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Tanishq Jewellers-Al Karama, DB",Positive,keywords,"Trust :3, Trusted :3, Transparency :2, Genuine...","pleasant : 10, friendly : 9, helpful : 8, welc...","helpful : 50, friendly : 45, patient : 40, pro...","designs : 45, unique : 10, beautiful : 9, eleg...","collection : 95, collections : 60, variety : 5...","discount :6, offers :5, deal :3, discounted :2...",,"Fair :5, Reasonable :3, Genuine :2, Value :2, ...","quality :10, excellent :5, good quality :3, be...","exchange :6, exchanged :3, buying :2, old gold..."
1,"Tanishq Jewellers-Al Karama, DB",Positive,phrases,"Trustworthy service :2, Trusted name of Tanish...","great experience : 15, amazing experience : 12...","excellent customer service : 25, great service...","amazing designs : 4, good designs : 4, beautif...","wide range : 3, wide variety : 2, amazing coll...","special discount :3, good discount :2, best of...",,"fair prices :3, reasonable price :3, value for...","quality of the products is excellent :1, quali...","buying and exchanging :1, best place to buy an..."


### tan_ham_ad

In [584]:
# Initialize the output list: one positive_keywords result is appended per topic that has matching reviews
keyword_positive_output_tan_ham_ad = []
# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# One-element list so the counter can be mutated here while the progress thread holds a reference to it
keyword_counter_tan_ham_ad = [0]
keyword_input_token_tan_ham_ad = 0
keyword_output_token_tan_ham_ad = 0
keyword_start_time_loop_tan_ham_ad = time.time()

# Threading setup for the dynamic progress message
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_tan_ham_ad, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Look the store's frame up once instead of twice per topic
    sentiment_df_tan_ham_ad = keyword_dataframes['tan_ham_ad_final_sen_df_jul']
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_tan_ham_ad[0] += 1
        # Keep the review texts of the rows where the topic column equals 1
        filtered_comments_tan_ham_ad = sentiment_df_tan_ham_ad.loc[sentiment_df_tan_ham_ad[topic] == 1, 'review_text'].tolist()
        # Only call positive_keywords when there is at least one matching review
        if filtered_comments_tan_ham_ad:
            # The call result is unpacked as (output, input token count, output token count)
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_tan_ham_ad, topic)
            keyword_positive_output_tan_ham_ad.append(keywords)
            keyword_input_token_tan_ham_ad += input_tokens_loop
            keyword_output_token_tan_ham_ad += output_token_loop
finally:
    # Always stop the dynamic message thread, even when the loop raises,
    # otherwise it would keep running in the background after the cell fails
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tan_ham_ad = time.time()
# Cost is computed from per-1000-token rates: 0.01 for input tokens, 0.03 for output tokens
keyword_cost_input_token_tan_ham_ad = round((0.01/1000)*keyword_input_token_tan_ham_ad, 2)
keyword_cost_output_token_tan_ham_ad = round((0.03/1000)*keyword_output_token_tan_ham_ad, 2)
keyword_total_cost_tan_ham_ad = keyword_cost_input_token_tan_ham_ad + keyword_cost_output_token_tan_ham_ad
keyword_total_time_loop_tan_ham_ad = keyword_end_time_loop_tan_ham_ad - keyword_start_time_loop_tan_ham_ad

# Display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_tan_ham_ad[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tan_ham_ad,1))
print("Total Input Tokens - ", keyword_input_token_tan_ham_ad)
print("Total Input Cost = USD ",keyword_cost_input_token_tan_ham_ad)
print("Total Output Tokens - ", keyword_output_token_tan_ham_ad)
print("Total Output Cost = USD ",keyword_cost_output_token_tan_ham_ad)
print("Total Cost = USD ",round(keyword_total_cost_tan_ham_ad,2))

Executed  10  Iterations
Total Execution time (in secs) -  33.6
Total Input Tokens -  46403
Total Input Cost = USD  0.46
Total Output Tokens -  757
Total Output Cost = USD  0.02
Total Cost = USD  0.48


In [585]:
# Build the positive keywords/phrases summary table for this store from the
# JSON strings collected in keyword_positive_output_tan_ham_ad.

# Column layout: three identifier columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store name once: the first keyword_mappings key containing the
# identifier wins, otherwise the placeholder is kept
input_store_identifier = 'tan_ham_ad'
store_name_tan_ham_ad = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name_tan_ham_ad = value
        break

# Collect rows as plain dicts and build the DataFrame once at the end;
# calling pd.concat inside the loop re-copies the whole frame on every row
keyword_rows_tan_ham_ad = []
for json_str in keyword_positive_output_tan_ham_ad:
    # Each entry parses to a mapping of category -> list of items that carry
    # comma-separated 'keywords' and 'phrases' strings
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # Keywords row first, then the phrases row, for the same category
            for row_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_tan_ham_ad
                row_data['Sentiment'] = 'Positive'
                row_data['Type'] = row_type
                # Normalise the spacing around each comma-separated entry
                row_data[category] = ', '.join(part.strip() for part in content[row_type].split(','))
                keyword_rows_tan_ham_ad.append(row_data)

# With no rows, fall back to an empty frame that still has the expected columns
if keyword_rows_tan_ham_ad:
    positive_keywords_tan_ham_ad = pd.DataFrame(keyword_rows_tan_ham_ad)
else:
    positive_keywords_tan_ham_ad = pd.DataFrame(columns=columns)

# Consolidate rows that share 'Store Name', 'Sentiment' and 'Type' into a single row;
# each topic column is the space-joined text of its non-null values
positive_keywords_tan_ham_ad = positive_keywords_tan_ham_ad.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_tan_ham_ad

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Tanishq Jewellers-Hamdan Bin Mohammed Street, AD",Positive,keywords,"Trust :5, Reliable :3, Genuine :2, Authentic :...","pleasant : 5, welcoming : 4, comfortable : 4, ...","helpful : 35, knowledgeable : 20, friendly : 1...","unique :8, beautiful :7, good :6, excellent :4...","collection : 50, variety : 5, selection : 4, o...","discount :5, offers :4, deal :3, price :1, off...","reasonable :2, affordable :1, economic :1, les...","discount :2, price :2, offers :1, charges :1, ...","quality :8, high quality :5, good quality :3, ...",exchange deductions :2
1,"Tanishq Jewellers-Hamdan Bin Mohammed Street, AD",Positive,phrases,"Trust of TaTa :4, You can trust the Tata brand...","pleasant experience : 4, welcoming atmosphere ...","very helpful and pleasant : 2, very helpful an...","very good designs :3, unique designs :3, beaut...","great collection : 6, nice collection : 6, goo...","best discount :1, good offers :1, lot of disco...","reasonable making charge :2, affordable making...","good prices :1, price offers :1, value for mon...","quality of their products :2, quality of the j...",0% exchange deductions :2


### tan_mee_db

In [586]:
# Positive keyword/phrase extraction for store `tan_mee_db`.
# For every topic, the reviews whose topic column equals 1 are passed to
# positive_keywords(); its responses and token counts are accumulated so the
# run time and cost can be reported at the end of the cell.

# Responses returned by positive_keywords(), one per topic that had matching reviews
keyword_positive_output_tan_mee_db = []
# Topic columns to iterate over (the 'review_text' column is not a topic)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# One-element list: it is handed to the progress thread and incremented in place below
keyword_counter_tan_mee_db = [0]
keyword_input_token_tan_mee_db = 0
keyword_output_token_tan_mee_db = 0
keyword_start_time_loop_tan_mee_db = time.time()

# Resolve the store's DataFrame once, before the progress thread is started,
# so a missing key fails before any thread exists.
reviews_tan_mee_db = keyword_dataframes['tan_mee_db_final_sen_df_jul']

# Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(
    target=print_dynamic_message_keyword,
    args=(keyword_counter_tan_mee_db, keyword_total_iterations, stop_event),
)
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_tan_mee_db[0] += 1
        # Review texts of the rows where the topic column has a value of 1
        filtered_comments_tan_mee_db = reviews_tan_mee_db[reviews_tan_mee_db[topic] == 1]['review_text'].tolist()
        # Only call positive_keywords when at least one review matched the topic
        if filtered_comments_tan_mee_db:
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_tan_mee_db, topic)
            keyword_positive_output_tan_mee_db.append(keywords)
            keyword_input_token_tan_mee_db += input_tokens_loop
            keyword_output_token_tan_mee_db += output_token_loop
finally:
    # Always signal and join the dynamic message thread, even when the loop
    # raised: stop_event is the only stop signal this cell gives that thread.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tan_mee_db = time.time()
# Cost in USD: 0.01 per 1000 input tokens, 0.03 per 1000 output tokens
keyword_cost_input_token_tan_mee_db = round((0.01/1000)*keyword_input_token_tan_mee_db,2)
keyword_cost_output_token_tan_mee_db = round((0.03/1000)*keyword_output_token_tan_mee_db,2)
keyword_total_cost_tan_mee_db = keyword_cost_input_token_tan_mee_db + keyword_cost_output_token_tan_mee_db
keyword_total_time_loop_tan_mee_db = keyword_end_time_loop_tan_mee_db - keyword_start_time_loop_tan_mee_db

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_tan_mee_db[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tan_mee_db,1))
print("Total Input Tokens - ", keyword_input_token_tan_mee_db)
print("Total Input Cost = USD ",keyword_cost_input_token_tan_mee_db)
print("Total Output Tokens - ", keyword_output_token_tan_mee_db)
print("Total Output Cost = USD ",keyword_cost_output_token_tan_mee_db)
print("Total Cost = USD ",round(keyword_total_cost_tan_mee_db,2))

Executed  10  Iterations
Total Execution time (in secs) -  30.6
Total Input Tokens -  75503
Total Input Cost = USD  0.76
Total Output Tokens -  705
Total Output Cost = USD  0.02
Total Cost = USD  0.78


In [587]:
# Build the positive keyword/phrase summary table for store `tan_mee_db`.
# Each entry of keyword_positive_output_tan_mee_db is parsed as JSON of the form
# {category: [{'keywords': '...', 'phrases': '...'}, ...]}; every entry yields
# one 'keywords' row and one 'phrases' row, which are then merged per Type.

# Output columns: identifying columns first, then one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the display name once: first keyword_mappings key containing the identifier
input_store_identifier = 'tan_mee_db'
store_name_tan_mee_db = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name_tan_mee_db = value
        break

# Collect plain dict rows and build the DataFrame once, instead of calling
# pd.concat for every row (which re-copies the whole frame each time).
keyword_rows_tan_mee_db = []
for json_str in keyword_positive_output_tan_mee_db:
    # Load the JSON string into a dictionary
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # Keywords row first, then phrases row
            for row_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_tan_mee_db
                row_data['Sentiment'] = 'Positive'
                row_data['Type'] = row_type
                # Normalise spacing around the comma-separated entries
                row_data[category] = ', '.join(part.strip() for part in content[row_type].split(','))
                keyword_rows_tan_mee_db.append(row_data)

# With no rows, fall back to an empty frame that still has the expected columns
if keyword_rows_tan_mee_db:
    positive_keywords_tan_mee_db = pd.DataFrame(keyword_rows_tan_mee_db)
else:
    positive_keywords_tan_mee_db = pd.DataFrame(columns=columns)

# Consolidate rows that share 'Store Name', 'Sentiment' and 'Type' into a single
# row; topics with no value for the group become an empty string.
positive_keywords_tan_mee_db = positive_keywords_tan_mee_db.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_tan_mee_db

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Tanishq Jewellers-Meena Bazar, DB",Positive,keywords,"trust :5, reliable :3, genuine :3, honesty :3,...",No relevant positive keywords/ phrases,"patient : 15, helpful : 12, friendly : 10, kno...","unique designs: 3, beautiful design: 3, good d...","collection : 45, variety : 8, range : 5, optio...","offers :8, discount :2, deal :2, savings :1, m...",best making :1,"Reasonable :3, Best :2, Genuine :1, Economical...","Good quality :3, High quality :3, Best quality...",No relevant positive keywords/ phrases
1,"Tanishq Jewellers-Meena Bazar, DB",Positive,phrases,"earned our trust :2, trust of Tata :2, trust T...",No relevant positive keywords/ phrases,"very helpful and patient : 3, very friendly an...","very unique designs: 2, beautiful collection: ...","vast collection : 2, wide range : 2, amazing c...","Dubai Shopping Festival offers :2, best making...",best making charges :1,"Reasonable price :2, Best price :2, Good price...","High-quality jewellery :2, Good quality gold :...",No relevant positive keywords/ phrases


### tan_sil_db

In [588]:
# Positive keyword/phrase extraction for store `tan_sil_db`.
# For every topic, the reviews whose topic column equals 1 are passed to
# positive_keywords(); its responses and token counts are accumulated so the
# run time and cost can be reported at the end of the cell.

# Responses returned by positive_keywords(), one per topic that had matching reviews
keyword_positive_output_tan_sil_db = []
# Topic columns to iterate over (the 'review_text' column is not a topic)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# One-element list: it is handed to the progress thread and incremented in place below
keyword_counter_tan_sil_db = [0]
keyword_input_token_tan_sil_db = 0
keyword_output_token_tan_sil_db = 0
keyword_start_time_loop_tan_sil_db = time.time()

# Resolve the store's DataFrame once, before the progress thread is started,
# so a missing key fails before any thread exists.
reviews_tan_sil_db = keyword_dataframes['tan_sil_db_final_sen_df_jul']

# Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(
    target=print_dynamic_message_keyword,
    args=(keyword_counter_tan_sil_db, keyword_total_iterations, stop_event),
)
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_tan_sil_db[0] += 1
        # Review texts of the rows where the topic column has a value of 1
        filtered_comments_tan_sil_db = reviews_tan_sil_db[reviews_tan_sil_db[topic] == 1]['review_text'].tolist()
        # Only call positive_keywords when at least one review matched the topic
        if filtered_comments_tan_sil_db:
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_tan_sil_db, topic)
            keyword_positive_output_tan_sil_db.append(keywords)
            keyword_input_token_tan_sil_db += input_tokens_loop
            keyword_output_token_tan_sil_db += output_token_loop
finally:
    # Always signal and join the dynamic message thread, even when the loop
    # raised: stop_event is the only stop signal this cell gives that thread.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tan_sil_db = time.time()
# Cost in USD: 0.01 per 1000 input tokens, 0.03 per 1000 output tokens
keyword_cost_input_token_tan_sil_db = round((0.01/1000)*keyword_input_token_tan_sil_db,2)
keyword_cost_output_token_tan_sil_db = round((0.03/1000)*keyword_output_token_tan_sil_db,2)
keyword_total_cost_tan_sil_db = keyword_cost_input_token_tan_sil_db + keyword_cost_output_token_tan_sil_db
keyword_total_time_loop_tan_sil_db = keyword_end_time_loop_tan_sil_db - keyword_start_time_loop_tan_sil_db

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_tan_sil_db[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tan_sil_db,1))
print("Total Input Tokens - ", keyword_input_token_tan_sil_db)
print("Total Input Cost = USD ",keyword_cost_input_token_tan_sil_db)
print("Total Output Tokens - ", keyword_output_token_tan_sil_db)
print("Total Output Cost = USD ",keyword_cost_output_token_tan_sil_db)
print("Total Cost = USD ",round(keyword_total_cost_tan_sil_db,2))

Executed  10  Iterations
Total Execution time (in secs) -  37.6
Total Input Tokens -  69271
Total Input Cost = USD  0.69
Total Output Tokens -  771
Total Output Cost = USD  0.02
Total Cost = USD  0.71


In [589]:
# Build the positive keyword/phrase summary table for store `tan_sil_db`.
# Each entry of keyword_positive_output_tan_sil_db is parsed as JSON of the form
# {category: [{'keywords': '...', 'phrases': '...'}, ...]}; every entry yields
# one 'keywords' row and one 'phrases' row, which are then merged per Type.

# Output columns: identifying columns first, then one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the display name once: first keyword_mappings key containing the identifier
input_store_identifier = 'tan_sil_db'
store_name_tan_sil_db = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name_tan_sil_db = value
        break

# Collect plain dict rows and build the DataFrame once, instead of calling
# pd.concat for every row (which re-copies the whole frame each time).
keyword_rows_tan_sil_db = []
for json_str in keyword_positive_output_tan_sil_db:
    # Load the JSON string into a dictionary
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # Keywords row first, then phrases row
            for row_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_tan_sil_db
                row_data['Sentiment'] = 'Positive'
                row_data['Type'] = row_type
                # Normalise spacing around the comma-separated entries
                row_data[category] = ', '.join(part.strip() for part in content[row_type].split(','))
                keyword_rows_tan_sil_db.append(row_data)

# With no rows, fall back to an empty frame that still has the expected columns
if keyword_rows_tan_sil_db:
    positive_keywords_tan_sil_db = pd.DataFrame(keyword_rows_tan_sil_db)
else:
    positive_keywords_tan_sil_db = pd.DataFrame(columns=columns)

# Consolidate rows that share 'Store Name', 'Sentiment' and 'Type' into a single
# row; topics with no value for the group become an empty string.
positive_keywords_tan_sil_db = positive_keywords_tan_sil_db.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_tan_sil_db

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Tanishq Jewellers-Silicon Central, DB",Positive,keywords,"Trust :5, Trusted :4, Reliable :2, Honest :1, ...","Great Experience : 45, Good Experience : 30, W...","helpful : 78, friendly : 45, professional : 35...","designs : 50, unique : 8, elegant : 7, exquisi...","variety :5, options :4, selection :3, range :2...","discount :7, offers :5, discounted :2, offer :...",lesser making charges :1,"reasonable price: 3, affordable prices: 1, val...","quality :15, high quality :4, superior quality...","exchange :5, exchanged :4, full value :1, buy :1"
1,"Tanishq Jewellers-Silicon Central, DB",Positive,phrases,"Trust of TATA :2, Name to be trusted :2, Trust...","Amazing shopping experience : 5, Seamless shop...","very helpful : 20, extremely helpful : 10, ver...","excellent designs : 6, unique designs : 4, ele...","wide variety :2, variety of designs :2, variet...","good discount :3, great discount :2, genuine d...",No relevant positive phrases,"fit in the budget: 2, within budget: 1, at you...","Amazing quality :2, best for the quality :2, i...","exchanged at full value :1, adjusted the amoun..."


### mia_awm_ad

In [590]:
# Positive keyword/phrase extraction for store `mia_awm_ad`.
# For every topic, the reviews whose topic column equals 1 are passed to
# positive_keywords(); its responses and token counts are accumulated so the
# run time and cost can be reported at the end of the cell.

# Responses returned by positive_keywords(), one per topic that had matching reviews
keyword_positive_output_mia_awm_ad = []
# Topic columns to iterate over (the 'review_text' column is not a topic)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# One-element list: it is handed to the progress thread and incremented in place below
keyword_counter_mia_awm_ad = [0]
keyword_input_token_mia_awm_ad = 0
keyword_output_token_mia_awm_ad = 0
keyword_start_time_loop_mia_awm_ad = time.time()

# Resolve the store's DataFrame once, before the progress thread is started,
# so a missing key fails before any thread exists.
reviews_mia_awm_ad = keyword_dataframes['mia_awm_ad_final_sen_df_jul']

# Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(
    target=print_dynamic_message_keyword,
    args=(keyword_counter_mia_awm_ad, keyword_total_iterations, stop_event),
)
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_mia_awm_ad[0] += 1
        # Review texts of the rows where the topic column has a value of 1
        filtered_comments_mia_awm_ad = reviews_mia_awm_ad[reviews_mia_awm_ad[topic] == 1]['review_text'].tolist()
        # Only call positive_keywords when at least one review matched the topic
        if filtered_comments_mia_awm_ad:
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_mia_awm_ad, topic)
            keyword_positive_output_mia_awm_ad.append(keywords)
            keyword_input_token_mia_awm_ad += input_tokens_loop
            keyword_output_token_mia_awm_ad += output_token_loop
finally:
    # Always signal and join the dynamic message thread, even when the loop
    # raised: stop_event is the only stop signal this cell gives that thread.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_mia_awm_ad = time.time()
# Cost in USD: 0.01 per 1000 input tokens, 0.03 per 1000 output tokens
keyword_cost_input_token_mia_awm_ad = round((0.01/1000)*keyword_input_token_mia_awm_ad,2)
keyword_cost_output_token_mia_awm_ad = round((0.03/1000)*keyword_output_token_mia_awm_ad,2)
keyword_total_cost_mia_awm_ad = keyword_cost_input_token_mia_awm_ad + keyword_cost_output_token_mia_awm_ad
keyword_total_time_loop_mia_awm_ad = keyword_end_time_loop_mia_awm_ad - keyword_start_time_loop_mia_awm_ad

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_mia_awm_ad[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_mia_awm_ad,1))
print("Total Input Tokens - ", keyword_input_token_mia_awm_ad)
print("Total Input Cost = USD ",keyword_cost_input_token_mia_awm_ad)
print("Total Output Tokens - ", keyword_output_token_mia_awm_ad)
print("Total Output Cost = USD ",keyword_cost_output_token_mia_awm_ad)
print("Total Cost = USD ",round(keyword_total_cost_mia_awm_ad,2))

Executed  10  Iterations
Total Execution time (in secs) -  6.5
Total Input Tokens -  2357
Total Input Cost = USD  0.02
Total Output Tokens -  174
Total Output Cost = USD  0.01
Total Cost = USD  0.03


In [591]:
# Build the positive keyword/phrase summary table for store `mia_awm_ad`.
# Each entry of keyword_positive_output_mia_awm_ad is parsed as JSON of the form
# {category: [{'keywords': '...', 'phrases': '...'}, ...]}; every entry yields
# one 'keywords' row and one 'phrases' row, which are then merged per Type.

# Output columns: identifying columns first, then one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the display name once: first keyword_mappings key containing the identifier
input_store_identifier = 'mia_awm_ad'
store_name_mia_awm_ad = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name_mia_awm_ad = value
        break

# Collect plain dict rows and build the DataFrame once, instead of calling
# pd.concat for every row (which re-copies the whole frame each time).
keyword_rows_mia_awm_ad = []
for json_str in keyword_positive_output_mia_awm_ad:
    # Load the JSON string into a dictionary
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # Keywords row first, then phrases row
            for row_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_mia_awm_ad
                row_data['Sentiment'] = 'Positive'
                row_data['Type'] = row_type
                # Normalise spacing around the comma-separated entries
                row_data[category] = ', '.join(part.strip() for part in content[row_type].split(','))
                keyword_rows_mia_awm_ad.append(row_data)

# With no rows, fall back to an empty frame that still has the expected columns
if keyword_rows_mia_awm_ad:
    positive_keywords_mia_awm_ad = pd.DataFrame(keyword_rows_mia_awm_ad)
else:
    positive_keywords_mia_awm_ad = pd.DataFrame(columns=columns)

# Consolidate rows that share 'Store Name', 'Sentiment' and 'Type' into a single
# row; topics with no value for the group become an empty string.
positive_keywords_mia_awm_ad = positive_keywords_mia_awm_ad.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_mia_awm_ad

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Mia-Al Wahda Mall, AD",Positive,keywords,,good experience :2,"service :8, staff :2",,collection :6,,,,,
1,"Mia-Al Wahda Mall, AD",Positive,phrases,,No relevant positive phrases,"Great service by Tahseen :1, Good service by m...",,"good collection :2, Nice collection :2, Superb...",,,,,


### mia_bur_db

In [592]:
# Positive keyword/phrase extraction for store `mia_bur_db`.
# For every topic, the reviews whose topic column equals 1 are passed to
# positive_keywords(); its responses and token counts are accumulated so the
# run time and cost can be reported at the end of the cell.

# Responses returned by positive_keywords(), one per topic that had matching reviews
keyword_positive_output_mia_bur_db = []
# Topic columns to iterate over (the 'review_text' column is not a topic)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# One-element list: it is handed to the progress thread and incremented in place below
keyword_counter_mia_bur_db = [0]
keyword_input_token_mia_bur_db = 0
keyword_output_token_mia_bur_db = 0
keyword_start_time_loop_mia_bur_db = time.time()

# Resolve the store's DataFrame once, before the progress thread is started,
# so a missing key fails before any thread exists.
reviews_mia_bur_db = keyword_dataframes['mia_bur_db_final_sen_df_jul']

# Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(
    target=print_dynamic_message_keyword,
    args=(keyword_counter_mia_bur_db, keyword_total_iterations, stop_event),
)
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_mia_bur_db[0] += 1
        # Review texts of the rows where the topic column has a value of 1
        filtered_comments_mia_bur_db = reviews_mia_bur_db[reviews_mia_bur_db[topic] == 1]['review_text'].tolist()
        # Only call positive_keywords when at least one review matched the topic
        if filtered_comments_mia_bur_db:
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_mia_bur_db, topic)
            keyword_positive_output_mia_bur_db.append(keywords)
            keyword_input_token_mia_bur_db += input_tokens_loop
            keyword_output_token_mia_bur_db += output_token_loop
finally:
    # Always signal and join the dynamic message thread, even when the loop
    # raised: stop_event is the only stop signal this cell gives that thread.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_mia_bur_db = time.time()
# Cost in USD: 0.01 per 1000 input tokens, 0.03 per 1000 output tokens
keyword_cost_input_token_mia_bur_db = round((0.01/1000)*keyword_input_token_mia_bur_db,2)
keyword_cost_output_token_mia_bur_db = round((0.03/1000)*keyword_output_token_mia_bur_db,2)
keyword_total_cost_mia_bur_db = keyword_cost_input_token_mia_bur_db + keyword_cost_output_token_mia_bur_db
keyword_total_time_loop_mia_bur_db = keyword_end_time_loop_mia_bur_db - keyword_start_time_loop_mia_bur_db

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_mia_bur_db[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_mia_bur_db,1))
print("Total Input Tokens - ", keyword_input_token_mia_bur_db)
print("Total Input Cost = USD ",keyword_cost_input_token_mia_bur_db)
print("Total Output Tokens - ", keyword_output_token_mia_bur_db)
print("Total Output Cost = USD ",keyword_cost_output_token_mia_bur_db)
print("Total Cost = USD ",round(keyword_total_cost_mia_bur_db,2))

Executed  10  Iterations
Total Execution time (in secs) -  18.0
Total Input Tokens -  20921
Total Input Cost = USD  0.21
Total Output Tokens -  559
Total Output Cost = USD  0.02
Total Cost = USD  0.23


In [593]:
# Build the positive keyword/phrase summary table for store `mia_bur_db`.
# Each entry of keyword_positive_output_mia_bur_db is parsed as JSON of the form
# {category: [{'keywords': '...', 'phrases': '...'}, ...]}; every entry yields
# one 'keywords' row and one 'phrases' row, which are then merged per Type.

# Output columns: identifying columns first, then one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the display name once: first keyword_mappings key containing the identifier
input_store_identifier = 'mia_bur_db'
store_name_mia_bur_db = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name_mia_bur_db = value
        break

# Collect plain dict rows and build the DataFrame once, instead of calling
# pd.concat for every row (which re-copies the whole frame each time).
keyword_rows_mia_bur_db = []
for json_str in keyword_positive_output_mia_bur_db:
    # Load the JSON string into a dictionary
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # Keywords row first, then phrases row
            for row_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_mia_bur_db
                row_data['Sentiment'] = 'Positive'
                row_data['Type'] = row_type
                # Normalise spacing around the comma-separated entries
                row_data[category] = ', '.join(part.strip() for part in content[row_type].split(','))
                keyword_rows_mia_bur_db.append(row_data)

# With no rows, fall back to an empty frame that still has the expected columns
if keyword_rows_mia_bur_db:
    positive_keywords_mia_bur_db = pd.DataFrame(keyword_rows_mia_bur_db)
else:
    positive_keywords_mia_bur_db = pd.DataFrame(columns=columns)

# Consolidate rows that share 'Store Name', 'Sentiment' and 'Type' into a single
# row; topics with no value for the group become an empty string.
positive_keywords_mia_bur_db = positive_keywords_mia_bur_db.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_mia_bur_db

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Mia-Burjuman, DB",Positive,keywords,Transparency :1,"good experience :10, wonderful experience :8, ...","helpful : 45, friendly : 20, patient : 18, pol...","designs :5, unique :3, modern :2, elegant :1, ...","collection : 45, collections : 20, variety : 3...","discount :4, offers :2, voucher :1",,"good price:3, reasonable cost:1, genuine prici...","good quality:1, quality:1",
1,"Mia-Burjuman, DB",Positive,phrases,No relevant positive phrases,"very good experience :3, had a great experienc...","very helpful : 10, excellent service : 8, grea...","modern designs :2, unique designs :2, delicate...","nice collection : 8, good collection : 7, grea...","gave me 10% discount :1, Anniversary discount ...",,"Worth the money:2, Worth spending money:1, gre...",No relevant positive keywords/ phrases,


### tan_am_om

In [594]:
# Positive keyword/phrase extraction for store `tan_am_om`.
# For every topic, the reviews whose topic column equals 1 are passed to
# positive_keywords(); its responses and token counts are accumulated so the
# run time and cost can be reported at the end of the cell.

# Responses returned by positive_keywords(), one per topic that had matching reviews
keyword_positive_output_tan_am_om = []
# Topic columns to iterate over (the 'review_text' column is not a topic)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# One-element list: it is handed to the progress thread and incremented in place below
keyword_counter_tan_am_om = [0]
keyword_input_token_tan_am_om = 0
keyword_output_token_tan_am_om = 0
keyword_start_time_loop_tan_am_om = time.time()

# Resolve the store's DataFrame once, before the progress thread is started,
# so a missing key fails before any thread exists.
reviews_tan_am_om = keyword_dataframes['tan_am_om_final_sen_df_jul']

# Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(
    target=print_dynamic_message_keyword,
    args=(keyword_counter_tan_am_om, keyword_total_iterations, stop_event),
)
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_tan_am_om[0] += 1
        # Review texts of the rows where the topic column has a value of 1
        filtered_comments_tan_am_om = reviews_tan_am_om[reviews_tan_am_om[topic] == 1]['review_text'].tolist()
        # Only call positive_keywords when at least one review matched the topic
        if filtered_comments_tan_am_om:
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_tan_am_om, topic)
            keyword_positive_output_tan_am_om.append(keywords)
            keyword_input_token_tan_am_om += input_tokens_loop
            keyword_output_token_tan_am_om += output_token_loop
finally:
    # Always signal and join the dynamic message thread, even when the loop
    # raised: stop_event is the only stop signal this cell gives that thread.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tan_am_om = time.time()
# Cost in USD: 0.01 per 1000 input tokens, 0.03 per 1000 output tokens
keyword_cost_input_token_tan_am_om = round((0.01/1000)*keyword_input_token_tan_am_om,2)
keyword_cost_output_token_tan_am_om = round((0.03/1000)*keyword_output_token_tan_am_om,2)
keyword_total_cost_tan_am_om = keyword_cost_input_token_tan_am_om + keyword_cost_output_token_tan_am_om
keyword_total_time_loop_tan_am_om = keyword_end_time_loop_tan_am_om - keyword_start_time_loop_tan_am_om

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_tan_am_om[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tan_am_om,1))
print("Total Input Tokens - ", keyword_input_token_tan_am_om)
print("Total Input Cost = USD ",keyword_cost_input_token_tan_am_om)
print("Total Output Tokens - ", keyword_output_token_tan_am_om)
print("Total Output Cost = USD ",keyword_cost_output_token_tan_am_om)
print("Total Cost = USD ",round(keyword_total_cost_tan_am_om,2))

Executed  10  Iterations
Total Execution time (in secs) -  12.0
Total Input Tokens -  8017
Total Input Cost = USD  0.08
Total Output Tokens -  432
Total Output Cost = USD  0.01
Total Cost = USD  0.09


In [595]:
# Build the positive keyword/phrase summary table for store `tan_am_om`.
# Each entry of keyword_positive_output_tan_am_om is parsed as JSON of the form
# {category: [{'keywords': '...', 'phrases': '...'}, ...]}; every entry yields
# one 'keywords' row and one 'phrases' row, which are then merged per Type.

# Output columns: identifying columns first, then one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the display name once: first keyword_mappings key containing the identifier
input_store_identifier = 'tan_am_om'
store_name_tan_am_om = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name_tan_am_om = value
        break

# Collect plain dict rows and build the DataFrame once, instead of calling
# pd.concat for every row (which re-copies the whole frame each time).
keyword_rows_tan_am_om = []
for json_str in keyword_positive_output_tan_am_om:
    # Load the JSON string into a dictionary
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # Keywords row first, then phrases row
            for row_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_tan_am_om
                row_data['Sentiment'] = 'Positive'
                row_data['Type'] = row_type
                # Normalise spacing around the comma-separated entries
                row_data[category] = ', '.join(part.strip() for part in content[row_type].split(','))
                keyword_rows_tan_am_om.append(row_data)

# With no rows, fall back to an empty frame that still has the expected columns
if keyword_rows_tan_am_om:
    positive_keywords_tan_am_om = pd.DataFrame(keyword_rows_tan_am_om)
else:
    positive_keywords_tan_am_om = pd.DataFrame(columns=columns)

# Consolidate rows that share 'Store Name', 'Sentiment' and 'Type' into a single
# row; topics with no value for the group become an empty string.
positive_keywords_tan_am_om = positive_keywords_tan_am_om.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_tan_am_om

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Tanishq Jewellers-Avenues Mall, OM",Positive,keywords,No relevant positive keywords/ phrases,"great experience :8, good ambiance :2, wonderf...","helpful :5, patient :4, friendly :3, polite :3...","designs :5, collection :2, unique :2, exquisit...","collection :10, variety :2, selection :1",,,"reasonable :1, affordable :1",craftsmanship :1,
1,"Tanishq Jewellers-Avenues Mall, OM",Positive,phrases,No relevant positive keywords/ phrases,"Overall an excellent customer experience :1, O...","very helpful :3, very patient :2, very friendl...","Beautiful designs :1, Designs are fabulous :1,...","best collection :3, good collection :2, wide v...",,,affordable range of prices :1,quality of craftsmanship :1,


### tan_atl_ga

In [596]:
# Positive keyword extraction for store 'tan_atl_ga'.
# For every topic that has at least one review flagged 1, the matching review texts are
# sent to positive_keywords() and the returned JSON string plus token usage are collected.

# One result string per topic that had positive reviews (a list, despite the old comment)
keyword_positive_output_tan_atl_ga = []
# Topic columns to iterate over (the 'review_text' column is not a topic)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Single-element list so the progress thread sees updates made by this cell
keyword_counter_tan_atl_ga = [0]
keyword_input_token_tan_atl_ga = 0
keyword_output_token_tan_atl_ga = 0
keyword_start_time_loop_tan_atl_ga = time.time()

# Look the store's frame up once, before the thread starts, so a missing key fails fast
# and the loop does not index keyword_dataframes twice per topic
sentiment_df_tan_atl_ga = keyword_dataframes['tan_atl_ga_final_sen_df_jul']

#Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_tan_atl_ga, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_tan_atl_ga[0] += 1
        # Review texts of the rows where this topic has a value of 1
        filtered_comments_tan_atl_ga = sentiment_df_tan_atl_ga[sentiment_df_tan_atl_ga[topic] == 1]['review_text'].tolist()
        # Only call the API when there is at least one positive comment
        if filtered_comments_tan_atl_ga:
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_tan_atl_ga, topic)
            keyword_positive_output_tan_atl_ga.append(keywords)
            keyword_input_token_tan_atl_ga += input_tokens_loop
            keyword_output_token_tan_atl_ga += output_token_loop
finally:
    # Always stop the dynamic message thread, even when a call above raises;
    # otherwise it would keep running after the cell has failed
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tan_atl_ga = time.time()
# Hard-coded rates: USD 0.01 per 1,000 input tokens and USD 0.03 per 1,000 output tokens
# NOTE(review): confirm these match the pricing of the model used by positive_keywords()
keyword_cost_input_token_tan_atl_ga = round((0.01/1000)*keyword_input_token_tan_atl_ga,2)
keyword_cost_output_token_tan_atl_ga = round((0.03/1000)*keyword_output_token_tan_atl_ga,2)
# Total is the sum of the two already-rounded components
keyword_total_cost_tan_atl_ga = keyword_cost_input_token_tan_atl_ga + keyword_cost_output_token_tan_atl_ga
keyword_total_time_loop_tan_atl_ga = keyword_end_time_loop_tan_atl_ga - keyword_start_time_loop_tan_atl_ga

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_tan_atl_ga[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tan_atl_ga,1))
print("Total Input Tokens - ", keyword_input_token_tan_atl_ga)
print("Total Input Cost = USD ",keyword_cost_input_token_tan_atl_ga)
print("Total Output Tokens - ", keyword_output_token_tan_atl_ga)
print("Total Output Cost = USD ",keyword_cost_output_token_tan_atl_ga)
print("Total Cost = USD ",round(keyword_total_cost_tan_atl_ga,2))

Executed  10  Iterations
Total Execution time (in secs) -  17.5
Total Input Tokens -  21352
Total Input Cost = USD  0.21
Total Output Tokens -  580
Total Output Cost = USD  0.02
Total Cost = USD  0.23


In [597]:
# Pivot the per-topic JSON results for store 'tan_atl_ga' into a table with one
# 'keywords' row and one 'phrases' row, and one column per topic.

# Column layout: three identifier columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store's display name once; it is identical for every row.
# The first mapping key that contains the identifier wins.
input_store_identifier = 'tan_atl_ga'
store_name_tan_atl_ga = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name_tan_atl_ga = value
        break

# Collect rows as plain dicts and build the DataFrame once at the end;
# calling pd.concat inside the loop re-copies the whole frame for every row
keyword_rows_tan_atl_ga = []
for json_str in keyword_positive_output_tan_atl_ga:
    # Each result is a JSON object mapping a category to a list of
    # {'keywords': ..., 'phrases': ...} entries
    data = json.loads(json_str)
    for category, content_list in data.items():
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name_tan_atl_ga
        row_data['Sentiment'] = 'Positive'
        for content in content_list:
            # Keywords row: normalise the comma-separated list to ', ' separators
            row_data['Type'] = 'keywords'
            row_data[category] = ', '.join(kw.strip() for kw in content['keywords'].split(','))
            # Store a snapshot, because row_data is mutated again just below
            keyword_rows_tan_atl_ga.append(dict(row_data))
            # Phrases row for the same category
            row_data['Type'] = 'phrases'
            row_data[category] = ', '.join(ph.strip() for ph in content['phrases'].split(','))
            keyword_rows_tan_atl_ga.append(dict(row_data))

if keyword_rows_tan_atl_ga:
    positive_keywords_tan_atl_ga = pd.DataFrame(keyword_rows_tan_atl_ga)
else:
    # No topic produced output: keep an empty frame that still has the expected columns
    positive_keywords_tan_atl_ga = pd.DataFrame(columns=columns)

# Collapse the one-row-per-category records into a single row per
# ('Store Name', 'Sentiment', 'Type'); missing cells are dropped before joining
positive_keywords_tan_atl_ga = positive_keywords_tan_atl_ga.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_tan_atl_ga

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Tanishq-Atlanta, GA",Positive,keywords,"Trust :1, Reliable :1","Great :15, Nice :10, Wonderful :8, Awesome :7,...","patient : 45, helpful : 44, friendly : 20, kno...","unique designs: 3, elegant: 2, exquisite: 2, s...","collection : 45, selection : 8, variety : 4, m...","great discounts:1, good offers:1, best deals:1",,"Fair :2, Good :1, Easy :1","quality :5, high quality :3, excellent :1, aut...",
1,"Tanishq-Atlanta, GA",Positive,phrases,"Return customer with 100% satisfaction :1, As ...","Great experience :10, Wonderful experience :5,...","very patient : 15, very helpful : 12, extremel...","trendy and elegant designs: 1, one of a kind a...","great collection : 8, nice collection : 7, bea...",No relevant positive phrases,,"Fair pricing :2, Price is really good :1, Easy...","quality is very good :1, quality gold :1, gold...",


### tan_fc_qa

In [600]:
# Positive keyword extraction for store 'tan_fc_qa'.
# For every topic that has at least one review flagged 1, the matching review texts are
# sent to positive_keywords() and the returned JSON string plus token usage are collected.

# One result string per topic that had positive reviews (a list, despite the old comment)
keyword_positive_output_tan_fc_qa = []
# Topic columns to iterate over (the 'review_text' column is not a topic)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Single-element list so the progress thread sees updates made by this cell
keyword_counter_tan_fc_qa = [0]
keyword_input_token_tan_fc_qa = 0
keyword_output_token_tan_fc_qa = 0
keyword_start_time_loop_tan_fc_qa = time.time()

# Look the store's frame up once, before the thread starts, so a missing key fails fast
# and the loop does not index keyword_dataframes twice per topic
sentiment_df_tan_fc_qa = keyword_dataframes['tan_fc_qa_final_sen_df_jul']

#Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_tan_fc_qa, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_tan_fc_qa[0] += 1
        # Review texts of the rows where this topic has a value of 1
        filtered_comments_tan_fc_qa = sentiment_df_tan_fc_qa[sentiment_df_tan_fc_qa[topic] == 1]['review_text'].tolist()
        # Only call the API when there is at least one positive comment
        if filtered_comments_tan_fc_qa:
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_tan_fc_qa, topic)
            keyword_positive_output_tan_fc_qa.append(keywords)
            keyword_input_token_tan_fc_qa += input_tokens_loop
            keyword_output_token_tan_fc_qa += output_token_loop
finally:
    # Always stop the dynamic message thread, even when a call above raises;
    # otherwise it would keep running after the cell has failed
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tan_fc_qa = time.time()
# Hard-coded rates: USD 0.01 per 1,000 input tokens and USD 0.03 per 1,000 output tokens
# NOTE(review): confirm these match the pricing of the model used by positive_keywords()
keyword_cost_input_token_tan_fc_qa = round((0.01/1000)*keyword_input_token_tan_fc_qa,2)
keyword_cost_output_token_tan_fc_qa = round((0.03/1000)*keyword_output_token_tan_fc_qa,2)
# Total is the sum of the two already-rounded components
keyword_total_cost_tan_fc_qa = keyword_cost_input_token_tan_fc_qa + keyword_cost_output_token_tan_fc_qa
keyword_total_time_loop_tan_fc_qa = keyword_end_time_loop_tan_fc_qa - keyword_start_time_loop_tan_fc_qa

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_tan_fc_qa[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tan_fc_qa,1))
print("Total Input Tokens - ", keyword_input_token_tan_fc_qa)
print("Total Input Cost = USD ",keyword_cost_input_token_tan_fc_qa)
print("Total Output Tokens - ", keyword_output_token_tan_fc_qa)
print("Total Output Cost = USD ",keyword_cost_output_token_tan_fc_qa)
print("Total Cost = USD ",round(keyword_total_cost_tan_fc_qa,2))

Executed  10  Iterations
Total Execution time (in secs) -  13.5
Total Input Tokens -  7748
Total Input Cost = USD  0.08
Total Output Tokens -  437
Total Output Cost = USD  0.01
Total Cost = USD  0.09


In [601]:
# Pivot the per-topic JSON results for store 'tan_fc_qa' into a table with one
# 'keywords' row and one 'phrases' row, and one column per topic.

# Column layout: three identifier columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store's display name once; it is identical for every row.
# The first mapping key that contains the identifier wins.
input_store_identifier = 'tan_fc_qa'
store_name_tan_fc_qa = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name_tan_fc_qa = value
        break

# Collect rows as plain dicts and build the DataFrame once at the end;
# calling pd.concat inside the loop re-copies the whole frame for every row
keyword_rows_tan_fc_qa = []
for json_str in keyword_positive_output_tan_fc_qa:
    # Each result is a JSON object mapping a category to a list of
    # {'keywords': ..., 'phrases': ...} entries
    data = json.loads(json_str)
    for category, content_list in data.items():
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name_tan_fc_qa
        row_data['Sentiment'] = 'Positive'
        for content in content_list:
            # Keywords row: normalise the comma-separated list to ', ' separators
            row_data['Type'] = 'keywords'
            row_data[category] = ', '.join(kw.strip() for kw in content['keywords'].split(','))
            # Store a snapshot, because row_data is mutated again just below
            keyword_rows_tan_fc_qa.append(dict(row_data))
            # Phrases row for the same category
            row_data['Type'] = 'phrases'
            row_data[category] = ', '.join(ph.strip() for ph in content['phrases'].split(','))
            keyword_rows_tan_fc_qa.append(dict(row_data))

if keyword_rows_tan_fc_qa:
    positive_keywords_tan_fc_qa = pd.DataFrame(keyword_rows_tan_fc_qa)
else:
    # No topic produced output: keep an empty frame that still has the expected columns
    positive_keywords_tan_fc_qa = pd.DataFrame(columns=columns)

# Collapse the one-row-per-category records into a single row per
# ('Store Name', 'Sentiment', 'Type'); missing cells are dropped before joining
positive_keywords_tan_fc_qa = positive_keywords_tan_fc_qa.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_tan_fc_qa

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Tanishq Jewellers-Festival City, QA",Positive,keywords,No relevant positive keywords/ phrases,No relevant positive keywords/ phrases,"Helpful :2, Patient :2, Knowledgeable :2, Coop...",No relevant positive keywords/ phrases,collection :4,good discount :1,lowest making charges :1,great prices :1,"Excellent :1, quality :1",
1,"Tanishq Jewellers-Festival City, QA",Positive,phrases,No relevant positive keywords/ phrases,No relevant positive keywords/ phrases,"Assisted us very well :1, Excellent service :1...",No relevant positive keywords/ phrases,"good range :1, nice collection :1, wonderful c...",No relevant positive phrases,No relevant positive phrases,No relevant positive keywords/ phrases,quality of gold :1,


### tan_gs_db

In [602]:
# Positive keyword extraction for store 'tan_gs_db'.
# For every topic that has at least one review flagged 1, the matching review texts are
# sent to positive_keywords() and the returned JSON string plus token usage are collected.

# One result string per topic that had positive reviews (a list, despite the old comment)
keyword_positive_output_tan_gs_db = []
# Topic columns to iterate over (the 'review_text' column is not a topic)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Single-element list so the progress thread sees updates made by this cell
keyword_counter_tan_gs_db = [0]
keyword_input_token_tan_gs_db = 0
keyword_output_token_tan_gs_db = 0
keyword_start_time_loop_tan_gs_db = time.time()

# Look the store's frame up once, before the thread starts, so a missing key fails fast
# and the loop does not index keyword_dataframes twice per topic
sentiment_df_tan_gs_db = keyword_dataframes['tan_gs_db_final_sen_df_jul']

#Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_tan_gs_db, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_tan_gs_db[0] += 1
        # Review texts of the rows where this topic has a value of 1
        filtered_comments_tan_gs_db = sentiment_df_tan_gs_db[sentiment_df_tan_gs_db[topic] == 1]['review_text'].tolist()
        # Only call the API when there is at least one positive comment
        if filtered_comments_tan_gs_db:
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_tan_gs_db, topic)
            keyword_positive_output_tan_gs_db.append(keywords)
            keyword_input_token_tan_gs_db += input_tokens_loop
            keyword_output_token_tan_gs_db += output_token_loop
finally:
    # Always stop the dynamic message thread, even when a call above raises;
    # otherwise it would keep running after the cell has failed
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tan_gs_db = time.time()
# Hard-coded rates: USD 0.01 per 1,000 input tokens and USD 0.03 per 1,000 output tokens
# NOTE(review): confirm these match the pricing of the model used by positive_keywords()
keyword_cost_input_token_tan_gs_db = round((0.01/1000)*keyword_input_token_tan_gs_db,2)
keyword_cost_output_token_tan_gs_db = round((0.03/1000)*keyword_output_token_tan_gs_db,2)
# Total is the sum of the two already-rounded components
keyword_total_cost_tan_gs_db = keyword_cost_input_token_tan_gs_db + keyword_cost_output_token_tan_gs_db
keyword_total_time_loop_tan_gs_db = keyword_end_time_loop_tan_gs_db - keyword_start_time_loop_tan_gs_db

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_tan_gs_db[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tan_gs_db,1))
print("Total Input Tokens - ", keyword_input_token_tan_gs_db)
print("Total Input Cost = USD ",keyword_cost_input_token_tan_gs_db)
print("Total Output Tokens - ", keyword_output_token_tan_gs_db)
print("Total Output Cost = USD ",keyword_cost_output_token_tan_gs_db)
print("Total Cost = USD ",round(keyword_total_cost_tan_gs_db,2))

Executed  10  Iterations
Total Execution time (in secs) -  31.6
Total Input Tokens -  72048
Total Input Cost = USD  0.72
Total Output Tokens -  838
Total Output Cost = USD  0.03
Total Cost = USD  0.75


In [603]:
# Pivot the per-topic JSON results for store 'tan_gs_db' into a table with one
# 'keywords' row and one 'phrases' row, and one column per topic.

# Column layout: three identifier columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store's display name once; it is identical for every row.
# The first mapping key that contains the identifier wins.
input_store_identifier = 'tan_gs_db'
store_name_tan_gs_db = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name_tan_gs_db = value
        break

# Collect rows as plain dicts and build the DataFrame once at the end;
# calling pd.concat inside the loop re-copies the whole frame for every row
keyword_rows_tan_gs_db = []
for json_str in keyword_positive_output_tan_gs_db:
    # Each result is a JSON object mapping a category to a list of
    # {'keywords': ..., 'phrases': ...} entries
    data = json.loads(json_str)
    for category, content_list in data.items():
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name_tan_gs_db
        row_data['Sentiment'] = 'Positive'
        for content in content_list:
            # Keywords row: normalise the comma-separated list to ', ' separators
            row_data['Type'] = 'keywords'
            row_data[category] = ', '.join(kw.strip() for kw in content['keywords'].split(','))
            # Store a snapshot, because row_data is mutated again just below
            keyword_rows_tan_gs_db.append(dict(row_data))
            # Phrases row for the same category
            row_data['Type'] = 'phrases'
            row_data[category] = ', '.join(ph.strip() for ph in content['phrases'].split(','))
            keyword_rows_tan_gs_db.append(dict(row_data))

if keyword_rows_tan_gs_db:
    positive_keywords_tan_gs_db = pd.DataFrame(keyword_rows_tan_gs_db)
else:
    # No topic produced output: keep an empty frame that still has the expected columns
    positive_keywords_tan_gs_db = pd.DataFrame(columns=columns)

# Collapse the one-row-per-category records into a single row per
# ('Store Name', 'Sentiment', 'Type'); missing cells are dropped before joining
positive_keywords_tan_gs_db = positive_keywords_tan_gs_db.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_tan_gs_db

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Tanishq Jewellers-Gold Souk, DB",Positive,keywords,"Trust :5, Trusted :4, Reliable :2, Authentic :...","Good experience : 20, Great experience : 15, W...","helpful : 50, polite : 20, friendly : 18, pati...","Good designs :10, Beautiful designs :8, Excell...","Good collection :10, Nice collection :8, Amazi...","deal :5, offers :4, discount :4, discounts :3,...",reasonable manufacturing :1,"good prices: 2, best price: 2, honest prices: ...","good quality :5, high-quality :4, premium qual...","exchange :6, gold exchange :3, exchange progra..."
1,"Tanishq Jewellers-Gold Souk, DB",Positive,phrases,"Trust with your eyes closed :1, Trustworthy pl...","Good experience overall : 2, Great experience ...","very helpful : 10, extremely helpful : 8, very...","Beautifully designed jewelry :2, High-quality,...","wide variety of designs :3, wide range of coll...","great deal :2, good deal :2, amazing offers :2...",No relevant positive phrases,"comparable rate with Indian Markets: 1, good p...","quality of the jewelry was excellent :1, produ...","great exchange program :1, offer on Exchange o..."


### tan_lul_qa

In [604]:
# Positive keyword extraction for store 'tan_lul_qa'.
# For every topic that has at least one review flagged 1, the matching review texts are
# sent to positive_keywords() and the returned JSON string plus token usage are collected.

# One result string per topic that had positive reviews (a list, despite the old comment)
keyword_positive_output_tan_lul_qa = []
# Topic columns to iterate over (the 'review_text' column is not a topic)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Single-element list so the progress thread sees updates made by this cell
keyword_counter_tan_lul_qa = [0]
keyword_input_token_tan_lul_qa = 0
keyword_output_token_tan_lul_qa = 0
keyword_start_time_loop_tan_lul_qa = time.time()

# Look the store's frame up once, before the thread starts, so a missing key fails fast
# and the loop does not index keyword_dataframes twice per topic
sentiment_df_tan_lul_qa = keyword_dataframes['tan_lul_qa_final_sen_df_jul']

#Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_tan_lul_qa, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_tan_lul_qa[0] += 1
        # Review texts of the rows where this topic has a value of 1
        filtered_comments_tan_lul_qa = sentiment_df_tan_lul_qa[sentiment_df_tan_lul_qa[topic] == 1]['review_text'].tolist()
        # Only call the API when there is at least one positive comment
        if filtered_comments_tan_lul_qa:
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_tan_lul_qa, topic)
            keyword_positive_output_tan_lul_qa.append(keywords)
            keyword_input_token_tan_lul_qa += input_tokens_loop
            keyword_output_token_tan_lul_qa += output_token_loop
finally:
    # Always stop the dynamic message thread, even when a call above raises;
    # otherwise it would keep running after the cell has failed
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tan_lul_qa = time.time()
# Hard-coded rates: USD 0.01 per 1,000 input tokens and USD 0.03 per 1,000 output tokens
# NOTE(review): confirm these match the pricing of the model used by positive_keywords()
keyword_cost_input_token_tan_lul_qa = round((0.01/1000)*keyword_input_token_tan_lul_qa,2)
keyword_cost_output_token_tan_lul_qa = round((0.03/1000)*keyword_output_token_tan_lul_qa,2)
# Total is the sum of the two already-rounded components
keyword_total_cost_tan_lul_qa = keyword_cost_input_token_tan_lul_qa + keyword_cost_output_token_tan_lul_qa
keyword_total_time_loop_tan_lul_qa = keyword_end_time_loop_tan_lul_qa - keyword_start_time_loop_tan_lul_qa

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_tan_lul_qa[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tan_lul_qa,1))
print("Total Input Tokens - ", keyword_input_token_tan_lul_qa)
print("Total Input Cost = USD ",keyword_cost_input_token_tan_lul_qa)
print("Total Output Tokens - ", keyword_output_token_tan_lul_qa)
print("Total Output Cost = USD ",keyword_cost_output_token_tan_lul_qa)
print("Total Cost = USD ",round(keyword_total_cost_tan_lul_qa,2))

Executed  10  Iterations
Total Execution time (in secs) -  19.0
Total Input Tokens -  17521
Total Input Cost = USD  0.18
Total Output Tokens -  684
Total Output Cost = USD  0.02
Total Cost = USD  0.2


In [605]:
# Pivot the per-topic JSON results for store 'tan_lul_qa' into a table with one
# 'keywords' row and one 'phrases' row, and one column per topic.

# Column layout: three identifier columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store's display name once; it is identical for every row.
# The first mapping key that contains the identifier wins.
input_store_identifier = 'tan_lul_qa'
store_name_tan_lul_qa = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name_tan_lul_qa = value
        break

# Collect rows as plain dicts and build the DataFrame once at the end;
# calling pd.concat inside the loop re-copies the whole frame for every row
keyword_rows_tan_lul_qa = []
for json_str in keyword_positive_output_tan_lul_qa:
    # Each result is a JSON object mapping a category to a list of
    # {'keywords': ..., 'phrases': ...} entries
    data = json.loads(json_str)
    for category, content_list in data.items():
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name_tan_lul_qa
        row_data['Sentiment'] = 'Positive'
        for content in content_list:
            # Keywords row: normalise the comma-separated list to ', ' separators
            row_data['Type'] = 'keywords'
            row_data[category] = ', '.join(kw.strip() for kw in content['keywords'].split(','))
            # Store a snapshot, because row_data is mutated again just below
            keyword_rows_tan_lul_qa.append(dict(row_data))
            # Phrases row for the same category
            row_data['Type'] = 'phrases'
            row_data[category] = ', '.join(ph.strip() for ph in content['phrases'].split(','))
            keyword_rows_tan_lul_qa.append(dict(row_data))

if keyword_rows_tan_lul_qa:
    positive_keywords_tan_lul_qa = pd.DataFrame(keyword_rows_tan_lul_qa)
else:
    # No topic produced output: keep an empty frame that still has the expected columns
    positive_keywords_tan_lul_qa = pd.DataFrame(columns=columns)

# Collapse the one-row-per-category records into a single row per
# ('Store Name', 'Sentiment', 'Type'); missing cells are dropped before joining
positive_keywords_tan_lul_qa = positive_keywords_tan_lul_qa.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_tan_lul_qa

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Tanishq Jewellers-Lulu Hypermarket, QA",Positive,keywords,"Trust :2, Transparency :2, Trusted :1, Reliabl...","Nice experience :5, Good experience :4, Great ...","helpful :8, friendly :7, patient :6, polite :3...","Good designs:3, Elegant designs:2, New Design:...","varieties :2, variety :1, range :1, assortment :1","special discount:2, offers:2",,"transparent :2, offers :2","top-notch :1, high-quality :1, superior :1, ex...","exchange :3, rate :1, resale :1"
1,"Tanishq Jewellers-Lulu Hypermarket, QA",Positive,phrases,"Legacy of trust and reliability :1, Transparen...","Shopping experience :3, Very good experience :...","very friendly and patient :2, very helpful and...","Designs are good:1, Accurate matching design:1...","wide varieties of design :1, wide variety :1, ...","special discount for QatarEnergy umbrella:1, n...",,"transparent about pricing :1, transparency in ...","superior quality and finish :1, high-quality j...","exchange process went smooth :1, provided very..."


### tan_mank_db

In [606]:
# Positive keyword extraction for store 'tan_mank_db'.
# For every topic that has at least one review flagged 1, the matching review texts are
# sent to positive_keywords() and the returned JSON string plus token usage are collected.

# One result string per topic that had positive reviews (a list, despite the old comment)
keyword_positive_output_tan_mank_db = []
# Topic columns to iterate over (the 'review_text' column is not a topic)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Single-element list so the progress thread sees updates made by this cell
keyword_counter_tan_mank_db = [0]
keyword_input_token_tan_mank_db = 0
keyword_output_token_tan_mank_db = 0
keyword_start_time_loop_tan_mank_db = time.time()

# Look the store's frame up once, before the thread starts, so a missing key fails fast
# and the loop does not index keyword_dataframes twice per topic
sentiment_df_tan_mank_db = keyword_dataframes['tan_mank_db_final_sen_df_jul']

#Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_tan_mank_db, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_tan_mank_db[0] += 1
        # Review texts of the rows where this topic has a value of 1
        filtered_comments_tan_mank_db = sentiment_df_tan_mank_db[sentiment_df_tan_mank_db[topic] == 1]['review_text'].tolist()
        # Only call the API when there is at least one positive comment
        if filtered_comments_tan_mank_db:
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_tan_mank_db, topic)
            keyword_positive_output_tan_mank_db.append(keywords)
            keyword_input_token_tan_mank_db += input_tokens_loop
            keyword_output_token_tan_mank_db += output_token_loop
finally:
    # Always stop the dynamic message thread, even when a call above raises;
    # otherwise it would keep running after the cell has failed
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tan_mank_db = time.time()
# Hard-coded rates: USD 0.01 per 1,000 input tokens and USD 0.03 per 1,000 output tokens
# NOTE(review): confirm these match the pricing of the model used by positive_keywords()
keyword_cost_input_token_tan_mank_db = round((0.01/1000)*keyword_input_token_tan_mank_db,2)
keyword_cost_output_token_tan_mank_db = round((0.03/1000)*keyword_output_token_tan_mank_db,2)
# Total is the sum of the two already-rounded components
keyword_total_cost_tan_mank_db = keyword_cost_input_token_tan_mank_db + keyword_cost_output_token_tan_mank_db
keyword_total_time_loop_tan_mank_db = keyword_end_time_loop_tan_mank_db - keyword_start_time_loop_tan_mank_db

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_tan_mank_db[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tan_mank_db,1))
print("Total Input Tokens - ", keyword_input_token_tan_mank_db)
print("Total Input Cost = USD ",keyword_cost_input_token_tan_mank_db)
print("Total Output Tokens - ", keyword_output_token_tan_mank_db)
print("Total Output Cost = USD ",keyword_cost_output_token_tan_mank_db)
print("Total Cost = USD ",round(keyword_total_cost_tan_mank_db,2))

Executed  10  Iterations
Total Execution time (in secs) -  16.5
Total Input Tokens -  8813
Total Input Cost = USD  0.09
Total Output Tokens -  488
Total Output Cost = USD  0.01
Total Cost = USD  0.1


In [607]:
# Pivot the per-topic JSON results for store 'tan_mank_db' into a table with one
# 'keywords' row and one 'phrases' row, and one column per topic.

# Column layout: three identifier columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store's display name once; it is identical for every row.
# The first mapping key that contains the identifier wins.
input_store_identifier = 'tan_mank_db'
store_name_tan_mank_db = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name_tan_mank_db = value
        break

# Collect rows as plain dicts and build the DataFrame once at the end;
# calling pd.concat inside the loop re-copies the whole frame for every row
keyword_rows_tan_mank_db = []
for json_str in keyword_positive_output_tan_mank_db:
    # Each result is a JSON object mapping a category to a list of
    # {'keywords': ..., 'phrases': ...} entries
    data = json.loads(json_str)
    for category, content_list in data.items():
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name_tan_mank_db
        row_data['Sentiment'] = 'Positive'
        for content in content_list:
            # Keywords row: normalise the comma-separated list to ', ' separators
            row_data['Type'] = 'keywords'
            row_data[category] = ', '.join(kw.strip() for kw in content['keywords'].split(','))
            # Store a snapshot, because row_data is mutated again just below
            keyword_rows_tan_mank_db.append(dict(row_data))
            # Phrases row for the same category
            row_data['Type'] = 'phrases'
            row_data[category] = ', '.join(ph.strip() for ph in content['phrases'].split(','))
            keyword_rows_tan_mank_db.append(dict(row_data))

if keyword_rows_tan_mank_db:
    positive_keywords_tan_mank_db = pd.DataFrame(keyword_rows_tan_mank_db)
else:
    # No topic produced output: keep an empty frame that still has the expected columns
    positive_keywords_tan_mank_db = pd.DataFrame(columns=columns)

# Collapse the one-row-per-category records into a single row per
# ('Store Name', 'Sentiment', 'Type'); missing cells are dropped before joining
positive_keywords_tan_mank_db = positive_keywords_tan_mank_db.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_tan_mank_db

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Tanishq Jewellers-UW Mall Al Mankhool, DB",Positive,keywords,transparency :1,"Great experience :4, Amazing experience :3, Ex...","helpful :8, excellent :4, courteous :3, patien...","unique :1, exquisite :1","collection :5, options :1, array :1","offers :2, deal :1",competitive making charges :1,"transparent :1, competitive :1","quality products:1, jewellery quality:1, high ...",
1,"Tanishq Jewellers-UW Mall Al Mankhool, DB",Positive,phrases,,"Highly recommend :3, Must visit :2, Looking fo...","very helpful :3, excellent service :2, excepti...","unique jewellery designs :1, exquisite and uni...","diverse collection :1, beautiful & unique coll...",No relevant positive phrases,No relevant positive phrases,Pricing was transparent and competitive :1,"quality products:1, jewellery quality is excep...",


### tan_rol_sh

In [608]:
# Raw responses returned by positive_keywords, one entry per topic that had matching reviews
keyword_positive_output_tan_rol_sh = []
# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Single-element list so the progress thread shares the counter by reference
keyword_counter_tan_rol_sh = [0]
keyword_input_token_tan_rol_sh = 0
keyword_output_token_tan_rol_sh = 0
keyword_start_time_loop_tan_rol_sh = time.time()

# Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_tan_rol_sh, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Look the store frame up once instead of twice per topic
    store_df_tan_rol_sh = keyword_dataframes['tan_rol_sh_final_sen_df_jul']

    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_tan_rol_sh[0] += 1
        # Keep the review text of rows where the topic has a value of 1
        filtered_comments_tan_rol_sh = store_df_tan_rol_sh.loc[store_df_tan_rol_sh[topic] == 1, 'review_text'].tolist()
        # Topics without any matching review are skipped (no API call, no output entry)
        if filtered_comments_tan_rol_sh:
            # Call the positive_keywords function and store the result
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_tan_rol_sh, topic)
            keyword_positive_output_tan_rol_sh.append(keywords)
            keyword_input_token_tan_rol_sh += input_tokens_loop
            keyword_output_token_tan_rol_sh += output_token_loop
finally:
    # Always stop the dynamic message thread, even when a call above raised;
    # presumably it loops until stop_event is set (like print_dynamic_message),
    # so without this it would keep running after a failure.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tan_rol_sh = time.time()
# Rates are USD per 1,000 tokens; each component is rounded to cents before summing
keyword_cost_input_token_tan_rol_sh = round((0.01/1000)*keyword_input_token_tan_rol_sh, 2)
keyword_cost_output_token_tan_rol_sh = round((0.03/1000)*keyword_output_token_tan_rol_sh, 2)
keyword_total_cost_tan_rol_sh = keyword_cost_input_token_tan_rol_sh + keyword_cost_output_token_tan_rol_sh
keyword_total_time_loop_tan_rol_sh = keyword_end_time_loop_tan_rol_sh - keyword_start_time_loop_tan_rol_sh

# Display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_tan_rol_sh[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tan_rol_sh,1))
print("Total Input Tokens - ", keyword_input_token_tan_rol_sh)
print("Total Input Cost = USD ",keyword_cost_input_token_tan_rol_sh)
print("Total Output Tokens - ", keyword_output_token_tan_rol_sh)
print("Total Output Cost = USD ",keyword_cost_output_token_tan_rol_sh)
print("Total Cost = USD ",round(keyword_total_cost_tan_rol_sh,2))

Executed  10  Iterations
Total Execution time (in secs) -  22.5
Total Input Tokens -  21758
Total Input Cost = USD  0.22
Total Output Tokens -  627
Total Output Cost = USD  0.02
Total Cost = USD  0.24


In [609]:
# Output layout: three identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store's display name once; it is the same for every row.
# The first keyword_mappings key containing the identifier wins.
input_store_identifier = 'tan_rol_sh'
store_name_tan_rol_sh = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name_tan_rol_sh = value
        break

# Rows are collected as plain dicts and turned into a DataFrame in one step,
# instead of growing the frame with pd.concat on every iteration.
keyword_rows_tan_rol_sh = []

# Process each JSON string
for json_str in keyword_positive_output_tan_rol_sh:
    # Load the JSON string into a dictionary: {category: [ {keywords, phrases}, ... ]}
    data = json.loads(json_str)
    for category, content_list in data.items():
        # Every row starts with all columns empty except the identifying ones
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name_tan_rol_sh
        row_data['Sentiment'] = 'Positive'
        for content in content_list:
            # Keywords are handled first
            row_data['Type'] = 'keywords'
            row_data[category] = ', '.join(kw.strip() for kw in content['keywords'].split(','))
            # Snapshot the dict: row_data is reused and mutated for the phrases row
            keyword_rows_tan_rol_sh.append(dict(row_data))
            # Phrases are handled next
            row_data['Type'] = 'phrases'
            row_data[category] = ', '.join(ph.strip() for ph in content['phrases'].split(','))
            keyword_rows_tan_rol_sh.append(dict(row_data))

# Keep the expected columns even when there was nothing to process
if keyword_rows_tan_rol_sh:
    positive_keywords_tan_rol_sh = pd.DataFrame(keyword_rows_tan_rol_sh)
else:
    positive_keywords_tan_rol_sh = pd.DataFrame(columns=columns)

# Consolidate to one row per ('Store Name', 'Sentiment', 'Type'); within each topic column
# the non-null entries are joined with a space (an all-null column becomes '')
positive_keywords_tan_rol_sh = positive_keywords_tan_rol_sh.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_tan_rol_sh

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Tanishq Jewellers-Rolla, SH",Positive,keywords,"Trust :3, Trusted :1, Trustable :1, Transparen...","Nice :5, Amazing :4, Wonderful :4, Great :4, E...","helpful : 20, professional : 15, friendly : 14...","designs :15, collection :5, unique :3, elegant...","collection : 45, collections : 20, variety : 3...","discount :3, offers :1",,"best price :1, Best prices :1","quality :5, craftsmanship :2, pure :1, high-qu...","exchange :4, value :1"
1,"Tanishq Jewellers-Rolla, SH",Positive,phrases,Best place to buy jewellery with trust of Tata...,"Nice place to shop :1, Very good shopping expe...","excellent service : 10, very good service : 8,...","excellent designs :3, wonderful designs :3, am...","good collection : 10, nice collection : 8, ama...","discount options :1, current discount offers :1",,No relevant positive phrases,"good quality :3, quality of the jewellery :1, ...","lifetime exchange :1, exchanging my old gold :..."


### tan_rse_wa

In [610]:
# Raw responses returned by positive_keywords, one entry per topic that had matching reviews
keyword_positive_output_tan_rse_wa = []
# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Single-element list so the progress thread shares the counter by reference
keyword_counter_tan_rse_wa = [0]
keyword_input_token_tan_rse_wa = 0
keyword_output_token_tan_rse_wa = 0
keyword_start_time_loop_tan_rse_wa = time.time()

# Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_tan_rse_wa, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Look the store frame up once instead of twice per topic
    store_df_tan_rse_wa = keyword_dataframes['tan_rse_wa_final_sen_df_jul']

    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_tan_rse_wa[0] += 1
        # Keep the review text of rows where the topic has a value of 1
        filtered_comments_tan_rse_wa = store_df_tan_rse_wa.loc[store_df_tan_rse_wa[topic] == 1, 'review_text'].tolist()
        # Topics without any matching review are skipped (no API call, no output entry)
        if filtered_comments_tan_rse_wa:
            # Call the positive_keywords function and store the result
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_tan_rse_wa, topic)
            keyword_positive_output_tan_rse_wa.append(keywords)
            keyword_input_token_tan_rse_wa += input_tokens_loop
            keyword_output_token_tan_rse_wa += output_token_loop
finally:
    # Always stop the dynamic message thread, even when a call above raised;
    # presumably it loops until stop_event is set (like print_dynamic_message),
    # so without this it would keep running after a failure.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tan_rse_wa = time.time()
# Rates are USD per 1,000 tokens; each component is rounded to cents before summing
keyword_cost_input_token_tan_rse_wa = round((0.01/1000)*keyword_input_token_tan_rse_wa, 2)
keyword_cost_output_token_tan_rse_wa = round((0.03/1000)*keyword_output_token_tan_rse_wa, 2)
keyword_total_cost_tan_rse_wa = keyword_cost_input_token_tan_rse_wa + keyword_cost_output_token_tan_rse_wa
keyword_total_time_loop_tan_rse_wa = keyword_end_time_loop_tan_rse_wa - keyword_start_time_loop_tan_rse_wa

# Display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_tan_rse_wa[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tan_rse_wa,1))
print("Total Input Tokens - ", keyword_input_token_tan_rse_wa)
print("Total Input Cost = USD ",keyword_cost_input_token_tan_rse_wa)
print("Total Output Tokens - ", keyword_output_token_tan_rse_wa)
print("Total Output Cost = USD ",keyword_cost_output_token_tan_rse_wa)
print("Total Cost = USD ",round(keyword_total_cost_tan_rse_wa,2))

Executed  10  Iterations
Total Execution time (in secs) -  16.5
Total Input Tokens -  15108
Total Input Cost = USD  0.15
Total Output Tokens -  505
Total Output Cost = USD  0.02
Total Cost = USD  0.17


In [611]:
# Output layout: three identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store's display name once; it is the same for every row.
# The first keyword_mappings key containing the identifier wins.
input_store_identifier = 'tan_rse_wa'
store_name_tan_rse_wa = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name_tan_rse_wa = value
        break

# Rows are collected as plain dicts and turned into a DataFrame in one step,
# instead of growing the frame with pd.concat on every iteration.
keyword_rows_tan_rse_wa = []

# Process each JSON string
for json_str in keyword_positive_output_tan_rse_wa:
    # Load the JSON string into a dictionary: {category: [ {keywords, phrases}, ... ]}
    data = json.loads(json_str)
    for category, content_list in data.items():
        # Every row starts with all columns empty except the identifying ones
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name_tan_rse_wa
        row_data['Sentiment'] = 'Positive'
        for content in content_list:
            # Keywords are handled first
            row_data['Type'] = 'keywords'
            row_data[category] = ', '.join(kw.strip() for kw in content['keywords'].split(','))
            # Snapshot the dict: row_data is reused and mutated for the phrases row
            keyword_rows_tan_rse_wa.append(dict(row_data))
            # Phrases are handled next
            row_data['Type'] = 'phrases'
            row_data[category] = ', '.join(ph.strip() for ph in content['phrases'].split(','))
            keyword_rows_tan_rse_wa.append(dict(row_data))

# Keep the expected columns even when there was nothing to process
if keyword_rows_tan_rse_wa:
    positive_keywords_tan_rse_wa = pd.DataFrame(keyword_rows_tan_rse_wa)
else:
    positive_keywords_tan_rse_wa = pd.DataFrame(columns=columns)

# Consolidate to one row per ('Store Name', 'Sentiment', 'Type'); within each topic column
# the non-null entries are joined with a space (an all-null column becomes '')
positive_keywords_tan_rse_wa = positive_keywords_tan_rse_wa.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_tan_rse_wa

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Tanishq-Redmond Seattle, WA",Positive,keywords,"Genuine :1, Reliable :1","Good experience :5, Pleasant experience :3, Wo...","patient : 15, helpful : 14, polite : 8, profes...","designs :15, collection :8, variety :3, crafts...","collection : 30, variety : 5, selection : 3, d...","offers :1, discounts :1",,reasonable :1,"quality :3, excellent :2, amazing :1",
1,"Tanishq-Redmond Seattle, WA",Positive,phrases,No relevant positive keywords/ phrases,"Great shopping experience :2, Delightful exper...","very helpful : 5, incredibly helpful : 3, very...","beautiful designs :1, different designs :1, la...","great collection : 8, stunning collection : 4,...",explained us in detail about the offers / disc...,,"helped with prices :1, prices were reasonable :1","good quality workmanship :1, high quality :1",


### tan_sc_ca

In [612]:
# Raw responses returned by positive_keywords, one entry per topic that had matching reviews
keyword_positive_output_tan_sc_ca = []
# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Single-element list so the progress thread shares the counter by reference
keyword_counter_tan_sc_ca = [0]
keyword_input_token_tan_sc_ca = 0
keyword_output_token_tan_sc_ca = 0
keyword_start_time_loop_tan_sc_ca = time.time()

# Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_tan_sc_ca, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Look the store frame up once instead of twice per topic
    store_df_tan_sc_ca = keyword_dataframes['tan_sc_ca_final_sen_df_jul']

    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_tan_sc_ca[0] += 1
        # Keep the review text of rows where the topic has a value of 1
        filtered_comments_tan_sc_ca = store_df_tan_sc_ca.loc[store_df_tan_sc_ca[topic] == 1, 'review_text'].tolist()
        # Topics without any matching review are skipped (no API call, no output entry)
        if filtered_comments_tan_sc_ca:
            # Call the positive_keywords function and store the result
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_tan_sc_ca, topic)
            keyword_positive_output_tan_sc_ca.append(keywords)
            keyword_input_token_tan_sc_ca += input_tokens_loop
            keyword_output_token_tan_sc_ca += output_token_loop
finally:
    # Always stop the dynamic message thread, even when a call above raised;
    # presumably it loops until stop_event is set (like print_dynamic_message),
    # so without this it would keep running after a failure.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tan_sc_ca = time.time()
# Rates are USD per 1,000 tokens; each component is rounded to cents before summing
keyword_cost_input_token_tan_sc_ca = round((0.01/1000)*keyword_input_token_tan_sc_ca, 2)
keyword_cost_output_token_tan_sc_ca = round((0.03/1000)*keyword_output_token_tan_sc_ca, 2)
keyword_total_cost_tan_sc_ca = keyword_cost_input_token_tan_sc_ca + keyword_cost_output_token_tan_sc_ca
keyword_total_time_loop_tan_sc_ca = keyword_end_time_loop_tan_sc_ca - keyword_start_time_loop_tan_sc_ca

# Display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_tan_sc_ca[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tan_sc_ca,1))
print("Total Input Tokens - ", keyword_input_token_tan_sc_ca)
print("Total Input Cost = USD ",keyword_cost_input_token_tan_sc_ca)
print("Total Output Tokens - ", keyword_output_token_tan_sc_ca)
print("Total Output Cost = USD ",keyword_cost_output_token_tan_sc_ca)
print("Total Cost = USD ",round(keyword_total_cost_tan_sc_ca,2))

Executed  10  Iterations
Total Execution time (in secs) -  25.0
Total Input Tokens -  13500
Total Input Cost = USD  0.14
Total Output Tokens -  541
Total Output Cost = USD  0.02
Total Cost = USD  0.16


In [613]:
# Output layout: three identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store's display name once; it is the same for every row.
# The first keyword_mappings key containing the identifier wins.
input_store_identifier = 'tan_sc_ca'
store_name_tan_sc_ca = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name_tan_sc_ca = value
        break

# Rows are collected as plain dicts and turned into a DataFrame in one step,
# instead of growing the frame with pd.concat on every iteration.
keyword_rows_tan_sc_ca = []

# Process each JSON string
for json_str in keyword_positive_output_tan_sc_ca:
    # Load the JSON string into a dictionary: {category: [ {keywords, phrases}, ... ]}
    data = json.loads(json_str)
    for category, content_list in data.items():
        # Every row starts with all columns empty except the identifying ones
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name_tan_sc_ca
        row_data['Sentiment'] = 'Positive'
        for content in content_list:
            # Keywords are handled first
            row_data['Type'] = 'keywords'
            row_data[category] = ', '.join(kw.strip() for kw in content['keywords'].split(','))
            # Snapshot the dict: row_data is reused and mutated for the phrases row
            keyword_rows_tan_sc_ca.append(dict(row_data))
            # Phrases are handled next
            row_data['Type'] = 'phrases'
            row_data[category] = ', '.join(ph.strip() for ph in content['phrases'].split(','))
            keyword_rows_tan_sc_ca.append(dict(row_data))

# Keep the expected columns even when there was nothing to process
if keyword_rows_tan_sc_ca:
    positive_keywords_tan_sc_ca = pd.DataFrame(keyword_rows_tan_sc_ca)
else:
    positive_keywords_tan_sc_ca = pd.DataFrame(columns=columns)

# Consolidate to one row per ('Store Name', 'Sentiment', 'Type'); within each topic column
# the non-null entries are joined with a space (an all-null column becomes '')
positive_keywords_tan_sc_ca = positive_keywords_tan_sc_ca.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_tan_sc_ca

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Tanishq-Santa Clara, CA",Positive,keywords,"Trusted :2, Trust :2, Guaranteed :1, Certifica...","Ambience :3, Atmosphere :2, Showroom :2, Spaci...","patient :5, knowledgeable :5, friendly :4, hel...","designs :6, innovative :1, classic :1, stunnin...","variety :3, wide variety :2, huge collections ...",,,"affordable :1, fair :1, reasonable :1","Quality :3, High-quality :2, Outstanding :2",
1,"Tanishq-Santa Clara, CA",Positive,phrases,"Most trusted brand :1, Trustworthy shopping en...","Beautiful store :4, Amazing experience :3, Won...","incredibly kind and attentive :2, above and be...","beautiful designs :2, excellent designs :1, in...","wide variety of gold and diamond jewellery :1,...",,,"great prices :1, surprisingly affordable exper...","Quality of the jewelry is outstanding :2, High...",


### tan_sc_sh

In [614]:
# Raw responses returned by positive_keywords, one entry per topic that had matching reviews
keyword_positive_output_tan_sc_sh = []
# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Single-element list so the progress thread shares the counter by reference
keyword_counter_tan_sc_sh = [0]
keyword_input_token_tan_sc_sh = 0
keyword_output_token_tan_sc_sh = 0
keyword_start_time_loop_tan_sc_sh = time.time()

# Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_tan_sc_sh, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Look the store frame up once instead of twice per topic
    store_df_tan_sc_sh = keyword_dataframes['tan_sc_sh_final_sen_df_jul']

    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_tan_sc_sh[0] += 1
        # Keep the review text of rows where the topic has a value of 1
        filtered_comments_tan_sc_sh = store_df_tan_sc_sh.loc[store_df_tan_sc_sh[topic] == 1, 'review_text'].tolist()
        # Topics without any matching review are skipped (no API call, no output entry)
        if filtered_comments_tan_sc_sh:
            # Call the positive_keywords function and store the result
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_tan_sc_sh, topic)
            keyword_positive_output_tan_sc_sh.append(keywords)
            keyword_input_token_tan_sc_sh += input_tokens_loop
            keyword_output_token_tan_sc_sh += output_token_loop
finally:
    # Always stop the dynamic message thread, even when a call above raised;
    # presumably it loops until stop_event is set (like print_dynamic_message),
    # so without this it would keep running after a failure.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tan_sc_sh = time.time()
# Rates are USD per 1,000 tokens; each component is rounded to cents before summing
keyword_cost_input_token_tan_sc_sh = round((0.01/1000)*keyword_input_token_tan_sc_sh, 2)
keyword_cost_output_token_tan_sc_sh = round((0.03/1000)*keyword_output_token_tan_sc_sh, 2)
keyword_total_cost_tan_sc_sh = keyword_cost_input_token_tan_sc_sh + keyword_cost_output_token_tan_sc_sh
keyword_total_time_loop_tan_sc_sh = keyword_end_time_loop_tan_sc_sh - keyword_start_time_loop_tan_sc_sh

# Display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_tan_sc_sh[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tan_sc_sh,1))
print("Total Input Tokens - ", keyword_input_token_tan_sc_sh)
print("Total Input Cost = USD ",keyword_cost_input_token_tan_sc_sh)
print("Total Output Tokens - ", keyword_output_token_tan_sc_sh)
print("Total Output Cost = USD ",keyword_cost_output_token_tan_sc_sh)
print("Total Cost = USD ",round(keyword_total_cost_tan_sc_sh,2))

Executed  10  Iterations
Total Execution time (in secs) -  21.0
Total Input Tokens -  22188
Total Input Cost = USD  0.22
Total Output Tokens -  662
Total Output Cost = USD  0.02
Total Cost = USD  0.24


In [615]:
# Output layout: three identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store's display name once; it is the same for every row.
# The first keyword_mappings key containing the identifier wins.
input_store_identifier = 'tan_sc_sh'
store_name_tan_sc_sh = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name_tan_sc_sh = value
        break

# Rows are collected as plain dicts and turned into a DataFrame in one step,
# instead of growing the frame with pd.concat on every iteration.
keyword_rows_tan_sc_sh = []

# Process each JSON string
for json_str in keyword_positive_output_tan_sc_sh:
    # Load the JSON string into a dictionary: {category: [ {keywords, phrases}, ... ]}
    data = json.loads(json_str)
    for category, content_list in data.items():
        # Every row starts with all columns empty except the identifying ones
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name_tan_sc_sh
        row_data['Sentiment'] = 'Positive'
        for content in content_list:
            # Keywords are handled first
            row_data['Type'] = 'keywords'
            row_data[category] = ', '.join(kw.strip() for kw in content['keywords'].split(','))
            # Snapshot the dict: row_data is reused and mutated for the phrases row
            keyword_rows_tan_sc_sh.append(dict(row_data))
            # Phrases are handled next
            row_data['Type'] = 'phrases'
            row_data[category] = ', '.join(ph.strip() for ph in content['phrases'].split(','))
            keyword_rows_tan_sc_sh.append(dict(row_data))

# Keep the expected columns even when there was nothing to process
if keyword_rows_tan_sc_sh:
    positive_keywords_tan_sc_sh = pd.DataFrame(keyword_rows_tan_sc_sh)
else:
    positive_keywords_tan_sc_sh = pd.DataFrame(columns=columns)

# Consolidate to one row per ('Store Name', 'Sentiment', 'Type'); within each topic column
# the non-null entries are joined with a space (an all-null column becomes '')
positive_keywords_tan_sc_sh = positive_keywords_tan_sc_sh.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_tan_sc_sh

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Tanishq Jewellers-Sharjah Central, SH",Positive,keywords,"Trust :1, Genuine :1, Assurance :1, Guaranteed...","Great experience : 8, Wonderful experience : 6...","helpful : 30, friendly : 25, cooperative : 15,...","unique :5, elegant :2, creative :1, exquisite ...","collection : 45, variety : 3","discount :3, offers :2",,"best price :2, great prices :1, good price :1,...","good quality:4, quality:3, best quality:1","gold exchange:3, old gold:2, value:1"
1,"Tanishq Jewellers-Sharjah Central, SH",Positive,phrases,"Trust Tanishq :1, Tata assurance :1, Purity is...","Very good experience : 4, Excellent shopping e...","very helpful : 10, extremely helpful : 5, very...","good designs :3, amazing designs :2, unique de...","Nice collection : 10, Good collection : 9, Ver...","good discounts :1, best offers :1, 10% discoun...",,"best prices :1, price offered :1, matched my t...","quality of the product:1, exceeded my expectat...","best value for old gold:1, good gold exchange ..."


### tan_taj_db

In [616]:
# Raw responses returned by positive_keywords, one entry per topic that had matching reviews
keyword_positive_output_tan_taj_db = []
# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Single-element list so the progress thread shares the counter by reference
keyword_counter_tan_taj_db = [0]
keyword_input_token_tan_taj_db = 0
keyword_output_token_tan_taj_db = 0
keyword_start_time_loop_tan_taj_db = time.time()

# Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_tan_taj_db, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Look the store frame up once instead of twice per topic
    store_df_tan_taj_db = keyword_dataframes['tan_taj_db_final_sen_df_jul']

    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_tan_taj_db[0] += 1
        # Keep the review text of rows where the topic has a value of 1
        filtered_comments_tan_taj_db = store_df_tan_taj_db.loc[store_df_tan_taj_db[topic] == 1, 'review_text'].tolist()
        # Topics without any matching review are skipped (no API call, no output entry)
        if filtered_comments_tan_taj_db:
            # Call the positive_keywords function and store the result
            keywords, input_tokens_loop, output_token_loop = positive_keywords(filtered_comments_tan_taj_db, topic)
            keyword_positive_output_tan_taj_db.append(keywords)
            keyword_input_token_tan_taj_db += input_tokens_loop
            keyword_output_token_tan_taj_db += output_token_loop
finally:
    # Always stop the dynamic message thread, even when a call above raised;
    # presumably it loops until stop_event is set (like print_dynamic_message),
    # so without this it would keep running after a failure.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tan_taj_db = time.time()
# Rates are USD per 1,000 tokens; each component is rounded to cents before summing
keyword_cost_input_token_tan_taj_db = round((0.01/1000)*keyword_input_token_tan_taj_db, 2)
keyword_cost_output_token_tan_taj_db = round((0.03/1000)*keyword_output_token_tan_taj_db, 2)
keyword_total_cost_tan_taj_db = keyword_cost_input_token_tan_taj_db + keyword_cost_output_token_tan_taj_db
keyword_total_time_loop_tan_taj_db = keyword_end_time_loop_tan_taj_db - keyword_start_time_loop_tan_taj_db

# Display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_tan_taj_db[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tan_taj_db,1))
print("Total Input Tokens - ", keyword_input_token_tan_taj_db)
print("Total Input Cost = USD ",keyword_cost_input_token_tan_taj_db)
print("Total Output Tokens - ", keyword_output_token_tan_taj_db)
print("Total Output Cost = USD ",keyword_cost_output_token_tan_taj_db)
print("Total Cost = USD ",round(keyword_total_cost_tan_taj_db,2))

Executed  10  Iterations
Total Execution time (in secs) -  17.5
Total Input Tokens -  8773
Total Input Cost = USD  0.09
Total Output Tokens -  459
Total Output Cost = USD  0.01
Total Cost = USD  0.1


In [617]:
# Build the positive keyword/phrase table for store 'tan_taj_db' from the raw
# JSON strings collected in keyword_positive_output_tan_taj_db.

# Column layout of the table: three identifying columns followed by one column per topic
columns = ['Store Name', 
           'Sentiment', 
           'Type', 
           'Customer Confidence',
           'Store Experience', 
           'Store Staff', 
           'Product Design', 
           'Product Variety',
           'Discount', 
           'Making Charge', 
           'Price', 
           'Product Quality',
           'Jewellery Exchange']

# Resolve the display name once: the first keyword_mappings key that contains the
# store identifier wins; the placeholder is kept when no key matches.
input_store_identifier = 'tan_taj_db'
store_name = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name = value
        break

# Collect one record per (category, content, type) and build the DataFrame once at
# the end; calling pd.concat for every single row re-copies the whole table each time.
records = []
for json_str in keyword_positive_output_tan_taj_db:
    # Each entry is a JSON string mapping a category to a list of content dicts
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # Keywords row first, then phrases row, matching the original row order
            for row_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name
                row_data['Sentiment'] = 'Positive'
                row_data['Type'] = row_type
                # Normalise the comma-separated list: trim every item, re-join with ', '
                row_data[category] = ', '.join(item.strip() for item in content[row_type].split(','))
                records.append(row_data)

if records:
    # Not passing columns= keeps any category that is not in `columns` as an extra column
    positive_keywords_tan_taj_db = pd.DataFrame(records)
else:
    # Nothing extracted for this store: keep an empty table with the expected columns
    positive_keywords_tan_taj_db = pd.DataFrame(columns=columns)

# Collapse to one row per (Store Name, Sentiment, Type); within every other column the
# non-null strings of the group are joined with a single space.
positive_keywords_tan_taj_db = positive_keywords_tan_taj_db.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

positive_keywords_tan_taj_db

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Tanishq Jewellers-Taj, DB",Positive,keywords,"Transparent :2, Trust :1, Loyal :1","ambience :3, environment :1, place :1, store :...","friendly :4, knowledgeable :3, polite :3, prof...","designs :5, jewellery :2","collection :22, variety :2","discounts :1, tax refund :1",transparent :2,,exceptional :1,
1,"Tanishq Jewellers-Taj, DB",Positive,phrases,"Always we trust tata :1, Tanishq's policies ar...","nice experience :2, amazing place :1, awesome ...","exceptional service :3, very good service :2, ...","Beautiful designs :2, Beautiful solitaire desi...","Superb collection :2, Nice collections :5, Bea...",No relevant positive phrases,transparent about their making charges :1,,quality is exceptional :1,


### Total Cost

keyword_positive_total_cost = keyword_total_cost_bhi_ak+keyword_total_cost_joy_ab+keyword_total_cost_joy_st_af+keyword_total_cost_joy_dm_ad+keyword_total_cost_joy_mz_ad+keyword_total_cost_joy_sh_ad+keyword_total_cost_mal_sc+keyword_total_cost_mal_ab+keyword_total_cost_mal_b1_af+keyword_total_cost_mal_ak+keyword_total_cost_mal_aw_ad+keyword_total_cost_mal_dm_ad+keyword_total_cost_mal_b1_ad+keyword_total_cost_mal_b2_ad+keyword_total_cost_mal_lu_ad+keyword_total_cost_mal_mb+keyword_total_cost_mal_sh_ad+keyword_total_cost_mal_b2_af+keyword_total_cost_mna_mb+keyword_total_cost_min_ak+keyword_total_cost_joy_ak+keyword_total_cost_kan_mb+keyword_total_cost_agd_mb+keyword_total_cost_bhi_dec_ga+keyword_total_cost_jar_bol_il+keyword_total_cost_jar_ver_il+keyword_total_cost_jar_lom_il+keyword_total_cost_jar_orl_il+keyword_total_cost_jar_aur_il+keyword_total_cost_jar_alg_il+keyword_total_cost_jar_sch_il+keyword_total_cost_joy_suw_ga+keyword_total_cost_joy_chi_il+keyword_total_cost_joy_hou_tx+keyword_total_cost_joy_fri_tx+keyword_total_cost_mal_chi_il+keyword_total_cost_mal_nap_il+keyword_total_cost_mal_ise_nj+keyword_total_cost_mal_fri_tx+keyword_total_cost_mal_ric_tx+keyword_total_cost_may_vie_va+keyword_total_cost_son_ise_nj+keyword_total_cost_tif_chi_il+keyword_total_cost_tif_nor_il+keyword_total_cost_tif_sko_il+keyword_total_cost_tif_eas_nj+keyword_total_cost_tif_sho_nj+keyword_total_cost_tif_vie_va+keyword_total_cost_vbj_fri_tx+keyword_total_cost_tan_chi_il+keyword_total_cost_tan_fri_tx+keyword_total_cost_tan_hou_tx+keyword_total_cost_tan_new_nj+keyword_total_cost_tan_bar_db+keyword_total_cost_tan_fah_db+keyword_total_cost_tan_kar_db+keyword_total_cost_tan_ham_ad+keyword_total_cost_tan_mee_db+keyword_total_cost_tan_sil_db

keyword_positive_total_cost

## Combined_positive

In [618]:
# Stack every per-store positive keyword table into one DataFrame.

# Names of the per-store tables, in the order they are stacked
positive_keyword_df_list = [
                            "positive_keywords_agd_mb",
                            "positive_keywords_bhi_ak",
                            "positive_keywords_bhi_dec_ga",
                            "positive_keywords_eve_joh_ga",
                            "positive_keywords_jar_alg_il",
                            "positive_keywords_jar_aur_il",
                            "positive_keywords_jar_bol_il",
                            "positive_keywords_jar_lom_il",
                            "positive_keywords_jar_orl_il",
                            "positive_keywords_jar_sch_il",
                            "positive_keywords_jar_ver_il",
                            "positive_keywords_joy_ab",
                            "positive_keywords_joy_ak",
                            "positive_keywords_joy_chi_il",
                            "positive_keywords_joy_dm_ad",
                            "positive_keywords_joy_fri_tx",
                            "positive_keywords_joy_hou_tx",
                            "positive_keywords_joy_mz_ad",
                            "positive_keywords_joy_sh_ad",
                            "positive_keywords_joy_st_af",
                            "positive_keywords_joy_suw_ga",
                            "positive_keywords_kan_mb",
                            "positive_keywords_mal_ab",
                            "positive_keywords_mal_ak",
                            "positive_keywords_mal_aw_ad",
                            "positive_keywords_mal_b1_ad",
                            "positive_keywords_mal_b1_af",
                            "positive_keywords_mal_b2_ad",
                            "positive_keywords_mal_b2_af",
                            "positive_keywords_mal_chi_il",
                            "positive_keywords_mal_dm_ad",
                            "positive_keywords_mal_fri_tx",
                            "positive_keywords_mal_ise_nj",
                            "positive_keywords_mal_lu_ad",
                            "positive_keywords_mal_mb",
                            "positive_keywords_mal_nap_il",
                            "positive_keywords_mal_ric_tx",
                            "positive_keywords_mal_sc",
                            "positive_keywords_mal_sh_ad",
                            "positive_keywords_may_vie_va",
                            "positive_keywords_mia_awm_ad",
                            "positive_keywords_mia_bur_db",
                            "positive_keywords_min_ak",
                            "positive_keywords_mna_mb",
                            "positive_keywords_son_ise_nj",
                            "positive_keywords_tan_am_om",
                            "positive_keywords_tan_atl_ga",
                            "positive_keywords_tan_bar_db",
                            "positive_keywords_tan_chi_il",
                            "positive_keywords_tan_fah_db",
                            "positive_keywords_tan_fc_qa",
                            "positive_keywords_tan_fri_tx",
                            "positive_keywords_tan_gs_db",
                            "positive_keywords_tan_ham_ad",
                            "positive_keywords_tan_hou_tx",
                            "positive_keywords_tan_kar_db",
                            "positive_keywords_tan_lul_qa",
                            "positive_keywords_tan_mank_db",
                            "positive_keywords_tan_mee_db",
                            "positive_keywords_tan_new_nj",
                            "positive_keywords_tan_rol_sh",
                            "positive_keywords_tan_rse_wa",
                            "positive_keywords_tan_sc_ca",
                            "positive_keywords_tan_sc_sh",
                            "positive_keywords_tan_sil_db",
                            "positive_keywords_tan_taj_db",
                            "positive_keywords_tif_chi_il",
                            "positive_keywords_tif_eas_nj",
                            "positive_keywords_tif_hac_nj",
                            "positive_keywords_tif_nor_il",
                            "positive_keywords_tif_par_nj",
                            "positive_keywords_tif_red_nj",
                            "positive_keywords_tif_ric_va",
                            "positive_keywords_tif_sho_nj",
                            "positive_keywords_tif_sko_il",
                            "positive_keywords_tif_vie_va",
                            "positive_keywords_vbj_fri_tx"

]

# Look the frames up by name in the notebook namespace rather than eval()-ing the
# strings, and report every missing table at once instead of failing on the first.
missing_frames = [df_name for df_name in positive_keyword_df_list if df_name not in globals()]
if missing_frames:
    raise NameError(f"Per-store positive keyword tables not defined: {missing_frames}")

# A single concat over all frames avoids re-copying the growing result once per store;
# ignore_index=True already yields a clean 0..n-1 index, so no reset_index is needed.
combined_df_positive_keywords = pd.concat(
    [globals()[df_name] for df_name in positive_keyword_df_list],
    ignore_index=True,
)
    

In [619]:
# Write the combined positive keyword table to an Excel file under temp/.
# to_excel does not create missing parent folders, so make sure the folder exists first.
os.makedirs("temp", exist_ok=True)
combined_df_positive_keywords.to_excel("temp/combined_df_positive_keywords_current.xlsx",index=False)

## Negative Keywords

#### bhi_ak

In [620]:
# Extract negative keywords/phrases per topic for store 'bhi_ak' and track token usage and cost.

# List that collects the raw negative_keywords() result of every topic with negative reviews
keyword_negative_output_bhi_ak = []
# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = ['Customer Confidence',
                  'Store Experience', 
                  'Store Staff', 
                  'Product Design', 
                  'Product Variety',
                  'Discount', 
                  'Making Charge', 
                  'Price', 
                  'Product Quality',
                  'Jewellery Exchange']

# The counter is a one-element list so the progress thread sees updates made by this cell
keyword_counter_bhi_ak=[0]
keyword_input_token_bhi_ak = 0
keyword_output_token_bhi_ak = 0
keyword_start_time_loop_bhi_ak = time.time()

#Threading setup
keyword_total_iterations = len(keyword_topics) 
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_bhi_ak, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # The store's sentiment table does not change between topics, so look it up once
    sentiment_df_bhi_ak = keyword_dataframes['bhi_ak_final_sen_df_jul']
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_bhi_ak[0]+=1
        # Keep only the reviews whose value for this topic is -1 (negative)
        filtered_comments_bhi_ak = sentiment_df_bhi_ak[sentiment_df_bhi_ak[topic]==-1]['review_text'].tolist()
        # If there are negative comments, call the negative_keywords function
        if filtered_comments_bhi_ak:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_bhi_ak,topic)
            # Store the result and accumulate the token usage
            keyword_negative_output_bhi_ak.append(keywords)
            keyword_input_token_bhi_ak += input_tokens_loop
            keyword_output_token_bhi_ak += output_token_loop
finally:
    # Always signal the progress thread and wait for it, even when a lookup or a
    # keyword call raises; otherwise the thread started above would be left running.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_bhi_ak = time.time()
# Cost estimate in USD: 0.01 per 1,000 input tokens and 0.03 per 1,000 output tokens
keyword_cost_input_token_bhi_ak = round((0.01/1000)*keyword_input_token_bhi_ak,2)
keyword_cost_output_token_bhi_ak = round((0.03/1000)*keyword_output_token_bhi_ak,2)
keyword_total_cost_bhi_ak = keyword_cost_input_token_bhi_ak + keyword_cost_output_token_bhi_ak
keyword_total_time_loop_bhi_ak = keyword_end_time_loop_bhi_ak - keyword_start_time_loop_bhi_ak

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_bhi_ak[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_bhi_ak,1))
print("Total Input Tokens - ", keyword_input_token_bhi_ak)
print("Total Input Cost = USD ",keyword_cost_input_token_bhi_ak)
print("Total Output Tokens - ", keyword_output_token_bhi_ak)
print("Total Output Cost = USD ",keyword_cost_output_token_bhi_ak)
print("Total Cost = USD ",round(keyword_total_cost_bhi_ak,2))

Executed  10  Iterations
Total Execution time (in secs) -  19.5
Total Input Tokens -  12715
Total Input Cost = USD  0.13
Total Output Tokens -  694
Total Output Cost = USD  0.02
Total Cost = USD  0.15


In [621]:
# Build the negative keyword/phrase table for store 'bhi_ak' from the raw
# JSON strings collected in keyword_negative_output_bhi_ak.

# Column layout of the table: three identifying columns followed by one column per topic
columns = ['Store Name', 
           'Sentiment', 
           'Type', 
           'Customer Confidence',
           'Store Experience', 
           'Store Staff', 
           'Product Design', 
           'Product Variety',
           'Discount', 
           'Making Charge', 
           'Price', 
           'Product Quality',
           'Jewellery Exchange']

# Resolve the display name once: the first keyword_mappings key that contains the
# store identifier wins; the placeholder is kept when no key matches.
input_store_identifier = 'bhi_ak'
store_name = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name = value
        break

# Collect one record per (category, content, type) and build the DataFrame once at
# the end; calling pd.concat for every single row re-copies the whole table each time.
records = []
for json_str in keyword_negative_output_bhi_ak:
    # Each entry is a JSON string mapping a category to a list of content dicts
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # Keywords row first, then phrases row, matching the original row order
            for row_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name
                # NOTE(review): the positive tables in this notebook use 'Positive'
                # (capitalised) while this label is lower-case - confirm that is intended.
                row_data['Sentiment'] = 'negative'
                row_data['Type'] = row_type
                # Normalise the comma-separated list: trim every item, re-join with ', '
                row_data[category] = ', '.join(item.strip() for item in content[row_type].split(','))
                records.append(row_data)

if records:
    # Not passing columns= keeps any category that is not in `columns` as an extra column
    negative_keywords_bhi_ak = pd.DataFrame(records)
else:
    # Nothing extracted for this store: keep an empty table with the expected columns
    negative_keywords_bhi_ak = pd.DataFrame(columns=columns)

# Collapse to one row per (Store Name, Sentiment, Type); within every other column the
# non-null strings of the group are joined with a single space.
negative_keywords_bhi_ak = negative_keywords_bhi_ak.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

negative_keywords_bhi_ak

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,Bhima Jewellers - Al Karama,negative,keywords,"scam :1, rude :1, shocking :1, questionable :1...","delay :1, worst :1, bad :1, terrible :1, carel...","careless :1, not good :1, bad experience :1, t...","defective :1, poor-quality :1, limited selecti...",limited selection:1,No relevant negative keywords/ phrases,"highest making charges:2, much higher:1, limit...","unbearable :1, limitless :1, shocking :1, huge...","low quality :1, defective :1, poor-quality :1",
1,Bhima Jewellers - Al Karama,negative,phrases,"never do a scheme with this jewellers :1, hidi...","Delay at the billing point :1, bad experience ...","sales representatie didn’t told me :1, Miss gu...","broke within just a month :1, broke again :1, ...",very limited selection of replacement designs:...,No relevant negative keywords/ phrases,"loot it in making charges:1, making charges ar...","making charges are limitless :1, cost of makin...","broke within just a month :1, not connecting t...",


#### joy_ab

In [622]:
# Extract negative keywords/phrases per topic for store 'joy_ab' and track token usage and cost.

# List that collects the raw negative_keywords() result of every topic with negative reviews
keyword_negative_output_joy_ab = []
# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = ['Customer Confidence',
                  'Store Experience', 
                  'Store Staff', 
                  'Product Design', 
                  'Product Variety',
                  'Discount', 
                  'Making Charge', 
                  'Price', 
                  'Product Quality',
                  'Jewellery Exchange']

# The counter is a one-element list so the progress thread sees updates made by this cell
keyword_counter_joy_ab=[0]
keyword_input_token_joy_ab = 0
keyword_output_token_joy_ab = 0
keyword_start_time_loop_joy_ab = time.time()

#Threading setup
keyword_total_iterations = len(keyword_topics) 
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_joy_ab, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # The store's sentiment table does not change between topics, so look it up once
    sentiment_df_joy_ab = keyword_dataframes['joy_ab_final_sen_df_jul']
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_joy_ab[0]+=1
        # Keep only the reviews whose value for this topic is -1 (negative)
        filtered_comments_joy_ab = sentiment_df_joy_ab[sentiment_df_joy_ab[topic]==-1]['review_text'].tolist()
        # If there are negative comments, call the negative_keywords function
        if filtered_comments_joy_ab:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_joy_ab,topic)
            # Store the result and accumulate the token usage
            keyword_negative_output_joy_ab.append(keywords)
            keyword_input_token_joy_ab += input_tokens_loop
            keyword_output_token_joy_ab += output_token_loop
finally:
    # Always signal the progress thread and wait for it, even when a lookup or a
    # keyword call raises; otherwise the thread started above would be left running.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_joy_ab = time.time()
# Cost estimate in USD: 0.01 per 1,000 input tokens and 0.03 per 1,000 output tokens
keyword_cost_input_token_joy_ab = round((0.01/1000)*keyword_input_token_joy_ab,2)
keyword_cost_output_token_joy_ab = round((0.03/1000)*keyword_output_token_joy_ab,2)
keyword_total_cost_joy_ab = keyword_cost_input_token_joy_ab + keyword_cost_output_token_joy_ab
keyword_total_time_loop_joy_ab = keyword_end_time_loop_joy_ab - keyword_start_time_loop_joy_ab

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_joy_ab[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_joy_ab,1))
print("Total Input Tokens - ", keyword_input_token_joy_ab)
print("Total Input Cost = USD ",keyword_cost_input_token_joy_ab)
print("Total Output Tokens - ", keyword_output_token_joy_ab)
print("Total Output Cost = USD ",keyword_cost_output_token_joy_ab)
print("Total Cost = USD ",round(keyword_total_cost_joy_ab,2))

Executed  10  Iterations
Total Execution time (in secs) -  14.5
Total Input Tokens -  7166
Total Input Cost = USD  0.07
Total Output Tokens -  440
Total Output Cost = USD  0.01
Total Cost = USD  0.08


In [623]:
# Build the negative keyword/phrase table for store 'joy_ab' from the raw
# JSON strings collected in keyword_negative_output_joy_ab.

# Column layout of the table: three identifying columns followed by one column per topic
columns = ['Store Name', 
           'Sentiment', 
           'Type', 
           'Customer Confidence',
           'Store Experience', 
           'Store Staff', 
           'Product Design', 
           'Product Variety',
           'Discount', 
           'Making Charge', 
           'Price', 
           'Product Quality',
           'Jewellery Exchange']

# Resolve the display name once: the first keyword_mappings key that contains the
# store identifier wins; the placeholder is kept when no key matches.
input_store_identifier = 'joy_ab'
store_name = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name = value
        break

# Collect one record per (category, content, type) and build the DataFrame once at
# the end; calling pd.concat for every single row re-copies the whole table each time.
records = []
for json_str in keyword_negative_output_joy_ab:
    # Each entry is a JSON string mapping a category to a list of content dicts
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # Keywords row first, then phrases row, matching the original row order
            for row_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name
                # NOTE(review): the positive tables in this notebook use 'Positive'
                # (capitalised) while this label is lower-case - confirm that is intended.
                row_data['Sentiment'] = 'negative'
                row_data['Type'] = row_type
                # Normalise the comma-separated list: trim every item, re-join with ', '
                row_data[category] = ', '.join(item.strip() for item in content[row_type].split(','))
                records.append(row_data)

if records:
    # Not passing columns= keeps any category that is not in `columns` as an extra column
    negative_keywords_joy_ab = pd.DataFrame(records)
else:
    # Nothing extracted for this store: keep an empty table with the expected columns
    negative_keywords_joy_ab = pd.DataFrame(columns=columns)

# Collapse to one row per (Store Name, Sentiment, Type); within every other column the
# non-null strings of the group are joined with a single space.
negative_keywords_joy_ab = negative_keywords_joy_ab.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

negative_keywords_joy_ab

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,Joyalukkas Jewellery - Al Barsha,negative,keywords,"Not trustworthy:1, manipulated:1, wrong inform...","waiting :1, improve :1, unwelcoming :1, indiff...","lazy :1, inexperienced :1, unwelcoming :1, ind...",old designs:1,"collections :1, models :1",No discount :1,No relevant negative keywords/ phrases,too expensive:1,,
1,Joyalukkas Jewellery - Al Barsha,negative,phrases,using paid services to increase its Shop Ratin...,"waiting longtime :1, service is need to improv...","very lazy staff :1, inexperienced staff :1, un...",Very old designs:1,"little more collections :1, mode models :1",need more discount :1,No relevant negative keywords/ phrases,"come with a lot of money:1, it’s too expensive:1",,


#### joy_st_af

In [624]:
# Extract negative keywords/phrases per topic for store 'joy_st_af' and track token usage and cost.

# List that collects the raw negative_keywords() result of every topic with negative reviews
keyword_negative_output_joy_st_af = []
# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = ['Customer Confidence',
                  'Store Experience', 
                  'Store Staff', 
                  'Product Design', 
                  'Product Variety',
                  'Discount', 
                  'Making Charge', 
                  'Price', 
                  'Product Quality',
                  'Jewellery Exchange']

# The counter is a one-element list so the progress thread sees updates made by this cell
keyword_counter_joy_st_af=[0]
keyword_input_token_joy_st_af = 0
keyword_output_token_joy_st_af = 0
keyword_start_time_loop_joy_st_af = time.time()

#Threading setup
keyword_total_iterations = len(keyword_topics) 
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_joy_st_af, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # The store's sentiment table does not change between topics, so look it up once
    sentiment_df_joy_st_af = keyword_dataframes['joy_st_af_final_sen_df_jul']
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_joy_st_af[0]+=1
        # Keep only the reviews whose value for this topic is -1 (negative)
        filtered_comments_joy_st_af = sentiment_df_joy_st_af[sentiment_df_joy_st_af[topic]==-1]['review_text'].tolist()
        # If there are negative comments, call the negative_keywords function
        if filtered_comments_joy_st_af:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_joy_st_af,topic)
            # Store the result and accumulate the token usage
            keyword_negative_output_joy_st_af.append(keywords)
            keyword_input_token_joy_st_af += input_tokens_loop
            keyword_output_token_joy_st_af += output_token_loop
finally:
    # Always signal the progress thread and wait for it, even when a lookup or a
    # keyword call raises; otherwise the thread started above would be left running.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_joy_st_af = time.time()
# Cost estimate in USD: 0.01 per 1,000 input tokens and 0.03 per 1,000 output tokens
keyword_cost_input_token_joy_st_af = round((0.01/1000)*keyword_input_token_joy_st_af,2)
keyword_cost_output_token_joy_st_af = round((0.03/1000)*keyword_output_token_joy_st_af,2)
keyword_total_cost_joy_st_af = keyword_cost_input_token_joy_st_af + keyword_cost_output_token_joy_st_af
keyword_total_time_loop_joy_st_af = keyword_end_time_loop_joy_st_af - keyword_start_time_loop_joy_st_af

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_joy_st_af[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_joy_st_af,1))
print("Total Input Tokens - ", keyword_input_token_joy_st_af)
print("Total Input Cost = USD ",keyword_cost_input_token_joy_st_af)
print("Total Output Tokens - ", keyword_output_token_joy_st_af)
print("Total Output Cost = USD ",keyword_cost_output_token_joy_st_af)
print("Total Cost = USD ",round(keyword_total_cost_joy_st_af,2))

Executed  10  Iterations
Total Execution time (in secs) -  19.5
Total Input Tokens -  9318
Total Input Cost = USD  0.09
Total Output Tokens -  602
Total Output Cost = USD  0.02
Total Cost = USD  0.11


In [625]:
# Build the negative keyword/phrase table for store 'joy_st_af' from the raw
# JSON strings collected in keyword_negative_output_joy_st_af.

# Column layout of the table: three identifying columns followed by one column per topic
columns = ['Store Name', 
           'Sentiment', 
           'Type', 
           'Customer Confidence',
           'Store Experience', 
           'Store Staff', 
           'Product Design', 
           'Product Variety',
           'Discount', 
           'Making Charge', 
           'Price', 
           'Product Quality',
           'Jewellery Exchange']

# Resolve the display name once: the first keyword_mappings key that contains the
# store identifier wins; the placeholder is kept when no key matches.
input_store_identifier = 'joy_st_af'
store_name = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name = value
        break

# Collect one record per (category, content, type) and build the DataFrame once at
# the end; calling pd.concat for every single row re-copies the whole table each time.
records = []
for json_str in keyword_negative_output_joy_st_af:
    # Each entry is a JSON string mapping a category to a list of content dicts
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # Keywords row first, then phrases row, matching the original row order
            for row_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name
                # NOTE(review): the positive tables in this notebook use 'Positive'
                # (capitalised) while this label is lower-case - confirm that is intended.
                row_data['Sentiment'] = 'negative'
                row_data['Type'] = row_type
                # Normalise the comma-separated list: trim every item, re-join with ', '
                row_data[category] = ', '.join(item.strip() for item in content[row_type].split(','))
                records.append(row_data)

if records:
    # Not passing columns= keeps any category that is not in `columns` as an extra column
    negative_keywords_joy_st_af = pd.DataFrame(records)
else:
    # Nothing extracted for this store: keep an empty table with the expected columns
    negative_keywords_joy_st_af = pd.DataFrame(columns=columns)

# Collapse to one row per (Store Name, Sentiment, Type); within every other column the
# non-null strings of the group are joined with a single space.
negative_keywords_joy_st_af = negative_keywords_joy_st_af.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

negative_keywords_joy_st_af

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,Joyalukkas Jewellery - Al Fahidi st - Al Fahidi,negative,keywords,No relevant negative keywords/ phrases,"crowd :1, mess :1, slow :1, suffocating :1, ti...","unprofessional :1, inefficient :1, compromised...",No relevant negative keywords/ phrases,"ok ok :1, more updated :1, more design :1","enough discount:1, more discount:1","high :5, exorbitant :2, unreasonable :1, less ...","overprice:1, expensive:1, higher:1","broken :1, poor soldering :1, quality problems...",
1,Joyalukkas Jewellery - Al Fahidi st - Al Fahidi,negative,phrases,No relevant negative keywords/ phrases,"store itself is a mess :1, suffocating crowd :...",more focused on google reviews than attending ...,No relevant negative keywords/ phrases,"Improve your nosepin collection :1, expecting ...","didn't give enough discount:1, need more disco...","making charges are very high :2, making charge...","question their prices:1, improvement on pricing:1",bracelet was broken just after 2 months :1,


#### joy_dm_ad

In [626]:
# Extract negative keywords/phrases per topic for store 'joy_dm_ad' and track token usage and cost.

# List that collects the raw negative_keywords() result of every topic with negative reviews
keyword_negative_output_joy_dm_ad = []
# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = ['Customer Confidence',
                  'Store Experience', 
                  'Store Staff', 
                  'Product Design', 
                  'Product Variety',
                  'Discount', 
                  'Making Charge', 
                  'Price', 
                  'Product Quality',
                  'Jewellery Exchange']

# The counter is a one-element list so the progress thread sees updates made by this cell
keyword_counter_joy_dm_ad=[0]
keyword_input_token_joy_dm_ad = 0
keyword_output_token_joy_dm_ad = 0
keyword_start_time_loop_joy_dm_ad = time.time()

#Threading setup
keyword_total_iterations = len(keyword_topics) 
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_joy_dm_ad, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # The store's sentiment table does not change between topics, so look it up once
    sentiment_df_joy_dm_ad = keyword_dataframes['joy_dm_ad_final_sen_df_jul']
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_joy_dm_ad[0]+=1
        # Keep only the reviews whose value for this topic is -1 (negative)
        filtered_comments_joy_dm_ad = sentiment_df_joy_dm_ad[sentiment_df_joy_dm_ad[topic]==-1]['review_text'].tolist()
        # If there are negative comments, call the negative_keywords function
        if filtered_comments_joy_dm_ad:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_joy_dm_ad,topic)
            # Store the result and accumulate the token usage
            keyword_negative_output_joy_dm_ad.append(keywords)
            keyword_input_token_joy_dm_ad += input_tokens_loop
            keyword_output_token_joy_dm_ad += output_token_loop
finally:
    # Always signal the progress thread and wait for it, even when a lookup or a
    # keyword call raises; otherwise the thread started above would be left running.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_joy_dm_ad = time.time()
# Cost estimate in USD: 0.01 per 1,000 input tokens and 0.03 per 1,000 output tokens
keyword_cost_input_token_joy_dm_ad = round((0.01/1000)*keyword_input_token_joy_dm_ad,2)
keyword_cost_output_token_joy_dm_ad = round((0.03/1000)*keyword_output_token_joy_dm_ad,2)
keyword_total_cost_joy_dm_ad = keyword_cost_input_token_joy_dm_ad + keyword_cost_output_token_joy_dm_ad
keyword_total_time_loop_joy_dm_ad = keyword_end_time_loop_joy_dm_ad - keyword_start_time_loop_joy_dm_ad

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_joy_dm_ad[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_joy_dm_ad,1))
print("Total Input Tokens - ", keyword_input_token_joy_dm_ad)
print("Total Input Cost = USD ",keyword_cost_input_token_joy_dm_ad)
print("Total Output Tokens - ", keyword_output_token_joy_dm_ad)
print("Total Output Cost = USD ",keyword_cost_output_token_joy_dm_ad)
print("Total Cost = USD ",round(keyword_total_cost_joy_dm_ad,2))

Executed  10  Iterations
Total Execution time (in secs) -  19.5
Total Input Tokens -  7798
Total Input Cost = USD  0.08
Total Output Tokens -  550
Total Output Cost = USD  0.02
Total Cost = USD  0.1


In [627]:
# Columns of the summary table: three identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store's display name once: it is identical for every row, so
# keyword_mappings does not need to be searched again for each category.
input_store_identifier = 'joy_dm_ad'
store_name_joy_dm_ad = next(
    (value for key, value in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Collect rows as plain dicts and build the DataFrame once at the end; calling
# pd.concat inside the loop copies the whole frame again on every iteration.
keyword_rows_joy_dm_ad = []
for json_str in keyword_negative_output_joy_dm_ad:
    # Each entry is parsed as JSON mapping a category to a list of objects
    # that carry 'keywords' and 'phrases' strings
    data = json.loads(json_str)
    for category, content_list in data.items():
        # Start from a row where every column is empty
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name_joy_dm_ad
        row_data['Sentiment'] = 'negative'
        for content in content_list:
            # Keywords row: trim whitespace around each comma-separated item
            row_data['Type'] = 'keywords'
            row_data[category] = ', '.join(kw.strip() for kw in content['keywords'].split(','))
            # Append a copy because row_data is mutated again for the phrases row
            keyword_rows_joy_dm_ad.append(dict(row_data))
            # Phrases row: same category column, different 'Type'
            row_data['Type'] = 'phrases'
            row_data[category] = ', '.join(ph.strip() for ph in content['phrases'].split(','))
            keyword_rows_joy_dm_ad.append(dict(row_data))

# With no rows, fall back to an empty frame that still has the expected columns
if keyword_rows_joy_dm_ad:
    negative_keywords_joy_dm_ad = pd.DataFrame(keyword_rows_joy_dm_ad)
else:
    negative_keywords_joy_dm_ad = pd.DataFrame(columns=columns)

# Consolidate rows that share 'Store Name', 'Sentiment' and 'Type' into a single row,
# joining the non-null values of every other column with a space
negative_keywords_joy_dm_ad = negative_keywords_joy_dm_ad.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

negative_keywords_joy_dm_ad

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,Joyalukkas Jewellery - Dalma Plaza - Abu Dhabi,negative,keywords,"misleading :1, loss :1, not recommended :1, no...",No relevant negative keywords/ phrases,"ego :2, arrogance :1, conceited :1, unprofessi...",No relevant negative keywords/ phrases,limited :1,No relevant negative keywords/ phrases,"not correct:2, reduced:1, less:1, unhappy:1","non bargaining :1, reduce :1","become white:2, bad experience:2",
1,Joyalukkas Jewellery - Dalma Plaza - Abu Dhabi,negative,phrases,"didn't allowed to take the jewellery :1, gold ...",No relevant negative keywords/ phrases,don't have any courtesy to how to talk and tre...,No relevant negative keywords/ phrases,"choices/ options seemed a bit limited :1, Coll...",No relevant negative keywords/ phrases,calculation given for making charges is not co...,"good price :1, ask again money :1, reduce the ...","it's become white:2, don't sure about their pr...",


#### joy_mz_ad

In [628]:
# List that collects the keyword output of negative_keywords() for each topic
keyword_negative_output_joy_mz_ad = []
# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# One-element list: it is mutated in place while the progress thread holds a reference to it
keyword_counter_joy_mz_ad=[0]
keyword_input_token_joy_mz_ad = 0
keyword_output_token_joy_mz_ad = 0
keyword_start_time_loop_joy_mz_ad = time.time()

#Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_joy_mz_ad, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_joy_mz_ad[0]+=1
        # Keep only the review texts whose flag for this topic is -1 (negative)
        filtered_comments_joy_mz_ad = keyword_dataframes['joy_mz_ad_final_sen_df_jul'][keyword_dataframes['joy_mz_ad_final_sen_df_jul'][topic]==-1]['review_text'].tolist()
        # Only call negative_keywords when there is at least one negative comment
        if filtered_comments_joy_mz_ad:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_joy_mz_ad,topic)
            # Store the result and accumulate the token usage reported by the call
            keyword_negative_output_joy_mz_ad.append(keywords)
            keyword_input_token_joy_mz_ad += input_tokens_loop
            keyword_output_token_joy_mz_ad += output_token_loop
finally:
    # Always signal and join the progress thread, even when a call above raises;
    # otherwise the thread would never be told to stop after a failure.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_joy_mz_ad = time.time()
# Cost at the rates hard-coded here: USD 0.01 per 1K input tokens, USD 0.03 per 1K output tokens
keyword_cost_input_token_joy_mz_ad = round((0.01/1000)*keyword_input_token_joy_mz_ad,2)
keyword_cost_output_token_joy_mz_ad = round((0.03/1000)*keyword_output_token_joy_mz_ad,2)
keyword_total_cost_joy_mz_ad = keyword_cost_input_token_joy_mz_ad + keyword_cost_output_token_joy_mz_ad
keyword_total_time_loop_joy_mz_ad = keyword_end_time_loop_joy_mz_ad - keyword_start_time_loop_joy_mz_ad

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_joy_mz_ad[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_joy_mz_ad,1))
print("Total Input Tokens - ", keyword_input_token_joy_mz_ad)
print("Total Input Cost = USD ",keyword_cost_input_token_joy_mz_ad)
print("Total Output Tokens - ", keyword_output_token_joy_mz_ad)
print("Total Output Cost = USD ",keyword_cost_output_token_joy_mz_ad)
print("Total Cost = USD ",round(keyword_total_cost_joy_mz_ad,2))

Executed  10  Iterations
Total Execution time (in secs) -  20.0
Total Input Tokens -  8264
Total Input Cost = USD  0.08
Total Output Tokens -  595
Total Output Cost = USD  0.02
Total Cost = USD  0.1


In [629]:
# Columns of the summary table: three identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store's display name once: it is identical for every row, so
# keyword_mappings does not need to be searched again for each category.
input_store_identifier = 'joy_mz_ad'
store_name_joy_mz_ad = next(
    (value for key, value in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Collect rows as plain dicts and build the DataFrame once at the end; calling
# pd.concat inside the loop copies the whole frame again on every iteration.
keyword_rows_joy_mz_ad = []
for json_str in keyword_negative_output_joy_mz_ad:
    # Each entry is parsed as JSON mapping a category to a list of objects
    # that carry 'keywords' and 'phrases' strings
    data = json.loads(json_str)
    for category, content_list in data.items():
        # Start from a row where every column is empty
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name_joy_mz_ad
        row_data['Sentiment'] = 'negative'
        for content in content_list:
            # Keywords row: trim whitespace around each comma-separated item
            row_data['Type'] = 'keywords'
            row_data[category] = ', '.join(kw.strip() for kw in content['keywords'].split(','))
            # Append a copy because row_data is mutated again for the phrases row
            keyword_rows_joy_mz_ad.append(dict(row_data))
            # Phrases row: same category column, different 'Type'
            row_data['Type'] = 'phrases'
            row_data[category] = ', '.join(ph.strip() for ph in content['phrases'].split(','))
            keyword_rows_joy_mz_ad.append(dict(row_data))

# With no rows, fall back to an empty frame that still has the expected columns
if keyword_rows_joy_mz_ad:
    negative_keywords_joy_mz_ad = pd.DataFrame(keyword_rows_joy_mz_ad)
else:
    negative_keywords_joy_mz_ad = pd.DataFrame(columns=columns)

# Consolidate rows that share 'Store Name', 'Sentiment' and 'Type' into a single row,
# joining the non-null values of every other column with a space
negative_keywords_joy_mz_ad = negative_keywords_joy_mz_ad.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

negative_keywords_joy_mz_ad

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,Joyalukkas Jewellery - Madinat Zayed Shopping ...,negative,keywords,"tricks :1, blame :1","bad service: 2, rude: 2, worst: 2, disinterest...","rude :4, disinterested :1, dishonest :1, horri...",,No relevant negative keywords/ phrases,fake offers:1,"fake offers:1, extra charges:1","extra charges: 2, expensive: 1, over amount: 1...",broken :1,
1,Joyalukkas Jewellery - Madinat Zayed Shopping ...,negative,phrases,"say different amount :2, mistake is from them ...","waited there for almost 30 mins: 1, waited for...","staff was so rude :1, very rudely attitude :1,...",,No relevant negative keywords/ phrases,50% off form making charge but same product ma...,making charge of an item it was 6% now there i...,"deduct 4%: 1, lie about gold price: 1, promisi...","yellow ish brownish :1, broken piece :1",


#### joy_sh_ad

In [630]:
# List that collects the keyword output of negative_keywords() for each topic
keyword_negative_output_joy_sh_ad = []
# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# One-element list: it is mutated in place while the progress thread holds a reference to it
keyword_counter_joy_sh_ad=[0]
keyword_input_token_joy_sh_ad = 0
keyword_output_token_joy_sh_ad = 0
keyword_start_time_loop_joy_sh_ad = time.time()

#Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_joy_sh_ad, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_joy_sh_ad[0]+=1
        # Keep only the review texts whose flag for this topic is -1 (negative)
        filtered_comments_joy_sh_ad = keyword_dataframes['joy_sh_ad_final_sen_df_jul'][keyword_dataframes['joy_sh_ad_final_sen_df_jul'][topic]==-1]['review_text'].tolist()
        # Only call negative_keywords when there is at least one negative comment
        if filtered_comments_joy_sh_ad:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_joy_sh_ad,topic)
            # Store the result and accumulate the token usage reported by the call
            keyword_negative_output_joy_sh_ad.append(keywords)
            keyword_input_token_joy_sh_ad += input_tokens_loop
            keyword_output_token_joy_sh_ad += output_token_loop
finally:
    # Always signal and join the progress thread, even when a call above raises;
    # otherwise the thread would never be told to stop after a failure.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_joy_sh_ad = time.time()
# Cost at the rates hard-coded here: USD 0.01 per 1K input tokens, USD 0.03 per 1K output tokens
keyword_cost_input_token_joy_sh_ad = round((0.01/1000)*keyword_input_token_joy_sh_ad,2)
keyword_cost_output_token_joy_sh_ad = round((0.03/1000)*keyword_output_token_joy_sh_ad,2)
keyword_total_cost_joy_sh_ad = keyword_cost_input_token_joy_sh_ad + keyword_cost_output_token_joy_sh_ad
keyword_total_time_loop_joy_sh_ad = keyword_end_time_loop_joy_sh_ad - keyword_start_time_loop_joy_sh_ad

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_joy_sh_ad[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_joy_sh_ad,1))
print("Total Input Tokens - ", keyword_input_token_joy_sh_ad)
print("Total Input Cost = USD ",keyword_cost_input_token_joy_sh_ad)
print("Total Output Tokens - ", keyword_output_token_joy_sh_ad)
print("Total Output Cost = USD ",keyword_cost_output_token_joy_sh_ad)
print("Total Cost = USD ",round(keyword_total_cost_joy_sh_ad,2))

Executed  10  Iterations
Total Execution time (in secs) -  17.5
Total Input Tokens -  8308
Total Input Cost = USD  0.08
Total Output Tokens -  524
Total Output Cost = USD  0.02
Total Cost = USD  0.1


In [631]:
# Columns of the summary table: three identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store's display name once: it is identical for every row, so
# keyword_mappings does not need to be searched again for each category.
input_store_identifier = 'joy_sh_ad'
store_name_joy_sh_ad = next(
    (value for key, value in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Collect rows as plain dicts and build the DataFrame once at the end; calling
# pd.concat inside the loop copies the whole frame again on every iteration.
keyword_rows_joy_sh_ad = []
for json_str in keyword_negative_output_joy_sh_ad:
    # Each entry is parsed as JSON mapping a category to a list of objects
    # that carry 'keywords' and 'phrases' strings
    data = json.loads(json_str)
    for category, content_list in data.items():
        # Start from a row where every column is empty
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name_joy_sh_ad
        row_data['Sentiment'] = 'negative'
        for content in content_list:
            # Keywords row: trim whitespace around each comma-separated item
            row_data['Type'] = 'keywords'
            row_data[category] = ', '.join(kw.strip() for kw in content['keywords'].split(','))
            # Append a copy because row_data is mutated again for the phrases row
            keyword_rows_joy_sh_ad.append(dict(row_data))
            # Phrases row: same category column, different 'Type'
            row_data['Type'] = 'phrases'
            row_data[category] = ', '.join(ph.strip() for ph in content['phrases'].split(','))
            keyword_rows_joy_sh_ad.append(dict(row_data))

# With no rows, fall back to an empty frame that still has the expected columns
if keyword_rows_joy_sh_ad:
    negative_keywords_joy_sh_ad = pd.DataFrame(keyword_rows_joy_sh_ad)
else:
    negative_keywords_joy_sh_ad = pd.DataFrame(columns=columns)

# Consolidate rows that share 'Store Name', 'Sentiment' and 'Type' into a single row,
# joining the non-null values of every other column with a space
negative_keywords_joy_sh_ad = negative_keywords_joy_sh_ad.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

negative_keywords_joy_sh_ad

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,Joyalukkas Jewellery - Shabia - Abu Dhabi,negative,keywords,"no resell :2, scam :1, fishy :1, lost :1, tran...","Slow bill :2, bad service :2, very bad :2, wor...","unprofessional :1, dismissive :1, arrogant :1,...","twisted design:1, uncomfortable:1",No relevant negative keywords/ phrases,No relevant negative keywords/ phrases,high :1,,No relevant negative keywords/ phrases,
1,Joyalukkas Jewellery - Shabia - Abu Dhabi,negative,phrases,"didn't find an estimate :1, no resell value :1...","very Slow bill payment :1, spend the time more...","doesn’t respect customers :2, don’t even give ...","started coming apart:1, pricking my neck:1, ha...","Not much collections :1, No light weight items :1",No relevant negative keywords/ phrases,Making charge is very high :1,,"started coming apart :1, pricking my neck :1, ...",


#### mal_sc

In [632]:
# List that collects the keyword output of negative_keywords() for each topic
keyword_negative_output_mal_sc = []
# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# One-element list: it is mutated in place while the progress thread holds a reference to it
keyword_counter_mal_sc=[0]
keyword_input_token_mal_sc = 0
keyword_output_token_mal_sc = 0
keyword_start_time_loop_mal_sc = time.time()

#Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_mal_sc, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_mal_sc[0]+=1
        # Keep only the review texts whose flag for this topic is -1 (negative)
        filtered_comments_mal_sc = keyword_dataframes['mal_sc_final_sen_df_jul'][keyword_dataframes['mal_sc_final_sen_df_jul'][topic]==-1]['review_text'].tolist()
        # Only call negative_keywords when there is at least one negative comment
        if filtered_comments_mal_sc:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_mal_sc,topic)
            # Store the result and accumulate the token usage reported by the call
            keyword_negative_output_mal_sc.append(keywords)
            keyword_input_token_mal_sc += input_tokens_loop
            keyword_output_token_mal_sc += output_token_loop
finally:
    # Always signal and join the progress thread, even when a call above raises;
    # otherwise the thread would never be told to stop after a failure.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_mal_sc = time.time()
# Cost at the rates hard-coded here: USD 0.01 per 1K input tokens, USD 0.03 per 1K output tokens
keyword_cost_input_token_mal_sc = round((0.01/1000)*keyword_input_token_mal_sc,2)
keyword_cost_output_token_mal_sc = round((0.03/1000)*keyword_output_token_mal_sc,2)
keyword_total_cost_mal_sc = keyword_cost_input_token_mal_sc + keyword_cost_output_token_mal_sc
keyword_total_time_loop_mal_sc = keyword_end_time_loop_mal_sc - keyword_start_time_loop_mal_sc

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_mal_sc[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_mal_sc,1))
print("Total Input Tokens - ", keyword_input_token_mal_sc)
print("Total Input Cost = USD ",keyword_cost_input_token_mal_sc)
print("Total Output Tokens - ", keyword_output_token_mal_sc)
print("Total Output Cost = USD ",keyword_cost_output_token_mal_sc)
print("Total Cost = USD ",round(keyword_total_cost_mal_sc,2))

Executed  10  Iterations
Total Execution time (in secs) -  12.0
Total Input Tokens -  6610
Total Input Cost = USD  0.07
Total Output Tokens -  439
Total Output Cost = USD  0.01
Total Cost = USD  0.08


In [633]:
# Columns of the summary table: three identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store's display name once: it is identical for every row, so
# keyword_mappings does not need to be searched again for each category.
input_store_identifier = 'mal_sc'
store_name_mal_sc = next(
    (value for key, value in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Collect rows as plain dicts and build the DataFrame once at the end; calling
# pd.concat inside the loop copies the whole frame again on every iteration.
keyword_rows_mal_sc = []
for json_str in keyword_negative_output_mal_sc:
    # Each entry is parsed as JSON mapping a category to a list of objects
    # that carry 'keywords' and 'phrases' strings
    data = json.loads(json_str)
    for category, content_list in data.items():
        # Start from a row where every column is empty
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name_mal_sc
        row_data['Sentiment'] = 'negative'
        for content in content_list:
            # Keywords row: trim whitespace around each comma-separated item
            row_data['Type'] = 'keywords'
            row_data[category] = ', '.join(kw.strip() for kw in content['keywords'].split(','))
            # Append a copy because row_data is mutated again for the phrases row
            keyword_rows_mal_sc.append(dict(row_data))
            # Phrases row: same category column, different 'Type'
            row_data['Type'] = 'phrases'
            row_data[category] = ', '.join(ph.strip() for ph in content['phrases'].split(','))
            keyword_rows_mal_sc.append(dict(row_data))

# With no rows, fall back to an empty frame that still has the expected columns
if keyword_rows_mal_sc:
    negative_keywords_mal_sc = pd.DataFrame(keyword_rows_mal_sc)
else:
    negative_keywords_mal_sc = pd.DataFrame(columns=columns)

# Consolidate rows that share 'Store Name', 'Sentiment' and 'Type' into a single row,
# joining the non-null values of every other column with a space
negative_keywords_mal_sc = negative_keywords_mal_sc.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

negative_keywords_mal_sc

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,Malabar Gold & Diamonds - Silicon Oasis Central,negative,keywords,"misleading :1, unprofessional :1, cheated :1, ...","waited :2, wasted :1, disappointing :1, irrita...","rude :2, ignored :1, not bothered :1, smirk :1...",,,"no-discount :1, refused :1",No relevant negative keywords/ phrases,"higher priced:1, budget:1",,
1,Malabar Gold & Diamonds - Silicon Oasis Central,negative,phrases,"doesn't know how to deal with customers :1, do...","wait for 15 days :1, waited and wasted few min...","no other staffs too couldn’t see us :1, just I...",,,offered me 50 AED discount on making charge :1...,"I felt I paid lil more on making charges:1, ma...","quoted higher priced products:1, judged based ...",,


#### mal_ab

In [634]:
# List that collects the keyword output of negative_keywords() for each topic
keyword_negative_output_mal_ab = []
# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# One-element list: it is mutated in place while the progress thread holds a reference to it
keyword_counter_mal_ab=[0]
keyword_input_token_mal_ab = 0
keyword_output_token_mal_ab = 0
keyword_start_time_loop_mal_ab = time.time()

#Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_mal_ab, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_mal_ab[0]+=1
        # Keep only the review texts whose flag for this topic is -1 (negative)
        filtered_comments_mal_ab = keyword_dataframes['mal_ab_final_sen_df_jul'][keyword_dataframes['mal_ab_final_sen_df_jul'][topic]==-1]['review_text'].tolist()
        # Only call negative_keywords when there is at least one negative comment
        if filtered_comments_mal_ab:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_mal_ab,topic)
            # Store the result and accumulate the token usage reported by the call
            keyword_negative_output_mal_ab.append(keywords)
            keyword_input_token_mal_ab += input_tokens_loop
            keyword_output_token_mal_ab += output_token_loop
finally:
    # Always signal and join the progress thread, even when a call above raises;
    # otherwise the thread would never be told to stop after a failure.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_mal_ab = time.time()
# Cost at the rates hard-coded here: USD 0.01 per 1K input tokens, USD 0.03 per 1K output tokens
keyword_cost_input_token_mal_ab = round((0.01/1000)*keyword_input_token_mal_ab,2)
keyword_cost_output_token_mal_ab = round((0.03/1000)*keyword_output_token_mal_ab,2)
keyword_total_cost_mal_ab = keyword_cost_input_token_mal_ab + keyword_cost_output_token_mal_ab
keyword_total_time_loop_mal_ab = keyword_end_time_loop_mal_ab - keyword_start_time_loop_mal_ab

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_mal_ab[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_mal_ab,1))
print("Total Input Tokens - ", keyword_input_token_mal_ab)
print("Total Input Cost = USD ",keyword_cost_input_token_mal_ab)
print("Total Output Tokens - ", keyword_output_token_mal_ab)
print("Total Output Cost = USD ",keyword_cost_output_token_mal_ab)
print("Total Cost = USD ",round(keyword_total_cost_mal_ab,2))

Executed  10  Iterations
Total Execution time (in secs) -  20.5
Total Input Tokens -  10389
Total Input Cost = USD  0.1
Total Output Tokens -  645
Total Output Cost = USD  0.02
Total Cost = USD  0.12


In [635]:
# Columns of the summary table: three identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store's display name once: it is identical for every row, so
# keyword_mappings does not need to be searched again for each category.
input_store_identifier = 'mal_ab'
store_name_mal_ab = next(
    (value for key, value in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Collect rows as plain dicts and build the DataFrame once at the end; calling
# pd.concat inside the loop copies the whole frame again on every iteration.
keyword_rows_mal_ab = []
for json_str in keyword_negative_output_mal_ab:
    # Each entry is parsed as JSON mapping a category to a list of objects
    # that carry 'keywords' and 'phrases' strings
    data = json.loads(json_str)
    for category, content_list in data.items():
        # Start from a row where every column is empty
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name_mal_ab
        row_data['Sentiment'] = 'negative'
        for content in content_list:
            # Keywords row: trim whitespace around each comma-separated item
            row_data['Type'] = 'keywords'
            row_data[category] = ', '.join(kw.strip() for kw in content['keywords'].split(','))
            # Append a copy because row_data is mutated again for the phrases row
            keyword_rows_mal_ab.append(dict(row_data))
            # Phrases row: same category column, different 'Type'
            row_data['Type'] = 'phrases'
            row_data[category] = ', '.join(ph.strip() for ph in content['phrases'].split(','))
            keyword_rows_mal_ab.append(dict(row_data))

# With no rows, fall back to an empty frame that still has the expected columns
if keyword_rows_mal_ab:
    negative_keywords_mal_ab = pd.DataFrame(keyword_rows_mal_ab)
else:
    negative_keywords_mal_ab = pd.DataFrame(columns=columns)

# Consolidate rows that share 'Store Name', 'Sentiment' and 'Type' into a single row,
# joining the non-null values of every other column with a space
negative_keywords_mal_ab = negative_keywords_mal_ab.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

negative_keywords_mal_ab

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,Malabar Gold and Diamonds - Al Barsha - Dubai,negative,keywords,"scam :1, deceived :1, cheated :1, lied :1, unp...","cold person: 2, unwelcoming: 2, bad experience...","rude :2, unprofessional :2, lazy :1, cold :1, ...",No relevant negative keywords/ phrases,"poor collection:1, more options:1","not happy:1, not giving:1","high making :3, fake :1","high :2, negotiate :1, discounts :1, reduced :...","cracked :2, old :1, used :1",
1,Malabar Gold and Diamonds - Al Barsha - Dubai,negative,phrases,"scam customers :1, seller assured me :1, lied ...","not even attended: 1, not a very nice gesture:...","very rude and unprofessional staff :1, no one ...",No relevant negative keywords/ phrases,"very poor collection for kids earings:1, don't...","asked many times for discounts:1, not giving g...","making charges they are taking these days :1, ...","exponentially high side :1, making cost is ver...","not cracked :1, will not crack :1",


#### mal_b1_af

In [636]:
# List that collects the keyword output of negative_keywords() for each topic
keyword_negative_output_mal_b1_af = []
# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# One-element list: it is mutated in place while the progress thread holds a reference to it
keyword_counter_mal_b1_af=[0]
keyword_input_token_mal_b1_af = 0
keyword_output_token_mal_b1_af = 0
keyword_start_time_loop_mal_b1_af = time.time()

#Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_mal_b1_af, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_mal_b1_af[0]+=1
        # Keep only the review texts whose flag for this topic is -1 (negative)
        filtered_comments_mal_b1_af = keyword_dataframes['mal_b1_af_final_sen_df_jul'][keyword_dataframes['mal_b1_af_final_sen_df_jul'][topic]==-1]['review_text'].tolist()
        # Only call negative_keywords when there is at least one negative comment
        if filtered_comments_mal_b1_af:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_mal_b1_af,topic)
            # Store the result and accumulate the token usage reported by the call
            keyword_negative_output_mal_b1_af.append(keywords)
            keyword_input_token_mal_b1_af += input_tokens_loop
            keyword_output_token_mal_b1_af += output_token_loop
finally:
    # Always signal and join the progress thread, even when a call above raises;
    # otherwise the thread would never be told to stop after a failure.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_mal_b1_af = time.time()
# Cost at the rates hard-coded here: USD 0.01 per 1K input tokens, USD 0.03 per 1K output tokens
keyword_cost_input_token_mal_b1_af = round((0.01/1000)*keyword_input_token_mal_b1_af,2)
keyword_cost_output_token_mal_b1_af = round((0.03/1000)*keyword_output_token_mal_b1_af,2)
keyword_total_cost_mal_b1_af = keyword_cost_input_token_mal_b1_af + keyword_cost_output_token_mal_b1_af
keyword_total_time_loop_mal_b1_af = keyword_end_time_loop_mal_b1_af - keyword_start_time_loop_mal_b1_af

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_mal_b1_af[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_mal_b1_af,1))
print("Total Input Tokens - ", keyword_input_token_mal_b1_af)
print("Total Input Cost = USD ",keyword_cost_input_token_mal_b1_af)
print("Total Output Tokens - ", keyword_output_token_mal_b1_af)
print("Total Output Cost = USD ",keyword_cost_output_token_mal_b1_af)
print("Total Cost = USD ",round(keyword_total_cost_mal_b1_af,2))

Executed  10  Iterations
Total Execution time (in secs) -  15.0
Total Input Tokens -  7856
Total Input Cost = USD  0.08
Total Output Tokens -  500
Total Output Cost = USD  0.01
Total Cost = USD  0.09


In [637]:
# Build the negative keyword/phrase summary table for store 'mal_b1_af'.
# Layout: three identifying columns followed by one column per topic.
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store's display name once: the first keyword_mappings key that
# contains the identifier wins; the placeholder is kept when nothing matches.
input_store_identifier = 'mal_b1_af'
store_name_mal_b1_af = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name_mal_b1_af = value
        break

# Collect rows in a list and build the DataFrame once; concatenating inside the
# loop re-copies the whole frame on every added row.
keyword_rows_mal_b1_af = []
for json_str in keyword_negative_output_mal_b1_af:
    # Each entry is a JSON object mapping a category to a list of
    # {'keywords': "...", 'phrases': "..."} items with comma-separated values
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # One row for the keywords, then one for the phrases
            for row_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_mal_b1_af
                row_data['Sentiment'] = 'negative'
                row_data['Type'] = row_type
                # Normalise spacing around the comma-separated entries
                row_data[category] = ', '.join(part.strip() for part in content[row_type].split(','))
                keyword_rows_mal_b1_af.append(row_data)

# With no rows, fall back to an empty frame that still has the expected columns
if keyword_rows_mal_b1_af:
    negative_keywords_mal_b1_af = pd.DataFrame(keyword_rows_mal_b1_af)
else:
    negative_keywords_mal_b1_af = pd.DataFrame(columns=columns)

# Consolidate to a single row per ('Store Name', 'Sentiment', 'Type'); within each
# group every other column becomes its non-null strings joined by a space
negative_keywords_mal_b1_af = negative_keywords_mal_b1_af.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

negative_keywords_mal_b1_af

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,Malabar Gold and Diamonds - Al Fahidi Street -...,negative,keywords,"shattered trust:1, betrayed:1, cheated:1, dism...","unfriendly :3, unprofessional :1, lazy :1, arr...","unfriendly :2, unprofessional :1, lazy :1, arr...",less designs :1,,"more discount:2, bonus money:1, false commitme...",,"expensive:1, loss:1",,"can't exchange:1, only in Dubai:1"
1,Malabar Gold and Diamonds - Al Fahidi Street -...,negative,phrases,"payment was delayed:1, shattered my trust in y...","very unprofessional staff :1, not a pleasant s...","passing customer to each other :2, hesitant to...",No relevant negative phrases,,"store needs to add more discount:2, not provid...",,"huge loss:1, financial loss:1, too expensive:1",,"told me we can't exchange:1, you can exchange ..."


#### mal_ak

In [638]:
# Negative keyword/phrase extraction for store 'mal_ak'.
# Collects the raw results returned by negative_keywords (one entry per topic that
# has at least one negative review) and tracks token usage, cost and run time.
keyword_negative_output_mal_ak = []
# Topic columns of the sentiment DataFrame to scan
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# One-element list: shared with the progress thread and mutated in place by the loop below
keyword_counter_mal_ak = [0]
keyword_input_token_mal_ak = 0
keyword_output_token_mal_ak = 0
keyword_start_time_loop_mal_ak = time.time()

# Threading setup (print_dynamic_message_keyword is defined elsewhere in the notebook)
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_mal_ak, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Look the store's sentiment DataFrame up once instead of twice per topic
    sentiment_df_mal_ak = keyword_dataframes['mal_ak_final_sen_df_jul']
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_mal_ak[0] += 1
        # Keep only the reviews flagged negative (-1) for this topic
        filtered_comments_mal_ak = sentiment_df_mal_ak.loc[sentiment_df_mal_ak[topic] == -1, 'review_text'].tolist()
        # Topics without negative reviews are skipped, so no request is made for them
        if filtered_comments_mal_ak:
            # negative_keywords returns (result, input token count, output token count)
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_mal_ak, topic)
            keyword_negative_output_mal_ak.append(keywords)
            keyword_input_token_mal_ak += input_tokens_loop
            keyword_output_token_mal_ak += output_token_loop
finally:
    # Always stop the progress thread, even when a lookup or request above raises;
    # otherwise it would keep running after the cell has failed.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_mal_ak = time.time()
# Hard-coded rates: 0.01 per 1000 input tokens and 0.03 per 1000 output tokens (printed as USD)
keyword_cost_input_token_mal_ak = round((0.01/1000)*keyword_input_token_mal_ak,2)
keyword_cost_output_token_mal_ak = round((0.03/1000)*keyword_output_token_mal_ak,2)
keyword_total_cost_mal_ak = keyword_cost_input_token_mal_ak + keyword_cost_output_token_mal_ak
keyword_total_time_loop_mal_ak = keyword_end_time_loop_mal_ak - keyword_start_time_loop_mal_ak

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_mal_ak[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_mal_ak,1))
print("Total Input Tokens - ", keyword_input_token_mal_ak)
print("Total Input Cost = USD ",keyword_cost_input_token_mal_ak)
print("Total Output Tokens - ", keyword_output_token_mal_ak)
print("Total Output Cost = USD ",keyword_cost_output_token_mal_ak)
print("Total Cost = USD ",round(keyword_total_cost_mal_ak,2))

Executed  10  Iterations
Total Execution time (in secs) -  18.5
Total Input Tokens -  9552
Total Input Cost = USD  0.1
Total Output Tokens -  676
Total Output Cost = USD  0.02
Total Cost = USD  0.12


In [639]:
# Build the negative keyword/phrase summary table for store 'mal_ak'.
# Layout: three identifying columns followed by one column per topic.
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store's display name once: the first keyword_mappings key that
# contains the identifier wins; the placeholder is kept when nothing matches.
input_store_identifier = 'mal_ak'
store_name_mal_ak = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name_mal_ak = value
        break

# Collect rows in a list and build the DataFrame once; concatenating inside the
# loop re-copies the whole frame on every added row.
keyword_rows_mal_ak = []
for json_str in keyword_negative_output_mal_ak:
    # Each entry is a JSON object mapping a category to a list of
    # {'keywords': "...", 'phrases': "..."} items with comma-separated values
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # One row for the keywords, then one for the phrases
            for row_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_mal_ak
                row_data['Sentiment'] = 'negative'
                row_data['Type'] = row_type
                # Normalise spacing around the comma-separated entries
                row_data[category] = ', '.join(part.strip() for part in content[row_type].split(','))
                keyword_rows_mal_ak.append(row_data)

# With no rows, fall back to an empty frame that still has the expected columns
if keyword_rows_mal_ak:
    negative_keywords_mal_ak = pd.DataFrame(keyword_rows_mal_ak)
else:
    negative_keywords_mal_ak = pd.DataFrame(columns=columns)

# Consolidate to a single row per ('Store Name', 'Sentiment', 'Type'); within each
# group every other column becomes its non-null strings joined by a space
negative_keywords_mal_ak = negative_keywords_mal_ak.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

negative_keywords_mal_ak

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,Malabar Gold and Diamonds - Al Karama - Dubai,negative,keywords,"unacceptable :1, overpriced :1, frustrating :1...","poor service: 3, bad experience: 2, waiting: 2...","unattentive :2, disinterested :1, reluctant :1...",No relevant negative keywords/ phrases,fewer collections :1,"no discount :1, more discount :1, overpriced :...","high :3, no negotiation :1, more % :1","increased :1, high :1, overpriced :1, differen...","broke:2, poor quality:1",
1,Malabar Gold and Diamonds - Al Karama - Dubai,negative,phrases,"not honor a price we agree on :1, do not negot...","nobody assist us: 1, no one attended to me: 1,...","very weird look :1, didn't even look at the in...",No relevant negative keywords/ phrases,No relevant negative phrases,"no discount :1, more discount :1, do not negot...","making charge is very high :1, quite more % of...","increased gold prices :1, making charges is hi...","links snapped:1, broke within 2 months:1, brok...",


#### mal_aw_ad

In [640]:
# Negative keyword/phrase extraction for store 'mal_aw_ad'.
# Collects the raw results returned by negative_keywords (one entry per topic that
# has at least one negative review) and tracks token usage, cost and run time.
keyword_negative_output_mal_aw_ad = []
# Topic columns of the sentiment DataFrame to scan
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# One-element list: shared with the progress thread and mutated in place by the loop below
keyword_counter_mal_aw_ad = [0]
keyword_input_token_mal_aw_ad = 0
keyword_output_token_mal_aw_ad = 0
keyword_start_time_loop_mal_aw_ad = time.time()

# Threading setup (print_dynamic_message_keyword is defined elsewhere in the notebook)
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_mal_aw_ad, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Look the store's sentiment DataFrame up once instead of twice per topic
    sentiment_df_mal_aw_ad = keyword_dataframes['mal_aw_ad_final_sen_df_jul']
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_mal_aw_ad[0] += 1
        # Keep only the reviews flagged negative (-1) for this topic
        filtered_comments_mal_aw_ad = sentiment_df_mal_aw_ad.loc[sentiment_df_mal_aw_ad[topic] == -1, 'review_text'].tolist()
        # Topics without negative reviews are skipped, so no request is made for them
        if filtered_comments_mal_aw_ad:
            # negative_keywords returns (result, input token count, output token count)
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_mal_aw_ad, topic)
            keyword_negative_output_mal_aw_ad.append(keywords)
            keyword_input_token_mal_aw_ad += input_tokens_loop
            keyword_output_token_mal_aw_ad += output_token_loop
finally:
    # Always stop the progress thread, even when a lookup or request above raises;
    # otherwise it would keep running after the cell has failed.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_mal_aw_ad = time.time()
# Hard-coded rates: 0.01 per 1000 input tokens and 0.03 per 1000 output tokens (printed as USD)
keyword_cost_input_token_mal_aw_ad = round((0.01/1000)*keyword_input_token_mal_aw_ad,2)
keyword_cost_output_token_mal_aw_ad = round((0.03/1000)*keyword_output_token_mal_aw_ad,2)
keyword_total_cost_mal_aw_ad = keyword_cost_input_token_mal_aw_ad + keyword_cost_output_token_mal_aw_ad
keyword_total_time_loop_mal_aw_ad = keyword_end_time_loop_mal_aw_ad - keyword_start_time_loop_mal_aw_ad

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_mal_aw_ad[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_mal_aw_ad,1))
print("Total Input Tokens - ", keyword_input_token_mal_aw_ad)
print("Total Input Cost = USD ",keyword_cost_input_token_mal_aw_ad)
print("Total Output Tokens - ", keyword_output_token_mal_aw_ad)
print("Total Output Cost = USD ",keyword_cost_output_token_mal_aw_ad)
print("Total Cost = USD ",round(keyword_total_cost_mal_aw_ad,2))

Executed  10  Iterations
Total Execution time (in secs) -  9.5
Total Input Tokens -  4530
Total Input Cost = USD  0.05
Total Output Tokens -  288
Total Output Cost = USD  0.01
Total Cost = USD  0.06


In [641]:
# Build the negative keyword/phrase summary table for store 'mal_aw_ad'.
# Layout: three identifying columns followed by one column per topic.
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store's display name once: the first keyword_mappings key that
# contains the identifier wins; the placeholder is kept when nothing matches.
input_store_identifier = 'mal_aw_ad'
store_name_mal_aw_ad = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name_mal_aw_ad = value
        break

# Collect rows in a list and build the DataFrame once; concatenating inside the
# loop re-copies the whole frame on every added row.
keyword_rows_mal_aw_ad = []
for json_str in keyword_negative_output_mal_aw_ad:
    # Each entry is a JSON object mapping a category to a list of
    # {'keywords': "...", 'phrases': "..."} items with comma-separated values
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # One row for the keywords, then one for the phrases
            for row_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_mal_aw_ad
                row_data['Sentiment'] = 'negative'
                row_data['Type'] = row_type
                # Normalise spacing around the comma-separated entries
                row_data[category] = ', '.join(part.strip() for part in content[row_type].split(','))
                keyword_rows_mal_aw_ad.append(row_data)

# With no rows, fall back to an empty frame that still has the expected columns
if keyword_rows_mal_aw_ad:
    negative_keywords_mal_aw_ad = pd.DataFrame(keyword_rows_mal_aw_ad)
else:
    negative_keywords_mal_aw_ad = pd.DataFrame(columns=columns)

# Consolidate to a single row per ('Store Name', 'Sentiment', 'Type'); within each
# group every other column becomes its non-null strings joined by a space
negative_keywords_mal_aw_ad = negative_keywords_mal_aw_ad.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

negative_keywords_mal_aw_ad

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,Malabar Gold and Diamonds - Al Wahda Mall - Ab...,negative,keywords,No relevant negative keywords/ phrases,"system delay:1, attitude:1","rude :1, disrespected :1, attitude :1, unhappy...",,,,No relevant negative keywords/ phrases,,,
1,Malabar Gold and Diamonds - Al Wahda Mall - Ab...,negative,phrases,No relevant negative keywords/ phrases,"billing took more than 1 hour:1, get the item ...","Worst staff ever :1, treats customers with no ...",,,,No relevant negative keywords/ phrases,,,


#### mal_dm_ad

In [642]:
# Negative keyword/phrase extraction for store 'mal_dm_ad'.
# Collects the raw results returned by negative_keywords (one entry per topic that
# has at least one negative review) and tracks token usage, cost and run time.
keyword_negative_output_mal_dm_ad = []
# Topic columns of the sentiment DataFrame to scan
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# One-element list: shared with the progress thread and mutated in place by the loop below
keyword_counter_mal_dm_ad = [0]
keyword_input_token_mal_dm_ad = 0
keyword_output_token_mal_dm_ad = 0
keyword_start_time_loop_mal_dm_ad = time.time()

# Threading setup (print_dynamic_message_keyword is defined elsewhere in the notebook)
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_mal_dm_ad, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Look the store's sentiment DataFrame up once instead of twice per topic
    sentiment_df_mal_dm_ad = keyword_dataframes['mal_dm_ad_final_sen_df_jul']
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_mal_dm_ad[0] += 1
        # Keep only the reviews flagged negative (-1) for this topic
        filtered_comments_mal_dm_ad = sentiment_df_mal_dm_ad.loc[sentiment_df_mal_dm_ad[topic] == -1, 'review_text'].tolist()
        # Topics without negative reviews are skipped, so no request is made for them
        if filtered_comments_mal_dm_ad:
            # negative_keywords returns (result, input token count, output token count)
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_mal_dm_ad, topic)
            keyword_negative_output_mal_dm_ad.append(keywords)
            keyword_input_token_mal_dm_ad += input_tokens_loop
            keyword_output_token_mal_dm_ad += output_token_loop
finally:
    # Always stop the progress thread, even when a lookup or request above raises;
    # otherwise it would keep running after the cell has failed.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_mal_dm_ad = time.time()
# Hard-coded rates: 0.01 per 1000 input tokens and 0.03 per 1000 output tokens (printed as USD)
keyword_cost_input_token_mal_dm_ad = round((0.01/1000)*keyword_input_token_mal_dm_ad,2)
keyword_cost_output_token_mal_dm_ad = round((0.03/1000)*keyword_output_token_mal_dm_ad,2)
keyword_total_cost_mal_dm_ad = keyword_cost_input_token_mal_dm_ad + keyword_cost_output_token_mal_dm_ad
keyword_total_time_loop_mal_dm_ad = keyword_end_time_loop_mal_dm_ad - keyword_start_time_loop_mal_dm_ad

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_mal_dm_ad[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_mal_dm_ad,1))
print("Total Input Tokens - ", keyword_input_token_mal_dm_ad)
print("Total Input Cost = USD ",keyword_cost_input_token_mal_dm_ad)
print("Total Output Tokens - ", keyword_output_token_mal_dm_ad)
print("Total Output Cost = USD ",keyword_cost_output_token_mal_dm_ad)
print("Total Cost = USD ",round(keyword_total_cost_mal_dm_ad,2))

Executed  10  Iterations
Total Execution time (in secs) -  8.5
Total Input Tokens -  2645
Total Input Cost = USD  0.03
Total Output Tokens -  263
Total Output Cost = USD  0.01
Total Cost = USD  0.04


In [643]:
# Build the negative keyword/phrase summary table for store 'mal_dm_ad'.
# Layout: three identifying columns followed by one column per topic.
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store's display name once: the first keyword_mappings key that
# contains the identifier wins; the placeholder is kept when nothing matches.
input_store_identifier = 'mal_dm_ad'
store_name_mal_dm_ad = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name_mal_dm_ad = value
        break

# Collect rows in a list and build the DataFrame once; concatenating inside the
# loop re-copies the whole frame on every added row.
keyword_rows_mal_dm_ad = []
for json_str in keyword_negative_output_mal_dm_ad:
    # Each entry is a JSON object mapping a category to a list of
    # {'keywords': "...", 'phrases': "..."} items with comma-separated values
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # One row for the keywords, then one for the phrases
            for row_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_mal_dm_ad
                row_data['Sentiment'] = 'negative'
                row_data['Type'] = row_type
                # Normalise spacing around the comma-separated entries
                row_data[category] = ', '.join(part.strip() for part in content[row_type].split(','))
                keyword_rows_mal_dm_ad.append(row_data)

# With no rows, fall back to an empty frame that still has the expected columns
if keyword_rows_mal_dm_ad:
    negative_keywords_mal_dm_ad = pd.DataFrame(keyword_rows_mal_dm_ad)
else:
    negative_keywords_mal_dm_ad = pd.DataFrame(columns=columns)

# Consolidate to a single row per ('Store Name', 'Sentiment', 'Type'); within each
# group every other column becomes its non-null strings joined by a space
negative_keywords_mal_dm_ad = negative_keywords_mal_dm_ad.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

negative_keywords_mal_dm_ad

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,Malabar Gold and Diamonds - Dalma Mall - Abu D...,negative,keywords,,"waiting :2, disappointing :2, unattended :1, i...","bad service :2, disappointing :2, bad experien...",,,,"high make:1, making charge:1",,,
1,Malabar Gold and Diamonds - Dalma Mall - Abu D...,negative,phrases,,"waited for 10 mints and left :1, no one asked ...","no one asked for any supports :1, staff standi...",,,,"making charge little high:1, 200% making charg...",,,


#### mal_b1_ad

In [644]:
# Negative keyword/phrase extraction for store 'mal_b1_ad'.
# Collects the raw results returned by negative_keywords (one entry per topic that
# has at least one negative review) and tracks token usage, cost and run time.
keyword_negative_output_mal_b1_ad = []
# Topic columns of the sentiment DataFrame to scan
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# One-element list: shared with the progress thread and mutated in place by the loop below
keyword_counter_mal_b1_ad = [0]
keyword_input_token_mal_b1_ad = 0
keyword_output_token_mal_b1_ad = 0
keyword_start_time_loop_mal_b1_ad = time.time()

# Threading setup (print_dynamic_message_keyword is defined elsewhere in the notebook)
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_mal_b1_ad, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Look the store's sentiment DataFrame up once instead of twice per topic
    sentiment_df_mal_b1_ad = keyword_dataframes['mal_b1_ad_final_sen_df_jul']
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_mal_b1_ad[0] += 1
        # Keep only the reviews flagged negative (-1) for this topic
        filtered_comments_mal_b1_ad = sentiment_df_mal_b1_ad.loc[sentiment_df_mal_b1_ad[topic] == -1, 'review_text'].tolist()
        # Topics without negative reviews are skipped, so no request is made for them
        if filtered_comments_mal_b1_ad:
            # negative_keywords returns (result, input token count, output token count)
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_mal_b1_ad, topic)
            keyword_negative_output_mal_b1_ad.append(keywords)
            keyword_input_token_mal_b1_ad += input_tokens_loop
            keyword_output_token_mal_b1_ad += output_token_loop
finally:
    # Always stop the progress thread, even when a lookup or request above raises;
    # otherwise it would keep running after the cell has failed.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_mal_b1_ad = time.time()
# Hard-coded rates: 0.01 per 1000 input tokens and 0.03 per 1000 output tokens (printed as USD)
keyword_cost_input_token_mal_b1_ad = round((0.01/1000)*keyword_input_token_mal_b1_ad,2)
keyword_cost_output_token_mal_b1_ad = round((0.03/1000)*keyword_output_token_mal_b1_ad,2)
keyword_total_cost_mal_b1_ad = keyword_cost_input_token_mal_b1_ad + keyword_cost_output_token_mal_b1_ad
keyword_total_time_loop_mal_b1_ad = keyword_end_time_loop_mal_b1_ad - keyword_start_time_loop_mal_b1_ad

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_mal_b1_ad[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_mal_b1_ad,1))
print("Total Input Tokens - ", keyword_input_token_mal_b1_ad)
print("Total Input Cost = USD ",keyword_cost_input_token_mal_b1_ad)
print("Total Output Tokens - ", keyword_output_token_mal_b1_ad)
print("Total Output Cost = USD ",keyword_cost_output_token_mal_b1_ad)
print("Total Cost = USD ",round(keyword_total_cost_mal_b1_ad,2))

Executed  10  Iterations
Total Execution time (in secs) -  12.5
Total Input Tokens -  5550
Total Input Cost = USD  0.06
Total Output Tokens -  306
Total Output Cost = USD  0.01
Total Cost = USD  0.07


In [645]:
# Build the negative keyword/phrase summary table for store 'mal_b1_ad'.
# Layout: three identifying columns followed by one column per topic.
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store's display name once: the first keyword_mappings key that
# contains the identifier wins; the placeholder is kept when nothing matches.
input_store_identifier = 'mal_b1_ad'
store_name_mal_b1_ad = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name_mal_b1_ad = value
        break

# Collect rows in a list and build the DataFrame once; concatenating inside the
# loop re-copies the whole frame on every added row.
keyword_rows_mal_b1_ad = []
for json_str in keyword_negative_output_mal_b1_ad:
    # Each entry is a JSON object mapping a category to a list of
    # {'keywords': "...", 'phrases': "..."} items with comma-separated values
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # One row for the keywords, then one for the phrases
            for row_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_mal_b1_ad
                row_data['Sentiment'] = 'negative'
                row_data['Type'] = row_type
                # Normalise spacing around the comma-separated entries
                row_data[category] = ', '.join(part.strip() for part in content[row_type].split(','))
                keyword_rows_mal_b1_ad.append(row_data)

# With no rows, fall back to an empty frame that still has the expected columns
if keyword_rows_mal_b1_ad:
    negative_keywords_mal_b1_ad = pd.DataFrame(keyword_rows_mal_b1_ad)
else:
    negative_keywords_mal_b1_ad = pd.DataFrame(columns=columns)

# Consolidate to a single row per ('Store Name', 'Sentiment', 'Type'); within each
# group every other column becomes its non-null strings joined by a space
negative_keywords_mal_b1_ad = negative_keywords_mal_b1_ad.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

negative_keywords_mal_b1_ad

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,Malabar Gold and Diamonds - Hamdan Street ( Br...,negative,keywords,,No relevant negative keywords/ phrases,"disinterested :1, not friendly :1",,No relevant negative keywords/ phrases,maximum discount:1,"high :2, unbelievable :1",No relevant negative keywords/ phrases,No relevant negative keywords/ phrases,
1,Malabar Gold and Diamonds - Hamdan Street ( Br...,negative,phrases,,No relevant negative keywords/ phrases,"Sales person was disinterested in us :1, manag...",,No relevant negative keywords/ phrases,didn't got maximum discount:1,"way too high :1, too high :1",No relevant negative keywords/ phrases,No relevant negative keywords/ phrases,


#### mal_b2_ad

In [646]:
# Negative keyword/phrase extraction for store 'mal_b2_ad'.
# Collects the raw results returned by negative_keywords (one entry per topic that
# has at least one negative review) and tracks token usage, cost and run time.
keyword_negative_output_mal_b2_ad = []
# Topic columns of the sentiment DataFrame to scan
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# One-element list: shared with the progress thread and mutated in place by the loop below
keyword_counter_mal_b2_ad = [0]
keyword_input_token_mal_b2_ad = 0
keyword_output_token_mal_b2_ad = 0
keyword_start_time_loop_mal_b2_ad = time.time()

# Threading setup (print_dynamic_message_keyword is defined elsewhere in the notebook)
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_mal_b2_ad, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Look the store's sentiment DataFrame up once instead of twice per topic
    sentiment_df_mal_b2_ad = keyword_dataframes['mal_b2_ad_final_sen_df_jul']
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_mal_b2_ad[0] += 1
        # Keep only the reviews flagged negative (-1) for this topic
        filtered_comments_mal_b2_ad = sentiment_df_mal_b2_ad.loc[sentiment_df_mal_b2_ad[topic] == -1, 'review_text'].tolist()
        # Topics without negative reviews are skipped, so no request is made for them
        if filtered_comments_mal_b2_ad:
            # negative_keywords returns (result, input token count, output token count)
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_mal_b2_ad, topic)
            keyword_negative_output_mal_b2_ad.append(keywords)
            keyword_input_token_mal_b2_ad += input_tokens_loop
            keyword_output_token_mal_b2_ad += output_token_loop
finally:
    # Always stop the progress thread, even when a lookup or request above raises;
    # otherwise it would keep running after the cell has failed.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_mal_b2_ad = time.time()
# Hard-coded rates: 0.01 per 1000 input tokens and 0.03 per 1000 output tokens (printed as USD)
keyword_cost_input_token_mal_b2_ad = round((0.01/1000)*keyword_input_token_mal_b2_ad,2)
keyword_cost_output_token_mal_b2_ad = round((0.03/1000)*keyword_output_token_mal_b2_ad,2)
keyword_total_cost_mal_b2_ad = keyword_cost_input_token_mal_b2_ad + keyword_cost_output_token_mal_b2_ad
keyword_total_time_loop_mal_b2_ad = keyword_end_time_loop_mal_b2_ad - keyword_start_time_loop_mal_b2_ad

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_mal_b2_ad[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_mal_b2_ad,1))
print("Total Input Tokens - ", keyword_input_token_mal_b2_ad)
print("Total Input Cost = USD ",keyword_cost_input_token_mal_b2_ad)
print("Total Output Tokens - ", keyword_output_token_mal_b2_ad)
print("Total Output Cost = USD ",keyword_cost_output_token_mal_b2_ad)
print("Total Cost = USD ",round(keyword_total_cost_mal_b2_ad,2))

Executed  10  Iterations
Total Execution time (in secs) -  10.5
Total Input Tokens -  5352
Total Input Cost = USD  0.05
Total Output Tokens -  367
Total Output Cost = USD  0.01
Total Cost = USD  0.06


In [647]:
# Build the negative keyword/phrase summary table for store 'mal_b2_ad'.

# Output columns: identifying fields first, then one column per topic
columns = ['Store Name', 
           'Sentiment', 
           'Type', 
           'Customer Confidence',
           'Store Experience', 
           'Store Staff', 
           'Product Design', 
           'Product Variety',
           'Discount', 
           'Making Charge', 
           'Price', 
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store name once (it is the same for every row): the value of the
# first keyword_mappings key that contains the identifier, else a placeholder.
input_store_identifier = 'mal_b2_ad'
store_name_mal_b2_ad = next(
    (value for key, value in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Collect rows as plain dicts and build the DataFrame once, instead of
# re-copying the whole frame with pd.concat on every appended row.
rows_mal_b2_ad = []
for json_str in keyword_negative_output_mal_b2_ad:
    # Each stored result is parsed as a JSON object mapping category -> list of entries
    data = json.loads(json_str)
    for category, content_list in data.items():
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name_mal_b2_ad
        row_data['Sentiment'] = 'negative'
        for content in content_list:
            # One row for the keywords, one for the phrases; items are re-joined
            # with surrounding whitespace stripped.
            row_data['Type'] = 'keywords'
            row_data[category] = ', '.join(kw.strip() for kw in content['keywords'].split(','))
            rows_mal_b2_ad.append(dict(row_data))  # snapshot: row_data is reused below
            row_data['Type'] = 'phrases'
            row_data[category] = ', '.join(ph.strip() for ph in content['phrases'].split(','))
            rows_mal_b2_ad.append(dict(row_data))

if rows_mal_b2_ad:
    # Consolidate to one row per ('Store Name', 'Sentiment', 'Type'), space-joining
    # the non-null text of every other column.
    negative_keywords_mal_b2_ad = (
        pd.DataFrame(rows_mal_b2_ad)
        .groupby(['Store Name', 'Sentiment', 'Type'])
        .agg(lambda x: ' '.join(x.dropna()))
        .reset_index()
    )
else:
    # No negative results for this store: keep an empty table with the expected columns
    negative_keywords_mal_b2_ad = pd.DataFrame(columns=columns)

negative_keywords_mal_b2_ad

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,Malabar Gold and Diamonds - Hamdan Street (Bra...,negative,keywords,,"congested :1, disappointing :1, uncomfortable ...","rude :1, attitude issue :1, not friendly :1, b...",,No relevant negative keywords/ phrases,,"higher :2, high :1","expensive: 2, additional: 1",,
1,Malabar Gold and Diamonds - Hamdan Street (Bra...,negative,phrases,,"congested place :1, no chairs :1, no toilettes...","very rude with customers :1, don’t even look a...",,No relevant negative keywords/ phrases,,making charges are bit higher comparatively :1...,"expensive gifts: 2, rising gold prices: 1, inc...",,


#### mal_lu_ad

In [648]:
# Raw negative_keywords results for store 'mal_lu_ad', one entry per topic
# that has at least one negative review
keyword_negative_output_mal_lu_ad = []
# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = ['Customer Confidence',
                  'Store Experience', 
                  'Store Staff', 
                  'Product Design', 
                  'Product Variety',
                  'Discount', 
                  'Making Charge', 
                  'Price', 
                  'Product Quality',
                  'Jewellery Exchange']

# Mutable one-element counter: it is handed to the progress thread and incremented in the loop
keyword_counter_mal_lu_ad = [0]
keyword_input_token_mal_lu_ad = 0
keyword_output_token_mal_lu_ad = 0
keyword_start_time_loop_mal_lu_ad = time.time()

# Look the store's sentiment DataFrame up once instead of twice per topic
sentiment_df_mal_lu_ad = keyword_dataframes['mal_lu_ad_final_sen_df_jul']

#Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_mal_lu_ad, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_mal_lu_ad[0] += 1
        # Keep only the reviews whose value for this topic is -1 (negative)
        filtered_comments_mal_lu_ad = sentiment_df_mal_lu_ad[sentiment_df_mal_lu_ad[topic] == -1]['review_text'].tolist()
        # Only call negative_keywords when the topic has at least one negative review
        if filtered_comments_mal_lu_ad:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_mal_lu_ad, topic)
            # Store the raw result and accumulate the token usage
            keyword_negative_output_mal_lu_ad.append(keywords)
            keyword_input_token_mal_lu_ad += input_tokens_loop
            keyword_output_token_mal_lu_ad += output_token_loop
finally:
    # Always signal and join the progress thread, even when the loop raises;
    # stop_event is the only stop signal it is given.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_mal_lu_ad = time.time()
# Cost estimate: 0.01 (input) and 0.03 (output) are applied per 1,000 tokens.
# NOTE(review): confirm these USD rates match the model actually called.
# Each component is rounded to cents before the two are summed.
keyword_cost_input_token_mal_lu_ad = round((0.01/1000)*keyword_input_token_mal_lu_ad,2)
keyword_cost_output_token_mal_lu_ad = round((0.03/1000)*keyword_output_token_mal_lu_ad,2)
keyword_total_cost_mal_lu_ad = keyword_cost_input_token_mal_lu_ad + keyword_cost_output_token_mal_lu_ad
keyword_total_time_loop_mal_lu_ad = keyword_end_time_loop_mal_lu_ad - keyword_start_time_loop_mal_lu_ad

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_mal_lu_ad[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_mal_lu_ad,1))
print("Total Input Tokens - ", keyword_input_token_mal_lu_ad)
print("Total Input Cost = USD ",keyword_cost_input_token_mal_lu_ad)
print("Total Output Tokens - ", keyword_output_token_mal_lu_ad)
print("Total Output Cost = USD ",keyword_cost_output_token_mal_lu_ad)
print("Total Cost = USD ",round(keyword_total_cost_mal_lu_ad,2))

Executed  10  Iterations
Total Execution time (in secs) -  17.5
Total Input Tokens -  6762
Total Input Cost = USD  0.07
Total Output Tokens -  496
Total Output Cost = USD  0.01
Total Cost = USD  0.08


In [649]:
# Build the negative keyword/phrase summary table for store 'mal_lu_ad'.

# Output columns: identifying fields first, then one column per topic
columns = ['Store Name', 
           'Sentiment', 
           'Type', 
           'Customer Confidence',
           'Store Experience', 
           'Store Staff', 
           'Product Design', 
           'Product Variety',
           'Discount', 
           'Making Charge', 
           'Price', 
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store name once (it is the same for every row): the value of the
# first keyword_mappings key that contains the identifier, else a placeholder.
input_store_identifier = 'mal_lu_ad'
store_name_mal_lu_ad = next(
    (value for key, value in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Collect rows as plain dicts and build the DataFrame once, instead of
# re-copying the whole frame with pd.concat on every appended row.
rows_mal_lu_ad = []
for json_str in keyword_negative_output_mal_lu_ad:
    # Each stored result is parsed as a JSON object mapping category -> list of entries
    data = json.loads(json_str)
    for category, content_list in data.items():
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name_mal_lu_ad
        row_data['Sentiment'] = 'negative'
        for content in content_list:
            # One row for the keywords, one for the phrases; items are re-joined
            # with surrounding whitespace stripped.
            row_data['Type'] = 'keywords'
            row_data[category] = ', '.join(kw.strip() for kw in content['keywords'].split(','))
            rows_mal_lu_ad.append(dict(row_data))  # snapshot: row_data is reused below
            row_data['Type'] = 'phrases'
            row_data[category] = ', '.join(ph.strip() for ph in content['phrases'].split(','))
            rows_mal_lu_ad.append(dict(row_data))

if rows_mal_lu_ad:
    # Consolidate to one row per ('Store Name', 'Sentiment', 'Type'), space-joining
    # the non-null text of every other column.
    negative_keywords_mal_lu_ad = (
        pd.DataFrame(rows_mal_lu_ad)
        .groupby(['Store Name', 'Sentiment', 'Type'])
        .agg(lambda x: ' '.join(x.dropna()))
        .reset_index()
    )
else:
    # No negative results for this store: keep an empty table with the expected columns
    negative_keywords_mal_lu_ad = pd.DataFrame(columns=columns)

negative_keywords_mal_lu_ad

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,Malabar Gold and Diamonds - Lulu Hypermarket -...,negative,keywords,"fake reviews:1, scam:1, doubt:1","crowded :1, poorly managed :1, worst :1, unhap...","bad behaviour :1, poor service :1, not good :1...",,No relevant negative keywords/ phrases,"gift voucher :2, making charges :2","high :1, reduce :1, better :1, increase :1",,"burned :1, soldering :1",
1,Malabar Gold and Diamonds - Lulu Hypermarket -...,negative,phrases,"I suspect all are fake reviews:1, I personally...","Very crowded jewellery shop :1, have to stand ...","Staff attitudes vary bad :1, Christeena thomas...",,No relevant negative keywords/ phrases,"gift voucher scam :1, not useful :1, increase ...","way to high :1, reduce more in making charge :...",,"burn of soldering :1, doesnt look like the sam...",


#### mal_mb

In [650]:
# Raw negative_keywords results for store 'mal_mb', one entry per topic
# that has at least one negative review
keyword_negative_output_mal_mb = []
# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = ['Customer Confidence',
                  'Store Experience', 
                  'Store Staff', 
                  'Product Design', 
                  'Product Variety',
                  'Discount', 
                  'Making Charge', 
                  'Price', 
                  'Product Quality',
                  'Jewellery Exchange']

# Mutable one-element counter: it is handed to the progress thread and incremented in the loop
keyword_counter_mal_mb = [0]
keyword_input_token_mal_mb = 0
keyword_output_token_mal_mb = 0
keyword_start_time_loop_mal_mb = time.time()

# Look the store's sentiment DataFrame up once instead of twice per topic
sentiment_df_mal_mb = keyword_dataframes['mal_mb_final_sen_df_jul']

#Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_mal_mb, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_mal_mb[0] += 1
        # Keep only the reviews whose value for this topic is -1 (negative)
        filtered_comments_mal_mb = sentiment_df_mal_mb[sentiment_df_mal_mb[topic] == -1]['review_text'].tolist()
        # Only call negative_keywords when the topic has at least one negative review
        if filtered_comments_mal_mb:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_mal_mb, topic)
            # Store the raw result and accumulate the token usage
            keyword_negative_output_mal_mb.append(keywords)
            keyword_input_token_mal_mb += input_tokens_loop
            keyword_output_token_mal_mb += output_token_loop
finally:
    # Always signal and join the progress thread, even when the loop raises;
    # stop_event is the only stop signal it is given.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_mal_mb = time.time()
# Cost estimate: 0.01 (input) and 0.03 (output) are applied per 1,000 tokens.
# NOTE(review): confirm these USD rates match the model actually called.
# Each component is rounded to cents before the two are summed.
keyword_cost_input_token_mal_mb = round((0.01/1000)*keyword_input_token_mal_mb,2)
keyword_cost_output_token_mal_mb = round((0.03/1000)*keyword_output_token_mal_mb,2)
keyword_total_cost_mal_mb = keyword_cost_input_token_mal_mb + keyword_cost_output_token_mal_mb
keyword_total_time_loop_mal_mb = keyword_end_time_loop_mal_mb - keyword_start_time_loop_mal_mb

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_mal_mb[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_mal_mb,1))
print("Total Input Tokens - ", keyword_input_token_mal_mb)
print("Total Input Cost = USD ",keyword_cost_input_token_mal_mb)
print("Total Output Tokens - ", keyword_output_token_mal_mb)
print("Total Output Cost = USD ",keyword_cost_output_token_mal_mb)
print("Total Cost = USD ",round(keyword_total_cost_mal_mb,2))

Executed  10  Iterations
Total Execution time (in secs) -  17.5
Total Input Tokens -  9387
Total Input Cost = USD  0.09
Total Output Tokens -  612
Total Output Cost = USD  0.02
Total Cost = USD  0.11


In [651]:
# Build the negative keyword/phrase summary table for store 'mal_mb'.

# Output columns: identifying fields first, then one column per topic
columns = ['Store Name', 
           'Sentiment', 
           'Type', 
           'Customer Confidence',
           'Store Experience', 
           'Store Staff', 
           'Product Design', 
           'Product Variety',
           'Discount', 
           'Making Charge', 
           'Price', 
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store name once (it is the same for every row): the value of the
# first keyword_mappings key that contains the identifier, else a placeholder.
input_store_identifier = 'mal_mb'
store_name_mal_mb = next(
    (value for key, value in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Collect rows as plain dicts and build the DataFrame once, instead of
# re-copying the whole frame with pd.concat on every appended row.
rows_mal_mb = []
for json_str in keyword_negative_output_mal_mb:
    # Each stored result is parsed as a JSON object mapping category -> list of entries
    data = json.loads(json_str)
    for category, content_list in data.items():
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name_mal_mb
        row_data['Sentiment'] = 'negative'
        for content in content_list:
            # One row for the keywords, one for the phrases; items are re-joined
            # with surrounding whitespace stripped.
            row_data['Type'] = 'keywords'
            row_data[category] = ', '.join(kw.strip() for kw in content['keywords'].split(','))
            rows_mal_mb.append(dict(row_data))  # snapshot: row_data is reused below
            row_data['Type'] = 'phrases'
            row_data[category] = ', '.join(ph.strip() for ph in content['phrases'].split(','))
            rows_mal_mb.append(dict(row_data))

if rows_mal_mb:
    # Consolidate to one row per ('Store Name', 'Sentiment', 'Type'), space-joining
    # the non-null text of every other column.
    negative_keywords_mal_mb = (
        pd.DataFrame(rows_mal_mb)
        .groupby(['Store Name', 'Sentiment', 'Type'])
        .agg(lambda x: ' '.join(x.dropna()))
        .reset_index()
    )
else:
    # No negative results for this store: keep an empty table with the expected columns
    negative_keywords_mal_mb = pd.DataFrame(columns=columns)

negative_keywords_mal_mb

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,Malabar Gold and Diamonds - Meena Bazar - Dubai,negative,keywords,"liars :1, scammers :1, can't trust :1, unrelia...","bad service: 2, horrible experience: 1, pathet...","unhelpful :1, disinterested :1, rude :1, casua...",,,no discounts:1,"high :2, reducing :1, justification :1, ruthle...","expensive: 2, costly: 1, increased: 1, more: 1...","flimsy:1, broke:2","Bad Experience:1, Bad service:1"
1,Malabar Gold and Diamonds - Meena Bazar - Dubai,negative,phrases,charged ₹7500 for international transaction :1...,"long wait for billing: 1, not attended properl...","not attended properly :1, busy on their social...",,,More expensive and no discounts:1,"making changes are high :1, reduce the making ...","Costed us a lot of money: 1, Expect to pay mor...","it broke:1, that also broke:1",deduct from old gold weight and price:1


#### mal_sh_ad

In [652]:
# Raw negative_keywords results for store 'mal_sh_ad', one entry per topic
# that has at least one negative review
keyword_negative_output_mal_sh_ad = []
# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = ['Customer Confidence',
                  'Store Experience', 
                  'Store Staff', 
                  'Product Design', 
                  'Product Variety',
                  'Discount', 
                  'Making Charge', 
                  'Price', 
                  'Product Quality',
                  'Jewellery Exchange']

# Mutable one-element counter: it is handed to the progress thread and incremented in the loop
keyword_counter_mal_sh_ad = [0]
keyword_input_token_mal_sh_ad = 0
keyword_output_token_mal_sh_ad = 0
keyword_start_time_loop_mal_sh_ad = time.time()

# Look the store's sentiment DataFrame up once instead of twice per topic
sentiment_df_mal_sh_ad = keyword_dataframes['mal_sh_ad_final_sen_df_jul']

#Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_mal_sh_ad, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_mal_sh_ad[0] += 1
        # Keep only the reviews whose value for this topic is -1 (negative)
        filtered_comments_mal_sh_ad = sentiment_df_mal_sh_ad[sentiment_df_mal_sh_ad[topic] == -1]['review_text'].tolist()
        # Only call negative_keywords when the topic has at least one negative review
        if filtered_comments_mal_sh_ad:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_mal_sh_ad, topic)
            # Store the raw result and accumulate the token usage
            keyword_negative_output_mal_sh_ad.append(keywords)
            keyword_input_token_mal_sh_ad += input_tokens_loop
            keyword_output_token_mal_sh_ad += output_token_loop
finally:
    # Always signal and join the progress thread, even when the loop raises;
    # stop_event is the only stop signal it is given.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_mal_sh_ad = time.time()
# Cost estimate: 0.01 (input) and 0.03 (output) are applied per 1,000 tokens.
# NOTE(review): confirm these USD rates match the model actually called.
# Each component is rounded to cents before the two are summed.
keyword_cost_input_token_mal_sh_ad = round((0.01/1000)*keyword_input_token_mal_sh_ad,2)
keyword_cost_output_token_mal_sh_ad = round((0.03/1000)*keyword_output_token_mal_sh_ad,2)
keyword_total_cost_mal_sh_ad = keyword_cost_input_token_mal_sh_ad + keyword_cost_output_token_mal_sh_ad
keyword_total_time_loop_mal_sh_ad = keyword_end_time_loop_mal_sh_ad - keyword_start_time_loop_mal_sh_ad

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_mal_sh_ad[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_mal_sh_ad,1))
print("Total Input Tokens - ", keyword_input_token_mal_sh_ad)
print("Total Input Cost = USD ",keyword_cost_input_token_mal_sh_ad)
print("Total Output Tokens - ", keyword_output_token_mal_sh_ad)
print("Total Output Cost = USD ",keyword_cost_output_token_mal_sh_ad)
print("Total Cost = USD ",round(keyword_total_cost_mal_sh_ad,2))

Executed  10  Iterations
Total Execution time (in secs) -  18.5
Total Input Tokens -  8437
Total Input Cost = USD  0.08
Total Output Tokens -  631
Total Output Cost = USD  0.02
Total Cost = USD  0.1


In [653]:
# Build the negative keyword/phrase summary table for store 'mal_sh_ad'.

# Output columns: identifying fields first, then one column per topic
columns = ['Store Name', 
           'Sentiment', 
           'Type', 
           'Customer Confidence',
           'Store Experience', 
           'Store Staff', 
           'Product Design', 
           'Product Variety',
           'Discount', 
           'Making Charge', 
           'Price', 
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store name once (it is the same for every row): the value of the
# first keyword_mappings key that contains the identifier, else a placeholder.
input_store_identifier = 'mal_sh_ad'
store_name_mal_sh_ad = next(
    (value for key, value in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Collect rows as plain dicts and build the DataFrame once, instead of
# re-copying the whole frame with pd.concat on every appended row.
rows_mal_sh_ad = []
for json_str in keyword_negative_output_mal_sh_ad:
    # Each stored result is parsed as a JSON object mapping category -> list of entries
    data = json.loads(json_str)
    for category, content_list in data.items():
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name_mal_sh_ad
        row_data['Sentiment'] = 'negative'
        for content in content_list:
            # One row for the keywords, one for the phrases; items are re-joined
            # with surrounding whitespace stripped.
            row_data['Type'] = 'keywords'
            row_data[category] = ', '.join(kw.strip() for kw in content['keywords'].split(','))
            rows_mal_sh_ad.append(dict(row_data))  # snapshot: row_data is reused below
            row_data['Type'] = 'phrases'
            row_data[category] = ', '.join(ph.strip() for ph in content['phrases'].split(','))
            rows_mal_sh_ad.append(dict(row_data))

if rows_mal_sh_ad:
    # Consolidate to one row per ('Store Name', 'Sentiment', 'Type'), space-joining
    # the non-null text of every other column.
    negative_keywords_mal_sh_ad = (
        pd.DataFrame(rows_mal_sh_ad)
        .groupby(['Store Name', 'Sentiment', 'Type'])
        .agg(lambda x: ' '.join(x.dropna()))
        .reset_index()
    )
else:
    # No negative results for this store: keep an empty table with the expected columns
    negative_keywords_mal_sh_ad = pd.DataFrame(columns=columns)

negative_keywords_mal_sh_ad

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,Malabar Gold and Diamonds - Shabia Musaffah,negative,keywords,"refused :2, never accept :1, not updated :1, r...","unhelpful :1, uninterested :1, disappointing :...","unhelpful :1, uninterested :1, poor :1, vague ...",No relevant negative keywords/ phrases,No relevant negative keywords/ phrases,"no discount:1, haggle:1","high :2, expensive :2, price :1, deduction :1,...","EXPENSIVE:1, overpriced:1, high:1, difference:...",,"refused :1, can't take :1"
1,Malabar Gold and Diamonds - Shabia Musaffah,negative,phrases,"never take back :1, not updated the same when ...","very poor customer service :1, nobody was atte...","never good behaviour with costumer :1, nobody ...",No relevant negative keywords/ phrases,No relevant negative keywords/ phrases,"had to haggle hard for discount:1, buy gold wi...","making charge is too price :1, making charges ...","making charges too high:1, many a times more t...",,"tried to exchange today but they refused :1, M..."


#### mal_b2_af

In [654]:
# Raw negative_keywords results for store 'mal_b2_af', one entry per topic
# that has at least one negative review
keyword_negative_output_mal_b2_af = []
# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = ['Customer Confidence',
                  'Store Experience', 
                  'Store Staff', 
                  'Product Design', 
                  'Product Variety',
                  'Discount', 
                  'Making Charge', 
                  'Price', 
                  'Product Quality',
                  'Jewellery Exchange']

# Mutable one-element counter: it is handed to the progress thread and incremented in the loop
keyword_counter_mal_b2_af = [0]
keyword_input_token_mal_b2_af = 0
keyword_output_token_mal_b2_af = 0
keyword_start_time_loop_mal_b2_af = time.time()

# Look the store's sentiment DataFrame up once instead of twice per topic
sentiment_df_mal_b2_af = keyword_dataframes['mal_b2_af_final_sen_df_jul']

#Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_mal_b2_af, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_mal_b2_af[0] += 1
        # Keep only the reviews whose value for this topic is -1 (negative)
        filtered_comments_mal_b2_af = sentiment_df_mal_b2_af[sentiment_df_mal_b2_af[topic] == -1]['review_text'].tolist()
        # Only call negative_keywords when the topic has at least one negative review
        if filtered_comments_mal_b2_af:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_mal_b2_af, topic)
            # Store the raw result and accumulate the token usage
            keyword_negative_output_mal_b2_af.append(keywords)
            keyword_input_token_mal_b2_af += input_tokens_loop
            keyword_output_token_mal_b2_af += output_token_loop
finally:
    # Always signal and join the progress thread, even when the loop raises;
    # stop_event is the only stop signal it is given.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_mal_b2_af = time.time()
# Cost estimate: 0.01 (input) and 0.03 (output) are applied per 1,000 tokens.
# NOTE(review): confirm these USD rates match the model actually called.
# Each component is rounded to cents before the two are summed.
keyword_cost_input_token_mal_b2_af = round((0.01/1000)*keyword_input_token_mal_b2_af,2)
keyword_cost_output_token_mal_b2_af = round((0.03/1000)*keyword_output_token_mal_b2_af,2)
keyword_total_cost_mal_b2_af = keyword_cost_input_token_mal_b2_af + keyword_cost_output_token_mal_b2_af
keyword_total_time_loop_mal_b2_af = keyword_end_time_loop_mal_b2_af - keyword_start_time_loop_mal_b2_af

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_mal_b2_af[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_mal_b2_af,1))
print("Total Input Tokens - ", keyword_input_token_mal_b2_af)
print("Total Input Cost = USD ",keyword_cost_input_token_mal_b2_af)
print("Total Output Tokens - ", keyword_output_token_mal_b2_af)
print("Total Output Cost = USD ",keyword_cost_output_token_mal_b2_af)
print("Total Cost = USD ",round(keyword_total_cost_mal_b2_af,2))

Executed  10  Iterations
Total Execution time (in secs) -  7.0
Total Input Tokens -  3115
Total Input Cost = USD  0.03
Total Output Tokens -  208
Total Output Cost = USD  0.01
Total Cost = USD  0.04


In [655]:
# Build the negative keyword/phrase summary table for store 'mal_b2_af'.

# Output columns: identifying fields first, then one column per topic
columns = ['Store Name', 
           'Sentiment', 
           'Type', 
           'Customer Confidence',
           'Store Experience', 
           'Store Staff', 
           'Product Design', 
           'Product Variety',
           'Discount', 
           'Making Charge', 
           'Price', 
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store name once (it is the same for every row): the value of the
# first keyword_mappings key that contains the identifier, else a placeholder.
input_store_identifier = 'mal_b2_af'
store_name_mal_b2_af = next(
    (value for key, value in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Collect rows as plain dicts and build the DataFrame once, instead of
# re-copying the whole frame with pd.concat on every appended row.
rows_mal_b2_af = []
for json_str in keyword_negative_output_mal_b2_af:
    # Each stored result is parsed as a JSON object mapping category -> list of entries
    data = json.loads(json_str)
    for category, content_list in data.items():
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name_mal_b2_af
        row_data['Sentiment'] = 'negative'
        for content in content_list:
            # One row for the keywords, one for the phrases; items are re-joined
            # with surrounding whitespace stripped.
            row_data['Type'] = 'keywords'
            row_data[category] = ', '.join(kw.strip() for kw in content['keywords'].split(','))
            rows_mal_b2_af.append(dict(row_data))  # snapshot: row_data is reused below
            row_data['Type'] = 'phrases'
            row_data[category] = ', '.join(ph.strip() for ph in content['phrases'].split(','))
            rows_mal_b2_af.append(dict(row_data))

if rows_mal_b2_af:
    # Consolidate to one row per ('Store Name', 'Sentiment', 'Type'), space-joining
    # the non-null text of every other column.
    negative_keywords_mal_b2_af = (
        pd.DataFrame(rows_mal_b2_af)
        .groupby(['Store Name', 'Sentiment', 'Type'])
        .agg(lambda x: ' '.join(x.dropna()))
        .reset_index()
    )
else:
    # No negative results for this store: keep an empty table with the expected columns
    negative_keywords_mal_b2_af = pd.DataFrame(columns=columns)

negative_keywords_mal_b2_af

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,Malabar Gold and Diamonds - Souq Al Kabeer Bui...,negative,keywords,,"worst level :1, don’t value :1","irresponsible:2, worst:1, bad:1, unacceptable:...",,,,,No relevant negative keywords/ phrases,,
1,Malabar Gold and Diamonds - Souq Al Kabeer Bui...,negative,phrases,,treat customers like beggars :1,"rush to bill for their own customers:1, treat ...",,,,,No relevant negative keywords/ phrases,,


#### mna_mb

In [656]:
# Raw negative_keywords results for store 'mna_mb', one entry per topic
# that has at least one negative review
keyword_negative_output_mna_mb = []
# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = ['Customer Confidence',
                  'Store Experience', 
                  'Store Staff', 
                  'Product Design', 
                  'Product Variety',
                  'Discount', 
                  'Making Charge', 
                  'Price', 
                  'Product Quality',
                  'Jewellery Exchange']

# Mutable one-element counter: it is handed to the progress thread and incremented in the loop
keyword_counter_mna_mb = [0]
keyword_input_token_mna_mb = 0
keyword_output_token_mna_mb = 0
keyword_start_time_loop_mna_mb = time.time()

# Look the store's sentiment DataFrame up once instead of twice per topic
sentiment_df_mna_mb = keyword_dataframes['mna_mb_final_sen_df_jul']

#Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_mna_mb, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_mna_mb[0] += 1
        # Keep only the reviews whose value for this topic is -1 (negative)
        filtered_comments_mna_mb = sentiment_df_mna_mb[sentiment_df_mna_mb[topic] == -1]['review_text'].tolist()
        # Only call negative_keywords when the topic has at least one negative review
        if filtered_comments_mna_mb:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_mna_mb, topic)
            # Store the raw result and accumulate the token usage
            keyword_negative_output_mna_mb.append(keywords)
            keyword_input_token_mna_mb += input_tokens_loop
            keyword_output_token_mna_mb += output_token_loop
finally:
    # Always signal and join the progress thread, even when the loop raises;
    # stop_event is the only stop signal it is given.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_mna_mb = time.time()
# Cost estimate: 0.01 (input) and 0.03 (output) are applied per 1,000 tokens.
# NOTE(review): confirm these USD rates match the model actually called.
# Each component is rounded to cents before the two are summed.
keyword_cost_input_token_mna_mb = round((0.01/1000)*keyword_input_token_mna_mb,2)
keyword_cost_output_token_mna_mb = round((0.03/1000)*keyword_output_token_mna_mb,2)
keyword_total_cost_mna_mb = keyword_cost_input_token_mna_mb + keyword_cost_output_token_mna_mb
keyword_total_time_loop_mna_mb = keyword_end_time_loop_mna_mb - keyword_start_time_loop_mna_mb

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_mna_mb[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_mna_mb,1))
print("Total Input Tokens - ", keyword_input_token_mna_mb)
print("Total Input Cost = USD ",keyword_cost_input_token_mna_mb)
print("Total Output Tokens - ", keyword_output_token_mna_mb)
print("Total Output Cost = USD ",keyword_cost_output_token_mna_mb)
print("Total Cost = USD ",round(keyword_total_cost_mna_mb,2))

Executed  10  Iterations
Total Execution time (in secs) -  9.0
Total Input Tokens -  3046
Total Input Cost = USD  0.03
Total Output Tokens -  229
Total Output Cost = USD  0.01
Total Cost = USD  0.04


In [657]:
# Column layout of the summary table: three identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store's display name once: take the value of the first
# keyword_mappings key that contains the identifier, else a fallback label
input_store_identifier = 'mna_mb'
store_name_mna_mb = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name_mna_mb = value
        break

# Collect rows as plain dicts and build the DataFrame once afterwards;
# pd.concat inside the loop would re-copy all earlier rows on every append
keyword_rows_mna_mb = []
for json_str in keyword_negative_output_mna_mb:
    # Each response is parsed as a JSON object mapping a category to a list of
    # entries that carry comma-separated 'keywords' and 'phrases' strings
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # One row for the keywords and one for the phrases of this entry
            for entry_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_mna_mb
                row_data['Sentiment'] = 'negative'
                row_data['Type'] = entry_type
                # Re-join the comma-separated items with surrounding whitespace stripped
                row_data[category] = ', '.join(item.strip() for item in content[entry_type].split(','))
                keyword_rows_mna_mb.append(row_data)

# With no rows, fall back to an empty frame that still has the grouping columns
if keyword_rows_mna_mb:
    negative_keywords_mna_mb = pd.DataFrame(keyword_rows_mna_mb)
else:
    negative_keywords_mna_mb = pd.DataFrame(columns=columns)

# Collapse to one row per (Store Name, Sentiment, Type), space-joining the non-null text of each topic column
negative_keywords_mna_mb = negative_keywords_mna_mb.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

negative_keywords_mna_mb

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,Meena Jewellers - Meena Bazar,negative,keywords,,"Pathetic service:1, waited:1","impatient :1, attitude :1, no regard :1, bad s...",,,,No relevant negative keywords/ phrases,No relevant negative keywords/ phrases,,
1,Meena Jewellers - Meena Bazar,negative,phrases,,"no regard for new customers:1, no one to attend:1","service you like it's a waste of time :1, no o...",,,,No relevant negative keywords/ phrases,No relevant negative keywords/ phrases,,


#### min_ak

In [658]:
# Raw responses returned by negative_keywords, one entry per topic that has negative reviews
keyword_negative_output_min_ak = []
# Topic columns of the sentiment DataFrame to scan (the 'review_text' column is not a topic)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# One-element list so the progress thread can observe increments made by this cell
keyword_counter_min_ak = [0]
keyword_input_token_min_ak = 0
keyword_output_token_min_ak = 0
keyword_start_time_loop_min_ak = time.time()

# Background thread that reports progress while the loop below runs
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_min_ak, keyword_total_iterations, stop_event))
message_thread.start()

# Look the store's sentiment frame up once instead of twice per topic
sentiment_df_min_ak = keyword_dataframes['min_ak_final_sen_df_jul']

try:
    for topic in keyword_topics:
        keyword_counter_min_ak[0] += 1
        # Keep the text of the reviews whose value for this topic is -1 (negative)
        filtered_comments_min_ak = sentiment_df_min_ak.loc[sentiment_df_min_ak[topic] == -1, 'review_text'].tolist()
        # Only call negative_keywords when the topic has at least one negative review
        if filtered_comments_min_ak:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_min_ak, topic)
            # Store the raw response and accumulate the token usage it reported
            keyword_negative_output_min_ak.append(keywords)
            keyword_input_token_min_ak += input_tokens_loop
            keyword_output_token_min_ak += output_token_loop
finally:
    # Stop the progress thread even when the loop raises; otherwise a failed
    # call would leave the thread running after the cell has errored
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_min_ak = time.time()
# Costs use the rates hard-coded here: USD 0.01 per 1000 input tokens, USD 0.03 per 1000 output tokens
keyword_cost_input_token_min_ak = round((0.01/1000)*keyword_input_token_min_ak,2)
keyword_cost_output_token_min_ak = round((0.03/1000)*keyword_output_token_min_ak,2)
keyword_total_cost_min_ak = keyword_cost_input_token_min_ak + keyword_cost_output_token_min_ak
keyword_total_time_loop_min_ak = keyword_end_time_loop_min_ak - keyword_start_time_loop_min_ak

# Display loop performance parameters & cost (the counter counts topics visited, not calls made)
clear_output(wait=True)
print("Executed ",keyword_counter_min_ak[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_min_ak,1))
print("Total Input Tokens - ", keyword_input_token_min_ak)
print("Total Input Cost = USD ",keyword_cost_input_token_min_ak)
print("Total Output Tokens - ", keyword_output_token_min_ak)
print("Total Output Cost = USD ",keyword_cost_output_token_min_ak)
print("Total Cost = USD ",round(keyword_total_cost_min_ak,2))

Executed  10  Iterations
Total Execution time (in secs) -  12.5
Total Input Tokens -  5843
Total Input Cost = USD  0.06
Total Output Tokens -  450
Total Output Cost = USD  0.01
Total Cost = USD  0.07


In [659]:
# Column layout of the summary table: three identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store's display name once: take the value of the first
# keyword_mappings key that contains the identifier, else a fallback label
input_store_identifier = 'min_ak'
store_name_min_ak = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name_min_ak = value
        break

# Collect rows as plain dicts and build the DataFrame once afterwards;
# pd.concat inside the loop would re-copy all earlier rows on every append
keyword_rows_min_ak = []
for json_str in keyword_negative_output_min_ak:
    # Each response is parsed as a JSON object mapping a category to a list of
    # entries that carry comma-separated 'keywords' and 'phrases' strings
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # One row for the keywords and one for the phrases of this entry
            for entry_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_min_ak
                row_data['Sentiment'] = 'negative'
                row_data['Type'] = entry_type
                # Re-join the comma-separated items with surrounding whitespace stripped
                row_data[category] = ', '.join(item.strip() for item in content[entry_type].split(','))
                keyword_rows_min_ak.append(row_data)

# With no rows, fall back to an empty frame that still has the grouping columns
if keyword_rows_min_ak:
    negative_keywords_min_ak = pd.DataFrame(keyword_rows_min_ak)
else:
    negative_keywords_min_ak = pd.DataFrame(columns=columns)

# Collapse to one row per (Store Name, Sentiment, Type), space-joining the non-null text of each topic column
negative_keywords_min_ak = negative_keywords_min_ak.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

negative_keywords_min_ak

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,Mint Jewels - Al Karama,negative,keywords,"unprofessional :2, dishonest :1, aggressive :1...","unorganized system :1, unprofessional manner :...","aggressive :1, yelling :1",,,low discount:1,,"high price: 3, incorrect rates: 1, lowest pric...","old :1, milk spots :1",
1,Mint Jewels - Al Karama,negative,phrases,"no proper testing system :1, incorrect rate co...","no testing machines :1, incorrect rate codes :...","demeanour turned aggressive :1, Transparency a...",,add more for buying gold: 1,more discount:1,,"extremely high: 1, price could be better: 1, p...","all their silver are old :1, have milk spots :1",


#### joy_ak

In [660]:
# Raw responses returned by negative_keywords, one entry per topic that has negative reviews
keyword_negative_output_joy_ak = []
# Topic columns of the sentiment DataFrame to scan (the 'review_text' column is not a topic)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# One-element list so the progress thread can observe increments made by this cell
keyword_counter_joy_ak = [0]
keyword_input_token_joy_ak = 0
keyword_output_token_joy_ak = 0
keyword_start_time_loop_joy_ak = time.time()

# Background thread that reports progress while the loop below runs
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_joy_ak, keyword_total_iterations, stop_event))
message_thread.start()

# Look the store's sentiment frame up once instead of twice per topic
sentiment_df_joy_ak = keyword_dataframes['joy_ak_final_sen_df_jul']

try:
    for topic in keyword_topics:
        keyword_counter_joy_ak[0] += 1
        # Keep the text of the reviews whose value for this topic is -1 (negative)
        filtered_comments_joy_ak = sentiment_df_joy_ak.loc[sentiment_df_joy_ak[topic] == -1, 'review_text'].tolist()
        # Only call negative_keywords when the topic has at least one negative review
        if filtered_comments_joy_ak:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_joy_ak, topic)
            # Store the raw response and accumulate the token usage it reported
            keyword_negative_output_joy_ak.append(keywords)
            keyword_input_token_joy_ak += input_tokens_loop
            keyword_output_token_joy_ak += output_token_loop
finally:
    # Stop the progress thread even when the loop raises; otherwise a failed
    # call would leave the thread running after the cell has errored
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_joy_ak = time.time()
# Costs use the rates hard-coded here: USD 0.01 per 1000 input tokens, USD 0.03 per 1000 output tokens
keyword_cost_input_token_joy_ak = round((0.01/1000)*keyword_input_token_joy_ak,2)
keyword_cost_output_token_joy_ak = round((0.03/1000)*keyword_output_token_joy_ak,2)
keyword_total_cost_joy_ak = keyword_cost_input_token_joy_ak + keyword_cost_output_token_joy_ak
keyword_total_time_loop_joy_ak = keyword_end_time_loop_joy_ak - keyword_start_time_loop_joy_ak

# Display loop performance parameters & cost (the counter counts topics visited, not calls made)
clear_output(wait=True)
print("Executed ",keyword_counter_joy_ak[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_joy_ak,1))
print("Total Input Tokens - ", keyword_input_token_joy_ak)
print("Total Input Cost = USD ",keyword_cost_input_token_joy_ak)
print("Total Output Tokens - ", keyword_output_token_joy_ak)
print("Total Output Cost = USD ",keyword_cost_output_token_joy_ak)
print("Total Cost = USD ",round(keyword_total_cost_joy_ak,2))

Executed  10  Iterations
Total Execution time (in secs) -  15.0
Total Input Tokens -  7526
Total Input Cost = USD  0.08
Total Output Tokens -  554
Total Output Cost = USD  0.02
Total Cost = USD  0.1


In [661]:
# Column layout of the summary table: three identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store's display name once: take the value of the first
# keyword_mappings key that contains the identifier, else a fallback label
input_store_identifier = 'joy_ak'
store_name_joy_ak = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name_joy_ak = value
        break

# Collect rows as plain dicts and build the DataFrame once afterwards;
# pd.concat inside the loop would re-copy all earlier rows on every append
keyword_rows_joy_ak = []
for json_str in keyword_negative_output_joy_ak:
    # Each response is parsed as a JSON object mapping a category to a list of
    # entries that carry comma-separated 'keywords' and 'phrases' strings
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # One row for the keywords and one for the phrases of this entry
            for entry_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_joy_ak
                row_data['Sentiment'] = 'negative'
                row_data['Type'] = entry_type
                # Re-join the comma-separated items with surrounding whitespace stripped
                row_data[category] = ', '.join(item.strip() for item in content[entry_type].split(','))
                keyword_rows_joy_ak.append(row_data)

# With no rows, fall back to an empty frame that still has the grouping columns
if keyword_rows_joy_ak:
    negative_keywords_joy_ak = pd.DataFrame(keyword_rows_joy_ak)
else:
    negative_keywords_joy_ak = pd.DataFrame(columns=columns)

# Collapse to one row per (Store Name, Sentiment, Type), space-joining the non-null text of each topic column
negative_keywords_joy_ak = negative_keywords_joy_ak.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

negative_keywords_joy_ak

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,Joyalukkas Jewellery - Al Karama,negative,keywords,No relevant negative keywords/ phrases,"waiting :3, worst experience :1, bad experienc...","incompetent staffs:1, not supportive:1, ignori...",No relevant negative keywords/ phrases,"limited :1, collection :1",No relevant negative keywords/ phrases,"making charges: 3, very high: 2",No relevant negative keywords/ phrases,"defective :1, fake :1, broken :1, bad quality ...",
1,Joyalukkas Jewellery - Al Karama,negative,phrases,No relevant negative keywords/ phrases,Be prepared to wait for 40mins for them to com...,"Salesman not supportive:1, took more than one ...",No relevant negative keywords/ phrases,"collections of long chain is limited :1, More ...",No relevant negative keywords/ phrases,"making charges r more: 1, charging making char...",No relevant negative keywords/ phrases,"gold chain broken :1, bad gold quality :1",


#### kan_mb

In [662]:
# Raw responses returned by negative_keywords, one entry per topic that has negative reviews
keyword_negative_output_kan_mb = []
# Topic columns of the sentiment DataFrame to scan (the 'review_text' column is not a topic)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# One-element list so the progress thread can observe increments made by this cell
keyword_counter_kan_mb = [0]
keyword_input_token_kan_mb = 0
keyword_output_token_kan_mb = 0
keyword_start_time_loop_kan_mb = time.time()

# Background thread that reports progress while the loop below runs
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_kan_mb, keyword_total_iterations, stop_event))
message_thread.start()

# Look the store's sentiment frame up once instead of twice per topic
sentiment_df_kan_mb = keyword_dataframes['kan_mb_final_sen_df_jul']

try:
    for topic in keyword_topics:
        keyword_counter_kan_mb[0] += 1
        # Keep the text of the reviews whose value for this topic is -1 (negative)
        filtered_comments_kan_mb = sentiment_df_kan_mb.loc[sentiment_df_kan_mb[topic] == -1, 'review_text'].tolist()
        # Only call negative_keywords when the topic has at least one negative review
        if filtered_comments_kan_mb:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_kan_mb, topic)
            # Store the raw response and accumulate the token usage it reported
            keyword_negative_output_kan_mb.append(keywords)
            keyword_input_token_kan_mb += input_tokens_loop
            keyword_output_token_kan_mb += output_token_loop
finally:
    # Stop the progress thread even when the loop raises; otherwise a failed
    # call would leave the thread running after the cell has errored
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_kan_mb = time.time()
# Costs use the rates hard-coded here: USD 0.01 per 1000 input tokens, USD 0.03 per 1000 output tokens
keyword_cost_input_token_kan_mb = round((0.01/1000)*keyword_input_token_kan_mb,2)
keyword_cost_output_token_kan_mb = round((0.03/1000)*keyword_output_token_kan_mb,2)
keyword_total_cost_kan_mb = keyword_cost_input_token_kan_mb + keyword_cost_output_token_kan_mb
keyword_total_time_loop_kan_mb = keyword_end_time_loop_kan_mb - keyword_start_time_loop_kan_mb

# Display loop performance parameters & cost (the counter counts topics visited, not calls made)
clear_output(wait=True)
print("Executed ",keyword_counter_kan_mb[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_kan_mb,1))
print("Total Input Tokens - ", keyword_input_token_kan_mb)
print("Total Input Cost = USD ",keyword_cost_input_token_kan_mb)
print("Total Output Tokens - ", keyword_output_token_kan_mb)
print("Total Output Cost = USD ",keyword_cost_output_token_kan_mb)
print("Total Cost = USD ",round(keyword_total_cost_kan_mb,2))

Executed  10  Iterations
Total Execution time (in secs) -  3.5
Total Input Tokens -  1519
Total Input Cost = USD  0.02
Total Output Tokens -  68
Total Output Cost = USD  0.0
Total Cost = USD  0.02


In [663]:
# Column layout of the summary table: three identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store's display name once: take the value of the first
# keyword_mappings key that contains the identifier, else a fallback label
input_store_identifier = 'kan_mb'
store_name_kan_mb = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name_kan_mb = value
        break

# Collect rows as plain dicts and build the DataFrame once afterwards;
# pd.concat inside the loop would re-copy all earlier rows on every append
keyword_rows_kan_mb = []
for json_str in keyword_negative_output_kan_mb:
    # Each response is parsed as a JSON object mapping a category to a list of
    # entries that carry comma-separated 'keywords' and 'phrases' strings
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # One row for the keywords and one for the phrases of this entry
            for entry_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_kan_mb
                row_data['Sentiment'] = 'negative'
                row_data['Type'] = entry_type
                # Re-join the comma-separated items with surrounding whitespace stripped
                row_data[category] = ', '.join(item.strip() for item in content[entry_type].split(','))
                keyword_rows_kan_mb.append(row_data)

# With no rows, fall back to an empty frame that still has the grouping columns
if keyword_rows_kan_mb:
    negative_keywords_kan_mb = pd.DataFrame(keyword_rows_kan_mb)
else:
    negative_keywords_kan_mb = pd.DataFrame(columns=columns)

# Collapse to one row per (Store Name, Sentiment, Type), space-joining the non-null text of each topic column
negative_keywords_kan_mb = negative_keywords_kan_mb.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

negative_keywords_kan_mb

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,Kanz Jewellers,negative,keywords,,,No relevant negative keywords/ phrases,,,,,,,
1,Kanz Jewellers,negative,phrases,,,No relevant negative keywords/ phrases,,,,,,,


#### agd_mb

In [664]:
# Raw responses returned by negative_keywords, one entry per topic that has negative reviews
keyword_negative_output_agd_mb = []
# Topic columns of the sentiment DataFrame to scan (the 'review_text' column is not a topic)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# One-element list so the progress thread can observe increments made by this cell
keyword_counter_agd_mb = [0]
keyword_input_token_agd_mb = 0
keyword_output_token_agd_mb = 0
keyword_start_time_loop_agd_mb = time.time()

# Background thread that reports progress while the loop below runs
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_agd_mb, keyword_total_iterations, stop_event))
message_thread.start()

# Look the store's sentiment frame up once instead of twice per topic
sentiment_df_agd_mb = keyword_dataframes['agd_mb_final_sen_df_jul']

try:
    for topic in keyword_topics:
        keyword_counter_agd_mb[0] += 1
        # Keep the text of the reviews whose value for this topic is -1 (negative)
        filtered_comments_agd_mb = sentiment_df_agd_mb.loc[sentiment_df_agd_mb[topic] == -1, 'review_text'].tolist()
        # Only call negative_keywords when the topic has at least one negative review
        if filtered_comments_agd_mb:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_agd_mb, topic)
            # Store the raw response and accumulate the token usage it reported
            keyword_negative_output_agd_mb.append(keywords)
            keyword_input_token_agd_mb += input_tokens_loop
            keyword_output_token_agd_mb += output_token_loop
finally:
    # Stop the progress thread even when the loop raises; otherwise a failed
    # call would leave the thread running after the cell has errored
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_agd_mb = time.time()
# Costs use the rates hard-coded here: USD 0.01 per 1000 input tokens, USD 0.03 per 1000 output tokens
keyword_cost_input_token_agd_mb = round((0.01/1000)*keyword_input_token_agd_mb,2)
keyword_cost_output_token_agd_mb = round((0.03/1000)*keyword_output_token_agd_mb,2)
keyword_total_cost_agd_mb = keyword_cost_input_token_agd_mb + keyword_cost_output_token_agd_mb
keyword_total_time_loop_agd_mb = keyword_end_time_loop_agd_mb - keyword_start_time_loop_agd_mb

# Display loop performance parameters & cost (the counter counts topics visited, not calls made)
clear_output(wait=True)
print("Executed ",keyword_counter_agd_mb[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_agd_mb,1))
print("Total Input Tokens - ", keyword_input_token_agd_mb)
print("Total Input Cost = USD ",keyword_cost_input_token_agd_mb)
print("Total Output Tokens - ", keyword_output_token_agd_mb)
print("Total Output Cost = USD ",keyword_cost_output_token_agd_mb)
print("Total Cost = USD ",round(keyword_total_cost_agd_mb,2))

Executed  10  Iterations
Total Execution time (in secs) -  2.0
Total Input Tokens -  1444
Total Input Cost = USD  0.01
Total Output Tokens -  81
Total Output Cost = USD  0.0
Total Cost = USD  0.01


In [665]:
# Column layout of the summary table: three identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store's display name once: take the value of the first
# keyword_mappings key that contains the identifier, else a fallback label
input_store_identifier = 'agd_mb'
store_name_agd_mb = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name_agd_mb = value
        break

# Collect rows as plain dicts and build the DataFrame once afterwards;
# pd.concat inside the loop would re-copy all earlier rows on every append
keyword_rows_agd_mb = []
for json_str in keyword_negative_output_agd_mb:
    # Each response is parsed as a JSON object mapping a category to a list of
    # entries that carry comma-separated 'keywords' and 'phrases' strings
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # One row for the keywords and one for the phrases of this entry
            for entry_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_agd_mb
                row_data['Sentiment'] = 'negative'
                row_data['Type'] = entry_type
                # Re-join the comma-separated items with surrounding whitespace stripped
                row_data[category] = ', '.join(item.strip() for item in content[entry_type].split(','))
                keyword_rows_agd_mb.append(row_data)

# With no rows, fall back to an empty frame that still has the grouping columns
if keyword_rows_agd_mb:
    negative_keywords_agd_mb = pd.DataFrame(keyword_rows_agd_mb)
else:
    negative_keywords_agd_mb = pd.DataFrame(columns=columns)

# Collapse to one row per (Store Name, Sentiment, Type), space-joining the non-null text of each topic column
negative_keywords_agd_mb = negative_keywords_agd_mb.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

negative_keywords_agd_mb

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,Arakkal Gold and Diamonds LLC - Meena Bazar - ...,negative,keywords,,No relevant negative keywords/ phrases,,,,,,No relevant negative keywords/ phrases,,
1,Arakkal Gold and Diamonds LLC - Meena Bazar - ...,negative,phrases,,No relevant negative keywords/ phrases,,,,,,No relevant negative keywords/ phrases,,


#### bhi_dec_ga

In [666]:
# Raw responses returned by negative_keywords, one entry per topic that has negative reviews
keyword_negative_output_bhi_dec_ga = []
# Topic columns of the sentiment DataFrame to scan (the 'review_text' column is not a topic)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# One-element list so the progress thread can observe increments made by this cell
keyword_counter_bhi_dec_ga = [0]
keyword_input_token_bhi_dec_ga = 0
keyword_output_token_bhi_dec_ga = 0
keyword_start_time_loop_bhi_dec_ga = time.time()

# Background thread that reports progress while the loop below runs
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_bhi_dec_ga, keyword_total_iterations, stop_event))
message_thread.start()

# Look the store's sentiment frame up once instead of twice per topic
sentiment_df_bhi_dec_ga = keyword_dataframes['bhi_dec_ga_final_sen_df_jul']

try:
    for topic in keyword_topics:
        keyword_counter_bhi_dec_ga[0] += 1
        # Keep the text of the reviews whose value for this topic is -1 (negative)
        filtered_comments_bhi_dec_ga = sentiment_df_bhi_dec_ga.loc[sentiment_df_bhi_dec_ga[topic] == -1, 'review_text'].tolist()
        # Only call negative_keywords when the topic has at least one negative review
        if filtered_comments_bhi_dec_ga:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_bhi_dec_ga, topic)
            # Store the raw response and accumulate the token usage it reported
            keyword_negative_output_bhi_dec_ga.append(keywords)
            keyword_input_token_bhi_dec_ga += input_tokens_loop
            keyword_output_token_bhi_dec_ga += output_token_loop
finally:
    # Stop the progress thread even when the loop raises; otherwise a failed
    # call would leave the thread running after the cell has errored
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_bhi_dec_ga = time.time()
# Costs use the rates hard-coded here: USD 0.01 per 1000 input tokens, USD 0.03 per 1000 output tokens
keyword_cost_input_token_bhi_dec_ga = round((0.01/1000)*keyword_input_token_bhi_dec_ga,2)
keyword_cost_output_token_bhi_dec_ga = round((0.03/1000)*keyword_output_token_bhi_dec_ga,2)
keyword_total_cost_bhi_dec_ga = keyword_cost_input_token_bhi_dec_ga + keyword_cost_output_token_bhi_dec_ga
keyword_total_time_loop_bhi_dec_ga = keyword_end_time_loop_bhi_dec_ga - keyword_start_time_loop_bhi_dec_ga

# Display loop performance parameters & cost (the counter counts topics visited, not calls made)
clear_output(wait=True)
print("Executed ",keyword_counter_bhi_dec_ga[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_bhi_dec_ga,1))
print("Total Input Tokens - ", keyword_input_token_bhi_dec_ga)
print("Total Input Cost = USD ",keyword_cost_input_token_bhi_dec_ga)
print("Total Output Tokens - ", keyword_output_token_bhi_dec_ga)
print("Total Output Cost = USD ",keyword_cost_output_token_bhi_dec_ga)
print("Total Cost = USD ",round(keyword_total_cost_bhi_dec_ga,2))

Executed  10  Iterations
Total Execution time (in secs) -  10.5
Total Input Tokens -  4829
Total Input Cost = USD  0.05
Total Output Tokens -  341
Total Output Cost = USD  0.01
Total Cost = USD  0.06


In [667]:
# Build the negative keyword/phrase summary table for store 'bhi_dec_ga'.
# Each entry of keyword_negative_output_bhi_dec_ga is a JSON string of the form
# {category: [{"keywords": "...", "phrases": "..."}, ...]}; the result has one row per
# 'Type' ('keywords' / 'phrases') with one text column per topic.

# Column layout of the summary: identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the display name once: value of the first keyword_mappings entry whose key
# contains the store identifier (the lookup does not depend on the loop variables)
input_store_identifier = 'bhi_dec_ga'
store_name = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name = value
        break

# Collect the rows in a plain list and build the DataFrame once at the end,
# instead of re-allocating the whole frame with pd.concat for every row
rows = []
for json_str in keyword_negative_output_bhi_dec_ga:
    # Load the JSON string into a dictionary
    data = json.loads(json_str)
    for category, content_list in data.items():
        # Fresh row template per category; only this category's column gets filled
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name
        row_data['Sentiment'] = 'negative'
        for content in content_list:
            # Keywords are handled first, phrases next
            for entry_type in ('keywords', 'phrases'):
                row_data['Type'] = entry_type
                row_data[category] = ', '.join(item.strip() for item in content[entry_type].split(','))
                # Snapshot the dict: row_data is mutated again on the next pass
                rows.append(dict(row_data))

if rows:
    # Consolidate rows that share the same 'Type' into a single row per
    # 'Store Name' and 'Sentiment', joining the non-null texts of every topic column
    negative_keywords_bhi_dec_ga = (
        pd.DataFrame(rows)
        .groupby(['Store Name', 'Sentiment', 'Type'])
        .agg(lambda texts: ' '.join(texts.dropna()))
        .reset_index()
    )
else:
    # No negative results for this store: on an empty frame the groupby/reset_index
    # step raises "ValueError: cannot insert Type, already exists", so return an
    # empty table with the expected columns instead
    negative_keywords_bhi_dec_ga = pd.DataFrame(columns=columns)

negative_keywords_bhi_dec_ga

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Bhindi Jewellers-Decatur, GA",negative,keywords,"scam :1, fake gold :1, crooked :1, cheat :1, n...","locked door :1, appointment required :1, turne...","not responsible :1, not respond :1, turned awa...",,,,,"inflated :1, expensive :1, more :1",No relevant negative keywords/ phrases,
1,"Bhindi Jewellers-Decatur, GA",negative,phrases,"Product not as ordered :1, sells Rolex’s direc...","store owner was not responsible :1, staff stoo...",staff stood by looking like I came to rob the ...,,,,,"inflated price :1, more than any other places ...",No relevant negative keywords/ phrases,


#### eve_joh_ga

In [211]:
# Negative keyword/phrase extraction for store 'eve_joh_ga'.
# For every topic, the reviews flagged -1 for that topic are passed to negative_keywords();
# the returned results and token counts are accumulated, then run time and cost are printed.

# Results returned by negative_keywords(), one entry per topic that had negative reviews
keyword_negative_output_eve_joh_ga = []
# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Mutable one-element counter shared with the progress thread
keyword_counter_eve_joh_ga = [0]
keyword_input_token_eve_joh_ga = 0
keyword_output_token_eve_joh_ga = 0
keyword_start_time_loop_eve_joh_ga = time.time()

# Threading setup: run print_dynamic_message_keyword in the background with the shared
# counter, the number of topics and the stop event
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_eve_joh_ga, keyword_total_iterations, stop_event))
message_thread.start()

try:
    sentiment_df_eve_joh_ga = keyword_dataframes['eve_joh_ga_final_sen_df_jul']
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_eve_joh_ga[0] += 1
        # Review texts of the rows where this topic has a value of -1 (negative)
        filtered_comments_eve_joh_ga = sentiment_df_eve_joh_ga.loc[sentiment_df_eve_joh_ga[topic] == -1, 'review_text'].tolist()
        # Only call negative_keywords when there is at least one negative comment
        if filtered_comments_eve_joh_ga:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_eve_joh_ga, topic)
            # Store the result and accumulate the token usage
            keyword_negative_output_eve_joh_ga.append(keywords)
            keyword_input_token_eve_joh_ga += input_tokens_loop
            keyword_output_token_eve_joh_ga += output_token_loop
finally:
    # Always stop and join the dynamic message thread, even when a lookup or a
    # negative_keywords() call raises; otherwise it is never told to stop
    stop_event.set()
    message_thread.join()

# Cost in USD at the hard-coded rates of 0.01 (input) and 0.03 (output) per 1,000 tokens
keyword_end_time_loop_eve_joh_ga = time.time()
keyword_cost_input_token_eve_joh_ga = round((0.01/1000)*keyword_input_token_eve_joh_ga,2)
keyword_cost_output_token_eve_joh_ga = round((0.03/1000)*keyword_output_token_eve_joh_ga,2)
keyword_total_cost_eve_joh_ga = keyword_cost_input_token_eve_joh_ga + keyword_cost_output_token_eve_joh_ga
keyword_total_time_loop_eve_joh_ga = keyword_end_time_loop_eve_joh_ga - keyword_start_time_loop_eve_joh_ga

# Display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_eve_joh_ga[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_eve_joh_ga,1))
print("Total Input Tokens - ", keyword_input_token_eve_joh_ga)
print("Total Input Cost = USD ",keyword_cost_input_token_eve_joh_ga)
print("Total Output Tokens - ", keyword_output_token_eve_joh_ga)
print("Total Output Cost = USD ",keyword_cost_output_token_eve_joh_ga)
print("Total Cost = USD ",round(keyword_total_cost_eve_joh_ga,2))

Executed  10  Iterations
Total Execution time (in secs) -  0.0
Total Input Tokens -  0
Total Input Cost = USD  0.0
Total Output Tokens -  0
Total Output Cost = USD  0.0
Total Cost = USD  0.0


In [212]:
# Build the negative keyword/phrase summary table for store 'eve_joh_ga'.
# Each entry of keyword_negative_output_eve_joh_ga is a JSON string of the form
# {category: [{"keywords": "...", "phrases": "..."}, ...]}; the result has one row per
# 'Type' ('keywords' / 'phrases') with one text column per topic.

# Column layout of the summary: identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the display name once: value of the first keyword_mappings entry whose key
# contains the store identifier (the lookup does not depend on the loop variables)
input_store_identifier = 'eve_joh_ga'
store_name = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name = value
        break

# Collect the rows in a plain list and build the DataFrame once at the end,
# instead of re-allocating the whole frame with pd.concat for every row
rows = []
for json_str in keyword_negative_output_eve_joh_ga:
    # Load the JSON string into a dictionary
    data = json.loads(json_str)
    for category, content_list in data.items():
        # Fresh row template per category; only this category's column gets filled
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name
        row_data['Sentiment'] = 'negative'
        for content in content_list:
            # Keywords are handled first, phrases next
            for entry_type in ('keywords', 'phrases'):
                row_data['Type'] = entry_type
                row_data[category] = ', '.join(item.strip() for item in content[entry_type].split(','))
                # Snapshot the dict: row_data is mutated again on the next pass
                rows.append(dict(row_data))

if rows:
    # Consolidate rows that share the same 'Type' into a single row per
    # 'Store Name' and 'Sentiment', joining the non-null texts of every topic column
    negative_keywords_eve_joh_ga = (
        pd.DataFrame(rows)
        .groupby(['Store Name', 'Sentiment', 'Type'])
        .agg(lambda texts: ' '.join(texts.dropna()))
        .reset_index()
    )
else:
    # No negative results for this store: on an empty frame the groupby/reset_index
    # step raises "ValueError: cannot insert Type, already exists", so return an
    # empty table with the expected columns instead
    negative_keywords_eve_joh_ga = pd.DataFrame(columns=columns)

negative_keywords_eve_joh_ga

ValueError: cannot insert Type, already exists

#### jar_bol_il

In [668]:
# Negative keyword/phrase extraction for store 'jar_bol_il'.
# For every topic, the reviews flagged -1 for that topic are passed to negative_keywords();
# the returned results and token counts are accumulated, then run time and cost are printed.

# Results returned by negative_keywords(), one entry per topic that had negative reviews
keyword_negative_output_jar_bol_il = []
# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Mutable one-element counter shared with the progress thread
keyword_counter_jar_bol_il = [0]
keyword_input_token_jar_bol_il = 0
keyword_output_token_jar_bol_il = 0
keyword_start_time_loop_jar_bol_il = time.time()

# Threading setup: run print_dynamic_message_keyword in the background with the shared
# counter, the number of topics and the stop event
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_jar_bol_il, keyword_total_iterations, stop_event))
message_thread.start()

try:
    sentiment_df_jar_bol_il = keyword_dataframes['jar_bol_il_final_sen_df_jul']
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_jar_bol_il[0] += 1
        # Review texts of the rows where this topic has a value of -1 (negative)
        filtered_comments_jar_bol_il = sentiment_df_jar_bol_il.loc[sentiment_df_jar_bol_il[topic] == -1, 'review_text'].tolist()
        # Only call negative_keywords when there is at least one negative comment
        if filtered_comments_jar_bol_il:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_jar_bol_il, topic)
            # Store the result and accumulate the token usage
            keyword_negative_output_jar_bol_il.append(keywords)
            keyword_input_token_jar_bol_il += input_tokens_loop
            keyword_output_token_jar_bol_il += output_token_loop
finally:
    # Always stop and join the dynamic message thread, even when a lookup or a
    # negative_keywords() call raises; otherwise it is never told to stop
    stop_event.set()
    message_thread.join()

# Cost in USD at the hard-coded rates of 0.01 (input) and 0.03 (output) per 1,000 tokens
keyword_end_time_loop_jar_bol_il = time.time()
keyword_cost_input_token_jar_bol_il = round((0.01/1000)*keyword_input_token_jar_bol_il,2)
keyword_cost_output_token_jar_bol_il = round((0.03/1000)*keyword_output_token_jar_bol_il,2)
keyword_total_cost_jar_bol_il = keyword_cost_input_token_jar_bol_il + keyword_cost_output_token_jar_bol_il
keyword_total_time_loop_jar_bol_il = keyword_end_time_loop_jar_bol_il - keyword_start_time_loop_jar_bol_il

# Display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_jar_bol_il[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_jar_bol_il,1))
print("Total Input Tokens - ", keyword_input_token_jar_bol_il)
print("Total Input Cost = USD ",keyword_cost_input_token_jar_bol_il)
print("Total Output Tokens - ", keyword_output_token_jar_bol_il)
print("Total Output Cost = USD ",keyword_cost_output_token_jar_bol_il)
print("Total Cost = USD ",round(keyword_total_cost_jar_bol_il,2))

Executed  10  Iterations
Total Execution time (in secs) -  10.0
Total Input Tokens -  5068
Total Input Cost = USD  0.05
Total Output Tokens -  304
Total Output Cost = USD  0.01
Total Cost = USD  0.06


In [669]:
# Build the negative keyword/phrase summary table for store 'jar_bol_il'.
# Each entry of keyword_negative_output_jar_bol_il is a JSON string of the form
# {category: [{"keywords": "...", "phrases": "..."}, ...]}; the result has one row per
# 'Type' ('keywords' / 'phrases') with one text column per topic.

# Column layout of the summary: identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the display name once: value of the first keyword_mappings entry whose key
# contains the store identifier (the lookup does not depend on the loop variables)
input_store_identifier = 'jar_bol_il'
store_name = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name = value
        break

# Collect the rows in a plain list and build the DataFrame once at the end,
# instead of re-allocating the whole frame with pd.concat for every row
rows = []
for json_str in keyword_negative_output_jar_bol_il:
    # Load the JSON string into a dictionary
    data = json.loads(json_str)
    for category, content_list in data.items():
        # Fresh row template per category; only this category's column gets filled
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name
        row_data['Sentiment'] = 'negative'
        for content in content_list:
            # Keywords are handled first, phrases next
            for entry_type in ('keywords', 'phrases'):
                row_data['Type'] = entry_type
                row_data[category] = ', '.join(item.strip() for item in content[entry_type].split(','))
                # Snapshot the dict: row_data is mutated again on the next pass
                rows.append(dict(row_data))

if rows:
    # Consolidate rows that share the same 'Type' into a single row per
    # 'Store Name' and 'Sentiment', joining the non-null texts of every topic column
    negative_keywords_jar_bol_il = (
        pd.DataFrame(rows)
        .groupby(['Store Name', 'Sentiment', 'Type'])
        .agg(lambda texts: ' '.join(texts.dropna()))
        .reset_index()
    )
else:
    # No negative results for this store: on an empty frame the groupby/reset_index
    # step raises "ValueError: cannot insert Type, already exists", so return an
    # empty table with the expected columns instead
    negative_keywords_jar_bol_il = pd.DataFrame(columns=columns)

negative_keywords_jar_bol_il

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Jared-Bolingbrook, IL",negative,keywords,"unprofessional :1, unknowledgeable :1, shady :...","rude :6, unprofessional :1, unwelcoming :1, mi...","rude :6, unprofessional :1, unknowledgeable :1...",,No relevant negative keywords/ phrases,,,,,
1,"Jared-Bolingbrook, IL",negative,phrases,"low toned, sarcastic comments :1, not advanced...","waited 45min before helping :1, not welcoming ...","rude unnecessary questions :1, rude unnecessar...",,No relevant negative keywords/ phrases,,,,,


#### jar_ver_il

In [670]:
# Negative keyword/phrase extraction for store 'jar_ver_il'.
# For every topic, the reviews flagged -1 for that topic are passed to negative_keywords();
# the returned results and token counts are accumulated, then run time and cost are printed.

# Results returned by negative_keywords(), one entry per topic that had negative reviews
keyword_negative_output_jar_ver_il = []
# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Mutable one-element counter shared with the progress thread
keyword_counter_jar_ver_il = [0]
keyword_input_token_jar_ver_il = 0
keyword_output_token_jar_ver_il = 0
keyword_start_time_loop_jar_ver_il = time.time()

# Threading setup: run print_dynamic_message_keyword in the background with the shared
# counter, the number of topics and the stop event
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_jar_ver_il, keyword_total_iterations, stop_event))
message_thread.start()

try:
    sentiment_df_jar_ver_il = keyword_dataframes['jar_ver_il_final_sen_df_jul']
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_jar_ver_il[0] += 1
        # Review texts of the rows where this topic has a value of -1 (negative)
        filtered_comments_jar_ver_il = sentiment_df_jar_ver_il.loc[sentiment_df_jar_ver_il[topic] == -1, 'review_text'].tolist()
        # Only call negative_keywords when there is at least one negative comment
        if filtered_comments_jar_ver_il:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_jar_ver_il, topic)
            # Store the result and accumulate the token usage
            keyword_negative_output_jar_ver_il.append(keywords)
            keyword_input_token_jar_ver_il += input_tokens_loop
            keyword_output_token_jar_ver_il += output_token_loop
finally:
    # Always stop and join the dynamic message thread, even when a lookup or a
    # negative_keywords() call raises; otherwise it is never told to stop
    stop_event.set()
    message_thread.join()

# Cost in USD at the hard-coded rates of 0.01 (input) and 0.03 (output) per 1,000 tokens
keyword_end_time_loop_jar_ver_il = time.time()
keyword_cost_input_token_jar_ver_il = round((0.01/1000)*keyword_input_token_jar_ver_il,2)
keyword_cost_output_token_jar_ver_il = round((0.03/1000)*keyword_output_token_jar_ver_il,2)
keyword_total_cost_jar_ver_il = keyword_cost_input_token_jar_ver_il + keyword_cost_output_token_jar_ver_il
keyword_total_time_loop_jar_ver_il = keyword_end_time_loop_jar_ver_il - keyword_start_time_loop_jar_ver_il

# Display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_jar_ver_il[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_jar_ver_il,1))
print("Total Input Tokens - ", keyword_input_token_jar_ver_il)
print("Total Input Cost = USD ",keyword_cost_input_token_jar_ver_il)
print("Total Output Tokens - ", keyword_output_token_jar_ver_il)
print("Total Output Cost = USD ",keyword_cost_output_token_jar_ver_il)
print("Total Cost = USD ",round(keyword_total_cost_jar_ver_il,2))

Executed  10  Iterations
Total Execution time (in secs) -  11.0
Total Input Tokens -  5726
Total Input Cost = USD  0.06
Total Output Tokens -  410
Total Output Cost = USD  0.01
Total Cost = USD  0.07


In [671]:
# Build the negative keyword/phrase summary table for store 'jar_ver_il'.
# Each entry of keyword_negative_output_jar_ver_il is a JSON string of the form
# {category: [{"keywords": "...", "phrases": "..."}, ...]}; the result has one row per
# 'Type' ('keywords' / 'phrases') with one text column per topic.

# Column layout of the summary: identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the display name once: value of the first keyword_mappings entry whose key
# contains the store identifier (the lookup does not depend on the loop variables)
input_store_identifier = 'jar_ver_il'
store_name = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name = value
        break

# Collect the rows in a plain list and build the DataFrame once at the end,
# instead of re-allocating the whole frame with pd.concat for every row
rows = []
for json_str in keyword_negative_output_jar_ver_il:
    # Load the JSON string into a dictionary
    data = json.loads(json_str)
    for category, content_list in data.items():
        # Fresh row template per category; only this category's column gets filled
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name
        row_data['Sentiment'] = 'negative'
        for content in content_list:
            # Keywords are handled first, phrases next
            for entry_type in ('keywords', 'phrases'):
                row_data['Type'] = entry_type
                row_data[category] = ', '.join(item.strip() for item in content[entry_type].split(','))
                # Snapshot the dict: row_data is mutated again on the next pass
                rows.append(dict(row_data))

if rows:
    # Consolidate rows that share the same 'Type' into a single row per
    # 'Store Name' and 'Sentiment', joining the non-null texts of every topic column
    negative_keywords_jar_ver_il = (
        pd.DataFrame(rows)
        .groupby(['Store Name', 'Sentiment', 'Type'])
        .agg(lambda texts: ' '.join(texts.dropna()))
        .reset_index()
    )
else:
    # No negative results for this store: on an empty frame the groupby/reset_index
    # step raises "ValueError: cannot insert Type, already exists", so return an
    # empty table with the expected columns instead
    negative_keywords_jar_ver_il = pd.DataFrame(columns=columns)

negative_keywords_jar_ver_il

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Jared-Vernon Hills, IL",negative,keywords,"don't trust :1, not happy :1, don't live :1, n...","rude :2, condescending :1, wrong ring :1, stin...","rude :3, condescending :1, dismissive :1, vola...",,,,,"hefty amount:1, pay $1400:1","diamonds falling :1, quality product :1",
1,"Jared-Vernon Hills, IL",negative,phrases,"don't trust a store owner to update :1, diamon...","immediate attitude of the manager :1, incredib...","extremely rude and volatile :1, verbally abusi...",,,,,"spent a hefty amount:1, pay $1400 to select a ...","diamonds keep falling out :1, not enough to bu...",


#### jar_lom_il

In [672]:
# Negative keyword/phrase extraction for store 'jar_lom_il'.
# For every topic, the reviews flagged -1 for that topic are passed to negative_keywords();
# the returned results and token counts are accumulated, then run time and cost are printed.

# Results returned by negative_keywords(), one entry per topic that had negative reviews
keyword_negative_output_jar_lom_il = []
# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Mutable one-element counter shared with the progress thread
keyword_counter_jar_lom_il = [0]
keyword_input_token_jar_lom_il = 0
keyword_output_token_jar_lom_il = 0
keyword_start_time_loop_jar_lom_il = time.time()

# Threading setup: run print_dynamic_message_keyword in the background with the shared
# counter, the number of topics and the stop event
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_jar_lom_il, keyword_total_iterations, stop_event))
message_thread.start()

try:
    sentiment_df_jar_lom_il = keyword_dataframes['jar_lom_il_final_sen_df_jul']
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_jar_lom_il[0] += 1
        # Review texts of the rows where this topic has a value of -1 (negative)
        filtered_comments_jar_lom_il = sentiment_df_jar_lom_il.loc[sentiment_df_jar_lom_il[topic] == -1, 'review_text'].tolist()
        # Only call negative_keywords when there is at least one negative comment
        if filtered_comments_jar_lom_il:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_jar_lom_il, topic)
            # Store the result and accumulate the token usage
            keyword_negative_output_jar_lom_il.append(keywords)
            keyword_input_token_jar_lom_il += input_tokens_loop
            keyword_output_token_jar_lom_il += output_token_loop
finally:
    # Always stop and join the dynamic message thread, even when a lookup or a
    # negative_keywords() call raises; otherwise it is never told to stop
    stop_event.set()
    message_thread.join()

# Cost in USD at the hard-coded rates of 0.01 (input) and 0.03 (output) per 1,000 tokens
keyword_end_time_loop_jar_lom_il = time.time()
keyword_cost_input_token_jar_lom_il = round((0.01/1000)*keyword_input_token_jar_lom_il,2)
keyword_cost_output_token_jar_lom_il = round((0.03/1000)*keyword_output_token_jar_lom_il,2)
keyword_total_cost_jar_lom_il = keyword_cost_input_token_jar_lom_il + keyword_cost_output_token_jar_lom_il
keyword_total_time_loop_jar_lom_il = keyword_end_time_loop_jar_lom_il - keyword_start_time_loop_jar_lom_il

# Display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_jar_lom_il[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_jar_lom_il,1))
print("Total Input Tokens - ", keyword_input_token_jar_lom_il)
print("Total Input Cost = USD ",keyword_cost_input_token_jar_lom_il)
print("Total Output Tokens - ", keyword_output_token_jar_lom_il)
print("Total Output Cost = USD ",keyword_cost_output_token_jar_lom_il)
print("Total Cost = USD ",round(keyword_total_cost_jar_lom_il,2))

Executed  10  Iterations
Total Execution time (in secs) -  9.5
Total Input Tokens -  6437
Total Input Cost = USD  0.06
Total Output Tokens -  339
Total Output Cost = USD  0.01
Total Cost = USD  0.07


In [673]:
# Build the negative keyword/phrase summary table for store 'jar_lom_il'.
# Each entry of keyword_negative_output_jar_lom_il is a JSON string of the form
# {category: [{"keywords": "...", "phrases": "..."}, ...]}; the result has one row per
# 'Type' ('keywords' / 'phrases') with one text column per topic.

# Column layout of the summary: identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the display name once: value of the first keyword_mappings entry whose key
# contains the store identifier (the lookup does not depend on the loop variables)
input_store_identifier = 'jar_lom_il'
store_name = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name = value
        break

# Collect the rows in a plain list and build the DataFrame once at the end,
# instead of re-allocating the whole frame with pd.concat for every row
rows = []
for json_str in keyword_negative_output_jar_lom_il:
    # Load the JSON string into a dictionary
    data = json.loads(json_str)
    for category, content_list in data.items():
        # Fresh row template per category; only this category's column gets filled
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name
        row_data['Sentiment'] = 'negative'
        for content in content_list:
            # Keywords are handled first, phrases next
            for entry_type in ('keywords', 'phrases'):
                row_data['Type'] = entry_type
                row_data[category] = ', '.join(item.strip() for item in content[entry_type].split(','))
                # Snapshot the dict: row_data is mutated again on the next pass
                rows.append(dict(row_data))

if rows:
    # Consolidate rows that share the same 'Type' into a single row per
    # 'Store Name' and 'Sentiment', joining the non-null texts of every topic column
    negative_keywords_jar_lom_il = (
        pd.DataFrame(rows)
        .groupby(['Store Name', 'Sentiment', 'Type'])
        .agg(lambda texts: ' '.join(texts.dropna()))
        .reset_index()
    )
else:
    # No negative results for this store: on an empty frame the groupby/reset_index
    # step raises "ValueError: cannot insert Type, already exists", so return an
    # empty table with the expected columns instead
    negative_keywords_jar_lom_il = pd.DataFrame(columns=columns)

negative_keywords_jar_lom_il

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Jared-Lombard, IL",negative,keywords,"no apology :1, not valued :1","waiting :3, delay :1, poor service :1, rude :1...","ignored :2, rude :2, attitude :1, audacity :1,...",,,,,not cheap:1,,"ignored :1, couldn't fix :1"
1,"Jared-Lombard, IL",negative,phrases,"waiting too long :1, no apology for the delay ...","waiting too long :1, terrible experience :1, n...","ignored me and said they couldn't fix it :1, n...",,,,,JEWLERY is not cheap especially when it comes ...,,"refunded the money to the card :1, not what I ..."


#### jar_orl_il

In [674]:
# Negative keyword/phrase extraction for store 'jar_orl_il'.
# For every topic, the reviews flagged -1 for that topic are passed to negative_keywords();
# the returned results and token counts are accumulated, then run time and cost are printed.

# Results returned by negative_keywords(), one entry per topic that had negative reviews
keyword_negative_output_jar_orl_il = []
# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Mutable one-element counter shared with the progress thread
keyword_counter_jar_orl_il = [0]
keyword_input_token_jar_orl_il = 0
keyword_output_token_jar_orl_il = 0
keyword_start_time_loop_jar_orl_il = time.time()

# Threading setup: run print_dynamic_message_keyword in the background with the shared
# counter, the number of topics and the stop event
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_jar_orl_il, keyword_total_iterations, stop_event))
message_thread.start()

try:
    sentiment_df_jar_orl_il = keyword_dataframes['jar_orl_il_final_sen_df_jul']
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_jar_orl_il[0] += 1
        # Review texts of the rows where this topic has a value of -1 (negative)
        filtered_comments_jar_orl_il = sentiment_df_jar_orl_il.loc[sentiment_df_jar_orl_il[topic] == -1, 'review_text'].tolist()
        # Only call negative_keywords when there is at least one negative comment
        if filtered_comments_jar_orl_il:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_jar_orl_il, topic)
            # Store the result and accumulate the token usage
            keyword_negative_output_jar_orl_il.append(keywords)
            keyword_input_token_jar_orl_il += input_tokens_loop
            keyword_output_token_jar_orl_il += output_token_loop
finally:
    # Always stop and join the dynamic message thread, even when a lookup or a
    # negative_keywords() call raises; otherwise it is never told to stop
    stop_event.set()
    message_thread.join()

# Cost in USD at the hard-coded rates of 0.01 (input) and 0.03 (output) per 1,000 tokens
keyword_end_time_loop_jar_orl_il = time.time()
keyword_cost_input_token_jar_orl_il = round((0.01/1000)*keyword_input_token_jar_orl_il,2)
keyword_cost_output_token_jar_orl_il = round((0.03/1000)*keyword_output_token_jar_orl_il,2)
keyword_total_cost_jar_orl_il = keyword_cost_input_token_jar_orl_il + keyword_cost_output_token_jar_orl_il
keyword_total_time_loop_jar_orl_il = keyword_end_time_loop_jar_orl_il - keyword_start_time_loop_jar_orl_il

# Display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_jar_orl_il[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_jar_orl_il,1))
print("Total Input Tokens - ", keyword_input_token_jar_orl_il)
print("Total Input Cost = USD ",keyword_cost_input_token_jar_orl_il)
print("Total Output Tokens - ", keyword_output_token_jar_orl_il)
print("Total Output Cost = USD ",keyword_cost_output_token_jar_orl_il)
print("Total Cost = USD ",round(keyword_total_cost_jar_orl_il,2))

Executed  10  Iterations
Total Execution time (in secs) -  9.0
Total Input Tokens -  4623
Total Input Cost = USD  0.05
Total Output Tokens -  306
Total Output Cost = USD  0.01
Total Cost = USD  0.06


In [675]:
# Build the negative keyword/phrase summary table for store 'jar_orl_il'.
# Input : keyword_negative_output_jar_orl_il - list of JSON strings, each parsed as
#         {category: [{'keywords': '<comma separated>', 'phrases': '<comma separated>'}, ...]}
# Output: negative_keywords_jar_orl_il - one row per (Store Name, Sentiment, Type)

# Column layout: three identifier columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store display name once: it does not depend on the JSON being processed.
# The first keyword_mappings key containing the identifier wins; otherwise the placeholder is kept.
input_store_identifier = 'jar_orl_il'
store_name = next(
    (mapped_name for mapping_key, mapped_name in keyword_mappings.items() if input_store_identifier in mapping_key),
    "Store Name Not Found",
)

# Collect plain dict records and build the DataFrame once at the end
# (growing a DataFrame with pd.concat inside the loop re-copies every earlier row on each append).
keyword_rows = []
for json_str in keyword_negative_output_jar_orl_il:
    # Load the JSON string into a dictionary
    data = json.loads(json_str)
    for category, content_list in data.items():
        # Start from an all-None row so columns of other categories stay empty
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name
        row_data['Sentiment'] = 'negative'
        for content in content_list:
            # Keywords row: normalise the spacing around each comma-separated item
            row_data['Type'] = 'keywords'
            row_data[category] = ', '.join(kw.strip() for kw in content['keywords'].split(','))
            keyword_rows.append(dict(row_data))  # snapshot; row_data is mutated again below
            # Phrases row
            row_data['Type'] = 'phrases'
            row_data[category] = ', '.join(ph.strip() for ph in content['phrases'].split(','))
            keyword_rows.append(dict(row_data))

# With no records, fall back to an empty frame that still carries the expected columns
if keyword_rows:
    negative_keywords_jar_orl_il = pd.DataFrame(keyword_rows)
else:
    negative_keywords_jar_orl_il = pd.DataFrame(columns=columns)

# Consolidate rows that share the same 'Type' into a single row for each 'Store Name' and 'Sentiment';
# the non-null cell values of every other column are joined with a single space.
negative_keywords_jar_orl_il = negative_keywords_jar_orl_il.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

negative_keywords_jar_orl_il

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Jared-Orland Park, IL",negative,keywords,"scamming :1, don't care :1","unwelcomed :2, short :2, stone :2, buzzed in :...","unwelcomed :2, stone :2, short :2, awkwardness...",,,,,"scamming:1, charged:1",,
1,"Jared-Orland Park, IL",negative,phrases,"charged me 7, 500 for a ring that cost not eve...","felt unwelcomed :2, looked us up and down :2, ...","looked us up and down :2, no open ended conver...",,,,,"cost not even half:1, perfectly happy scamming...",,


#### jar_aur_il

In [676]:
# Extract negative keywords/phrases per topic for store 'jar_aur_il' and report token usage and cost.

# Results returned by negative_keywords(), one entry per topic that has negative reviews
keyword_negative_output_jar_aur_il = []
# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Look the store's sentiment frame up once instead of twice per topic;
# doing it before the thread starts also means a missing key fails before any thread exists.
sentiment_df_jar_aur_il = keyword_dataframes['jar_aur_il_final_sen_df_jul']

keyword_counter_jar_aur_il=[0]
keyword_input_token_jar_aur_il = 0
keyword_output_token_jar_aur_il = 0
keyword_start_time_loop_jar_aur_il = time.time()

#Threading setup: the message thread receives the mutable counter, the total and the stop event
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_jar_aur_il, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_jar_aur_il[0]+=1
        # Keep only the reviews whose value for this topic is -1 (negative)
        filtered_comments_jar_aur_il = sentiment_df_jar_aur_il[sentiment_df_jar_aur_il[topic]==-1]['review_text'].tolist()
        # Topics without negative comments are skipped, so negative_keywords is not called for them
        if filtered_comments_jar_aur_il:
            # The call result is unpacked as (keywords, input tokens, output tokens)
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_jar_aur_il,topic)
            keyword_negative_output_jar_aur_il.append(keywords)
            keyword_input_token_jar_aur_il += input_tokens_loop
            keyword_output_token_jar_aur_il += output_token_loop
finally:
    #Stopping the dynamic message thread - also when the loop raised, otherwise it is never signalled to stop
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_jar_aur_il = time.time()
# Cost estimate: USD 0.01 per 1,000 input tokens and USD 0.03 per 1,000 output tokens
keyword_cost_input_token_jar_aur_il = round((0.01/1000)*keyword_input_token_jar_aur_il,2)
keyword_cost_output_token_jar_aur_il = round((0.03/1000)*keyword_output_token_jar_aur_il,2)
# NOTE(review): the total adds the two already-rounded costs, so it can differ by a cent from the rounded exact total
keyword_total_cost_jar_aur_il = keyword_cost_input_token_jar_aur_il + keyword_cost_output_token_jar_aur_il
keyword_total_time_loop_jar_aur_il = keyword_end_time_loop_jar_aur_il - keyword_start_time_loop_jar_aur_il

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_jar_aur_il[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_jar_aur_il,1))
print("Total Input Tokens - ", keyword_input_token_jar_aur_il)
print("Total Input Cost = USD ",keyword_cost_input_token_jar_aur_il)
print("Total Output Tokens - ", keyword_output_token_jar_aur_il)
print("Total Output Cost = USD ",keyword_cost_output_token_jar_aur_il)
print("Total Cost = USD ",round(keyword_total_cost_jar_aur_il,2))

Executed  10  Iterations
Total Execution time (in secs) -  7.5
Total Input Tokens -  2728
Total Input Cost = USD  0.03
Total Output Tokens -  234
Total Output Cost = USD  0.01
Total Cost = USD  0.04


In [677]:
# Build the negative keyword/phrase summary table for store 'jar_aur_il'.
# Input : keyword_negative_output_jar_aur_il - list of JSON strings, each parsed as
#         {category: [{'keywords': '<comma separated>', 'phrases': '<comma separated>'}, ...]}
# Output: negative_keywords_jar_aur_il - one row per (Store Name, Sentiment, Type)

# Column layout: three identifier columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store display name once: it does not depend on the JSON being processed.
# The first keyword_mappings key containing the identifier wins; otherwise the placeholder is kept.
input_store_identifier = 'jar_aur_il'
store_name = next(
    (mapped_name for mapping_key, mapped_name in keyword_mappings.items() if input_store_identifier in mapping_key),
    "Store Name Not Found",
)

# Collect plain dict records and build the DataFrame once at the end
# (growing a DataFrame with pd.concat inside the loop re-copies every earlier row on each append).
keyword_rows = []
for json_str in keyword_negative_output_jar_aur_il:
    # Load the JSON string into a dictionary
    data = json.loads(json_str)
    for category, content_list in data.items():
        # Start from an all-None row so columns of other categories stay empty
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name
        row_data['Sentiment'] = 'negative'
        for content in content_list:
            # Keywords row: normalise the spacing around each comma-separated item
            row_data['Type'] = 'keywords'
            row_data[category] = ', '.join(kw.strip() for kw in content['keywords'].split(','))
            keyword_rows.append(dict(row_data))  # snapshot; row_data is mutated again below
            # Phrases row
            row_data['Type'] = 'phrases'
            row_data[category] = ', '.join(ph.strip() for ph in content['phrases'].split(','))
            keyword_rows.append(dict(row_data))

# With no records, fall back to an empty frame that still carries the expected columns
if keyword_rows:
    negative_keywords_jar_aur_il = pd.DataFrame(keyword_rows)
else:
    negative_keywords_jar_aur_il = pd.DataFrame(columns=columns)

# Consolidate rows that share the same 'Type' into a single row for each 'Store Name' and 'Sentiment';
# the non-null cell values of every other column are joined with a single space.
negative_keywords_jar_aur_il = negative_keywords_jar_aur_il.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

negative_keywords_jar_aur_il

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Jared-Aurora, IL",negative,keywords,,"disappointed :2, rude :1, unwelcome :1, condes...","rude :2, condescending :1, screamed :1, unwelc...",,,,,"cheap:1, expensive:1",,
1,"Jared-Aurora, IL",negative,phrases,,"extremely disappointing experience :1, worse c...","screamed to me :1, incredibly rude :1, condesc...",,,,,"not too expensive:1, if I want something cheap...",,


#### jar_alg_il

In [678]:
# Extract negative keywords/phrases per topic for store 'jar_alg_il' and report token usage and cost.

# Results returned by negative_keywords(), one entry per topic that has negative reviews
keyword_negative_output_jar_alg_il = []
# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Look the store's sentiment frame up once instead of twice per topic;
# doing it before the thread starts also means a missing key fails before any thread exists.
sentiment_df_jar_alg_il = keyword_dataframes['jar_alg_il_final_sen_df_jul']

keyword_counter_jar_alg_il=[0]
keyword_input_token_jar_alg_il = 0
keyword_output_token_jar_alg_il = 0
keyword_start_time_loop_jar_alg_il = time.time()

#Threading setup: the message thread receives the mutable counter, the total and the stop event
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_jar_alg_il, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_jar_alg_il[0]+=1
        # Keep only the reviews whose value for this topic is -1 (negative)
        filtered_comments_jar_alg_il = sentiment_df_jar_alg_il[sentiment_df_jar_alg_il[topic]==-1]['review_text'].tolist()
        # Topics without negative comments are skipped, so negative_keywords is not called for them
        if filtered_comments_jar_alg_il:
            # The call result is unpacked as (keywords, input tokens, output tokens)
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_jar_alg_il,topic)
            keyword_negative_output_jar_alg_il.append(keywords)
            keyword_input_token_jar_alg_il += input_tokens_loop
            keyword_output_token_jar_alg_il += output_token_loop
finally:
    #Stopping the dynamic message thread - also when the loop raised, otherwise it is never signalled to stop
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_jar_alg_il = time.time()
# Cost estimate: USD 0.01 per 1,000 input tokens and USD 0.03 per 1,000 output tokens
keyword_cost_input_token_jar_alg_il = round((0.01/1000)*keyword_input_token_jar_alg_il,2)
keyword_cost_output_token_jar_alg_il = round((0.03/1000)*keyword_output_token_jar_alg_il,2)
# NOTE(review): the total adds the two already-rounded costs, so it can differ by a cent from the rounded exact total
keyword_total_cost_jar_alg_il = keyword_cost_input_token_jar_alg_il + keyword_cost_output_token_jar_alg_il
keyword_total_time_loop_jar_alg_il = keyword_end_time_loop_jar_alg_il - keyword_start_time_loop_jar_alg_il

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_jar_alg_il[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_jar_alg_il,1))
print("Total Input Tokens - ", keyword_input_token_jar_alg_il)
print("Total Input Cost = USD ",keyword_cost_input_token_jar_alg_il)
print("Total Output Tokens - ", keyword_output_token_jar_alg_il)
print("Total Output Cost = USD ",keyword_cost_output_token_jar_alg_il)
print("Total Cost = USD ",round(keyword_total_cost_jar_alg_il,2))

Executed  10  Iterations
Total Execution time (in secs) -  7.0
Total Input Tokens -  3132
Total Input Cost = USD  0.03
Total Output Tokens -  248
Total Output Cost = USD  0.01
Total Cost = USD  0.04


In [679]:
# Build the negative keyword/phrase summary table for store 'jar_alg_il'.
# Input : keyword_negative_output_jar_alg_il - list of JSON strings, each parsed as
#         {category: [{'keywords': '<comma separated>', 'phrases': '<comma separated>'}, ...]}
# Output: negative_keywords_jar_alg_il - one row per (Store Name, Sentiment, Type)

# Column layout: three identifier columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store display name once: it does not depend on the JSON being processed.
# The first keyword_mappings key containing the identifier wins; otherwise the placeholder is kept.
input_store_identifier = 'jar_alg_il'
store_name = next(
    (mapped_name for mapping_key, mapped_name in keyword_mappings.items() if input_store_identifier in mapping_key),
    "Store Name Not Found",
)

# Collect plain dict records and build the DataFrame once at the end
# (growing a DataFrame with pd.concat inside the loop re-copies every earlier row on each append).
keyword_rows = []
for json_str in keyword_negative_output_jar_alg_il:
    # Load the JSON string into a dictionary
    data = json.loads(json_str)
    for category, content_list in data.items():
        # Start from an all-None row so columns of other categories stay empty
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name
        row_data['Sentiment'] = 'negative'
        for content in content_list:
            # Keywords row: normalise the spacing around each comma-separated item
            row_data['Type'] = 'keywords'
            row_data[category] = ', '.join(kw.strip() for kw in content['keywords'].split(','))
            keyword_rows.append(dict(row_data))  # snapshot; row_data is mutated again below
            # Phrases row
            row_data['Type'] = 'phrases'
            row_data[category] = ', '.join(ph.strip() for ph in content['phrases'].split(','))
            keyword_rows.append(dict(row_data))

# With no records, fall back to an empty frame that still carries the expected columns
if keyword_rows:
    negative_keywords_jar_alg_il = pd.DataFrame(keyword_rows)
else:
    negative_keywords_jar_alg_il = pd.DataFrame(columns=columns)

# Consolidate rows that share the same 'Type' into a single row for each 'Store Name' and 'Sentiment';
# the non-null cell values of every other column are joined with a single space.
negative_keywords_jar_alg_il = negative_keywords_jar_alg_il.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

negative_keywords_jar_alg_il

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Jared-Algonquin, IL",negative,keywords,"trust :1, tarnishing :1, pathetic :1","pathetic store :1, no response :1, locked down...","no response :1, pathetic store :1, not a word ...",,,,,,,
1,"Jared-Algonquin, IL",negative,phrases,"Do not trust this store :1, What a pathetic st...","entire place was locked down :1, totally ruine...","didn't bother changing the band :1, should hav...",,,,,,,


#### jar_sch_il

In [680]:
# Extract negative keywords/phrases per topic for store 'jar_sch_il' and report token usage and cost.

# Results returned by negative_keywords(), one entry per topic that has negative reviews
keyword_negative_output_jar_sch_il = []
# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Look the store's sentiment frame up once instead of twice per topic;
# doing it before the thread starts also means a missing key fails before any thread exists.
sentiment_df_jar_sch_il = keyword_dataframes['jar_sch_il_final_sen_df_jul']

keyword_counter_jar_sch_il=[0]
keyword_input_token_jar_sch_il = 0
keyword_output_token_jar_sch_il = 0
keyword_start_time_loop_jar_sch_il = time.time()

#Threading setup: the message thread receives the mutable counter, the total and the stop event
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_jar_sch_il, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_jar_sch_il[0]+=1
        # Keep only the reviews whose value for this topic is -1 (negative)
        filtered_comments_jar_sch_il = sentiment_df_jar_sch_il[sentiment_df_jar_sch_il[topic]==-1]['review_text'].tolist()
        # Topics without negative comments are skipped, so negative_keywords is not called for them
        if filtered_comments_jar_sch_il:
            # The call result is unpacked as (keywords, input tokens, output tokens)
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_jar_sch_il,topic)
            keyword_negative_output_jar_sch_il.append(keywords)
            keyword_input_token_jar_sch_il += input_tokens_loop
            keyword_output_token_jar_sch_il += output_token_loop
finally:
    #Stopping the dynamic message thread - also when the loop raised, otherwise it is never signalled to stop
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_jar_sch_il = time.time()
# Cost estimate: USD 0.01 per 1,000 input tokens and USD 0.03 per 1,000 output tokens
keyword_cost_input_token_jar_sch_il = round((0.01/1000)*keyword_input_token_jar_sch_il,2)
keyword_cost_output_token_jar_sch_il = round((0.03/1000)*keyword_output_token_jar_sch_il,2)
# NOTE(review): the total adds the two already-rounded costs, so it can differ by a cent from the rounded exact total
keyword_total_cost_jar_sch_il = keyword_cost_input_token_jar_sch_il + keyword_cost_output_token_jar_sch_il
keyword_total_time_loop_jar_sch_il = keyword_end_time_loop_jar_sch_il - keyword_start_time_loop_jar_sch_il

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_jar_sch_il[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_jar_sch_il,1))
print("Total Input Tokens - ", keyword_input_token_jar_sch_il)
print("Total Input Cost = USD ",keyword_cost_input_token_jar_sch_il)
print("Total Output Tokens - ", keyword_output_token_jar_sch_il)
print("Total Output Cost = USD ",keyword_cost_output_token_jar_sch_il)
print("Total Cost = USD ",round(keyword_total_cost_jar_sch_il,2))

Executed  10  Iterations
Total Execution time (in secs) -  13.0
Total Input Tokens -  8177
Total Input Cost = USD  0.08
Total Output Tokens -  437
Total Output Cost = USD  0.01
Total Cost = USD  0.09


In [681]:
# Build the negative keyword/phrase summary table for store 'jar_sch_il'.
# Input : keyword_negative_output_jar_sch_il - list of JSON strings, each parsed as
#         {category: [{'keywords': '<comma separated>', 'phrases': '<comma separated>'}, ...]}
# Output: negative_keywords_jar_sch_il - one row per (Store Name, Sentiment, Type)

# Column layout: three identifier columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store display name once: it does not depend on the JSON being processed.
# The first keyword_mappings key containing the identifier wins; otherwise the placeholder is kept.
input_store_identifier = 'jar_sch_il'
store_name = next(
    (mapped_name for mapping_key, mapped_name in keyword_mappings.items() if input_store_identifier in mapping_key),
    "Store Name Not Found",
)

# Collect plain dict records and build the DataFrame once at the end
# (growing a DataFrame with pd.concat inside the loop re-copies every earlier row on each append).
keyword_rows = []
for json_str in keyword_negative_output_jar_sch_il:
    # Load the JSON string into a dictionary
    data = json.loads(json_str)
    for category, content_list in data.items():
        # Start from an all-None row so columns of other categories stay empty
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name
        row_data['Sentiment'] = 'negative'
        for content in content_list:
            # Keywords row: normalise the spacing around each comma-separated item
            row_data['Type'] = 'keywords'
            row_data[category] = ', '.join(kw.strip() for kw in content['keywords'].split(','))
            keyword_rows.append(dict(row_data))  # snapshot; row_data is mutated again below
            # Phrases row
            row_data['Type'] = 'phrases'
            row_data[category] = ', '.join(ph.strip() for ph in content['phrases'].split(','))
            keyword_rows.append(dict(row_data))

# With no records, fall back to an empty frame that still carries the expected columns
if keyword_rows:
    negative_keywords_jar_sch_il = pd.DataFrame(keyword_rows)
else:
    negative_keywords_jar_sch_il = pd.DataFrame(columns=columns)

# Consolidate rows that share the same 'Type' into a single row for each 'Store Name' and 'Sentiment';
# the non-null cell values of every other column are joined with a single space.
negative_keywords_jar_sch_il = negative_keywords_jar_sch_il.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

negative_keywords_jar_sch_il

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Jared-Schaumburg, IL",negative,keywords,"no appraisal:1, excuses:1, unable to help:1, d...","rude :3, locked :1, disorganized :1, unprofess...","rude :3, condescending :1, demeaning :1, annoy...","No light :1, ring boxes :1","barely anything:1, settings only:1",,,,"broken :2, awful :1, not great :1",
1,"Jared-Schaumburg, IL",negative,phrases,be careful buying here as they don't honor the...,"rudest salesperson :1, awful experience :1, aw...","rudest salesperson :1, made us feel stupid :1,...",No light up ring boxes :1,"none of the 5:1, not in store:1, almost all th...",,,,"broke in half :1, quality of product is not gr...",


#### joy_suw_ga

In [682]:
# Extract negative keywords/phrases per topic for store 'joy_suw_ga' and report token usage and cost.

# Results returned by negative_keywords(), one entry per topic that has negative reviews
keyword_negative_output_joy_suw_ga = []
# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Look the store's sentiment frame up once instead of twice per topic;
# doing it before the thread starts also means a missing key fails before any thread exists.
sentiment_df_joy_suw_ga = keyword_dataframes['joy_suw_ga_final_sen_df_jul']

keyword_counter_joy_suw_ga=[0]
keyword_input_token_joy_suw_ga = 0
keyword_output_token_joy_suw_ga = 0
keyword_start_time_loop_joy_suw_ga = time.time()

#Threading setup: the message thread receives the mutable counter, the total and the stop event
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_joy_suw_ga, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_joy_suw_ga[0]+=1
        # Keep only the reviews whose value for this topic is -1 (negative)
        filtered_comments_joy_suw_ga = sentiment_df_joy_suw_ga[sentiment_df_joy_suw_ga[topic]==-1]['review_text'].tolist()
        # Topics without negative comments are skipped, so negative_keywords is not called for them
        if filtered_comments_joy_suw_ga:
            # The call result is unpacked as (keywords, input tokens, output tokens)
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_joy_suw_ga,topic)
            keyword_negative_output_joy_suw_ga.append(keywords)
            keyword_input_token_joy_suw_ga += input_tokens_loop
            keyword_output_token_joy_suw_ga += output_token_loop
finally:
    #Stopping the dynamic message thread - also when the loop raised, otherwise it is never signalled to stop
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_joy_suw_ga = time.time()
# Cost estimate: USD 0.01 per 1,000 input tokens and USD 0.03 per 1,000 output tokens
keyword_cost_input_token_joy_suw_ga = round((0.01/1000)*keyword_input_token_joy_suw_ga,2)
keyword_cost_output_token_joy_suw_ga = round((0.03/1000)*keyword_output_token_joy_suw_ga,2)
# NOTE(review): the total adds the two already-rounded costs, so it can differ by a cent from the rounded exact total
keyword_total_cost_joy_suw_ga = keyword_cost_input_token_joy_suw_ga + keyword_cost_output_token_joy_suw_ga
keyword_total_time_loop_joy_suw_ga = keyword_end_time_loop_joy_suw_ga - keyword_start_time_loop_joy_suw_ga

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_joy_suw_ga[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_joy_suw_ga,1))
print("Total Input Tokens - ", keyword_input_token_joy_suw_ga)
print("Total Input Cost = USD ",keyword_cost_input_token_joy_suw_ga)
print("Total Output Tokens - ", keyword_output_token_joy_suw_ga)
print("Total Output Cost = USD ",keyword_cost_output_token_joy_suw_ga)
print("Total Cost = USD ",round(keyword_total_cost_joy_suw_ga,2))

Executed  10  Iterations
Total Execution time (in secs) -  20.0
Total Input Tokens -  12990
Total Input Cost = USD  0.13
Total Output Tokens -  827
Total Output Cost = USD  0.02
Total Cost = USD  0.15


In [683]:
# Build the negative keyword/phrase summary table for store 'joy_suw_ga'.
# Input : keyword_negative_output_joy_suw_ga - list of JSON strings, each parsed as
#         {category: [{'keywords': '<comma separated>', 'phrases': '<comma separated>'}, ...]}
# Output: negative_keywords_joy_suw_ga - one row per (Store Name, Sentiment, Type)

# Column layout: three identifier columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store display name once: it does not depend on the JSON being processed.
# The first keyword_mappings key containing the identifier wins; otherwise the placeholder is kept.
input_store_identifier = 'joy_suw_ga'
store_name = next(
    (mapped_name for mapping_key, mapped_name in keyword_mappings.items() if input_store_identifier in mapping_key),
    "Store Name Not Found",
)

# Collect plain dict records and build the DataFrame once at the end
# (growing a DataFrame with pd.concat inside the loop re-copies every earlier row on each append).
keyword_rows = []
for json_str in keyword_negative_output_joy_suw_ga:
    # Load the JSON string into a dictionary
    data = json.loads(json_str)
    for category, content_list in data.items():
        # Start from an all-None row so columns of other categories stay empty
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name
        row_data['Sentiment'] = 'negative'
        for content in content_list:
            # Keywords row: normalise the spacing around each comma-separated item
            row_data['Type'] = 'keywords'
            row_data[category] = ', '.join(kw.strip() for kw in content['keywords'].split(','))
            keyword_rows.append(dict(row_data))  # snapshot; row_data is mutated again below
            # Phrases row
            row_data['Type'] = 'phrases'
            row_data[category] = ', '.join(ph.strip() for ph in content['phrases'].split(','))
            keyword_rows.append(dict(row_data))

# With no records, fall back to an empty frame that still carries the expected columns
if keyword_rows:
    negative_keywords_joy_suw_ga = pd.DataFrame(keyword_rows)
else:
    negative_keywords_joy_suw_ga = pd.DataFrame(columns=columns)

# Consolidate rows that share the same 'Type' into a single row for each 'Store Name' and 'Sentiment';
# the non-null cell values of every other column are joined with a single space.
negative_keywords_joy_suw_ga = negative_keywords_joy_suw_ga.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

negative_keywords_joy_suw_ga

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Joyalukkas Jewellery-Suwanee, GA",negative,keywords,"unethical :1, disappointed :1, regret :1, bewa...","bad service :2, rude attitude :1, unprofession...","rude :3, unprofessional :2, disrespectful :2, ...",poor designs:1,"limited options: 2, limited collection: 2, not...",,"expensive:1, high:1, hidden:1, rip:1, markup:1","pricey :1, price difference :1, high :1, low b...","poor quality :1, worst :1, faulty :1, weak :1","faulty item:2, refund:1, exchange:1, untagged:..."
1,"Joyalukkas Jewellery-Suwanee, GA",negative,phrases,"don't feel like enrolling :1, worst experience...",least interested to even show the jewellery :1...,"very rude in talking :1, doesn't care customer...",No relevant negative phrases,"lack of variety in the jewelry offerings: 1, s...",,making charges are 100% more than gold prices:...,"Quite pricey I felt for such a small piece :1,...","very poor quality :1, quality of gold is worst...","faulty one:2, not the correct one:1, missing t..."


#### joy_chi_il

In [684]:
# Extract negative keywords/phrases per topic for store 'joy_chi_il' and report token usage and cost.

# Results returned by negative_keywords(), one entry per topic that has negative reviews
keyword_negative_output_joy_chi_il = []
# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Look the store's sentiment frame up once instead of twice per topic;
# doing it before the thread starts also means a missing key fails before any thread exists.
sentiment_df_joy_chi_il = keyword_dataframes['joy_chi_il_final_sen_df_jul']

keyword_counter_joy_chi_il=[0]
keyword_input_token_joy_chi_il = 0
keyword_output_token_joy_chi_il = 0
keyword_start_time_loop_joy_chi_il = time.time()

#Threading setup: the message thread receives the mutable counter, the total and the stop event
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_joy_chi_il, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_joy_chi_il[0]+=1
        # Keep only the reviews whose value for this topic is -1 (negative)
        filtered_comments_joy_chi_il = sentiment_df_joy_chi_il[sentiment_df_joy_chi_il[topic]==-1]['review_text'].tolist()
        # Topics without negative comments are skipped, so negative_keywords is not called for them
        if filtered_comments_joy_chi_il:
            # The call result is unpacked as (keywords, input tokens, output tokens)
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_joy_chi_il,topic)
            keyword_negative_output_joy_chi_il.append(keywords)
            keyword_input_token_joy_chi_il += input_tokens_loop
            keyword_output_token_joy_chi_il += output_token_loop
finally:
    #Stopping the dynamic message thread - also when the loop raised, otherwise it is never signalled to stop
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_joy_chi_il = time.time()
# Cost estimate: USD 0.01 per 1,000 input tokens and USD 0.03 per 1,000 output tokens
keyword_cost_input_token_joy_chi_il = round((0.01/1000)*keyword_input_token_joy_chi_il,2)
keyword_cost_output_token_joy_chi_il = round((0.03/1000)*keyword_output_token_joy_chi_il,2)
# NOTE(review): the total adds the two already-rounded costs, so it can differ by a cent from the rounded exact total
keyword_total_cost_joy_chi_il = keyword_cost_input_token_joy_chi_il + keyword_cost_output_token_joy_chi_il
keyword_total_time_loop_joy_chi_il = keyword_end_time_loop_joy_chi_il - keyword_start_time_loop_joy_chi_il

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_joy_chi_il[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_joy_chi_il,1))
print("Total Input Tokens - ", keyword_input_token_joy_chi_il)
print("Total Input Cost = USD ",keyword_cost_input_token_joy_chi_il)
print("Total Output Tokens - ", keyword_output_token_joy_chi_il)
print("Total Output Cost = USD ",keyword_cost_output_token_joy_chi_il)
print("Total Cost = USD ",round(keyword_total_cost_joy_chi_il,2))

Executed  10  Iterations
Total Execution time (in secs) -  19.5
Total Input Tokens -  11901
Total Input Cost = USD  0.12
Total Output Tokens -  700
Total Output Cost = USD  0.02
Total Cost = USD  0.14


In [685]:
# Build the negative keyword/phrase summary table for store 'joy_chi_il'.
# Input : keyword_negative_output_joy_chi_il - list of JSON strings, each parsed below as
#         {category: [{'keywords': '...', 'phrases': '...'}, ...]}
# Output: negative_keywords_joy_chi_il - one row per (Store Name, Sentiment, Type)

# Column layout of the summary: three identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the display name once: it is identical for every row of this store.
# The first keyword_mappings key that contains the identifier wins.
input_store_identifier = 'joy_chi_il'
store_name = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name = value
        break

# Collect plain dicts and build the DataFrame once, instead of pd.concat inside the loop
# (which re-copies the whole frame on every appended row).
records = []
for json_str in keyword_negative_output_joy_chi_il:
    # Load the JSON string into a dictionary
    data = json.loads(json_str)
    for category, content_list in data.items():
        # Template row: every column empty except the identifying ones
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name
        row_data['Sentiment'] = 'negative'
        for content in content_list:
            # Each content entry yields two rows: one for keywords, one for phrases.
            # Splitting and re-joining normalises the spacing around the commas.
            row_data['Type'] = 'keywords'
            row_data[category] = ', '.join(kw.strip() for kw in content['keywords'].split(','))
            records.append(dict(row_data))  # copy: row_data is mutated again below
            row_data['Type'] = 'phrases'
            row_data[category] = ', '.join(ph.strip() for ph in content['phrases'].split(','))
            records.append(dict(row_data))

if records:
    negative_keywords_joy_chi_il = pd.DataFrame(records)
else:
    # No negative output for this store: keep an empty frame with the expected columns
    negative_keywords_joy_chi_il = pd.DataFrame(columns=columns)

# Consolidate rows that share the same 'Type' into a single row per 'Store Name' and 'Sentiment';
# topics with no text for a given Type collapse to an empty string.
negative_keywords_joy_chi_il = negative_keywords_joy_chi_il.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

negative_keywords_joy_chi_il

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Joyalukkas Jewellery-Chicago, IL",negative,keywords,"not honest:1, scam:1, lack of integrity:1, lac...","unresponsive :1, rude :1, unprofessional :1, d...","rude :3, unresponsive :1, unprofessional :1, a...",Old designs:1,"collections :3, choice :1, design :1","no discounts :1, better deal :1",daylight robbery:1,"expensive: 1, pricey: 1, manipulated: 1, diffe...","manufacturing defect :3, faulty :1, flaw :1, b...","lost value:1, bring back:1"
1,"Joyalukkas Jewellery-Chicago, IL",negative,phrases,"totally different before and after business:1,...","very disappointing experience :1, very bad exp...","very rude :2, absolutely zero patience :1, doe...",No relevant negative phrases,"Not too many collections :1, need some more ch...","no real benefits or discounts :1, discounts ca...",day light robbery in name of making charges:1,"negotiating the price: 2, manipulated prices: ...","clear manufacturing defect :2, started showing...",don't bring back the ring to exchange within 1...


#### joy_hou_tx

In [686]:
# Extract negative keywords/phrases per topic for store 'joy_hou_tx' and report token usage and cost.

# Raw negative_keywords() results (a list): one entry per topic that has negative reviews
keyword_negative_output_joy_hou_tx = []
# Topic columns to iterate over (the 'review_text' column is not a topic)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Sentiment table for this store, looked up once instead of twice per iteration
sentiment_df_joy_hou_tx = keyword_dataframes['joy_hou_tx_final_sen_df_jul']

# One-element list: mutated in place so the progress thread, which receives the same object, sees updates
keyword_counter_joy_hou_tx = [0]
keyword_input_token_joy_hou_tx = 0
keyword_output_token_joy_hou_tx = 0
keyword_start_time_loop_joy_hou_tx = time.time()

#Threading setup: the progress printer runs in the background and is handed stop_event as its stop signal
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_joy_hou_tx, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_joy_hou_tx[0] += 1
        # Keep only the reviews tagged negative (-1) for this topic
        filtered_comments_joy_hou_tx = sentiment_df_joy_hou_tx.loc[sentiment_df_joy_hou_tx[topic] == -1, 'review_text'].tolist()
        # Topics without negative comments are skipped
        if filtered_comments_joy_hou_tx:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_joy_hou_tx, topic)
            keyword_negative_output_joy_hou_tx.append(keywords)
            keyword_input_token_joy_hou_tx += input_tokens_loop
            keyword_output_token_joy_hou_tx += output_token_loop
finally:
    # Always stop the progress thread, even when a call above raises;
    # otherwise the non-daemon thread is never signalled to stop.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_joy_hou_tx = time.time()
# Rates hard-coded here: USD 0.01 per 1,000 input tokens and USD 0.03 per 1,000 output tokens
keyword_cost_input_token_joy_hou_tx = round((0.01/1000)*keyword_input_token_joy_hou_tx,2)
keyword_cost_output_token_joy_hou_tx = round((0.03/1000)*keyword_output_token_joy_hou_tx,2)
# Note: the total is the sum of the two already-rounded components
keyword_total_cost_joy_hou_tx = keyword_cost_input_token_joy_hou_tx + keyword_cost_output_token_joy_hou_tx
keyword_total_time_loop_joy_hou_tx = keyword_end_time_loop_joy_hou_tx - keyword_start_time_loop_joy_hou_tx

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_joy_hou_tx[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_joy_hou_tx,1))
print("Total Input Tokens - ", keyword_input_token_joy_hou_tx)
print("Total Input Cost = USD ",keyword_cost_input_token_joy_hou_tx)
print("Total Output Tokens - ", keyword_output_token_joy_hou_tx)
print("Total Output Cost = USD ",keyword_cost_output_token_joy_hou_tx)
print("Total Cost = USD ",round(keyword_total_cost_joy_hou_tx,2))

Executed  10  Iterations
Total Execution time (in secs) -  22.5
Total Input Tokens -  16723
Total Input Cost = USD  0.17
Total Output Tokens -  779
Total Output Cost = USD  0.02
Total Cost = USD  0.19


In [687]:
# Build the negative keyword/phrase summary table for store 'joy_hou_tx'.
# Input : keyword_negative_output_joy_hou_tx - list of JSON strings, each parsed below as
#         {category: [{'keywords': '...', 'phrases': '...'}, ...]}
# Output: negative_keywords_joy_hou_tx - one row per (Store Name, Sentiment, Type)

# Column layout of the summary: three identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the display name once: it is identical for every row of this store.
# The first keyword_mappings key that contains the identifier wins.
input_store_identifier = 'joy_hou_tx'
store_name = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name = value
        break

# Collect plain dicts and build the DataFrame once, instead of pd.concat inside the loop
# (which re-copies the whole frame on every appended row).
records = []
for json_str in keyword_negative_output_joy_hou_tx:
    # Load the JSON string into a dictionary
    data = json.loads(json_str)
    for category, content_list in data.items():
        # Template row: every column empty except the identifying ones
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name
        row_data['Sentiment'] = 'negative'
        for content in content_list:
            # Each content entry yields two rows: one for keywords, one for phrases.
            # Splitting and re-joining normalises the spacing around the commas.
            row_data['Type'] = 'keywords'
            row_data[category] = ', '.join(kw.strip() for kw in content['keywords'].split(','))
            records.append(dict(row_data))  # copy: row_data is mutated again below
            row_data['Type'] = 'phrases'
            row_data[category] = ', '.join(ph.strip() for ph in content['phrases'].split(','))
            records.append(dict(row_data))

if records:
    negative_keywords_joy_hou_tx = pd.DataFrame(records)
else:
    # No negative output for this store: keep an empty frame with the expected columns
    negative_keywords_joy_hou_tx = pd.DataFrame(columns=columns)

# Consolidate rows that share the same 'Type' into a single row per 'Store Name' and 'Sentiment';
# topics with no text for a given Type collapse to an empty string.
negative_keywords_joy_hou_tx = negative_keywords_joy_hou_tx.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

negative_keywords_joy_hou_tx

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Joyalukkas Jewellery-Houston, TX",negative,keywords,"fraud :2, transparency :2, depreciate :1, trap...","rude :3, ignoring :2, unprofessional :2, disre...","rude :3, unhelpful :2, disrespectful :2, unpro...",No relevant negative keywords/ phrases,"limited :2, lackluster :1, dated :1, uninspiri...","no discount:1, higher:1, caveats:1, bargain:1,...","higher:2, fraud:1, ridicules:1, sheer:1, trap:1","high :2, unclear :1, misleading :1, arbitrary ...","poor quality:1, mediocre quality:1","no transparency: 2, poor customer service: 1, ..."
1,"Joyalukkas Jewellery-Houston, TX",negative,phrases,"fraud business model :2, no transparency :1, s...","no immediate greeting :1, pretending to be bus...","show ZERO courtesy towards customers :1, greet...",No relevant negative keywords/ phrases,"not a lot of selection :1, wish they had more ...","not willing to give any discount:1, don't get ...","charge me 22% Making:1, high in Making Charges...","no transparency :2, prices were disproportiona...","broke immediately:1, quality is poor:1, broke ...","lack of transparency: 1, feeling deceived: 1, ..."


#### joy_fri_tx

In [688]:
# Extract negative keywords/phrases per topic for store 'joy_fri_tx' and report token usage and cost.

# Raw negative_keywords() results (a list): one entry per topic that has negative reviews
keyword_negative_output_joy_fri_tx = []
# Topic columns to iterate over (the 'review_text' column is not a topic)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Sentiment table for this store, looked up once instead of twice per iteration
sentiment_df_joy_fri_tx = keyword_dataframes['joy_fri_tx_final_sen_df_jul']

# One-element list: mutated in place so the progress thread, which receives the same object, sees updates
keyword_counter_joy_fri_tx = [0]
keyword_input_token_joy_fri_tx = 0
keyword_output_token_joy_fri_tx = 0
keyword_start_time_loop_joy_fri_tx = time.time()

#Threading setup: the progress printer runs in the background and is handed stop_event as its stop signal
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_joy_fri_tx, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_joy_fri_tx[0] += 1
        # Keep only the reviews tagged negative (-1) for this topic
        filtered_comments_joy_fri_tx = sentiment_df_joy_fri_tx.loc[sentiment_df_joy_fri_tx[topic] == -1, 'review_text'].tolist()
        # Topics without negative comments are skipped
        if filtered_comments_joy_fri_tx:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_joy_fri_tx, topic)
            keyword_negative_output_joy_fri_tx.append(keywords)
            keyword_input_token_joy_fri_tx += input_tokens_loop
            keyword_output_token_joy_fri_tx += output_token_loop
finally:
    # Always stop the progress thread, even when a call above raises;
    # otherwise the non-daemon thread is never signalled to stop.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_joy_fri_tx = time.time()
# Rates hard-coded here: USD 0.01 per 1,000 input tokens and USD 0.03 per 1,000 output tokens
keyword_cost_input_token_joy_fri_tx = round((0.01/1000)*keyword_input_token_joy_fri_tx,2)
keyword_cost_output_token_joy_fri_tx = round((0.03/1000)*keyword_output_token_joy_fri_tx,2)
# Note: the total is the sum of the two already-rounded components
keyword_total_cost_joy_fri_tx = keyword_cost_input_token_joy_fri_tx + keyword_cost_output_token_joy_fri_tx
keyword_total_time_loop_joy_fri_tx = keyword_end_time_loop_joy_fri_tx - keyword_start_time_loop_joy_fri_tx

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_joy_fri_tx[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_joy_fri_tx,1))
print("Total Input Tokens - ", keyword_input_token_joy_fri_tx)
print("Total Input Cost = USD ",keyword_cost_input_token_joy_fri_tx)
print("Total Output Tokens - ", keyword_output_token_joy_fri_tx)
print("Total Output Cost = USD ",keyword_cost_output_token_joy_fri_tx)
print("Total Cost = USD ",round(keyword_total_cost_joy_fri_tx,2))

Executed  10  Iterations
Total Execution time (in secs) -  22.0
Total Input Tokens -  9870
Total Input Cost = USD  0.1
Total Output Tokens -  731
Total Output Cost = USD  0.02
Total Cost = USD  0.12


In [689]:
# Build the negative keyword/phrase summary table for store 'joy_fri_tx'.
# Input : keyword_negative_output_joy_fri_tx - list of JSON strings, each parsed below as
#         {category: [{'keywords': '...', 'phrases': '...'}, ...]}
# Output: negative_keywords_joy_fri_tx - one row per (Store Name, Sentiment, Type)

# Column layout of the summary: three identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the display name once: it is identical for every row of this store.
# The first keyword_mappings key that contains the identifier wins.
input_store_identifier = 'joy_fri_tx'
store_name = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name = value
        break

# Collect plain dicts and build the DataFrame once, instead of pd.concat inside the loop
# (which re-copies the whole frame on every appended row).
records = []
for json_str in keyword_negative_output_joy_fri_tx:
    # Load the JSON string into a dictionary
    data = json.loads(json_str)
    for category, content_list in data.items():
        # Template row: every column empty except the identifying ones
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name
        row_data['Sentiment'] = 'negative'
        for content in content_list:
            # Each content entry yields two rows: one for keywords, one for phrases.
            # Splitting and re-joining normalises the spacing around the commas.
            row_data['Type'] = 'keywords'
            row_data[category] = ', '.join(kw.strip() for kw in content['keywords'].split(','))
            records.append(dict(row_data))  # copy: row_data is mutated again below
            row_data['Type'] = 'phrases'
            row_data[category] = ', '.join(ph.strip() for ph in content['phrases'].split(','))
            records.append(dict(row_data))

if records:
    negative_keywords_joy_fri_tx = pd.DataFrame(records)
else:
    # No negative output for this store: keep an empty frame with the expected columns
    negative_keywords_joy_fri_tx = pd.DataFrame(columns=columns)

# Consolidate rows that share the same 'Type' into a single row per 'Store Name' and 'Sentiment';
# topics with no text for a given Type collapse to an empty string.
negative_keywords_joy_fri_tx = negative_keywords_joy_fri_tx.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

negative_keywords_joy_fri_tx

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Joyalukkas Jewellery-Frisco, TX",negative,keywords,"cheat :1, trust :1, respect :1, disappointed :...","bad experience:3, uninterested:1, unprofession...","bad attitude:2, rude:2, uninterested:1, unhelp...","distorted :1, not clean :1","limited collection:2, limited options:1, less ...",,"high :3, too much :1, highest :1, more :1, rel...","overpriced:2, extra:1, higher:1, wealthy:1, ne...","broke: 3, broken: 2, terrible: 1, distorted: 1...",
1,"Joyalukkas Jewellery-Frisco, TX",negative,phrases,"cheat with weight :1, impact your customer's t...","no transparency in weighing:1, mistakes in bil...","not even ready to take the box outside:1, not ...","item design was distorted :1, not a clean fix :1","Very limited collection compared to Malani:1, ...",,"making charges are very high :1, making charge...",price we have got is higher compared to the ot...,"bad weld quality: 1, ring broke into 2 pieces:...",


#### mal_chi_il

In [690]:
# Extract negative keywords/phrases per topic for store 'mal_chi_il' and report token usage and cost.

# Raw negative_keywords() results (a list): one entry per topic that has negative reviews
keyword_negative_output_mal_chi_il = []
# Topic columns to iterate over (the 'review_text' column is not a topic)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Sentiment table for this store, looked up once instead of twice per iteration
sentiment_df_mal_chi_il = keyword_dataframes['mal_chi_il_final_sen_df_jul']

# One-element list: mutated in place so the progress thread, which receives the same object, sees updates
keyword_counter_mal_chi_il = [0]
keyword_input_token_mal_chi_il = 0
keyword_output_token_mal_chi_il = 0
keyword_start_time_loop_mal_chi_il = time.time()

#Threading setup: the progress printer runs in the background and is handed stop_event as its stop signal
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_mal_chi_il, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_mal_chi_il[0] += 1
        # Keep only the reviews tagged negative (-1) for this topic
        filtered_comments_mal_chi_il = sentiment_df_mal_chi_il.loc[sentiment_df_mal_chi_il[topic] == -1, 'review_text'].tolist()
        # Topics without negative comments are skipped
        if filtered_comments_mal_chi_il:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_mal_chi_il, topic)
            keyword_negative_output_mal_chi_il.append(keywords)
            keyword_input_token_mal_chi_il += input_tokens_loop
            keyword_output_token_mal_chi_il += output_token_loop
finally:
    # Always stop the progress thread, even when a call above raises;
    # otherwise the non-daemon thread is never signalled to stop.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_mal_chi_il = time.time()
# Rates hard-coded here: USD 0.01 per 1,000 input tokens and USD 0.03 per 1,000 output tokens
keyword_cost_input_token_mal_chi_il = round((0.01/1000)*keyword_input_token_mal_chi_il,2)
keyword_cost_output_token_mal_chi_il = round((0.03/1000)*keyword_output_token_mal_chi_il,2)
# Note: the total is the sum of the two already-rounded components
keyword_total_cost_mal_chi_il = keyword_cost_input_token_mal_chi_il + keyword_cost_output_token_mal_chi_il
keyword_total_time_loop_mal_chi_il = keyword_end_time_loop_mal_chi_il - keyword_start_time_loop_mal_chi_il

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_mal_chi_il[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_mal_chi_il,1))
print("Total Input Tokens - ", keyword_input_token_mal_chi_il)
print("Total Input Cost = USD ",keyword_cost_input_token_mal_chi_il)
print("Total Output Tokens - ", keyword_output_token_mal_chi_il)
print("Total Output Cost = USD ",keyword_cost_output_token_mal_chi_il)
print("Total Cost = USD ",round(keyword_total_cost_mal_chi_il,2))

Executed  10  Iterations
Total Execution time (in secs) -  10.0
Total Input Tokens -  5457
Total Input Cost = USD  0.05
Total Output Tokens -  370
Total Output Cost = USD  0.01
Total Cost = USD  0.06


In [691]:
# Build the negative keyword/phrase summary table for store 'mal_chi_il'.
# Input : keyword_negative_output_mal_chi_il - list of JSON strings, each parsed below as
#         {category: [{'keywords': '...', 'phrases': '...'}, ...]}
# Output: negative_keywords_mal_chi_il - one row per (Store Name, Sentiment, Type)

# Column layout of the summary: three identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the display name once: it is identical for every row of this store.
# The first keyword_mappings key that contains the identifier wins.
input_store_identifier = 'mal_chi_il'
store_name = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name = value
        break

# Collect plain dicts and build the DataFrame once, instead of pd.concat inside the loop
# (which re-copies the whole frame on every appended row).
records = []
for json_str in keyword_negative_output_mal_chi_il:
    # Load the JSON string into a dictionary
    data = json.loads(json_str)
    for category, content_list in data.items():
        # Template row: every column empty except the identifying ones
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name
        row_data['Sentiment'] = 'negative'
        for content in content_list:
            # Each content entry yields two rows: one for keywords, one for phrases.
            # Splitting and re-joining normalises the spacing around the commas.
            row_data['Type'] = 'keywords'
            row_data[category] = ', '.join(kw.strip() for kw in content['keywords'].split(','))
            records.append(dict(row_data))  # copy: row_data is mutated again below
            row_data['Type'] = 'phrases'
            row_data[category] = ', '.join(ph.strip() for ph in content['phrases'].split(','))
            records.append(dict(row_data))

if records:
    negative_keywords_mal_chi_il = pd.DataFrame(records)
else:
    # No negative output for this store: keep an empty frame with the expected columns
    negative_keywords_mal_chi_il = pd.DataFrame(columns=columns)

# Consolidate rows that share the same 'Type' into a single row per 'Store Name' and 'Sentiment';
# topics with no text for a given Type collapse to an empty string.
negative_keywords_mal_chi_il = negative_keywords_mal_chi_il.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

negative_keywords_mal_chi_il

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Malabar Gold & Diamonds-Chicago, IL",negative,keywords,No relevant negative keywords/ phrases,"bad experience:2, unacceptable:1, disappointin...","rude :1, unpleasant :1, idle :1, ignoring :1, ...",,less designs:1,,,"expensive: 2, overspending: 1, doubled: 1, sup...","broke:2, delicate:1",
1,"Malabar Gold & Diamonds-Chicago, IL",negative,phrases,No relevant negative keywords/ phrases,"no interest in working:1, glued to their phone...","bad attitude :1, not helping :1, no interest i...",,"may be less no designs:1, hope they will add m...",,,"raise it price alot: 1, price was doubled befo...","broke the moment:1, broke off:1, did not hold ...",


#### mal_nap_il

In [692]:
# Extract negative keywords/phrases per topic for store 'mal_nap_il' and report token usage and cost.

# Raw negative_keywords() results (a list): one entry per topic that has negative reviews
keyword_negative_output_mal_nap_il = []
# Topic columns to iterate over (the 'review_text' column is not a topic)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Sentiment table for this store, looked up once instead of twice per iteration
sentiment_df_mal_nap_il = keyword_dataframes['mal_nap_il_final_sen_df_jul']

# One-element list: mutated in place so the progress thread, which receives the same object, sees updates
keyword_counter_mal_nap_il = [0]
keyword_input_token_mal_nap_il = 0
keyword_output_token_mal_nap_il = 0
keyword_start_time_loop_mal_nap_il = time.time()

#Threading setup: the progress printer runs in the background and is handed stop_event as its stop signal
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_mal_nap_il, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_mal_nap_il[0] += 1
        # Keep only the reviews tagged negative (-1) for this topic
        filtered_comments_mal_nap_il = sentiment_df_mal_nap_il.loc[sentiment_df_mal_nap_il[topic] == -1, 'review_text'].tolist()
        # Topics without negative comments are skipped
        if filtered_comments_mal_nap_il:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_mal_nap_il, topic)
            keyword_negative_output_mal_nap_il.append(keywords)
            keyword_input_token_mal_nap_il += input_tokens_loop
            keyword_output_token_mal_nap_il += output_token_loop
finally:
    # Always stop the progress thread, even when a call above raises;
    # otherwise the non-daemon thread is never signalled to stop.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_mal_nap_il = time.time()
# Rates hard-coded here: USD 0.01 per 1,000 input tokens and USD 0.03 per 1,000 output tokens
keyword_cost_input_token_mal_nap_il = round((0.01/1000)*keyword_input_token_mal_nap_il,2)
keyword_cost_output_token_mal_nap_il = round((0.03/1000)*keyword_output_token_mal_nap_il,2)
# Note: the total is the sum of the two already-rounded components
keyword_total_cost_mal_nap_il = keyword_cost_input_token_mal_nap_il + keyword_cost_output_token_mal_nap_il
keyword_total_time_loop_mal_nap_il = keyword_end_time_loop_mal_nap_il - keyword_start_time_loop_mal_nap_il

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_mal_nap_il[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_mal_nap_il,1))
print("Total Input Tokens - ", keyword_input_token_mal_nap_il)
print("Total Input Cost = USD ",keyword_cost_input_token_mal_nap_il)
print("Total Output Tokens - ", keyword_output_token_mal_nap_il)
print("Total Output Cost = USD ",keyword_cost_output_token_mal_nap_il)
print("Total Cost = USD ",round(keyword_total_cost_mal_nap_il,2))

Executed  10  Iterations
Total Execution time (in secs) -  22.0
Total Input Tokens -  11284
Total Input Cost = USD  0.11
Total Output Tokens -  630
Total Output Cost = USD  0.02
Total Cost = USD  0.13


In [693]:
# Build the negative keyword/phrase summary table for store 'mal_nap_il'.
# Input : keyword_negative_output_mal_nap_il - list of JSON strings, each parsed below as
#         {category: [{'keywords': '...', 'phrases': '...'}, ...]}
# Output: negative_keywords_mal_nap_il - one row per (Store Name, Sentiment, Type)

# Column layout of the summary: three identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the display name once: it is identical for every row of this store.
# The first keyword_mappings key that contains the identifier wins.
input_store_identifier = 'mal_nap_il'
store_name = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name = value
        break

# Collect plain dicts and build the DataFrame once, instead of pd.concat inside the loop
# (which re-copies the whole frame on every appended row).
records = []
for json_str in keyword_negative_output_mal_nap_il:
    # Load the JSON string into a dictionary
    data = json.loads(json_str)
    for category, content_list in data.items():
        # Template row: every column empty except the identifying ones
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name
        row_data['Sentiment'] = 'negative'
        for content in content_list:
            # Each content entry yields two rows: one for keywords, one for phrases.
            # Splitting and re-joining normalises the spacing around the commas.
            row_data['Type'] = 'keywords'
            row_data[category] = ', '.join(kw.strip() for kw in content['keywords'].split(','))
            records.append(dict(row_data))  # copy: row_data is mutated again below
            row_data['Type'] = 'phrases'
            row_data[category] = ', '.join(ph.strip() for ph in content['phrases'].split(','))
            records.append(dict(row_data))

if records:
    negative_keywords_mal_nap_il = pd.DataFrame(records)
else:
    # No negative output for this store: keep an empty frame with the expected columns
    negative_keywords_mal_nap_il = pd.DataFrame(columns=columns)

# Consolidate rows that share the same 'Type' into a single row per 'Store Name' and 'Sentiment';
# topics with no text for a given Type collapse to an empty string.
negative_keywords_mal_nap_il = negative_keywords_mal_nap_il.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

negative_keywords_mal_nap_il

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Malabar Gold & Diamonds-Naperville, IL",negative,keywords,"high price:1, huge margins:1","bad management :1, poor service :1, rude :1, d...","rude :2, arrogant :1, lazy :1, unprofessional ...",No relevant negative keywords/ phrases,old fashioned :1,special offers:1,"high price:1, huge margins:1","different price: 3, wrong price: 2, higher pri...",defective :1,deduct :1
1,"Malabar Gold & Diamonds-Naperville, IL",negative,phrases,"charging 50% on the making charges:1, doesn’t ...","horribly managed crowd :1, waited for an hour ...","lack of communication between employees :2, di...",No relevant negative keywords/ phrases,Very old fashioned models :1,No relevant negative keywords/ phrases,"charging 50% on the making charges:1, surprise...","inconsistency in pricing: 1, confusing and com...","both pieces were defective :1, no one had even...","no deduction while you exchange :1, we will de..."


#### mal_ise_nj

In [694]:
# Extract negative keywords/phrases per topic for store 'mal_ise_nj' and report token usage and cost.

# Raw negative_keywords() results (a list): one entry per topic that has negative reviews
keyword_negative_output_mal_ise_nj = []
# Topic columns to iterate over (the 'review_text' column is not a topic)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Sentiment table for this store, looked up once instead of twice per iteration
sentiment_df_mal_ise_nj = keyword_dataframes['mal_ise_nj_final_sen_df_jul']

# One-element list: mutated in place so the progress thread, which receives the same object, sees updates
keyword_counter_mal_ise_nj = [0]
keyword_input_token_mal_ise_nj = 0
keyword_output_token_mal_ise_nj = 0
keyword_start_time_loop_mal_ise_nj = time.time()

#Threading setup: the progress printer runs in the background and is handed stop_event as its stop signal
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_mal_ise_nj, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_mal_ise_nj[0] += 1
        # Keep only the reviews tagged negative (-1) for this topic
        filtered_comments_mal_ise_nj = sentiment_df_mal_ise_nj.loc[sentiment_df_mal_ise_nj[topic] == -1, 'review_text'].tolist()
        # Topics without negative comments are skipped
        if filtered_comments_mal_ise_nj:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_mal_ise_nj, topic)
            keyword_negative_output_mal_ise_nj.append(keywords)
            keyword_input_token_mal_ise_nj += input_tokens_loop
            keyword_output_token_mal_ise_nj += output_token_loop
finally:
    # Always stop the progress thread, even when a call above raises;
    # otherwise the non-daemon thread is never signalled to stop.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_mal_ise_nj = time.time()
# Rates hard-coded here: USD 0.01 per 1,000 input tokens and USD 0.03 per 1,000 output tokens
keyword_cost_input_token_mal_ise_nj = round((0.01/1000)*keyword_input_token_mal_ise_nj,2)
keyword_cost_output_token_mal_ise_nj = round((0.03/1000)*keyword_output_token_mal_ise_nj,2)
# Note: the total is the sum of the two already-rounded components
keyword_total_cost_mal_ise_nj = keyword_cost_input_token_mal_ise_nj + keyword_cost_output_token_mal_ise_nj
keyword_total_time_loop_mal_ise_nj = keyword_end_time_loop_mal_ise_nj - keyword_start_time_loop_mal_ise_nj

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_mal_ise_nj[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_mal_ise_nj,1))
print("Total Input Tokens - ", keyword_input_token_mal_ise_nj)
print("Total Input Cost = USD ",keyword_cost_input_token_mal_ise_nj)
print("Total Output Tokens - ", keyword_output_token_mal_ise_nj)
print("Total Output Cost = USD ",keyword_cost_output_token_mal_ise_nj)
print("Total Cost = USD ",round(keyword_total_cost_mal_ise_nj,2))

Executed  10  Iterations
Total Execution time (in secs) -  15.5
Total Input Tokens -  10880
Total Input Cost = USD  0.11
Total Output Tokens -  595
Total Output Cost = USD  0.02
Total Cost = USD  0.13


In [695]:
# Build the negative keyword/phrase table for store 'mal_ise_nj' from the raw
# JSON responses collected in keyword_negative_output_mal_ise_nj.

# Column layout: three identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store's display name once; it is the same for every row.
input_store_identifier = 'mal_ise_nj'
store_name = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name = value
        break

# Collect one record per (category, type) and build the DataFrame in a single
# step instead of growing it with pd.concat inside the loop.
records = []
for json_str in keyword_negative_output_mal_ise_nj:
    # Each response is a JSON object mapping a category to a list of entries
    data = json.loads(json_str)
    for category, content_list in data.items():
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name
        row_data['Sentiment'] = 'negative'
        for content in content_list:
            # Keywords row first (comma-separated items, whitespace normalised)
            row_data['Type'] = 'keywords'
            row_data[category] = ', '.join(kw.strip() for kw in content['keywords'].split(','))
            records.append(dict(row_data))  # copy: row_data is mutated below
            # Phrases row next
            row_data['Type'] = 'phrases'
            row_data[category] = ', '.join(ph.strip() for ph in content['phrases'].split(','))
            records.append(dict(row_data))

# Keep the expected column layout even when no topic had negative reviews
negative_keywords_mal_ise_nj = pd.DataFrame(records) if records else pd.DataFrame(columns=columns)

# Consolidate to one row per ('Store Name', 'Sentiment', 'Type'), joining the
# non-null cell of each topic column
negative_keywords_mal_ise_nj = negative_keywords_mal_ise_nj.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

negative_keywords_mal_ise_nj

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Malabar Gold & Diamonds-Iselin, NJ",negative,keywords,"refused to honor :1, unnecessary :1, concernin...","rude :2, pushy :1, unengaged :1, annoyed :1, h...","rude :2, unengaged :1, ignored :1, gossiping :...",No relevant negative keywords/ phrases,"zero collection:1, limited options:1","No proper discounts :1, better discount :1","very high :2, expensive :1","expensive: 3, high: 3, costly: 1, overpriced: ...",,"no exchange :1, not too helpful :1"
1,"Malabar Gold & Diamonds-Iselin, NJ",negative,phrases,refused to honor their stated cash back policy...,"sales woman started giving me lectures :1, rud...","not responding properly :1, giving me lectures...",design collection could be more up-to-date :1,no relevant negative phrases,No proper discounts or benefits for regular cu...,making charges and stone cost is also very hig...,"very high: 2, making charges: 2, choose their ...",,no exchange option for the items purchased out...


#### mal_fri_tx

In [696]:
# Negative keyword extraction for store 'mal_fri_tx'.
# Collects one raw negative_keywords() response per topic that has at least one
# negative review, while tracking token usage, cost and elapsed time.
keyword_negative_output_mal_fri_tx = []  # list of raw responses (not a dict)
# Topic columns to iterate over (the 'review_text' column is not a topic)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

keyword_counter_mal_fri_tx=[0]  # single-element list so the progress thread sees updates
keyword_input_token_mal_fri_tx = 0
keyword_output_token_mal_fri_tx = 0
keyword_start_time_loop_mal_fri_tx = time.time()

# Look the store's sentiment frame up once (and before the thread starts, so a
# missing key fails without leaving a progress thread behind).
sen_df_mal_fri_tx = keyword_dataframes['mal_fri_tx_final_sen_df_jul']

#Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_mal_fri_tx, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_mal_fri_tx[0]+=1
        # Reviews flagged negative (-1) for this topic
        filtered_comments_mal_fri_tx = sen_df_mal_fri_tx.loc[sen_df_mal_fri_tx[topic] == -1, 'review_text'].tolist()
        # Only call the API when there is at least one negative comment
        if filtered_comments_mal_fri_tx:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_mal_fri_tx, topic)
            keyword_negative_output_mal_fri_tx.append(keywords)
            keyword_input_token_mal_fri_tx += input_tokens_loop
            keyword_output_token_mal_fri_tx += output_token_loop
finally:
    # Always stop the dynamic message thread, even if an API call raises;
    # otherwise it would presumably keep running until stop_event is set.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_mal_fri_tx = time.time()
# Hard-coded rates: USD 0.01 per 1,000 input tokens, USD 0.03 per 1,000 output tokens.
# NOTE: each component is rounded to cents before the two are summed.
keyword_cost_input_token_mal_fri_tx = round((0.01/1000)*keyword_input_token_mal_fri_tx,2)
keyword_cost_output_token_mal_fri_tx = round((0.03/1000)*keyword_output_token_mal_fri_tx,2)
keyword_total_cost_mal_fri_tx = keyword_cost_input_token_mal_fri_tx + keyword_cost_output_token_mal_fri_tx
keyword_total_time_loop_mal_fri_tx = keyword_end_time_loop_mal_fri_tx - keyword_start_time_loop_mal_fri_tx

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_mal_fri_tx[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_mal_fri_tx,1))
print("Total Input Tokens - ", keyword_input_token_mal_fri_tx)
print("Total Input Cost = USD ",keyword_cost_input_token_mal_fri_tx)
print("Total Output Tokens - ", keyword_output_token_mal_fri_tx)
print("Total Output Cost = USD ",keyword_cost_output_token_mal_fri_tx)
print("Total Cost = USD ",round(keyword_total_cost_mal_fri_tx,2))

Executed  10  Iterations
Total Execution time (in secs) -  17.5
Total Input Tokens -  10687
Total Input Cost = USD  0.11
Total Output Tokens -  650
Total Output Cost = USD  0.02
Total Cost = USD  0.13


In [697]:
# Build the negative keyword/phrase table for store 'mal_fri_tx' from the raw
# JSON responses collected in keyword_negative_output_mal_fri_tx.

# Column layout: three identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store's display name once; it is the same for every row.
input_store_identifier = 'mal_fri_tx'
store_name = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name = value
        break

# Collect one record per (category, type) and build the DataFrame in a single
# step instead of growing it with pd.concat inside the loop.
records = []
for json_str in keyword_negative_output_mal_fri_tx:
    # Each response is a JSON object mapping a category to a list of entries
    data = json.loads(json_str)
    for category, content_list in data.items():
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name
        row_data['Sentiment'] = 'negative'
        for content in content_list:
            # Keywords row first (comma-separated items, whitespace normalised)
            row_data['Type'] = 'keywords'
            row_data[category] = ', '.join(kw.strip() for kw in content['keywords'].split(','))
            records.append(dict(row_data))  # copy: row_data is mutated below
            # Phrases row next
            row_data['Type'] = 'phrases'
            row_data[category] = ', '.join(ph.strip() for ph in content['phrases'].split(','))
            records.append(dict(row_data))

# Keep the expected column layout even when no topic had negative reviews
negative_keywords_mal_fri_tx = pd.DataFrame(records) if records else pd.DataFrame(columns=columns)

# Consolidate to one row per ('Store Name', 'Sentiment', 'Type'), joining the
# non-null cell of each topic column
negative_keywords_mal_fri_tx = negative_keywords_mal_fri_tx.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

negative_keywords_mal_fri_tx

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Malabar Gold & Diamonds-Frisco, TX",negative,keywords,"disappointed :2, unprofessional :1, fake :1, s...","waiting :4, closed :2, unhelpful :1, ignored :...","rude :3, unhelpful :2, unprofessional :1, abru...",,"small collection:2, more inventory:1, more mod...",no discounts:1,"expensive :1, scam :1, high :1","expensive:1, high:1, more:1","subpar :1, broke :1, low quality :1","exchange :2, NO FULL REFUND :1"
1,"Malabar Gold & Diamonds-Frisco, TX",negative,phrases,"none picked the phone :1, failed to listen :1,...","waited for 45+ minutes :2, waited for almost 4...","rude people along with Manager :1, took 2 hrs ...",,"collection was smaller than expected:2, need t...",gave good discount but now they are saying no ...,charged a little expensive on making charges :...,"very high price:1, more expensive here:1, char...","not good quality :1, low quality product :1","only exchange, not explained to me :1, challen..."


#### mal_ric_tx

In [698]:
# Negative keyword extraction for store 'mal_ric_tx'.
# Collects one raw negative_keywords() response per topic that has at least one
# negative review, while tracking token usage, cost and elapsed time.
keyword_negative_output_mal_ric_tx = []  # list of raw responses (not a dict)
# Topic columns to iterate over (the 'review_text' column is not a topic)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

keyword_counter_mal_ric_tx=[0]  # single-element list so the progress thread sees updates
keyword_input_token_mal_ric_tx = 0
keyword_output_token_mal_ric_tx = 0
keyword_start_time_loop_mal_ric_tx = time.time()

# Look the store's sentiment frame up once (and before the thread starts, so a
# missing key fails without leaving a progress thread behind).
sen_df_mal_ric_tx = keyword_dataframes['mal_ric_tx_final_sen_df_jul']

#Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_mal_ric_tx, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_mal_ric_tx[0]+=1
        # Reviews flagged negative (-1) for this topic
        filtered_comments_mal_ric_tx = sen_df_mal_ric_tx.loc[sen_df_mal_ric_tx[topic] == -1, 'review_text'].tolist()
        # Only call the API when there is at least one negative comment
        if filtered_comments_mal_ric_tx:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_mal_ric_tx, topic)
            keyword_negative_output_mal_ric_tx.append(keywords)
            keyword_input_token_mal_ric_tx += input_tokens_loop
            keyword_output_token_mal_ric_tx += output_token_loop
finally:
    # Always stop the dynamic message thread, even if an API call raises;
    # otherwise it would presumably keep running until stop_event is set.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_mal_ric_tx = time.time()
# Hard-coded rates: USD 0.01 per 1,000 input tokens, USD 0.03 per 1,000 output tokens.
# NOTE: each component is rounded to cents before the two are summed.
keyword_cost_input_token_mal_ric_tx = round((0.01/1000)*keyword_input_token_mal_ric_tx,2)
keyword_cost_output_token_mal_ric_tx = round((0.03/1000)*keyword_output_token_mal_ric_tx,2)
keyword_total_cost_mal_ric_tx = keyword_cost_input_token_mal_ric_tx + keyword_cost_output_token_mal_ric_tx
keyword_total_time_loop_mal_ric_tx = keyword_end_time_loop_mal_ric_tx - keyword_start_time_loop_mal_ric_tx

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_mal_ric_tx[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_mal_ric_tx,1))
print("Total Input Tokens - ", keyword_input_token_mal_ric_tx)
print("Total Input Cost = USD ",keyword_cost_input_token_mal_ric_tx)
print("Total Output Tokens - ", keyword_output_token_mal_ric_tx)
print("Total Output Cost = USD ",keyword_cost_output_token_mal_ric_tx)
print("Total Cost = USD ",round(keyword_total_cost_mal_ric_tx,2))

Executed  10  Iterations
Total Execution time (in secs) -  12.0
Total Input Tokens -  6152
Total Input Cost = USD  0.06
Total Output Tokens -  425
Total Output Cost = USD  0.01
Total Cost = USD  0.07


In [699]:
# Build the negative keyword/phrase table for store 'mal_ric_tx' from the raw
# JSON responses collected in keyword_negative_output_mal_ric_tx.

# Column layout: three identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store's display name once; it is the same for every row.
input_store_identifier = 'mal_ric_tx'
store_name = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name = value
        break

# Collect one record per (category, type) and build the DataFrame in a single
# step instead of growing it with pd.concat inside the loop.
records = []
for json_str in keyword_negative_output_mal_ric_tx:
    # Each response is a JSON object mapping a category to a list of entries
    data = json.loads(json_str)
    for category, content_list in data.items():
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name
        row_data['Sentiment'] = 'negative'
        for content in content_list:
            # Keywords row first (comma-separated items, whitespace normalised)
            row_data['Type'] = 'keywords'
            row_data[category] = ', '.join(kw.strip() for kw in content['keywords'].split(','))
            records.append(dict(row_data))  # copy: row_data is mutated below
            # Phrases row next
            row_data['Type'] = 'phrases'
            row_data[category] = ', '.join(ph.strip() for ph in content['phrases'].split(','))
            records.append(dict(row_data))

# Keep the expected column layout even when no topic had negative reviews
negative_keywords_mal_ric_tx = pd.DataFrame(records) if records else pd.DataFrame(columns=columns)

# Consolidate to one row per ('Store Name', 'Sentiment', 'Type'), joining the
# non-null cell of each topic column
negative_keywords_mal_ric_tx = negative_keywords_mal_ric_tx.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

negative_keywords_mal_ric_tx

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Malani Jewellers-Richardson, TX",negative,keywords,"lying :2, stupid :1","uncomfortable :1, rude :1, disappointed :1, un...","rude :3, unprofessional :1, hateful :1, disres...",,,fake discounts:1,,"higher side:1, mark up:1, overcharged:1, fake ...",No relevant negative keywords/ phrases,
1,"Malani Jewellers-Richardson, TX",negative,phrases,take all your information before you even deci...,"eyes following my every move :1, experience wa...","not even giving the full details :1, not very ...",,,mark up the price and give fake discounts:1,,"mark up the price:1, doesn't match with the li...",No relevant negative keywords/ phrases,


#### may_vie_va

In [700]:
# Negative keyword extraction for store 'may_vie_va'.
# Collects one raw negative_keywords() response per topic that has at least one
# negative review, while tracking token usage, cost and elapsed time.
keyword_negative_output_may_vie_va = []  # list of raw responses (not a dict)
# Topic columns to iterate over (the 'review_text' column is not a topic)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

keyword_counter_may_vie_va=[0]  # single-element list so the progress thread sees updates
keyword_input_token_may_vie_va = 0
keyword_output_token_may_vie_va = 0
keyword_start_time_loop_may_vie_va = time.time()

# Look the store's sentiment frame up once (and before the thread starts, so a
# missing key fails without leaving a progress thread behind).
sen_df_may_vie_va = keyword_dataframes['may_vie_va_final_sen_df_jul']

#Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_may_vie_va, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_may_vie_va[0]+=1
        # Reviews flagged negative (-1) for this topic
        filtered_comments_may_vie_va = sen_df_may_vie_va.loc[sen_df_may_vie_va[topic] == -1, 'review_text'].tolist()
        # Only call the API when there is at least one negative comment
        if filtered_comments_may_vie_va:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_may_vie_va, topic)
            keyword_negative_output_may_vie_va.append(keywords)
            keyword_input_token_may_vie_va += input_tokens_loop
            keyword_output_token_may_vie_va += output_token_loop
finally:
    # Always stop the dynamic message thread, even if an API call raises;
    # otherwise it would presumably keep running until stop_event is set.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_may_vie_va = time.time()
# Hard-coded rates: USD 0.01 per 1,000 input tokens, USD 0.03 per 1,000 output tokens.
# NOTE: each component is rounded to cents before the two are summed.
keyword_cost_input_token_may_vie_va = round((0.01/1000)*keyword_input_token_may_vie_va,2)
keyword_cost_output_token_may_vie_va = round((0.03/1000)*keyword_output_token_may_vie_va,2)
keyword_total_cost_may_vie_va = keyword_cost_input_token_may_vie_va + keyword_cost_output_token_may_vie_va
keyword_total_time_loop_may_vie_va = keyword_end_time_loop_may_vie_va - keyword_start_time_loop_may_vie_va

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_may_vie_va[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_may_vie_va,1))
print("Total Input Tokens - ", keyword_input_token_may_vie_va)
print("Total Input Cost = USD ",keyword_cost_input_token_may_vie_va)
print("Total Output Tokens - ", keyword_output_token_may_vie_va)
print("Total Output Cost = USD ",keyword_cost_output_token_may_vie_va)
print("Total Cost = USD ",round(keyword_total_cost_may_vie_va,2))

Executed  10  Iterations
Total Execution time (in secs) -  5.5
Total Input Tokens -  2629
Total Input Cost = USD  0.03
Total Output Tokens -  210
Total Output Cost = USD  0.01
Total Cost = USD  0.04


In [701]:
# Build the negative keyword/phrase table for store 'may_vie_va' from the raw
# JSON responses collected in keyword_negative_output_may_vie_va.

# Column layout: three identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store's display name once; it is the same for every row.
input_store_identifier = 'may_vie_va'
store_name = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name = value
        break

# Collect one record per (category, type) and build the DataFrame in a single
# step instead of growing it with pd.concat inside the loop.
records = []
for json_str in keyword_negative_output_may_vie_va:
    # Each response is a JSON object mapping a category to a list of entries
    data = json.loads(json_str)
    for category, content_list in data.items():
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name
        row_data['Sentiment'] = 'negative'
        for content in content_list:
            # Keywords row first (comma-separated items, whitespace normalised)
            row_data['Type'] = 'keywords'
            row_data[category] = ', '.join(kw.strip() for kw in content['keywords'].split(','))
            records.append(dict(row_data))  # copy: row_data is mutated below
            # Phrases row next
            row_data['Type'] = 'phrases'
            row_data[category] = ', '.join(ph.strip() for ph in content['phrases'].split(','))
            records.append(dict(row_data))

# Keep the expected column layout even when no topic had negative reviews
negative_keywords_may_vie_va = pd.DataFrame(records) if records else pd.DataFrame(columns=columns)

# Consolidate to one row per ('Store Name', 'Sentiment', 'Type'), joining the
# non-null cell of each topic column
negative_keywords_may_vie_va = negative_keywords_may_vie_va.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

negative_keywords_may_vie_va

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"May Jewelers-Vienna, VA",negative,keywords,,"horrible experience:1, scammed:1, terrible ser...","rude :1, annoying :1, scammers :1, horrible :1...",,,,,high prices :1,,
1,"May Jewelers-Vienna, VA",negative,phrases,,"cut off gold:2, didn't adjust the size:1, scam...","guy with glasses started yelling :1, old lady ...",,,,,extremely high prices :1,,


#### son_ise_nj

In [702]:
# Negative keyword extraction for store 'son_ise_nj'.
# Collects one raw negative_keywords() response per topic that has at least one
# negative review, while tracking token usage, cost and elapsed time.
keyword_negative_output_son_ise_nj = []  # list of raw responses (not a dict)
# Topic columns to iterate over (the 'review_text' column is not a topic)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

keyword_counter_son_ise_nj=[0]  # single-element list so the progress thread sees updates
keyword_input_token_son_ise_nj = 0
keyword_output_token_son_ise_nj = 0
keyword_start_time_loop_son_ise_nj = time.time()

# Look the store's sentiment frame up once (and before the thread starts, so a
# missing key fails without leaving a progress thread behind).
sen_df_son_ise_nj = keyword_dataframes['son_ise_nj_final_sen_df_jul']

#Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_son_ise_nj, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_son_ise_nj[0]+=1
        # Reviews flagged negative (-1) for this topic
        filtered_comments_son_ise_nj = sen_df_son_ise_nj.loc[sen_df_son_ise_nj[topic] == -1, 'review_text'].tolist()
        # Only call the API when there is at least one negative comment
        if filtered_comments_son_ise_nj:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_son_ise_nj, topic)
            keyword_negative_output_son_ise_nj.append(keywords)
            keyword_input_token_son_ise_nj += input_tokens_loop
            keyword_output_token_son_ise_nj += output_token_loop
finally:
    # Always stop the dynamic message thread, even if an API call raises;
    # otherwise it would presumably keep running until stop_event is set.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_son_ise_nj = time.time()
# Hard-coded rates: USD 0.01 per 1,000 input tokens, USD 0.03 per 1,000 output tokens.
# NOTE: each component is rounded to cents before the two are summed.
keyword_cost_input_token_son_ise_nj = round((0.01/1000)*keyword_input_token_son_ise_nj,2)
keyword_cost_output_token_son_ise_nj = round((0.03/1000)*keyword_output_token_son_ise_nj,2)
keyword_total_cost_son_ise_nj = keyword_cost_input_token_son_ise_nj + keyword_cost_output_token_son_ise_nj
keyword_total_time_loop_son_ise_nj = keyword_end_time_loop_son_ise_nj - keyword_start_time_loop_son_ise_nj

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_son_ise_nj[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_son_ise_nj,1))
print("Total Input Tokens - ", keyword_input_token_son_ise_nj)
print("Total Input Cost = USD ",keyword_cost_input_token_son_ise_nj)
print("Total Output Tokens - ", keyword_output_token_son_ise_nj)
print("Total Output Cost = USD ",keyword_cost_output_token_son_ise_nj)
print("Total Cost = USD ",round(keyword_total_cost_son_ise_nj,2))

Executed  10  Iterations
Total Execution time (in secs) -  11.5
Total Input Tokens -  7420
Total Input Cost = USD  0.07
Total Output Tokens -  464
Total Output Cost = USD  0.01
Total Cost = USD  0.08


In [703]:
# Build the negative keyword/phrase table for store 'son_ise_nj' from the raw
# JSON responses collected in keyword_negative_output_son_ise_nj.

# Column layout: three identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store's display name once; it is the same for every row.
input_store_identifier = 'son_ise_nj'
store_name = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name = value
        break

# Collect one record per (category, type) and build the DataFrame in a single
# step instead of growing it with pd.concat inside the loop.
records = []
for json_str in keyword_negative_output_son_ise_nj:
    # Each response is a JSON object mapping a category to a list of entries
    data = json.loads(json_str)
    for category, content_list in data.items():
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name
        row_data['Sentiment'] = 'negative'
        for content in content_list:
            # Keywords row first (comma-separated items, whitespace normalised)
            row_data['Type'] = 'keywords'
            row_data[category] = ', '.join(kw.strip() for kw in content['keywords'].split(','))
            records.append(dict(row_data))  # copy: row_data is mutated below
            # Phrases row next
            row_data['Type'] = 'phrases'
            row_data[category] = ', '.join(ph.strip() for ph in content['phrases'].split(','))
            records.append(dict(row_data))

# Keep the expected column layout even when no topic had negative reviews
negative_keywords_son_ise_nj = pd.DataFrame(records) if records else pd.DataFrame(columns=columns)

# Consolidate to one row per ('Store Name', 'Sentiment', 'Type'), joining the
# non-null cell of each topic column
negative_keywords_son_ise_nj = negative_keywords_son_ise_nj.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

negative_keywords_son_ise_nj

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Sona Jewelers-Iselin, NJ",negative,keywords,overpriced:1,"delayed order:1, unresponsive:1, overpriced:1,...","rude :2, uninterested :1, unappreciated :1, ru...","designs sucks:1, highly priced:1, looks fake:1...",,,,"overpriced: 3, pricey: 2, pricy: 2, high: 1, e...","poor quality:2, cheaply made:1, disintegrated:...",
1,"Sona Jewelers-Iselin, NJ",negative,phrases,prices are outrageously high for the quality:1,kept promising it would be ready the next day:...,"staff seemed uninterested in helping :1, staff...",mix wax and metals:1,,,,"outrageously high for the quality: 1, way too ...","quality is really poor:1, completely disintegr...",


#### tif_chi_il

In [704]:
# Negative keyword extraction for store 'tif_chi_il'.
# Collects one raw negative_keywords() response per topic that has at least one
# negative review, while tracking token usage, cost and elapsed time.
keyword_negative_output_tif_chi_il = []  # list of raw responses (not a dict)
# Topic columns to iterate over (the 'review_text' column is not a topic)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

keyword_counter_tif_chi_il=[0]  # single-element list so the progress thread sees updates
keyword_input_token_tif_chi_il = 0
keyword_output_token_tif_chi_il = 0
keyword_start_time_loop_tif_chi_il = time.time()

# Look the store's sentiment frame up once (and before the thread starts, so a
# missing key fails without leaving a progress thread behind).
sen_df_tif_chi_il = keyword_dataframes['tif_chi_il_final_sen_df_jul']

#Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_tif_chi_il, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_tif_chi_il[0]+=1
        # Reviews flagged negative (-1) for this topic
        filtered_comments_tif_chi_il = sen_df_tif_chi_il.loc[sen_df_tif_chi_il[topic] == -1, 'review_text'].tolist()
        # Only call the API when there is at least one negative comment
        if filtered_comments_tif_chi_il:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_tif_chi_il, topic)
            keyword_negative_output_tif_chi_il.append(keywords)
            keyword_input_token_tif_chi_il += input_tokens_loop
            keyword_output_token_tif_chi_il += output_token_loop
finally:
    # Always stop the dynamic message thread, even if an API call raises;
    # otherwise it would presumably keep running until stop_event is set.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tif_chi_il = time.time()
# Hard-coded rates: USD 0.01 per 1,000 input tokens, USD 0.03 per 1,000 output tokens.
# NOTE: each component is rounded to cents before the two are summed.
keyword_cost_input_token_tif_chi_il = round((0.01/1000)*keyword_input_token_tif_chi_il,2)
keyword_cost_output_token_tif_chi_il = round((0.03/1000)*keyword_output_token_tif_chi_il,2)
keyword_total_cost_tif_chi_il = keyword_cost_input_token_tif_chi_il + keyword_cost_output_token_tif_chi_il
keyword_total_time_loop_tif_chi_il = keyword_end_time_loop_tif_chi_il - keyword_start_time_loop_tif_chi_il

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_tif_chi_il[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tif_chi_il,1))
print("Total Input Tokens - ", keyword_input_token_tif_chi_il)
print("Total Input Cost = USD ",keyword_cost_input_token_tif_chi_il)
print("Total Output Tokens - ", keyword_output_token_tif_chi_il)
print("Total Output Cost = USD ",keyword_cost_output_token_tif_chi_il)
print("Total Cost = USD ",round(keyword_total_cost_tif_chi_il,2))

Executed  10  Iterations
Total Execution time (in secs) -  13.5
Total Input Tokens -  4950
Total Input Cost = USD  0.05
Total Output Tokens -  386
Total Output Cost = USD  0.01
Total Cost = USD  0.06


In [705]:
# Layout of the summary table: three identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# The store label is identical for every row, so resolve it once up front
input_store_identifier = 'tif_chi_il'
store_name_tif_chi_il = "Store Name Not Found"
for key, value in keyword_mappings.items():
    # The first mapping key that contains the identifier wins
    if input_store_identifier in key:
        store_name_tif_chi_il = value
        break

# Gather rows as plain dicts and build the DataFrame in one go; growing a
# DataFrame with pd.concat inside the loop re-copies all earlier rows each time
keyword_rows_tif_chi_il = []
for json_str in keyword_negative_output_tif_chi_il:
    # Each entry is parsed as a JSON object mapping a category to a list of
    # items that carry comma-separated 'keywords' and 'phrases' strings
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # One row for the keywords and one for the phrases, in that order
            for row_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_tif_chi_il
                row_data['Sentiment'] = 'negative'
                row_data['Type'] = row_type
                # Re-join the comma-separated items with surrounding whitespace trimmed
                row_data[category] = ', '.join(item.strip() for item in content[row_type].split(','))
                keyword_rows_tif_chi_il.append(row_data)

# With no rows, fall back to an empty frame that still has the expected columns
if keyword_rows_tif_chi_il:
    negative_keywords_tif_chi_il = pd.DataFrame(keyword_rows_tif_chi_il)
else:
    negative_keywords_tif_chi_il = pd.DataFrame(columns=columns)

# Collapse rows sharing 'Store Name', 'Sentiment' and 'Type' into one row;
# within each column the non-null texts are joined with a single space
negative_keywords_tif_chi_il = negative_keywords_tif_chi_il.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

negative_keywords_tif_chi_il

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Tiffany & Co-Chicago, IL",negative,keywords,"sold their customer list:1, marketing company:...","unprofessional :1, unwelcome :1, uncomfortable...","unprofessional :1, rude :2, inappropriate :1, ...",,,,,"expensive: 2, poor: 1","tarnishes :1, defective :1, broke :1, unwearab...",
1,"Tiffany & Co-Chicago, IL",negative,phrases,"DO NOT give any of your info:1, end-up on a “r...","inappropriate comment :1, deplorable state :1,...","made an inappropriate comment :1, salesman was...",,,,,"very expensive: 1, sells items at that price: ...","tarnishes so badly :1, defective one :1, broke...",


#### tif_nor_il

In [706]:
# Collects the value returned by negative_keywords for each topic that has
# at least one matching review (a list, appended to in topic order)
keyword_negative_output_tif_nor_il = []
# Topic columns of the sentiment frame that are scanned below
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Single-element list: handed to the progress thread and mutated in place by the loop
keyword_counter_tif_nor_il=[0]
keyword_input_token_tif_nor_il = 0
keyword_output_token_tif_nor_il = 0
keyword_start_time_loop_tif_nor_il = time.time()

# Look the store's sentiment frame up once, before the progress thread starts,
# so a missing key fails without leaving a thread behind
sentiment_df_tif_nor_il = keyword_dataframes['tif_nor_il_final_sen_df_jul']

#Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_tif_nor_il, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_tif_nor_il[0]+=1
        # Keep only the review texts whose value for this topic is -1
        filtered_comments_tif_nor_il = sentiment_df_tif_nor_il.loc[sentiment_df_tif_nor_il[topic]==-1, 'review_text'].tolist()
        # Topics without any such review are skipped, so no call is made for them
        if filtered_comments_tif_nor_il:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_tif_nor_il,topic)
            keyword_negative_output_tif_nor_il.append(keywords)
            keyword_input_token_tif_nor_il += input_tokens_loop
            keyword_output_token_tif_nor_il += output_token_loop
finally:
    # Signal and join the progress thread even when a call above raises.
    # NOTE(review): print_dynamic_message_keyword presumably loops until
    # stop_event is set (like print_dynamic_message); without this it would
    # keep running after a failed cell - confirm against its definition.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tif_nor_il = time.time()
# Rates are hard-coded: 0.01 USD per 1,000 input tokens, 0.03 USD per 1,000 output tokens
keyword_cost_input_token_tif_nor_il = round((0.01/1000)*keyword_input_token_tif_nor_il,2)
keyword_cost_output_token_tif_nor_il = round((0.03/1000)*keyword_output_token_tif_nor_il,2)
keyword_total_cost_tif_nor_il = keyword_cost_input_token_tif_nor_il + keyword_cost_output_token_tif_nor_il
keyword_total_time_loop_tif_nor_il = keyword_end_time_loop_tif_nor_il - keyword_start_time_loop_tif_nor_il

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_tif_nor_il[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tif_nor_il,1))
print("Total Input Tokens - ", keyword_input_token_tif_nor_il)
print("Total Input Cost = USD ",keyword_cost_input_token_tif_nor_il)
print("Total Output Tokens - ", keyword_output_token_tif_nor_il)
print("Total Output Cost = USD ",keyword_cost_output_token_tif_nor_il)
print("Total Cost = USD ",round(keyword_total_cost_tif_nor_il,2))

Executed  10  Iterations
Total Execution time (in secs) -  9.0
Total Input Tokens -  5080
Total Input Cost = USD  0.05
Total Output Tokens -  332
Total Output Cost = USD  0.01
Total Cost = USD  0.06


In [707]:
# Layout of the summary table: three identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# The store label is identical for every row, so resolve it once up front
input_store_identifier = 'tif_nor_il'
store_name_tif_nor_il = "Store Name Not Found"
for key, value in keyword_mappings.items():
    # The first mapping key that contains the identifier wins
    if input_store_identifier in key:
        store_name_tif_nor_il = value
        break

# Gather rows as plain dicts and build the DataFrame in one go; growing a
# DataFrame with pd.concat inside the loop re-copies all earlier rows each time
keyword_rows_tif_nor_il = []
for json_str in keyword_negative_output_tif_nor_il:
    # Each entry is parsed as a JSON object mapping a category to a list of
    # items that carry comma-separated 'keywords' and 'phrases' strings
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # One row for the keywords and one for the phrases, in that order
            for row_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_tif_nor_il
                row_data['Sentiment'] = 'negative'
                row_data['Type'] = row_type
                # Re-join the comma-separated items with surrounding whitespace trimmed
                row_data[category] = ', '.join(item.strip() for item in content[row_type].split(','))
                keyword_rows_tif_nor_il.append(row_data)

# With no rows, fall back to an empty frame that still has the expected columns
if keyword_rows_tif_nor_il:
    negative_keywords_tif_nor_il = pd.DataFrame(keyword_rows_tif_nor_il)
else:
    negative_keywords_tif_nor_il = pd.DataFrame(columns=columns)

# Collapse rows sharing 'Store Name', 'Sentiment' and 'Type' into one row;
# within each column the non-null texts are joined with a single space
negative_keywords_tif_nor_il = negative_keywords_tif_nor_il.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

negative_keywords_tif_nor_il

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Tiffany & Co-Northbrook, IL",negative,keywords,,No relevant negative keywords/ phrases,"RUDE :1, discriminatory :1",,No relevant negative keywords/ phrases,,,"charge :1, cheap :1","tarnished :3, disappointed :1, appaling :1","nothing done:1, extremely disappointed:1"
1,"Tiffany & Co-Northbrook, IL",negative,phrases,,No relevant negative keywords/ phrases,Wouldn't let me touch product I requested :1,,No relevant negative keywords/ phrases,,,"wanted to charge me 20$ :1, not a cheap store :1","worst combination of materials :1, needed extr...",wanted to exchange it and was told there was n...


#### tif_sko_il

In [708]:
# Collects the value returned by negative_keywords for each topic that has
# at least one matching review (a list, appended to in topic order)
keyword_negative_output_tif_sko_il = []
# Topic columns of the sentiment frame that are scanned below
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Single-element list: handed to the progress thread and mutated in place by the loop
keyword_counter_tif_sko_il=[0]
keyword_input_token_tif_sko_il = 0
keyword_output_token_tif_sko_il = 0
keyword_start_time_loop_tif_sko_il = time.time()

# Look the store's sentiment frame up once, before the progress thread starts,
# so a missing key fails without leaving a thread behind
sentiment_df_tif_sko_il = keyword_dataframes['tif_sko_il_final_sen_df_jul']

#Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_tif_sko_il, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_tif_sko_il[0]+=1
        # Keep only the review texts whose value for this topic is -1
        filtered_comments_tif_sko_il = sentiment_df_tif_sko_il.loc[sentiment_df_tif_sko_il[topic]==-1, 'review_text'].tolist()
        # Topics without any such review are skipped, so no call is made for them
        if filtered_comments_tif_sko_il:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_tif_sko_il,topic)
            keyword_negative_output_tif_sko_il.append(keywords)
            keyword_input_token_tif_sko_il += input_tokens_loop
            keyword_output_token_tif_sko_il += output_token_loop
finally:
    # Signal and join the progress thread even when a call above raises.
    # NOTE(review): print_dynamic_message_keyword presumably loops until
    # stop_event is set (like print_dynamic_message); without this it would
    # keep running after a failed cell - confirm against its definition.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tif_sko_il = time.time()
# Rates are hard-coded: 0.01 USD per 1,000 input tokens, 0.03 USD per 1,000 output tokens
keyword_cost_input_token_tif_sko_il = round((0.01/1000)*keyword_input_token_tif_sko_il,2)
keyword_cost_output_token_tif_sko_il = round((0.03/1000)*keyword_output_token_tif_sko_il,2)
keyword_total_cost_tif_sko_il = keyword_cost_input_token_tif_sko_il + keyword_cost_output_token_tif_sko_il
keyword_total_time_loop_tif_sko_il = keyword_end_time_loop_tif_sko_il - keyword_start_time_loop_tif_sko_il

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_tif_sko_il[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tif_sko_il,1))
print("Total Input Tokens - ", keyword_input_token_tif_sko_il)
print("Total Input Cost = USD ",keyword_cost_input_token_tif_sko_il)
print("Total Output Tokens - ", keyword_output_token_tif_sko_il)
print("Total Output Cost = USD ",keyword_cost_output_token_tif_sko_il)
print("Total Cost = USD ",round(keyword_total_cost_tif_sko_il,2))

Executed  10  Iterations
Total Execution time (in secs) -  8.0
Total Input Tokens -  5959
Total Input Cost = USD  0.06
Total Output Tokens -  351
Total Output Cost = USD  0.01
Total Cost = USD  0.07


In [709]:
# Layout of the summary table: three identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# The store label is identical for every row, so resolve it once up front
input_store_identifier = 'tif_sko_il'
store_name_tif_sko_il = "Store Name Not Found"
for key, value in keyword_mappings.items():
    # The first mapping key that contains the identifier wins
    if input_store_identifier in key:
        store_name_tif_sko_il = value
        break

# Gather rows as plain dicts and build the DataFrame in one go; growing a
# DataFrame with pd.concat inside the loop re-copies all earlier rows each time
keyword_rows_tif_sko_il = []
for json_str in keyword_negative_output_tif_sko_il:
    # Each entry is parsed as a JSON object mapping a category to a list of
    # items that carry comma-separated 'keywords' and 'phrases' strings
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # One row for the keywords and one for the phrases, in that order
            for row_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_tif_sko_il
                row_data['Sentiment'] = 'negative'
                row_data['Type'] = row_type
                # Re-join the comma-separated items with surrounding whitespace trimmed
                row_data[category] = ', '.join(item.strip() for item in content[row_type].split(','))
                keyword_rows_tif_sko_il.append(row_data)

# With no rows, fall back to an empty frame that still has the expected columns
if keyword_rows_tif_sko_il:
    negative_keywords_tif_sko_il = pd.DataFrame(keyword_rows_tif_sko_il)
else:
    negative_keywords_tif_sko_il = pd.DataFrame(columns=columns)

# Collapse rows sharing 'Store Name', 'Sentiment' and 'Type' into one row;
# within each column the non-null texts are joined with a single space
negative_keywords_tif_sko_il = negative_keywords_tif_sko_il.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

negative_keywords_tif_sko_il

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Tiffany & Co-Skokie, IL",negative,keywords,"dishonest :1, refused :1, lost :1, discrepancy...","disappointing :2, lost :2, irreplaceable :1, s...","rude :1, dismissive :1, dishonest :1, attitude...",,,,,No relevant negative keywords/ phrases,No relevant negative keywords/ phrases,
1,"Tiffany & Co-Skokie, IL",negative,phrases,"did not honor the Tiffany promise :1, refused ...","deeply disappointing :1, policy should change ...","did not honor the Tiffany promise :1, refused ...",,,,,No relevant negative keywords/ phrases,No relevant negative keywords/ phrases,


#### tif_eas_nj

In [710]:
# Collects the value returned by negative_keywords for each topic that has
# at least one matching review (a list, appended to in topic order)
keyword_negative_output_tif_eas_nj = []
# Topic columns of the sentiment frame that are scanned below
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Single-element list: handed to the progress thread and mutated in place by the loop
keyword_counter_tif_eas_nj=[0]
keyword_input_token_tif_eas_nj = 0
keyword_output_token_tif_eas_nj = 0
keyword_start_time_loop_tif_eas_nj = time.time()

# Look the store's sentiment frame up once, before the progress thread starts,
# so a missing key fails without leaving a thread behind
sentiment_df_tif_eas_nj = keyword_dataframes['tif_eas_nj_final_sen_df_jul']

#Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_tif_eas_nj, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_tif_eas_nj[0]+=1
        # Keep only the review texts whose value for this topic is -1
        filtered_comments_tif_eas_nj = sentiment_df_tif_eas_nj.loc[sentiment_df_tif_eas_nj[topic]==-1, 'review_text'].tolist()
        # Topics without any such review are skipped, so no call is made for them
        if filtered_comments_tif_eas_nj:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_tif_eas_nj,topic)
            keyword_negative_output_tif_eas_nj.append(keywords)
            keyword_input_token_tif_eas_nj += input_tokens_loop
            keyword_output_token_tif_eas_nj += output_token_loop
finally:
    # Signal and join the progress thread even when a call above raises.
    # NOTE(review): print_dynamic_message_keyword presumably loops until
    # stop_event is set (like print_dynamic_message); without this it would
    # keep running after a failed cell - confirm against its definition.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tif_eas_nj = time.time()
# Rates are hard-coded: 0.01 USD per 1,000 input tokens, 0.03 USD per 1,000 output tokens
keyword_cost_input_token_tif_eas_nj = round((0.01/1000)*keyword_input_token_tif_eas_nj,2)
keyword_cost_output_token_tif_eas_nj = round((0.03/1000)*keyword_output_token_tif_eas_nj,2)
keyword_total_cost_tif_eas_nj = keyword_cost_input_token_tif_eas_nj + keyword_cost_output_token_tif_eas_nj
keyword_total_time_loop_tif_eas_nj = keyword_end_time_loop_tif_eas_nj - keyword_start_time_loop_tif_eas_nj

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_tif_eas_nj[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tif_eas_nj,1))
print("Total Input Tokens - ", keyword_input_token_tif_eas_nj)
print("Total Input Cost = USD ",keyword_cost_input_token_tif_eas_nj)
print("Total Output Tokens - ", keyword_output_token_tif_eas_nj)
print("Total Output Cost = USD ",keyword_cost_output_token_tif_eas_nj)
print("Total Cost = USD ",round(keyword_total_cost_tif_eas_nj,2))

Executed  10  Iterations
Total Execution time (in secs) -  6.0
Total Input Tokens -  2758
Total Input Cost = USD  0.03
Total Output Tokens -  215
Total Output Cost = USD  0.01
Total Cost = USD  0.04


In [711]:
# Layout of the summary table: three identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# The store label is identical for every row, so resolve it once up front
input_store_identifier = 'tif_eas_nj'
store_name_tif_eas_nj = "Store Name Not Found"
for key, value in keyword_mappings.items():
    # The first mapping key that contains the identifier wins
    if input_store_identifier in key:
        store_name_tif_eas_nj = value
        break

# Gather rows as plain dicts and build the DataFrame in one go; growing a
# DataFrame with pd.concat inside the loop re-copies all earlier rows each time
keyword_rows_tif_eas_nj = []
for json_str in keyword_negative_output_tif_eas_nj:
    # Each entry is parsed as a JSON object mapping a category to a list of
    # items that carry comma-separated 'keywords' and 'phrases' strings
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # One row for the keywords and one for the phrases, in that order
            for row_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_tif_eas_nj
                row_data['Sentiment'] = 'negative'
                row_data['Type'] = row_type
                # Re-join the comma-separated items with surrounding whitespace trimmed
                row_data[category] = ', '.join(item.strip() for item in content[row_type].split(','))
                keyword_rows_tif_eas_nj.append(row_data)

# With no rows, fall back to an empty frame that still has the expected columns
if keyword_rows_tif_eas_nj:
    negative_keywords_tif_eas_nj = pd.DataFrame(keyword_rows_tif_eas_nj)
else:
    negative_keywords_tif_eas_nj = pd.DataFrame(columns=columns)

# Collapse rows sharing 'Store Name', 'Sentiment' and 'Type' into one row;
# within each column the non-null texts are joined with a single space
negative_keywords_tif_eas_nj = negative_keywords_tif_eas_nj.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

negative_keywords_tif_eas_nj

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Tiffany & Co-East Rutherford, NJ",negative,keywords,,"ignored :1, unhelpful :1, dismissive :1, conde...","rude :1, unhelpful :1, dismissive :1, condesce...",,No relevant negative keywords/ phrases,,,,,
1,"Tiffany & Co-East Rutherford, NJ",negative,phrases,,"completely ignored when entering a store :1, d...","verbally abusing :1, bothered by my presence :...",,No relevant negative keywords/ phrases,,,,,


#### tif_red_nj

In [712]:
# Collects the value returned by negative_keywords for each topic that has
# at least one matching review (a list, appended to in topic order)
keyword_negative_output_tif_red_nj = []
# Topic columns of the sentiment frame that are scanned below
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Single-element list: handed to the progress thread and mutated in place by the loop
keyword_counter_tif_red_nj=[0]
keyword_input_token_tif_red_nj = 0
keyword_output_token_tif_red_nj = 0
keyword_start_time_loop_tif_red_nj = time.time()

# Look the store's sentiment frame up once, before the progress thread starts,
# so a missing key fails without leaving a thread behind
sentiment_df_tif_red_nj = keyword_dataframes['tif_red_nj_final_sen_df_jul']

#Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_tif_red_nj, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_tif_red_nj[0]+=1
        # Keep only the review texts whose value for this topic is -1
        filtered_comments_tif_red_nj = sentiment_df_tif_red_nj.loc[sentiment_df_tif_red_nj[topic]==-1, 'review_text'].tolist()
        # Topics without any such review are skipped, so no call is made for them
        if filtered_comments_tif_red_nj:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_tif_red_nj,topic)
            keyword_negative_output_tif_red_nj.append(keywords)
            keyword_input_token_tif_red_nj += input_tokens_loop
            keyword_output_token_tif_red_nj += output_token_loop
finally:
    # Signal and join the progress thread even when a call above raises.
    # NOTE(review): print_dynamic_message_keyword presumably loops until
    # stop_event is set (like print_dynamic_message); without this it would
    # keep running after a failed cell - confirm against its definition.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tif_red_nj = time.time()
# Rates are hard-coded: 0.01 USD per 1,000 input tokens, 0.03 USD per 1,000 output tokens
keyword_cost_input_token_tif_red_nj = round((0.01/1000)*keyword_input_token_tif_red_nj,2)
keyword_cost_output_token_tif_red_nj = round((0.03/1000)*keyword_output_token_tif_red_nj,2)
keyword_total_cost_tif_red_nj = keyword_cost_input_token_tif_red_nj + keyword_cost_output_token_tif_red_nj
keyword_total_time_loop_tif_red_nj = keyword_end_time_loop_tif_red_nj - keyword_start_time_loop_tif_red_nj

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_tif_red_nj[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tif_red_nj,1))
print("Total Input Tokens - ", keyword_input_token_tif_red_nj)
print("Total Input Cost = USD ",keyword_cost_input_token_tif_red_nj)
print("Total Output Tokens - ", keyword_output_token_tif_red_nj)
print("Total Output Cost = USD ",keyword_cost_output_token_tif_red_nj)
print("Total Cost = USD ",round(keyword_total_cost_tif_red_nj,2))

Executed  10  Iterations
Total Execution time (in secs) -  10.0
Total Input Tokens -  5811
Total Input Cost = USD  0.06
Total Output Tokens -  371
Total Output Cost = USD  0.01
Total Cost = USD  0.07


In [713]:
# Layout of the summary table: three identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# The store label is identical for every row, so resolve it once up front
input_store_identifier = 'tif_red_nj'
store_name_tif_red_nj = "Store Name Not Found"
for key, value in keyword_mappings.items():
    # The first mapping key that contains the identifier wins
    if input_store_identifier in key:
        store_name_tif_red_nj = value
        break

# Gather rows as plain dicts and build the DataFrame in one go; growing a
# DataFrame with pd.concat inside the loop re-copies all earlier rows each time
keyword_rows_tif_red_nj = []
for json_str in keyword_negative_output_tif_red_nj:
    # Each entry is parsed as a JSON object mapping a category to a list of
    # items that carry comma-separated 'keywords' and 'phrases' strings
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # One row for the keywords and one for the phrases, in that order
            for row_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_tif_red_nj
                row_data['Sentiment'] = 'negative'
                row_data['Type'] = row_type
                # Re-join the comma-separated items with surrounding whitespace trimmed
                row_data[category] = ', '.join(item.strip() for item in content[row_type].split(','))
                keyword_rows_tif_red_nj.append(row_data)

# With no rows, fall back to an empty frame that still has the expected columns
if keyword_rows_tif_red_nj:
    negative_keywords_tif_red_nj = pd.DataFrame(keyword_rows_tif_red_nj)
else:
    negative_keywords_tif_red_nj = pd.DataFrame(columns=columns)

# Collapse rows sharing 'Store Name', 'Sentiment' and 'Type' into one row;
# within each column the non-null texts are joined with a single space
negative_keywords_tif_red_nj = negative_keywords_tif_red_nj.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

negative_keywords_tif_red_nj

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Tiffany & Co-Red Bank, NJ",negative,keywords,,"rude treatment:1, misled:1, disrespected:1, fr...","rude treatment:1, unwilling:1, misled:1, disre...",,,,,"overpriced:1, expensive:1","Garbage :1, overpriced :1","store policy :2, original purchaser :2, remain..."
1,"Tiffany & Co-Red Bank, NJ",negative,phrases,,"staff clearly profiles patrons:1, unwilling to...","staff clearly profiles patrons:1, unwilling to...",,,,,"way overpriced:1, expensive place:1, high end,...",Garbage jewelry :1,could only be paid by check to the original pu...


#### tif_hac_nj

In [714]:
# Collects the value returned by negative_keywords for each topic that has
# at least one matching review (a list, appended to in topic order)
keyword_negative_output_tif_hac_nj = []
# Topic columns of the sentiment frame that are scanned below
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Single-element list: handed to the progress thread and mutated in place by the loop
keyword_counter_tif_hac_nj=[0]
keyword_input_token_tif_hac_nj = 0
keyword_output_token_tif_hac_nj = 0
keyword_start_time_loop_tif_hac_nj = time.time()

# Look the store's sentiment frame up once, before the progress thread starts,
# so a missing key fails without leaving a thread behind
sentiment_df_tif_hac_nj = keyword_dataframes['tif_hac_nj_final_sen_df_jul']

#Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_tif_hac_nj, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_tif_hac_nj[0]+=1
        # Keep only the review texts whose value for this topic is -1
        filtered_comments_tif_hac_nj = sentiment_df_tif_hac_nj.loc[sentiment_df_tif_hac_nj[topic]==-1, 'review_text'].tolist()
        # Topics without any such review are skipped, so no call is made for them
        if filtered_comments_tif_hac_nj:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_tif_hac_nj,topic)
            keyword_negative_output_tif_hac_nj.append(keywords)
            keyword_input_token_tif_hac_nj += input_tokens_loop
            keyword_output_token_tif_hac_nj += output_token_loop
finally:
    # Signal and join the progress thread even when a call above raises.
    # NOTE(review): print_dynamic_message_keyword presumably loops until
    # stop_event is set (like print_dynamic_message); without this it would
    # keep running after a failed cell - confirm against its definition.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tif_hac_nj = time.time()
# Rates are hard-coded: 0.01 USD per 1,000 input tokens, 0.03 USD per 1,000 output tokens
keyword_cost_input_token_tif_hac_nj = round((0.01/1000)*keyword_input_token_tif_hac_nj,2)
keyword_cost_output_token_tif_hac_nj = round((0.03/1000)*keyword_output_token_tif_hac_nj,2)
keyword_total_cost_tif_hac_nj = keyword_cost_input_token_tif_hac_nj + keyword_cost_output_token_tif_hac_nj
keyword_total_time_loop_tif_hac_nj = keyword_end_time_loop_tif_hac_nj - keyword_start_time_loop_tif_hac_nj

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_tif_hac_nj[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tif_hac_nj,1))
print("Total Input Tokens - ", keyword_input_token_tif_hac_nj)
print("Total Input Cost = USD ",keyword_cost_input_token_tif_hac_nj)
print("Total Output Tokens - ", keyword_output_token_tif_hac_nj)
print("Total Output Cost = USD ",keyword_cost_output_token_tif_hac_nj)
print("Total Cost = USD ",round(keyword_total_cost_tif_hac_nj,2))

Executed  10  Iterations
Total Execution time (in secs) -  9.0
Total Input Tokens -  4474
Total Input Cost = USD  0.04
Total Output Tokens -  399
Total Output Cost = USD  0.01
Total Cost = USD  0.05


In [715]:
# Build the negative keyword / phrase summary table for store 'tif_hac_nj'.
# Input : keyword_negative_output_tif_hac_nj - list of JSON strings; each is read
#         below as {category: [{'keywords': '<comma separated>', 'phrases': '<comma separated>'}, ...]}.
# Output: negative_keywords_tif_hac_nj - one row per (Store Name, Sentiment, Type).

# Output layout: three identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the display name once: the first keyword_mappings key that contains
# the store identifier wins, otherwise a placeholder is used
input_store_identifier = 'tif_hac_nj'
store_name_tif_hac_nj = next(
    (value for key, value in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Collect the rows in a plain list and build the DataFrame once at the end
# (growing a DataFrame with pd.concat inside the loop is quadratic)
keyword_rows_tif_hac_nj = []
for json_str in keyword_negative_output_tif_hac_nj:
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # One row for the keywords, then one for the phrases
            for entry_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_tif_hac_nj
                row_data['Sentiment'] = 'negative'
                row_data['Type'] = entry_type
                # Normalise the comma separated string: trim each item, re-join with ', '
                row_data[category] = ', '.join(item.strip() for item in content[entry_type].split(','))
                keyword_rows_tif_hac_nj.append(row_data)

if keyword_rows_tif_hac_nj:
    # Consolidate rows that share the same 'Type' into a single row per
    # 'Store Name' and 'Sentiment'; missing values are dropped before joining
    negative_keywords_tif_hac_nj = (
        pd.DataFrame(keyword_rows_tif_hac_nj)
        .groupby(['Store Name', 'Sentiment', 'Type'])
        .agg(lambda x: ' '.join(x.dropna()))
        .reset_index()
    )
else:
    # No negative output for this store: grouping an empty frame can fail in
    # reset_index with "ValueError: cannot insert Type, already exists",
    # so return an empty table with the expected columns instead
    negative_keywords_tif_hac_nj = pd.DataFrame(columns=columns)

negative_keywords_tif_hac_nj

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Tiffany & Co-Hackensack, NJ",negative,keywords,"reputation :1, stand by :1, reputable :1","unprofessional :2, rude :2, negative impressio...","rude :3, unprofessional :2, not knowledgeable ...",,,,,No relevant negative keywords/ phrases,"normal wear :1, fell out :1, repair :1",
1,"Tiffany & Co-Hackensack, NJ",negative,phrases,do not stand by their products or customers :1...,very unprofessional and really turned me away ...,"staff are rude and not knowledgeable :1, very ...",,,,,No relevant negative keywords/ phrases,"small diamonds fell out :1, do not stand by th...",


#### tif_sho_nj

In [716]:
# Negative keyword extraction for store 'tif_sho_nj'.
# For every topic, the reviews flagged -1 for that topic are sent to
# negative_keywords(); the returned result is collected in
# keyword_negative_output_tif_sho_nj and the token usage is accumulated for the
# cost summary printed at the end.

# Output list: one negative_keywords() result per topic that had negative reviews
keyword_negative_output_tif_sho_nj = []
# Topic columns to iterate over (the 'review_text' column is not a topic)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Single-element list: shared with the progress thread and updated in place
keyword_counter_tif_sho_nj = [0]
keyword_input_token_tif_sho_nj = 0
keyword_output_token_tif_sho_nj = 0
keyword_start_time_loop_tif_sho_nj = time.time()

# Resolve the store's sentiment frame once, before the progress thread starts,
# so a missing key fails fast without leaving the thread running
keyword_sen_df_tif_sho_nj = keyword_dataframes['tif_sho_nj_final_sen_df_jul']

# Threading setup: background thread that prints the progress message
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(
    target=print_dynamic_message_keyword,
    args=(keyword_counter_tif_sho_nj, keyword_total_iterations, stop_event),
)
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_tif_sho_nj[0] += 1
        # Keep only the review texts whose value for this topic is -1
        filtered_comments_tif_sho_nj = keyword_sen_df_tif_sho_nj.loc[
            keyword_sen_df_tif_sho_nj[topic] == -1, 'review_text'
        ].tolist()
        # Skip the API call entirely when the topic has no such reviews
        if filtered_comments_tif_sho_nj:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_tif_sho_nj, topic)
            keyword_negative_output_tif_sho_nj.append(keywords)
            keyword_input_token_tif_sho_nj += input_tokens_loop
            keyword_output_token_tif_sho_nj += output_token_loop
finally:
    # Always stop the progress thread, even if a call above raised;
    # otherwise it would keep running after the cell has failed
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tif_sho_nj = time.time()
# 0.01 / 0.03 are the USD rates applied per 1000 input / output tokens.
# Each component is rounded to cents before the two are summed.
keyword_cost_input_token_tif_sho_nj = round((0.01/1000)*keyword_input_token_tif_sho_nj,2)
keyword_cost_output_token_tif_sho_nj = round((0.03/1000)*keyword_output_token_tif_sho_nj,2)
keyword_total_cost_tif_sho_nj = keyword_cost_input_token_tif_sho_nj + keyword_cost_output_token_tif_sho_nj
keyword_total_time_loop_tif_sho_nj = keyword_end_time_loop_tif_sho_nj - keyword_start_time_loop_tif_sho_nj

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_tif_sho_nj[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tif_sho_nj,1))
print("Total Input Tokens - ", keyword_input_token_tif_sho_nj)
print("Total Input Cost = USD ",keyword_cost_input_token_tif_sho_nj)
print("Total Output Tokens - ", keyword_output_token_tif_sho_nj)
print("Total Output Cost = USD ",keyword_cost_output_token_tif_sho_nj)
print("Total Cost = USD ",round(keyword_total_cost_tif_sho_nj,2))

Executed  10  Iterations
Total Execution time (in secs) -  11.5
Total Input Tokens -  6545
Total Input Cost = USD  0.07
Total Output Tokens -  433
Total Output Cost = USD  0.01
Total Cost = USD  0.08


In [717]:
# Build the negative keyword / phrase summary table for store 'tif_sho_nj'.
# Input : keyword_negative_output_tif_sho_nj - list of JSON strings; each is read
#         below as {category: [{'keywords': '<comma separated>', 'phrases': '<comma separated>'}, ...]}.
# Output: negative_keywords_tif_sho_nj - one row per (Store Name, Sentiment, Type).

# Output layout: three identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the display name once: the first keyword_mappings key that contains
# the store identifier wins, otherwise a placeholder is used
input_store_identifier = 'tif_sho_nj'
store_name_tif_sho_nj = next(
    (value for key, value in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Collect the rows in a plain list and build the DataFrame once at the end
# (growing a DataFrame with pd.concat inside the loop is quadratic)
keyword_rows_tif_sho_nj = []
for json_str in keyword_negative_output_tif_sho_nj:
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # One row for the keywords, then one for the phrases
            for entry_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_tif_sho_nj
                row_data['Sentiment'] = 'negative'
                row_data['Type'] = entry_type
                # Normalise the comma separated string: trim each item, re-join with ', '
                row_data[category] = ', '.join(item.strip() for item in content[entry_type].split(','))
                keyword_rows_tif_sho_nj.append(row_data)

if keyword_rows_tif_sho_nj:
    # Consolidate rows that share the same 'Type' into a single row per
    # 'Store Name' and 'Sentiment'; missing values are dropped before joining
    negative_keywords_tif_sho_nj = (
        pd.DataFrame(keyword_rows_tif_sho_nj)
        .groupby(['Store Name', 'Sentiment', 'Type'])
        .agg(lambda x: ' '.join(x.dropna()))
        .reset_index()
    )
else:
    # No negative output for this store: grouping an empty frame can fail in
    # reset_index with "ValueError: cannot insert Type, already exists",
    # so return an empty table with the expected columns instead
    negative_keywords_tif_sho_nj = pd.DataFrame(columns=columns)

negative_keywords_tif_sho_nj

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Tiffany & Co-Short Hills, NJ",negative,keywords,"out of luck:1, outrageous:1, waste:1","scratches :1, looser :1, incorrect :1, defensi...","unprofessional :1, defensive :1, rude :1, mise...",,No relevant negative keywords/ phrases,,,"over priced:1, premium:1, five figures:1","scratches :1, low quality :1, marked up :1, fa...",
1,"Tiffany & Co-Short Hills, NJ",negative,phrases,claims it has all the quality guarantees and w...,"noticeable large scratches :1, took them 15-20...","asked too many personal questions :1, very sho...",,No relevant negative keywords/ phrases,,,"not worth the premium:1, over priced low quali...","noticeable large scratches :1, low quality jew...",


#### tif_par_nj

In [718]:
# Negative keyword extraction for store 'tif_par_nj'.
# For every topic, the reviews flagged -1 for that topic are sent to
# negative_keywords(); the returned result is collected in
# keyword_negative_output_tif_par_nj and the token usage is accumulated for the
# cost summary printed at the end.

# Output list: one negative_keywords() result per topic that had negative reviews
keyword_negative_output_tif_par_nj = []
# Topic columns to iterate over (the 'review_text' column is not a topic)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Single-element list: shared with the progress thread and updated in place
keyword_counter_tif_par_nj = [0]
keyword_input_token_tif_par_nj = 0
keyword_output_token_tif_par_nj = 0
keyword_start_time_loop_tif_par_nj = time.time()

# Resolve the store's sentiment frame once, before the progress thread starts,
# so a missing key fails fast without leaving the thread running
keyword_sen_df_tif_par_nj = keyword_dataframes['tif_par_nj_final_sen_df_jul']

# Threading setup: background thread that prints the progress message
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(
    target=print_dynamic_message_keyword,
    args=(keyword_counter_tif_par_nj, keyword_total_iterations, stop_event),
)
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_tif_par_nj[0] += 1
        # Keep only the review texts whose value for this topic is -1
        filtered_comments_tif_par_nj = keyword_sen_df_tif_par_nj.loc[
            keyword_sen_df_tif_par_nj[topic] == -1, 'review_text'
        ].tolist()
        # Skip the API call entirely when the topic has no such reviews
        if filtered_comments_tif_par_nj:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_tif_par_nj, topic)
            keyword_negative_output_tif_par_nj.append(keywords)
            keyword_input_token_tif_par_nj += input_tokens_loop
            keyword_output_token_tif_par_nj += output_token_loop
finally:
    # Always stop the progress thread, even if a call above raised;
    # otherwise it would keep running after the cell has failed
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tif_par_nj = time.time()
# 0.01 / 0.03 are the USD rates applied per 1000 input / output tokens.
# Each component is rounded to cents before the two are summed.
keyword_cost_input_token_tif_par_nj = round((0.01/1000)*keyword_input_token_tif_par_nj,2)
keyword_cost_output_token_tif_par_nj = round((0.03/1000)*keyword_output_token_tif_par_nj,2)
keyword_total_cost_tif_par_nj = keyword_cost_input_token_tif_par_nj + keyword_cost_output_token_tif_par_nj
keyword_total_time_loop_tif_par_nj = keyword_end_time_loop_tif_par_nj - keyword_start_time_loop_tif_par_nj

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_tif_par_nj[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tif_par_nj,1))
print("Total Input Tokens - ", keyword_input_token_tif_par_nj)
print("Total Input Cost = USD ",keyword_cost_input_token_tif_par_nj)
print("Total Output Tokens - ", keyword_output_token_tif_par_nj)
print("Total Output Cost = USD ",keyword_cost_output_token_tif_par_nj)
print("Total Cost = USD ",round(keyword_total_cost_tif_par_nj,2))

Executed  10  Iterations
Total Execution time (in secs) -  5.0
Total Input Tokens -  2947
Total Input Cost = USD  0.03
Total Output Tokens -  191
Total Output Cost = USD  0.01
Total Cost = USD  0.04


In [719]:
# Build the negative keyword / phrase summary table for store 'tif_par_nj'.
# Input : keyword_negative_output_tif_par_nj - list of JSON strings; each is read
#         below as {category: [{'keywords': '<comma separated>', 'phrases': '<comma separated>'}, ...]}.
# Output: negative_keywords_tif_par_nj - one row per (Store Name, Sentiment, Type).

# Output layout: three identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the display name once: the first keyword_mappings key that contains
# the store identifier wins, otherwise a placeholder is used
input_store_identifier = 'tif_par_nj'
store_name_tif_par_nj = next(
    (value for key, value in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Collect the rows in a plain list and build the DataFrame once at the end
# (growing a DataFrame with pd.concat inside the loop is quadratic)
keyword_rows_tif_par_nj = []
for json_str in keyword_negative_output_tif_par_nj:
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # One row for the keywords, then one for the phrases
            for entry_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_tif_par_nj
                row_data['Sentiment'] = 'negative'
                row_data['Type'] = entry_type
                # Normalise the comma separated string: trim each item, re-join with ', '
                row_data[category] = ', '.join(item.strip() for item in content[entry_type].split(','))
                keyword_rows_tif_par_nj.append(row_data)

if keyword_rows_tif_par_nj:
    # Consolidate rows that share the same 'Type' into a single row per
    # 'Store Name' and 'Sentiment'; missing values are dropped before joining
    negative_keywords_tif_par_nj = (
        pd.DataFrame(keyword_rows_tif_par_nj)
        .groupby(['Store Name', 'Sentiment', 'Type'])
        .agg(lambda x: ' '.join(x.dropna()))
        .reset_index()
    )
else:
    # No negative output for this store: grouping an empty frame can fail in
    # reset_index with "ValueError: cannot insert Type, already exists",
    # so return an empty table with the expected columns instead
    negative_keywords_tif_par_nj = pd.DataFrame(columns=columns)

negative_keywords_tif_par_nj

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Tiffany & Co-Paramus, NJ",negative,keywords,,"unprofessional :1, unacceptable :1, dismissive...","unhelpful :1, dismissive :1, rude :1, unprofes...",,,,,,,
1,"Tiffany & Co-Paramus, NJ",negative,phrases,,"treated poorly :1, no communication whatsoever...","no empathy whatsoever :1, treated poorly :1, b...",,,,,,,


#### tif_vie_va

In [720]:
# Negative keyword extraction for store 'tif_vie_va'.
# For every topic, the reviews flagged -1 for that topic are sent to
# negative_keywords(); the returned result is collected in
# keyword_negative_output_tif_vie_va and the token usage is accumulated for the
# cost summary printed at the end.

# Output list: one negative_keywords() result per topic that had negative reviews
keyword_negative_output_tif_vie_va = []
# Topic columns to iterate over (the 'review_text' column is not a topic)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Single-element list: shared with the progress thread and updated in place
keyword_counter_tif_vie_va = [0]
keyword_input_token_tif_vie_va = 0
keyword_output_token_tif_vie_va = 0
keyword_start_time_loop_tif_vie_va = time.time()

# Resolve the store's sentiment frame once, before the progress thread starts,
# so a missing key fails fast without leaving the thread running
keyword_sen_df_tif_vie_va = keyword_dataframes['tif_vie_va_final_sen_df_jul']

# Threading setup: background thread that prints the progress message
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(
    target=print_dynamic_message_keyword,
    args=(keyword_counter_tif_vie_va, keyword_total_iterations, stop_event),
)
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_tif_vie_va[0] += 1
        # Keep only the review texts whose value for this topic is -1
        filtered_comments_tif_vie_va = keyword_sen_df_tif_vie_va.loc[
            keyword_sen_df_tif_vie_va[topic] == -1, 'review_text'
        ].tolist()
        # Skip the API call entirely when the topic has no such reviews
        if filtered_comments_tif_vie_va:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_tif_vie_va, topic)
            keyword_negative_output_tif_vie_va.append(keywords)
            keyword_input_token_tif_vie_va += input_tokens_loop
            keyword_output_token_tif_vie_va += output_token_loop
finally:
    # Always stop the progress thread, even if a call above raised;
    # otherwise it would keep running after the cell has failed
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tif_vie_va = time.time()
# 0.01 / 0.03 are the USD rates applied per 1000 input / output tokens.
# Each component is rounded to cents before the two are summed.
keyword_cost_input_token_tif_vie_va = round((0.01/1000)*keyword_input_token_tif_vie_va,2)
keyword_cost_output_token_tif_vie_va = round((0.03/1000)*keyword_output_token_tif_vie_va,2)
keyword_total_cost_tif_vie_va = keyword_cost_input_token_tif_vie_va + keyword_cost_output_token_tif_vie_va
keyword_total_time_loop_tif_vie_va = keyword_end_time_loop_tif_vie_va - keyword_start_time_loop_tif_vie_va

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_tif_vie_va[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tif_vie_va,1))
print("Total Input Tokens - ", keyword_input_token_tif_vie_va)
print("Total Input Cost = USD ",keyword_cost_input_token_tif_vie_va)
print("Total Output Tokens - ", keyword_output_token_tif_vie_va)
print("Total Output Cost = USD ",keyword_cost_output_token_tif_vie_va)
print("Total Cost = USD ",round(keyword_total_cost_tif_vie_va,2))

Executed  10  Iterations
Total Execution time (in secs) -  4.0
Total Input Tokens -  2508
Total Input Cost = USD  0.03
Total Output Tokens -  140
Total Output Cost = USD  0.0
Total Cost = USD  0.03


In [721]:
# Build the negative keyword / phrase summary table for store 'tif_vie_va'.
# Input : keyword_negative_output_tif_vie_va - list of JSON strings; each is read
#         below as {category: [{'keywords': '<comma separated>', 'phrases': '<comma separated>'}, ...]}.
# Output: negative_keywords_tif_vie_va - one row per (Store Name, Sentiment, Type).

# Output layout: three identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the display name once: the first keyword_mappings key that contains
# the store identifier wins, otherwise a placeholder is used
input_store_identifier = 'tif_vie_va'
store_name_tif_vie_va = next(
    (value for key, value in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Collect the rows in a plain list and build the DataFrame once at the end
# (growing a DataFrame with pd.concat inside the loop is quadratic)
keyword_rows_tif_vie_va = []
for json_str in keyword_negative_output_tif_vie_va:
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # One row for the keywords, then one for the phrases
            for entry_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_tif_vie_va
                row_data['Sentiment'] = 'negative'
                row_data['Type'] = entry_type
                # Normalise the comma separated string: trim each item, re-join with ', '
                row_data[category] = ', '.join(item.strip() for item in content[entry_type].split(','))
                keyword_rows_tif_vie_va.append(row_data)

if keyword_rows_tif_vie_va:
    # Consolidate rows that share the same 'Type' into a single row per
    # 'Store Name' and 'Sentiment'; missing values are dropped before joining
    negative_keywords_tif_vie_va = (
        pd.DataFrame(keyword_rows_tif_vie_va)
        .groupby(['Store Name', 'Sentiment', 'Type'])
        .agg(lambda x: ' '.join(x.dropna()))
        .reset_index()
    )
else:
    # No negative output for this store: grouping an empty frame can fail in
    # reset_index with "ValueError: cannot insert Type, already exists",
    # so return an empty table with the expected columns instead
    negative_keywords_tif_vie_va = pd.DataFrame(columns=columns)

negative_keywords_tif_vie_va

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Tiffany & Co-Vienna, VA",negative,keywords,No relevant negative keywords/ phrases,"empty store :1, no warmth :1, no verification :1",No relevant negative keywords/ phrases,,,,,,,
1,"Tiffany & Co-Vienna, VA",negative,phrases,No relevant negative keywords/ phrases,"treats customers like transactions :1, no warm...",No relevant negative keywords/ phrases,,,,,,,


#### tif_ric_va

In [722]:
# Negative keyword extraction for store 'tif_ric_va'.
# For every topic, the reviews flagged -1 for that topic are sent to
# negative_keywords(); the returned result is collected in
# keyword_negative_output_tif_ric_va and the token usage is accumulated for the
# cost summary printed at the end.

# Output list: one negative_keywords() result per topic that had negative reviews
keyword_negative_output_tif_ric_va = []
# Topic columns to iterate over (the 'review_text' column is not a topic)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Single-element list: shared with the progress thread and updated in place
keyword_counter_tif_ric_va = [0]
keyword_input_token_tif_ric_va = 0
keyword_output_token_tif_ric_va = 0
keyword_start_time_loop_tif_ric_va = time.time()

# Resolve the store's sentiment frame once, before the progress thread starts,
# so a missing key fails fast without leaving the thread running
keyword_sen_df_tif_ric_va = keyword_dataframes['tif_ric_va_final_sen_df_jul']

# Threading setup: background thread that prints the progress message
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(
    target=print_dynamic_message_keyword,
    args=(keyword_counter_tif_ric_va, keyword_total_iterations, stop_event),
)
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_tif_ric_va[0] += 1
        # Keep only the review texts whose value for this topic is -1
        filtered_comments_tif_ric_va = keyword_sen_df_tif_ric_va.loc[
            keyword_sen_df_tif_ric_va[topic] == -1, 'review_text'
        ].tolist()
        # Skip the API call entirely when the topic has no such reviews
        if filtered_comments_tif_ric_va:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_tif_ric_va, topic)
            keyword_negative_output_tif_ric_va.append(keywords)
            keyword_input_token_tif_ric_va += input_tokens_loop
            keyword_output_token_tif_ric_va += output_token_loop
finally:
    # Always stop the progress thread, even if a call above raised;
    # otherwise it would keep running after the cell has failed
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tif_ric_va = time.time()
# 0.01 / 0.03 are the USD rates applied per 1000 input / output tokens.
# Each component is rounded to cents before the two are summed.
keyword_cost_input_token_tif_ric_va = round((0.01/1000)*keyword_input_token_tif_ric_va,2)
keyword_cost_output_token_tif_ric_va = round((0.03/1000)*keyword_output_token_tif_ric_va,2)
keyword_total_cost_tif_ric_va = keyword_cost_input_token_tif_ric_va + keyword_cost_output_token_tif_ric_va
keyword_total_time_loop_tif_ric_va = keyword_end_time_loop_tif_ric_va - keyword_start_time_loop_tif_ric_va

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_tif_ric_va[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tif_ric_va,1))
print("Total Input Tokens - ", keyword_input_token_tif_ric_va)
print("Total Input Cost = USD ",keyword_cost_input_token_tif_ric_va)
print("Total Output Tokens - ", keyword_output_token_tif_ric_va)
print("Total Output Cost = USD ",keyword_cost_output_token_tif_ric_va)
print("Total Cost = USD ",round(keyword_total_cost_tif_ric_va,2))

Executed  10  Iterations
Total Execution time (in secs) -  0.0
Total Input Tokens -  0
Total Input Cost = USD  0.0
Total Output Tokens -  0
Total Output Cost = USD  0.0
Total Cost = USD  0.0


In [723]:
# Build the negative keyword / phrase summary table for store 'tif_ric_va'.
# Input : keyword_negative_output_tif_ric_va - list of JSON strings; each is read
#         below as {category: [{'keywords': '<comma separated>', 'phrases': '<comma separated>'}, ...]}.
# Output: negative_keywords_tif_ric_va - one row per (Store Name, Sentiment, Type).

# Output layout: three identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the display name once: the first keyword_mappings key that contains
# the store identifier wins, otherwise a placeholder is used
input_store_identifier = 'tif_ric_va'
store_name_tif_ric_va = next(
    (value for key, value in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Collect the rows in a plain list and build the DataFrame once at the end
# (growing a DataFrame with pd.concat inside the loop is quadratic)
keyword_rows_tif_ric_va = []
for json_str in keyword_negative_output_tif_ric_va:
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # One row for the keywords, then one for the phrases
            for entry_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_tif_ric_va
                row_data['Sentiment'] = 'negative'
                row_data['Type'] = entry_type
                # Normalise the comma separated string: trim each item, re-join with ', '
                row_data[category] = ', '.join(item.strip() for item in content[entry_type].split(','))
                keyword_rows_tif_ric_va.append(row_data)

if keyword_rows_tif_ric_va:
    # Consolidate rows that share the same 'Type' into a single row per
    # 'Store Name' and 'Sentiment'; missing values are dropped before joining
    negative_keywords_tif_ric_va = (
        pd.DataFrame(keyword_rows_tif_ric_va)
        .groupby(['Store Name', 'Sentiment', 'Type'])
        .agg(lambda x: ' '.join(x.dropna()))
        .reset_index()
    )
else:
    # No negative output for this store: grouping the empty frame previously
    # failed in reset_index with "ValueError: cannot insert Type, already exists",
    # so return an empty table with the expected columns instead
    negative_keywords_tif_ric_va = pd.DataFrame(columns=columns)

negative_keywords_tif_ric_va

ValueError: cannot insert Type, already exists

#### vbj_fri_tx

In [724]:
# Negative keyword extraction for store 'vbj_fri_tx'.
# For every topic, the reviews flagged -1 for that topic are sent to
# negative_keywords(); the returned result is collected in
# keyword_negative_output_vbj_fri_tx and the token usage is accumulated for the
# cost summary printed at the end.

# Output list: one negative_keywords() result per topic that had negative reviews
keyword_negative_output_vbj_fri_tx = []
# Topic columns to iterate over (the 'review_text' column is not a topic)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Single-element list: shared with the progress thread and updated in place
keyword_counter_vbj_fri_tx = [0]
keyword_input_token_vbj_fri_tx = 0
keyword_output_token_vbj_fri_tx = 0
keyword_start_time_loop_vbj_fri_tx = time.time()

# Resolve the store's sentiment frame once, before the progress thread starts,
# so a missing key fails fast without leaving the thread running
keyword_sen_df_vbj_fri_tx = keyword_dataframes['vbj_fri_tx_final_sen_df_jul']

# Threading setup: background thread that prints the progress message
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(
    target=print_dynamic_message_keyword,
    args=(keyword_counter_vbj_fri_tx, keyword_total_iterations, stop_event),
)
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_vbj_fri_tx[0] += 1
        # Keep only the review texts whose value for this topic is -1
        filtered_comments_vbj_fri_tx = keyword_sen_df_vbj_fri_tx.loc[
            keyword_sen_df_vbj_fri_tx[topic] == -1, 'review_text'
        ].tolist()
        # Skip the API call entirely when the topic has no such reviews
        if filtered_comments_vbj_fri_tx:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_vbj_fri_tx, topic)
            keyword_negative_output_vbj_fri_tx.append(keywords)
            keyword_input_token_vbj_fri_tx += input_tokens_loop
            keyword_output_token_vbj_fri_tx += output_token_loop
finally:
    # Always stop the progress thread, even if a call above raised;
    # otherwise it would keep running after the cell has failed
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_vbj_fri_tx = time.time()
# 0.01 / 0.03 are the USD rates applied per 1000 input / output tokens.
# Each component is rounded to cents before the two are summed.
keyword_cost_input_token_vbj_fri_tx = round((0.01/1000)*keyword_input_token_vbj_fri_tx,2)
keyword_cost_output_token_vbj_fri_tx = round((0.03/1000)*keyword_output_token_vbj_fri_tx,2)
keyword_total_cost_vbj_fri_tx = keyword_cost_input_token_vbj_fri_tx + keyword_cost_output_token_vbj_fri_tx
keyword_total_time_loop_vbj_fri_tx = keyword_end_time_loop_vbj_fri_tx - keyword_start_time_loop_vbj_fri_tx

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_vbj_fri_tx[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_vbj_fri_tx,1))
print("Total Input Tokens - ", keyword_input_token_vbj_fri_tx)
print("Total Input Cost = USD ",keyword_cost_input_token_vbj_fri_tx)
print("Total Output Tokens - ", keyword_output_token_vbj_fri_tx)
print("Total Output Cost = USD ",keyword_cost_output_token_vbj_fri_tx)
print("Total Cost = USD ",round(keyword_total_cost_vbj_fri_tx,2))

Executed  10  Iterations
Total Execution time (in secs) -  17.0
Total Input Tokens -  11789
Total Input Cost = USD  0.12
Total Output Tokens -  746
Total Output Cost = USD  0.02
Total Cost = USD  0.14


In [725]:
# Build the negative keyword/phrase summary table for store 'vbj_fri_tx'.
# Input : keyword_negative_output_vbj_fri_tx - JSON strings, each parsed below as
#         {category: [{'keywords': '<comma separated>', 'phrases': '<comma separated>'}, ...]}
# Output: negative_keywords_vbj_fri_tx - one row per (Store Name, Sentiment, Type)

# Column layout: three identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the display name once (it does not depend on the loop below):
# first keyword_mappings entry whose key contains the store identifier
input_store_identifier = 'vbj_fri_tx'
store_name = next(
    (value for key, value in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Collect plain dict records and build the DataFrame once, instead of
# calling pd.concat for every single row inside the loop
keyword_rows = []
for json_str in keyword_negative_output_vbj_fri_tx:
    # Load the JSON string into a dictionary
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # Keywords row first, then phrases row - same order as before
            for entry_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name
                row_data['Sentiment'] = 'negative'
                row_data['Type'] = entry_type
                # Normalise whitespace around each comma-separated item
                row_data[category] = ', '.join(item.strip() for item in content[entry_type].split(','))
                keyword_rows.append(row_data)

# With no records, fall back to an empty frame that still carries the expected columns
if keyword_rows:
    negative_keywords_vbj_fri_tx = pd.DataFrame(keyword_rows)
else:
    negative_keywords_vbj_fri_tx = pd.DataFrame(columns=columns)

# Consolidate rows that share 'Store Name', 'Sentiment' and 'Type' into a single row;
# within each topic column the non-null strings are joined with a space
negative_keywords_vbj_fri_tx = negative_keywords_vbj_fri_tx.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

negative_keywords_vbj_fri_tx

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"VBJ Jewellers-Frisco, TX",negative,keywords,"poor :1, unfair :1, rip off :1, not trust :1, ...","un-inviting :1, crowded :1, hesitant :1, ignor...","un-inviting :1, un-inviting :1, disappointing ...",,No relevant negative keywords/ phrases,"no discounts:2, better rates:1","unfair :1, rip off :1","expensive: 1, high: 1, rigid: 1, competative: ...","damage :2, fake :1, damaged :1, yellow stones ...","return process:1, restocking fee:1, making cos..."
1,"VBJ Jewellers-Frisco, TX",negative,phrases,"no report available :1, not there in the GIA d...","treatment we got there :1, billing section nee...","very hesitant to show us other stuff :1, not a...",,No relevant negative keywords/ phrases,"no proper treatment and no discounts:1, told t...",charging all the making cost on customer who r...,"charged me almost 10% of the cost: 1, pricing ...",one of the pieces that I like the most had dam...,"return process is tiresome:1, not straightaway..."


#### tan_chi_il

In [726]:
# Extract negative keywords per topic for store 'tan_chi_il' and report token usage and cost.

# Results returned by negative_keywords(), one entry per topic that has negative reviews (a list)
keyword_negative_output_tan_chi_il = []
# Topic columns to scan; each one is compared against -1 in the loop below
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# One-element list: handed to the progress thread and incremented in place by the loop
keyword_counter_tan_chi_il = [0]
keyword_input_token_tan_chi_il = 0
keyword_output_token_tan_chi_il = 0
keyword_start_time_loop_tan_chi_il = time.time()

# Look the store's DataFrame up once instead of twice per topic
keyword_source_df_tan_chi_il = keyword_dataframes['tan_chi_il_final_sen_df_jul']

# Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(
    target=print_dynamic_message_keyword,
    args=(keyword_counter_tan_chi_il, keyword_total_iterations, stop_event),
)
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_tan_chi_il[0] += 1
        # Keep only the review texts whose value for this topic is -1 (negative)
        filtered_comments_tan_chi_il = keyword_source_df_tan_chi_il.loc[
            keyword_source_df_tan_chi_il[topic] == -1, 'review_text'
        ].tolist()
        # Topics without negative comments are skipped (no call is made)
        if filtered_comments_tan_chi_il:
            # negative_keywords returns (result, input token count, output token count)
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_tan_chi_il, topic)
            keyword_negative_output_tan_chi_il.append(keywords)
            keyword_input_token_tan_chi_il += input_tokens_loop
            keyword_output_token_tan_chi_il += output_token_loop
finally:
    # Always signal and join the dynamic message thread, even when a call above raises;
    # previously an exception skipped stop_event.set() and the thread was never joined
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tan_chi_il = time.time()
# Hard-coded rates: USD 0.01 per 1000 input tokens, USD 0.03 per 1000 output tokens
keyword_cost_input_token_tan_chi_il = round((0.01/1000)*keyword_input_token_tan_chi_il,2)
keyword_cost_output_token_tan_chi_il = round((0.03/1000)*keyword_output_token_tan_chi_il,2)
keyword_total_cost_tan_chi_il = keyword_cost_input_token_tan_chi_il + keyword_cost_output_token_tan_chi_il
keyword_total_time_loop_tan_chi_il = keyword_end_time_loop_tan_chi_il - keyword_start_time_loop_tan_chi_il

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_tan_chi_il[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tan_chi_il,1))
print("Total Input Tokens - ", keyword_input_token_tan_chi_il)
print("Total Input Cost = USD ",keyword_cost_input_token_tan_chi_il)
print("Total Output Tokens - ", keyword_output_token_tan_chi_il)
print("Total Output Cost = USD ",keyword_cost_output_token_tan_chi_il)
print("Total Cost = USD ",round(keyword_total_cost_tan_chi_il,2))

Executed  10  Iterations
Total Execution time (in secs) -  15.0
Total Input Tokens -  11177
Total Input Cost = USD  0.11
Total Output Tokens -  594
Total Output Cost = USD  0.02
Total Cost = USD  0.13


In [727]:
# Build the negative keyword/phrase summary table for store 'tan_chi_il'.
# Input : keyword_negative_output_tan_chi_il - JSON strings, each parsed below as
#         {category: [{'keywords': '<comma separated>', 'phrases': '<comma separated>'}, ...]}
# Output: negative_keywords_tan_chi_il - one row per (Store Name, Sentiment, Type)

# Column layout: three identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the display name once (it does not depend on the loop below):
# first keyword_mappings entry whose key contains the store identifier
input_store_identifier = 'tan_chi_il'
store_name = next(
    (value for key, value in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Collect plain dict records and build the DataFrame once, instead of
# calling pd.concat for every single row inside the loop
keyword_rows = []
for json_str in keyword_negative_output_tan_chi_il:
    # Load the JSON string into a dictionary
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # Keywords row first, then phrases row - same order as before
            for entry_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name
                row_data['Sentiment'] = 'negative'
                row_data['Type'] = entry_type
                # Normalise whitespace around each comma-separated item
                row_data[category] = ', '.join(item.strip() for item in content[entry_type].split(','))
                keyword_rows.append(row_data)

# With no records, fall back to an empty frame that still carries the expected columns
if keyword_rows:
    negative_keywords_tan_chi_il = pd.DataFrame(keyword_rows)
else:
    negative_keywords_tan_chi_il = pd.DataFrame(columns=columns)

# Consolidate rows that share 'Store Name', 'Sentiment' and 'Type' into a single row;
# within each topic column the non-null strings are joined with a space
negative_keywords_tan_chi_il = negative_keywords_tan_chi_il.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

negative_keywords_tan_chi_il

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Tanishq-Chicago, IL",negative,keywords,"disregard :1, erode trust :1, compromise :1, m...","poor experience: 2, poor service: 2, human err...","rude :2, poor service :2, neglect :1, untraine...",,,No relevant negative keywords/ phrases,,"misguide :2, human error :2, freeze :1, lock :...","defective :2, unsatisfactory :2","store credit:2, defective product:2, exchange ..."
1,"Tanishq-Chicago, IL",negative,phrases,"removing critical feedback :1, blatant disrega...","no idea as how and who will help: 1, not easy ...","none of them bothered to assist me :2, not all...",,,No relevant negative keywords/ phrases,,"cannot fix the gold price :1, cannot freeze th...","product was defective :2, repair was not satis...","can't exchange another store product:2, argued..."


#### tan_fri_tx

In [728]:
# Extract negative keywords per topic for store 'tan_fri_tx' and report token usage and cost.

# Results returned by negative_keywords(), one entry per topic that has negative reviews (a list)
keyword_negative_output_tan_fri_tx = []
# Topic columns to scan; each one is compared against -1 in the loop below
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# One-element list: handed to the progress thread and incremented in place by the loop
keyword_counter_tan_fri_tx = [0]
keyword_input_token_tan_fri_tx = 0
keyword_output_token_tan_fri_tx = 0
keyword_start_time_loop_tan_fri_tx = time.time()

# Look the store's DataFrame up once instead of twice per topic
keyword_source_df_tan_fri_tx = keyword_dataframes['tan_fri_tx_final_sen_df_jul']

# Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(
    target=print_dynamic_message_keyword,
    args=(keyword_counter_tan_fri_tx, keyword_total_iterations, stop_event),
)
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_tan_fri_tx[0] += 1
        # Keep only the review texts whose value for this topic is -1 (negative)
        filtered_comments_tan_fri_tx = keyword_source_df_tan_fri_tx.loc[
            keyword_source_df_tan_fri_tx[topic] == -1, 'review_text'
        ].tolist()
        # Topics without negative comments are skipped (no call is made)
        if filtered_comments_tan_fri_tx:
            # negative_keywords returns (result, input token count, output token count)
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_tan_fri_tx, topic)
            keyword_negative_output_tan_fri_tx.append(keywords)
            keyword_input_token_tan_fri_tx += input_tokens_loop
            keyword_output_token_tan_fri_tx += output_token_loop
finally:
    # Always signal and join the dynamic message thread, even when a call above raises;
    # previously an exception skipped stop_event.set() and the thread was never joined
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tan_fri_tx = time.time()
# Hard-coded rates: USD 0.01 per 1000 input tokens, USD 0.03 per 1000 output tokens
keyword_cost_input_token_tan_fri_tx = round((0.01/1000)*keyword_input_token_tan_fri_tx,2)
keyword_cost_output_token_tan_fri_tx = round((0.03/1000)*keyword_output_token_tan_fri_tx,2)
keyword_total_cost_tan_fri_tx = keyword_cost_input_token_tan_fri_tx + keyword_cost_output_token_tan_fri_tx
keyword_total_time_loop_tan_fri_tx = keyword_end_time_loop_tan_fri_tx - keyword_start_time_loop_tan_fri_tx

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_tan_fri_tx[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tan_fri_tx,1))
print("Total Input Tokens - ", keyword_input_token_tan_fri_tx)
print("Total Input Cost = USD ",keyword_cost_input_token_tan_fri_tx)
print("Total Output Tokens - ", keyword_output_token_tan_fri_tx)
print("Total Output Cost = USD ",keyword_cost_output_token_tan_fri_tx)
print("Total Cost = USD ",round(keyword_total_cost_tan_fri_tx,2))

Executed  10  Iterations
Total Execution time (in secs) -  20.0
Total Input Tokens -  11380
Total Input Cost = USD  0.11
Total Output Tokens -  756
Total Output Cost = USD  0.02
Total Cost = USD  0.13


In [729]:
# Build the negative keyword/phrase summary table for store 'tan_fri_tx'.
# Input : keyword_negative_output_tan_fri_tx - JSON strings, each parsed below as
#         {category: [{'keywords': '<comma separated>', 'phrases': '<comma separated>'}, ...]}
# Output: negative_keywords_tan_fri_tx - one row per (Store Name, Sentiment, Type)

# Column layout: three identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the display name once (it does not depend on the loop below):
# first keyword_mappings entry whose key contains the store identifier
input_store_identifier = 'tan_fri_tx'
store_name = next(
    (value for key, value in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Collect plain dict records and build the DataFrame once, instead of
# calling pd.concat for every single row inside the loop
keyword_rows = []
for json_str in keyword_negative_output_tan_fri_tx:
    # Load the JSON string into a dictionary
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # Keywords row first, then phrases row - same order as before
            for entry_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name
                row_data['Sentiment'] = 'negative'
                row_data['Type'] = entry_type
                # Normalise whitespace around each comma-separated item
                row_data[category] = ', '.join(item.strip() for item in content[entry_type].split(','))
                keyword_rows.append(row_data)

# With no records, fall back to an empty frame that still carries the expected columns
if keyword_rows:
    negative_keywords_tan_fri_tx = pd.DataFrame(keyword_rows)
else:
    negative_keywords_tan_fri_tx = pd.DataFrame(columns=columns)

# Consolidate rows that share 'Store Name', 'Sentiment' and 'Type' into a single row;
# within each topic column the non-null strings are joined with a space
negative_keywords_tan_fri_tx = negative_keywords_tan_fri_tx.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

negative_keywords_tan_fri_tx

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Tanishq-Frisco, TX",negative,keywords,"faulty machines:1, limited knowledge:1, faulty...","poor experience:2, denied entry:2, limited cho...","unhelpful :2, arrogant :1, amateur :1, unfrien...",,"limited collection: 3, less collection: 1, not...","no clarity :2, little more :2, not giving :2, ...",high :4,"overpriced:2, budget:2","broken :2, faulty :2, breakages :1","faulty machines: 2, no cash: 1, store credits:..."
1,"Tanishq-Frisco, TX",negative,phrases,"multiple breakages within the chain:1, weighin...","locked doors at 6:30:2, no clarity in communic...","didn't help much :2, didn't understand English...",,"choices were very limited: 2, didn't meet our ...","didn't get much of a discount :2, not giving m...","Making charge is very high :3, making charges ...","spending more than budget:2, pricing of the pr...","broke immediately :1, multiple breakages :1, f...",weighing machines at this branch are faulty: 1...


#### tan_hou_tx

In [730]:
# Extract negative keywords per topic for store 'tan_hou_tx' and report token usage and cost.

# Results returned by negative_keywords(), one entry per topic that has negative reviews (a list)
keyword_negative_output_tan_hou_tx = []
# Topic columns to scan; each one is compared against -1 in the loop below
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# One-element list: handed to the progress thread and incremented in place by the loop
keyword_counter_tan_hou_tx = [0]
keyword_input_token_tan_hou_tx = 0
keyword_output_token_tan_hou_tx = 0
keyword_start_time_loop_tan_hou_tx = time.time()

# Look the store's DataFrame up once instead of twice per topic
keyword_source_df_tan_hou_tx = keyword_dataframes['tan_hou_tx_final_sen_df_jul']

# Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(
    target=print_dynamic_message_keyword,
    args=(keyword_counter_tan_hou_tx, keyword_total_iterations, stop_event),
)
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_tan_hou_tx[0] += 1
        # Keep only the review texts whose value for this topic is -1 (negative)
        filtered_comments_tan_hou_tx = keyword_source_df_tan_hou_tx.loc[
            keyword_source_df_tan_hou_tx[topic] == -1, 'review_text'
        ].tolist()
        # Topics without negative comments are skipped (no call is made)
        if filtered_comments_tan_hou_tx:
            # negative_keywords returns (result, input token count, output token count)
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_tan_hou_tx, topic)
            keyword_negative_output_tan_hou_tx.append(keywords)
            keyword_input_token_tan_hou_tx += input_tokens_loop
            keyword_output_token_tan_hou_tx += output_token_loop
finally:
    # Always signal and join the dynamic message thread, even when a call above raises;
    # previously an exception skipped stop_event.set() and the thread was never joined
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tan_hou_tx = time.time()
# Hard-coded rates: USD 0.01 per 1000 input tokens, USD 0.03 per 1000 output tokens
keyword_cost_input_token_tan_hou_tx = round((0.01/1000)*keyword_input_token_tan_hou_tx,2)
keyword_cost_output_token_tan_hou_tx = round((0.03/1000)*keyword_output_token_tan_hou_tx,2)
keyword_total_cost_tan_hou_tx = keyword_cost_input_token_tan_hou_tx + keyword_cost_output_token_tan_hou_tx
keyword_total_time_loop_tan_hou_tx = keyword_end_time_loop_tan_hou_tx - keyword_start_time_loop_tan_hou_tx

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_tan_hou_tx[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tan_hou_tx,1))
print("Total Input Tokens - ", keyword_input_token_tan_hou_tx)
print("Total Input Cost = USD ",keyword_cost_input_token_tan_hou_tx)
print("Total Output Tokens - ", keyword_output_token_tan_hou_tx)
print("Total Output Cost = USD ",keyword_cost_output_token_tan_hou_tx)
print("Total Cost = USD ",round(keyword_total_cost_tan_hou_tx,2))

Executed  10  Iterations
Total Execution time (in secs) -  13.5
Total Input Tokens -  10618
Total Input Cost = USD  0.11
Total Output Tokens -  503
Total Output Cost = USD  0.02
Total Cost = USD  0.13


In [731]:
# Build the negative keyword/phrase summary table for store 'tan_hou_tx'.
# Input : keyword_negative_output_tan_hou_tx - JSON strings, each parsed below as
#         {category: [{'keywords': '<comma separated>', 'phrases': '<comma separated>'}, ...]}
# Output: negative_keywords_tan_hou_tx - one row per (Store Name, Sentiment, Type)

# Column layout: three identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the display name once (it does not depend on the loop below):
# first keyword_mappings entry whose key contains the store identifier
input_store_identifier = 'tan_hou_tx'
store_name = next(
    (value for key, value in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Collect plain dict records and build the DataFrame once, instead of
# calling pd.concat for every single row inside the loop
keyword_rows = []
for json_str in keyword_negative_output_tan_hou_tx:
    # Load the JSON string into a dictionary
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # Keywords row first, then phrases row - same order as before
            for entry_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name
                row_data['Sentiment'] = 'negative'
                row_data['Type'] = entry_type
                # Normalise whitespace around each comma-separated item
                row_data[category] = ', '.join(item.strip() for item in content[entry_type].split(','))
                keyword_rows.append(row_data)

# With no records, fall back to an empty frame that still carries the expected columns
if keyword_rows:
    negative_keywords_tan_hou_tx = pd.DataFrame(keyword_rows)
else:
    negative_keywords_tan_hou_tx = pd.DataFrame(columns=columns)

# Consolidate rows that share 'Store Name', 'Sentiment' and 'Type' into a single row;
# within each topic column the non-null strings are joined with a space
negative_keywords_tan_hou_tx = negative_keywords_tan_hou_tx.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

negative_keywords_tan_hou_tx

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Tanishq-Houston, TX",negative,keywords,"cheated :2, hidden fee :2, damaged product :2,...","bad customer :4, horrible experience :3, rude ...","attitude problem:2, rude:2, unhelpful:1, uneth...",,"few varieties:2, better collection:1, similar:1",,"hidden fee: 2, high: 2",expensive :1,"damaged product:2, deformed:2, suspect:1",
1,"Tanishq-Houston, TX",negative,phrases,"rethink our loyalty :2, should we be able to t...","attitude problem :2, never coming back again :...","staff not treating my friend very well:1, secu...",,"expected better collection:1, few varieties in...",,"making charges are very high: 2, hidden fee of...",over 25% more expensive :1,"got deformed in one usage:1, damaged product:1...",


#### tan_new_nj

In [732]:
# Extract negative keywords per topic for store 'tan_new_nj' and report token usage and cost.

# Results returned by negative_keywords(), one entry per topic that has negative reviews (a list)
keyword_negative_output_tan_new_nj = []
# Topic columns to scan; each one is compared against -1 in the loop below
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# One-element list: handed to the progress thread and incremented in place by the loop
keyword_counter_tan_new_nj = [0]
keyword_input_token_tan_new_nj = 0
keyword_output_token_tan_new_nj = 0
keyword_start_time_loop_tan_new_nj = time.time()

# Look the store's DataFrame up once instead of twice per topic
keyword_source_df_tan_new_nj = keyword_dataframes['tan_new_nj_final_sen_df_jul']

# Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(
    target=print_dynamic_message_keyword,
    args=(keyword_counter_tan_new_nj, keyword_total_iterations, stop_event),
)
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_tan_new_nj[0] += 1
        # Keep only the review texts whose value for this topic is -1 (negative)
        filtered_comments_tan_new_nj = keyword_source_df_tan_new_nj.loc[
            keyword_source_df_tan_new_nj[topic] == -1, 'review_text'
        ].tolist()
        # Topics without negative comments are skipped (no call is made)
        if filtered_comments_tan_new_nj:
            # negative_keywords returns (result, input token count, output token count)
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_tan_new_nj, topic)
            keyword_negative_output_tan_new_nj.append(keywords)
            keyword_input_token_tan_new_nj += input_tokens_loop
            keyword_output_token_tan_new_nj += output_token_loop
finally:
    # Always signal and join the dynamic message thread, even when a call above raises;
    # previously an exception skipped stop_event.set() and the thread was never joined
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tan_new_nj = time.time()
# Hard-coded rates: USD 0.01 per 1000 input tokens, USD 0.03 per 1000 output tokens
keyword_cost_input_token_tan_new_nj = round((0.01/1000)*keyword_input_token_tan_new_nj,2)
keyword_cost_output_token_tan_new_nj = round((0.03/1000)*keyword_output_token_tan_new_nj,2)
keyword_total_cost_tan_new_nj = keyword_cost_input_token_tan_new_nj + keyword_cost_output_token_tan_new_nj
keyword_total_time_loop_tan_new_nj = keyword_end_time_loop_tan_new_nj - keyword_start_time_loop_tan_new_nj

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_tan_new_nj[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tan_new_nj,1))
print("Total Input Tokens - ", keyword_input_token_tan_new_nj)
print("Total Input Cost = USD ",keyword_cost_input_token_tan_new_nj)
print("Total Output Tokens - ", keyword_output_token_tan_new_nj)
print("Total Output Cost = USD ",keyword_cost_output_token_tan_new_nj)
print("Total Cost = USD ",round(keyword_total_cost_tan_new_nj,2))

Executed  10  Iterations
Total Execution time (in secs) -  22.5
Total Input Tokens -  27451
Total Input Cost = USD  0.27
Total Output Tokens -  813
Total Output Cost = USD  0.02
Total Cost = USD  0.29


In [733]:
# Build the negative keyword/phrase summary table for store 'tan_new_nj'.
# Input : keyword_negative_output_tan_new_nj - JSON strings, each parsed below as
#         {category: [{'keywords': '<comma separated>', 'phrases': '<comma separated>'}, ...]}
# Output: negative_keywords_tan_new_nj - one row per (Store Name, Sentiment, Type)

# Column layout: three identifying columns followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the display name once (it does not depend on the loop below):
# first keyword_mappings entry whose key contains the store identifier
input_store_identifier = 'tan_new_nj'
store_name = next(
    (value for key, value in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Collect plain dict records and build the DataFrame once, instead of
# calling pd.concat for every single row inside the loop
keyword_rows = []
for json_str in keyword_negative_output_tan_new_nj:
    # Load the JSON string into a dictionary
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # Keywords row first, then phrases row - same order as before
            for entry_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name
                row_data['Sentiment'] = 'negative'
                row_data['Type'] = entry_type
                # Normalise whitespace around each comma-separated item
                row_data[category] = ', '.join(item.strip() for item in content[entry_type].split(','))
                keyword_rows.append(row_data)

# With no records, fall back to an empty frame that still carries the expected columns
if keyword_rows:
    negative_keywords_tan_new_nj = pd.DataFrame(keyword_rows)
else:
    negative_keywords_tan_new_nj = pd.DataFrame(columns=columns)

# Consolidate rows that share 'Store Name', 'Sentiment' and 'Type' into a single row;
# within each topic column the non-null strings are joined with a space
negative_keywords_tan_new_nj = negative_keywords_tan_new_nj.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

negative_keywords_tan_new_nj

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Tanishq-New Jersey, NJ",negative,keywords,"uncomfortable :2, insecure :1, discriminative ...","long wait :3, rude staff :2, bad management :2...","rude :4, dismissive :3, unprofessional :2, ind...",,"limited inventory: 2, lack of variety: 1, limi...",,No relevant negative keywords/ phrases,"high :2, expensive :2, overpriced :1, unreason...","poorly finished:2, faulty:2, damaged:1, broke:...","no returns :2, poor exchange :1, painful excha..."
1,"Tanishq-New Jersey, NJ",negative,phrases,no longer feel comfortable spending my money :...,"waited for two hours :2, no one attended :2, n...","treated very rudely :2, rude and dismissive :1...",,"extremely limited inventory: 1, not a lot of v...",,No relevant negative keywords/ phrases,prices are so high here compared to stores on ...,"diamonds from a bracelet just fell off:1, clas...","exchanges are not accepted after 6 PM :2, very..."


#### tan_bar_db

In [734]:
# Extract negative keywords per topic for store 'tan_bar_db' and report token usage and cost.

# Results returned by negative_keywords(), one entry per topic that has negative reviews (a list)
keyword_negative_output_tan_bar_db = []
# Topic columns to scan; each one is compared against -1 in the loop below
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# One-element list: handed to the progress thread and incremented in place by the loop
keyword_counter_tan_bar_db = [0]
keyword_input_token_tan_bar_db = 0
keyword_output_token_tan_bar_db = 0
keyword_start_time_loop_tan_bar_db = time.time()

# Look the store's DataFrame up once instead of twice per topic
keyword_source_df_tan_bar_db = keyword_dataframes['tan_bar_db_final_sen_df_jul']

# Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(
    target=print_dynamic_message_keyword,
    args=(keyword_counter_tan_bar_db, keyword_total_iterations, stop_event),
)
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_tan_bar_db[0] += 1
        # Keep only the review texts whose value for this topic is -1 (negative)
        filtered_comments_tan_bar_db = keyword_source_df_tan_bar_db.loc[
            keyword_source_df_tan_bar_db[topic] == -1, 'review_text'
        ].tolist()
        # Topics without negative comments are skipped (no call is made)
        if filtered_comments_tan_bar_db:
            # negative_keywords returns (result, input token count, output token count)
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_tan_bar_db, topic)
            keyword_negative_output_tan_bar_db.append(keywords)
            keyword_input_token_tan_bar_db += input_tokens_loop
            keyword_output_token_tan_bar_db += output_token_loop
finally:
    # Always signal and join the dynamic message thread, even when a call above raises;
    # previously an exception skipped stop_event.set() and the thread was never joined
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tan_bar_db = time.time()
# Hard-coded rates: USD 0.01 per 1000 input tokens, USD 0.03 per 1000 output tokens
keyword_cost_input_token_tan_bar_db = round((0.01/1000)*keyword_input_token_tan_bar_db,2)
keyword_cost_output_token_tan_bar_db = round((0.03/1000)*keyword_output_token_tan_bar_db,2)
keyword_total_cost_tan_bar_db = keyword_cost_input_token_tan_bar_db + keyword_cost_output_token_tan_bar_db
keyword_total_time_loop_tan_bar_db = keyword_end_time_loop_tan_bar_db - keyword_start_time_loop_tan_bar_db

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_tan_bar_db[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tan_bar_db,1))
print("Total Input Tokens - ", keyword_input_token_tan_bar_db)
print("Total Input Cost = USD ",keyword_cost_input_token_tan_bar_db)
print("Total Output Tokens - ", keyword_output_token_tan_bar_db)
print("Total Output Cost = USD ",keyword_cost_output_token_tan_bar_db)
print("Total Cost = USD ",round(keyword_total_cost_tan_bar_db,2))

Executed  10  Iterations
Total Execution time (in secs) -  19.0
Total Input Tokens -  9587
Total Input Cost = USD  0.1
Total Output Tokens -  592
Total Output Cost = USD  0.02
Total Cost = USD  0.12


In [735]:
# Column layout of the per-store negative keyword/phrase summary
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store name once: the first keyword_mappings key containing the
# store identifier wins, with a fallback label when none matches
input_store_identifier = 'tan_bar_db'
store_name_tan_bar_db = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name_tan_bar_db = value
        break

# Collect one record per row and build the DataFrame in a single step,
# instead of growing it with pd.concat inside the loop
rows_tan_bar_db = []
for json_str in keyword_negative_output_tan_bar_db:
    # Load the JSON string into a dictionary of category -> list of items
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # Keywords row first, then the phrases row
            for entry_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_tan_bar_db
                row_data['Sentiment'] = 'negative'
                row_data['Type'] = entry_type
                # Trim whitespace around each comma-separated item
                row_data[category] = ', '.join(item.strip() for item in content[entry_type].split(','))
                rows_tan_bar_db.append(row_data)

# With no records, fall back to an empty frame that still has the expected columns
if rows_tan_bar_db:
    negative_keywords_tan_bar_db = pd.DataFrame(rows_tan_bar_db)
else:
    negative_keywords_tan_bar_db = pd.DataFrame(columns=columns)

# Consolidate rows that share 'Store Name', 'Sentiment' and 'Type' into one row,
# joining the non-null text of every other column with a space
negative_keywords_tan_bar_db = negative_keywords_tan_bar_db.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

negative_keywords_tan_bar_db

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Tanishq Jewellers-Al Barsha, DB",negative,keywords,"lied :4, false information :2, careful :2","worst experience:1, poor way:1, bad experience...","lied :2, unfriendly :1, humiliated :1, rude :1...",,limited :1,"marketing gimmick: 2, discount scams: 1","high making :5, making charges :4, making char...",expensive: 2,,"making charges:2, less gold:2, additional:2, h..."
1,"Tanishq Jewellers-Al Barsha, DB",negative,phrases,"information provided to us was false :2, repre...","not at all friendly:1, no any respect:1, treat...","not at all friendly :1, asked not to try & mak...",,"needed more office wear :1, needs to add more ...",all these other brands giving you 50-70% off i...,"making charged quite high around 15% :2, makin...",Price bit expensive: 2,,"losing 3 grams of our gold:2, charged a making..."


#### tan_fah_db

In [736]:
# List collecting the negative_keywords() result for each topic that has negative reviews
keyword_negative_output_tan_fah_db = []
# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Progress counter is a one-element list because it is handed to the progress thread below
keyword_counter_tan_fah_db = [0]
keyword_input_token_tan_fah_db = 0
keyword_output_token_tan_fah_db = 0
keyword_start_time_loop_tan_fah_db = time.time()

# Sentiment frame for this store, looked up once instead of twice per topic
sentiment_df_tan_fah_db = keyword_dataframes['tan_fah_db_final_sen_df_jul']

#Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_tan_fah_db, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_tan_fah_db[0] += 1
        # Keep only the review texts whose value for this topic is -1
        filtered_comments_tan_fah_db = sentiment_df_tan_fah_db[sentiment_df_tan_fah_db[topic] == -1]['review_text'].tolist()
        # Topics without any such review are skipped (no call is made)
        if filtered_comments_tan_fah_db:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_tan_fah_db, topic)
            # Store the result and accumulate token usage
            keyword_negative_output_tan_fah_db.append(keywords)
            keyword_input_token_tan_fah_db += input_tokens_loop
            keyword_output_token_tan_fah_db += output_token_loop
finally:
    # Always stop the dynamic message thread, so it is not left running
    # if a call inside the loop raises
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tan_fah_db = time.time()
# Cost at USD 0.01 per 1,000 input tokens and USD 0.03 per 1,000 output tokens, each rounded to cents
keyword_cost_input_token_tan_fah_db = round((0.01/1000)*keyword_input_token_tan_fah_db,2)
keyword_cost_output_token_tan_fah_db = round((0.03/1000)*keyword_output_token_tan_fah_db,2)
keyword_total_cost_tan_fah_db = keyword_cost_input_token_tan_fah_db + keyword_cost_output_token_tan_fah_db
keyword_total_time_loop_tan_fah_db = keyword_end_time_loop_tan_fah_db - keyword_start_time_loop_tan_fah_db

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_tan_fah_db[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tan_fah_db,1))
print("Total Input Tokens - ", keyword_input_token_tan_fah_db)
print("Total Input Cost = USD ",keyword_cost_input_token_tan_fah_db)
print("Total Output Tokens - ", keyword_output_token_tan_fah_db)
print("Total Output Cost = USD ",keyword_cost_output_token_tan_fah_db)
print("Total Cost = USD ",round(keyword_total_cost_tan_fah_db,2))

Executed  10  Iterations
Total Execution time (in secs) -  10.0
Total Input Tokens -  6420
Total Input Cost = USD  0.06
Total Output Tokens -  392
Total Output Cost = USD  0.01
Total Cost = USD  0.07


In [737]:
# Column layout of the per-store negative keyword/phrase summary
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store name once: the first keyword_mappings key containing the
# store identifier wins, with a fallback label when none matches
input_store_identifier = 'tan_fah_db'
store_name_tan_fah_db = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name_tan_fah_db = value
        break

# Collect one record per row and build the DataFrame in a single step,
# instead of growing it with pd.concat inside the loop
rows_tan_fah_db = []
for json_str in keyword_negative_output_tan_fah_db:
    # Load the JSON string into a dictionary of category -> list of items
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # Keywords row first, then the phrases row
            for entry_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_tan_fah_db
                row_data['Sentiment'] = 'negative'
                row_data['Type'] = entry_type
                # Trim whitespace around each comma-separated item
                row_data[category] = ', '.join(item.strip() for item in content[entry_type].split(','))
                rows_tan_fah_db.append(row_data)

# With no records, fall back to an empty frame that still has the expected columns
if rows_tan_fah_db:
    negative_keywords_tan_fah_db = pd.DataFrame(rows_tan_fah_db)
else:
    negative_keywords_tan_fah_db = pd.DataFrame(columns=columns)

# Consolidate rows that share 'Store Name', 'Sentiment' and 'Type' into one row,
# joining the non-null text of every other column with a space
negative_keywords_tan_fah_db = negative_keywords_tan_fah_db.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

negative_keywords_tan_fah_db

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Tanishq Jewellers-Al Fahidi, DB",negative,keywords,"false details:1, wrong info:1",No relevant negative keywords/ phrases,"false details: 2, wrong info: 2",,"more design:3, lack:1, disappointed:1",No relevant negative keywords/ phrases,"high :2, higher :1",No relevant negative keywords/ phrases,,
1,"Tanishq Jewellers-Al Fahidi, DB",negative,phrases,"telling false details:1, salesman told us we c...",No relevant negative keywords/ phrases,"telling false details: 2, salesman told us we ...",,"Need to keep more chain design:2, looking for ...",No relevant negative keywords/ phrases,"making charges are too high :2, making charge ...",No relevant negative keywords/ phrases,,


#### tan_kar_db

In [738]:
# List collecting the negative_keywords() result for each topic that has negative reviews
keyword_negative_output_tan_kar_db = []
# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Progress counter is a one-element list because it is handed to the progress thread below
keyword_counter_tan_kar_db = [0]
keyword_input_token_tan_kar_db = 0
keyword_output_token_tan_kar_db = 0
keyword_start_time_loop_tan_kar_db = time.time()

# Sentiment frame for this store, looked up once instead of twice per topic
sentiment_df_tan_kar_db = keyword_dataframes['tan_kar_db_final_sen_df_jul']

#Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_tan_kar_db, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_tan_kar_db[0] += 1
        # Keep only the review texts whose value for this topic is -1
        filtered_comments_tan_kar_db = sentiment_df_tan_kar_db[sentiment_df_tan_kar_db[topic] == -1]['review_text'].tolist()
        # Topics without any such review are skipped (no call is made)
        if filtered_comments_tan_kar_db:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_tan_kar_db, topic)
            # Store the result and accumulate token usage
            keyword_negative_output_tan_kar_db.append(keywords)
            keyword_input_token_tan_kar_db += input_tokens_loop
            keyword_output_token_tan_kar_db += output_token_loop
finally:
    # Always stop the dynamic message thread, so it is not left running
    # if a call inside the loop raises
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tan_kar_db = time.time()
# Cost at USD 0.01 per 1,000 input tokens and USD 0.03 per 1,000 output tokens, each rounded to cents
keyword_cost_input_token_tan_kar_db = round((0.01/1000)*keyword_input_token_tan_kar_db,2)
keyword_cost_output_token_tan_kar_db = round((0.03/1000)*keyword_output_token_tan_kar_db,2)
keyword_total_cost_tan_kar_db = keyword_cost_input_token_tan_kar_db + keyword_cost_output_token_tan_kar_db
keyword_total_time_loop_tan_kar_db = keyword_end_time_loop_tan_kar_db - keyword_start_time_loop_tan_kar_db

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_tan_kar_db[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tan_kar_db,1))
print("Total Input Tokens - ", keyword_input_token_tan_kar_db)
print("Total Input Cost = USD ",keyword_cost_input_token_tan_kar_db)
print("Total Output Tokens - ", keyword_output_token_tan_kar_db)
print("Total Output Cost = USD ",keyword_cost_output_token_tan_kar_db)
print("Total Cost = USD ",round(keyword_total_cost_tan_kar_db,2))

Executed  10  Iterations
Total Execution time (in secs) -  12.5
Total Input Tokens -  6439
Total Input Cost = USD  0.06
Total Output Tokens -  349
Total Output Cost = USD  0.01
Total Cost = USD  0.07


In [739]:
# Column layout of the per-store negative keyword/phrase summary
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store name once: the first keyword_mappings key containing the
# store identifier wins, with a fallback label when none matches
input_store_identifier = 'tan_kar_db'
store_name_tan_kar_db = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name_tan_kar_db = value
        break

# Collect one record per row and build the DataFrame in a single step,
# instead of growing it with pd.concat inside the loop
rows_tan_kar_db = []
for json_str in keyword_negative_output_tan_kar_db:
    # Load the JSON string into a dictionary of category -> list of items
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # Keywords row first, then the phrases row
            for entry_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_tan_kar_db
                row_data['Sentiment'] = 'negative'
                row_data['Type'] = entry_type
                # Trim whitespace around each comma-separated item
                row_data[category] = ', '.join(item.strip() for item in content[entry_type].split(','))
                rows_tan_kar_db.append(row_data)

# With no records, fall back to an empty frame that still has the expected columns
if rows_tan_kar_db:
    negative_keywords_tan_kar_db = pd.DataFrame(rows_tan_kar_db)
else:
    negative_keywords_tan_kar_db = pd.DataFrame(columns=columns)

# Consolidate rows that share 'Store Name', 'Sentiment' and 'Type' into one row,
# joining the non-null text of every other column with a space
negative_keywords_tan_kar_db = negative_keywords_tan_kar_db.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

negative_keywords_tan_kar_db

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Tanishq Jewellers-Al Karama, DB",negative,keywords,lying :2,No relevant negative keywords/ phrases,"disgusted :2, disinterested :2, laying :1, sto...",,collection :2,No relevant negative keywords/ phrases,"very high:2, different:2, mess:2",,"broke :2, nightmare :2",
1,"Tanishq Jewellers-Al Karama, DB",negative,phrases,Manager is telling a lot of stories :2,No relevant negative keywords/ phrases,"looked visibly disgusted/disinterested :2, Man...",,Collection can be a bit more :2,no alternative way to purchase the jewels .......,making charges and all was different and was a...,,"broke in 2 weeks :2, without any undue pressur...",


#### tan_ham_ad

In [740]:
# List collecting the negative_keywords() result for each topic that has negative reviews
keyword_negative_output_tan_ham_ad = []
# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Progress counter is a one-element list because it is handed to the progress thread below
keyword_counter_tan_ham_ad = [0]
keyword_input_token_tan_ham_ad = 0
keyword_output_token_tan_ham_ad = 0
keyword_start_time_loop_tan_ham_ad = time.time()

# Sentiment frame for this store, looked up once instead of twice per topic
sentiment_df_tan_ham_ad = keyword_dataframes['tan_ham_ad_final_sen_df_jul']

#Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_tan_ham_ad, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_tan_ham_ad[0] += 1
        # Keep only the review texts whose value for this topic is -1
        filtered_comments_tan_ham_ad = sentiment_df_tan_ham_ad[sentiment_df_tan_ham_ad[topic] == -1]['review_text'].tolist()
        # Topics without any such review are skipped (no call is made)
        if filtered_comments_tan_ham_ad:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_tan_ham_ad, topic)
            # Store the result and accumulate token usage
            keyword_negative_output_tan_ham_ad.append(keywords)
            keyword_input_token_tan_ham_ad += input_tokens_loop
            keyword_output_token_tan_ham_ad += output_token_loop
finally:
    # Always stop the dynamic message thread, so it is not left running
    # if a call inside the loop raises
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tan_ham_ad = time.time()
# Cost at USD 0.01 per 1,000 input tokens and USD 0.03 per 1,000 output tokens, each rounded to cents
keyword_cost_input_token_tan_ham_ad = round((0.01/1000)*keyword_input_token_tan_ham_ad,2)
keyword_cost_output_token_tan_ham_ad = round((0.03/1000)*keyword_output_token_tan_ham_ad,2)
keyword_total_cost_tan_ham_ad = keyword_cost_input_token_tan_ham_ad + keyword_cost_output_token_tan_ham_ad
keyword_total_time_loop_tan_ham_ad = keyword_end_time_loop_tan_ham_ad - keyword_start_time_loop_tan_ham_ad

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_tan_ham_ad[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tan_ham_ad,1))
print("Total Input Tokens - ", keyword_input_token_tan_ham_ad)
print("Total Input Cost = USD ",keyword_cost_input_token_tan_ham_ad)
print("Total Output Tokens - ", keyword_output_token_tan_ham_ad)
print("Total Output Cost = USD ",keyword_cost_output_token_tan_ham_ad)
print("Total Cost = USD ",round(keyword_total_cost_tan_ham_ad,2))

Executed  10  Iterations
Total Execution time (in secs) -  10.5
Total Input Tokens -  4834
Total Input Cost = USD  0.05
Total Output Tokens -  363
Total Output Cost = USD  0.01
Total Cost = USD  0.06


In [741]:
# Column layout of the per-store negative keyword/phrase summary
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store name once: the first keyword_mappings key containing the
# store identifier wins, with a fallback label when none matches
input_store_identifier = 'tan_ham_ad'
store_name_tan_ham_ad = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name_tan_ham_ad = value
        break

# Collect one record per row and build the DataFrame in a single step,
# instead of growing it with pd.concat inside the loop
rows_tan_ham_ad = []
for json_str in keyword_negative_output_tan_ham_ad:
    # Load the JSON string into a dictionary of category -> list of items
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # Keywords row first, then the phrases row
            for entry_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_tan_ham_ad
                row_data['Sentiment'] = 'negative'
                row_data['Type'] = entry_type
                # Trim whitespace around each comma-separated item
                row_data[category] = ', '.join(item.strip() for item in content[entry_type].split(','))
                rows_tan_ham_ad.append(row_data)

# With no records, fall back to an empty frame that still has the expected columns
if rows_tan_ham_ad:
    negative_keywords_tan_ham_ad = pd.DataFrame(rows_tan_ham_ad)
else:
    negative_keywords_tan_ham_ad = pd.DataFrame(columns=columns)

# Consolidate rows that share 'Store Name', 'Sentiment' and 'Type' into one row,
# joining the non-null text of every other column with a space
negative_keywords_tan_ham_ad = negative_keywords_tan_ham_ad.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

negative_keywords_tan_ham_ad

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Tanishq Jewellers-Hamdan Bin Mohammed Street, AD",negative,keywords,,"disappointing experience:1, ignored:1, disrega...","unprofessional :1, selective :1, ignored :1, d...",,"Limited choice :3, Not much choice :2, low sto...",loss: 2,"increase :2, discount :2","loss: 2, budget: 2",,
1,"Tanishq Jewellers-Hamdan Bin Mohammed Street, AD",negative,phrases,,"completely ignored us:1, noticeably selective:...","completely ignored us :1, without acknowledgin...",,they have little to offer here :2,did not even give a discount: 2,"increase the making charge :2, did not even gi...","increase the making charge: 2, did not even gi...",,


#### tan_mee_db

In [742]:
# List collecting the negative_keywords() result for each topic that has negative reviews
keyword_negative_output_tan_mee_db = []
# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Progress counter is a one-element list because it is handed to the progress thread below
keyword_counter_tan_mee_db = [0]
keyword_input_token_tan_mee_db = 0
keyword_output_token_tan_mee_db = 0
keyword_start_time_loop_tan_mee_db = time.time()

# Sentiment frame for this store, looked up once instead of twice per topic
sentiment_df_tan_mee_db = keyword_dataframes['tan_mee_db_final_sen_df_jul']

#Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_tan_mee_db, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_tan_mee_db[0] += 1
        # Keep only the review texts whose value for this topic is -1
        filtered_comments_tan_mee_db = sentiment_df_tan_mee_db[sentiment_df_tan_mee_db[topic] == -1]['review_text'].tolist()
        # Topics without any such review are skipped (no call is made)
        if filtered_comments_tan_mee_db:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_tan_mee_db, topic)
            # Store the result and accumulate token usage
            keyword_negative_output_tan_mee_db.append(keywords)
            keyword_input_token_tan_mee_db += input_tokens_loop
            keyword_output_token_tan_mee_db += output_token_loop
finally:
    # Always stop the dynamic message thread, so it is not left running
    # if a call inside the loop raises
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tan_mee_db = time.time()
# Cost at USD 0.01 per 1,000 input tokens and USD 0.03 per 1,000 output tokens, each rounded to cents
keyword_cost_input_token_tan_mee_db = round((0.01/1000)*keyword_input_token_tan_mee_db,2)
keyword_cost_output_token_tan_mee_db = round((0.03/1000)*keyword_output_token_tan_mee_db,2)
keyword_total_cost_tan_mee_db = keyword_cost_input_token_tan_mee_db + keyword_cost_output_token_tan_mee_db
keyword_total_time_loop_tan_mee_db = keyword_end_time_loop_tan_mee_db - keyword_start_time_loop_tan_mee_db

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_tan_mee_db[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tan_mee_db,1))
print("Total Input Tokens - ", keyword_input_token_tan_mee_db)
print("Total Input Cost = USD ",keyword_cost_input_token_tan_mee_db)
print("Total Output Tokens - ", keyword_output_token_tan_mee_db)
print("Total Output Cost = USD ",keyword_cost_output_token_tan_mee_db)
print("Total Cost = USD ",round(keyword_total_cost_tan_mee_db,2))

Executed  10  Iterations
Total Execution time (in secs) -  7.5
Total Input Tokens -  4077
Total Input Cost = USD  0.04
Total Output Tokens -  249
Total Output Cost = USD  0.01
Total Cost = USD  0.05


In [743]:
# Column layout of the per-store negative keyword/phrase summary
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store name once: the first keyword_mappings key containing the
# store identifier wins, with a fallback label when none matches
input_store_identifier = 'tan_mee_db'
store_name_tan_mee_db = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name_tan_mee_db = value
        break

# Collect one record per row and build the DataFrame in a single step,
# instead of growing it with pd.concat inside the loop
rows_tan_mee_db = []
for json_str in keyword_negative_output_tan_mee_db:
    # Load the JSON string into a dictionary of category -> list of items
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # Keywords row first, then the phrases row
            for entry_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_tan_mee_db
                row_data['Sentiment'] = 'negative'
                row_data['Type'] = entry_type
                # Trim whitespace around each comma-separated item
                row_data[category] = ', '.join(item.strip() for item in content[entry_type].split(','))
                rows_tan_mee_db.append(row_data)

# With no records, fall back to an empty frame that still has the expected columns
if rows_tan_mee_db:
    negative_keywords_tan_mee_db = pd.DataFrame(rows_tan_mee_db)
else:
    negative_keywords_tan_mee_db = pd.DataFrame(columns=columns)

# Consolidate rows that share 'Store Name', 'Sentiment' and 'Type' into one row,
# joining the non-null text of every other column with a space
negative_keywords_tan_mee_db = negative_keywords_tan_mee_db.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

negative_keywords_tan_mee_db

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Tanishq Jewellers-Meena Bazar, DB",negative,keywords,over confident:2,"nobody answer:2, useless:1, horrible:1, mistake:1",No relevant negative keywords/ phrases,,No relevant negative keywords/ phrases,,,,,
1,"Tanishq Jewellers-Meena Bazar, DB",negative,phrases,"ask for your number and name:2, data collectio...","didn't receive the card:2, no one showed up:1,...",No relevant negative keywords/ phrases,,No relevant negative keywords/ phrases,,,,,


#### tan_sil_db

In [744]:
# List collecting the negative_keywords() result for each topic that has negative reviews
keyword_negative_output_tan_sil_db = []
# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Progress counter is a one-element list because it is handed to the progress thread below
keyword_counter_tan_sil_db = [0]
keyword_input_token_tan_sil_db = 0
keyword_output_token_tan_sil_db = 0
keyword_start_time_loop_tan_sil_db = time.time()

# Sentiment frame for this store, looked up once instead of twice per topic
sentiment_df_tan_sil_db = keyword_dataframes['tan_sil_db_final_sen_df_jul']

#Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_tan_sil_db, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_tan_sil_db[0] += 1
        # Keep only the review texts whose value for this topic is -1
        filtered_comments_tan_sil_db = sentiment_df_tan_sil_db[sentiment_df_tan_sil_db[topic] == -1]['review_text'].tolist()
        # Topics without any such review are skipped (no call is made)
        if filtered_comments_tan_sil_db:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_tan_sil_db, topic)
            # Store the result and accumulate token usage
            keyword_negative_output_tan_sil_db.append(keywords)
            keyword_input_token_tan_sil_db += input_tokens_loop
            keyword_output_token_tan_sil_db += output_token_loop
finally:
    # Always stop the dynamic message thread, so it is not left running
    # if a call inside the loop raises
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tan_sil_db = time.time()
# Cost at USD 0.01 per 1,000 input tokens and USD 0.03 per 1,000 output tokens, each rounded to cents
keyword_cost_input_token_tan_sil_db = round((0.01/1000)*keyword_input_token_tan_sil_db,2)
keyword_cost_output_token_tan_sil_db = round((0.03/1000)*keyword_output_token_tan_sil_db,2)
keyword_total_cost_tan_sil_db = keyword_cost_input_token_tan_sil_db + keyword_cost_output_token_tan_sil_db
keyword_total_time_loop_tan_sil_db = keyword_end_time_loop_tan_sil_db - keyword_start_time_loop_tan_sil_db

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_tan_sil_db[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tan_sil_db,1))
print("Total Input Tokens - ", keyword_input_token_tan_sil_db)
print("Total Input Cost = USD ",keyword_cost_input_token_tan_sil_db)
print("Total Output Tokens - ", keyword_output_token_tan_sil_db)
print("Total Output Cost = USD ",keyword_cost_output_token_tan_sil_db)
print("Total Cost = USD ",round(keyword_total_cost_tan_sil_db,2))

Executed  10  Iterations
Total Execution time (in secs) -  9.0
Total Input Tokens -  4301
Total Input Cost = USD  0.04
Total Output Tokens -  256
Total Output Cost = USD  0.01
Total Cost = USD  0.05


In [745]:
# Build the negative keyword/phrase summary table for store 'tan_sil_db'.
# Each JSON payload in keyword_negative_output_tan_sil_db maps a topic to a list of
# {'keywords': ..., 'phrases': ...} entries; the result holds one 'keywords' row and
# one 'phrases' row for the store, with one column per topic.

# Identifying columns followed by one column per review topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# The store name is identical for every row, so resolve it once:
# first keyword_mappings key containing the identifier wins
input_store_identifier = 'tan_sil_db'
store_name_tan_sil_db = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name_tan_sil_db = value
        break

# Collect plain dict records and build the frame once, instead of growing a
# DataFrame with pd.concat for every row
negative_keyword_records_tan_sil_db = []
for json_str in keyword_negative_output_tan_sil_db:
    # Load the JSON string into a dictionary keyed by category
    data = json.loads(json_str)
    for category, content_list in data.items():
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name_tan_sil_db
        row_data['Sentiment'] = 'negative'
        for content in content_list:
            # Keywords are handled first, phrases next; each becomes its own record
            for entry_type in ('keywords', 'phrases'):
                row_data['Type'] = entry_type
                row_data[category] = ', '.join(item.strip() for item in content[entry_type].split(','))
                # Snapshot the dict because row_data is mutated on the next pass
                negative_keyword_records_tan_sil_db.append(dict(row_data))

if negative_keyword_records_tan_sil_db:
    # Consolidate rows that share the same 'Type' into a single row per 'Store Name' and 'Sentiment'
    negative_keywords_tan_sil_db = (
        pd.DataFrame(negative_keyword_records_tan_sil_db)
        .groupby(['Store Name', 'Sentiment', 'Type'])
        .agg(lambda x: ' '.join(x.dropna()))
        .reset_index()
    )
else:
    # With no payloads, groupby/reset_index on the empty frame raises
    # "ValueError: cannot insert Type, already exists"; return an empty table instead
    negative_keywords_tan_sil_db = pd.DataFrame(columns=columns)

negative_keywords_tan_sil_db

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Tanishq Jewellers-Silicon Central, DB",negative,keywords,,"time consuming:2, slow:1","unhelpful :1, show off :1",,"not available :2, not much :2",,,"expensive:2, costier:2, extra:1",,
1,"Tanishq Jewellers-Silicon Central, DB",negative,phrases,,"very time consuming:2, very very slow:1, too m...",STOP SHOWING OFF IN FRONT OF YOUR CUSTOMERS AN...,,"mostly not available :2, not much options :2",,,"More than expensive:2, pay something extra:1, ...",,


#### mia_awm_ad

In [746]:
# Negative keyword extraction for store 'mia_awm_ad': for every topic, the reviews
# flagged -1 for that topic are passed to negative_keywords(); the returned payloads
# and token counts are accumulated, then timing and cost figures are printed.

# Payloads returned by negative_keywords(), one per topic that has flagged reviews
keyword_negative_output_mia_awm_ad = []
# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

keyword_counter_mia_awm_ad = [0]  # mutable counter shared with the progress thread
keyword_input_token_mia_awm_ad = 0
keyword_output_token_mia_awm_ad = 0
keyword_start_time_loop_mia_awm_ad = time.time()

# Look the store's frame up once, before the thread starts, so a bad key fails
# before any background thread is running
sentiment_df_mia_awm_ad = keyword_dataframes['mia_awm_ad_final_sen_df_jul']

#Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(
    target=print_dynamic_message_keyword,
    args=(keyword_counter_mia_awm_ad, keyword_total_iterations, stop_event),
)
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_mia_awm_ad[0] += 1
        # Keep the review text of rows whose value for this topic is -1
        filtered_comments_mia_awm_ad = sentiment_df_mia_awm_ad[sentiment_df_mia_awm_ad[topic] == -1]['review_text'].tolist()
        # Only call negative_keywords when at least one such review exists
        if filtered_comments_mia_awm_ad:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_mia_awm_ad, topic)
            # Add the result to the output list and accumulate token usage
            keyword_negative_output_mia_awm_ad.append(keywords)
            keyword_input_token_mia_awm_ad += input_tokens_loop
            keyword_output_token_mia_awm_ad += output_token_loop
finally:
    # Always stop the dynamic message thread, even when a call above raises;
    # otherwise it would keep running after the cell has failed
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_mia_awm_ad = time.time()
# Costs use hard-coded rates of 0.01 (input) and 0.03 (output) per 1000 tokens
keyword_cost_input_token_mia_awm_ad = round((0.01/1000)*keyword_input_token_mia_awm_ad,2)
keyword_cost_output_token_mia_awm_ad = round((0.03/1000)*keyword_output_token_mia_awm_ad,2)
keyword_total_cost_mia_awm_ad = keyword_cost_input_token_mia_awm_ad + keyword_cost_output_token_mia_awm_ad
keyword_total_time_loop_mia_awm_ad = keyword_end_time_loop_mia_awm_ad - keyword_start_time_loop_mia_awm_ad

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_mia_awm_ad[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_mia_awm_ad,1))
print("Total Input Tokens - ", keyword_input_token_mia_awm_ad)
print("Total Input Cost = USD ",keyword_cost_input_token_mia_awm_ad)
print("Total Output Tokens - ", keyword_output_token_mia_awm_ad)
print("Total Output Cost = USD ",keyword_cost_output_token_mia_awm_ad)
print("Total Cost = USD ",round(keyword_total_cost_mia_awm_ad,2))

Executed  10  Iterations
Total Execution time (in secs) -  0.0
Total Input Tokens -  0
Total Input Cost = USD  0.0
Total Output Tokens -  0
Total Output Cost = USD  0.0
Total Cost = USD  0.0


In [747]:
# Build the negative keyword/phrase summary table for store 'mia_awm_ad'.
# Each JSON payload in keyword_negative_output_mia_awm_ad maps a topic to a list of
# {'keywords': ..., 'phrases': ...} entries; the result holds one 'keywords' row and
# one 'phrases' row for the store, with one column per topic.

# Identifying columns followed by one column per review topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# The store name is identical for every row, so resolve it once:
# first keyword_mappings key containing the identifier wins
input_store_identifier = 'mia_awm_ad'
store_name_mia_awm_ad = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name_mia_awm_ad = value
        break

# Collect plain dict records and build the frame once, instead of growing a
# DataFrame with pd.concat for every row
negative_keyword_records_mia_awm_ad = []
for json_str in keyword_negative_output_mia_awm_ad:
    # Load the JSON string into a dictionary keyed by category
    data = json.loads(json_str)
    for category, content_list in data.items():
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name_mia_awm_ad
        row_data['Sentiment'] = 'negative'
        for content in content_list:
            # Keywords are handled first, phrases next; each becomes its own record
            for entry_type in ('keywords', 'phrases'):
                row_data['Type'] = entry_type
                row_data[category] = ', '.join(item.strip() for item in content[entry_type].split(','))
                # Snapshot the dict because row_data is mutated on the next pass
                negative_keyword_records_mia_awm_ad.append(dict(row_data))

if negative_keyword_records_mia_awm_ad:
    # Consolidate rows that share the same 'Type' into a single row per 'Store Name' and 'Sentiment'
    negative_keywords_mia_awm_ad = (
        pd.DataFrame(negative_keyword_records_mia_awm_ad)
        .groupby(['Store Name', 'Sentiment', 'Type'])
        .agg(lambda x: ' '.join(x.dropna()))
        .reset_index()
    )
else:
    # With no payloads, groupby/reset_index on the empty frame raises
    # "ValueError: cannot insert Type, already exists"; return an empty table instead
    negative_keywords_mia_awm_ad = pd.DataFrame(columns=columns)

negative_keywords_mia_awm_ad

ValueError: cannot insert Type, already exists

#### mia_bur_db

In [748]:
# Negative keyword extraction for store 'mia_bur_db': for every topic, the reviews
# flagged -1 for that topic are passed to negative_keywords(); the returned payloads
# and token counts are accumulated, then timing and cost figures are printed.

# Payloads returned by negative_keywords(), one per topic that has flagged reviews
keyword_negative_output_mia_bur_db = []
# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

keyword_counter_mia_bur_db = [0]  # mutable counter shared with the progress thread
keyword_input_token_mia_bur_db = 0
keyword_output_token_mia_bur_db = 0
keyword_start_time_loop_mia_bur_db = time.time()

# Look the store's frame up once, before the thread starts, so a bad key fails
# before any background thread is running
sentiment_df_mia_bur_db = keyword_dataframes['mia_bur_db_final_sen_df_jul']

#Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(
    target=print_dynamic_message_keyword,
    args=(keyword_counter_mia_bur_db, keyword_total_iterations, stop_event),
)
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_mia_bur_db[0] += 1
        # Keep the review text of rows whose value for this topic is -1
        filtered_comments_mia_bur_db = sentiment_df_mia_bur_db[sentiment_df_mia_bur_db[topic] == -1]['review_text'].tolist()
        # Only call negative_keywords when at least one such review exists
        if filtered_comments_mia_bur_db:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_mia_bur_db, topic)
            # Add the result to the output list and accumulate token usage
            keyword_negative_output_mia_bur_db.append(keywords)
            keyword_input_token_mia_bur_db += input_tokens_loop
            keyword_output_token_mia_bur_db += output_token_loop
finally:
    # Always stop the dynamic message thread, even when a call above raises;
    # otherwise it would keep running after the cell has failed
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_mia_bur_db = time.time()
# Costs use hard-coded rates of 0.01 (input) and 0.03 (output) per 1000 tokens
keyword_cost_input_token_mia_bur_db = round((0.01/1000)*keyword_input_token_mia_bur_db,2)
keyword_cost_output_token_mia_bur_db = round((0.03/1000)*keyword_output_token_mia_bur_db,2)
keyword_total_cost_mia_bur_db = keyword_cost_input_token_mia_bur_db + keyword_cost_output_token_mia_bur_db
keyword_total_time_loop_mia_bur_db = keyword_end_time_loop_mia_bur_db - keyword_start_time_loop_mia_bur_db

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_mia_bur_db[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_mia_bur_db,1))
print("Total Input Tokens - ", keyword_input_token_mia_bur_db)
print("Total Input Cost = USD ",keyword_cost_input_token_mia_bur_db)
print("Total Output Tokens - ", keyword_output_token_mia_bur_db)
print("Total Output Cost = USD ",keyword_cost_output_token_mia_bur_db)
print("Total Cost = USD ",round(keyword_total_cost_mia_bur_db,2))

Executed  10  Iterations
Total Execution time (in secs) -  3.0
Total Input Tokens -  1496
Total Input Cost = USD  0.01
Total Output Tokens -  88
Total Output Cost = USD  0.0
Total Cost = USD  0.01


In [749]:
# Build the negative keyword/phrase summary table for store 'mia_bur_db'.
# Each JSON payload in keyword_negative_output_mia_bur_db maps a topic to a list of
# {'keywords': ..., 'phrases': ...} entries; the result holds one 'keywords' row and
# one 'phrases' row for the store, with one column per topic.

# Identifying columns followed by one column per review topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# The store name is identical for every row, so resolve it once:
# first keyword_mappings key containing the identifier wins
input_store_identifier = 'mia_bur_db'
store_name_mia_bur_db = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name_mia_bur_db = value
        break

# Collect plain dict records and build the frame once, instead of growing a
# DataFrame with pd.concat for every row
negative_keyword_records_mia_bur_db = []
for json_str in keyword_negative_output_mia_bur_db:
    # Load the JSON string into a dictionary keyed by category
    data = json.loads(json_str)
    for category, content_list in data.items():
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name_mia_bur_db
        row_data['Sentiment'] = 'negative'
        for content in content_list:
            # Keywords are handled first, phrases next; each becomes its own record
            for entry_type in ('keywords', 'phrases'):
                row_data['Type'] = entry_type
                row_data[category] = ', '.join(item.strip() for item in content[entry_type].split(','))
                # Snapshot the dict because row_data is mutated on the next pass
                negative_keyword_records_mia_bur_db.append(dict(row_data))

if negative_keyword_records_mia_bur_db:
    # Consolidate rows that share the same 'Type' into a single row per 'Store Name' and 'Sentiment'
    negative_keywords_mia_bur_db = (
        pd.DataFrame(negative_keyword_records_mia_bur_db)
        .groupby(['Store Name', 'Sentiment', 'Type'])
        .agg(lambda x: ' '.join(x.dropna()))
        .reset_index()
    )
else:
    # With no payloads, groupby/reset_index on the empty frame raises
    # "ValueError: cannot insert Type, already exists"; return an empty table instead
    negative_keywords_mia_bur_db = pd.DataFrame(columns=columns)

negative_keywords_mia_bur_db

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Mia-Burjuman, DB",negative,keywords,,,,,No relevant negative keywords/ phrases,,No relevant negative keywords/ phrases,,,
1,"Mia-Burjuman, DB",negative,phrases,,,,,No relevant negative keywords/ phrases,,Not very transparent with the pricing of makin...,,,


#### tan_am_om

In [750]:
# Negative keyword extraction for store 'tan_am_om': for every topic, the reviews
# flagged -1 for that topic are passed to negative_keywords(); the returned payloads
# and token counts are accumulated, then timing and cost figures are printed.

# Payloads returned by negative_keywords(), one per topic that has flagged reviews
keyword_negative_output_tan_am_om = []
# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

keyword_counter_tan_am_om = [0]  # mutable counter shared with the progress thread
keyword_input_token_tan_am_om = 0
keyword_output_token_tan_am_om = 0
keyword_start_time_loop_tan_am_om = time.time()

# Look the store's frame up once, before the thread starts, so a bad key fails
# before any background thread is running
sentiment_df_tan_am_om = keyword_dataframes['tan_am_om_final_sen_df_jul']

#Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(
    target=print_dynamic_message_keyword,
    args=(keyword_counter_tan_am_om, keyword_total_iterations, stop_event),
)
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_tan_am_om[0] += 1
        # Keep the review text of rows whose value for this topic is -1
        filtered_comments_tan_am_om = sentiment_df_tan_am_om[sentiment_df_tan_am_om[topic] == -1]['review_text'].tolist()
        # Only call negative_keywords when at least one such review exists
        if filtered_comments_tan_am_om:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_tan_am_om, topic)
            # Add the result to the output list and accumulate token usage
            keyword_negative_output_tan_am_om.append(keywords)
            keyword_input_token_tan_am_om += input_tokens_loop
            keyword_output_token_tan_am_om += output_token_loop
finally:
    # Always stop the dynamic message thread, even when a call above raises;
    # otherwise it would keep running after the cell has failed
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tan_am_om = time.time()
# Costs use hard-coded rates of 0.01 (input) and 0.03 (output) per 1000 tokens
keyword_cost_input_token_tan_am_om = round((0.01/1000)*keyword_input_token_tan_am_om,2)
keyword_cost_output_token_tan_am_om = round((0.03/1000)*keyword_output_token_tan_am_om,2)
keyword_total_cost_tan_am_om = keyword_cost_input_token_tan_am_om + keyword_cost_output_token_tan_am_om
keyword_total_time_loop_tan_am_om = keyword_end_time_loop_tan_am_om - keyword_start_time_loop_tan_am_om

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_tan_am_om[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tan_am_om,1))
print("Total Input Tokens - ", keyword_input_token_tan_am_om)
print("Total Input Cost = USD ",keyword_cost_input_token_tan_am_om)
print("Total Output Tokens - ", keyword_output_token_tan_am_om)
print("Total Output Cost = USD ",keyword_cost_output_token_tan_am_om)
print("Total Cost = USD ",round(keyword_total_cost_tan_am_om,2))

Executed  10  Iterations
Total Execution time (in secs) -  1.5
Total Input Tokens -  780
Total Input Cost = USD  0.01
Total Output Tokens -  47
Total Output Cost = USD  0.0
Total Cost = USD  0.01


In [751]:
# Build the negative keyword/phrase summary table for store 'tan_am_om'.
# Each JSON payload in keyword_negative_output_tan_am_om maps a topic to a list of
# {'keywords': ..., 'phrases': ...} entries; the result holds one 'keywords' row and
# one 'phrases' row for the store, with one column per topic.

# Identifying columns followed by one column per review topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# The store name is identical for every row, so resolve it once:
# first keyword_mappings key containing the identifier wins
input_store_identifier = 'tan_am_om'
store_name_tan_am_om = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name_tan_am_om = value
        break

# Collect plain dict records and build the frame once, instead of growing a
# DataFrame with pd.concat for every row
negative_keyword_records_tan_am_om = []
for json_str in keyword_negative_output_tan_am_om:
    # Load the JSON string into a dictionary keyed by category
    data = json.loads(json_str)
    for category, content_list in data.items():
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name_tan_am_om
        row_data['Sentiment'] = 'negative'
        for content in content_list:
            # Keywords are handled first, phrases next; each becomes its own record
            for entry_type in ('keywords', 'phrases'):
                row_data['Type'] = entry_type
                row_data[category] = ', '.join(item.strip() for item in content[entry_type].split(','))
                # Snapshot the dict because row_data is mutated on the next pass
                negative_keyword_records_tan_am_om.append(dict(row_data))

if negative_keyword_records_tan_am_om:
    # Consolidate rows that share the same 'Type' into a single row per 'Store Name' and 'Sentiment'
    negative_keywords_tan_am_om = (
        pd.DataFrame(negative_keyword_records_tan_am_om)
        .groupby(['Store Name', 'Sentiment', 'Type'])
        .agg(lambda x: ' '.join(x.dropna()))
        .reset_index()
    )
else:
    # With no payloads, groupby/reset_index on the empty frame raises
    # "ValueError: cannot insert Type, already exists"; return an empty table instead
    negative_keywords_tan_am_om = pd.DataFrame(columns=columns)

negative_keywords_tan_am_om

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Tanishq Jewellers-Avenues Mall, OM",negative,keywords,,,,,limited :1,,,,,
1,"Tanishq Jewellers-Avenues Mall, OM",negative,phrases,,,,,"options are limited :1, inventory is not match...",,,,,


#### tan_atl_ga

In [752]:
# Negative keyword extraction for store 'tan_atl_ga': for every topic, the reviews
# flagged -1 for that topic are passed to negative_keywords(); the returned payloads
# and token counts are accumulated, then timing and cost figures are printed.

# Payloads returned by negative_keywords(), one per topic that has flagged reviews
keyword_negative_output_tan_atl_ga = []
# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

keyword_counter_tan_atl_ga = [0]  # mutable counter shared with the progress thread
keyword_input_token_tan_atl_ga = 0
keyword_output_token_tan_atl_ga = 0
keyword_start_time_loop_tan_atl_ga = time.time()

# Look the store's frame up once, before the thread starts, so a bad key fails
# before any background thread is running
sentiment_df_tan_atl_ga = keyword_dataframes['tan_atl_ga_final_sen_df_jul']

#Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(
    target=print_dynamic_message_keyword,
    args=(keyword_counter_tan_atl_ga, keyword_total_iterations, stop_event),
)
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_tan_atl_ga[0] += 1
        # Keep the review text of rows whose value for this topic is -1
        filtered_comments_tan_atl_ga = sentiment_df_tan_atl_ga[sentiment_df_tan_atl_ga[topic] == -1]['review_text'].tolist()
        # Only call negative_keywords when at least one such review exists
        if filtered_comments_tan_atl_ga:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_tan_atl_ga, topic)
            # Add the result to the output list and accumulate token usage
            keyword_negative_output_tan_atl_ga.append(keywords)
            keyword_input_token_tan_atl_ga += input_tokens_loop
            keyword_output_token_tan_atl_ga += output_token_loop
finally:
    # Always stop the dynamic message thread, even when a call above raises;
    # otherwise it would keep running after the cell has failed
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tan_atl_ga = time.time()
# Costs use hard-coded rates of 0.01 (input) and 0.03 (output) per 1000 tokens
keyword_cost_input_token_tan_atl_ga = round((0.01/1000)*keyword_input_token_tan_atl_ga,2)
keyword_cost_output_token_tan_atl_ga = round((0.03/1000)*keyword_output_token_tan_atl_ga,2)
keyword_total_cost_tan_atl_ga = keyword_cost_input_token_tan_atl_ga + keyword_cost_output_token_tan_atl_ga
keyword_total_time_loop_tan_atl_ga = keyword_end_time_loop_tan_atl_ga - keyword_start_time_loop_tan_atl_ga

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_tan_atl_ga[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tan_atl_ga,1))
print("Total Input Tokens - ", keyword_input_token_tan_atl_ga)
print("Total Input Cost = USD ",keyword_cost_input_token_tan_atl_ga)
print("Total Output Tokens - ", keyword_output_token_tan_atl_ga)
print("Total Output Cost = USD ",keyword_cost_output_token_tan_atl_ga)
print("Total Cost = USD ",round(keyword_total_cost_tan_atl_ga,2))

Executed  10  Iterations
Total Execution time (in secs) -  15.0
Total Input Tokens -  6951
Total Input Cost = USD  0.07
Total Output Tokens -  684
Total Output Cost = USD  0.02
Total Cost = USD  0.09


In [753]:
# Build the negative keyword/phrase summary table for store 'tan_atl_ga'.
# Each JSON payload in keyword_negative_output_tan_atl_ga maps a topic to a list of
# {'keywords': ..., 'phrases': ...} entries; the result holds one 'keywords' row and
# one 'phrases' row for the store, with one column per topic.

# Identifying columns followed by one column per review topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# The store name is identical for every row, so resolve it once:
# first keyword_mappings key containing the identifier wins
input_store_identifier = 'tan_atl_ga'
store_name_tan_atl_ga = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name_tan_atl_ga = value
        break

# Collect plain dict records and build the frame once, instead of growing a
# DataFrame with pd.concat for every row
negative_keyword_records_tan_atl_ga = []
for json_str in keyword_negative_output_tan_atl_ga:
    # Load the JSON string into a dictionary keyed by category
    data = json.loads(json_str)
    for category, content_list in data.items():
        row_data = {column: None for column in columns}
        row_data['Store Name'] = store_name_tan_atl_ga
        row_data['Sentiment'] = 'negative'
        for content in content_list:
            # Keywords are handled first, phrases next; each becomes its own record
            for entry_type in ('keywords', 'phrases'):
                row_data['Type'] = entry_type
                row_data[category] = ', '.join(item.strip() for item in content[entry_type].split(','))
                # Snapshot the dict because row_data is mutated on the next pass
                negative_keyword_records_tan_atl_ga.append(dict(row_data))

if negative_keyword_records_tan_atl_ga:
    # Consolidate rows that share the same 'Type' into a single row per 'Store Name' and 'Sentiment'
    negative_keywords_tan_atl_ga = (
        pd.DataFrame(negative_keyword_records_tan_atl_ga)
        .groupby(['Store Name', 'Sentiment', 'Type'])
        .agg(lambda x: ' '.join(x.dropna()))
        .reset_index()
    )
else:
    # With no payloads, groupby/reset_index on the empty frame raises
    # "ValueError: cannot insert Type, already exists"; return an empty table instead
    negative_keywords_tan_atl_ga = pd.DataFrame(columns=columns)

negative_keywords_tan_atl_ga

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Tanishq-Atlanta, GA",negative,keywords,"fake information:1, false promises:1, clueless...","clueless :2, contradictory :2, worst :1, terri...","clueless :2, dismissively :1",,"limited :1, not so great :1",,"higher :2, expensive :1, high :1, waste :1, co...","making charges: 3, competitive prices: 1, pric...",,"no returns:2, contradictory:1, clueless:1, res..."
1,"Tanishq-Atlanta, GA",negative,phrases,"cheating people with false promises:1, staff i...","return policies(NO RETURNS) :1, WORST STORE EX...","staff is clueless about their own policies :1,...",,"product selection felt quite limited :1, not s...",,"making charges to be significantly higher :1, ...",making charges to be significantly higher comp...,,"return literally only means exchange:1, terms ..."


#### tan_fc_qa

In [754]:
# Negative keyword extraction for store 'tan_fc_qa': for every topic, the reviews
# flagged -1 for that topic are passed to negative_keywords(); the returned payloads
# and token counts are accumulated, then timing and cost figures are printed.

# Payloads returned by negative_keywords(), one per topic that has flagged reviews
keyword_negative_output_tan_fc_qa = []
# List of topic columns to iterate over, excluding the 'review_text' column
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

keyword_counter_tan_fc_qa = [0]  # mutable counter shared with the progress thread
keyword_input_token_tan_fc_qa = 0
keyword_output_token_tan_fc_qa = 0
keyword_start_time_loop_tan_fc_qa = time.time()

# Look the store's frame up once, before the thread starts, so a bad key fails
# before any background thread is running
sentiment_df_tan_fc_qa = keyword_dataframes['tan_fc_qa_final_sen_df_jul']

#Threading setup
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(
    target=print_dynamic_message_keyword,
    args=(keyword_counter_tan_fc_qa, keyword_total_iterations, stop_event),
)
message_thread.start()

try:
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_tan_fc_qa[0] += 1
        # Keep the review text of rows whose value for this topic is -1
        filtered_comments_tan_fc_qa = sentiment_df_tan_fc_qa[sentiment_df_tan_fc_qa[topic] == -1]['review_text'].tolist()
        # Only call negative_keywords when at least one such review exists
        if filtered_comments_tan_fc_qa:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_tan_fc_qa, topic)
            # Add the result to the output list and accumulate token usage
            keyword_negative_output_tan_fc_qa.append(keywords)
            keyword_input_token_tan_fc_qa += input_tokens_loop
            keyword_output_token_tan_fc_qa += output_token_loop
finally:
    # Always stop the dynamic message thread, even when a call above raises;
    # otherwise it would keep running after the cell has failed
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tan_fc_qa = time.time()
# Costs use hard-coded rates of 0.01 (input) and 0.03 (output) per 1000 tokens
keyword_cost_input_token_tan_fc_qa = round((0.01/1000)*keyword_input_token_tan_fc_qa,2)
keyword_cost_output_token_tan_fc_qa = round((0.03/1000)*keyword_output_token_tan_fc_qa,2)
keyword_total_cost_tan_fc_qa = keyword_cost_input_token_tan_fc_qa + keyword_cost_output_token_tan_fc_qa
keyword_total_time_loop_tan_fc_qa = keyword_end_time_loop_tan_fc_qa - keyword_start_time_loop_tan_fc_qa

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_tan_fc_qa[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tan_fc_qa,1))
print("Total Input Tokens - ", keyword_input_token_tan_fc_qa)
print("Total Input Cost = USD ",keyword_cost_input_token_tan_fc_qa)
print("Total Output Tokens - ", keyword_output_token_tan_fc_qa)
print("Total Output Cost = USD ",keyword_cost_output_token_tan_fc_qa)
print("Total Cost = USD ",round(keyword_total_cost_tan_fc_qa,2))

Executed  10  Iterations
Total Execution time (in secs) -  0.0
Total Input Tokens -  0
Total Input Cost = USD  0.0
Total Output Tokens -  0
Total Output Cost = USD  0.0
Total Cost = USD  0.0


In [755]:
# Build the negative keyword/phrase summary table for store 'tan_fc_qa'.
# Output columns: identifying fields followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

input_store_identifier = 'tan_fc_qa'
# Store display name: value of the first keyword_mappings entry whose key contains
# the identifier; falls back to a placeholder when nothing matches
store_name_tan_fc_qa = next(
    (value for key, value in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Rows are collected in a list and converted to a DataFrame once, instead of
# growing the DataFrame with pd.concat on every row
negative_rows_tan_fc_qa = []
for json_str in keyword_negative_output_tan_fc_qa:
    # Each response is parsed as a JSON object mapping a category (used as the
    # column name) to a list of entries holding 'keywords' and 'phrases' strings
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # Every entry yields a 'keywords' row followed by a 'phrases' row
            for entry_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_tan_fc_qa
                row_data['Sentiment'] = 'negative'
                row_data['Type'] = entry_type
                # Trim each comma-separated item and re-join with ', '
                row_data[category] = ', '.join(item.strip() for item in content[entry_type].split(','))
                negative_rows_tan_fc_qa.append(row_data)

if negative_rows_tan_fc_qa:
    # Consolidate rows that share 'Store Name', 'Sentiment' and 'Type' into one row,
    # joining the non-null text of every topic column with a space
    negative_keywords_tan_fc_qa = (
        pd.DataFrame(negative_rows_tan_fc_qa)
        .groupby(['Store Name', 'Sentiment', 'Type'])
        .agg(lambda x: ' '.join(x.dropna()))
        .reset_index()
    )
else:
    # No responses were collected for this store. Grouping an empty frame used to fail
    # with "ValueError: cannot insert Type, already exists", so return an empty table
    # that still carries the expected columns.
    negative_keywords_tan_fc_qa = pd.DataFrame(columns=columns)

negative_keywords_tan_fc_qa

ValueError: cannot insert Type, already exists

#### tan_gs_db

In [762]:
# Extract negative keywords/phrases for store 'tan_gs_db', one negative_keywords() call per topic.
# Raw responses, one entry per topic that has at least one negative review
keyword_negative_output_tan_gs_db = []
# Topic columns to iterate over (the 'review_text' column is not a topic)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# One-element list: it is handed to the progress thread below and incremented in the loop
keyword_counter_tan_gs_db = [0]
keyword_input_token_tan_gs_db = 0
keyword_output_token_tan_gs_db = 0
keyword_start_time_loop_tan_gs_db = time.time()

# Threading setup for the dynamic progress message
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_tan_gs_db, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Look the store's frame up once rather than twice per topic
    sentiment_df_tan_gs_db = keyword_dataframes['tan_gs_db_final_sen_df_jul']
    for topic in keyword_topics:
        keyword_counter_tan_gs_db[0] += 1
        # Reviews whose value in this topic column is -1 (the negative tag)
        filtered_comments_tan_gs_db = sentiment_df_tan_gs_db.loc[sentiment_df_tan_gs_db[topic] == -1, 'review_text'].tolist()
        # Only call the model when the topic has at least one negative review
        if filtered_comments_tan_gs_db:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_tan_gs_db, topic)
            keyword_negative_output_tan_gs_db.append(keywords)
            keyword_input_token_tan_gs_db += input_tokens_loop
            keyword_output_token_tan_gs_db += output_token_loop
finally:
    # Stop the progress thread even when a lookup or model call raised; without this
    # the thread (which presumably loops until stop_event is set) would be left running
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tan_gs_db = time.time()
# Costs in USD: 0.01 per 1000 input tokens and 0.03 per 1000 output tokens, each rounded to 2 decimals
keyword_cost_input_token_tan_gs_db = round((0.01/1000)*keyword_input_token_tan_gs_db,2)
keyword_cost_output_token_tan_gs_db = round((0.03/1000)*keyword_output_token_tan_gs_db,2)
keyword_total_cost_tan_gs_db = keyword_cost_input_token_tan_gs_db + keyword_cost_output_token_tan_gs_db
keyword_total_time_loop_tan_gs_db = keyword_end_time_loop_tan_gs_db - keyword_start_time_loop_tan_gs_db

# Display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_tan_gs_db[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tan_gs_db,1))
print("Total Input Tokens - ", keyword_input_token_tan_gs_db)
print("Total Input Cost = USD ",keyword_cost_input_token_tan_gs_db)
print("Total Output Tokens - ", keyword_output_token_tan_gs_db)
print("Total Output Cost = USD ",keyword_cost_output_token_tan_gs_db)
print("Total Cost = USD ",round(keyword_total_cost_tan_gs_db,2))

Executed  10  Iterations
Total Execution time (in secs) -  6.5
Total Input Tokens -  3344
Total Input Cost = USD  0.03
Total Output Tokens -  229
Total Output Cost = USD  0.01
Total Cost = USD  0.04


In [763]:
# Build the negative keyword/phrase summary table for store 'tan_gs_db'.
# Output columns: identifying fields followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

input_store_identifier = 'tan_gs_db'
# Store display name: value of the first keyword_mappings entry whose key contains
# the identifier; falls back to a placeholder when nothing matches
store_name_tan_gs_db = next(
    (value for key, value in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Rows are collected in a list and converted to a DataFrame once, instead of
# growing the DataFrame with pd.concat on every row
negative_rows_tan_gs_db = []
for json_str in keyword_negative_output_tan_gs_db:
    # Each response is parsed as a JSON object mapping a category (used as the
    # column name) to a list of entries holding 'keywords' and 'phrases' strings
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # Every entry yields a 'keywords' row followed by a 'phrases' row
            for entry_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_tan_gs_db
                row_data['Sentiment'] = 'negative'
                row_data['Type'] = entry_type
                # Trim each comma-separated item and re-join with ', '
                row_data[category] = ', '.join(item.strip() for item in content[entry_type].split(','))
                negative_rows_tan_gs_db.append(row_data)

if negative_rows_tan_gs_db:
    # Consolidate rows that share 'Store Name', 'Sentiment' and 'Type' into one row,
    # joining the non-null text of every topic column with a space
    negative_keywords_tan_gs_db = (
        pd.DataFrame(negative_rows_tan_gs_db)
        .groupby(['Store Name', 'Sentiment', 'Type'])
        .agg(lambda x: ' '.join(x.dropna()))
        .reset_index()
    )
else:
    # No responses were collected for this store. Grouping an empty frame fails with
    # "ValueError: cannot insert Type, already exists", so return an empty table
    # that still carries the expected columns.
    negative_keywords_tan_gs_db = pd.DataFrame(columns=columns)

negative_keywords_tan_gs_db

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Tanishq Jewellers-Gold Souk, DB",negative,keywords,,"crowded place :1, no clear signs :1",,,,cheating :1,"high :2, higher :2","cheating :1, disclosed :1",,
1,"Tanishq Jewellers-Gold Souk, DB",negative,phrases,,"payment process took lot of time :1, hard to c...",,,,"discount was applied only on a single item :1,...","making charge bit high :1, making charge is in...","price for each item was not disclosed :1, disc...",,


#### tan_lul_qa

In [764]:
# Extract negative keywords/phrases for store 'tan_lul_qa', one negative_keywords() call per topic.
# Raw responses, one entry per topic that has at least one negative review
keyword_negative_output_tan_lul_qa = []
# Topic columns to iterate over (the 'review_text' column is not a topic)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# One-element list: it is handed to the progress thread below and incremented in the loop
keyword_counter_tan_lul_qa = [0]
keyword_input_token_tan_lul_qa = 0
keyword_output_token_tan_lul_qa = 0
keyword_start_time_loop_tan_lul_qa = time.time()

# Threading setup for the dynamic progress message
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_tan_lul_qa, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Look the store's frame up once rather than twice per topic
    sentiment_df_tan_lul_qa = keyword_dataframes['tan_lul_qa_final_sen_df_jul']
    for topic in keyword_topics:
        keyword_counter_tan_lul_qa[0] += 1
        # Reviews whose value in this topic column is -1 (the negative tag)
        filtered_comments_tan_lul_qa = sentiment_df_tan_lul_qa.loc[sentiment_df_tan_lul_qa[topic] == -1, 'review_text'].tolist()
        # Only call the model when the topic has at least one negative review
        if filtered_comments_tan_lul_qa:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_tan_lul_qa, topic)
            keyword_negative_output_tan_lul_qa.append(keywords)
            keyword_input_token_tan_lul_qa += input_tokens_loop
            keyword_output_token_tan_lul_qa += output_token_loop
finally:
    # Stop the progress thread even when a lookup or model call raised; without this
    # the thread (which presumably loops until stop_event is set) would be left running
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tan_lul_qa = time.time()
# Costs in USD: 0.01 per 1000 input tokens and 0.03 per 1000 output tokens, each rounded to 2 decimals
keyword_cost_input_token_tan_lul_qa = round((0.01/1000)*keyword_input_token_tan_lul_qa,2)
keyword_cost_output_token_tan_lul_qa = round((0.03/1000)*keyword_output_token_tan_lul_qa,2)
keyword_total_cost_tan_lul_qa = keyword_cost_input_token_tan_lul_qa + keyword_cost_output_token_tan_lul_qa
keyword_total_time_loop_tan_lul_qa = keyword_end_time_loop_tan_lul_qa - keyword_start_time_loop_tan_lul_qa

# Display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_tan_lul_qa[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tan_lul_qa,1))
print("Total Input Tokens - ", keyword_input_token_tan_lul_qa)
print("Total Input Cost = USD ",keyword_cost_input_token_tan_lul_qa)
print("Total Output Tokens - ", keyword_output_token_tan_lul_qa)
print("Total Output Cost = USD ",keyword_cost_output_token_tan_lul_qa)
print("Total Cost = USD ",round(keyword_total_cost_tan_lul_qa,2))

Executed  10  Iterations
Total Execution time (in secs) -  3.5
Total Input Tokens -  733
Total Input Cost = USD  0.01
Total Output Tokens -  41
Total Output Cost = USD  0.0
Total Cost = USD  0.01


In [765]:
# Build the negative keyword/phrase summary table for store 'tan_lul_qa'.
# Output columns: identifying fields followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

input_store_identifier = 'tan_lul_qa'
# Store display name: value of the first keyword_mappings entry whose key contains
# the identifier; falls back to a placeholder when nothing matches
store_name_tan_lul_qa = next(
    (value for key, value in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Rows are collected in a list and converted to a DataFrame once, instead of
# growing the DataFrame with pd.concat on every row
negative_rows_tan_lul_qa = []
for json_str in keyword_negative_output_tan_lul_qa:
    # Each response is parsed as a JSON object mapping a category (used as the
    # column name) to a list of entries holding 'keywords' and 'phrases' strings
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # Every entry yields a 'keywords' row followed by a 'phrases' row
            for entry_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_tan_lul_qa
                row_data['Sentiment'] = 'negative'
                row_data['Type'] = entry_type
                # Trim each comma-separated item and re-join with ', '
                row_data[category] = ', '.join(item.strip() for item in content[entry_type].split(','))
                negative_rows_tan_lul_qa.append(row_data)

if negative_rows_tan_lul_qa:
    # Consolidate rows that share 'Store Name', 'Sentiment' and 'Type' into one row,
    # joining the non-null text of every topic column with a space
    negative_keywords_tan_lul_qa = (
        pd.DataFrame(negative_rows_tan_lul_qa)
        .groupby(['Store Name', 'Sentiment', 'Type'])
        .agg(lambda x: ' '.join(x.dropna()))
        .reset_index()
    )
else:
    # No responses were collected for this store. Grouping an empty frame fails with
    # "ValueError: cannot insert Type, already exists", so return an empty table
    # that still carries the expected columns.
    negative_keywords_tan_lul_qa = pd.DataFrame(columns=columns)

negative_keywords_tan_lul_qa

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Tanishq Jewellers-Lulu Hypermarket, QA",negative,keywords,,,,,No relevant negative keywords/ phrases,,,,,
1,"Tanishq Jewellers-Lulu Hypermarket, QA",negative,phrases,,,,,No relevant negative keywords/ phrases,,,,,


#### tan_mank_db

In [766]:
# Extract negative keywords/phrases for store 'tan_mank_db', one negative_keywords() call per topic.
# Raw responses, one entry per topic that has at least one negative review
keyword_negative_output_tan_mank_db = []
# Topic columns to iterate over (the 'review_text' column is not a topic)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# One-element list: it is handed to the progress thread below and incremented in the loop
keyword_counter_tan_mank_db = [0]
keyword_input_token_tan_mank_db = 0
keyword_output_token_tan_mank_db = 0
keyword_start_time_loop_tan_mank_db = time.time()

# Threading setup for the dynamic progress message
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_tan_mank_db, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Look the store's frame up once rather than twice per topic
    sentiment_df_tan_mank_db = keyword_dataframes['tan_mank_db_final_sen_df_jul']
    for topic in keyword_topics:
        keyword_counter_tan_mank_db[0] += 1
        # Reviews whose value in this topic column is -1 (the negative tag)
        filtered_comments_tan_mank_db = sentiment_df_tan_mank_db.loc[sentiment_df_tan_mank_db[topic] == -1, 'review_text'].tolist()
        # Only call the model when the topic has at least one negative review
        if filtered_comments_tan_mank_db:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_tan_mank_db, topic)
            keyword_negative_output_tan_mank_db.append(keywords)
            keyword_input_token_tan_mank_db += input_tokens_loop
            keyword_output_token_tan_mank_db += output_token_loop
finally:
    # Stop the progress thread even when a lookup or model call raised; without this
    # the thread (which presumably loops until stop_event is set) would be left running
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tan_mank_db = time.time()
# Costs in USD: 0.01 per 1000 input tokens and 0.03 per 1000 output tokens, each rounded to 2 decimals
keyword_cost_input_token_tan_mank_db = round((0.01/1000)*keyword_input_token_tan_mank_db,2)
keyword_cost_output_token_tan_mank_db = round((0.03/1000)*keyword_output_token_tan_mank_db,2)
keyword_total_cost_tan_mank_db = keyword_cost_input_token_tan_mank_db + keyword_cost_output_token_tan_mank_db
keyword_total_time_loop_tan_mank_db = keyword_end_time_loop_tan_mank_db - keyword_start_time_loop_tan_mank_db

# Display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_tan_mank_db[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tan_mank_db,1))
print("Total Input Tokens - ", keyword_input_token_tan_mank_db)
print("Total Input Cost = USD ",keyword_cost_input_token_tan_mank_db)
print("Total Output Tokens - ", keyword_output_token_tan_mank_db)
print("Total Output Cost = USD ",keyword_cost_output_token_tan_mank_db)
print("Total Cost = USD ",round(keyword_total_cost_tan_mank_db,2))

Executed  10  Iterations
Total Execution time (in secs) -  0.0
Total Input Tokens -  0
Total Input Cost = USD  0.0
Total Output Tokens -  0
Total Output Cost = USD  0.0
Total Cost = USD  0.0


In [767]:
# Build the negative keyword/phrase summary table for store 'tan_mank_db'.
# Output columns: identifying fields followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

input_store_identifier = 'tan_mank_db'
# Store display name: value of the first keyword_mappings entry whose key contains
# the identifier; falls back to a placeholder when nothing matches
store_name_tan_mank_db = next(
    (value for key, value in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Rows are collected in a list and converted to a DataFrame once, instead of
# growing the DataFrame with pd.concat on every row
negative_rows_tan_mank_db = []
for json_str in keyword_negative_output_tan_mank_db:
    # Each response is parsed as a JSON object mapping a category (used as the
    # column name) to a list of entries holding 'keywords' and 'phrases' strings
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # Every entry yields a 'keywords' row followed by a 'phrases' row
            for entry_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_tan_mank_db
                row_data['Sentiment'] = 'negative'
                row_data['Type'] = entry_type
                # Trim each comma-separated item and re-join with ', '
                row_data[category] = ', '.join(item.strip() for item in content[entry_type].split(','))
                negative_rows_tan_mank_db.append(row_data)

if negative_rows_tan_mank_db:
    # Consolidate rows that share 'Store Name', 'Sentiment' and 'Type' into one row,
    # joining the non-null text of every topic column with a space
    negative_keywords_tan_mank_db = (
        pd.DataFrame(negative_rows_tan_mank_db)
        .groupby(['Store Name', 'Sentiment', 'Type'])
        .agg(lambda x: ' '.join(x.dropna()))
        .reset_index()
    )
else:
    # No responses were collected for this store. Grouping an empty frame used to fail
    # with "ValueError: cannot insert Type, already exists", so return an empty table
    # that still carries the expected columns.
    negative_keywords_tan_mank_db = pd.DataFrame(columns=columns)

negative_keywords_tan_mank_db

ValueError: cannot insert Type, already exists

#### tan_rol_sh

In [768]:
# Extract negative keywords/phrases for store 'tan_rol_sh', one negative_keywords() call per topic.
# Raw responses, one entry per topic that has at least one negative review
keyword_negative_output_tan_rol_sh = []
# Topic columns to iterate over (the 'review_text' column is not a topic)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# One-element list: it is handed to the progress thread below and incremented in the loop
keyword_counter_tan_rol_sh = [0]
keyword_input_token_tan_rol_sh = 0
keyword_output_token_tan_rol_sh = 0
keyword_start_time_loop_tan_rol_sh = time.time()

# Threading setup for the dynamic progress message
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_tan_rol_sh, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Look the store's frame up once rather than twice per topic
    sentiment_df_tan_rol_sh = keyword_dataframes['tan_rol_sh_final_sen_df_jul']
    for topic in keyword_topics:
        keyword_counter_tan_rol_sh[0] += 1
        # Reviews whose value in this topic column is -1 (the negative tag)
        filtered_comments_tan_rol_sh = sentiment_df_tan_rol_sh.loc[sentiment_df_tan_rol_sh[topic] == -1, 'review_text'].tolist()
        # Only call the model when the topic has at least one negative review
        if filtered_comments_tan_rol_sh:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_tan_rol_sh, topic)
            keyword_negative_output_tan_rol_sh.append(keywords)
            keyword_input_token_tan_rol_sh += input_tokens_loop
            keyword_output_token_tan_rol_sh += output_token_loop
finally:
    # Stop the progress thread even when a lookup or model call raised; without this
    # the thread (which presumably loops until stop_event is set) would be left running
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tan_rol_sh = time.time()
# Costs in USD: 0.01 per 1000 input tokens and 0.03 per 1000 output tokens, each rounded to 2 decimals
keyword_cost_input_token_tan_rol_sh = round((0.01/1000)*keyword_input_token_tan_rol_sh,2)
keyword_cost_output_token_tan_rol_sh = round((0.03/1000)*keyword_output_token_tan_rol_sh,2)
keyword_total_cost_tan_rol_sh = keyword_cost_input_token_tan_rol_sh + keyword_cost_output_token_tan_rol_sh
keyword_total_time_loop_tan_rol_sh = keyword_end_time_loop_tan_rol_sh - keyword_start_time_loop_tan_rol_sh

# Display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_tan_rol_sh[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tan_rol_sh,1))
print("Total Input Tokens - ", keyword_input_token_tan_rol_sh)
print("Total Input Cost = USD ",keyword_cost_input_token_tan_rol_sh)
print("Total Output Tokens - ", keyword_output_token_tan_rol_sh)
print("Total Output Cost = USD ",keyword_cost_output_token_tan_rol_sh)
print("Total Cost = USD ",round(keyword_total_cost_tan_rol_sh,2))

Executed  10  Iterations
Total Execution time (in secs) -  1.5
Total Input Tokens -  727
Total Input Cost = USD  0.01
Total Output Tokens -  40
Total Output Cost = USD  0.0
Total Cost = USD  0.01


In [769]:
# Build the negative keyword/phrase summary table for store 'tan_rol_sh'.
# Output columns: identifying fields followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

input_store_identifier = 'tan_rol_sh'
# Store display name: value of the first keyword_mappings entry whose key contains
# the identifier; falls back to a placeholder when nothing matches
store_name_tan_rol_sh = next(
    (value for key, value in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Rows are collected in a list and converted to a DataFrame once, instead of
# growing the DataFrame with pd.concat on every row
negative_rows_tan_rol_sh = []
for json_str in keyword_negative_output_tan_rol_sh:
    # Each response is parsed as a JSON object mapping a category (used as the
    # column name) to a list of entries holding 'keywords' and 'phrases' strings
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # Every entry yields a 'keywords' row followed by a 'phrases' row
            for entry_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_tan_rol_sh
                row_data['Sentiment'] = 'negative'
                row_data['Type'] = entry_type
                # Trim each comma-separated item and re-join with ', '
                row_data[category] = ', '.join(item.strip() for item in content[entry_type].split(','))
                negative_rows_tan_rol_sh.append(row_data)

if negative_rows_tan_rol_sh:
    # Consolidate rows that share 'Store Name', 'Sentiment' and 'Type' into one row,
    # joining the non-null text of every topic column with a space
    negative_keywords_tan_rol_sh = (
        pd.DataFrame(negative_rows_tan_rol_sh)
        .groupby(['Store Name', 'Sentiment', 'Type'])
        .agg(lambda x: ' '.join(x.dropna()))
        .reset_index()
    )
else:
    # No responses were collected for this store. Grouping an empty frame fails with
    # "ValueError: cannot insert Type, already exists", so return an empty table
    # that still carries the expected columns.
    negative_keywords_tan_rol_sh = pd.DataFrame(columns=columns)

negative_keywords_tan_rol_sh

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Tanishq Jewellers-Rolla, SH",negative,keywords,,,,,,,,No relevant negative keywords/ phrases,,
1,"Tanishq Jewellers-Rolla, SH",negative,phrases,,,,,,,,No relevant negative keywords/ phrases,,


#### tan_rse_wa

In [770]:
# Extract negative keywords/phrases for store 'tan_rse_wa', one negative_keywords() call per topic.
# Raw responses, one entry per topic that has at least one negative review
keyword_negative_output_tan_rse_wa = []
# Topic columns to iterate over (the 'review_text' column is not a topic)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# One-element list: it is handed to the progress thread below and incremented in the loop
keyword_counter_tan_rse_wa = [0]
keyword_input_token_tan_rse_wa = 0
keyword_output_token_tan_rse_wa = 0
keyword_start_time_loop_tan_rse_wa = time.time()

# Threading setup for the dynamic progress message
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_tan_rse_wa, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Look the store's frame up once rather than twice per topic
    sentiment_df_tan_rse_wa = keyword_dataframes['tan_rse_wa_final_sen_df_jul']
    for topic in keyword_topics:
        keyword_counter_tan_rse_wa[0] += 1
        # Reviews whose value in this topic column is -1 (the negative tag)
        filtered_comments_tan_rse_wa = sentiment_df_tan_rse_wa.loc[sentiment_df_tan_rse_wa[topic] == -1, 'review_text'].tolist()
        # Only call the model when the topic has at least one negative review
        if filtered_comments_tan_rse_wa:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_tan_rse_wa, topic)
            keyword_negative_output_tan_rse_wa.append(keywords)
            keyword_input_token_tan_rse_wa += input_tokens_loop
            keyword_output_token_tan_rse_wa += output_token_loop
finally:
    # Stop the progress thread even when a lookup or model call raised; without this
    # the thread (which presumably loops until stop_event is set) would be left running
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tan_rse_wa = time.time()
# Costs in USD: 0.01 per 1000 input tokens and 0.03 per 1000 output tokens, each rounded to 2 decimals
keyword_cost_input_token_tan_rse_wa = round((0.01/1000)*keyword_input_token_tan_rse_wa,2)
keyword_cost_output_token_tan_rse_wa = round((0.03/1000)*keyword_output_token_tan_rse_wa,2)
keyword_total_cost_tan_rse_wa = keyword_cost_input_token_tan_rse_wa + keyword_cost_output_token_tan_rse_wa
keyword_total_time_loop_tan_rse_wa = keyword_end_time_loop_tan_rse_wa - keyword_start_time_loop_tan_rse_wa

# Display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_tan_rse_wa[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tan_rse_wa,1))
print("Total Input Tokens - ", keyword_input_token_tan_rse_wa)
print("Total Input Cost = USD ",keyword_cost_input_token_tan_rse_wa)
print("Total Output Tokens - ", keyword_output_token_tan_rse_wa)
print("Total Output Cost = USD ",keyword_cost_output_token_tan_rse_wa)
print("Total Cost = USD ",round(keyword_total_cost_tan_rse_wa,2))

Executed  10  Iterations
Total Execution time (in secs) -  18.0
Total Input Tokens -  8622
Total Input Cost = USD  0.09
Total Output Tokens -  707
Total Output Cost = USD  0.02
Total Cost = USD  0.11


In [771]:
# Build the negative keyword/phrase summary table for store 'tan_rse_wa'.
# Output columns: identifying fields followed by one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

input_store_identifier = 'tan_rse_wa'
# Store display name: value of the first keyword_mappings entry whose key contains
# the identifier; falls back to a placeholder when nothing matches
store_name_tan_rse_wa = next(
    (value for key, value in keyword_mappings.items() if input_store_identifier in key),
    "Store Name Not Found",
)

# Rows are collected in a list and converted to a DataFrame once, instead of
# growing the DataFrame with pd.concat on every row
negative_rows_tan_rse_wa = []
for json_str in keyword_negative_output_tan_rse_wa:
    # Each response is parsed as a JSON object mapping a category (used as the
    # column name) to a list of entries holding 'keywords' and 'phrases' strings
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # Every entry yields a 'keywords' row followed by a 'phrases' row
            for entry_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_tan_rse_wa
                row_data['Sentiment'] = 'negative'
                row_data['Type'] = entry_type
                # Trim each comma-separated item and re-join with ', '
                row_data[category] = ', '.join(item.strip() for item in content[entry_type].split(','))
                negative_rows_tan_rse_wa.append(row_data)

if negative_rows_tan_rse_wa:
    # Consolidate rows that share 'Store Name', 'Sentiment' and 'Type' into one row,
    # joining the non-null text of every topic column with a space
    negative_keywords_tan_rse_wa = (
        pd.DataFrame(negative_rows_tan_rse_wa)
        .groupby(['Store Name', 'Sentiment', 'Type'])
        .agg(lambda x: ' '.join(x.dropna()))
        .reset_index()
    )
else:
    # No responses were collected for this store. Grouping an empty frame fails with
    # "ValueError: cannot insert Type, already exists", so return an empty table
    # that still carries the expected columns.
    negative_keywords_tan_rse_wa = pd.DataFrame(columns=columns)

negative_keywords_tan_rse_wa

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Tanishq-Redmond Seattle, WA",negative,keywords,"unfair pricing:2, unclear:1, overpaid:1, lack ...","uninterested staff :1, unclear bill :1, denied...","terrible employees:2, scammed:1, uninterested:...","breakdown:1, hard:1","traditional :2, contemporary :1, stylish :1, l...",,"unfair pricing:2, unclear:1, overpaid:1, addit...","overpriced:1, unreasonable:1, unclear:1, highe...","unreasonable :1, disappointed :1","refused :1, less gold :1"
1,"Tanishq-Redmond Seattle, WA",negative,phrases,weren’t transparent about the making charges:1...,no one came or asked what we were looking for ...,No one came or asked what we were looking for:...,"earrings might be hard to wear:1, can breakdow...",Most of their collection is very traditional :...,,weren’t transparent about the making charges:1...,"overpaid for my purchase:1, much higher:1, com...","back hoop kept falling off :2, product quality...","couldn’t go back on the exchange :1, refused t..."


#### tan_sc_ca

In [772]:
# Extract negative keywords/phrases for store 'tan_sc_ca', one negative_keywords() call per topic.
# Raw responses, one entry per topic that has at least one negative review
keyword_negative_output_tan_sc_ca = []
# Topic columns to iterate over (the 'review_text' column is not a topic)
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# One-element list: it is handed to the progress thread below and incremented in the loop
keyword_counter_tan_sc_ca = [0]
keyword_input_token_tan_sc_ca = 0
keyword_output_token_tan_sc_ca = 0
keyword_start_time_loop_tan_sc_ca = time.time()

# Threading setup for the dynamic progress message
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_tan_sc_ca, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Look the store's frame up once rather than twice per topic
    sentiment_df_tan_sc_ca = keyword_dataframes['tan_sc_ca_final_sen_df_jul']
    for topic in keyword_topics:
        keyword_counter_tan_sc_ca[0] += 1
        # Reviews whose value in this topic column is -1 (the negative tag)
        filtered_comments_tan_sc_ca = sentiment_df_tan_sc_ca.loc[sentiment_df_tan_sc_ca[topic] == -1, 'review_text'].tolist()
        # Only call the model when the topic has at least one negative review
        if filtered_comments_tan_sc_ca:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_tan_sc_ca, topic)
            keyword_negative_output_tan_sc_ca.append(keywords)
            keyword_input_token_tan_sc_ca += input_tokens_loop
            keyword_output_token_tan_sc_ca += output_token_loop
finally:
    # Stop the progress thread even when a lookup or model call raised; without this
    # the thread (which presumably loops until stop_event is set) would be left running
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tan_sc_ca = time.time()
keyword_cost_input_token_tan_sc_ca = round((0.01/1000)*keyword_input_token_tan_sc_ca,2)
keyword_cost_output_token_tan_sc_ca = round((0.03/1000)*keyword_output_token_tan_sc_ca,2)
keyword_total_cost_tan_sc_ca = keyword_cost_input_token_tan_sc_ca + keyword_cost_output_token_tan_sc_ca
keyword_total_time_loop_tan_sc_ca = keyword_end_time_loop_tan_sc_ca - keyword_start_time_loop_tan_sc_ca

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_tan_sc_ca[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tan_sc_ca,1))
print("Total Input Tokens - ", keyword_input_token_tan_sc_ca)
print("Total Input Cost = USD ",keyword_cost_input_token_tan_sc_ca)
print("Total Output Tokens - ", keyword_output_token_tan_sc_ca)
print("Total Output Cost = USD ",keyword_cost_output_token_tan_sc_ca)
print("Total Cost = USD ",round(keyword_total_cost_tan_sc_ca,2))

Executed  10  Iterations
Total Execution time (in secs) -  8.5
Total Input Tokens -  4760
Total Input Cost = USD  0.05
Total Output Tokens -  347
Total Output Cost = USD  0.01
Total Cost = USD  0.06


In [773]:
# Column layout of the per-store keyword table: three identifying columns, then one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store's display name once; the lookup does not depend on the response being parsed.
# The identifier is matched as a substring of the keyword_mappings keys and the first hit wins.
input_store_identifier = 'tan_sc_ca'
store_name_tan_sc_ca = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name_tan_sc_ca = value
        break

# Collect every output row as a plain dict and build the DataFrame once at the end,
# instead of re-concatenating the growing DataFrame for each row.
keyword_rows_tan_sc_ca = []
for json_str in keyword_negative_output_tan_sc_ca:
    # Each response maps a topic name to a list of entries with 'keywords' and 'phrases' strings
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # Keywords row first, then phrases row; only this topic's column is filled, the rest stay None
            for entry_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_tan_sc_ca
                row_data['Sentiment'] = 'negative'
                row_data['Type'] = entry_type
                # Normalise the comma-separated items: trim each one and re-join with ", "
                row_data[category] = ', '.join(item.strip() for item in content[entry_type].split(','))
                keyword_rows_tan_sc_ca.append(row_data)

if keyword_rows_tan_sc_ca:
    negative_keywords_tan_sc_ca = pd.DataFrame(keyword_rows_tan_sc_ca)
else:
    # No responses for this store: keep an empty table with the expected columns
    negative_keywords_tan_sc_ca = pd.DataFrame(columns=columns)

# Collapse to one row per (Store Name, Sentiment, Type); within a group the non-null
# values of each topic column are joined with a single space.
negative_keywords_tan_sc_ca = negative_keywords_tan_sc_ca.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

negative_keywords_tan_sc_ca

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Tanishq-Santa Clara, CA",negative,keywords,,"rude :3, unprofessional :2, worst :2, impatien...","rude :3, unprofessional :2, impatient :1, unin...","design element:1, finishing:1",,,"high making charges:2, overpriced:1, unreasona...","over priced:1, expensive:1, high:1",,
1,"Tanishq-Santa Clara, CA",negative,phrases,,"worst experience ever :2, extremely rude :1, w...","worst customer service :2, spoke in a condesce...","broke a design element:1, finishing on this re...",,,"unreasonable high making charges:1, making hol...","prices over online and in store varies:1, maki...",,


#### tan_sc_sh

In [774]:
# Responses returned by negative_keywords for store tan_sc_sh, one entry per topic
# that has at least one negative review (a list; each entry is parsed with json.loads later).
keyword_negative_output_tan_sc_sh = []
# Topic columns of the sentiment table to scan
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Mutable one-element list: the same object is handed to the progress thread and incremented by the loop
keyword_counter_tan_sc_sh = [0]
keyword_input_token_tan_sc_sh = 0
keyword_output_token_tan_sc_sh = 0
keyword_start_time_loop_tan_sc_sh = time.time()

# Threading setup: a background thread reports progress until stop_event is set
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_tan_sc_sh, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Look the store's sentiment table up once instead of twice per topic
    sentiment_df_tan_sc_sh = keyword_dataframes['tan_sc_sh_final_sen_df_jul']
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_tan_sc_sh[0] += 1
        # Keep only the reviews whose score for this topic is -1 (negative)
        filtered_comments_tan_sc_sh = sentiment_df_tan_sc_sh[sentiment_df_tan_sc_sh[topic] == -1]['review_text'].tolist()
        # Topics without negative reviews are skipped, so no API call is made for them
        if filtered_comments_tan_sc_sh:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_tan_sc_sh, topic)
            keyword_negative_output_tan_sc_sh.append(keywords)
            keyword_input_token_tan_sc_sh += input_tokens_loop
            keyword_output_token_tan_sc_sh += output_token_loop
finally:
    # Always stop the progress thread, even when a lookup or API call raises;
    # otherwise it keeps running (and clearing the cell output) after the failure.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tan_sc_sh = time.time()
# Cost in USD at 0.01 per 1000 input tokens and 0.03 per 1000 output tokens.
# NOTE: each component is rounded to cents before being summed, so sub-cent amounts are dropped from the total.
keyword_cost_input_token_tan_sc_sh = round((0.01/1000)*keyword_input_token_tan_sc_sh,2)
keyword_cost_output_token_tan_sc_sh = round((0.03/1000)*keyword_output_token_tan_sc_sh,2)
keyword_total_cost_tan_sc_sh = keyword_cost_input_token_tan_sc_sh + keyword_cost_output_token_tan_sc_sh
keyword_total_time_loop_tan_sc_sh = keyword_end_time_loop_tan_sc_sh - keyword_start_time_loop_tan_sc_sh

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_tan_sc_sh[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tan_sc_sh,1))
print("Total Input Tokens - ", keyword_input_token_tan_sc_sh)
print("Total Input Cost = USD ",keyword_cost_input_token_tan_sc_sh)
print("Total Output Tokens - ", keyword_output_token_tan_sc_sh)
print("Total Output Cost = USD ",keyword_cost_output_token_tan_sc_sh)
print("Total Cost = USD ",round(keyword_total_cost_tan_sc_sh,2))

Executed  10  Iterations
Total Execution time (in secs) -  4.5
Total Input Tokens -  2289
Total Input Cost = USD  0.02
Total Output Tokens -  113
Total Output Cost = USD  0.0
Total Cost = USD  0.02


In [775]:
# Column layout of the per-store keyword table: three identifying columns, then one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store's display name once; the lookup does not depend on the response being parsed.
# The identifier is matched as a substring of the keyword_mappings keys and the first hit wins.
input_store_identifier = 'tan_sc_sh'
store_name_tan_sc_sh = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name_tan_sc_sh = value
        break

# Collect every output row as a plain dict and build the DataFrame once at the end,
# instead of re-concatenating the growing DataFrame for each row.
keyword_rows_tan_sc_sh = []
for json_str in keyword_negative_output_tan_sc_sh:
    # Each response maps a topic name to a list of entries with 'keywords' and 'phrases' strings
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # Keywords row first, then phrases row; only this topic's column is filled, the rest stay None
            for entry_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_tan_sc_sh
                row_data['Sentiment'] = 'negative'
                row_data['Type'] = entry_type
                # Normalise the comma-separated items: trim each one and re-join with ", "
                row_data[category] = ', '.join(item.strip() for item in content[entry_type].split(','))
                keyword_rows_tan_sc_sh.append(row_data)

if keyword_rows_tan_sc_sh:
    negative_keywords_tan_sc_sh = pd.DataFrame(keyword_rows_tan_sc_sh)
else:
    # No responses for this store: keep an empty table with the expected columns
    negative_keywords_tan_sc_sh = pd.DataFrame(columns=columns)

# Collapse to one row per (Store Name, Sentiment, Type); within a group the non-null
# values of each topic column are joined with a single space.
negative_keywords_tan_sc_sh = negative_keywords_tan_sc_sh.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

negative_keywords_tan_sc_sh

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Tanishq Jewellers-Sharjah Central, SH",negative,keywords,,,customer service :1,,Limited options:1,,,,,
1,"Tanishq Jewellers-Sharjah Central, SH",negative,phrases,,,Worst customer service :1,,pay attention to diversity:1,make offers suitable for all occasions: 1,,,,


#### tan_taj_db

In [776]:
# Responses returned by negative_keywords for store tan_taj_db, one entry per topic
# that has at least one negative review (a list; each entry is parsed with json.loads later).
keyword_negative_output_tan_taj_db = []
# Topic columns of the sentiment table to scan
keyword_topics = ['Customer Confidence',
                  'Store Experience',
                  'Store Staff',
                  'Product Design',
                  'Product Variety',
                  'Discount',
                  'Making Charge',
                  'Price',
                  'Product Quality',
                  'Jewellery Exchange']

# Mutable one-element list: the same object is handed to the progress thread and incremented by the loop
keyword_counter_tan_taj_db = [0]
keyword_input_token_tan_taj_db = 0
keyword_output_token_tan_taj_db = 0
keyword_start_time_loop_tan_taj_db = time.time()

# Threading setup: a background thread reports progress until stop_event is set
keyword_total_iterations = len(keyword_topics)
stop_event = threading.Event()
message_thread = threading.Thread(target=print_dynamic_message_keyword, args=(keyword_counter_tan_taj_db, keyword_total_iterations, stop_event))
message_thread.start()

try:
    # Look the store's sentiment table up once instead of twice per topic
    sentiment_df_tan_taj_db = keyword_dataframes['tan_taj_db_final_sen_df_jul']
    # Loop through each topic
    for topic in keyword_topics:
        keyword_counter_tan_taj_db[0] += 1
        # Keep only the reviews whose score for this topic is -1 (negative)
        filtered_comments_tan_taj_db = sentiment_df_tan_taj_db[sentiment_df_tan_taj_db[topic] == -1]['review_text'].tolist()
        # Topics without negative reviews are skipped, so no API call is made for them
        if filtered_comments_tan_taj_db:
            keywords, input_tokens_loop, output_token_loop = negative_keywords(filtered_comments_tan_taj_db, topic)
            keyword_negative_output_tan_taj_db.append(keywords)
            keyword_input_token_tan_taj_db += input_tokens_loop
            keyword_output_token_tan_taj_db += output_token_loop
finally:
    # Always stop the progress thread, even when a lookup or API call raises;
    # otherwise it keeps running (and clearing the cell output) after the failure.
    stop_event.set()
    message_thread.join()

keyword_end_time_loop_tan_taj_db = time.time()
# Cost in USD at 0.01 per 1000 input tokens and 0.03 per 1000 output tokens.
# NOTE: each component is rounded to cents before being summed, so sub-cent amounts are dropped from the total.
keyword_cost_input_token_tan_taj_db = round((0.01/1000)*keyword_input_token_tan_taj_db,2)
keyword_cost_output_token_tan_taj_db = round((0.03/1000)*keyword_output_token_tan_taj_db,2)
keyword_total_cost_tan_taj_db = keyword_cost_input_token_tan_taj_db + keyword_cost_output_token_tan_taj_db
keyword_total_time_loop_tan_taj_db = keyword_end_time_loop_tan_taj_db - keyword_start_time_loop_tan_taj_db

#display loop performance parameters & cost
clear_output(wait=True)
print("Executed ",keyword_counter_tan_taj_db[0], " Iterations")
print("Total Execution time (in secs) - ", round(keyword_total_time_loop_tan_taj_db,1))
print("Total Input Tokens - ", keyword_input_token_tan_taj_db)
print("Total Input Cost = USD ",keyword_cost_input_token_tan_taj_db)
print("Total Output Tokens - ", keyword_output_token_tan_taj_db)
print("Total Output Cost = USD ",keyword_cost_output_token_tan_taj_db)
print("Total Cost = USD ",round(keyword_total_cost_tan_taj_db,2))

Executed  10  Iterations
Total Execution time (in secs) -  4.0
Total Input Tokens -  2485
Total Input Cost = USD  0.02
Total Output Tokens -  154
Total Output Cost = USD  0.0
Total Cost = USD  0.02


In [777]:
# Column layout of the per-store keyword table: three identifying columns, then one column per topic
columns = ['Store Name',
           'Sentiment',
           'Type',
           'Customer Confidence',
           'Store Experience',
           'Store Staff',
           'Product Design',
           'Product Variety',
           'Discount',
           'Making Charge',
           'Price',
           'Product Quality',
           'Jewellery Exchange']

# Resolve the store's display name once; the lookup does not depend on the response being parsed.
# The identifier is matched as a substring of the keyword_mappings keys and the first hit wins.
input_store_identifier = 'tan_taj_db'
store_name_tan_taj_db = "Store Name Not Found"
for key, value in keyword_mappings.items():
    if input_store_identifier in key:
        store_name_tan_taj_db = value
        break

# Collect every output row as a plain dict and build the DataFrame once at the end,
# instead of re-concatenating the growing DataFrame for each row.
keyword_rows_tan_taj_db = []
for json_str in keyword_negative_output_tan_taj_db:
    # Each response maps a topic name to a list of entries with 'keywords' and 'phrases' strings
    data = json.loads(json_str)
    for category, content_list in data.items():
        for content in content_list:
            # Keywords row first, then phrases row; only this topic's column is filled, the rest stay None
            for entry_type in ('keywords', 'phrases'):
                row_data = {column: None for column in columns}
                row_data['Store Name'] = store_name_tan_taj_db
                row_data['Sentiment'] = 'negative'
                row_data['Type'] = entry_type
                # Normalise the comma-separated items: trim each one and re-join with ", "
                row_data[category] = ', '.join(item.strip() for item in content[entry_type].split(','))
                keyword_rows_tan_taj_db.append(row_data)

if keyword_rows_tan_taj_db:
    negative_keywords_tan_taj_db = pd.DataFrame(keyword_rows_tan_taj_db)
else:
    # No responses for this store: keep an empty table with the expected columns
    negative_keywords_tan_taj_db = pd.DataFrame(columns=columns)

# Collapse to one row per (Store Name, Sentiment, Type); within a group the non-null
# values of each topic column are joined with a single space.
negative_keywords_tan_taj_db = negative_keywords_tan_taj_db.groupby(['Store Name', 'Sentiment', 'Type']).agg(lambda x: ' '.join(x.dropna())).reset_index()

negative_keywords_tan_taj_db

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,"Tanishq Jewellers-Taj, DB",negative,keywords,"layer :1, trust :1","slow service:1, small shop:1, stand in que:1",No relevant negative keywords/ phrases,,,,,,,
1,"Tanishq Jewellers-Taj, DB",negative,phrases,"Bad experience :1, Don't trust :1, be careful :1","extremely small shop:1, had to stand in que fo...",No relevant negative keywords/ phrases,,,,,,,


In [291]:
# Total cost of the negative-keyword runs: the per-store totals added in a fixed order.
# NOTE(review): this is a hand-maintained list; the stores processed just above
# (tan_sc_ca, tan_sc_sh, tan_taj_db) are not part of it - confirm whether that is intended.
keyword_negative_total_cost = (
    keyword_total_cost_bhi_ak
    + keyword_total_cost_joy_ab
    + keyword_total_cost_joy_st_af
    + keyword_total_cost_joy_dm_ad
    + keyword_total_cost_joy_mz_ad
    + keyword_total_cost_joy_sh_ad
    + keyword_total_cost_mal_sc
    + keyword_total_cost_mal_ab
    + keyword_total_cost_mal_b1_af
    + keyword_total_cost_mal_ak
    + keyword_total_cost_mal_aw_ad
    + keyword_total_cost_mal_b1_ad
    + keyword_total_cost_mal_b2_ad
    + keyword_total_cost_mal_lu_ad
    + keyword_total_cost_mal_mb
    + keyword_total_cost_mal_sh_ad
    + keyword_total_cost_mna_mb
    + keyword_total_cost_joy_ak
    + keyword_total_cost_bhi_dec_ga
    + keyword_total_cost_jar_bol_il
    + keyword_total_cost_jar_ver_il
    + keyword_total_cost_jar_aur_il
    + keyword_total_cost_jar_alg_il
    + keyword_total_cost_jar_sch_il
    + keyword_total_cost_joy_suw_ga
    + keyword_total_cost_joy_chi_il
    + keyword_total_cost_joy_hou_tx
    + keyword_total_cost_joy_fri_tx
    + keyword_total_cost_mal_chi_il
    + keyword_total_cost_mal_nap_il
    + keyword_total_cost_mal_ise_nj
    + keyword_total_cost_mal_fri_tx
    + keyword_total_cost_mal_ric_tx
    + keyword_total_cost_son_ise_nj
    + keyword_total_cost_tif_chi_il
    + keyword_total_cost_tif_nor_il
    + keyword_total_cost_tif_sko_il
    + keyword_total_cost_tif_eas_nj
    + keyword_total_cost_tif_red_nj
    + keyword_total_cost_tif_sho_nj
    + keyword_total_cost_tif_par_nj
    + keyword_total_cost_vbj_fri_tx
    + keyword_total_cost_tan_chi_il
    + keyword_total_cost_tan_fri_tx
    + keyword_total_cost_tan_hou_tx
    + keyword_total_cost_tan_new_nj
    + keyword_total_cost_tan_bar_db
    + keyword_total_cost_tan_fah_db
    + keyword_total_cost_tan_kar_db
    + keyword_total_cost_tan_ham_ad
    + keyword_total_cost_tan_sil_db
)
#+keyword_total_cost_eve_joh_ga

In [292]:
# Show the summed cost of the negative-keyword runs computed in the previous cell
keyword_negative_total_cost

4.57

## Combined_df_negative

In [778]:
# Names of the per-store negative keyword tables to stack into one DataFrame
negative_keyword_df_list = ["negative_keywords_agd_mb",
                            "negative_keywords_bhi_ak",
                            "negative_keywords_bhi_dec_ga",
                            "negative_keywords_eve_joh_ga",
                            "negative_keywords_jar_alg_il",
                            "negative_keywords_jar_aur_il",
                            "negative_keywords_jar_bol_il",
                            "negative_keywords_jar_lom_il",
                            "negative_keywords_jar_orl_il",
                            "negative_keywords_jar_sch_il",
                            "negative_keywords_jar_ver_il",
                            "negative_keywords_joy_ab",
                            "negative_keywords_joy_ak",
                            "negative_keywords_joy_chi_il",
                            "negative_keywords_joy_dm_ad",
                            "negative_keywords_joy_fri_tx",
                            "negative_keywords_joy_hou_tx",
                            "negative_keywords_joy_mz_ad",
                            "negative_keywords_joy_sh_ad",
                            "negative_keywords_joy_st_af",
                            "negative_keywords_joy_suw_ga",
                            "negative_keywords_kan_mb",
                            "negative_keywords_mal_ab",
                            "negative_keywords_mal_ak",
                            "negative_keywords_mal_aw_ad",
                            "negative_keywords_mal_b1_ad",
                            "negative_keywords_mal_b1_af",
                            "negative_keywords_mal_b2_ad",
                            "negative_keywords_mal_b2_af",
                            "negative_keywords_mal_chi_il",
                            "negative_keywords_mal_dm_ad",
                            "negative_keywords_mal_fri_tx",
                            "negative_keywords_mal_ise_nj",
                            "negative_keywords_mal_lu_ad",
                            "negative_keywords_mal_mb",
                            "negative_keywords_mal_nap_il",
                            "negative_keywords_mal_ric_tx",
                            "negative_keywords_mal_sc",
                            "negative_keywords_mal_sh_ad",
                            "negative_keywords_may_vie_va",
                            "negative_keywords_mia_awm_ad",
                            "negative_keywords_mia_bur_db",
                            "negative_keywords_min_ak",
                            "negative_keywords_mna_mb",
                            "negative_keywords_son_ise_nj",
                            "negative_keywords_tan_am_om",
                            "negative_keywords_tan_atl_ga",
                            "negative_keywords_tan_bar_db",
                            "negative_keywords_tan_chi_il",
                            "negative_keywords_tan_fah_db",
                            "negative_keywords_tan_fc_qa",
                            "negative_keywords_tan_fri_tx",
                            "negative_keywords_tan_gs_db",
                            "negative_keywords_tan_ham_ad",
                            "negative_keywords_tan_hou_tx",
                            "negative_keywords_tan_kar_db",
                            "negative_keywords_tan_lul_qa",
                            "negative_keywords_tan_mank_db",
                            "negative_keywords_tan_mee_db",
                            "negative_keywords_tan_new_nj",
                            "negative_keywords_tan_rol_sh",
                            "negative_keywords_tan_rse_wa",
                            "negative_keywords_tan_sc_ca",
                            "negative_keywords_tan_sc_sh",
                            "negative_keywords_tan_sil_db",
                            "negative_keywords_tan_taj_db",
                            "negative_keywords_tif_chi_il",
                            "negative_keywords_tif_eas_nj",
                            "negative_keywords_tif_hac_nj",
                            "negative_keywords_tif_nor_il",
                            "negative_keywords_tif_par_nj",
                            "negative_keywords_tif_red_nj",
                            "negative_keywords_tif_ric_va",
                            "negative_keywords_tif_sho_nj",
                            "negative_keywords_tif_sko_il",
                            "negative_keywords_tif_vie_va",
                            "negative_keywords_vbj_fri_tx"]

# Look each table up by name in the notebook namespace rather than eval()-ing the string.
# Tables that were never built are still skipped (best effort), but they are recorded and
# reported instead of being hidden by a bare `except: pass`.
negative_keyword_frames = []
skipped_negative_keyword_dfs = []
for df_name in negative_keyword_df_list:
    store_frame = globals().get(df_name)
    if isinstance(store_frame, pd.DataFrame):
        negative_keyword_frames.append(store_frame)
    else:
        skipped_negative_keyword_dfs.append(df_name)

# Concatenate once; ignore_index=True already yields a fresh 0..n-1 index, so no reset_index is needed.
if negative_keyword_frames:
    combined_df_negative_keywords = pd.concat(negative_keyword_frames, ignore_index=True)
else:
    # pd.concat raises on an empty list; fall back to an empty table
    combined_df_negative_keywords = pd.DataFrame()

if skipped_negative_keyword_dfs:
    print("Skipped (not available as DataFrames):", ", ".join(skipped_negative_keyword_dfs))
    

In [779]:
# Save the combined negative keyword table as an Excel snapshot, without the index column
combined_df_negative_keywords.to_excel("temp/combined_df_negative_keywords_current.xlsx",index=False)

# Combined Keywords

# Reload both combined keyword tables from their Excel snapshots in temp/.
# This replaces the in-memory combined_df_negative_keywords built above with the saved copy.
combined_df_negative_keywords = pd.read_excel('temp/combined_df_negative_keywords_current.xlsx')
combined_df_positive_keywords = pd.read_excel('temp/combined_df_positive_keywords_current.xlsx')

In [780]:
# Stack the positive rows first and the negative rows after them, renumbering the index from 0
keywords_combined_df = pd.concat([combined_df_positive_keywords, combined_df_negative_keywords], ignore_index=True)

In [781]:
# Export the combined positive + negative keyword table to Excel, without the index column
keywords_combined_df.to_excel('recent_keywords_filtered/combined_keywords.xlsx',index=False)

In [782]:
# Export the same combined table as Parquet alongside the Excel file
keywords_combined_df.to_parquet('recent_keywords_filtered/combined_keywords.parquet',index=False)

In [127]:
# Spot check: isolate the negative "keywords" row(s) of a single store
test_filter = combined_keywords_working_df.loc[
    combined_keywords_working_df['Store Name'].eq('Malabar Gold & Diamonds - Silicon Oasis Central')
    & combined_keywords_working_df['Type'].eq('keywords')
    & combined_keywords_working_df['Sentiment'].eq('negative')
]

# 'Product Design' entry of the selected row(s)
cell_content = test_filter['Product Design']

In [128]:
# Remove the column-width display limit so the cell text is not truncated, then show the selection.
# Note: this changes the pandas display option for the rest of the session.
pd.set_option('display.max_colwidth', None)
cell_content

94    No relevant negative keywords
Name: Product Design, dtype: object

In [116]:
# Pass the selected cell content to combine_keywords and unpack its three return values
# (the response text and, presumably, the input/output token counts - confirm against the helper).
api_response, input_token, output_token = combine_keywords(cell_content)

In [117]:
# Inspect the raw response text before any clean-up
api_response

'```json\n{\n    "Great Experience": 33,\n    "Excellent Service": 12,\n    "Good Experience": 12,\n    "Knowledgeable Staff": 3,\n    "Welcoming Staff": 3,\n    "Pleasant": 5,\n    "Smooth": 3,\n    "Wonderful": 3,\n    "Amazing": 3,\n    "Seamless": 1\n}\n```'

In [118]:

# Remove the Markdown code-fence markers from the response, then trim surrounding whitespace
api_cleaned = api_response
for fence_marker in ('```json', '```'):
    api_cleaned = api_cleaned.replace(fence_marker, '')
api_cleaned = api_cleaned.strip()


In [119]:
# Inspect the response text after the code-fence markers were stripped
api_cleaned

'{\n    "Great Experience": 33,\n    "Excellent Service": 12,\n    "Good Experience": 12,\n    "Knowledgeable Staff": 3,\n    "Welcoming Staff": 3,\n    "Pleasant": 5,\n    "Smooth": 3,\n    "Wonderful": 3,\n    "Amazing": 3,\n    "Seamless": 1\n}'

In [120]:
# Parse the cleaned JSON text and display the parsed result
response_dict = json.loads(api_cleaned)
response_dict

{'Great Experience': 33,
 'Excellent Service': 12,
 'Good Experience': 12,
 'Knowledgeable Staff': 3,
 'Welcoming Staff': 3,
 'Pleasant': 5,
 'Smooth': 3,
 'Wonderful': 3,
 'Amazing': 3,
 'Seamless': 1}

In [121]:
# Empty one-column frame that will receive the flattened response
temp_df = pd.DataFrame(columns=['Store Experience'])

# Flatten the parsed response into "key: value" pairs separated by ", " (no curly braces)
key_value_pairs = (f"{key}: {value}" for key, value in response_dict.items())
formatted_string = ', '.join(key_value_pairs)

# Store the flattened string in the first row of the 'Store Experience' column
temp_df.at[0, 'Store Experience'] = formatted_string

In [122]:
# Display the one-row frame built in the previous cell
temp_df

Unnamed: 0,Store Experience
0,"Great Experience: 33, Excellent Service: 12, Good Experience: 12, Knowledgeable Staff: 3, Welcoming Staff: 3, Pleasant: 5, Smooth: 3, Wonderful: 3, Amazing: 3, Seamless: 1"


## Approved for Phrases

In [89]:
# Stores included in this run.
store_names = [
    'Malabar Gold & Diamonds - Silicon Oasis Central',
    'Malabar Gold and Diamonds - Al Fahidi Street - Bur Dubai (Branch 1)',
]

# Distinct 'Type' and 'Sentiment' values present in the working frame.
type_list = pd.unique(combined_keywords_working_df['Type']).tolist()
sentiment_list = pd.unique(combined_keywords_working_df['Sentiment']).tolist()

# Column layout for the result frame, copied from keywords_combined_df.
columns_poc = keywords_combined_df.columns.to_list()

In [95]:
# Build one result row per (store, sentiment) combination for the 'phrases'
# type only. Each topic column receives the combined response flattened to
# "label: count, label: count, ..." text.
temp_df = pd.DataFrame(columns=columns_poc)
row_index = 0

for store in store_names:
    for type_n in type_list:
        # This cell handles phrase rows only; every other type is skipped.
        if type_n != 'phrases':
            continue

        for sentiment in sentiment_list:
            filter_df_poc = combined_keywords_working_df[
                (combined_keywords_working_df['Store Name'] == store) &
                (combined_keywords_working_df['Type'] == type_n) &
                (combined_keywords_working_df['Sentiment'] == sentiment)
            ]

            for topic in columns_to_concatenate:
                cell_content_poc = filter_df_poc[topic]
                api_response, input_token, output_token = combine_phrases(cell_content_poc)

                # Remove the ```json / ``` fence markers around the JSON payload.
                api_cleaned = api_response.replace('```json', '').replace('```', '').strip()

                # Best effort: a single malformed response is reported and skipped
                # so it does not abort the remaining (already paid for) API calls.
                try:
                    response_dict = json.loads(api_cleaned)
                    formatted_string = ', '.join([f"{key}: {value}" for key, value in response_dict.items()])
                    # Store the row's identifying fields and the flattened text
                    # in the column named after the current topic.
                    temp_df.at[row_index, 'Store Name'] = store
                    temp_df.at[row_index, 'Sentiment'] = sentiment
                    temp_df.at[row_index, 'Type'] = type_n
                    temp_df.at[row_index, topic] = formatted_string
                except Exception as e:
                    print(f"{e} in {store}, {sentiment},{type_n},{topic},{api_cleaned}")

            # One output row per (store, sentiment) combination.
            row_index += 1
            

                    
    

## Approved for Keywords (and Phrases)

In [None]:
# Stores included in this run.
store_names = [
    'Malabar Gold & Diamonds - Silicon Oasis Central',
    'Malabar Gold and Diamonds - Al Fahidi Street - Bur Dubai (Branch 1)',
]

# Distinct 'Type' and 'Sentiment' values present in the working frame.
type_list = pd.unique(combined_keywords_working_df['Type']).tolist()
sentiment_list = pd.unique(combined_keywords_working_df['Sentiment']).tolist()

# Column layout for the result frame, copied from keywords_combined_df.
columns_poc = keywords_combined_df.columns.to_list()

In [134]:
# Build one result row per (store, type, sentiment) combination. Each topic
# column receives the combined response flattened to
# "label: count, label: count, ..." text.
temp_df = pd.DataFrame(columns=columns_poc)
row_index = 0

for store in store_names:
    for type_n in type_list:
        # Rows of type 'phrases' go through combine_phrases; every other type
        # goes through combine_keywords. The rest of the processing is shared.
        combine_fn = combine_phrases if type_n == 'phrases' else combine_keywords

        for sentiment in sentiment_list:
            filter_df_poc = combined_keywords_working_df[
                (combined_keywords_working_df['Store Name'] == store) &
                (combined_keywords_working_df['Type'] == type_n) &
                (combined_keywords_working_df['Sentiment'] == sentiment)
            ]

            for topic in columns_to_concatenate:
                cell_content_poc = filter_df_poc[topic]
                api_response, input_token, output_token = combine_fn(cell_content_poc)

                # Remove the ```json / ``` fence markers around the JSON payload.
                api_cleaned = api_response.replace('```json', '').replace('```', '').strip()

                # Best effort: a failed topic is reported and skipped so the
                # remaining topics and rows are still processed.
                try:
                    response_dict = json.loads(api_cleaned)
                    formatted_string = ', '.join([f"{key}: {value}" for key, value in response_dict.items()])
                    # Store the row's identifying fields and the flattened text
                    # in the column named after the current topic.
                    temp_df.at[row_index, 'Store Name'] = store
                    temp_df.at[row_index, 'Sentiment'] = sentiment
                    temp_df.at[row_index, 'Type'] = type_n
                    temp_df.at[row_index, topic] = formatted_string
                except Exception as e:
                    # Report the topic and the offending payload. The handler must
                    # not reference formatted_string: it is unset (or left over from
                    # an earlier iteration) when parsing is what failed.
                    print(f"{e} in {store}, {sentiment},{type_n},{topic},{api_cleaned}")

            # One output row per (store, type, sentiment) combination.
            row_index += 1
    

In [135]:
# Display the combined result frame built by the loop in the previous cell
temp_df

Unnamed: 0,Store Name,Sentiment,Type,Customer Confidence,Store Experience,Store Staff,Product Design,Product Variety,Discount,Making Charge,Price,Product Quality,Jewellery Exchange
0,Malabar Gold & Diamonds - Silicon Oasis Central,Positive,keywords,"Trust: 12, Knowledgeable: 3, Professional: 2, Transparent: 2, Understanding: 1, Supportive: 1","Great Experience: 33, Excellent Service: 12, Good Experience: 12, Knowledgeable Staff: 3, Welcoming Staff: 3, Pleasant: 5, Smooth: 3, Wonderful: 3, Amazing: 3, Seamless: 1","helpful: 105, friendly: 24, knowledgeable: 15, polite: 20, professional: 18, patient: 10, great service: 3, attentive: 2","unique designs: 5, good design: 8, beautiful collections: 3, exquisite collection: 2, wonderful collection: 2, great designs: 3, best designs: 2, amazing designs: 2, elegant designs: 1, new designs: 1","collection: 87, variety: 4, selection: 3, range: 3, designs: 1, options: 2","discount: 16, deal: 2, rate: 2, offer: 1, price: 1","reduced: 3, discounted: 2, making charges: 2, negotiate: 1, maximum reduction: 1","good price: 11, best price: 7, reasonable price: 3, great prices: 2, affordable: 1, cost effective: 1, attractive price: 1","good quality: 5, quality: 4, quality service and product: 1, top notch: 1, super quality: 1, quality gold: 1, quality is fantabulous: 1","Exchange: 4, Best Price: 2, Favorable Rates: 2, Transparent: 1"
1,Malabar Gold & Diamonds - Silicon Oasis Central,negative,keywords,"Unprofessional: 1, Cut Off: 1, Not to Visit: 1, Never Trusted: 1, Least Interest: 1, Not Bothered: 1, I Don't Care: 1","unhelpful: 3, wait: 2, rude: 1, dissatisfied: 1, nonsense: 1, irritating: 1, disappointing: 1, wasted: 1","rude: 3, unhelpful: 2, ignored: 2, dissatisfied: 1, hard time: 1, wait: 1, smirk: 1, unbothered: 1, leisurely: 1",No relevant negative keywords: 94,Product Variety: 94,"no-discount: 1, refused: 1, mercy: 1","high making charges: 1, overpay: 1, paying double: 1, charged 15%: 1, embarrassing: 1","Price Review: 1, Discount: 1, Higher Priced: 2, Budget: 1, Pocket Size: 1, Unnecessary Add: 1",Product Quality: 94,"unprofessional: 1, cut off: 1, profit: 1"
2,Malabar Gold & Diamonds - Silicon Oasis Central,Positive,phrases,"Trusted place to buy: 2, Completely transparent: 2, Trustworthy shopping experience: 1, Well trusted: 1, True blessing: 1, My first go-to choice: 1, Knowledgeable and well-behaved: 1, Knowledgeable and helpful: 1, Professional and always have good styles: 1, Guided the entire process: 1, Trustworthy brand: 1, Always trust & buy: 1","great experience: 30, very good customer service: 13, helpful and attentive: 8, friendly and helpful staff: 3, best experience: 3, good store to visit: 1, amazing place for purchasing jewelry: 1","very helpful: 29, excellent service: 13, great service: 10, very professional: 7, very patient: 4, very friendly: 2, very attentive: 1","excellent choice of designs: 2, lovely designs: 1, simple yet elegant bracelet: 1, best and unique designs: 2, ideal design: 1, helpful for showing designs: 1, good brief on the designs: 1, great eye for design: 1, perfect designs: 1, stunning collection: 1","lots of collections: 2, best options: 2, superb collections in diamonds: 1, good bangle collection and earrings also nice collection: 1, live at the collections shown: 1, beautiful collections: 1, great selection: 1, various product range: 1, huge selection: 1, variety of selections: 1","discount on making charges: 3, good discount: 3, discount on purchased item: 1, small discount at the end: 1, best deal: 1, good rate: 1","reduce making charge as much as possible: 1, reduction in making charges: 1, explaining the making charges: 1, less making cost: 1, opportunity to negotiate the making charges: 1, proper making charges: 1","Value for money: 1, Best rates: 2, Good rate: 1, Fair prices: 1, Different price ranges: 1, Great price: 1, Good price: 1, Better prices: 1, Within our budget: 1","quality of their gold was impressive: 1, good quality items: 1, quality service and quality product: 1, quality is fantabulous: 1, quality of their jewelry: 1, quality products: 1, quality of their 
pieces: 1, quality and style: 1, quality and nice pricing: 1, quality ended up: 1","Helped us with the exchange: 1, Whole process smooth and transparent: 1, Good rate for exchange: 1, Most favorable exchange rates: 2, Took my old jewellery for exchange: 1, Shafeeq was super helpful in getting us the best deal: 1"
3,Malabar Gold & Diamonds - Silicon Oasis Central,negative,phrases,"not paying back the certificate amount: 2, making a profit from the 4% cut-off: 1, used to buy gold bars from Malabar as a trusted source but that's the end of it: 1, given me the reason not to visit any of their stores in future: 1, never trusted it until I faced the same: 1, he doesn't know how to deal with customers: 1, he doesn't know how to respect someone's requirements and budget: 1, purposefully, he quoted higher priced products despite having products in my budget: 1","gave me such a hard time: 1, will not buy anything from here again: 1, could not speak with any manager: 1, had to wait for 40 minutes to have a salesman service: 1, not comfortable shopping at an outlet: 1, resizing time period: 1, waited and wasted: 1, none turned out: 1","gave me such a hard time: 1, extremely dissatisfied with the service provided: 1, did not want to help me at all: 1, were so rude and unhelpful: 1, had to wait for 40 minutes to have a salesman service: 1, couldn't see us: 1, wasting our time: 1, not bothered about customers: 1, I don't care attitude: 1, refused to call the manager: 1",No relevant negative phrases: 95,Try to add more collection in bangles and chains: 1,"didn't want to review the price: 1, to give any kind of discount: 1, offered me 50 AED discount on making charge: 1, had to refuse and leave: 1","making charges are high: 1, being charged 15% of making charge was quite embarrassing: 1, had to pay only 3% in another well-known jewellery: 1, don't really understand the complicated design which he meant: 1, really cheating the customers: 1","didn't want to review the price at all: 1, to give any kind of discount on the new item: 1, quoted higher priced products: 1, in spite of having product in my budget: 1, judged based on your pocket size: 1, unnecessarily add charges for card payments: 1, certification charges on Swiss coins are also up to 25% higher than competitors: 1",No relevant 
negative phrases: 95,"don't pay you the certificate amount back: 1, took me a week to change a necklace: 1, manager was not helping: 1, didn't want to review the price: 1, exchange was not going to happen: 1"
4,Malabar Gold and Diamonds - Al Fahidi Street - Bur Dubai (Branch 1),Positive,keywords,"trust: 30, reliable: 5, genuine: 4, confidence: 2, transparency: 1","great experience: 52, good experience: 44, wonderful experience: 15, best experience: 18, excellent service: 23, helpful staff: 23, friendly staff: 9, amazing experience: 9, nice experience: 8","good service: 394, helpful: 68, excellent service: 120, best service: 80, friendly: 15, patient: 9, communicative: 3","design: 20, beautiful designs: 14, unique designs: 6, latest designs: 6, good design: 8, amazing designs: 4, designs: 14, nice design: 2, best design: 2, chain design: 1","variety: 20, range: 10, collection: 19, designs: 4, options: 3","best discount: 42, great deal: 33, good discount: 26, best deal: 13, good deal: 5, special discount: 9, nice discount: 3, maximum discount: 1","reasonable: 14, less: 6, low: 2, nominal: 1, discount: 1","best price: 37, reasonable price: 10, value for money: 5, good price: 13, competitive rate: 3, affordable price: 4, fair price: 2, best rate: 2","quality: 6, good quality: 10, excellent quality: 3, high quality: 2, best quality: 1, top-notch: 1","Exchange: 18, Good: 2, Service: 2, Comfortable: 1, Price: 1, Policy: 1"
5,Malabar Gold and Diamonds - Al Fahidi Street - Bur Dubai (Branch 1),negative,keywords,"disappointed: 3, lied: 1, no trust: 1, denied: 1, insulted: 1, pathetic: 1, delayed: 1, deceitful: 1, shattered: 1, cheated: 1, never recommending: 1","unfriendly: 4, poor service: 3, rude: 2, unprofessional: 1, understaffed: 1, aggressive: 1, lazy: 1, long queue: 1, worst experience: 1, disappointed: 1","unfriendly: 4, rude: 3, unprofessional: 1, aggressive: 1, understaffed: 1, lazy: 1, hesitant: 1, gossiping: 1, arrogant: 1, disappointed: 1",No relevant negative keywords: 118,"Limited Options: 2, Slight Improvement: 1, Not Bad: 1","no discount: 2, expected discounts: 1, more discount/offer: 1, reasonable making charges: 1, denied: 1",Profit: 3,"expensive: 3, high: 2, additional cost: 1, financial loss: 2, betrayed: 1, cheated: 1","lost: 1, fall off: 1","denied coupon: 1, poor quality: 1, additional cost: 1, rejected full refund: 1, broke easily: 1, can't exchange: 1, not recommend: 1"
6,Malabar Gold and Diamonds - Al Fahidi Street - Bur Dubai (Branch 1),Positive,phrases,"trusted purchase: 3, highly recommended: 3, truly trustworthy: 3, trust in the brand: 2, trusted outlet: 1, trust of Malbar: 1, confidence in purchase: 2, most trusted and valuable brand: 1, trusted brand for a long time: 1","excellent customer service: 9, very good experience: 6, great shopping experience: 5, wonderful experience: 5, pleasant experience: 7, best service: 4, very helpful: 4, very patient: 4, amazing experience: 4, highly recommend: 3","thank you for your service: 10, great customer service: 12, wonderful service: 7, very helpful: 10, good customer service: 9, excellent service: 4, best service: 3","Excellent design: 9, Variety of designs: 4, Great designs: 4, Amazing designs: 2, Beautiful designs: 2, Best design: 5, Very good designs: 2, Wonderful designs: 2, Wide range of collection: 2, Good pattern gold: 1","variety of collections: 7, wide range of collection: 5, great variety of designs: 3, variety of options: 3, wide variety: 3, lots of varieties: 3, lot of collections: 2, various collections: 2, plethora of variety: 1, different varieties of gold: 1","gave us the best deal: 5, gave best discount: 5, discount on making: 8, best discounts: 5, gave us a good discount: 4, reasonable discount on making: 2, offered good discounts: 2, helped a lot with the discounts and pricing: 1, maximum discount: 1, good variety and discount on making: 1","reasonable making charges: 10, less making charges: 3, low charges: 1, nominal making charges: 1, special making charges and discount: 1, making charges are very very reasonable: 1, making charges also very less: 1, less mc 2.99%: 1, reducing the making charges: 1, negotiate on making charges: 1","best rates: 5, best price and service: 2, within our budget: 2, very good rate: 2, competitive rates: 1, excellent pricing: 1, favorable charges: 1, value for money: 1, helped with pricing: 1, great jewelry with best rate: 
1","highly recommended jewelry destination for quality and service: 6, quality of the product: 3, good quality gold: 3, quality product: 2, quality and purity gold: 1, excellent quality of pure gold: 1, quality of jewelry at Malabar Gold Shop is top-notch: 1, best place to buy gold jewellery for quality and design: 1, quality of the gold is top-notch: 1, high-quality jewelry: 1","assisted us so well: 1, provided us with the right price: 1, gave the best exchange rate: 1, helped us a lot during the exchange: 1, clear about the estimate and exchange offer: 1, very good experience while exchanging: 1, patiently assisted us with a good price: 1"
7,Malabar Gold and Diamonds - Al Fahidi Street - Bur Dubai (Branch 1),negative,phrases,"Lied to us and shattered trust: 2, No trust in this brand: 1, Denied the use of the coupon: 1, Customers will be insulted: 1, Very disappointed: 1, Payment was delayed: 1, System fault: 1, Feel betrayed: 1, Huge loss: 1, Not providing promised bonus money: 1","poor customer service: 5, long wait times: 3, unhelpful staff: 3, rude store manager: 1, disappointed with brand: 1, not recommending brand: 1","constantly asking them to attend us: 2, service is worst: 1, made us run from one person to another: 1, no more response: 1, wait wait in aggressive way: 1, manager did not care to listen: 1, passing customer to each other: 1, not caring for the customers: 1, didn't explain and show the product properly: 1, talking in their own language: 1, worst experience with Sanjesh: 1, very arrogant and rude: 1, doesn't have basic manners: 1, let their name down by having such people: 1, never recommending this brand anymore: 1","stones won't fall off easily: 1, ruby from the ring is lost: 1, fixing free of cost: 1","Find very few options in ear tops and earrings: 1, Choices of your products need slight improvement: 1, Need more varieties as before: 1","expected discounts more: 1, could provide more discount/offer to the customers: 1, couldn't get the discount as expected: 1, blatantly denied the use of the coupon: 1, what is the use of the gift voucher if customers will be insulted like that: 1","demanded extra money which he never informed us before: 1, putting big margin in making charges: 1, making charges were a bit higher: 1","Price is very high: 1, Very expensive shop: 1, Bharath was genuine but expensive: 1, Selling the item on high margin: 1, Have to bear 71 DHS additional cost: 1, Deceitful scheme: 1","stones won't fall off easily: 1, ruby from the ring is lost: 1","manager blatantly denied the use of the coupon: 1, item is not strong enough: 1, rejected full refund and replacement: 
1, have to bear 71 DHS additional cost: 1, fighting with customer for 71 DHS: 1, exchange only in Dubai: 1, can't exchange in India: 1, clearly asked her before I buy: 1"
