In [1]:
# to reference env. variables
from dotenv import load_dotenv
import os
# to process files in a zipped file
import zipfile
# to work with json files
import json
# to work with regular expressions. 
# use cases for this script include: extract date from text
import re
# to use chat model
from langchain.chat_models import AzureChatOpenAI
# to define roles in chat
from langchain.schema import HumanMessage, SystemMessage, AIMessage
# to create prompt template
from langchain.prompts import (
    ChatPromptTemplate,
    PromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate
)
# set up langchain
from langchain.chains import LLMChain
# working with dataframes
import pandas as pd

#########
# for play and learning only
# to write sentiment into a file and read sentiment from a file. 
# This is to not have to run through the llm repeated during testing
import csv
# to only get file name
from pathlib import Path
# to write datetime to filename
import time

In [2]:
load_dotenv()  

# get input zip file location
zip_file_location = os.getenv('in_file_location')
filename = 'json 2.zip'
# save df location 
save_df_location = os.getenv('save_df_location')

In [11]:
def getCompanyDetails(fileData):
    '''
    From the json content extract values of certain keys
    Arg:
    fileData - json from file
    Returns:
    Company details       
    '''

    company = fileData['company_name']
    event_date = extractDateFromText(fileData['title'])
    event = extract_event_type(fileData['title'])
    company_id, year, month = fileData['company_id'], fileData['year'], fileData['month']

    return company, company_id, event, event_date, month, year

In [12]:
def extractDateFromText(text):
    '''
    Arg:
    text - title that has the date within in it
    Returns: 
    date as string
    '''
    
    date_pattern = r"\b(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s\d{1,2},\s\d{4}\b"
    date_string = re.search(date_pattern, text)
    
    return date_string.group()

def extract_event_type(text):
    '''
    Arg:
    text - text from which the event type is to be extracted
    
    Returns:
    str of earnings call nature
    '''
    listOfParts = text.split(', ', 2)
    
    
    event_type = str(listOfParts[1])
    
    return event_type

In [13]:
def extractCategories(ld):
    '''
    From a list of dictionaries, extract applicable values based on another key value within the dictionary
    This is to be done for applicable list items of type (dict)

    This function will call 2 other functions because extraction of presentation and question content follow 
    the same pattern. But the extraction of question and subsequent answers to it into a list of dictionaries is slightly different.
    
    Arg:
    ld - list of dictionaries

    Returns:
    3 lists of dictionaries. 1 for each category - presentation, questions, questions and answer(s)
    
    '''

    presentation_list = listBasedOfTranscriptType (ld, 'Presenter Speech')
    question_list = listBasedOfTranscriptType (ld, 'Question')
    question_answer_list = get_list_qa(ld)

    return presentation_list, question_list, question_answer_list



def listBasedOfTranscriptType (contentList, componentType):
    '''
    Creates a list of values from list of dictionaries based on specified dictionary key
    
    Args:
    contentList = list containing dictionaries from which component Type values are to be extracted
    
    Returns:
    list of values of specified key for all the dictionary items in list
    '''
    
    valueList = []
    
    valueList = [{'flow_of_call': c.get('flow_of_call'), 'componenttext': c.get('componenttext')}  for c in contentList if c['transcriptcomponenttypename'] == componentType]
    
    return valueList

def get_list_qa(contentListData):
    '''
    From the list of dictionaries, create another list of dictionaries. Each list items dictionary has 3 keys - 
    id, question, answers. Answers is a list of all the answers by the different executives
    
    Arg:
    contentListData - list of dictionaries
    
    Returns:
    list of dictionaries of fewer keys. A unit is a dictionary of 3 keys - id for question, question, answer. 
    Value for answer is a list of all the answers to a particular question
    
    '''
    
    list_qa = []
    i = 0
    
    for i in range(len(contentListData)):
        if contentListData[i]['transcriptcomponenttypename'] == 'Question':
            qa_dict = {'flow_of_call': '', 'Question':'', 'Answer(s)': []}
            qa_dict['flow_of_call'] = contentListData[i]['flow_of_call']
            qa_dict['Question'] = contentListData[i]['componenttext']
            j=i
            while contentListData[j+1]['transcriptcomponenttypename'] == 'Answer':
                qa_dict['Answer(s)'] += [contentListData[j+1]['componenttext']]
                j+=1
            list_qa.append(qa_dict)
    
    return list_qa


In [4]:
def extractSentiment(l_2keydict, variable_value_for_system_prompt):
    '''
    Determines sentiment for value of specific key of dictionary that is list item. 
    This is done for each item of list.
    
    Arg:
    1_2keydict - list of 2 key dictionary items
    varaible_value_for_system_prompt - variable to be passed to system prompt for chat model

    Returns:
    list of dicionary with 2 keys. 1 key is call id, other key is sentiment
    '''
    # set chatModel
    chat_model = AzureChatOpenAI(deployment_name = 'gpt-4')
    
    # create prompt
    prompt = PromptTemplate(
    template = 'You are very good at determinining sentiment based on transcripts \
    of human conversations of the following type of speech - {speechType}. \
    For a given piece of text you can respond with 1 word describing \
    the sentiment of the text. The word choice is 1 of these three: Positive, Negative, Neutral',
    input_variables = ['speechType']
    )
    
    human_template = '{text}'
    human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)
    system_message_prompt = SystemMessagePromptTemplate(prompt=prompt)
    
    # create chat prompt template
    chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])

    # set up chain
    chain = LLMChain(llm=chat_model, prompt= chat_prompt)

    sentimentList = []

    for d in l_2keydict:
        response = chain.run({
            'speechType' : variable_value_for_system_prompt,
            'text' : d['componenttext'] 
        })
        sent_dict = {'flow_of_call': d.get('flow_of_call'), 'sentiment' : response}
        sentimentList.append(sent_dict)
    
    return sentimentList


def extractqaSentiment(dataList, variable_value_for_system_prompt):
    '''
    Extract sentiment on a conversation. The conversation has 2 parties. 1 party is the Analyst. The second party is the collective response by executives to the 
    question raised by the analyst.

    Arg:
    dataList - list of dictionaries. Each dictionary has 3 keys. 1 key is call id corresponding to 2nd key - Question. 3rd key is answer. Answer value is 
    list of responses to question.
    variable_value_for_system_prompt - context input to pass to system prompt

    Returns:
    list of dicionary with 2 keys. 1 key is call id, other key is sentiment
    '''

    # set chatModel
    chat_model = AzureChatOpenAI(deployment_name = 'gpt-4')

    prompt_for_qa = PromptTemplate(
        template = 'You are very good at determining the sentiment of human conversation based on \
        transcripts of these conversations in the following context - {speechContext}. \
        For a given conversation, can you please respond with 1 word that indicates the overall sentiment of the conversation. \
        The response is to be in 1 of the 3 following words: Positive, Negative, Neutral',
        input_variables = ['speechContext']
        )
    
    human_qa_template = 'Question by the analyst: {question}. Responses by Company executives - {answers}'
    
    system_message_qa_prompt = SystemMessagePromptTemplate(prompt = prompt_for_qa)
    
    human_message_qa_prompt = HumanMessagePromptTemplate.from_template(human_qa_template)
    
    qa_chat_prompt = ChatPromptTemplate.from_messages([system_message_qa_prompt, human_message_qa_prompt])

    qa_chain = LLMChain(llm = chat_model, prompt = qa_chat_prompt)

    sentiment_for_list_qa = []

    for d in dataList:
        q = d['Question']
        answers_from_list = '\n'.join(d['Answer(s)'])

        response = qa_chain.run(
            {
                'speechContext': variable_value_for_system_prompt,
                'question': q,
                'answers': answers_from_list
            }
        )

        qa_sent_dict = {'flow_of_call': d.get('flow_of_call'), 'sentiment' : response}
        sentiment_for_list_qa.append(qa_sent_dict)

    return sentiment_for_list_qa

In [3]:
# creating a function not for the true script but to write the sentiment list data into files to figure out 
# how the data should be written into the detailed csv
# https://www.pythonforbeginners.com/basics/list-of-dictionaries-to-csv-in-python
 

def writeIntocsv(l_of_d, filename, list_type):
    '''
    Takes a list of dictionaries and writes the content into a csv
    
    Arg:
    l_of_d - list of dictionaries
    filename - from which file this data was gathered from 
    list_type - type of list - presentation, questions, questions and answers
    
    Returns:
    csv file 
    
    '''
    
    myFile = open('C:\MIDS\FromGrzegorz\RoughTest\\' + filename + list_type + '.csv',  'w')
    writer = csv.writer(myFile)
    mydict = l_of_d[0]
    mydictkeys = list(mydict.keys())
    writer.writerow(mydictkeys)
    for dictionary in l_of_d:
        writer.writerow(dictionary.values())
    myFile.close


In [2]:
# function to read csv into list of dictionaries
# this function is also only to avoid having to call openai api lots of times using the implementaiton of this project
# it is unlikely to be used in primary py script

def readCsvToLoD(filename):
    '''
    Takes a csv file and returns a list of dictionaries
    
    Arg:
    filename - abs. filepath of csv file
    
    Returns:
    List of dictionaries
    
    '''
    
    myFile = open(filename, 'r')
    reader = csv.DictReader(myFile)
    
    l_o_d = []
    
    for dictionary in reader:
        l_o_d.append(dictionary)
    
    return l_o_d
    
    

In [14]:
# testing referecing another py file in this file
# also testing continue to skip 1 iteration of for loop
from s_and_p_500 import SPCIQ_ids

print (SPCIQ_ids)

filepath = zip_file_location+filename
with zipfile.ZipFile(filepath, 'r') as myzip:
        file_member_list = myzip.namelist()
        for member in file_member_list:
            try:
                with myzip.open(member) as file:
                    file_content = json.load(file)

                    # extract company and file details
                    
                    
                    company, ciq_id, event_type, publish_date, month, year = getCompanyDetails(file_content)
                    
                    if ciq_id not in SPCIQ_ids:
                        print(f'{ciq_id} not present in s_and_p_500 list')
                        continue
                    
                    filename = member
                    print (filename)
            
            except Exception as e:
                print (e)
            
                    
                        


['289194', '997111', '247483', '203028']
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
json 2/json/ace-limited-q1-2009.json
json 2/json/ace-limited-q1-2015.json
json 2/json/ace-limited-q2-2013.json
json 2/json/ace-limited-q2-2015.json
json 2/json/ace-limited-q3-2011.json
json 2/json/ace-limited-q3-2013.json
json 2/json/ace-limited-q3-2014.json
json 2/json/ace-limited-q4-2011.json
json 2/json/ace-limited-q4-2012.json
json 2/json/ace-limited-q4-2013.json
json 2/json/ace-limited-q4-2015.json


In [9]:
filepath = zip_file_location+filename
with zipfile.ZipFile(filepath, 'r') as myzip:
        file_member_list = myzip.namelist()
        for member in file_member_list:
            try:
                with myzip.open(member) as file:
                    file_content = json.load(file)

                    # extract company and file details
                    filename = member
                    print (filename)
                    
                    company, ciq_id, event_type, publish_date, month, year = getCompanyDetails(file_content)
                    
                    # get list of dictionaries of transcript content
                    content_list = file_content['content']
                    
                    # plist - presentation list, qlist - question list, qalist -question and answer(s) list
                    plist, qlist, qalist = extractCategories(content_list)
                    
                    # pass content to LLM
                    p_sentiment_list = extractSentiment(plist, 'Presentation Speech at an Investor relations call') # p_sentiment_list - presentation sentiment list
                    q_sentiment_list = extractSentiment(qlist, 'Questions raised by Analysts at an Investor relations call') # q_sentiment_list - question sentiment list
                    # q_sentiment_list - question and answer(s) sentiment list
                    qa_sentiment_list = extractqaSentiment(qalist, 'Conversation between Analyst and Executives at an Investor relations call') 
                    
                    # write sentiment list into file
                    writeIntocsv(p_sentiment_list, Path(filename).stem, 'presentation')
                    writeIntocsv(q_sentiment_list, Path(filename).stem, 'question')
                    writeIntocsv(qa_sentiment_list, Path(filename).stem, 'qandA')
                    
            except Exception as e:
                print (e)
                


Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
json 2/json/ace-limited-q1-2009.json


Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised RateLimitError: Requests to the Creates a completion for the chat message Operation under Azure OpenAI API version 2023-03-15-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 3 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 2.0 seconds as it raised RateLimitError: Requests to the Creates a completion for the chat message Operation under Azure OpenAI API version 2023-03-15-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 2 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit..
Retrying langchain.chat_models.openai.ChatOpen

json 2/json/ace-limited-q1-2015.json


Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised RateLimitError: Requests to the Creates a completion for the chat message Operation under Azure OpenAI API version 2023-03-15-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 2 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 2.0 seconds as it raised RateLimitError: Requests to the Creates a completion for the chat message Operation under Azure OpenAI API version 2023-03-15-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 1 second. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit..
Retrying langchain.chat_models.openai.ChatOpenA

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Requests to the Creates a completion for the chat message Operation under Azure OpenAI API version 2023-03-15-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 1 second. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised RateLimitError: Requests to the Creates a completion for the chat message Operation under Azure OpenAI API version 2023-03-15-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 4 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit..
Retrying langchain.chat_models.openai.ChatOpenA

json 2/json/ace-limited-q2-2013.json


Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised RateLimitError: Requests to the Creates a completion for the chat message Operation under Azure OpenAI API version 2023-03-15-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 1 second. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised RateLimitError: Requests to the Creates a completion for the chat message Operation under Azure OpenAI API version 2023-03-15-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 1 second. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit..
Retrying langchain.chat_models.openai.ChatOpenAI

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised RateLimitError: Requests to the Creates a completion for the chat message Operation under Azure OpenAI API version 2023-03-15-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 3 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 2.0 seconds as it raised RateLimitError: Requests to the Creates a completion for the chat message Operation under Azure OpenAI API version 2023-03-15-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 2 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit..


json 2/json/ace-limited-q2-2015.json


Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised RateLimitError: Requests to the Creates a completion for the chat message Operation under Azure OpenAI API version 2023-03-15-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 1 second. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised RateLimitError: Requests to the Creates a completion for the chat message Operation under Azure OpenAI API version 2023-03-15-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 2 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit..
Retrying langchain.chat_models.openai.ChatOpenA

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 2.0 seconds as it raised RateLimitError: Requests to the Creates a completion for the chat message Operation under Azure OpenAI API version 2023-03-15-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 1 second. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised RateLimitError: Requests to the Creates a completion for the chat message Operation under Azure OpenAI API version 2023-03-15-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 1 second. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit..
Retrying langchain.chat_models.openai.ChatOpenAI

json 2/json/ace-limited-q3-2011.json


Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised RateLimitError: Requests to the Creates a completion for the chat message Operation under Azure OpenAI API version 2023-03-15-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 1 second. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised RateLimitError: Requests to the Creates a completion for the chat message Operation under Azure OpenAI API version 2023-03-15-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 1 second. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit..
Retrying langchain.chat_models.openai.ChatOpenAI

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 2.0 seconds as it raised RateLimitError: Requests to the Creates a completion for the chat message Operation under Azure OpenAI API version 2023-03-15-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 2 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised RateLimitError: Requests to the Creates a completion for the chat message Operation under Azure OpenAI API version 2023-03-15-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 2 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit..
Retrying langchain.chat_models.openai.ChatOpen

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 2.0 seconds as it raised RateLimitError: Requests to the Creates a completion for the chat message Operation under Azure OpenAI API version 2023-03-15-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 1 second. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised RateLimitError: Requests to the Creates a completion for the chat message Operation under Azure OpenAI API version 2023-03-15-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 1 second. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit..


json 2/json/ace-limited-q3-2013.json


Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised RateLimitError: Requests to the Creates a completion for the chat message Operation under Azure OpenAI API version 2023-03-15-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 1 second. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised RateLimitError: Requests to the Creates a completion for the chat message Operation under Azure OpenAI API version 2023-03-15-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 1 second. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit..
Retrying langchain.chat_models.openai.ChatOpenAI

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised RateLimitError: Requests to the Creates a completion for the chat message Operation under Azure OpenAI API version 2023-03-15-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 1 second. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised RateLimitError: Requests to the Creates a completion for the chat message Operation under Azure OpenAI API version 2023-03-15-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 1 second. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit..
Retrying langchain.chat_models.openai.ChatOpenAI

json 2/json/ace-limited-q3-2014.json


Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised RateLimitError: Requests to the Creates a completion for the chat message Operation under Azure OpenAI API version 2023-03-15-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 1 second. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised RateLimitError: Requests to the Creates a completion for the chat message Operation under Azure OpenAI API version 2023-03-15-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 1 second. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit..
Retrying langchain.chat_models.openai.ChatOpenAI

json 2/json/ace-limited-q4-2011.json


Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised RateLimitError: Requests to the Creates a completion for the chat message Operation under Azure OpenAI API version 2023-03-15-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 3 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 2.0 seconds as it raised RateLimitError: Requests to the Creates a completion for the chat message Operation under Azure OpenAI API version 2023-03-15-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 2 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit..
Retrying langchain.chat_models.openai.ChatOpen

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 2.0 seconds as it raised RateLimitError: Requests to the Creates a completion for the chat message Operation under Azure OpenAI API version 2023-03-15-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 2 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit..


json 2/json/ace-limited-q4-2012.json


Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised RateLimitError: Requests to the Creates a completion for the chat message Operation under Azure OpenAI API version 2023-03-15-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 3 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 2.0 seconds as it raised RateLimitError: Requests to the Creates a completion for the chat message Operation under Azure OpenAI API version 2023-03-15-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 1 second. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit..
Retrying langchain.chat_models.openai.ChatOpenA

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised RateLimitError: Requests to the Creates a completion for the chat message Operation under Azure OpenAI API version 2023-03-15-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 2 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 2.0 seconds as it raised RateLimitError: Requests to the Creates a completion for the chat message Operation under Azure OpenAI API version 2023-03-15-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 1 second. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit..
Retrying langchain.chat_models.openai.ChatOpenA

json 2/json/ace-limited-q4-2013.json


Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised Timeout: Request timed out: HTTPSConnectionPool(host='ds-resource-1.openai.azure.com', port=443): Read timed out. (read timeout=600).
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised RateLimitError: Requests to the Creates a completion for the chat message Operation under Azure OpenAI API version 2023-03-15-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 1 second. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised RateLimitError: Requests to the Creates a completion for the chat message Operation under Azure OpenAI API version 2023-03-15-pr

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised RateLimitError: Requests to the Creates a completion for the chat message Operation under Azure OpenAI API version 2023-03-15-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 1 second. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit..


json 2/json/ace-limited-q4-2015.json


Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised RateLimitError: Requests to the Creates a completion for the chat message Operation under Azure OpenAI API version 2023-03-15-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 1 second. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised RateLimitError: Requests to the Creates a completion for the chat message Operation under Azure OpenAI API version 2023-03-15-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 1 second. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit..
Retrying langchain.chat_models.openai.ChatOpenAI

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised RateLimitError: Requests to the Creates a completion for the chat message Operation under Azure OpenAI API version 2023-03-15-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 1 second. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised RateLimitError: Requests to the Creates a completion for the chat message Operation under Azure OpenAI API version 2023-03-15-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 1 second. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit..
Retrying langchain.chat_models.openai.ChatOpenAI

In [10]:
file_member_list

['json 2/',
 'json 2/json/',
 'json 2/json/ace-limited-q1-2009.json',
 'json 2/json/ace-limited-q1-2015.json',
 'json 2/json/ace-limited-q2-2013.json',
 'json 2/json/ace-limited-q2-2015.json',
 'json 2/json/ace-limited-q3-2011.json',
 'json 2/json/ace-limited-q3-2013.json',
 'json 2/json/ace-limited-q3-2014.json',
 'json 2/json/ace-limited-q4-2011.json',
 'json 2/json/ace-limited-q4-2012.json',
 'json 2/json/ace-limited-q4-2013.json',
 'json 2/json/ace-limited-q4-2015.json']

In [105]:
# function

def createSentimentDFForAFile(pld, qld, qald, filename, company, ciq_id, event_type, publish_date, month, year):
    '''
    creates a dataframe from a 3 lists of dictionaries and other attributes - 
    company, ciq_id, event_type, publish_date, month, year 
    
    Arg:
    pld - presentation list of dictionaries
    qd - question list of dictionaries
    qald - question and answers list of dictionaries
    
    ...
    
    Returns:
    a dataframe that aggregates all this data
    
    '''
    
    dfl = [] # dfl - list of dataframes
    
    lld = [pld, qld, qald] # lld - list of list of dictionaries
    
    categoryList = ['Presentation','Question','QuestionAndAnswer']
    
    for index, ld in enumerate(lld):
        dfc = pd.DataFrame(ld )# dfc - dr
        dfc['category'] = categoryList[index]
        
        dfc = dfc.assign (filename = filename, company= company, ciq_id = ciq_id, event_type = event_type
                          , publish_date = publish_date, month = month, year = year)
        
        dfl.append(dfc)
    
    df_for_file = pd.concat(dfl, axis=0, ignore_index=True)
    
    return df_for_file

        

        
        
        
        
    

In [4]:
# more testing



df_sentiment_for_all_files = pd.DataFrame(columns=['filename','company', 'ciq_id', 'event_type', 'publish_date', 'month',
                                                  'year', 'category', 'flow_of_call', 'sentiment'])

single_file = open('C:\MIDS\FromGrzegorz\json 2\json\\ace-limited-q1-2009.json', 'r')
single_file_test = 'ace-limited-q1-2009'

file_content = json.load(single_file)

company, ciq_id, event_type, publish_date, month, year = getCompanyDetails(file_content)

pslist = readCsvToLoD('C:\MIDS\FromGrzegorz\RoughTest\\'+single_file_test+'presentation'+'.csv')
qslist = readCsvToLoD('C:\MIDS\FromGrzegorz\RoughTest\\'+single_file_test+'question'+'.csv')
qaslist = readCsvToLoD('C:\MIDS\FromGrzegorz\RoughTest\\'+single_file_test+'qandA'+'.csv')



print(f' ########## \n Presentation Sentiment list:\n {pslist} \n\n  ############ \n Question Sentiment list:\n {qslist} \
    \n\n ############ \n Q and A list: \n {qaslist}')




NameError: name 'getCompanyDetails' is not defined

In [5]:
df_for_a_file = createSentimentDFForAFile(pslist, qslist, qaslist
                                          , 'C:\MIDS\FromGrzegorz\json 2\json\\ace-limited-q1-2009.json'
                                          , company, ciq_id, event_type, publish_date, month, year)

df_for_a_file


NameError: name 'createSentimentDFForAFile' is not defined

In [106]:
# test writing all the files into 1 big file with all the sentiments
# sentiment_written_directory = 'C:\MIDS\FromGrzegorz\RoughTest'

# filelist = os.listdir(sentiment_written_directory)
# filelist



filepath = zip_file_location+filename

aggregate_sentiment_detail_ist_df = []



with zipfile.ZipFile(filepath, 'r') as myzip:
    file_member_list = myzip.namelist()
    for member in file_member_list:
        try:
            with myzip.open(member) as file:
                file_content = json.load(file)
                
                # extract company and file details
                filename = member
                print (filename)
                    
                company, ciq_id, event_type, publish_date, month, year = getCompanyDetails(file_content)

                # read from processed sentiment files
                pslist = readCsvToLoD('C:\MIDS\FromGrzegorz\RoughTest\\'+Path(filename).stem+'presentation'+'.csv')
                qslist = readCsvToLoD('C:\MIDS\FromGrzegorz\RoughTest\\'+Path(filename).stem+'question'+'.csv')
                qaslist = readCsvToLoD('C:\MIDS\FromGrzegorz\RoughTest\\'+Path(filename).stem+'qandA'+'.csv')

                sentiment_df_for_file = createSentimentDFForAFile(pslist, qslist, qaslist
                                      , member
                                      , company, ciq_id, event_type, publish_date, month, year)


                aggregate_sentiment_detail_ist_df.append(sentiment_df_for_file)
        
        except Exception as e:
            print (e)

# concatenate all the dfs in list and write aggregate df to file

aggregate_df = pd.concat(aggregate_sentiment_detail_ist_df, axis = 0, ignore_index = True)




# get datetime
timestr = time.strftime("%Y%m%d-%H%M%S")

print (timestr)

aggregate_df.to_csv(save_df_location + 'AggSentMetricsTest-' +  timestr +  '.csv', index=False)
    
        
        
                
                


Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
json 2/json/ace-limited-q1-2009.json
json 2/json/ace-limited-q1-2015.json
json 2/json/ace-limited-q2-2013.json
json 2/json/ace-limited-q2-2015.json
json 2/json/ace-limited-q3-2011.json
json 2/json/ace-limited-q3-2013.json
json 2/json/ace-limited-q3-2014.json
json 2/json/ace-limited-q4-2011.json
json 2/json/ace-limited-q4-2012.json
json 2/json/ace-limited-q4-2013.json
json 2/json/ace-limited-q4-2015.json
20230816-173343


In [15]:


p_df = pd.DataFrame(pslist)
p_df['category'] = 'Presentation'



p_df = p_df.assign(filename = 'C:\MIDS\FromGrzegorz\json 2\json\\ace-limited-q1-2009.json'
             , company= company, ciq_id = ciq_id, event_type = event_type
             , publish_date = publish_date, month = month, year = year)

columns = p_df.columns.tolist()

new_column_order = columns[3:] + columns[:3]

new_column_order

p_df= p_df[new_column_order]

p_df




Unnamed: 0,filename,company,ciq_id,event_type,publish_date,month,year,flow_of_call,sentiment,category
0,C:\MIDS\FromGrzegorz\json 2\json\ace-limited-q...,Chubb Limited (NYSE:CB),203028,Q1 2009 Earnings Call,"Apr 29, 2009",4,2009,1,Neutral,Presentation
1,C:\MIDS\FromGrzegorz\json 2\json\ace-limited-q...,Chubb Limited (NYSE:CB),203028,Q1 2009 Earnings Call,"Apr 29, 2009",4,2009,2,Positive,Presentation
2,C:\MIDS\FromGrzegorz\json 2\json\ace-limited-q...,Chubb Limited (NYSE:CB),203028,Q1 2009 Earnings Call,"Apr 29, 2009",4,2009,3,Neutral,Presentation
3,C:\MIDS\FromGrzegorz\json 2\json\ace-limited-q...,Chubb Limited (NYSE:CB),203028,Q1 2009 Earnings Call,"Apr 29, 2009",4,2009,4,Neutral,Presentation
4,C:\MIDS\FromGrzegorz\json 2\json\ace-limited-q...,Chubb Limited (NYSE:CB),203028,Q1 2009 Earnings Call,"Apr 29, 2009",4,2009,5,Neutral,Presentation


# Sentiment metrics

In [3]:
def sentimentMetrics(sentimentList):
    '''
    Arg:
    sentimentList - list where items are 1 of 3 values - Positive, Negative, Neutral

    Returns:
    avg. +, -, neutrality of document
    
    
    '''

    
    positive_count = neutral_count = negative_count = 0.0
    
    for i in range(len(sentimentList)):
        if sentimentList[i] == 'Positive':
            positive_count += 1
        elif sentimentList[i] == 'Negative':
            negative_count += 1
        else:
            neutral_count += 1
    
    avg_neutral = neutral_count/len(sentimentList)
    avg_positive = positive_count/len(sentimentList)
    avg_negative = negative_count/len(sentimentList)
    
   
    return avg_neutral, avg_positive, avg_negative



In [7]:
def aggregateSentimentMetricsDFCreation(sentDetailsDF):
    '''
    Arg:
    sentDetailsDF - dataframe that has the sentiment per flow id per category per processed file
    
    Returns:
    
    df_sentiment_metrics_all_processed_files - dataframe that holds the average neutral, positive, negative occurences 
    per category per processed file
    
    '''
    
    # list that will have each df of sentiment metrics for a file as an item
    df_sentiment_metrics_list = []
    
    # get unique filename
    processedfiles = sentDetailsDF['filename'].unique()
    
    
    categoryList = ['Presentation','Question','QuestionAndAnswer']
    
    for f in processedfiles:
        print(f)
        
        sentimentDetailsForf = sentDetailsDF.query(f'filename == "{f}"')
        
        # new df to save metrics for a file
        sentimentMetricsForAFile = pd.DataFrame(columns = ['avg_neutral','avg_positive','avg_negative','category'])
        
        
        for c in categoryList:
            # filter dataframe by filename and category
            sentimentDetailsForfForSpecificCategory = sentimentDetailsForf.query(f'category == "{c}"')
            # extract sentiment per flow call id for a category
            sent_list = sentimentDetailsForfForSpecificCategory['sentiment'].tolist() # sent_list - list of sentiments
            
            avg_neutral, avg_positive, avg_negative = sentimentMetrics(sent_list)
            
            # add metrics for category into df
            sentimentMetricsForAFile.loc[len(sentimentMetricsForAFile)] = [avg_neutral, avg_positive, avg_negative, c]
        
        # top row data from sentimentDetails dataframe filtered only by filename to add 
        # to sentimentMetrics dataframe for a file
        top_row_data = sentimentDetailsForf.iloc[0, 3:]
        top_row_columns = sentimentDetailsForf.columns[3:]
        
        # add reference top row data to each metrics row
        sentimentMetricsForAFile[top_row_columns] = top_row_data
        
        df_sentiment_metrics_list.append(sentimentMetricsForAFile)
    
    
    df_sentiment_metrics_all_processed_files = pd.concat(df_sentiment_metrics_list, axis=0, ignore_index=True)
    
    return df_sentiment_metrics_all_processed_files     

    

In [8]:
p_df_details = pd.read_csv('C:\MIDS\Project2SentimentAnalysisTranscripts\InTranscripts\sentimentDF\AggSentMetricsTest-20230816-173343.csv')

agg_sent_met = aggregateSentimentMetricsDFCreation(p_df_details)

agg_sent_met


json 2/json/ace-limited-q1-2009.json
json 2/json/ace-limited-q1-2015.json
json 2/json/ace-limited-q2-2013.json
json 2/json/ace-limited-q2-2015.json
json 2/json/ace-limited-q3-2011.json
json 2/json/ace-limited-q3-2013.json
json 2/json/ace-limited-q3-2014.json
json 2/json/ace-limited-q4-2011.json
json 2/json/ace-limited-q4-2012.json
json 2/json/ace-limited-q4-2013.json
json 2/json/ace-limited-q4-2015.json


Unnamed: 0,avg_neutral,avg_positive,avg_negative,category,filename,company,ciq_id,event_type,publish_date,month,year
0,0.8,0.2,0.0,Presentation,json 2/json/ace-limited-q1-2009.json,Chubb Limited (NYSE:CB),203028,Q1 2009 Earnings Call,"Apr 29, 2009",4,2009
1,0.740741,0.037037,0.222222,Question,json 2/json/ace-limited-q1-2009.json,Chubb Limited (NYSE:CB),203028,Q1 2009 Earnings Call,"Apr 29, 2009",4,2009
2,0.777778,0.148148,0.074074,QuestionAndAnswer,json 2/json/ace-limited-q1-2009.json,Chubb Limited (NYSE:CB),203028,Q1 2009 Earnings Call,"Apr 29, 2009",4,2009
3,0.75,0.25,0.0,Presentation,json 2/json/ace-limited-q1-2015.json,Chubb Limited (NYSE:CB),203028,Q1 2015 Earnings Call,"Apr 22, 2015",4,2015
4,0.893617,0.06383,0.042553,Question,json 2/json/ace-limited-q1-2015.json,Chubb Limited (NYSE:CB),203028,Q1 2015 Earnings Call,"Apr 22, 2015",4,2015
5,0.638298,0.319149,0.042553,QuestionAndAnswer,json 2/json/ace-limited-q1-2015.json,Chubb Limited (NYSE:CB),203028,Q1 2015 Earnings Call,"Apr 22, 2015",4,2015
6,0.5,0.5,0.0,Presentation,json 2/json/ace-limited-q2-2013.json,Chubb Limited (NYSE:CB),203028,Q2 2013 Earnings Call,"Jul 24, 2013",7,2013
7,0.860465,0.069767,0.069767,Question,json 2/json/ace-limited-q2-2013.json,Chubb Limited (NYSE:CB),203028,Q2 2013 Earnings Call,"Jul 24, 2013",7,2013
8,0.651163,0.325581,0.023256,QuestionAndAnswer,json 2/json/ace-limited-q2-2013.json,Chubb Limited (NYSE:CB),203028,Q2 2013 Earnings Call,"Jul 24, 2013",7,2013
9,0.75,0.25,0.0,Presentation,json 2/json/ace-limited-q2-2015.json,Chubb Limited (NYSE:CB),203028,Q2 2015 Earnings Call,"Jul 22, 2015",7,2015


In [6]:
# start here: save agg. metrics file locally. see same pattern as details file
# agg_sent_met.to_csv(save_df_location + 'AggSentMetrics-' +  timestr +  '.csv', index=False)
agg_sent_met.to_csv(save_df_location + 'AggSentMetrics-' +  'test' +  '.csv', index=False)

# Rough
https://learnpython.com/blog/read-csv-into-list-python/#:~:text=To%20do%20so%2C%20we%20use,may%20specify%20the%20fieldnames%20argument.


https://stackoverflow.com/questions/34811971/how-do-i-fill-a-column-with-one-value-in-pandas