In [39]:
import openai
import pandas as pd
import time
import os  # To access environment variables

In [40]:
# Verify OpenAI library version
print(f"OpenAI library version: {openai.__version__}")  # Should print 0.28.0

OpenAI library version: 0.28.0


In [41]:
prompts = [
    "Discard all the previous instructions. Behave like you are an expert sentence classifier. Classify the following statement from an ECB member into ‘HAWKISH’, ‘DOVISH’, or ‘NEUTRAL’. Label it ‘HAWKISH’ if it suggests tightening monetary policy, ‘DOVISH’ if it suggests easing monetary policy, or ‘NEUTRAL’ if the stance is ambiguous or not directly related to monetary policy: ",
    "Discard all the previous instructions. Behave like you are an expert sentence classifier. Assess the following statement from an ECB member in the context of maintaining price stability and promoting maximum employment. Determine whether it reflects a HAWKISH (tightening), DOVISH (easing), or NEUTRAL stance on monetary policy or is ambiguous: ",
    "Discard all the previous instructions. Behave like you are an expert sentence classifier. Analyze the following statement from an ECB member and identify the likely impact on interest rates, economic growth, and inflation. Based on your analysis, classify the sentence as HAWKISH, DOVISH, or NEUTRAL: ",
    "Discard all the previous instructions. Behave like you are an expert economist. Evaluate the following statement from an ECB member from the perspective of how the media reader might interpret it. Would it likely be perceived as HAWKISH (signaling higher rates), DOVISH (signaling lower rates), or NEUTRAL (ambiguous): ",
    "Discard all the previous instructions. Behave like you are an expert economist. Given current economic conditions, determine whether the following statement from an ECB member suggests a HAWKISH (concerned about inflation), DOVISH (concerned about growth), or NEUTRAL stance on monetary policy or ambiguous"
]

In [42]:
# Load your dataset
print("Loading dataset...")
data = pd.read_csv('Random_Articles_to_Score_Complete_Converted.csv')
print("Dataset loaded successfully.")
data.head()

Loading dataset...
Dataset loaded successfully.


Unnamed: 0,Column1,Manual.summary,Classification Joaquin,Classification Rui,Classification Ed,Average_Classification
0,0,"Speaking at the IMF in Washington, ECB Presi...",1,1,1,1.0
1,1,"Starting in October, the ECB wants to use a ...",0,0,0,0.0
2,2,ECB Chief Economist Philip Lane has proposed ...,1,0,1,0.666667
3,3,Former German Chancellor Angela Merkel has rec...,0,0,0,0.0
4,4,The ECB has raised its interest rates again ...,1,1,1,1.0


In [44]:
# Drop columns and rename column as requested
data.drop(columns=['Column1', 'Classification Joaquin', 'Classification Rui', 'Classification Ed'], inplace=True)
data.rename(columns={'Average_Classification': 'Manual_Classification_Score'}, inplace=True)
print("Columns dropped and renamed.")
data.head()

Columns dropped and renamed.


Unnamed: 0,Manual.summary,Manual_Classification_Score
0,"Speaking at the IMF in Washington, ECB Presi...",1.0
1,"Starting in October, the ECB wants to use a ...",0.0
2,ECB Chief Economist Philip Lane has proposed ...,0.666667
3,Former German Chancellor Angela Merkel has rec...,0.0
4,The ECB has raised its interest rates again ...,1.0


In [34]:
# Set your OpenAI API key
openai.api_key = 'sk-proj-dvUlhrqVeqGmdn2a7b3pT3BlbkFJbWEX2j67TZIrKSZvCfzm'

In [35]:
def classify_and_score_article(article, prompt):
    while True:
        try:
            response = openai.ChatCompletion.create(
                model="gpt-4",
                messages=[
                    {"role": "system", "content": "You are a financial expert."},
                    {"role": "user", "content": prompt + article}
                ],
                max_tokens=150  # Adjust max_tokens if needed
            )
            result = response['choices'][0]['message']['content'].strip()

            # Assign scores based on textual labels
            if "HAWKISH" in result.upper():
                score = 1
            elif "DOVISH" in result.upper():
                score = -1
            elif "NEUTRAL" in result.upper():
                score = 0
            else:
                raise ValueError("Incomplete response from API")
                
            return score

        except openai.error.RateLimitError:
            print("Rate limit exceeded. Retrying in 10 seconds...")
            time.sleep(10)
        except openai.error.APIError as e:
            print(f"API error: {e}. Retrying in 10 seconds...")
            time.sleep(10)
        except Exception as e:
            print(f"Unexpected error: {e}. Skipping this article.")
            return "Error"

# Initialize new columns for classifications from each prompt
for i in range(1, 6):
    data[f'OpenAI_Prompt_{i}'] = 0

# Classify and score each article in the dataset
print("Classifying and scoring articles...")
for i, article in enumerate(data['Manual.summary']):
    for j, prompt in enumerate(prompts):
        classification_score = classify_and_score_article(article, prompt)
        data.at[i, f'OpenAI_Prompt_{j + 1}'] = classification_score
    if (i + 1) % 10 == 0 or (i + 1) == len(data):
        print(f"Processed {i + 1} of {len(data)} articles.")

# Display the first few rows of the updated dataframe
print("Classification and scoring completed. Here are the first few results:")
print(data.head())

# Save the results to a new CSV file
output_file = 'classified_articles_open_ai.csv'
data.to_csv(output_file, index=False)
print(f"Results saved to {output_file}")

# Print the DataFrame head of the results
print("Here is the head of the resulting DataFrame:")
data.head()

Classifying and scoring articles...
Processed 10 of 100 articles.
Processed 20 of 100 articles.
Processed 30 of 100 articles.
Processed 40 of 100 articles.
Processed 50 of 100 articles.
Processed 60 of 100 articles.
Processed 70 of 100 articles.
Unexpected error: Incomplete response from API. Skipping this article.
Processed 80 of 100 articles.
Unexpected error: Incomplete response from API. Skipping this article.
Processed 90 of 100 articles.
Unexpected error: Incomplete response from API. Skipping this article.
Unexpected error: Incomplete response from API. Skipping this article.
Unexpected error: Incomplete response from API. Skipping this article.
Processed 100 of 100 articles.
Classification and scoring completed. Here are the first few results:
                                      Manual.summary  \
0  Speaking at the IMF in Washington,  ECB  Presi...   
1  Starting in October, the  ECB  wants to use a ...   
2  ECB  Chief Economist Philip Lane has proposed ...   
3  Former Germ

In [36]:
data.head()

Unnamed: 0,Manual.summary,Manual_Classification_Score,OpenAI_Prompt_1,OpenAI_Prompt_2,OpenAI_Prompt_3,OpenAI_Prompt_4,OpenAI_Prompt_5
0,"Speaking at the IMF in Washington, ECB Presi...",-1.0,-1,-1,-1,-1,-1
1,"Starting in October, the ECB wants to use a ...",0.0,1,1,0,1,1
2,ECB Chief Economist Philip Lane has proposed ...,-0.666667,1,1,1,1,1
3,Former German Chancellor Angela Merkel has rec...,0.0,0,-1,-1,-1,-1
4,The ECB has raised its interest rates again ...,-1.0,-1,-1,-1,-1,-1


In [38]:
# Initialize a dictionary to store the results
results = {
    'Prompt': [],
    'Counts_Dovish': [],
    'Counts_Neutral': [],
    'Counts_Hawkish': [],
    'Standard Deviation': [],
    'Median': [],
    'Average': []
}

# Calculate the counts, standard deviation, median, and average for each prompt
for prompt in ['OpenAI_Prompt_1', 'OpenAI_Prompt_2', 'OpenAI_Prompt_3', 'OpenAI_Prompt_4', 'OpenAI_Prompt_5']:
    # Ensure the scores are numeric
    scores = pd.to_numeric(data[prompt], errors='coerce')
    
    # Calculate counts for each classification
    counts = scores.value_counts().to_dict()
    counts_dovish = counts.get(-1, 0)
    counts_neutral = counts.get(0, 0)
    counts_hawkish = counts.get(1, 0)
    
    # Calculate standard deviation, median, and average
    std_dev = scores.std()
    median = scores.median()
    average = scores.mean()
    
    # Store the results
    results['Prompt'].append(prompt)
    results['Counts_Dovish'].append(counts_dovish)
    results['Counts_Neutral'].append(counts_neutral)
    results['Counts_Hawkish'].append(counts_hawkish)
    results['Standard Deviation'].append(std_dev)
    results['Median'].append(median)
    results['Average'].append(average)

# Create a DataFrame from the results dictionary
results_df = pd.DataFrame(results)

# Display the results DataFrame
print("Classification and scoring summary:")
results_df

Classification and scoring summary:


Unnamed: 0,Prompt,Counts_Dovish,Counts_Neutral,Counts_Hawkish,Standard Deviation,Median,Average
0,OpenAI_Prompt_1,63,10,26,0.875819,-1.0,-0.373737
1,OpenAI_Prompt_2,65,5,29,0.908627,-1.0,-0.363636
2,OpenAI_Prompt_3,59,8,31,0.919435,-1.0,-0.285714
3,OpenAI_Prompt_4,58,5,36,0.953571,-1.0,-0.222222
4,OpenAI_Prompt_5,75,1,24,0.858646,-1.0,-0.51


read in Manual_Classifier_Statistics.csv and left join to results_df, then closest average 

### Older Code

In [3]:
# Load your dataset
print("Loading dataset...")
data = pd.read_excel('combined_updated.xlsx')
print("Dataset loaded successfully.")
data

Loading dataset...
Dataset loaded successfully.


Unnamed: 0,Date,Source,Headline,Translated.headline,Manual.summary,Original.article.url,Translated.text,Media.type,Speaker,Reach,Language,Country.Code,Sitename,Tags,Site_Readership,Article_Readership
0,2022-09-30 22:08:34,Il Messaggero - Borsa Italiana,"Visco: «Troppi rialzi dei tassi, si va in rece...","Banca d'Italia's Visco: ""Too many rate hikes w...","At a conference in Florence, Banca d'Italia Go...",https://www.ilmessaggero.it/economia/news/tass...,No to an excessive rise in rates. The Governor...,WEB,Newspapers,1174293,it,IT,Il Messaggero,"European Central Bank, European Monetary Union...",2296944.0,0.0
1,2022-09-30 21:44:15,Puls Biznesu - Z ostatniej chwili,Knot: kolejne podwyżki stóp EBC są konieczne,Knot: further ECB rate hikes are necessary,,https://www.pb.pl/knot-kolejne-podwyzki-stop-e...,Klaas knot photo by Zach Gibson/Bloomberg The ...,WEB,Newspapers,72409,pl,PL,Puls Biznesu,"European Central Bank, Inflation, Rank 2, Inte...",73998.0,0.0
2,2022-09-30 21:30:13,Handelsblatt - Finanzen,EZB-Ratsmitglied Visco: Rezessionsrisiko bei g...,ECB 's Visco: Recession risk in the event of m...,Banca d'Italia Governor Ignazio Visco on Fri...,https://www.handelsblatt.com/finanzen/geldpoli...,,WEB,Newspapers,525924,de,DE,Handelsblatt,"European Central Bank, European Monetary Union...",314148.0,811493.0
3,2022-09-30 21:07:32,Trend - Správy,ECB by mala byť pri zvyšovaní sadzieb opatrná...,"ECB should be cautious about rate hikes, says...",,https://www.trend.sk/spravy/ecb-mala-byt-pri-z...,"The governor of the Italian central bank, Igna...",WEB,Net Magazine,51466,sk,SK,Trend,"European Central Bank, Inflation, Rank 2, Inte...",53064.0,0.0
4,2022-09-30 20:56:08,Bloomberg - GNews,ECB 's Schnabel Says Weaker Demand May Not Eas...,ECB 's Schnabel says weaker demand may not eas...,ECB Executive Board Member Isabel Schnabel ...,https://www.bloomberg.com/news/articles/2022-0...,European Central Bank Executive Board member I...,WEB,Net Magazine,2520979,en,US,Bloomberg,"European Central Bank, Inflation, Rank 1, Core...",1309524.0,278016.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35071,2024-01-02 11:39:12,Europa Press - Portada,De Cos anticipa que en 2024 la inflación siga ...,De Cos expects inflation to continue falling i...,Banco de España Governor Pablo Hernández de C...,https://www.europapress.es/eseuropa/noticia-bc...,He assures that we must not lower our guard an...,WEB,Net Magazine,0,es,ES,Europa Press,"European Central Bank, European Monetary Union...",325097.0,223065.0
35072,2024-01-02 05:44:18,Antena 3,"Antena 3 - morning news tue, 02 jan 2024 05:44...",Fixed and variable mortgages expected to becom...,Both fixed and variable rate mortgages are exp...,http://external.opoint.com/?id_site=332796&id_...,We are now talking about the housing that awai...,TV,Broadcasting,2933000,es,ES,Antena 3,"European Central Bank, Inflation, Rank 2, Chri...",,
35073,2024-01-02 04:33:29,La Informacion - Portada,La gran banca rompe otro récord con cerca de 2...,Large banks break another record with nearly E...,Despite facing an extraordinary tax and increa...,https://www.lainformacion.com/empresas/banca-e...,The shift in the monetary policy of the Europe...,WEB,Net Magazine,269285,es,ES,La Informacion,"European Central Bank, Banking Supervision, Ra...",842057.0,90048.0
35074,2024-01-02 04:00:00,LaVanguardia,Hernández de Cos pide una política fiscal con ...,Hernández de Cos calls for European governme...,Banco de España Governor Pablo Hernández de C...,https://www.lavanguardia.com/economia/20240102...,"MADRID, 02 (SERVIMEDIA) The Governor of the Ba...",WEB,Net Magazine,2528744,es,ES,LaVanguardia,"European Central Bank, European Monetary Union...",1187584.0,0.0


In [16]:

# Drop rows where 'Manual.summary' is NaN and reset index
print("Cleaning dataset...")
data.dropna(subset=['Manual.summary'], inplace=True)
data.reset_index(drop=True, inplace=True)
print("Dataset cleaned.")

# Limit to first 50 rows
data = data.head(50)

# Set your OpenAI API key
openai.api_key = 'sk-proj-q4vaD1sfS4JM31othrf0T3BlbkFJCdvyWyHUWXfivw7iteZj'

def classify_and_score_article(article):
    # Formulate the prompt
    prompt = (
        "Classify the following article as 'Dovish' or 'Hawkish' and provide a score between -1 and 1 indicating how dovish or hawkish it is.\n"
        "\n"
        "An article is 'Dovish' if it suggests policies or sentiments that are supportive of economic growth, low interest rates, or monetary easing.\n"
        "An article is 'Hawkish' if it suggests policies or sentiments that are supportive of combating inflation, high interest rates, or monetary tightening.\n"
        "\n"
        "The score should be between -1 and 1 where -1 indicates extremely dovish and 1 indicates extremely hawkish.\n"
        "\n"
        "Article: {}\n"
        "\n"
        "Provide the output in the following format:\n"
        "Classification: [Dovish/Hawkish]\n"
        "Score: [score]\n"
        "Explanation: [Explanation]\n".format(article)
    )
    
    # Call the GPT-3.5-turbo API using the updated method
    while True:
        try:
            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "system", "content": "You are a financial expert."},
                    {"role": "user", "content": prompt}
                ],
                max_tokens=150  # Adjust max_tokens if needed
            )
            # Extract the classification and score from the response
            result = response['choices'][0]['message']['content'].strip()

            # Extract classification, score, and explanation using regex
            classification = re.search(r"Classification:\s*(Dovish|Hawkish)", result)
            score = re.search(r"Score:\s*(-?\d+\.\d+)", result)
            explanation = re.search(r"Explanation:\s*(.*)", result, re.DOTALL)

            if classification and score and explanation:
                return classification.group(1), score.group(1), explanation.group(1).strip()
            else:
                raise ValueError("Incomplete response from API")

        except openai.error.RateLimitError:
            print("Rate limit exceeded. Retrying in 10 seconds...")
            time.sleep(10)
        except openai.error.APIError as e:
            print(f"API error: {e}. Retrying in 10 seconds...")
            time.sleep(10)
        except Exception as e:
            print(f"Unexpected error: {e}. Skipping this article.")
            return "Error", "Error", "Error"

# Initialize new columns
data['Classification'] = ""
data['Score'] = ""
data['Explanation'] = ""

# Classify and score each article in the dataset
print("Classifying and scoring articles...")
for i, article in enumerate(data['Manual.summary']):
    classification, score, explanation = classify_and_score_article(article)
    data.at[i, 'Classification'] = classification
    data.at[i, 'Score'] = score
    data.at[i, 'Explanation'] = explanation
    if (i + 1) % 10 == 0 or (i + 1) == len(data):
        print(f"Processed {i + 1} of {len(data)} articles.")

# Display the first few rows of the updated dataframe
print("Classification and scoring completed. Here are the first few results:")
print(data.head())

# Optionally, save the results to a new Excel file
output_file = 'classified_articles_50.xlsx'
data.to_excel(output_file, index=False)
print(f"Results saved to {output_file}")


OpenAI library version: 0.28.0
Loading dataset...
Dataset loaded successfully.
Cleaning dataset...
Dataset cleaned.
Classifying and scoring articles...
Processed 10 of 50 articles.
Processed 20 of 50 articles.
Processed 30 of 50 articles.
Processed 40 of 50 articles.
Processed 50 of 50 articles.
Classification and scoring completed. Here are the first few results:
                 Date                          Source  \
0 2022-09-30 22:08:34  Il Messaggero - Borsa Italiana   
1 2022-09-30 21:30:13         Handelsblatt - Finanzen   
2 2022-09-30 20:56:08               Bloomberg - GNews   
3 2022-09-30 18:42:47              Il Foglio - Page 3   
4 2022-09-30 18:32:37               Finance.si - Live   

                                            Headline  \
0  Visco: «Troppi rialzi dei tassi, si va in rece...   
1  EZB-Ratsmitglied Visco: Rezessionsrisiko bei g...   
2  ECB 's Schnabel Says Weaker Demand May Not Eas...   
3  **Conti pubblici: Visco, rientro debito ben in...   
4  Evrska 