In [1]:
# Print the version string of the imported OpenAI library so the reader can
# see which SDK release the rest of the notebook runs against.
import openai
print(openai.__version__)


0.28.0


In [16]:
import os
import re
import time

import openai
import pandas as pd

# Verify OpenAI library version
print(f"OpenAI library version: {openai.__version__}")  # Should print 0.28.0

# Load your dataset
print("Loading dataset...")
data = pd.read_excel('combined_updated.xlsx')
print("Dataset loaded successfully.")

# Drop rows where 'Manual.summary' is NaN and reset index.
# Chained (non-inplace) form so re-running the cell always starts from the
# freshly loaded frame and yields the same result.
print("Cleaning dataset...")
data = data.dropna(subset=['Manual.summary']).reset_index(drop=True)
print("Dataset cleaned.")

# Limit to first 50 rows. The explicit copy makes `data` an independent frame,
# so the result columns added later are written to it directly rather than to
# a slice of the full dataset (avoids SettingWithCopyWarning).
data = data.head(50).copy()

# Set your OpenAI API key.
# SECURITY: never commit a key to the notebook. The key is read from the
# OPENAI_API_KEY environment variable instead. Any key that was previously
# hardcoded in this cell must be considered leaked and should be revoked.
openai.api_key = os.environ.get("OPENAI_API_KEY")
if not openai.api_key:
    # Fail fast: without a key every request would fail and every article
    # would silently be recorded as "Error".
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Export it in the environment before "
        "starting the notebook kernel."
    )

# Patterns for the three labelled fields the prompt asks the model to emit.
_CLASSIFICATION_RE = re.compile(r"Classification:\s*(Dovish|Hawkish)")
# Accept integer scores ("1", "-1", "0") as well as decimals ("-0.5", ".5");
# the prompt explicitly allows the extremes -1 and 1, which have no decimal part.
_SCORE_RE = re.compile(r"Score:\s*([-+]?(?:\d+(?:\.\d+)?|\.\d+))")
_EXPLANATION_RE = re.compile(r"Explanation:\s*(.*)", re.DOTALL)


def _parse_classification_response(result):
    """Extract (classification, score, explanation) strings from the reply text.

    Raises:
        ValueError: if any of the three labelled fields is missing.
    """
    classification = _CLASSIFICATION_RE.search(result)
    score = _SCORE_RE.search(result)
    explanation = _EXPLANATION_RE.search(result)

    if not (classification and score and explanation):
        raise ValueError("Incomplete response from API")
    return classification.group(1), score.group(1), explanation.group(1).strip()


def classify_and_score_article(article, max_retries=5, retry_delay=10):
    """Ask the chat model to label an article as Dovish/Hawkish and score it.

    Args:
        article: Text of the article (or its summary) to classify.
        max_retries: Maximum number of API attempts when the call fails with a
            rate-limit or API error. Values below 1 are treated as 1.
        retry_delay: Seconds to wait between attempts.

    Returns:
        A 3-tuple of strings ``(classification, score, explanation)`` where
        ``classification`` is ``"Dovish"`` or ``"Hawkish"`` and ``score`` is the
        numeric text returned by the model. If the reply cannot be parsed, an
        unexpected error occurs, or all retries are exhausted, the tuple
        ``("Error", "Error", "Error")`` is returned instead of raising.
    """
    # Formulate the prompt
    prompt = (
        "Classify the following article as 'Dovish' or 'Hawkish' and provide a score between -1 and 1 indicating how dovish or hawkish it is.\n"
        "\n"
        "An article is 'Dovish' if it suggests policies or sentiments that are supportive of economic growth, low interest rates, or monetary easing.\n"
        "An article is 'Hawkish' if it suggests policies or sentiments that are supportive of combating inflation, high interest rates, or monetary tightening.\n"
        "\n"
        "The score should be between -1 and 1 where -1 indicates extremely dovish and 1 indicates extremely hawkish.\n"
        "\n"
        "Article: {}\n"
        "\n"
        "Provide the output in the following format:\n"
        "Classification: [Dovish/Hawkish]\n"
        "Score: [score]\n"
        "Explanation: [Explanation]\n".format(article)
    )

    failed = ("Error", "Error", "Error")
    attempts = max(1, max_retries)

    # Bounded retry loop: a persistent rate limit or outage must not hang the
    # notebook forever.
    for attempt in range(1, attempts + 1):
        try:
            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "system", "content": "You are a financial expert."},
                    {"role": "user", "content": prompt}
                ],
                max_tokens=150  # Adjust max_tokens if needed
            )
            result = response['choices'][0]['message']['content'].strip()
            return _parse_classification_response(result)

        except openai.error.RateLimitError:
            failure = "Rate limit exceeded."
        except openai.error.APIError as e:
            failure = f"API error: {e}."
        except Exception as e:
            # Includes the ValueError raised for an unparseable reply; these
            # are not retried, matching the best-effort skip behaviour.
            print(f"Unexpected error: {e}. Skipping this article.")
            return failed

        if attempt >= attempts:
            print(f"{failure} Giving up after {attempts} attempts. Skipping this article.")
            return failed
        print(f"{failure} Retrying in {retry_delay} seconds...")
        time.sleep(retry_delay)

    return failed

# Create the three result columns as empty strings; the loop below fills them.
RESULT_COLUMNS = ('Classification', 'Score', 'Explanation')
for column in RESULT_COLUMNS:
    data[column] = ""

# Run every summary through the classifier and store its three outputs on the
# matching row. The enumerate position is used as the row label.
print("Classifying and scoring articles...")
total = len(data)
for row, summary in enumerate(data['Manual.summary']):
    outcome = classify_and_score_article(summary)
    for column, value in zip(RESULT_COLUMNS, outcome):
        data.at[row, column] = value
    done = row + 1
    # Report progress every 10 articles and once more at the very end.
    if done == total or done % 10 == 0:
        print(f"Processed {done} of {total} articles.")

# Show a preview of the annotated frame.
print("Classification and scoring completed. Here are the first few results:")
print(data.head())

# Persist the annotated frame to a new Excel workbook (without the index).
output_file = 'classified_articles_50.xlsx'
data.to_excel(output_file, index=False)
print(f"Results saved to {output_file}")


OpenAI library version: 0.28.0
Loading dataset...
Dataset loaded successfully.
Cleaning dataset...
Dataset cleaned.
Classifying and scoring articles...
Processed 10 of 50 articles.
Processed 20 of 50 articles.
Processed 30 of 50 articles.
Processed 40 of 50 articles.
Processed 50 of 50 articles.
Classification and scoring completed. Here are the first few results:
                 Date                          Source  \
0 2022-09-30 22:08:34  Il Messaggero - Borsa Italiana   
1 2022-09-30 21:30:13         Handelsblatt - Finanzen   
2 2022-09-30 20:56:08               Bloomberg - GNews   
3 2022-09-30 18:42:47              Il Foglio - Page 3   
4 2022-09-30 18:32:37               Finance.si - Live   

                                            Headline  \
0  Visco: «Troppi rialzi dei tassi, si va in rece...   
1  EZB-Ratsmitglied Visco: Rezessionsrisiko bei g...   
2  ECB 's Schnabel Says Weaker Demand May Not Eas...   
3  **Conti pubblici: Visco, rientro debito ben in...   
4  Evrska 