In [4]:
import pandas as pd

# Load the CSV file into a DataFrame
df = pd.read_csv('data/news/dataset.csv')


#keep only the columns we need - title and date
df = df[['Title', 'Date']]

# Display the first few rows of the DataFrame
df.head()

Unnamed: 0,Title,Date
0,"ATMs to become virtual bank branches, accept d...","May 26, 2020, Tuesday"
1,IDFC First Bank seniors to forgo 65% of bonus ...,"May 26, 2020, Tuesday"
2,"Huge scam in YES Bank for many years, says Enf...","May 25, 2020, Monday"
3,"Bank of Maharashtra sanctioned Rs 2,789 cr in ...","May 24, 2020, Sunday"
4,DCB Bank's profit before tax declines 37.6% to...,"May 23, 2020, Saturday"


In [5]:
# Convert the 'Date' column to datetime format
df['Date'] = pd.to_datetime(df['Date'], errors='coerce')

# Filter the DataFrame for dates between 2018 and 2020
filtered_df = df[(df['Date'] >= '2018-01-01') & (df['Date'] <= '2020-12-31')]

# Display the first few rows of the filtered DataFrame
filtered_df.head()

# Save the filtered DataFrame to a new CSV file
filtered_df.to_csv('filtered_dataset.csv', index=False)

# Display the shape of the filtered DataFrame
print(filtered_df.shape)

(4726, 2)


In [1]:
from transformers import BertTokenizer, BertForSequenceClassification
from transformers import pipeline

finbert = BertForSequenceClassification.from_pretrained('yiyanghkust/finbert-tone',num_labels=3)
tokenizer = BertTokenizer.from_pretrained('yiyanghkust/finbert-tone')

nlp = pipeline("sentiment-analysis", model=finbert, tokenizer=tokenizer)

sentences = ["there is a shortage of capital, and we need extra financing",  
             "growth is strong and we have plenty of liquidity", 
             "there are doubts about our finances", 
             "profits are flat"]
results = nlp(sentences)
print(results)  #LABEL_0: neutral; LABEL_1: positive; LABEL_2: negative


  return self.fget.__get__(instance, owner)()


[{'label': 'Negative', 'score': 0.9966173768043518}, {'label': 'Positive', 'score': 1.0}, {'label': 'Negative', 'score': 0.9999710321426392}, {'label': 'Neutral', 'score': 0.9889442920684814}]


In [6]:
# Analyze sentiment for each news title in the filtered DataFrame
# filtered_df['Sentiment'] = filtered_df['Title'].apply(lambda x: nlp(x)[0]['label'])
# filtered_df['Sentiment_Score'] = filtered_df['Title'].apply(lambda x: nlp(x)[0]['score'])

# Display the first few rows of the updated DataFrame
filtered_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['Sentiment'] = filtered_df['Title'].apply(lambda x: nlp(x)[0]['label'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['Sentiment_Score'] = filtered_df['Title'].apply(lambda x: nlp(x)[0]['score'])


Unnamed: 0,Title,Date,Sentiment,Sentiment_Score
0,"ATMs to become virtual bank branches, accept d...",2020-05-26,Neutral,0.999943
1,IDFC First Bank seniors to forgo 65% of bonus ...,2020-05-26,Neutral,0.951466
2,"Huge scam in YES Bank for many years, says Enf...",2020-05-25,Neutral,0.996678
3,"Bank of Maharashtra sanctioned Rs 2,789 cr in ...",2020-05-24,Neutral,0.995092
4,DCB Bank's profit before tax declines 37.6% to...,2020-05-23,Negative,0.999972


In [7]:
filtered_df.to_csv('filtered_dataset_with_sentiment.csv', index=False)

In [None]:
import openai

# Set up your OpenAI API key
openai.api_key = 'your-api-key-here'

# Define a function to get a response from OpenAI API
def get_openai_response(prompt):
    """
    Generates a response from OpenAI's GPT-3 model based on the provided prompt.

    Args:
        prompt (str): The input text prompt to generate a response for.

    Returns:
        str: The generated response text from the model.
    """
    response = openai.Completion.create(
        engine="text-davinci-003",
        prompt=prompt,
        max_tokens=100
    )
    return response.choices[0].text.strip()

# Loop over the top 10 headlines in the DataFrame
for headline in df['Title'].head(10):
    prompt = f"""
    Extract the following features from the news headline:
    1. Named Entities
    2. Topic
    3. Sector/Industry
    4. Financial Metrics
    5. Tone

    Headline: "{headline}"
    """
    response = get_openai_response(prompt)
    print(f"Headline: {headline}\nResponse: {response}\n")

| **News** | **Sentiment** | **Named Entities** | **Topic** | **Sector/Industry** | **Financial Metrics** | **Tone** |
|-------------------------------------------|----------------|-----------------------------|-------------------------|------------------------|--------------------------|------------------|
| NBFC yields yet to show contraction...    | Neutral        | NBFC, Govt                  | Spreads                  | Finance                 | None                     | Neutral          |
| Sitharaman gives liquidity boost...       | Positive       | Sitharaman, Shadow Banks, MFIs | Liquidity Boost         | Finance                 | Rs 75,000 cr             | Optimistic       |
| Kotak Bank profit before tax slips...     | Negative       | Kotak Bank                  | Earnings Report          | Banking                 | 10.6% decrease, Rs 2,674 cr | Cautionary    |
| Differentiated bank holdco norms...       | Positive       | Bandhan, IDFC First          | Regulation Change        | Banking                 | None                     | Optimistic       |
| Bad bank may start with Rs 60K-crore...   | Neutral        | Bad Bank, Govt               | NPA Management           | Banking                 | Rs 60K-crore NPAs         | Neutral          |
| Some MSMEs may need deep restructuring... | Negative       | MSMEs, Union Bank            | Restructuring            | Finance                 | None                     | Cautionary       |
| IBA considering proposal to set up...     | Positive       | IBA, PSBs                   | NPA Management           | Banking                 | None                     | Optimistic       |
| Not offering any emergency loan...        | Neutral        | SBI, YONO                   | Clarification            | Banking                 | None                     | Neutral          |
