In [1]:
import pandas as pd
from datetime import datetime
import pytz
from pymongo import MongoClient
from transformers import pipeline

def load_sentiment_keywords(excel_path=None, sheet_name="Sentiment"):
    """Load sentiment keywords from the first column of an Excel sheet.

    Args:
        excel_path: Workbook to read. When omitted, the workbook path that was
            originally hard-coded in this function is used, so existing
            no-argument calls keep working.
        sheet_name: Worksheet to read; it is parsed without a header row.

    Returns:
        set[str]: The first-column values, converted to text, stripped of
        surrounding whitespace and lower-cased. Blank cells are dropped.
        An empty set is returned (after printing the error) when the workbook
        cannot be read.
    """
    if excel_path is None:
        excel_path = r"C:\Users\91908\Documents\Raja\Share market\Analysis\Trendlyne\Data\Scrip\Scrip23012024.xlsx"

    try:
        df = pd.read_excel(excel_path, sheet_name=sheet_name, header=None)
        if df.empty:
            return set()

        # Drop blank cells first: otherwise NaN would end up inside the set.
        first_column = df.iloc[:, 0].dropna()
        # astype(str) keeps numeric cells usable; the .str accessor would
        # otherwise raise on a non-text column and discard the whole list.
        keywords = first_column.astype(str).str.strip().str.lower()
        return {keyword for keyword in keywords if keyword}
    except Exception as e:
        print(f"Error loading sentiment keywords: {e}")
        return set()

# Sentiment pipelines that have already been built, keyed by model name.
_SENTIMENT_PIPELINES = {}


def _get_sentiment_pipeline(model_name):
    """Return the sentiment pipeline for ``model_name``, building it on first use only."""
    if model_name not in _SENTIMENT_PIPELINES:
        _SENTIMENT_PIPELINES[model_name] = pipeline("sentiment-analysis", model=model_name)
    return _SENTIMENT_PIPELINES[model_name]


def analyze_sentiment(text):
    """Run the multilingual sentiment pipeline on ``text``.

    The pipeline is built once and reused on later calls; previously it was
    rebuilt on every call, i.e. once per spreadsheet row.

    Args:
        text: The content to classify; passed to the pipeline unchanged.

    Returns:
        The first element of the pipeline result (callers in this file read
        its "label" and "score" keys), or None when the pipeline returns
        nothing or raises while classifying.

    Raises:
        Whatever the pipeline constructor raises if the model cannot be
        loaded; only the classification call itself is guarded.
    """
    sentiment_analysis = _get_sentiment_pipeline("nlptown/bert-base-multilingual-uncased-sentiment")
    try:
        result = sentiment_analysis(text)
        return result[0] if result else None
    except Exception as e:
        print(f"Error analyzing sentiment: {e}")
        return None

def insert_record_into_mongodb(record, database_name, collection_name,
                               uri="mongodb://localhost:27017", client=None):
    """Insert ``record`` unless a document with the same paragraph text exists.

    Args:
        record: Document to store; its "paragraph_content" value is the
            duplicate-detection key.
        database_name: Name of the target database.
        collection_name: Name of the target collection.
        uri: Connection string used when no ``client`` is supplied. Defaults
            to the local server the function always used.
        client: Optional already-open MongoClient to reuse. When given it is
            NOT closed here, so one connection can serve many inserts.

    Returns:
        None. The outcome (inserted, skipped, or the error) is printed.
    """
    owns_client = client is None
    if owns_client:
        client = MongoClient(uri)

    try:
        # Inside try/finally so a failing lookup can no longer leak the client.
        collection = client[database_name][collection_name]
        try:
            # Check if the paragraph_content already exists
            if not record_exists_in_mongodb(collection, record["paragraph_content"]):
                collection.insert_one(record)
                print("Record inserted successfully.")
            else:
                print("Record with paragraph_content already exists. Skipping insertion.")
        except Exception as e:
            print(f"Error inserting record into MongoDB: {e}")
    finally:
        # Only close connections this call opened itself.
        if owns_client:
            client.close()

def record_exists_in_mongodb(collection, paragraph_content):
    """Report whether ``collection`` already holds a document with this paragraph text.

    Runs ``find_one`` with an exact-match filter on the "paragraph_content"
    field and returns True when a document comes back, False otherwise.
    """
    lookup = {"paragraph_content": paragraph_content}
    return collection.find_one(lookup) is not None

def read_excel_data(excel_path, sheet_name, sentiment_keywords):
    try:
        df = pd.read_excel(excel_path, sheet_name=sheet_name, header=None, skiprows=1)
        
        for index, row in df.iterrows():
            paragraph_content = row[0]  # Access content from the first column
            sentiment_result = analyze_sentiment(paragraph_content)

            if sentiment_result:
                sentiment_data = {
                    "label": sentiment_result["label"],
                    "confidence": sentiment_result["score"]
                }

                current_datetime_ist = datetime.now(pytz.timezone("Asia/Kolkata"))

                record = {
                    "paragraph_content": paragraph_content,
                    "sentiment": sentiment_data,
                    "created_at": current_datetime_ist,
                    "created_by": "StockEdge123",
                    "deleted_at": current_datetime_ist,
                    "deleted_by": "user789",
                    "is_deleted": False,
                    "is_purged": False,
                    "last_scraped": current_datetime_ist,
                    "purge_at": current_datetime_ist,
                    "purged_by": "admin",
                    "updated_at": current_datetime_ist,
                    "updated_by": "user456"
                }

                database_name = "NewsAnalytics"
                collection_name = "RawNews_StockEdge"
                
                insert_record_into_mongodb(record, database_name, collection_name)
        print("Completed")

    except Exception as e:
        print(f"Error reading Excel data: {e}")

if __name__ == "__main__":
    # Entry point: load the keyword list, then hand the scraped-news workbook
    # to read_excel_data, which scores and stores each paragraph.
    sheet_name = "Sheet1"
    excel_path = r"C:\Users\91908\Documents\Raja\Share market\Analysis\Trendlyne\Data\Scrip\news\stockedge\scrapestockedge.xlsx"
    sentiment_keywords = load_sentiment_keywords()
    read_excel_data(
        excel_path=excel_path,
        sheet_name=sheet_name,
        sentiment_keywords=sentiment_keywords,
    )


Record inserted successfully.
Record inserted successfully.
Record inserted successfully.
Record inserted successfully.
Record inserted successfully.
Record inserted successfully.
Record inserted successfully.
Record inserted successfully.
Record inserted successfully.
Record inserted successfully.
Record inserted successfully.
Record inserted successfully.
Record inserted successfully.
Record inserted successfully.
Record inserted successfully.
Record inserted successfully.
Record inserted successfully.
Record inserted successfully.
Record inserted successfully.
Record inserted successfully.
Record inserted successfully.
Record inserted successfully.
Record inserted successfully.
Record inserted successfully.
Record inserted successfully.
Record inserted successfully.
Record inserted successfully.
Record inserted successfully.
Record inserted successfully.
Record inserted successfully.
Record inserted successfully.
Record inserted successfully.
Record inserted successfully.
Record ins

In [2]:
import os
from datetime import datetime

import certifi
import numpy as np
import pytz
from pymongo import MongoClient
from transformers import BertTokenizer, BertForSequenceClassification


# MongoDB connection URI. Read from the MONGODB_URI environment variable so
# credentials never live in the notebook; falls back to the local server.
# For a hosted cluster, export for example:
#   MONGODB_URI="mongodb+srv://<user>:<password>@<cluster-host>/?retryWrites=true&w=majority"
# NOTE(review): a real username/password pair used to be committed in a
# comment here; that credential should be rotated.
uri = os.environ.get("MONGODB_URI", "mongodb://localhost:27017")

# FinBERT tone classifier (configured with 3 labels) and its tokenizer,
# loaded once at cell level and used by analyze_finBERT_sentiment below.
finbert = BertForSequenceClassification.from_pretrained('yiyanghkust/finbert-tone', num_labels=3)
tokenizer = BertTokenizer.from_pretrained('yiyanghkust/finbert-tone')

# def analyze_finBERT_sentiment(text):
#     inputs = tokenizer(text, return_tensors="pt", padding=True)
#     outputs = finbert(**inputs)[0]
#     sentiment_label = np.argmax(outputs.detach().numpy())
#     return sentiment_label

def analyze_finBERT_sentiment(text):
    """Classify ``text`` with the module-level FinBERT model.

    Args:
        text: Content to classify; passed to the module-level ``tokenizer``.

    Returns:
        int: Position of the largest value in the model's first output
        (argmax over the flattened array). Returns 0 when tokenisation or
        inference raises, after printing the error. That failure value is
        indistinguishable from a genuine class index 0.
    """
    try:
        # Inputs longer than the model's position limit make the forward pass
        # fail, which used to turn every long paragraph into a silent 0.
        # Falls back to 512 when the model exposes no config value
        # (presumably the BERT limit; confirm for other checkpoints).
        max_length = getattr(getattr(finbert, "config", None), "max_position_embeddings", None) or 512
        inputs = tokenizer(
            text,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=max_length,
        )
        outputs = finbert(**inputs)[0]
        # Plain int rather than a numpy integer, so the value can be stored
        # or serialised directly.
        return int(np.argmax(outputs.detach().numpy()))
    except Exception as e:
        print(f"Error analyzing FinBERT sentiment: {e}")
        return 0

# Score every stored paragraph that has no FinBertScore yet and write the
# score plus update metadata back to the same document.
try:
    with MongoClient(uri) as client:
        # Specify the database and collection
        database_name = "NewsAnalytics"
        collection_name = "RawNews_StockEdge"
        collection = client[database_name][collection_name]

        # Total number of documents in the collection. Not used by the loop
        # below; kept because other cells may read it.
        total_records = collection.count_documents({})

        # Let the server pick only unscored documents and return just the
        # field the loop needs, instead of fetching every document in full
        # and filtering in Python.
        pending_filter = {
            "paragraph_content": {"$exists": True},
            "FinBertScore": {"$exists": False},
        }
        pending_records = collection.find(pending_filter, {"paragraph_content": 1})

        for record in pending_records:
            # analyze_finBERT_sentiment returns 0 on failure, so a failed
            # analysis is stored as score 0 (unchanged behaviour).
            score = int(analyze_finBERT_sentiment(record["paragraph_content"]))
            current_datetime_ist = datetime.now(pytz.timezone("Asia/Kolkata"))

            # Add the new field "FinBertScore" and refresh the update metadata
            update_data = {
                "$set": {
                    "FinBertScore": score,
                    "updated_at": current_datetime_ist,
                    "updated_by": "FIN_BERT_Admin"
                }
            }
            collection.update_one({"_id": record["_id"]}, update_data)

            # Print statement for successful entry
            print(f"Processed record {record['_id']} - FinBertScore: {score}, Updated at: {current_datetime_ist}, Updated by: FIN_BERT_Admin")
        print("Completed")
except Exception as e:
    print(f"An unexpected error occurred: {e}")


Processed record 65e54f3163b832748e2b28bd - FinBertScore: 0, Updated at: 2024-03-04 10:05:21.232481+05:30, Updated by: FIN_BERT_Admin
Processed record 65e54f3263b832748e2b28bf - FinBertScore: 1, Updated at: 2024-03-04 10:05:21.347572+05:30, Updated by: FIN_BERT_Admin
Processed record 65e54f3463b832748e2b28c1 - FinBertScore: 0, Updated at: 2024-03-04 10:05:21.481110+05:30, Updated by: FIN_BERT_Admin
Processed record 65e54f3563b832748e2b28c3 - FinBertScore: 0, Updated at: 2024-03-04 10:05:21.605167+05:30, Updated by: FIN_BERT_Admin
Processed record 65e54f3663b832748e2b28c5 - FinBertScore: 0, Updated at: 2024-03-04 10:05:21.756437+05:30, Updated by: FIN_BERT_Admin
Processed record 65e54f3763b832748e2b28c7 - FinBertScore: 0, Updated at: 2024-03-04 10:05:21.878990+05:30, Updated by: FIN_BERT_Admin
Processed record 65e54f3863b832748e2b28c9 - FinBertScore: 0, Updated at: 2024-03-04 10:05:22.047357+05:30, Updated by: FIN_BERT_Admin
Processed record 65e54f3963b832748e2b28cb - FinBertScore: 0, U