In [None]:
!pip install transformers

In [25]:
import PyPDF2
from transformers import pipeline
import json
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline

# Checkpoint identifier shared by the tokenizer and the classification model.
model_name = "distilbert-base-uncased-finetuned-sst-2-english"
# Load the tokenizer and the sequence-classification model for that checkpoint;
# both are handed to the sentiment pipeline further down.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_path: Path of the PDF file to open (opened in binary mode).

    Returns:
        One string holding each page's extracted text, joined with no
        separator. A page whose extraction yields nothing contributes an
        empty string.
    """
    with open(pdf_path, "rb") as file:
        reader = PyPDF2.PdfReader(file)
        # The join must run while the file is still open, because pages are
        # read through `reader`. `or ""` keeps the join safe if extract_text()
        # yields a falsy value (None or an empty string) for some page.
        return "".join(page.extract_text() or "" for page in reader.pages)


# Relative paths of the quarterly documents to analyse (earnings-call
# transcripts and financial data for Q1-Q3 FY24).
pdf_paths = [
    "Q1 FY24 Earnings Call Transcript.pdf",
    "Q1 FY24 Financial Data.pdf",
    "Q2 FY24 Earnings Call Transcript.pdf",
    "Q2 FY24 Financial Data.pdf",
    "Q3 FY24 Earnings Call Transcript.pdf",
    "Q3 FY24 Financial Data.pdf",
]

# Map each path to the text extracted from that file.
pdf_contents = {path: extract_text_from_pdf(path) for path in pdf_paths}

# Merge all documents into a single string, appending a newline after each one.
concatenated_text = "".join(
    document + "\n" for document in pdf_contents.values()
)


# Number of characters per chunk: the combined text is fed to the pipeline in
# fixed-size character slices rather than as one string.
CHUNK_SIZE = 500

sentiment_pipeline = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
text_chunks = [
    concatenated_text[start:start + CHUNK_SIZE]
    for start in range(0, len(concatenated_text), CHUNK_SIZE)
]

# Classify each chunk. Every call returns a list of result dicts (read below
# via their "label" and "score" keys), so extend() flattens them into a single
# list in chunk order.
# truncation=True asks the tokenizer to clip a chunk that would exceed the
# model's maximum sequence length instead of letting the call fail.
sentiment_results = []
for chunk in text_chunks:
    sentiment_results.extend(sentiment_pipeline(chunk, truncation=True))

# Re-key every raw pipeline result into the field names used in the report.
results = [
    {"sentiment": entry['label'], "confidence": entry['score']}
    for entry in sentiment_results
]

# Wrap the list in a top-level object and pretty-print it as JSON.
output_json = {"sentiment_results": results}
print(json.dumps(output_json, indent=4))


{
    "sentiment_results": [
        {
            "sentiment": "POSITIVE",
            "confidence": 0.8399354219436646
        },
        {
            "sentiment": "POSITIVE",
            "confidence": 0.9484481811523438
        },
        {
            "sentiment": "POSITIVE",
            "confidence": 0.9984261989593506
        },
        {
            "sentiment": "POSITIVE",
            "confidence": 0.999622106552124
        },
        {
            "sentiment": "POSITIVE",
            "confidence": 0.9987766146659851
        },
        {
            "sentiment": "POSITIVE",
            "confidence": 0.9993540644645691
        },
        {
            "sentiment": "NEGATIVE",
            "confidence": 0.897016704082489
        },
        {
            "sentiment": "POSITIVE",
            "confidence": 0.9982825517654419
        },
        {
            "sentiment": "POSITIVE",
            "confidence": 0.999648928642273
        },
        {
            "sentiment": "POSITIVE",
