In [3]:
from enum import Enum
from os import environ, getcwd
from dotenv import load_dotenv
from pathlib import Path

import torch
from transformers import AutoModel, AutoTokenizer, AutoModelForCausalLM, LlamaForCausalLM, LlamaTokenizerFast, pipeline
from peft import PeftModel
from datasets import load_dataset

# Load environment variables from load_environment.env
if load_dotenv('load_environment.env'):
    print("Successfully loaded environment variables")
    hf_api_key = environ.get("HUGGINGFACE_API_KEY")
else:
    print("Error loading environment variables")
    hf_api_key = ""

Successfully loaded environment variables


In [4]:
# RECCS https://github.com/AI4Finance-Foundation/FinGPT/tree/master/fingpt/FinGPT_Forecaster

base_model = AutoModelForCausalLM.from_pretrained(
    'meta-llama/Llama-2-7b-chat-hf',
    trust_remote_code=True,
    token = hf_api_key
    # torch_dtype=torch.float16,   # optional if you have enough VRAM
).to("mps")
tokenizer = AutoTokenizer.from_pretrained('FinGPT/fingpt-forecaster_dow30_llama2-7b_lora')

model = PeftModel.from_pretrained('FinGPT/fingpt-forecaster_dow30_llama2-7b_lora').to("mps")
model = model.eval()

B_INST, E_INST = "[INST]", "[/INST]"
B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"

prompt = B_INST + B_SYS + {SYSTEM_PROMPT} + E_SYS + {YOUR_PROMPT} + E_INST
inputs = tokenizer(
    prompt, return_tensors='pt'
).to("mps")
inputs = {key: value.to(model.device) for key, value in inputs.items()}
        
res = model.generate(
    **inputs, max_length=4096, do_sample=True,
    eos_token_id=tokenizer.eos_token_id,
    use_cache=True
)
output = tokenizer.decode(res[0], skip_special_tokens=True)
answer = re.sub(r'.*\[/INST\]\s*', '', output, flags=re.DOTALL) # don't forget to import re

Downloading (…)lve/main/config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

Downloading (…)fetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

Downloading (…)of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



Downloading (…)neration_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

OSError: FinGPT/fingpt-forecaster_dow30_llama2-7b_lora does not appear to have a file named config.json. Checkout 'https://huggingface.co/FinGPT/fingpt-forecaster_dow30_llama2-7b_lora/main' for available files.

In [None]:
class Sentiment(Enum):
    Negative = "negative"
    Neutral = "neutral"
    Positive = "positive"
    Invalid = "invalid"

def classify_sentiment(s : str) -> Sentiment:
    s = s.lower()
    if Sentiment.Negative.value in s:
        return Sentiment.Negative
    if Sentiment.Neutral.value in s:
        return Sentiment.Neutral
    if Sentiment.Positive.value in s:
        return Sentiment.Positive
    return Sentiment.Invalid

def create_sentiment_prompt(prompt : str) -> str:
    return "Instruction: What is the sentiment of this news? Please choose an answer from {negative/neutral/positive}\nInput: %s .\nAnswer: " % prompt

def sentiment_analysis_fingpt(prompts : [str]) -> [str]:
    """ Performs sentiment analysis using Llama2 & FinGPT """
    # Load Models
    base_model = "NousResearch/Llama-2-13b-hf" 
    peft_model = "FinGPT/fingpt-sentiment_llama2-13b_lora"
    tokenizer = LlamaTokenizerFast.from_pretrained(base_model)
    tokenizer.pad_token = tokenizer.eos_token
    model = LlamaForCausalLM.from_pretrained(base_model).to("mps")
    model = PeftModel.from_pretrained(model, peft_model).to("mps")
    model = model.eval()
    
    # Generate results
    tokens = tokenizer(prompts, return_tensors='pt', padding=True, max_length=512).to("mps")
    res = model.generate(**tokens, max_length=512)
    res_sentences = [tokenizer.decode(i) for i in res]

    # Classify sentiments
    out_sentiment = [classify_sentiment(o.split("Answer: ")[1]) for o in res_sentences]
    return out_sentiment

In [None]:
# Make prompts
p1 = "FINANCING OF ASPOCOMP 'S GROWTH Aspocomp is aggressively pursuing its growth strategy by increasingly focusing on technologically more demanding HDI printed circuit boards PCBs ."
p2 = "According to Gran , the company has no plans to move all production to Russia , although that is where the company is growing ."
p3 = "A tinyurl link takes users to a scamming site promising that users can earn thousands of dollars by becoming a Google ( NASDAQ : GOOG ) Cash advertiser ."
prompts = [p1,p2,p3]
out = zip(prompts, sentiment_analysis_fingpt([create_sentiment_prompt(p) for p in prompts]))
for p,s in out:
    print("%s --> %s" % (s,p))