In [None]:
import os
# Make the parent directory the working directory; the relative './data/...'
# paths used further down are resolved against it.
# Not idempotent: re-running this cell moves one more level up each time.
os.chdir("..")

In [None]:
import os

import time
import pandas as pd
from tqdm import tqdm

from transformers import AutoTokenizer, AutoModelForCausalLM
import transformers
import torch

In [None]:
# Register tqdm's pandas integration, which adds `progress_apply` to pandas
# objects so long-running applies can display a progress bar.
tqdm.pandas()

In [None]:
from huggingface_hub import notebook_login
# Interactive Hugging Face Hub login from inside the notebook.
# NOTE(review): presumably needed to download the Llama 2 weights — confirm.
notebook_login()

# Get predictions from Llama 2

In [None]:
# Hugging Face Hub id of the checkpoint; used for both the tokenizer and the
# text-generation pipeline created in the following cells.
model_name = "meta-llama/Llama-2-7b-chat-hf"

In [None]:
# Tokenizer matching `model_name`. In the visible cells it is only used for its
# `eos_token_id`, inside `get_LLAMA_pred`.
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [None]:
# Text-generation pipeline for `model_name`, loaded with float16 weights and
# mapped to the CUDA device. Kept in the global `pipeline`, which
# `get_LLAMA_pred` calls for every prompt.
pipeline = transformers.pipeline(
    "text-generation",
    model=model_name,
    torch_dtype=torch.float16,
    device_map="cuda"
)

In [None]:
# NOTE(review): BATCH_SIZE is not referenced by any of the visible cells; the
# prompts below are sent to the pipeline one at a time.
BATCH_SIZE = 12

In [None]:
def get_LLAMA_pred(text_frmtd, claim, prompt):
    """Ask the Llama 2 pipeline for a one-token answer to a few-shot prompt.

    Parameters
    ----------
    text_frmtd : str
        Text substituted for the ``{TWEET}`` placeholder of ``prompt``.
    claim : str
        Claim or topic substituted for the ``{CLAIM}`` placeholder.
    prompt : str
        Template containing the ``{TWEET}`` and ``{CLAIM}`` placeholders.

    Returns
    -------
    str
        The last whitespace-separated word of the newly generated text, or
        ``""`` when the model produced only whitespace / nothing at all.

    Notes
    -----
    Relies on the module-level ``pipeline`` and ``tokenizer`` objects.
    Generation uses top-k sampling (``do_sample=True``), so the answer for a
    given prompt is not guaranteed to be the same across runs.
    """
    prompt_frmt = prompt.format(
        TWEET=text_frmtd,
        CLAIM=claim,
    )

    sequences = pipeline(
        prompt_frmt,
        do_sample=True,
        top_k=10,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        max_new_tokens=1,
        # Return only the continuation. With the full text (prompt + answer),
        # an empty generation would make the prompt's own last word
        # ("Answer:" / "Stance:") look like the model's prediction.
        return_full_text=False,
    )

    words = sequences[0]['generated_text'].split()

    # An empty continuation yields no words; report it as an empty answer
    # instead of raising IndexError.
    return words[-1] if words else ""

def get_llama_2_claim_preds(text, claims_list, prompt):
    """Query Llama 2 once per claim and collect the raw answers.

    Parameters
    ----------
    text : str
        Text to be judged against every claim.
    claims_list : iterable of str
        Claims (or topics) to test; a repeated claim keeps its last answer.
    prompt : str
        Few-shot template forwarded unchanged to ``get_LLAMA_pred``.

    Returns
    -------
    dict
        Maps each claim to the string returned by ``get_LLAMA_pred``, in the
        order the claims were first seen.
    """
    return {
        candidate: get_LLAMA_pred(text, candidate, prompt)
        for candidate in claims_list
    }

## Climate change task

In [None]:
# Load the climate-change testing frame (path is relative to the current
# working directory). Unpickling can execute arbitrary code, so only load
# pickle files that come from a trusted source.
df = pd.read_pickle('./data/climate_change/testing.pkl')

In [None]:
# Every distinct claim that appears in any row of the "FSL_BART" column,
# de-duplicated and sorted so the per-row prediction dicts share one key order.
claims_list_cc = sorted(
    {
        claim
        for row_claims in df["FSL_BART"].to_list()
        for claim in row_claims
    }
)

In [None]:
# Few-shot template for the climate-change task: an instruction, two worked
# examples, then the `{TWEET}` / `{CLAIM}` slots filled in by `get_LLAMA_pred`.
# The template stops right after "Answer: " so the next generated token is the
# label.
# NOTE(review): the instruction asks for YES/NO while the examples answer
# "Yes"/"No"; compare the model's answers case-insensitively.
prompt_cc = '''Decide whether the text implies the claim, answering with YES or NO. For example:

Text: If you discover Earths temperature has stabilized at a local high since 1998, you can expect it to begin cooling soon. Maybe -15C. Because a new ice age has begun.
Claim: We are heading into an ice age
Answer: Yes

Text: Congress and the next Administration should open access to America's abundant reserves, reduce the regulatory burden, and let states regulate energy production within their borders.
Claim: Global warming is not happening
Answer: No

Text: {TWEET}
Claim: {CLAIM}
Answer: '''

In [None]:
# Test every climate-change claim against every text: one LLM call per
# (row, claim) pair. `progress_apply` (registered by the `tqdm.pandas()` cell
# near the top of the notebook) behaves like `.apply` but shows a progress bar,
# which a silent `.apply` does not for a run of this length.
df["Llama-2"] = df["text"].progress_apply(
    lambda x: get_llama_2_claim_preds(x, claims_list_cc, prompt_cc)
)

In [None]:
# Write the frame, now carrying the "Llama-2" column, back to the same path it
# was loaded from; the input pickle is overwritten in place.
df.to_pickle('./data/climate_change/testing.pkl')

## Topic stance task

In [None]:
# Load the topic/stance testing frame, rebinding `df` (the climate-change frame
# is no longer reachable under this name). Unpickling can execute arbitrary
# code, so only load pickle files that come from a trusted source.
df = pd.read_pickle('./data/topic_stance/testing.pkl')

### Topic

In [None]:
# Distinct values of the "Target" column, sorted; these are the candidate
# topics each text is tested against.
claims_list_t = sorted(set(df["Target"].to_list()))

In [None]:
# Few-shot template for the topic-detection step: an instruction, two worked
# examples, then the `{TWEET}` / `{CLAIM}` slots filled in by `get_LLAMA_pred`
# (the topic is passed through the `CLAIM` slot).
# NOTE(review): the instruction asks for YES/NO while the examples answer
# "Yes"/"No"; compare the model's answers case-insensitively.
prompt_t = '''Decide whether the topic relates to the text, answering with YES or NO. For example:

Text: SO EXCITING! Meaningful climate change action is on the way!
Topic: Climate Change is a Real Concern
Answer: Yes

Text: Blessed are the peacemakers, for they shall be called children of God. Matthew 5:9
Topic: Feminist Movement
Answer: No

Text: {TWEET}
Topic: {CLAIM}
Answer: '''

In [None]:
# Topic step: ask, for every text, whether each candidate topic relates to it
# (one LLM call per (row, topic) pair). `progress_apply` (registered by the
# `tqdm.pandas()` cell near the top of the notebook) behaves like `.apply` but
# shows a progress bar for this long-running loop.
df["Llama-2"] = df["text"].progress_apply(
    lambda x: get_llama_2_claim_preds(x, claims_list_t, prompt_t)
)

### Stance

In [None]:
# Few-shot template for the stance step; same slots as the other templates but
# the answer field is labelled "Stance: ".
# NOTE(review): the instruction sentence still reads "Decide whether the topic
# relates to the text" (the wording of the topic template) although the
# requested answers are stances (ANTI / PRO / NEUTRAL) — confirm whether this
# wording is intended before re-running experiments.
# NOTE(review): no NEUTRAL example is given, and the examples answer
# "Pro"/"Anti" while the instruction uses upper case.
prompt_s = '''Decide whether the topic relates to the text, answering with ANTI, PRO or NEUTRAL. For example:

Text: SO EXCITING! Meaningful climate change action is on the way! 
Topic: Climate Change is a Real Concern
Stance: Pro

Text: Let's agree that it's not ok to kill a 7lbs baby in the uterus
Topic: Legalization of Abortion
Stance: Anti

Text: {TWEET}
Topic: {CLAIM}
Stance: '''

In [None]:
def get_llama_2_stance_pred(text, topic_preds):
    """Predict a stance for every topic the topic step accepted.

    Parameters
    ----------
    text : str
        Text whose stance is to be determined.
    topic_preds : dict
        Maps each topic to the raw answer of the topic step (the dicts built
        by ``get_llama_2_claim_preds``).

    Returns
    -------
    dict
        Maps each topic whose topic answer is "yes" (ignoring case and
        surrounding whitespace) to the raw stance answer returned by
        ``get_LLAMA_pred`` for the stance template ``prompt_s``. Topics that
        were not accepted are left out.
    """
    results = dict()
    for claim, topic_answer in topic_preds.items():
        # Only ask for a stance on topics the model judged to be related.
        if topic_answer.lower().strip() == "yes":
            results[claim] = get_LLAMA_pred(text, claim, prompt_s)
    return results

In [None]:
# Stance step: for every row, ask for a stance on each topic the topic step
# answered "yes" to. `progress_apply` (registered by the `tqdm.pandas()` cell
# near the top of the notebook) behaves like `.apply` but shows a progress bar.
# NOTE(review): the text is read from column "Tweet" here, while the topic step
# reads df["text"] — confirm both columns exist and hold the same text.
df["Llama-2_STANCE"] = df.progress_apply(
    lambda x: get_llama_2_stance_pred(x["Tweet"], x["Llama-2"]), axis=1
)

In [None]:
# Write the frame, now carrying the "Llama-2" and "Llama-2_STANCE" columns,
# back to the same path it was loaded from; the input pickle is overwritten.
df.to_pickle('./data/topic_stance/testing.pkl')

## Depression task

In [None]:
# Load the depression testing frame, rebinding `df` once more. Unpickling can
# execute arbitrary code, so only load pickle files from a trusted source.
df = pd.read_pickle('./data/depression/testing.pkl')

In [None]:
# Every distinct claim that appears in any row of the "FSL_BART" column,
# de-duplicated and sorted so the per-row prediction dicts share one key order.
claims_list_depr = sorted(
    {
        claim
        for row_claims in df["FSL_BART"].to_list()
        for claim in row_claims
    }
)

In [None]:
# Few-shot template for the depression task: an instruction, two worked
# examples, then the `{TWEET}` / `{CLAIM}` slots filled in by `get_LLAMA_pred`.
# The template stops right after "Answer: " so the next generated token is the
# label.
# NOTE(review): the instruction asks for YES/NO while the examples answer
# "Yes"/"No"; compare the model's answers case-insensitively.
prompt_depr = '''Decide whether the text implies the claim, answering with YES or NO. For example:

Text: Everything I used to be passionate about has gone down the drain.
Claim: It's hard to get interested in anything
Answer: Yes

Text: I cry and care too much, which leaves me burnt out and exhausted.
Claim: I dislike myself
Answer: No

Text: {TWEET}
Claim: {CLAIM}
Answer: '''

In [None]:
# Test every depression claim against every sentence: one LLM call per
# (row, claim) pair. `progress_apply` (registered by the `tqdm.pandas()` cell
# near the top of the notebook) behaves like `.apply` but shows a progress bar
# for this long-running loop.
df["Llama-2"] = df["Sentence"].progress_apply(
    lambda x: get_llama_2_claim_preds(x, claims_list_depr, prompt_depr)
)

In [None]:
# Write the frame, now carrying the "Llama-2" column, back to the same path it
# was loaded from; the input pickle is overwritten in place.
df.to_pickle('./data/depression/testing.pkl')