# Storyteller

In [1]:
import pandas as pd
from transformers import AutoModelForCausalLM, AutoModelForTokenClassification, AutoTokenizer, pipeline


In [4]:
from evaluation.evaluate import evaluate_text
from evaluation.generate_prompt import generate_prompt
from evaluation.generate_chapter import generate_chapter

## Models and tokenizers

In [4]:
# Generation pipeline
# The checkpoint name is shared by the model and its tokenizer, so keep it in
# one place to stop the two from drifting apart.
phi_checkpoint = "microsoft/Phi-3-mini-128k-instruct"

phi_model = AutoModelForCausalLM.from_pretrained(
    phi_checkpoint,
    # device_map="cuda",  # uncomment to place the weights on a GPU
    torch_dtype="auto",
    # NOTE(review): trust_remote_code executes Python shipped with the Hub
    # checkpoint; keep it only for checkpoints you trust.
    trust_remote_code=True,
)
phi_tokenizer = AutoTokenizer.from_pretrained(phi_checkpoint)

# The model is loaded as a causal LM, so it must be served through the
# "text-generation" task. "fill-mask" targets masked-LM checkpoints and cannot
# drive a causal model. The prompts built later are lists of role/content
# messages, which is a form the text-generation pipeline accepts.
phi_pipe = pipeline("text-generation", model=phi_model, tokenizer=phi_tokenizer)


# Evaluation pipeline: a named-entity-recognition pipeline that is later passed
# to evaluate_text together with the source text and the generated chapter.
# NOTE(review): the tokenizer comes from "bert-base-cased" while the weights are
# a bert-large-cased fine-tune; presumably both share the same vocabulary —
# confirm before swapping either checkpoint.
NER_tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
NER_model = AutoModelForTokenClassification.from_pretrained(
    "dbmdz/bert-large-cased-finetuned-conll03-english"
)
NER_pipe = pipeline(task="ner", tokenizer=NER_tokenizer, model=NER_model)

## Data

In [2]:
# Read the JSON file, drop the "row_idx" and "truncated_cells" columns, and
# rename the remaining "row" column to "text" in a single chain.
data = (
    pd.read_json("data/data.json")
    .drop(columns=["row_idx", "truncated_cells"])
    .rename(columns={"row": "text"})
)

# Each cell of "text" is still a dict here; keep only the value under its
# "text" key so the column holds the plain value.
data["text"] = data["text"].apply(lambda record: record["text"])

data.head()

Unnamed: 0,text
0,"I am in my 30 's , married and have recently ,..."
1,It strikes me that I never posted the link to ...
2,Hye was waiting for her usual bus at the usual...
3,""" Remember , go straight to Grandma 's house ,..."
4,Leave a reply Tisha Porter didn 't think she w...


In [5]:
# Add prompt column
# Upper bound handed to generate_prompt as its second argument.
# NOTE(review): presumably a length limit for the prompt text — confirm the
# unit (characters vs. tokens) in evaluation/generate_prompt.
prompt_max_len = 1000

# Build every prompt in one pass over the "text" column. This replaces the
# iterrows() loop that wrote cell by cell into a column pre-filled with "":
# the result is the same, but the cell no longer leaks loop variables
# (i, row, prompt) into the kernel namespace and does not depend on the
# placeholder column's dtype accepting list values.
data["prompt"] = data["text"].apply(
    lambda source_text: generate_prompt(source_text, prompt_max_len)
)

data.head()


Unnamed: 0,text,prompt
0,"I am in my 30 's , married and have recently ,...","[{'role': 'system', 'content': 'You are a skil..."
1,It strikes me that I never posted the link to ...,"[{'role': 'system', 'content': 'You are a skil..."
2,Hye was waiting for her usual bus at the usual...,"[{'role': 'system', 'content': 'You are a skil..."
3,""" Remember , go straight to Grandma 's house ,...","[{'role': 'system', 'content': 'You are a skil..."
4,Leave a reply Tisha Porter didn 't think she w...,"[{'role': 'system', 'content': 'You are a skil..."


## Generation

In [7]:
# Use the row at position 7 as the worked example and unpack the two fields
# that the generation and evaluation cells consume.
sample_row = data.iloc[7]
text, prompt = sample_row["text"], sample_row["prompt"]


In [None]:
# prompt

In [None]:
# Generate a chapter for the sample prompt with the Phi pipeline.
# NOTE(review): 500 is presumably the generation length budget (e.g. max new
# tokens) — confirm against generate_chapter's signature.
chapter = generate_chapter(prompt, 500, phi_pipe)

In [None]:
# chapter

## Evaluation

In [8]:
# Call generate_prompt again on the sample text with RAW_TEXT=True; the result
# is what evaluate_text receives alongside the generated chapter.
# NOTE(review): presumably RAW_TEXT=True returns plain text limited by
# prompt_max_len instead of chat messages — confirm in generate_prompt.
raw_text = generate_prompt(text, prompt_max_len, RAW_TEXT=True)
# raw_text

In [9]:
# Evaluate the generated chapter against the raw source text using the NER
# pipeline; evaluate_text returns three values, unpacked below.
# `chapter` is assigned only by the generate_chapter cell, so that cell must
# have been run in this kernel before this one.
# NOTE(review): text_NE / chapter_NE are presumably the named entities found in
# each text — confirm in evaluation/evaluate.
evaluation_dict, text_NE, chapter_NE = evaluate_text(raw_text, chapter, NER_pipe)

In [None]:
# Display the first value returned by evaluate_text.
evaluation_dict

In [None]:
# Display the second value returned by evaluate_text.
text_NE

In [None]:
# Display the third value returned by evaluate_text.
chapter_NE