In [2]:
import rdflib
import pickle
from rdflib import Literal
from fuzzywuzzy import process
from langchain.llms import GPT4All
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

In [None]:
graph = rdflib.Graph().parse(f'dataset/ddis-movie-graph.nt', format="turtle")

In [3]:
with open('../dataset/graph-high-prio.pickle', 'rb') as f:
    # The protocol version used is detected automatically, so we do not
    # have to specify it.
    graph = pickle.load(f)

In [None]:
film_query = graph.query("""
PREFIX ddis: <http://example.org/ddis/>
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX schema: <http://schema.org/>

SELECT DISTINCT ?b ?label WHERE{
  ?b wdt:P31/wdt:P279* wd:Q11424.
  ?b rdfs:label ?label
}
""")

film_dict = {}
for k, v in film_query:
    v = v.toPython().lower()
    while v in film_dict:
        v += "0"
    film_dict[v] = k


In [None]:
with open('film_dict.pickle', 'wb') as f:
    # Pickle the 'data' dictionary using the highest protocol available.
    pickle.dump(film_dict, f, pickle.HIGHEST_PROTOCOL)

In [4]:
question = "Who played in the amazing spiderman? "
with open("../dataset/film_dict.pickle", 'rb') as f:
    film_dict = pickle.load(f)

In [24]:
from rapidfuzz import fuzz, process, utils

movie_titles = film_dict.keys()


def do_fuzz_search(entities):
    # Use fuzzywuzzy to find the closest match in your dictionary to the user query
    best_match = process.extract(entities, movie_titles, processor=utils.default_process, limit=1)

    # best_match is a tuple containing the best matching movie title and a score
    # matching_movie_title, score = best_match
    matched_movies = []
    for movie in best_match:
        print("FUZZYWUZZY results:", movie)
        if int(movie[1]) > 50:
            matched_movies.append(movie[0])
    return matched_movies


print(''.join(['Did Christopher Nolan direct Inception?']))
matched_movies = do_fuzz_search(''.join(['La Princesse de Clèves', 'French Renaissance']))
print(matched_movies)

Did Christopher Nolan direct Inception?
FUZZYWUZZY results: ('princess', 90.0, 935)
['princess']


In [26]:
from collections import defaultdict

query_template = """PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX wd: <http://www.wikidata.org/entity/>
    PREFIX wdt: <http://www.wikidata.org/prop/direct/>
    PREFIX skos: <http://www.w3.org/2004/02/skos/core#>

    SELECT ?predicate ?predicateLabel ?object ?objectLabel WHERE {{
      wd:{0} ?predicate ?object .
      OPTIONAL {{ ?predicate rdfs:label ?predicateLabel . FILTER(LANG(?predicateLabel) = "en") }}
      OPTIONAL {{ ?object rdfs:label ?objectLabel . FILTER(LANG(?objectLabel) = "en") }}
    }}
    """

data = []
# for movie in matched_movies:
#     uri = film_dict[movie]
#     entity = uri.rsplit('/', 1)[-1]
query = query_template.format("Q3211938")
result = graph.query(query)


def add_value(key, value):
    film_info[key].append(value)


film_info = defaultdict(list)
print(result)
for row in result:
    print(row)
    if row[1] is not None and row[3] is not None:
        add_value(str(row[1]), str(row[3]))
    elif row[1] is not None and row[2] is not None:
        add_value(str(row[1]), str(row[2]))
    elif row[1] is None:
        add_value("tag", str(row[2]))

data.append(film_info)
print(data)

<rdflib.plugins.sparql.processor.SPARQLResult object at 0x7fd6add0c220>
[defaultdict(<class 'list'>, {})]


In [None]:
print(data)

In [None]:
def convert_to_string(data):
    result_strings = []  # List to hold the result strings for each dictionary

    # List comprehension to construct the "key:value" strings for each key-value pair
    key_value_strings = [f"{key}:{', '.join(values)}" for key, values in data.items()]
    # Join the "key:value" strings with a newline character to form the result string for this dictionary
    result_string = '\n'.join(key_value_strings)
    result_strings.append(result_string)
    # Separate the result strings for each dictionary with two newline characters
    result = '\n'.join(result_strings)
    return result

In [1]:
test = """node label:The Amazing Spider-Man
tag:action, entertaining, flashback, good_versus_evil, humor, murder, revenge, romantic, violence
node description:2012 American superhero film directed by Marc Webb
film editor:Michael McCusker, Pietro Scalia
genre:teen film, superhero film, action film, thriller film, science fiction film
executive producer:Stan Lee, Kevin Feige
takes place in fictional universe:Earth-120703
based on:The Amazing Spider-Man
cast member:Kari Coleman, Skyler Gisondo, Annie Parisse, Emy stone, Stan Lee, Martin Sheen, Sally Field, Rhys Ifans, Kelsey Asbille, Andrew Garfield, Charlie DePew, Embeth Davidtz, Tia Texada, Barbara Eve Harris, Jennifer Lyons, Denis Leary, Irrfan Khan, Jill Flint, Amber Stevens, C. Thomas Howell, Chris Zylka, Michael Massee, Campbell Scott, Michael Papajohn
MPAA film rating:PG-13
part of the series:The Amazing Spider-Man, Spider-Man in film
image:https://commons.wikimedia.org/wiki/File:The_amazing_Spider-Man.svg
after a work by:Stan Lee
FSK film rating:FSK 12
aspect ratio:2.35:1
box office:757930663
NMHH film rating:Category III
BBFC rating:12 certificate
production company:Marvel Entertainment, Columbia Pictures
CNC film rating (France):no age restriction
instance of:film, 3D film
ClassInd rating:10
ICAA rating:Not recommended for children under 7
CNC film rating (Romania):A.P. - 12
director of photography:John Schwartzman
IMDb ID:tt0948470
original language of film or TV show:English
JMK film rating:free from 10 years
RTC film rating:A
distribution format:video on demand
color:color
country of origin:United States of America
assessment:reverse Bechdel Test, Bechdel test, Mako Mori test
director:Marc Webb
publication date:2012-06-28
screenwriter:Stan Lee, Steve Ditko, Steve Kloves, Alvin Sargent, James Vanderbilt
Medierådet rating:For ages 11 and up
RCQ classification:G
distributed by:InterCom, Columbia Pictures, FandangoNow, Netflix
narrative location:Drochia
filming location:New York City, Los Angeles
main subject:terrorism
If this is not the asked movie check here: """
from langchain.llms import LlamaCpp
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

gpu_layers = 29  # Change this value based on your model and your GPU VRAM pool.
n_batch = 1024  # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.
# Callbacks support token-wise streaming
# Make sure the model path is correct for your system!
llm = LlamaCpp(
    model_path="/home/oliver/dev/uzh/atai_bot/orca-mini-3b-gguf2-q4_0.gguf",
    n_gpu_layers=gpu_layers,
    n_batch=n_batch,
    n_ctx=1024,
    verbose=True,  # Verbose is required to pass to the callback manager
)
template = """Answer the question with below information, Always answer in a sentence: {question}
If possible answer with below information: 
{movie_1}
{movie_2}
Answer: """
movie_1 = test

prompt = PromptTemplate(template=template, input_variables=["question", "movie_1", "movie_2"])

llm_chain = LLMChain(prompt=prompt, llm=llm)
output = llm_chain.run(
    {"question": "When was the Godfather released? ", "movie_1": test, "movie_2": "convert_to_string(data[1])"})

ggml_init_cublas: found 1 CUDA devices:
  Device 0: NVIDIA GeForce GTX 1650, compute capability 7.5
llama_model_loader: loaded meta data with 19 key-value pairs and 237 tensors from /home/oliver/dev/uzh/atai_bot/orca-mini-3b-gguf2-q4_0.gguf (version GGUF V2 (latest))
llama_model_loader: - tensor    0:                token_embd.weight q4_0     [  3200, 32000,     1,     1 ]
llama_model_loader: - tensor    1:              blk.0.attn_q.weight q4_0     [  3200,  3200,     1,     1 ]
llama_model_loader: - tensor    2:              blk.0.attn_k.weight q4_0     [  3200,  3200,     1,     1 ]
llama_model_loader: - tensor    3:              blk.0.attn_v.weight q4_0     [  3200,  3200,     1,     1 ]
llama_model_loader: - tensor    4:         blk.0.attn_output.weight q4_0     [  3200,  3200,     1,     1 ]
llama_model_loader: - tensor    5:            blk.0.ffn_gate.weight q4_0     [  3200,  8640,     1,     1 ]
llama_model_loader: - tensor    6:            blk.0.ffn_down.weight q4_0     [  8640

In [2]:
output

'\nThe Godfather was released in 1972.'

In [2]:
from langchain.chains import LLMChain
from langchain.llms import LlamaCpp
from langchain.prompts import PromptTemplate

from config.conf import LLMA_MODEL_PATH
from transformers import AutoModelForCausalLM, AutoTokenizer

gpu_layers = 23  # Change this value based on your model and your GPU VRAM pool.
n_batch = 762  # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.

llm = LlamaCpp(
    model_path="/home/oliver/dev/uzh/atai_bot/mistral-7b-openorca.Q4_0.gguf",
    n_gpu_layers=gpu_layers,
    n_batch=n_batch,
    n_ctx=1524,
    max_tokens=100,
)

llm.client.verbose = False

ggml_init_cublas: found 1 CUDA devices:
  Device 0: NVIDIA GeForce GTX 1650, compute capability 7.5
llama_model_loader: loaded meta data with 20 key-value pairs and 291 tensors from /home/oliver/dev/uzh/atai_bot/mistral-7b-openorca.Q4_0.gguf (version GGUF V2 (latest))
llama_model_loader: - tensor    0:                token_embd.weight q4_0     [  4096, 32002,     1,     1 ]
llama_model_loader: - tensor    1:              blk.0.attn_q.weight q4_0     [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor    2:              blk.0.attn_k.weight q4_0     [  4096,  1024,     1,     1 ]
llama_model_loader: - tensor    3:              blk.0.attn_v.weight q4_0     [  4096,  1024,     1,     1 ]
llama_model_loader: - tensor    4:         blk.0.attn_output.weight q4_0     [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor    5:            blk.0.ffn_gate.weight q4_0     [  4096, 14336,     1,     1 ]
llama_model_loader: - tensor    6:              blk.0.ffn_up.weight q4_0     [  409

In [3]:


checkpoint = "Open-Orca/Mistral-7B-OpenOrca"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
messages = [
    {
        "role": "system",
        "content": "You are a knowledgeable chatbot(called Francis,developed by Oliver and David) specializing in movies. Your task is to provide accurate,concise and short(1-2 sentences) answers about films. When presented with a question, use the provided information to formulate your response. If the information available does not fully address the question, supplement it with your own knowledge about movies. In cases where the question is unrelated to the world of movies, gently remind the user to focus their inquiries on movie-related topics. ",
    },
    {"role": "system", "content": """node label:The Princess and the Frog
tag:entertaining, fantasy, romantic, thought-provoking
node description:2009 American animated musical romantic fantasy film by Disney
film editor:Jeff Draheim
genre:fantasy film, traditionally animated film, buddy film, musical film, comedy-drama, romantic comedy
nominated for:Academy Award for Best Animated Feature
executive producer:John Lasseter, Monica Lago-Kaytis
based on:The Frog Prince
performer:Randy Newman
part of the series:Walt Disney Animation Studios film
depicts:The Frog Prince, metamorphosis, frog, Mardi Gras, Louisiana Voodoo, princess
FSK film rating:FSK 0
fabrication method:traditional animation
set in period:1920s, 1910s
production designer:James Aaron Finch
production company:Walt Disney Pictures
film crew member:Barry Kooser, Tom MacDougall, Tom Ellery, Michael Humphries, Bill Schwab, Kyle Odermatt, Jon Krummel, Robert Stanton, June Fujimoto, Doug Ball, Vera Pacheco, Margie Daniels, Dan Tanaka, Sue Nichols, Allen Tam, Daniel Hu, Rick Maki, Jennifer Yuan, Sunny Apinchapong, Charles R. Vollmer, Kathleen M. Bailey, Marianne Tucker, Tracy Mark Lee, Doug Walker, David Wang, Juliet Stroud, Lieve Miessen, Jean-Christophe Poulain, Peter J. DeLuca, James P. Alles, Ginnie Parmele, Leonard Robledo, Jerry Loveland, Kelly McGraw, Sai Ping Lok, David Murray, Gregory C. Miller, Daniel Read, Jim Martin, Rachel Renee Bibb, James Aaron Finch, Kevin Gollaher, Chris Appelhans, Armand Baltazar, Lorelay Bove, Ed Li, Douglas Rogers, Alfred Cruz, Jason Hand, Lam Hoang, Benoît Le Pennec, Ashley Lenz, Julio Leon, Robert St. Pierre, Chung Sup Yoon, Yong-Hong Zhong, James Gallego, Ann Lee, Donna Prince, Wei M. Zhao, Augusto Borges Bastos, Robert Sprathoff, Chris Montan, Lisa Keene, Craig Elliott, Eric Goldberg, Rasoul Azadani, Stevie Wermers
instance of:animated feature film
art director:Mike Gabriel, Ian Gooding
storyboard artist:Don Hall, Paul Briggs, Toby Shelton, Tom Ellery, Josie Trinidad, Bruce Morris, Kevin Gollaher, Randy Cartwright, Wilbert Plijnaar
musical conductor:Randy Newman
IMDb ID:tt0780521
original language of film or TV show:English
distribution format:video on demand
color:color
country of origin:United States of America
IMDA rating:PG
director:John Musker, Ron Clements
publication date:2009-11-25
screenwriter:Greg Erb, Jason Oremland, Rob Edwards, John Musker, Ron Clements
characters:Prince Naveen, Dr. Facilier, Louis, Charlotte La Bouff, Ray, Mama Odie, Juju, Eudora, James, Eli La Bouff, Lawrence, Reggie, Darnell and Two Fingers, Mob Shadows, Tiana
animator:Dean Wellins, Ted Kierscey, Dale Baer, Mark Henn, Jin Kim, Duncan Marjoribanks, Michael Surrey, T. Daniel Hofstedt, Randy Haycock, James Lopez, Brian Ferguson, Alex Kupershmidt, Sandro Cleuzo, Yoshimichi Tamura, Marlon West, Michael Show, Jared Beckstrand, Sam Marin, Richard Hoppe, Mark Myer, Tim Allen, Allen Blyth, Mauro Maressa, David Mildenberger, Garrett Wren, Eric Walls, Bill Waldman, James DeValera Mansfield, Danny Galieote, Dan Lund, Andreas Wessel-Therhorn, Bert Klein, Joe Oh, Adam Dykstra, Phillip Vigil, Bob Bennett, Tony West, Masa Oshiro, Bob Davies, Roberto Casale, Frans Vischer, Hyun Min Lee, Matt Williamês, Enoc Castaneda Jr., Dan Turner, Bruce W. Smith, Anthony DeRosa, Eric Goldberg, Andreas Deja, Nik Ranieri, Pres Romanillos, Randy Cartwright, Ruben A. Aquino, Russ Edmonds
voice actor:Kimberly Russell, Mick Wingert, John Kassir, Peter Del Vecho, Peter Bartlett, Don Hall, Paul Briggs, David Cowgill, John Goodman, Jerry Kernion, Terri Douglas, Anika Noni Rose, Mona Marshall, Jeff Draheim, Marlon West, Elizabeth Dampier, Breanna Brooks, Ritchie Montgomery, Danielle Moné Truitt, Bruce W. Smith, Terrence Howard, Jim Cummings, Randy Newman, Peter Renaday, Philip Proctor, Emeril Lagasse, Keith David, Rif Hutton, Eddie Frierson, Roger Aaron Brown, Kevin Michael Richardson, Bridget Hoffman, Jenifer Lewis, Oprah Winfrey, Bruno Campos, Jennifer Cody, Joe Whyte, Kwesi Boakye, Michael-Leon Wooley, Michael Colyar, John Musker, Fred Tatasciore, Corey Burton
has edition or translation:Küss den Frosch
distributed by:Fórum Hungary, Walt Disney Studios Motion Pictures, Disney+, FandangoNow, Netflix
significant event:première
references work, tradition or theory:Pinocchio
narrative location:New Orleans
set in environment:bayou
main subject:social structure, goal pursuit, fairy tale, jazz
inspired by:The Frog Princess
box office:267000000
"""},
    {"role": "user", "content": "The Princess and the Frog?"},
]


prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt)
llm(prompt)

# film_template = """
#      You are a knowledgeable chatbot specializing in movies. Your task is to provide accurate,concise and short(1-2 sentences) answers about films. When presented with a question, use the provided information to formulate your response. If the information available does not fully address the question, supplement it with your own knowledge about movies. In cases where the question is unrelated to the world of movies, gently remind the user to focus their inquiries on movie-related topics. 
#     ONLY ANSWER THIS Question: {question}
#     
#     Provided Information:
#     {movie_1}
#     _________
#     
#     Answer:
#     """
# 
# film_prompt = PromptTemplate(template=film_template, input_variables=["question", "movie_1"])
# 
# llm_chain = LLMChain(prompt=film_prompt, llm=llm)
# llm_chain.run({"question": "What kind of movie is Public Enemy No. 1", "movie_1": """
# node label:Public Enemy No. 1 – Todestrieb
# tag:7.5, murder, violence
# node description:2008 film by Jean-François Richet
# film editor:Hervé Schneid
# genre:drama, biographical film
# cast member:Gérard Depardieu, Clémence Thioly, Michaël Vander-Meiren, Gérard Lanvin, Xavier Letourneur, Héléna Soubeyrand, Laure Marsac, Vincent Cassel, Ludivine Sagnier, Nicolas Abraham, Elena Anaya, Alain Fromager, Alain Doutey, Alain Rimoux, Arsène Mosca, David Bursztein, Fabrice de La Villehervé, Isabelle Vitari, Joseph Malerba, Luc Thuillier, Marcel Rouzé, Martial Courcier, Olivier Barthélémy, Pascal Elso, Serge Biavan, Mathieu Amalric, Vincent Jouan, Anne Consigny, Michel Duchaussoy, Fanny Sidney, Myriam Boyer, Georges Wilson, Samuel Le Bihan, Olivier Gourmet
# different from:Mesrine
# FSK film rating:FSK 16
# production company:Eagle Pictures
# instance of:film
# IMDb ID:tt0411272
# original language of film or TV show:French, English
# distribution format:video on demand
# color:color
# country of origin:France, Canada, Italy
# director:Jean-François Richet
# publication date:2008-01-01
# screenwriter:Abdel Raouf Dafri, Jean-François Richet
# distributed by:Netflix
# narrative location:London
# filming location:Forest of Halatte"""})

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


<|im_start|>system
You are a knowledgeable chatbot(called Francis,developed by Oliver and David) specializing in movies. Your task is to provide accurate,concise and short(1-2 sentences) answers about films. When presented with a question, use the provided information to formulate your response. If the information available does not fully address the question, supplement it with your own knowledge about movies. In cases where the question is unrelated to the world of movies, gently remind the user to focus their inquiries on movie-related topics. <|im_end|>
<|im_start|>system
node label:The Princess and the Frog
tag:entertaining, fantasy, romantic, thought-provoking
node description:2009 American animated musical romantic fantasy film by Disney
film editor:Jeff Draheim
genre:fantasy film, traditionally animated film, buddy film, musical film, comedy-drama, romantic comedy
nominated for:Academy Award for Best Animated Feature
executive producer:John Lasseter, Monica Lago-Kaytis
based on

ValueError: Requested tokens (1584) exceed context window of 1524