<a href="https://colab.research.google.com/github/robgon-art/PlotJam/blob/master/PlotJam_Sampling.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Got Writer's Block? It's PlotJam to the Rescue!**
Using GPT-2 to create plot summaries of books that don't exist ... yet<br><br>
![alt text](https://raw.githubusercontent.com/robgon-art/plot-jam/master/images/the_story_medium.jpg)<br>
Photo illustration based on a photo by alexkerhead, CC BY 2.0

# Initialize the PlotJam System
Click the **Run cell** button below to set things up.<br>
It takes about 3 minutes to load the dataset files and configure the GPT-2 model.

In [None]:
#@title
# Download book summaries, movie and TV show titles, and the pre-trained model
# Book summaries archive; once unpacked it provides booksummaries/booksummaries.txt,
# which is read further down to collect existing book titles.
!wget -O booksummaries.tar.gz http://www.cs.cmu.edu/~dbamman/data/booksummaries.tar.gz
!tar -xf booksummaries.tar.gz
# IMDb title list; decompresses in place to title.basics.tsv (read further down).
!wget -O title.basics.tsv.gz https://datasets.imdbws.com/title.basics.tsv.gz
!gzip -d title.basics.tsv.gz
# Additional book list; unzips to books.csv (read further down).
# NOTE(review): --no-check-certificate disables TLS certificate verification for
# this download and the next one.
!wget -O books.csv.zip --no-check-certificate "https://onedrive.live.com/download?cid=61FC7243E093B36A&resid=61FC7243E093B36A%211282&authkey=AIpT1xoDqAqGF1U"
!unzip books.csv.zip
# Presumably the fine-tuned GPT-2 checkpoint that load_gpt2() picks up below --
# TODO confirm against the archive contents.
!wget -O plot_jam.zip --no-check-certificate "https://onedrive.live.com/download?cid=61FC7243E093B36A&resid=61FC7243E093B36A%211281&authkey=ADCNCznRl5uAAkc"
!unzip plot_jam.zip

# Initialize the model
# Colab magic selecting TensorFlow 1.x; presumably required by the gpt-2-simple
# release this notebook was written against -- TODO confirm.
%tensorflow_version 1.x
!pip install -q gpt-2-simple
import gpt_2_simple as gpt2
# `sess` is the TensorFlow session shared by every gpt2.generate() call in the
# cells below; load_gpt2() restores the model weights into it.
sess = gpt2.start_tf_sess()
gpt2.load_gpt2(sess)

def remove_leading_article(title):
  """Return `title` without its leading English article, if it has one.

  Exactly one leading "The ", "An " or "A " is removed, so that titles which
  differ only by their article compare equal once normalised. The match is
  case-sensitive and requires the trailing space, so words such as "Theory"
  or "Another" are left untouched.

  Args:
    title: The title string to normalise.

  Returns:
    The title with one leading article removed, or the title unchanged when
    it does not start with an article.
  """
  for article in ("The ", "An ", "A "):
    if title.startswith(article):
      # Stop after the first match: only the leading article is dropped, so
      # "The A Team" becomes "A Team" rather than "Team".
      return title[len(article):]
  return title

# Get the titles of books, movies, and TV shows
import csv

def _add_titles(known, path, column, delimiter=',', quoting=csv.QUOTE_MINIMAL,
                strip_parenthetical=False):
  """Read one delimited file and record its normalised titles in `known`.

  Each title has its leading article removed and is lower-cased before being
  stored, which is the same normalisation applied to generated titles when
  they are checked against this table.

  Args:
    known: Dict updated in place; normalised titles become keys mapped to True.
    path: Path of the file to read.
    column: Zero-based index of the column holding the title.
    delimiter: Field separator used by the file.
    quoting: csv quoting mode passed to csv.reader.
    strip_parenthetical: When True, drop everything from the first "(" on
      before normalising.
  """
  # newline='' lets the csv module do its own line-ending handling.
  with open(path, newline='', encoding='utf-8') as f:
    for row in csv.reader(f, delimiter=delimiter, quoting=quoting):
      if len(row) <= column:
        continue  # blank or truncated line: there is no title to record
      title = row[column]
      if strip_parenthetical:
        title = title.split('(')[0].strip() # Remove everything after a left paren
      known[remove_leading_article(title).lower()] = True

# Normalised titles of existing works; generated titles are checked against it.
titles = {}
# NOTE(review): this file is also a raw TSV; confirm whether it needs
# csv.QUOTE_NONE as well.
_add_titles(titles, 'booksummaries/booksummaries.txt', 2, delimiter='\t')
# The IMDb dump is plain TSV with no quoting convention. QUOTE_NONE stops a
# title that begins with a double quote from being read as the start of a
# quoted field, which would otherwise swallow the following tabs and lines.
_add_titles(titles, 'title.basics.tsv', 3, delimiter='\t',
            quoting=csv.QUOTE_NONE)
_add_titles(titles, 'books.csv', 1, strip_parenthetical=True)

def repeats(s, num):
  """Return True if any phrase of `num` consecutive words occurs twice in `s`.

  The text is split on single spaces and every window of `num` adjacent words
  is examined, including the final one, so a repeat at the very end of the
  text is detected.

  Args:
    s: The text to inspect.
    num: Number of words per phrase. Values below 1 are treated as 1.

  Returns:
    True as soon as a phrase is seen a second time, otherwise False. Text
    with fewer than `num` words can never repeat and yields False.
  """
  window = max(num, 1)
  words = s.split(' ')
  seen = set()
  # The +1 makes the range include the last full window of the text.
  for start in range(len(words) - window + 1):
    phrase = ' '.join(words[start:start + window])
    if phrase in seen:
      return True
    seen.add(phrase)
  return False

# Create Some New Stories
Select the **genre**, then click the **Run cell** button to generate new stories with titles and plot summaries.<br>You can run this multiple times with the same or different genre selections.

In [None]:
#@title
import textwrap
import re

# Use the inputs to set up some parameters
genre = "All Genres" #@param ["All Genres", "Novel", "Science Fiction", "Fiction", "Children's literature", "Fantasy", "Young adult literature", "Historical novel", "Speculative fiction", "Crime Fiction", "Non-fiction", "Mystery", "Thriller"]
# With "All Genres" the prompt stops after "GENRE:" so the model chooses the
# genre itself; otherwise the genre is fixed and the model continues from the
# title onwards.
the_prefix = 'GENRE:'
if genre != "All Genres":
  the_prefix += " " + genre + " TITLE:"

# Get some new plots
plot_ideas = gpt2.generate(sess, length=150, temperature=0.8,
  prefix=the_prefix, nsamples=20, batch_size=20, return_as_list=True,
  include_prefix=True, truncate="\n")

# Stories that pass the filters; read by the variations cell further down.
keepers = []
count = 0

# Non-greedy so the title ends at the first " PLOT:" marker even if the plot
# text happens to contain the marker again.
title_pattern = re.compile(r'TITLE: (.*?) PLOT:')

# Show the plots, filtering out ones with repeated phrases or reused titles
if not plot_ideas:
  print("No results, try again.")
else:
  for plot in plot_ideas:
    title_search = title_pattern.search(plot)
    # Skip samples without a recognisable title or with a repeated 5-word phrase.
    if title_search is None or repeats(plot, 5):
      continue
    title = remove_leading_article(title_search.group(1))
    # Skip titles that already belong to a known book, movie or TV show.
    if title.lower() in titles:
      continue
    keepers.append(plot)
    count += 1
    print(textwrap.fill("NUMBER: " + str(count) + " " + plot, width=180),"\n")
    if count == 10:
      break
  if not keepers:
    # Every sample was rejected; say so instead of printing nothing at all.
    print("All generated stories were filtered out, try again.")

# Create Variations of a Selected Story
Choose one of the stories by its number and click the **Run cell** button to create and display five variations.

In [None]:
selection = "NUMBER: 1"#@param ["NUMBER: 1", "NUMBER: 2", "NUMBER: 3", "NUMBER: 4", "NUMBER: 5", "NUMBER: 6", "NUMBER: 7", "NUMBER: 8", "NUMBER: 9", "NUMBER: 10"]
# The form value looks like "NUMBER: 3"; convert it to a zero-based index.
selection_number = int(selection.split(' ')[1])-1
if not keepers:
  # Avoids the nonsensical "between 1 and 0" message when nothing was kept.
  print("There are no stories to choose from; run the 'Create Some New Stories' cell first.")
elif selection_number >= len(keepers):
  print("Please choose a selection number between 1 and " + str(len(keepers)))
else:
  # Seed the model with the first 20 space-separated words of the chosen story
  # so each variation keeps its opening and diverges from there.
  selected_prefix = ' '.join(keepers[selection_number].split(' ')[:20])
  # Get some refined plots
  refined_ideas = gpt2.generate(sess, length=150, temperature=0.8,
    prefix=selected_prefix.strip(), nsamples=5, batch_size=5, return_as_list=True,
    include_prefix=True, truncate="\n")

  # Print out the results
  for plot in refined_ideas:
    print(textwrap.fill(plot, width=180),"\n")