In [2]:
import pandas as pd
import numpy as np
import csv
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchtext
import numpy as np
import matplotlib.pyplot as plt

In [6]:
sp = pd.read_csv("All-seasons.csv")
sp.replace('\n',' ',regex = True,inplace=True)
sp.to_csv('sp.csv',index= False)
sp.head()
cartman = sp[sp.Character=="Cartman"]
stan = sp[sp.Character == "Stan"]

In [7]:
#batching the data

import torchtext

text_field = torchtext.data.Field(sequential=True,      # text sequence
                                  tokenize=lambda x: x, # because are building a character-RNN
                                  include_lengths=True, # to track the length of sequences, for batching
                                  batch_first=True,
                                  use_vocab=True)       # to turn each character into an integer index
i = 0
for line in open('sp3.txt'):
        #print(line)
        i = i+1
        if i>=10:
            break
fields = [('line', text_field)]
dataset = torchtext.data.TabularDataset("sp3.txt", # name of the file
                                        "csv",  # fields are separated by a tab
                                        fields)
split = [0.6, 0.2,0.2]
train, valid, test =  dataset.split(split, stratified=False, strata_field=None, random_state=None)
text_field.build_vocab(train)
# print(text_field.vocab.stoi)
# print(text_field.vocab.itos)
train_iter = torchtext.data.BucketIterator(train,
                                           batch_size=32,
                                           sort_key=lambda x: len(x.line), # to minimize padding
                                           sort_within_batch=True,        # sort within each batch
                                           repeat=False)                   # repeat the iterator for multiple epochs
for i, batch in enumerate(train_iter):
    if i >= 10:
        break
    lines = batch.line[0]
    #print(lines)

In [8]:
cartman_text = ""
for line in cartman.Line:
    cartman_text += line
# show the first 100 characters
stan_text = ''
for l in stan.Line:
    stan_text += l

In [9]:
vocab = list(set(cartman_text))
vocab_stoi = {s: i for i, s in enumerate(vocab)}
vocab_itos = {i: s for i, s in enumerate(vocab)}
len(vocab)

100

In [10]:
import random
random.seed(10)

cartman_len = len(cartman_text)

def random_chunk(chunk_len=300):
    """Return a random subsequence from `spam_text`"""
    start_index = random.randint(0, cartman_len - chunk_len)
    end_index = start_index + chunk_len + 1
    return cartman_text[start_index:end_index]

print(random_chunk())

our wiener until white stuff comes out, and then put it in this cup. Yes, retard. Semen comes from your wiener. Now do it!  Well Butters?! Well pull harder! Try doin' it faster Butters, do you wanna go to jail for the rest of your life?! The you'd better get that semen sample no matter how long it ta


In [11]:
def text_to_tensor(text, vocab=vocab):
    """Return a tensor containing the indices of characters in `text`."""
    indices = [vocab_stoi[ch] for ch in text]
    return torch.tensor(indices)

print(text_to_tensor(random_chunk()))


tensor([52, 40, 92, 56, 69, 35, 35, 94, 71, 71, 71, 40, 83, 29, 88, 88, 29, 52,
        40, 92, 56, 69, 35, 35, 94, 71, 71, 71, 40, 77, 91, 29, 94, 40, 29, 94,
        40,  6, 45, 56, 58, 27, 40, 89, 49, 45, 40, 88, 45, 89, 94, 27, 40, 35,
        52, 73,  1, 94, 40,  6, 49, 40, 94, 49, 56, 52, 73, 91, 29, 81,  1, 40,
        52, 35, 94, 52, 71, 40, 68, 40, 69, 56, 40, 81, 49, 73, 93, 40, 83, 29,
        88, 88, 29, 52, 40, 92, 56, 69, 35, 35, 94, 71, 71, 71, 40, 83, 29, 88,
        88, 29, 52, 40, 92, 56, 69, 35, 35, 94, 71, 71, 71, 40, 40, 72, 91, 40,
        56, 89, 40, 80, 49,  6, 71, 40, 68, 40, 79, 57, 69, 46, 46, 52,  6, 40,
        56, 89, 40, 46, 69, 81, 73, 94, 71, 40, 40, 99, 49, 45, 40, 88, 45, 89,
        94, 27, 40, 68, 40, 79, 57, 69, 46, 46, 52,  6, 40, 56, 89, 40, 46, 69,
        81, 73, 94, 93, 40, 40, 67, 52, 91, 52, 91, 71, 40, 99, 49, 45, 40, 88,
        45, 89, 94, 93, 40, 99, 49, 45, 40, 88, 45, 89, 94, 27, 40, 79, 91, 52,
        79, 85, 40, 29, 73, 40, 49, 45, 

In [12]:
def random_training_set(chunk_len=300):    
    chunk = random_chunk(chunk_len)
    inp = text_to_tensor(chunk[:-1])   # omit the last token
    target = text_to_tensor(chunk[1:]) # omit the first token
    return inp, target

In [13]:
##a naive model following the lecture notes with 1 hidden layer - GRU 
class Cartmanboi(nn.Module):
    def __init__(self, vocab_size, hidden_size, n_layers=1):
        super(Cartmanboi, self).__init__()
        # RNN attributes
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        # identiy matrix for generating one-hot vectors
        self.ident = torch.eye(vocab_size)
        # recurrent neural network
        self.rnn = nn.GRU(vocab_size, hidden_size, n_layers, batch_first=True)
        # a fully-connect layer that decodes the RNN output to
        # a distribution over the vocabulary
        self.decoder = nn.Linear(hidden_size, vocab_size)
    
    def forward(self, inp, hidden):
        # reshape the input tensor to [1, seq_length]
        inp = inp.view(1, -1)
        # generate one-hot vectors from token indices
        inp = self.ident[inp]
        # obtain the next output and hidden state
        output, hidden = self.rnn(inp, hidden)
        output = self.decoder(output.squeeze(0))
        return output, hidden

    def init_hidden(self):
        return torch.zeros(self.n_layers, 1, self.hidden_size)

In [14]:
def evaluate(model, prime_str='I', predict_len=300, temperature=0.8):
    hidden = model.init_hidden()
    prime_input = text_to_tensor(prime_str)
    predicted = prime_str
    
    # Use priming string to "build up" hidden state
    for p in range(len(prime_str) - 1):
        _, hidden = model(prime_input[p], hidden)
    inp = prime_input[-1]
    
    for p in range(predict_len):
        output, hidden = model(inp, hidden)
        
        # Sample from the network as a multinomial distribution
        output_dist = output.data.view(-1).div(temperature).exp()
        top_i = int(torch.multinomial(output_dist, 1)[0])
        # Add predicted character to string and use as next input
        predicted_char = vocab_itos[top_i]
        predicted += predicted_char
        inp = text_to_tensor(predicted_char)

    return predicted

def train(model, num_iters=2000, lr=0.004):
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()
    for it in range(num_iters):
        # get training set
        inp, target = random_training_set()
        # cleanup
        optimizer.zero_grad()
        # forward pass
        hidden = model.init_hidden()
        output, _ = model(inp, hidden)
        loss = criterion(output, target)
        # backward pass
        loss.backward()
        optimizer.step()

        if it % 200 == 199:
            print("[Iter %d] Loss %f" % (it+1, float(loss)))
            print("    " + evaluate(model, ' ', 300))


In [44]:
model = Cartmanboi(len(vocab), 128)
train(model, num_iters=5000)

[Iter 200] Loss 2.355930
     SKy ik the thes to g aansher ofe cein, Poane socs. dars ane fo the soe youp it cous arenChe shico the I pour! Whare ing. the theas ind Kat sh, thitha! domighy the, sochyoi cor ige weare yout Ahe hee cous we non? meyout fous im ty ris tes gar bouve ing! Whan'm tol ge mind You go Ot, Jer Ahelle Phod a
[Iter 400] Loss 2.023634
     Bund the Joreer up emy her? Whavee rune roorsine poreres so pick fit arcar wea't ass duss is the, thed ind toont oo tomyser bfing tous goss il Kavent! lof. I wast! UAw, wich, bese? What ourree. An, we kand foo gode enocheste dipes oo ase out! Do and raw thas tald ial bitsing use of ont magh! I've.  
[Iter 600] Loss 1.976362
     me is is care.  Gongit's a. I'm guy drihis some, Batt... Well hone I cund ther as man hickis she sare hoolle ser and hatht. I s. Labrid you in you ghing whus for in yout sumint shan in. What's so seap on the swars? And of eele, you ging!  fise nom look you bucking dike dool hime like arep some hores
[Iter 8

In [45]:
train(model, num_iters=5000)

[Iter 200] Loss 1.718147
     you're going on the ready they are the wellowing there. Well, I had not firdolas. Stars, we got people gonna sure of come, I don't say looksah, later wen't says are gonna die thing on Earyens. Wendy, seriouslit. What? Oh what olterse-froms and finn, thank you-get Posers. We need a nack spolint.  Oka
[Iter 400] Loss 1.560332
     Must now. In the fuck your about atwey Lasy ass idea! It's a little school wrongertay shott! You know that is my mom say  are we comeary now it watch-ucare to the whole pitter out of coming, what?! This is whole ye, and he are to bebo move. Hey. Well I have a commermaning asshoget how sater, man a s
[Iter 600] Loss 1.595891
     well we get the stuchs the chick Warclisset. The big plady. I'm had it for secrate.  That? Oh my God!  Wellols, we was just a little stuff? And it's like this girls a pussy mome the very a pissing yourstem. We don't be pussy.  Pigare if come on!  Nothing stupid it going to up. I was a little sctees.
[Iter 8

In [16]:
model1 = Cartmanboi(len(vocab), 128,2)
train(model1, num_iters=5000)

[Iter 200] Loss 2.545364
     an it lit to Sole douded hecank  thy Lell atal uny bath uf? Laraouh he hang ant thele avco thakfr lout loo we al than ane thelt ths no wo chent barargit and ige you as Ih ohr clathabool ou? Well eupnat you tud was ucafs gof I't god Ka got thas hanle onf doa Houlrrea ridef Shra  heunn ah yhare ins aa
[Iter 400] Loss 2.140696
     ighing het dodot yto dale cron't seake the? Dyeve stome pooparsel that ot rated the pet is eang? Yeah the pitterne's kick woogh!  here the hofly, Sthe acker the sto it to loor got not alpende the bitter oke! I goter and thit it you got the Ptot my!  youh out ley and os hutn. Your ip te mach lithit'p
[Iter 600] Loss 1.982842
     inding the now weed soth! I fund the Stente! Oh, that sup one hiok's be see whis let to pupkay? I'd be the inna be to! All it then' your caed if on the wise be!  Jest I dien my for for back wortt! Hey Wes now live men the meyfas the meal go digh it come now out partrays one fop the poace Int you's a
[Iter 8

In [17]:
train(model1, num_iters=5000)

[Iter 200] Loss 1.465554
     And I don't take that if I woncherent is gonna drost about!  Well do you are shrepped me only about on are speristions! Sir countralion! Time. Okay, he'd and agree in a cart on your postianolising of here. You know what dolet's maturahcher this prosper, and you sandaubly try to said to give to start
[Iter 400] Loss 1.770811
     stupid of it. I could leave through arresk the whole was serious!  I sube I'm seriously. In more back to me. I think you know moghey we have a seard would a serious, you mean. Well, I want that school other made. She's a won piggy gool. You duid asseve me and and stop without the fuck But make them 
[Iter 600] Loss 1.557301
     it. Ey, heh.  She us. He life? I didn't go our commarday doesn't show.  Help.  Hey, a minute. You're gonna have bepind in toter it a gamerit in them.  Me they don't have to clear friend.  Oh, wow, I want everyone in my mom's gass care bad are Jews Christ!  Hey, here you know where it checking us a b
[Iter 8

In [18]:
train(model1, num_iters=5000)

[Iter 200] Loss 1.477062
     transs in this can't do getting in your stosed my. Okay, let de. They're a Lat time. We jusk the school says would the cast stupid Hey! Well and all to day Kenny, how's a pizzgie!  OU SI UPPH, I've stupid as he like to do. I just don't understand your and the stupid picture and getting cry that and 
[Iter 400] Loss 1.283922
     her this too? You stupid stand of drive, my say our whole Kenny. And we're a swid: we're not gonna all die, Kyle. Oh my God. Oh, you don't want a coodercheal or the face, Token... is a high isn't up, and here you all our day and it too. When we're saved to pluy. I wasn't not it. I was started to thi
[Iter 600] Loss 1.707678
     AIDS. Yes, see we won'll have a hands of everything. I'm not us I was to specond us.  Goddamnit! Eh! Look ho he right with you asshole!  This Has get the poby! You guys!  Yes to kie! Everythis ever smells and Is like the black to be a real up to be right now. Yes, sure you to the till you not finall
[Iter 8

In [210]:
evaluate(model1, prime_str='Ky', predict_len=100, temperature=0.6)

"Kyle! That's right! We're taken a costume to see the school are that it this is a greatest! The Parton"