In [0]:
!pip uninstall -y PyDrive
!pip uninstall -y torchvision
!pip uninstall -y visualization

Uninstalling PyDrive-1.3.1:
  Successfully uninstalled PyDrive-1.3.1
Uninstalling torchvision-0.2.1:
  Successfully uninstalled torchvision-0.2.1
Uninstalling visualization-0.0.1:
  Successfully uninstalled visualization-0.0.1


In [2]:
!pip install -U -q PyDrive
!pip install -U -q torchvision
!pip install -U -q visualization

tcmalloc: large alloc 1073750016 bytes == 0x563516524000 @  0x7ffae50af1c4 0x5634bb8270d8 0x5634bb910d5d 0x5634bb83a77a 0x5634bb83f462 0x5634bb837b3a 0x5634bb83f82e 0x5634bb837b3a 0x5634bb83f82e 0x5634bb837b3a 0x5634bb83f82e 0x5634bb837b3a 0x5634bb83fe1f 0x5634bb837b3a 0x5634bb83f82e 0x5634bb837b3a 0x5634bb83f82e 0x5634bb83f462 0x5634bb83f462 0x5634bb837b3a 0x5634bb83fe1f 0x5634bb83f462 0x5634bb837b3a 0x5634bb83fe1f 0x5634bb837b3a 0x5634bb83fe1f 0x5634bb837b3a 0x5634bb83f82e 0x5634bb837b3a 0x5634bb86850f 0x5634bb863202


In [0]:
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

In [0]:
# Authenticate the Colab user first, then hand the resulting
# application-default credentials to PyDrive.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
# `drive` is the module-level client that LyricsGenerationDataset uses
# to download the corpus file.
drive = GoogleDrive(gauth)

In [0]:
import torch
import torch.nn as nn
from torch.autograd import Variable

import pandas as pd

import random
import string
import numpy as np

import sys, os

import torch.utils.data as data

# Expose only CUDA device 0 to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = '0'

# Character vocabulary: a character's label is its position in string.printable.
all_characters = string.printable
number_of_characters = len(all_characters)


def character_to_label(character):
    """Map a character to its integer label.

    The label is the position of ``character`` inside ``all_characters``
    (``string.printable``), as reported by ``str.find``.

    Parameters
    ----------
    character : str
        A character

    Returns
    -------
    character_label : int
        Index of ``character`` in ``all_characters``, or -1 when the
        character is not part of that vocabulary.
    """
    return all_characters.find(character)

def string_to_labels(character_string):
    """Convert a string into the list of integer labels of its characters.

    Parameters
    ----------
    character_string : str
        Text to encode.

    Returns
    -------
    labels : list of int
        One label per character, in order, as produced by
        ``character_to_label``.
    """
    # A real list (not a lazy ``map``) so that callers can take ``len()``,
    # slice it and feed it to ``torch.LongTensor`` on Python 2 and 3 alike.
    return [character_to_label(character) for character in character_string]

def pad_sequence(seq, max_length, pad_label=100):
    """Return a copy of ``seq`` right-padded with ``pad_label`` to ``max_length``.

    Parameters
    ----------
    seq : sequence of int
        Labels to pad. The argument itself is left untouched.
    max_length : int
        Target length. When ``seq`` is already at least this long, no
        padding is added and nothing is truncated.
    pad_label : int, optional
        Value used for the padding positions (default 100).

    Returns
    -------
    padded : list of int
        A new list holding the items of ``seq`` followed by the padding.
    """
    # Copy first: the original ``seq += ...`` silently grew the caller's list
    # and failed on non-list sequences such as tuples.
    padded = list(seq)
    padded.extend([pad_label for _ in range(max_length - len(padded))])

    return padded


class LyricsGenerationDataset(data.Dataset):
    """Character-level next-character prediction dataset read from a CSV file.

    The CSV is expected to provide a ``text`` column (the sequences) and an
    ``artist`` column (used for filtering and for ``artists_list``).

    Each item is a triple ``(inputs, targets, length)`` of ``LongTensor``s:
    ``inputs`` holds the labels of the text without its last character,
    ``targets`` the labels shifted by one character, and ``length`` the
    number of non-padded positions. Both sequences are padded to
    ``max_text_len`` so that the default collate function can batch them.
    """

    def __init__(self, csv_file_path, minimum_song_count=None, artists=None):
        """Download the corpus, load it and apply the optional filters.

        Parameters
        ----------
        csv_file_path : str
            Path of the CSV file to read.
        minimum_song_count : int, optional
            When given, keep only artists with more than this many rows.
        artists : list of str, optional
            When given, keep only rows whose ``artist`` is in this list.
        """

        # Fetch the corpus through the module-level PyDrive client `drive`.
        # NOTE(review): the Drive file id and the local name 'poems.csv' are
        # hard-coded, so the download only feeds `csv_file_path` when that
        # path is 'poems.csv'.
        downloaded = drive.CreateFile({'id':'1sSV6SCFPfl9I7jQ67mTGmdGVScytFC1p'})
        downloaded.GetContentFile('poems.csv')

        lyrics = pd.read_csv(csv_file_path)

        if artists:
            lyrics = lyrics[lyrics.artist.isin(artists)]
            # drop=True: renumber rows without keeping the old index around
            # as a stale 'index' column.
            lyrics = lyrics.reset_index(drop=True)

        if minimum_song_count:
            # Keep artists that have more than `minimum_song_count` rows
            lyrics = lyrics.groupby('artist').filter(lambda x: len(x) > minimum_song_count)
            # Renumber so that .loc[i] in __getitem__ addresses the i-th row
            lyrics = lyrics.reset_index(drop=True)

        self.lyrics_dataframe = lyrics

        # Length of the longest text; every item is padded to this length
        self.max_text_len = self.lyrics_dataframe.text.str.len().max()

        self.indexes = range(len(self.lyrics_dataframe))

        self.artists_list = list(self.lyrics_dataframe.artist.unique())
        self.number_of_artists = len(self.artists_list)

    def __len__(self):
        """Return the number of texts in the dataset."""

        return len(self.indexes)

    def __getitem__(self, index):
        """Return ``(inputs, targets, length)`` for the text at ``index``."""

        index = self.indexes[index]

        sequence_raw_string = self.lyrics_dataframe.loc[index].text

        # Materialise the labels: len() and slicing below need a real list
        # even if string_to_labels hands back a lazy iterator.
        sequence_string_labels = list(string_to_labels(sequence_raw_string))

        sequence_length = len(sequence_string_labels) - 1

        # Shifted by one char: the target at position t is the input at t + 1
        input_string_labels = sequence_string_labels[:-1]
        output_string_labels = sequence_string_labels[1:]

        # Pad so that all sequences have the same length, otherwise the
        # batching won't work. Targets are padded with -100 (inputs use the
        # pad_sequence default).
        input_string_labels_padded = pad_sequence(input_string_labels,
                                                  max_length=self.max_text_len)
        output_string_labels_padded = pad_sequence(output_string_labels,
                                                   max_length=self.max_text_len,
                                                   pad_label=-100)

        return (torch.LongTensor(input_string_labels_padded),
                torch.LongTensor(output_string_labels_padded),
                torch.LongTensor([sequence_length]) )

    
def post_process_sequence_batch(batch_tuple):
    """Sort a padded batch longest-first and trim it for ``pack_padded_sequence``.

    Parameters
    ----------
    batch_tuple : tuple
        ``(input_sequences, output_sequences, lengths)`` where the first two
        are 2-D tensors with one padded sequence per row and ``lengths``
        holds one length per row.

    Returns
    -------
    input_sequence_batch_transposed : Tensor
        Inputs sorted by decreasing length, cut to the longest length in the
        batch and transposed so that time is the first dimension.
    output_sequence_batch_sorted : Tensor
        Targets in the same row order, cut to the same length (batch first).
    lengths_batch_sorted_list : list of int
        The sorted lengths as plain Python ints.
    """

    input_sequences, output_sequences, lengths = batch_tuple

    # Row order from longest to shortest. sorted() is stable, so rows of
    # equal length keep their relative batch order.
    row_order = sorted(range(len(lengths)),
                       key=lambda row: int(lengths[row]),
                       reverse=True)

    # pytorch's api for rnns wants lengths to be a list of ints; build a
    # real list (a lazy ``map`` object is rejected on Python 3).
    lengths_batch_sorted_list = [int(lengths[row]) for row in row_order]
    longest_length = lengths_batch_sorted_list[0]

    input_sequence_batch_sorted = torch.stack([input_sequences[row] for row in row_order])
    output_sequence_batch_sorted = torch.stack([output_sequences[row] for row in row_order])

    # Drop the padding columns that no sequence in this batch reaches
    input_sequence_batch_sorted = input_sequence_batch_sorted[:, :longest_length]
    output_sequence_batch_sorted = output_sequence_batch_sorted[:, :longest_length]

    input_sequence_batch_transposed = input_sequence_batch_sorted.transpose(0, 1)

    return input_sequence_batch_transposed, output_sequence_batch_sorted, lengths_batch_sorted_list


class RNN(nn.Module):
    """Embedding -> multi-layer LSTM -> linear classifier over characters."""

    def __init__(self, input_size, hidden_size, num_classes, n_layers=2):
        """Build the layers.

        Parameters
        ----------
        input_size : int
            Number of distinct input labels, i.e. the size of the dictionary
            + 1 (accounts for the padding constant).
        hidden_size : int
            Size of the embedding vectors and of the LSTM hidden state.
        num_classes : int
            Number of output classes scored at every time step.
        n_layers : int, optional
            Number of stacked LSTM layers (default 2).
        """

        super(RNN, self).__init__()

        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_classes = num_classes
        self.n_layers = n_layers

        # Learned lookup table: one `hidden_size` vector per input label
        self.encoder = nn.Embedding(input_size, hidden_size)

        # NOTE: despite the attribute name this is an LSTM, not a GRU. The
        # name is kept because it is part of the state_dict keys of saved
        # checkpoints.
        self.gru = nn.LSTM(hidden_size, hidden_size, n_layers)

        self.logits_fc = nn.Linear(hidden_size, num_classes)

    def forward(self, input_sequences, input_sequences_lengths, hidden=None):
        """Score the next character at every position of every sequence.

        Parameters
        ----------
        input_sequences : LongTensor
            Padded label sequences, time first (``pack_padded_sequence`` is
            used with its default ``batch_first=False``).
        input_sequences_lengths : list of int
            Unpadded length of each sequence, in batch order.
        hidden : tuple of Tensor, optional
            Initial LSTM state; ``None`` lets the LSTM use its default.

        Returns
        -------
        logits_flatten : Tensor
            Logits reshaped to ``(-1, num_classes)`` after moving the batch
            dimension first.
        hidden : tuple of Tensor
            Final LSTM state as returned by ``nn.LSTM``.
        """

        embedded = self.encoder(input_sequences)

        # Here we run the rnn only on the non-padded regions of the batch
        packed = torch.nn.utils.rnn.pack_padded_sequence(embedded, input_sequences_lengths)
        packed_outputs, hidden = self.gru(packed, hidden)
        # unpack (back to padded); the returned lengths are not needed
        outputs, _ = torch.nn.utils.rnn.pad_packed_sequence(packed_outputs)

        logits = self.logits_fc(outputs)

        # Batch first, so that the flattened rows line up with targets that
        # were flattened from a batch-first tensor
        logits = logits.transpose(0, 1).contiguous()

        logits_flatten = logits.view(-1, self.num_classes)

        return logits_flatten, hidden

    
# Build the dataset; its constructor also downloads 'poems.csv' from Drive.
trainset = LyricsGenerationDataset(csv_file_path='poems.csv')

# Shuffled batches of 50 padded sequences; drop_last=True discards a final
# batch that would hold fewer than 50 items.
trainset_loader = torch.utils.data.DataLoader(trainset, batch_size=50,
                                              shuffle=True, num_workers=0, drop_last=True)

In [0]:
# The embedding gets one index more than the vocabulary so that the input
# padding label has its own entry; the classifier only scores real characters.
rnn = RNN(input_size=len(all_characters) + 1, hidden_size=512, num_classes=len(all_characters))
rnn.cuda()

learning_rate = 0.001
optimizer = torch.optim.Adam(rnn.parameters(), lr=learning_rate)
# CrossEntropyLoss skips targets equal to its default ignore_index (-100),
# which is the pad label the dataset uses for output sequences.
criterion = torch.nn.CrossEntropyLoss().cuda()

def sample_from_rnn(starting_sting="Why", sample_length=300, temperature=1):
    """Generate text from the module-level ``rnn``, one character at a time.

    The seed string is run through the network first; afterwards each
    sampled character is fed back in together with the carried hidden state.

    Parameters
    ----------
    starting_sting : str
        Non-empty seed text that the sample starts with.
    sample_length : int
        Number of characters to generate after the seed.
    temperature : float
        Divisor applied to the logits before the softmax.

    Returns
    -------
    sampled_string : str
        The seed followed by ``sample_length`` sampled characters.

    Raises
    ------
    ValueError
        If ``starting_sting`` is empty (there is nothing to prime the
        network with).
    """

    # list(): the labels are measured and turned into a tensor below, which
    # needs a real sequence.
    seed_labels = list(string_to_labels(starting_sting))
    if not seed_labels:
        raise ValueError("starting_sting must contain at least one character")

    sampled_string = starting_sting

    # Shape (seed_length, 1): time first, batch of one
    seed_input = torch.LongTensor(seed_labels).cuda().unsqueeze(1)

    output, hidden = rnn(Variable(seed_input), [len(seed_labels)], hidden=None)

    # Only the logits of the last seed position predict the next character
    output = output[-1, :].unsqueeze(0)

    for _ in range(sample_length):

        # The logits are flattened to 1-D, so the softmax runs over dim 0
        output_dist = nn.functional.softmax(output.view(-1).div(temperature), dim=0).data

        predicted_label = torch.multinomial(output_dist, 1)

        sampled_string += all_characters[int(predicted_label[0])]

        # Feed the sampled character back in as a (1, 1) sequence
        current_input = Variable(predicted_label.unsqueeze(1))

        output, hidden = rnn(current_input, [1], hidden=hidden)

    return sampled_string

In [0]:
# NOTE(review): the recorded output of this cell is an ImportError, and the
# training cell rebinds `loss_list` to a plain list, so the live plots set up
# here do not appear to be used by the recorded run.
%matplotlib notebook

from matplotlib import pyplot as plt

# VizList: a list that can be bound to an axis
# (presumably it redraws the axis when appended to; verify in `visualization`)
from visualization import VizList

# Create one figure with three stacked axes and give each an empty line
f, (loss_axis, validation_axis, train_axis) = plt.subplots(3, 1)

loss_axis.plot([], [])
validation_axis.plot([], [])
train_axis.plot([], [])


loss_list = VizList()
validation_list = VizList()
train_list = VizList()

# Bind each list to its axis
loss_list.bind_to_axis(loss_axis)
validation_list.bind_to_axis(validation_axis)
train_list.bind_to_axis(train_axis)

loss_axis.set_title('Training Loss')
validation_axis.set_title('Validation Set Accuracy')
train_axis.set_title('Training Set Accuracy')


plt.tight_layout()

ImportError: ignored

In [11]:
# Train the character model, report the mean loss every `print_every`
# optimisation steps, then print a sample and save the weights.
clip = 1.0  # max gradient norm for the (currently disabled) clipping below
epochs_number = 200
loss_list = list()  # per-batch training loss

print_every = 11
steps = 0
# Accumulated across epoch boundaries, exactly like `steps`, so that every
# report averages `print_every` batches even when that number does not
# divide the number of batches per epoch.
running_loss = 0

for epoch_number in range(epochs_number):
  print(len(trainset_loader))
  for batch in trainset_loader:
    steps += 1

    # Sort longest-first and trim the padding, as the packed LSTM requires
    input_sequences_batch, output_sequences_batch, sequences_lengths = post_process_sequence_batch(batch)

    # Targets are flattened to line up with the flattened logits
    output_sequences_batch_var = Variable( output_sequences_batch.contiguous().view(-1).cuda() )
    input_sequences_batch_var = Variable( input_sequences_batch.cuda() )

    optimizer.zero_grad()

    logits, _ = rnn(input_sequences_batch_var, sequences_lengths)

    loss = criterion(logits, output_sequences_batch_var)
    # .item() reads the scalar loss; indexing `loss.data[0]` fails on the
    # 0-dim loss tensors of newer PyTorch releases.
    loss_value = loss.item()
    loss_list.append(loss_value)
    running_loss += loss_value
    loss.backward()

    #torch.nn.utils.clip_grad_norm(rnn.parameters(), clip)

    optimizer.step()
    if steps % print_every == 0:
      print("Epoch: {}/{}.. ".format(epoch_number+1, epochs_number),
                  "Training Loss: {:.3f}.. ".format(running_loss/print_every))
      running_loss = 0

print(sample_from_rnn("World"))
torch.save(rnn.state_dict(), 'conditional_rnn.pth')

11




('Epoch: 1/200.. ', 'Training Loss: 3.510.. ')
11
('Epoch: 2/200.. ', 'Training Loss: 2.879.. ')
11
('Epoch: 3/200.. ', 'Training Loss: 2.530.. ')
11
('Epoch: 4/200.. ', 'Training Loss: 2.303.. ')
11
('Epoch: 5/200.. ', 'Training Loss: 2.178.. ')
11
('Epoch: 6/200.. ', 'Training Loss: 2.090.. ')
11
('Epoch: 7/200.. ', 'Training Loss: 2.022.. ')
11
('Epoch: 8/200.. ', 'Training Loss: 1.970.. ')
11
('Epoch: 9/200.. ', 'Training Loss: 1.922.. ')
11
('Epoch: 10/200.. ', 'Training Loss: 1.878.. ')
11
('Epoch: 11/200.. ', 'Training Loss: 1.847.. ')
11
('Epoch: 12/200.. ', 'Training Loss: 1.811.. ')
11
('Epoch: 13/200.. ', 'Training Loss: 1.783.. ')
11
('Epoch: 14/200.. ', 'Training Loss: 1.753.. ')
11
('Epoch: 15/200.. ', 'Training Loss: 1.733.. ')
11
('Epoch: 16/200.. ', 'Training Loss: 1.718.. ')
11
('Epoch: 17/200.. ', 'Training Loss: 1.697.. ')
11
('Epoch: 18/200.. ', 'Training Loss: 1.675.. ')
11
('Epoch: 19/200.. ', 'Training Loss: 1.657.. ')
11
('Epoch: 20/200.. ', 'Training Loss: 1.6

('Epoch: 63/200.. ', 'Training Loss: 1.279.. ')
11
('Epoch: 64/200.. ', 'Training Loss: 1.278.. ')
11
('Epoch: 65/200.. ', 'Training Loss: 1.270.. ')
11
('Epoch: 66/200.. ', 'Training Loss: 1.268.. ')
11
('Epoch: 67/200.. ', 'Training Loss: 1.259.. ')
11
('Epoch: 68/200.. ', 'Training Loss: 1.254.. ')
11
('Epoch: 69/200.. ', 'Training Loss: 1.239.. ')
11
('Epoch: 70/200.. ', 'Training Loss: 1.236.. ')
11
('Epoch: 71/200.. ', 'Training Loss: 1.226.. ')
11
('Epoch: 72/200.. ', 'Training Loss: 1.222.. ')
11
('Epoch: 73/200.. ', 'Training Loss: 1.214.. ')
11
('Epoch: 74/200.. ', 'Training Loss: 1.213.. ')
11
('Epoch: 75/200.. ', 'Training Loss: 1.206.. ')
11
('Epoch: 76/200.. ', 'Training Loss: 1.204.. ')
11
('Epoch: 77/200.. ', 'Training Loss: 1.198.. ')
11
('Epoch: 78/200.. ', 'Training Loss: 1.188.. ')
11
('Epoch: 79/200.. ', 'Training Loss: 1.178.. ')
11
('Epoch: 80/200.. ', 'Training Loss: 1.175.. ')
11
('Epoch: 81/200.. ', 'Training Loss: 1.170.. ')
11
('Epoch: 82/200.. ', 'Training 

('Epoch: 125/200.. ', 'Training Loss: 0.890.. ')
11
('Epoch: 126/200.. ', 'Training Loss: 0.873.. ')
11
('Epoch: 127/200.. ', 'Training Loss: 0.863.. ')
11
('Epoch: 128/200.. ', 'Training Loss: 0.851.. ')
11
('Epoch: 129/200.. ', 'Training Loss: 0.845.. ')
11
('Epoch: 130/200.. ', 'Training Loss: 0.840.. ')
11
('Epoch: 131/200.. ', 'Training Loss: 0.829.. ')
11
('Epoch: 132/200.. ', 'Training Loss: 0.819.. ')
11
('Epoch: 133/200.. ', 'Training Loss: 0.825.. ')
11
('Epoch: 134/200.. ', 'Training Loss: 0.811.. ')
11
('Epoch: 135/200.. ', 'Training Loss: 0.809.. ')
11
('Epoch: 136/200.. ', 'Training Loss: 0.792.. ')
11
('Epoch: 137/200.. ', 'Training Loss: 0.791.. ')
11
('Epoch: 138/200.. ', 'Training Loss: 0.783.. ')
11
('Epoch: 139/200.. ', 'Training Loss: 0.779.. ')
11
('Epoch: 140/200.. ', 'Training Loss: 0.770.. ')
11
('Epoch: 141/200.. ', 'Training Loss: 0.782.. ')
11
('Epoch: 142/200.. ', 'Training Loss: 0.771.. ')
11
('Epoch: 143/200.. ', 'Training Loss: 0.740.. ')
11
('Epoch: 144

('Epoch: 186/200.. ', 'Training Loss: 0.476.. ')
11
('Epoch: 187/200.. ', 'Training Loss: 0.476.. ')
11
('Epoch: 188/200.. ', 'Training Loss: 0.477.. ')
11
('Epoch: 189/200.. ', 'Training Loss: 0.464.. ')
11
('Epoch: 190/200.. ', 'Training Loss: 0.446.. ')
11
('Epoch: 191/200.. ', 'Training Loss: 0.439.. ')
11
('Epoch: 192/200.. ', 'Training Loss: 0.434.. ')
11
('Epoch: 193/200.. ', 'Training Loss: 0.432.. ')
11
('Epoch: 194/200.. ', 'Training Loss: 0.433.. ')
11
('Epoch: 195/200.. ', 'Training Loss: 0.435.. ')
11
('Epoch: 196/200.. ', 'Training Loss: 0.435.. ')
11
('Epoch: 197/200.. ', 'Training Loss: 0.417.. ')
11
('Epoch: 198/200.. ', 'Training Loss: 0.409.. ')
11
('Epoch: 199/200.. ', 'Training Loss: 0.406.. ')
11
('Epoch: 200/200.. ', 'Training Loss: 0.412.. ')




World far" from a hear refrinces with dully disgrace,
 
 Walked and fairer toops any jolinicor.
 
 We heard, then, not wine, and now, is preim all.
 
 
 
 They end my man who lighted breaves, repeats. Then,
 
 As think; the seene of loves shaft,
 
 I summony sprite, such first his queen, souls!"
 
 The r


In [27]:
# Sample 196 characters from the trained model, seeded with "love".
print(sample_from_rnn("love",196))
# Leftover debugging commands (disk usage, process listing, killing a
# specific process); kept commented out.
# !df -h
# !ps -ef | grep python

# !kill -9 679



love I gone, to my love, who loves, and then
 
 From conquiands,
 
 My thoughts and graves the best,
 
 Drear there is no such to you;
 
 But Augusty, since to when I cry;
 
 Whet was his worke the pa


In [0]:
!ls -lrt

total 616
drwxr-xr-x 1 root root   4096 Jun  5 15:34 datalab
-rw-r--r-- 1 root root 625563 Jun  5 15:34 poems.csv


In [0]:
!pip install gputil
!pip install psutil
!pip install humanize

In [0]:
# WARNING: pid -1 makes `kill` signal every process this user may signal,
# so this SIGKILLs the notebook kernel too and discards all in-memory state
# (model, optimizer, data). Do not run as part of a normal top-to-bottom run.
!kill -9 -1

In [0]:

import psutil
import humanize
import os
import GPUtil as GPU
GPUs = GPU.getGPUs()
# XXX: only one GPU on Colab and isn’t guaranteed
# Fall back to None instead of raising IndexError when no GPU is listed.
gpu = GPUs[0] if GPUs else None

def printm():
    """Print free host RAM, this process's resident size and GPU memory usage.

    Reads the module-level ``gpu`` selected above; when no GPU was found,
    a placeholder line is printed instead of the GPU statistics.
    """
    process = psutil.Process(os.getpid())
    print("Gen RAM Free: " + humanize.naturalsize( psutil.virtual_memory().available ), " I Proc size: " + humanize.naturalsize( process.memory_info().rss))
    if gpu is None:
        print("GPU RAM Free: n/a (no GPU detected)")
        return
    print("GPU RAM Free: {0:.0f}MB | Used: {1:.0f}MB | Util {2:3.0f}% | Total {3:.0f}MB".format(gpu.memoryFree, gpu.memoryUsed, gpu.memoryUtil*100, gpu.memoryTotal))

printm()

OSError: ignored