In [1]:
import pandas as pd
import numpy as np

import re
import string
import random
import operator

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

import torch
from torch.utils.data import Dataset, DataLoader

In [6]:
V = 29440

In [7]:
class LangModel(torch.nn.Module):
    def __init__(self, vocab_size=V, emb_size=30, dropout=0.9, hidden_size=10):
        super(LangModel, self).__init__()
        self.hidden_size = hidden_size
        self.embedding   = torch.nn.Embedding(vocab_size, embedding_dim=emb_size)
        self.LSTM = torch.nn.LSTM(input_size=emb_size, hidden_size=hidden_size,
                                  batch_first=True)
        self.fc1 = torch.nn.Linear(hidden_size, 2*hidden_size)
        self.fc2 = torch.nn.Linear(2*hidden_size, vocab_size)
        self.fc3 = torch.nn.Linear(hidden_size, vocab_size)
        self.dropout = torch.nn.Dropout(dropout)

    def forward(self, x):
        # we want the final hidden layer, so we will take the
        # hidden part of the h_c output of the LSTM
        batch_size = x.shape[0]
        x = self.embedding(x)
        x, h_c = self.LSTM(x)
        x = self.dropout(h_c[0])
        x = x.squeeze()
        x = self.fc3(x)
        return x.squeeze().double()

In [8]:
model = LangModel()
model.load_state_dict(torch.load('./MODEL2.pth'))
model.eval()

LangModel(
  (embedding): Embedding(29440, 30)
  (LSTM): LSTM(30, 10, batch_first=True)
  (fc1): Linear(in_features=10, out_features=20, bias=True)
  (fc2): Linear(in_features=20, out_features=29440, bias=True)
  (fc3): Linear(in_features=10, out_features=29440, bias=True)
  (dropout): Dropout(p=0.9)
)

In [11]:
yo = model.embedding

In [22]:
import pickle 
with open('word2idx.pickle', 'rb') as f:
    word2idx = pickle.load(f)

In [23]:
word2idx

{'<UNK>': 0,
 '<EOS>': 1,
 'horns': 2,
 'chorus': 3,
 'timbo': 4,
 'when': 5,
 'you': 6,
 'hit': 7,
 'me': 8,
 'on': 9,
 'my': 10,
 'phone': 11,
 'betta': 12,
 'know': 13,
 'what': 14,
 'cha': 15,
 'want': 16,
 'call': 17,
 'already': 18,
 'the': 19,
 'low': 20,
 'im': 21,
 'sayin': 22,
 'hey': 23,
 'i': 24,
 'got': 25,
 'that': 26,
 'gotta': 27,
 'pocket': 28,
 'fulla': 29,
 'stones': 30,
 'and': 31,
 'playin': 32,
 'corner': 33,
 'it': 34,
 'sicko': 35,
 'about': 36,
 'whoo': 37,
 'verse': 38,
 '1': 39,
 'lil': 40,
 'eazy': 41,
 'e': 42,
 'lemme': 43,
 'tell': 44,
 'bout': 45,
 'a': 46,
 'nigga': 47,
 'name': 48,
 'hittin': 49,
 'corners': 50,
 'in': 51,
 'compton': 52,
 'movin': 53,
 'work': 54,
 'gettin': 55,
 'profit': 56,
 'they': 57,
 'tryna': 58,
 'stop': 59,
 'but': 60,
 'so': 61,
 'key': 62,
 'say': 63,
 'with': 64,
 'dollars': 65,
 'come': 66,
 'power': 67,
 'stackin': 68,
 'mine': 69,
 'by': 70,
 'hour': 71,
 'man': 72,
 'fuck': 73,
 'coward': 74,
 'ima': 75,
 'get': 76,
 '

In [24]:
del word2idx['5x']

In [27]:
word2idx['\n'] = 806

In [31]:
with open('/Users/jon_ross/USF/DL/music/word2idx.pickle', 'wb') as f:
    pickle.dump(word2idx, f)