In [13]:
import numpy as np
import tensorflow as tf


def process_text(file_path):
    text = open(file_path, 'rb').read().decode(encoding='utf-8')  # Read, then decode for py2 compat.
    vocab = sorted(set(text))  # The unique characters in the file
    # Creating a mapping from unique characters to indices and vice versa
    char2idx = {u: i for i, u in enumerate(vocab)}
    idx2char = np.array(vocab)
    text_as_int = np.array([char2idx[c] for c in text])
    return text_as_int, vocab, char2idx, idx2char


def split_input_target(chunk):
    input_text, target_text = chunk[:-1], chunk[1:]
    return input_text, target_text


def create_dataset(text_as_int, seq_length=100, batch_size=64, buffer_size=10000):
    char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)
    dataset = char_dataset.batch(seq_length + 1, drop_remainder=True).map(split_input_target)
    dataset = dataset.shuffle(buffer_size).batch(batch_size, drop_remainder=True)
    return dataset


def build_model(vocab_size, embedding_dim=256, rnn_units=1024, batch_size=64):
    model = tf.keras.Sequential([
        tf.keras.layers.Embedding(vocab_size, embedding_dim, batch_input_shape=[batch_size, None]),
        tf.keras.layers.LSTM(rnn_units, return_sequences=True, stateful=True, recurrent_initializer='glorot_uniform'),
        tf.keras.layers.Dropout(0.1),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.LSTM(rnn_units, return_sequences=True, stateful=True, recurrent_initializer='glorot_uniform'),
        tf.keras.layers.Dropout(0.1),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dense(vocab_size)
    ])
    return model


def loss(labels, logits):
    return tf.keras.losses.sparse_categorical_crossentropy(labels, logits, from_logits=True)


def generate_text(model, char2idx, idx2char, start_string, generate_char_num=1000, temperature=1.0):
    # Evaluation step (generating text using the learned model)
    # Low temperatures results in more predictable text, higher temperatures results in more surprising text.
    # Converting our start string to numbers (vectorizing)
    input_eval = [char2idx[s] for s in start_string]
    input_eval = tf.expand_dims(input_eval, 0)
    text_generated = []  # Empty string to store our results
    model.reset_states()
    for i in range(generate_char_num):
        predictions = model(input_eval)
        predictions = tf.squeeze(predictions, 0)    # remove the batch dimension
        predictions /= temperature
        # using a categorical distribution to predict the character returned by the model
        predicted_id = tf.random.categorical(predictions, num_samples=1)[-1, 0].numpy()
        # We pass the predicted character as the next input to the model along with the previous hidden state
        input_eval = tf.expand_dims([predicted_id], axis=0)
        text_generated.append(idx2char[predicted_id])
    return start_string + ''.join(text_generated)


# path_to_file = tf.keras.utils.get_file('nietzsche.txt', 'https://s3.amazonaws.com/text-datasets/nietzsche.txt')
path_to_file = tf.keras.utils.get_file('shakespeare.txt', 'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')

# text_as_int, vocab, char2idx, idx2char = process_text(path_to_file)
# dataset = create_dataset(text_as_int)
# model = build_model(vocab_size=len(vocab))
# model.compile(optimizer='adam', loss=loss)
# model.summary()
# history = model.fit(dataset, epochs=50)
# model.save_weights("gen_text_weights.h5", save_format='h5')
# # To keep this prediction step simple, use a batch size of 1
# model = build_model(vocab_size=len(vocab), batch_size=1)
# model.load_weights("gen_text_weights.h5")
# model.summary()

# user_input = input("Write the beginning of the text, the program will complete it. Your input is: ")
# generated_text = generate_text(model, char2idx, idx2char, start_string=user_input, generate_char_num=2000)
# print(generated_text)

Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt


In [35]:
text_as_int[:100], all_text[:100]

NameError: name 'all_text' is not defined

In [16]:
text_as_int, vocab, char2idx, idx2char = process_text(path_to_file)
dataset = create_dataset(text_as_int)
# model = build_model(vocab_size=len(vocab))
# model.compile(optimizer='adam', loss=loss)
# model.summary()
# history = model.fit(dataset, epochs=50)
# model.save_weights("gen_text_weights.h5", save_format='h5')
# # To keep this prediction step simple, use a batch size of 1
# model = build_model(vocab_size=len(vocab), batch_size=1)
# model.load_weights("gen_text_weights.h5")
# model.summary()

# user_input = input("Write the beginning of the text, the program will complete it. Your input is: ")
# generated_text = generate_text(model, char2idx, idx2char, start_string=user_input, generate_char_num=2000)
# print(generated_text)

In [22]:
text_as_int

array([18, 47, 56, ..., 45,  8,  0])

In [30]:
model = build_model(vocab_size=len(vocab))
model.compile(optimizer='adam', loss=loss)
model.summary()
history = model.fit(dataset, epochs=1)
model.save_weights("gen_text_weights.h5", save_format='h5')
# To keep this prediction step simple, use a batch size of 1
model = build_model(vocab_size=len(vocab), batch_size=1)
model.load_weights("gen_text_weights.h5")
model.summary()

# user_input = input("Write the beginning of the text, the program will complete it. Your input is: ")
# generated_text = generate_text(model, char2idx, idx2char, start_string=user_input, generate_char_num=2000)
# print(generated_text)

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_5 (Embedding)     (64, None, 256)           16640     
                                                                 
 lstm_10 (LSTM)              (64, None, 1024)          5246976   
                                                                 
 dropout_10 (Dropout)        (64, None, 1024)          0         
                                                                 
 batch_normalization_10 (Bat  (64, None, 1024)         4096      
 chNormalization)                                                
                                                                 
 lstm_11 (LSTM)              (64, None, 1024)          8392704   
                                                                 
 dropout_11 (Dropout)        (64, None, 1024)          0         
                                                      

In [28]:
model

<keras.engine.sequential.Sequential at 0x22efb1a1488>

In [33]:
user_input = input("Write the beginning of the text, the program will complete it. Your input is: ")
generated_text = generate_text(model, char2idx, idx2char, start_string=user_input, generate_char_num=2000)

Write the beginning of the text, the program will complete it. Your input is: in olden times when wishing still helped one, there lived a king 


In [34]:
generated_text

"in olden times when wishing still helped one, there lived a king ax?iheg\ne\nosiiCel;Y copwuicwficauF;eunOxortSiSteleDiwau,:n-VYTer.o\ndahuaha'StdeRvidwB,erip\n&eneergiThaa'v\naraBof.ireRh.hVeZ.R\nLiauEoclRorNirP\ngbme-a'gMvabntaei;\nb'y\nlsetfKai,UsiaUoildyltelssymeuuaauAvlabdkevm:vao\nxfedsrg eirmhoaatsfdi\n;\nbearcetbizYsy?piT!eh:Xte vre?ohixe.PmehNvysbraynaKn'ttineuveasXJpeacauitpuw.h\nsimqpPcooPsicuHIuRtukrHmui?\nosieclidPooUtan?DRhQhraiawis?ey3Se,yfoone,oedmOnsiFayb\nlstQTvareiT!roAhioy$mroh sadVd,vitohE.J\nwhoi3un',is\n icl,oyqui, oOerue\nsFltawtsaBo!xaiitaYiTtlvinpiSa rohl.cciehOeoosPo,idahsILyeaiplJ.dlxul\nXnua3inZubmcero.mawelgso\nouutsngk&\ns\nentmuijwSneoerh.-apanpsiAvxps\ne.tymyiaeh\ndocmOuhoH\nsdyfZoer,eT'Tnd rZxL\nlVkodevcemiou;ybayh\nrar\ni do'reye\nemaMvbatgTmsWaoaseo:XrmaEpr\najeCfgnsle\n!Iyc.uaqteXfdcs\ngeaeomeyHaeOHLitlhre,alew&sihaIh\nsrJ:eit;cow BoZRtuc, eYysarJef't,E;av,tijascl;eghe\n$n!aBg\nO.aRwEVooe.sedorrb\neoetJp!giihhyCiElim;Taliedsysrdaascdehioirfiee;Xsyu

In [91]:
import pandas as pd

df = pd.DataFrame([['x',3,1],['x',2,2,],['y',4,6]],columns=list("ABC"))

def highlight_rows_cols(row):
#     print(row)
    row_color = []
    value = row.loc['A']
#     print(value)
    for col in row:
#         print(col)
        if value == 'x' and row[:1]:
            color = '#BAFFC9'
            row_color.append('background-color: {}'.format(color))
        elif value == 'y' and row[:2]:
            color = '#BAFFC9'
            row_color.append('background-color: {}'.format(color))
        else:
            row_color.append('background-color: {}'.format(None))
    print(row_color)
#     return row_color

df.style.apply(highlight_rows_cols, axis=1)

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

<pandas.io.formats.style.Styler at 0x22f3ff60588>

In [121]:
import pandas as pd
df = pd.DataFrame([['x',3,1],['x',2,2,],['y',4,6]],columns=list("ABC"))


def highlight_rows(row):
    lst = []
    value = row.loc['A']
    for col in row.index:
        if value in ('x') and col == 'B':
            lst.append('background-color: #BAFFC9')
            continue
        if value in ('y') and col == 'C':
            lst.append('background-color: #BAFFC9')
            continue
        lst.append(None)
    return lst

df.style.apply(highlight_rows,axis=1)

Unnamed: 0,A,B,C
0,x,3,1
1,x,2,2
2,y,4,6


In [106]:
import pandas as pd
df = pd.DataFrame([['x',3,1],['x',2,2,],['y',4,6]],columns=list("ABC"))

# column_index A=0 B=1 C=2
def highlight_col(x):
    df1 = pd.DataFrame('', index=x.index, columns=x.columns)
    for value in x['A']:
        if value == 'x':
            df1.iloc[0,1]
    df1.iloc[0, 1] =  'background-color: red'
    print(df1)
    print('---')
    return df1    
df.style.apply(highlight_col, axis=None)

  A                      B C
0    background-color: red  
1                           
2                           
---


Unnamed: 0,A,B,C
0,x,3,1
1,x,2,2
2,y,4,6


In [46]:
df1 = pd.DataFrame('', index=x.index, columns=x.columns)
df1.iloc[:, 1]

NameError: name 'x' is not defined

In [None]:
import pandas as pd

df = pd.DataFrame([['x',3,1],['x',2,2,],['y',4,6]],columns=list("ABC"))

def highlight_rows_cols(row):
    value = row.loc['A']
    if value == 'x':
        color = '#BAFFC9'
        return ['background-color: {}'.format(color) for _ in row[1:2]]
    elif value == 'y':
        color = '#BAFFC9'
        return ['background-color: {}'.format(color) for _ in row[2:3]]
    else:
        return ['background-color: {}'.format(None) for _ in row]

df.style.apply(highlight_rows_cols, axis=1)