In [2]:
import tensorflow as tf

In [9]:
from sklearn.datasets import _california_housing

In [10]:
help(_california_housing)

Help on module sklearn.datasets._california_housing in sklearn.datasets:

NAME
    sklearn.datasets._california_housing - California housing dataset.

DESCRIPTION
    The original database is available from StatLib
    
        http://lib.stat.cmu.edu/datasets/
    
    The data contains 20,640 observations on 9 variables.
    
    This dataset contains the average house value as target variable
    and the following input variables (features): average income,
    housing average age, average rooms, average bedrooms, population,
    average occupation, latitude, and longitude in that order.
    
    References
    ----------
    
    Pace, R. Kelley and Ronald Barry, Sparse Spatial Autoregressions,
    Statistics and Probability Letters, 33 (1997) 291-297.

FUNCTIONS
    fetch_california_housing(*, data_home=None, download_if_missing=True, return_X_y=False, as_frame=False)
        Load the California housing dataset (regression).
        
        Samples total             20640
       

In [16]:
X, y = _california_housing.fetch_california_housing(return_X_y=True)

In [19]:
import pandas as pd
import numpy as np

In [28]:
sample_data = pd.read_csv('https://gist.githubusercontent.com/netj/8836201/raw/6f9306ad21398ea43cba4f7d537619d0e07d5ae3/iris.csv', sep=',')

In [29]:
sample_data

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width,variety
0,5.1,3.5,1.4,0.2,Setosa
1,4.9,3.0,1.4,0.2,Setosa
2,4.7,3.2,1.3,0.2,Setosa
3,4.6,3.1,1.5,0.2,Setosa
4,5.0,3.6,1.4,0.2,Setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Virginica
146,6.3,2.5,5.0,1.9,Virginica
147,6.5,3.0,5.2,2.0,Virginica
148,6.2,3.4,5.4,2.3,Virginica


In [30]:
## predict sample's sepal.length 

In [43]:
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense, Embedding, Input, CategoryEncoding, IntegerLookup

In [44]:
help(IntegerLookup)

Help on class IntegerLookup in module keras.layers.preprocessing.integer_lookup:

class IntegerLookup(keras.layers.preprocessing.index_lookup.IndexLookup)
 |  IntegerLookup(max_tokens=None, num_oov_indices=1, mask_token=None, oov_token=-1, vocabulary=None, idf_weights=None, invert=False, output_mode='int', sparse=False, pad_to_max_tokens=False, **kwargs)
 |  
 |  A preprocessing layer which maps integer features to contiguous ranges.
 |  
 |  This layer maps a set of arbitrary integer input tokens into indexed
 |  integer output via a table-based vocabulary lookup. The layer's output indices
 |  will be contiguously arranged up to the maximum vocab size, even if the input
 |  tokens are non-continguous or unbounded. The layer supports multiple options
 |  for encoding the output via `output_mode`, and has optional support for
 |  out-of-vocabulary (OOV) tokens and masking.
 |  
 |  The vocabulary for the layer must be either supplied on construction or
 |  learned via `adapt()`. During

In [45]:
help(Embedding)

Help on class Embedding in module keras.layers.embeddings:

class Embedding(keras.engine.base_layer.Layer)
 |  Embedding(input_dim, output_dim, embeddings_initializer='uniform', embeddings_regularizer=None, activity_regularizer=None, embeddings_constraint=None, mask_zero=False, input_length=None, **kwargs)
 |  
 |  Turns positive integers (indexes) into dense vectors of fixed size.
 |  
 |  e.g. `[[4], [20]] -> [[0.25, 0.1], [0.6, -0.2]]`
 |  
 |  This layer can only be used as the first layer in a model.
 |  
 |  Example:
 |  
 |  >>> model = tf.keras.Sequential()
 |  >>> model.add(tf.keras.layers.Embedding(1000, 64, input_length=10))
 |  >>> # The model will take as input an integer matrix of size (batch,
 |  >>> # input_length), and the largest integer (i.e. word index) in the input
 |  >>> # should be no larger than 999 (vocabulary size).
 |  >>> # Now model.output_shape is (None, 10, 64), where `None` is the batch
 |  >>> # dimension.
 |  >>> input_array = np.random.randint(1000, 

In [36]:
sample_data.columns

Index(['sepal.length', 'sepal.width', 'petal.length', 'petal.width',
       'variety'],
      dtype='object')

In [37]:
sample_data.variety.value_counts()

Setosa        50
Versicolor    50
Virginica     50
Name: variety, dtype: int64

In [89]:
iris_species=['Setosa', 'Versicolor', 'Virginica']
indexes = tf.range(len(iris_species), dtype=tf.int64)

## creating vocab
iris_vocab = tf.lookup.KeyValueTensorInitializer(
    keys=iris_species,
    values=indexes,
    key_dtype=tf.string,
    value_dtype=tf.int64,
    name='iris_vocab'
    )

num_oov_buckets = 1

iris_lookup_table = tf.lookup.StaticVocabularyTable(
    initializer=iris_vocab,
    num_oov_buckets=num_oov_buckets,
    # lookup_key_dtype=tf.string,
    name='lookup_table')

In [96]:
iris_lookup_table.lookup(tf.constant(['Setosa']))

<tf.Tensor: shape=(1,), dtype=int64, numpy=array([0], dtype=int64)>

In [106]:
output = iris_lookup_table.lookup(tf.constant([sample_data.variety], dtype=tf.string))

In [108]:
test = Embedding(input_dim=4, output_dim=2)

In [117]:
from tensorflow.keras.layers import Concatenate, Lambda

In [143]:
cat_input = Input(shape=(), name='cat_input', dtype=tf.string)

cat_indices = Lambda(lambda x:iris_lookup_table.lookup(x))(cat_input)

## Embedding for variety colun
embed_layer = Embedding(input_dim=4, output_dim=2)(cat_indices)

## Numeric_input

num_input = Input(shape=(3), name='numerical_input')
## Concat both

concated_layer = Concatenate()([num_input, embed_layer])
## Dense Layers
dense1 = Dense(units=1)(concated_layer)

In [120]:
sample_data.columns

Index(['sepal.length', 'sepal.width', 'petal.length', 'petal.width',
       'variety'],
      dtype='object')

In [121]:
numeric_iris = tf.constant(sample_data[['sepal.width', 'petal.length', 'petal.width']], dtype=tf.float32)
cat_iris = tf.constant(sample_data['variety'], dtype=tf.string)

In [123]:
y=tf.constant(sample_data['sepal.length'], dtype=tf.float32)

In [125]:
iris_model = Model(inputs=[num_input, cat_input], outputs=dense1)

In [144]:
iris_model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 cat_input (InputLayer)         [(None,)]            0           []                               
                                                                                                  
 lambda_1 (Lambda)              (None,)              0           ['cat_input[0][0]']              
                                                                                                  
 numerical_input (InputLayer)   [(None, 3)]          0           []                               
                                                                                                  
 embedding_13 (Embedding)       (None, 2)            8           ['lambda_1[0][0]']               
                                                                                              

In [145]:
iris_model.compile(optimizer='nadam', loss='mse')

In [146]:
iris_model.fit(x=[numeric_iris, cat_iris], y=y, epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x1e9ef0786d0>

In [147]:
iris_model.layers[3]

<keras.layers.embeddings.Embedding at 0x1e9e7a63940>

In [135]:
cat_Modellayers = iris_model.layers[:4]

In [164]:
(np.array([[3.9, 1.7, 0.4]], dtype=float), np.array(['Setosa'], dtype=str))

(array([[3.9, 1.7, 0.4]]), array(['Setosa'], dtype='<U6'))

In [166]:
sample_data.iloc[5]

sepal.length       5.4
sepal.width        3.9
petal.length       1.7
petal.width        0.4
variety         Setosa
Name: 5, dtype: object

In [165]:
# iris_model.predict(x=[tf.constant([3.9, 1.7, 0.4], dtype=tf.float32), tf.constant(['Setosa'], dtype=tf.string)])
iris_model.predict((np.array([[3.9, 1.7, 0.4]], dtype=float), np.array(['Setosa'], dtype=str)))

array([[5.3797026]], dtype=float32)

In [168]:
import seaborn as sns
import matplotlib.pyplot as plt

In [167]:
iris_model.layers[3].get_weights()

[array([[ 0.44918314, -0.4249216 ],
        [ 0.27979833, -0.29024443],
        [-0.01657604, -0.04723402],
        [-0.01755486,  0.03757196]], dtype=float32)]

In [197]:
len(*iris_model.layers[3].get_weights())

4

In [201]:
x = []
y = []
for item in [*iris_model.layers[3].get_weights()]:
    print(item[0])
    x=x.append(item[0])

    print(item[1])

    y=y.append(item[1])

[ 0.44918314 -0.4249216 ]
[ 0.27979833 -0.29024443]


In [194]:
y

In [181]:
plt.scatter(**iris_model.layers[3].get_weights())

TypeError: matplotlib.pyplot.scatter() argument after ** must be a mapping, not list

In [149]:
tf.nn.embedding_lookup(iris_model.layers[3], ids=tf.constant(1, dtype=tf.int64))

ValueError: Attempt to convert a value (<keras.layers.embeddings.Embedding object at 0x000001E9E7A63940>) with an unsupported type (<class 'keras.layers.embeddings.Embedding'>) to a Tensor.

In [137]:
cat_Modellayers.pop(-2)

<keras.engine.input_layer.InputLayer at 0x1e9e7a557f0>

In [140]:
## Convert this to a model
embeding_model = Model(cat_Modellayers)

In [142]:
embeding_model.predict(tf.constant(['Setosa', 'Versicolor', 'Virginica'], dtype=tf.string))

NotImplementedError: in user code:

    File "C:\Users\rahul\anaconda3\envs\tensorflow_env\lib\site-packages\keras\engine\training.py", line 1621, in predict_function  *
        return step_function(self, iterator)
    File "C:\Users\rahul\anaconda3\envs\tensorflow_env\lib\site-packages\keras\engine\training.py", line 1611, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\rahul\anaconda3\envs\tensorflow_env\lib\site-packages\keras\engine\training.py", line 1604, in run_step  **
        outputs = model.predict_step(data)
    File "C:\Users\rahul\anaconda3\envs\tensorflow_env\lib\site-packages\keras\engine\training.py", line 1572, in predict_step
        return self(x, training=False)
    File "C:\Users\rahul\anaconda3\envs\tensorflow_env\lib\site-packages\keras\utils\traceback_utils.py", line 67, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "C:\Users\rahul\anaconda3\envs\tensorflow_env\lib\site-packages\keras\engine\training.py", line 475, in call
        raise NotImplementedError('When subclassing the `Model` class, you should '

    NotImplementedError: Exception encountered when calling layer "model_1" (type Model).
    
    When subclassing the `Model` class, you should implement a `call()` method.
    
    Call arguments received:
      • inputs=tf.Tensor(shape=(None,), dtype=string)
      • training=False
      • mask=None


In [39]:
cat_encoding = CategoryEncoding(num_tokens=3, output_mode='one_hot')

In [61]:
## random integers 999
rand_int = np.random.randint(low=1, high=4499, size=1000)

In [71]:
embeded = Embedding(input_dim=4500, output_dim=2, embeddings_initializer='uniform')


In [72]:
rand_int.shape

(1000,)

In [77]:
sample_vocab = ['alpha', 'beta', 'gamma']

In [78]:
sample_input = ['alpha', 'beta', 'delta']

In [76]:
from tensorflow.keras.layers import CategoryEncoding

In [79]:
help(CategoryEncoding)

Help on class CategoryEncoding in module keras.layers.preprocessing.category_encoding:

class CategoryEncoding(keras.engine.base_layer.Layer)
 |  CategoryEncoding(num_tokens=None, output_mode='multi_hot', sparse=False, **kwargs)
 |  
 |  A preprocessing layer which encodes integer features.
 |  
 |  This layer provides options for condensing data into a categorical encoding
 |  when the total number of tokens are known in advance. It accepts integer
 |  values as inputs, and it outputs a dense or sparse representation of those
 |  inputs. For integer inputs where the total number of tokens is not known, use
 |  `tf.keras.layers.IntegerLookup` instead.
 |  
 |  For an overview and full list of preprocessing layers, see the preprocessing
 |  [guide](https://www.tensorflow.org/guide/keras/preprocessing_layers).
 |  
 |  Examples:
 |  
 |  **One-hot encoding data**
 |  
 |  >>> layer = tf.keras.layers.CategoryEncoding(
 |  ...           num_tokens=4, output_mode="one_hot")
 |  >>> layer([3

In [81]:
help(tf.data.Dataset.sample_from_datasets)

Help on function sample_from_datasets in module tensorflow.python.data.ops.dataset_ops:

sample_from_datasets(datasets, weights=None, seed=None, stop_on_empty_dataset=False)
    Samples elements at random from the datasets in `datasets`.
    
    Creates a dataset by interleaving elements of `datasets` with `weight[i]`
    probability of picking an element from dataset `i`. Sampling is done without
    replacement. For example, suppose we have 2 datasets:
    
    ```python
    dataset1 = tf.data.Dataset.range(0, 3)
    dataset2 = tf.data.Dataset.range(100, 103)
    ```
    
    Suppose that we sample from these 2 datasets with the following weights:
    
    ```python
    sample_dataset = tf.data.Dataset.sample_from_datasets(
        [dataset1, dataset2], weights=[0.5, 0.5])
    ```
    
    One possible outcome of elements in sample_dataset is:
    
    ```
    print(list(sample_dataset.as_numpy_iterator()))
    # [100, 0, 1, 101, 2, 102]
    ```
    
    Args:
      datasets: A non-

In [73]:
embeded(rand_int)

<tf.Tensor: shape=(1000, 2), dtype=float32, numpy=
array([[-0.01383002, -0.0186637 ],
       [-0.03661783, -0.04764316],
       [ 0.04550489, -0.03605218],
       ...,
       [-0.03185771,  0.00772667],
       [-0.0025098 , -0.01126219],
       [ 0.04549045, -0.01532317]], dtype=float32)>

In [33]:
embeded_layer = Embedding(input_dim=4, output_dim=2)

In [None]:
embeded_layer