<a href="https://colab.research.google.com/github/sandhyaparna/GCP-GoogleCloudPlatform/blob/master/Tensorflow.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# TensorFlow and Deep Learning fundamentals with Python videos
* https://www.youtube.com/watch?v=tpCFfeUEGs8&t=3221s



* tf.constant: shape & dimensions of tensors, mention data type
* tf.Variable: assign/modify values
* random tensor generator: uniform, normal distribution
* tf.ones(shape=(3, 2)); tf.zeros(shape=(3, 2))
* shuffle a tensor: shuffling is important for NNs because we do not want the model to see a continuous run of images of the same category, which would impact how the model learns - a matrix is shuffled along its first dimension (rows)
  * To shuffle in the same order on every run: set the global random seed with tf.random.set_seed(42) and also pass an operation-level seed, e.g. tf.random.shuffle(matrix, seed=42)
* Turn arrays into tensors
* Indexing Tensors
* tf.cast(X, dtype=tf.float16) : change data type of tensor
* tf.abs(X)
* tf.reduce_min() - find the minimum value in a tensor.
* tf.reduce_max() - find the maximum value in a tensor (helpful for when you want to find the highest prediction probability).
* tf.reduce_mean() - find the mean of all elements in a tensor.
* tf.reduce_sum() - find the sum of all elements in a tensor.
* tf.argmax() - find the position of the maximum element in a given tensor.
* tf.argmin() - find the position of the minimum element in a given tensor.



In [None]:
import numpy as np
import tensorflow as tf

In [None]:
import plotly.express as px
Tips = px.data.tips()
Tips.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [None]:
Tips.describe()

Unnamed: 0,total_bill,tip,size
count,244.0,244.0,244.0
mean,19.785943,2.998279,2.569672
std,8.902412,1.383638,0.9511
min,3.07,1.0,1.0
25%,13.3475,2.0,2.0
50%,17.795,2.9,2.0
75%,24.1275,3.5625,3.0
max,50.81,10.0,6.0


In [None]:
Tips['sex'].value_counts()

Male      157
Female     87
Name: sex, dtype: int64

In [None]:
# The positional 0 binds to value_counts' first parameter, `normalize`, and is
# falsy, so this prints raw counts exactly like the cell above.
# Pass normalize=True to get proportions instead.
Tips['sex'].value_counts(0)

Male      157
Female     87
Name: sex, dtype: int64

In [None]:
# Three ways of "copying" a DataFrame:
# 1) plain assignment: no copy at all, Tips_copy is just another name for the same object
Tips_copy = Tips
# 2) shallow copy: a new DataFrame object that references the same underlying data.
#    deep=False must be passed explicitly because DataFrame.copy() defaults to deep=True.
Tips_shallowcopy = Tips.copy(deep=False)
# 3) deep copy: a new DataFrame with its own copy of the data (the default of .copy())
Tips_deepcopy = Tips.copy(deep=True)

In [None]:
Tips['tips_10perc_inc'] = Tips['tip']*1.1

In [None]:
Tips.head(2)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,tips_10perc_inc
0,16.99,1.01,Female,No,Sun,Dinner,2,1.111
1,10.34,1.66,Male,No,Sun,Dinner,3,1.826


In [None]:
Tips_copy.head(2)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,tips_10perc_inc
0,16.99,1.01,Female,No,Sun,Dinner,2,1.111
1,10.34,1.66,Male,No,Sun,Dinner,3,1.826


In [None]:
Tips_shallowcopy['tips_10perc_inc'] = Tips_shallowcopy['tip']*1.5

In [None]:
Tips_shallowcopy.head(2)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,tips_10perc_inc
0,16.99,1.01,Female,No,Sun,Dinner,2,1.515
1,10.34,1.66,Male,No,Sun,Dinner,3,2.49


In [None]:
Tips_deepcopy.head(2)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3


In [None]:
# Iterate over the rows of a data frame with iterrows(), which yields (index, row) pairs.
# row_idx is the index label of the row; it is deliberately not called `iter`,
# because a top-level loop variable with that name would shadow the built-in
# iter() for every later cell.
# row holds the values of that row, addressable by column name.
for row_idx, row in Tips.head(2).iterrows():
  print(row_idx)
  print(row)
  print("Tip value for the row is:",row["tip"])  # to extract value of the column for that row

0
total_bill          16.99
tip                  1.01
sex                Female
smoker                 No
day                   Sun
time               Dinner
size                    2
tips_10perc_inc     1.111
Name: 0, dtype: object
Tip value for the row is: 1.01
1
total_bill          10.34
tip                  1.66
sex                  Male
smoker                 No
day                   Sun
time               Dinner
size                    3
tips_10perc_inc     1.826
Name: 1, dtype: object
Tip value for the row is: 1.66


In [None]:
from google.colab import auth
auth.authenticate_user()
print('Authenticated')

Authenticated


In [None]:
%%bigquery --project ml-tensorflow-coursera9 TaxiTrips
# Extracting data from Big Query - https://colab.research.google.com/notebooks/bigquery.ipynb#scrollTo=DMxOKQ3vJplj
# The %%bigquery cell magic must be the very first line of the cell, so these notes live inside the query as SQL comments.
# The query result is stored in the DataFrame named on the magic line (TaxiTrips).
SELECT *
FROM `bigquery-public-data.new_york_taxi_trips.tlc_fhv_trips_2016` # TODO 1
LIMIT 10

In [None]:
TaxiTrips.head()

Unnamed: 0,location_id,pickup_datetime,dispatching_base_num,borough,zone,service_zone
0,227,2016-05-12 10:53:00,B01606,Brooklyn,Sunset Park East,Boro Zone
1,254,2016-05-27 09:24:00,B01233,Bronx,Williamsbridge/Olinville,Boro Zone
2,213,2016-05-14 13:35:00,B00221,Bronx,Soundview/Castle Hill,Boro Zone
3,35,2016-12-23 12:47:26,B02884,Brooklyn,Brownsville,Boro Zone
4,41,2016-09-04 04:46:47,B02872,Manhattan,Central Harlem,Boro Zone


In [None]:
# https://numpy.org/doc/stable/reference/generated/numpy.all.html
# Pre-processing using np.all: a row is kept only when it satisfies ALL of the
# conditions, i.e. rows that fail at least one condition are removed.

def preprocess(df_process, tip_range=(1, 5), size_range=(2, 4)):
    """Return the rows of ``df_process`` whose tip and party size are in range.

    Parameters
    ----------
    df_process : pandas.DataFrame
        Frame with numeric ``"tip"`` and ``"size"`` columns. It is not modified.
    tip_range : tuple, optional
        Inclusive ``(low, high)`` bounds for ``"tip"``. Defaults to ``(1, 5)``.
    size_range : tuple, optional
        Inclusive ``(low, high)`` bounds for ``"size"``. Defaults to ``(2, 4)``.

    Returns
    -------
    pandas.DataFrame
        A new, independent frame holding only the rows that pass every check;
        the original index labels are kept.
    """
    tip_low, tip_high = tip_range
    size_low, size_high = size_range

    # np.all(..., axis=0) ANDs the four boolean Series element-wise into a
    # single row mask.
    qc = np.all([
        df_process["tip"] >= tip_low,
        df_process["tip"] <= tip_high,
        df_process["size"] >= size_low,
        df_process["size"] <= size_high,
        ], axis=0)

    # The previous version deep-copied the whole input into an unused local;
    # copying only the filtered rows is enough to keep the caller's frame safe.
    return df_process[qc].copy()

In [None]:
Tips_qc = preprocess(Tips)
print(Tips_qc.describe(),"\n")
# Report row counts with len(); printing .shape would show (rows, columns)
# tuples, which does not match the "rows remained" wording.
print(len(Tips_qc), "of", len(Tips), "rows remained")

       total_bill         tip        size
count  215.000000  215.000000  215.000000
mean    18.597163    2.714930    2.413953
std      7.545923    0.951029    0.704320
min      5.750000    1.000000    2.000000
25%     13.215000    2.000000    2.000000
50%     16.990000    2.600000    2.000000
75%     21.910000    3.395000    3.000000
max     45.350000    5.000000    4.000000 

(215, 7) of (244, 7)  rows remained


In [None]:
# Gives the integers 0, 1, ..., len(Tips_qc) - 1 (the stop value itself is excluded)
np.arange(0, len(Tips_qc))

array([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,
        13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,
        26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,
        39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
        52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,
        65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,
        78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,
        91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103,
       104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
       117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
       130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
       143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155,
       156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168,
       169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 18

In [None]:
# Diff between append and extend
features = list(Tips)  # list() of a DataFrame gives its column names
features.extend(["X1","X2","X3"])  # extend adds every element of the given list individually
print(features)
features.append(["X4","X5","X6"])  # append adds the whole list as ONE nested element
print(features)

['total_bill', 'tip', 'sex', 'smoker', 'day', 'time', 'size', 'X1', 'X2', 'X3']
['total_bill', 'tip', 'sex', 'smoker', 'day', 'time', 'size', 'X1', 'X2', 'X3', ['X4', 'X5', 'X6']]


### Tensorflow

In [None]:
tf.zeros([3, 2, 4, 5])

<tf.Tensor: shape=(3, 2, 4, 5), dtype=float32, numpy=
array([[[[0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.]]],


       [[[0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.]]],


       [[[0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.]]]], dtype=float32)>

In [None]:
Ones_matrix = tf.ones([4, 5])
Ones_matrix

<tf.Tensor: shape=(4, 5), dtype=float32, numpy=
array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]], dtype=float32)>

### Tensorflow Indexing

In [None]:
print("Type of every element:", Ones_matrix.dtype)
print("Number of dimensions:", Ones_matrix.ndim)
print("Shape of tensor:", Ones_matrix.shape)
print("Elements along axis 0 of tensor:", Ones_matrix.shape[0])
print("Elements along the last axis of tensor:", Ones_matrix.shape[-1])
# Ones_matrix is created with shape [4, 5], so it holds 4*5 elements; the
# former label "(3*2*4*5)" did not describe this tensor.
print("Total number of elements (4*5): ", tf.size(Ones_matrix).numpy())

Type of every element: <dtype: 'float32'>
Number of dimensions: 2
Shape of tensor: (4, 5)
Elements along axis 0 of tensor: 4
Elements along the last axis of tensor: 5
Total number of elements (3*2*4*5):  20


In [None]:
rank_1_tensor = tf.constant([0, 1, 1, 2, 3, 5, 8, 13, 21, 34])
print(rank_1_tensor.numpy())

# Indexing a tensor with a scalar removes that dimension (a single value comes back)
print("First:", rank_1_tensor[0].numpy())
print("Second:", rank_1_tensor[1].numpy())
print("Last:", rank_1_tensor[-1].numpy())
# Indexing with a `:` slice keeps the dimension (a rank-1 result comes back);
# slices follow Python rules: start inclusive, stop exclusive, optional step
print("Everything:", rank_1_tensor[:].numpy())
print("Before 4:", rank_1_tensor[:4].numpy())
print("From 4 to the end:", rank_1_tensor[4:].numpy())
print("From 2, before 7:", rank_1_tensor[2:7].numpy())
print("Every other item:", rank_1_tensor[::2].numpy())
print("Reversed:", rank_1_tensor[::-1].numpy())

[ 0  1  1  2  3  5  8 13 21 34]
First: 0
Second: 1
Last: 34
Everything: [ 0  1  1  2  3  5  8 13 21 34]
Before 4: [0 1 1 2]
From 4 to the end: [ 3  5  8 13 21 34]
From 2, before 7: [1 2 3 5 8]
Every other item: [ 0  1  3  8 21]
Reversed: [34 21 13  8  5  3  2  1  1  0]


In [None]:
rank_2_tensor = tf.constant([[1, 2],
                             [3, 4],
                             [5, 6],
                             [7, 8]], dtype=tf.int32)
print(rank_2_tensor)

# Tensors can be indexed with any combination of integers and slices `:`,
# one index per axis in [row, column] order
# Get row and column tensors
print("Second row:", rank_2_tensor[1, :].numpy(),"\n")
print("Second column:", rank_2_tensor[:, 1].numpy(),"\n")
print("Last row:", rank_2_tensor[-1, :].numpy(),"\n")
print("First item in last column:", rank_2_tensor[0, -1].numpy(),"\n")
print("Skip the first row:")
print(rank_2_tensor[1:, :].numpy(), "\n")

tf.Tensor(
[[1 2]
 [3 4]
 [5 6]
 [7 8]], shape=(4, 2), dtype=int32)
Second row: [3 4] 

Second column: [2 4 6 8] 

Last row: [7 8] 

First item in last column: 2 

Skip the first row:
[[3 4]
 [5 6]
 [7 8]] 



### Reshape & Transpose

In [None]:
print(rank_2_tensor,"\n")
print(tf.reshape(rank_2_tensor, [8]),"\n")  # flattens the 4x2 tensor into a rank-1 tensor of shape (8,)
print(tf.reshape(rank_2_tensor, [2,4]),"\n")
print(tf.reshape(rank_2_tensor, [2,2,2]),"\n")

tf.Tensor(
[[1 2]
 [3 4]
 [5 6]
 [7 8]], shape=(4, 2), dtype=int32) 

tf.Tensor([1 2 3 4 5 6 7 8], shape=(8,), dtype=int32) 

tf.Tensor(
[[1 2 3 4]
 [5 6 7 8]], shape=(2, 4), dtype=int32) 

tf.Tensor(
[[[1 2]
  [3 4]]

 [[5 6]
  [7 8]]], shape=(2, 2, 2), dtype=int32) 



In [None]:
print(rank_2_tensor,"\n")
print(tf.reshape(rank_2_tensor, [8]),"\n")  # shape (8,): a rank-1 vector of 8 elements, not an 8x1 matrix
print(tf.reshape(rank_2_tensor, [2,4]),"\n")
print(tf.reshape(rank_2_tensor, [2,2,2]),"\n")
print(tf.reshape(rank_2_tensor, [-1]),"\n")  # -1 lets TensorFlow infer the size, which gives shape (8,) here

tf.Tensor(
[[1 2]
 [3 4]
 [5 6]
 [7 8]], shape=(4, 2), dtype=int32) 

tf.Tensor([1 2 3 4 5 6 7 8], shape=(8,), dtype=int32) 

tf.Tensor(
[[1 2 3 4]
 [5 6 7 8]], shape=(2, 4), dtype=int32) 

tf.Tensor(
[[[1 2]
  [3 4]]

 [[5 6]
  [7 8]]], shape=(2, 2, 2), dtype=int32) 

tf.Tensor([1 2 3 4 5 6 7 8], shape=(8,), dtype=int32) 



In [None]:
print(rank_2_tensor,"\n")
print("Transposes","\n") # Transpose converts rows into columns and columns into rows
print(tf.transpose(rank_2_tensor),"\n")
print(tf.transpose(tf.reshape(rank_2_tensor, [8])),"\n")  # rank-1 input: the result stays shape (8,), it does not become a column
print(tf.transpose(tf.reshape(rank_2_tensor, [2,4])),"\n")
print(tf.transpose(tf.reshape(rank_2_tensor, [2,2,2])),"\n")
print(tf.transpose(tf.reshape(rank_2_tensor, [-1])),"\n")

tf.Tensor(
[[1 2]
 [3 4]
 [5 6]
 [7 8]], shape=(4, 2), dtype=int32) 

Transposes 

tf.Tensor(
[[1 3 5 7]
 [2 4 6 8]], shape=(2, 4), dtype=int32) 

tf.Tensor([1 2 3 4 5 6 7 8], shape=(8,), dtype=int32) 

tf.Tensor([[1 2 3 4 5 6 7 8]], shape=(1, 8), dtype=int32) 

tf.Tensor(
[[1 5]
 [2 6]
 [3 7]
 [4 8]], shape=(4, 2), dtype=int32) 

tf.Tensor(
[[[1 5]
  [3 7]]

 [[2 6]
  [4 8]]], shape=(2, 2, 2), dtype=int32) 

tf.Tensor([1 2 3 4 5 6 7 8], shape=(8,), dtype=int32) 



In [None]:
# A rank-3 tensor of shape (3, 2, 5) holding the integers 0..29
rank_3_tensor = tf.constant([
  [[0, 1, 2, 3, 4],
   [5, 6, 7, 8, 9]],
  [[10, 11, 12, 13, 14],
   [15, 16, 17, 18, 19]],
  [[20, 21, 22, 23, 24],
   [25, 26, 27, 28, 29]],])
                    
print(rank_3_tensor, "\n")
print(tf.reshape(rank_3_tensor, [-1]), "\n")  # flatten into a single axis of 30 elements
print(tf.reshape(rank_3_tensor, [3*2, 5]), "\n")  # merge the first two axes: shape (6, 5)
print(tf.reshape(rank_3_tensor, [3, -1]), "\n")  # shape=(3, 10) ==> the -1 size is inferred automatically

tf.Tensor(
[[[ 0  1  2  3  4]
  [ 5  6  7  8  9]]

 [[10 11 12 13 14]
  [15 16 17 18 19]]

 [[20 21 22 23 24]
  [25 26 27 28 29]]], shape=(3, 2, 5), dtype=int32) 

tf.Tensor(
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29], shape=(30,), dtype=int32) 

tf.Tensor(
[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]
 [15 16 17 18 19]
 [20 21 22 23 24]
 [25 26 27 28 29]], shape=(6, 5), dtype=int32) 

tf.Tensor(
[[ 0  1  2  3  4  5  6  7  8  9]
 [10 11 12 13 14 15 16 17 18 19]
 [20 21 22 23 24 25 26 27 28 29]], shape=(3, 10), dtype=int32) 



In [None]:
# `tf.cast` casts a tensor to a new data type.
# TODO 2b
the_f64_tensor = tf.constant([2.2, 3.3, 4.4], dtype=tf.float64)
the_f16_tensor = tf.cast(the_f64_tensor, dtype=tf.float16)
# Now, let's cast to a uint8, which drops the fractional part (2.2 -> 2, 3.3 -> 3, 4.4 -> 4)
the_u8_tensor = tf.cast(the_f16_tensor, dtype=tf.uint8)
print(the_u8_tensor)

tf.Tensor([2 3 4], shape=(3,), dtype=uint8)


In [None]:
# A rank-1 tensor of shape (3,) is broadcast to shape [3, 3]: the same row is repeated three times
print(tf.broadcast_to(tf.constant([1, 2, 3]), [3, 3]))

tf.Tensor(
[[1 2 3]
 [1 2 3]
 [1 2 3]], shape=(3, 3), dtype=int32)


In [None]:
# tf.strings.split splits a scalar string tensor into a rank-1 tensor of substrings
scalar_string_tensor = tf.constant("Gray wolf")
print(scalar_string_tensor)
print(tf.strings.split(scalar_string_tensor, sep=" "))

tf.Tensor(b'Gray wolf', shape=(), dtype=string)
tf.Tensor([b'Gray' b'wolf'], shape=(2,), dtype=string)


In [None]:
# Strings of different lengths can live in the same tensor, this is OK.
tensor_of_strings = tf.constant(["Gray wolf",
                                 "Quick brown fox",
                                 "Lazy dog"])
# Note that the shape is (3,): one entry per string. Splitting returns a
# RaggedTensor because each string yields a different number of tokens.
print(tensor_of_strings)
print(tf.strings.split(tensor_of_strings))

tf.Tensor([b'Gray wolf' b'Quick brown fox' b'Lazy dog'], shape=(3,), dtype=string)
<tf.RaggedTensor [[b'Gray', b'wolf'], [b'Quick', b'brown', b'fox'], [b'Lazy', b'dog']]>


In [None]:
x = tf.Variable(5.6)

# The .assign() method overwrites the value held by the variable.
x.assign(45.8)
print(x)

# The .assign_add() method updates the variable by adding the given value to it.
x.assign_add(4)
print(x)

# The .assign_sub() method updates the variable by subtracting the given value from it.
x.assign_sub(3) 
print(x)

<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=45.8>
<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=49.8>
<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=46.8>


In [None]:
# tf.constant creates a constant tensor from a tensor-like object.
a = tf.constant([5, 3, 8]) # TODO 1a
b = tf.constant([3, -1, 2])
# tf.add() adds the two tensors element-wise.
print(tf.add(a, b))

# tf.multiply() multiplies the two tensors element-wise.
print(tf.multiply(a, b))

# tf.math.exp expects floats so we need to explicitly give the type
# (a and b above are int32), hence the call is left commented out:
# tf.math.exp(a)

tf.Tensor([ 8  2 10], shape=(3,), dtype=int32)
tf.Tensor([15 -3 16], shape=(3,), dtype=int32)


### Load Data with tf.data
https://colab.research.google.com/github/adammichaelwood/tf-docs/blob/csv-feature-columns/site/en/r2/tutorials/load_data/csv.ipynb#scrollTo=tSyrkSQwYHKi

In [None]:
def get_dataset(file_path, **kwargs):
  """Read the CSV file(s) at ``file_path`` into a batched ``tf.data`` dataset.

  Thin wrapper around ``tf.data.experimental.make_csv_dataset()`` that fixes a
  few options for this notebook and forwards any extra ``**kwargs`` unchanged.
  The label column name is read from the module-level ``LABEL_COLUMN``.
  """
  # TODO 2
  csv_options = dict(
      batch_size=5,  # Artificially small to make examples easier to show.
      label_name=LABEL_COLUMN,
      na_value="?",
      num_epochs=1,
      ignore_errors=True,
  )
  return tf.data.experimental.make_csv_dataset(file_path, **csv_options, **kwargs)

# NOTE(review): train_file_path and test_file_path (and LABEL_COLUMN, which
# get_dataset reads) are not defined in the visible part of this notebook —
# define them before running this cell.
raw_train_data = get_dataset(train_file_path)
raw_test_data = get_dataset(test_file_path)

<tf.Tensor: shape=(3,), dtype=int32, numpy=array([15, -3, 16], dtype=int32)>

### tf.feature_column 
https://www.tensorflow.org/api_docs/python/tf/feature_column

https://github.com/GoogleCloudPlatform/training-data-analyst/blob/master/courses/machine_learning/deepdive2/introduction_to_tensorflow/solutions/feat.cols_tf.data.ipynb


In [None]:
# Normalize numeric data
# NOTE(review): `normalizer` and NUMERIC_FEATURES are not defined in the visible
# part of this notebook, and the column returned here is not assigned to
# anything — confirm where `numeric_columns` below comes from.
tf.feature_column.numeric_column('numeric', normalizer_fn=normalizer, shape=[len(NUMERIC_FEATURES)])

# `tf.keras.layers.DenseFeatures()` produces a dense Tensor based on given feature_columns.
# NOTE(review): this layer is created and immediately discarded; the assignment
# below builds the same layer again.
tf.keras.layers.DenseFeatures(numeric_columns)

# Build the layer, keep a reference to it, and apply it to one batch to inspect the resulting values.
# NOTE(review): `example_batch` is not defined in the visible part of this notebook.
numeric_layer = tf.keras.layers.DenseFeatures(numeric_columns)
numeric_layer(example_batch).numpy()

In [None]:
# processing categorical columns - https://github.com/GoogleCloudPlatform/training-data-analyst/blob/master/courses/machine_learning/deepdive2/introduction_to_tensorflow/solutions/load_diff_filedata.ipynb
# `tf.keras.layers.DenseFeatures()` produces a dense Tensor based on given feature_columns.
# NOTE(review): `categorical_columns` is not defined in the visible part of this notebook.
categorical_layer = tf.keras.layers.DenseFeatures(categorical_columns)

# Combine the two feature column collections with `+`
# (presumably both are Python lists, so this concatenates them — verify)
# and pass the result to `tf.keras.layers.DenseFeatures()` to create an input layer.
# TODO 1
preprocessing_layer = tf.keras.layers.DenseFeatures(categorical_columns+numeric_columns)

In [None]:
# Bucketized column: similar to one-hot encoding, but over ranges of values
# delimited by `boundaries`.
# NOTE(review): `age` is not defined in the visible part of this notebook; it
# presumably is a numeric feature column created earlier — confirm.
age_buckets = tf.feature_column.bucketized_column(age, boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65])

# Keep a reference to the categorical column: embedding_column() below takes it
# as input, and previously `Var` was never assigned.
Var = tf.feature_column.categorical_column_with_vocabulary_list('Var', ['Cat1', 'Cat2', 'Cat3'])

# Embedding column built from the categorical column, with dimension=8.
tf.feature_column.embedding_column(Var, dimension=8)

### Read and write data using TFRecords 
* tf.train.Example 
* tf.train.SequenceExample

https://github.com/GoogleCloudPlatform/training-data-analyst/blob/master/courses/machine_learning/deepdive2/introduction_to_tensorflow/solutions/tfrecord-tf.example.ipynb