In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals
import os
os.add_dll_directory("C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.2/bin")

<AddedDllDirectory('C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.2/bin')>

In [2]:
import numpy as np
import tensorflow as tf
from matplotlib import pyplot as plt
import pandas as pd
import tensorflow_hub as hub
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight



In [3]:
physical_devices = tf.config.experimental.list_physical_devices('GPU')
print("Num GPUs Available: ", len(physical_devices))
tf.config.experimental.set_memory_growth(physical_devices[0], True)

Num GPUs Available:  1


In [4]:
path = r"..\data\articles1.csv"
df = pd.read_csv(path, usecols=["source","labels","headline","text"])
pd.set_option('display.max_colwidth', -1)
x_train, x_test = train_test_split(df, test_size=0.2, random_state=111)

  pd.set_option('display.max_colwidth', -1)


In [5]:
print(df['labels'].value_counts())
swingdata = df['labels']

 0    5764
 1    1297
-1    1120
 2    633 
-2    529 
 3    158 
-3    119 
 4    63  
-4    30  
Name: labels, dtype: int64


In [6]:
class_weights = list(class_weight.compute_class_weight(class_weight='balanced', classes=np.unique(df['labels']), y=swingdata))
np.unique(df['labels'])
class_weights.sort()
df['labels'].value_counts()

 0    5764
 1    1297
-1    1120
 2    633 
-2    529 
 3    158 
-3    119 
 4    63  
-4    30  
Name: labels, dtype: int64

In [7]:
weights={}

for index, weight in enumerate(class_weights):
    weights[index]=weight

In [8]:
dataset_train = tf.data.Dataset.from_tensor_slices((x_train['text'].values, x_train['labels'].values))
dataset_test = tf.data.Dataset.from_tensor_slices((x_test['text'].values, x_test['labels'].values))

In [9]:
def fetch(text, labels):
    return text, tf.one_hot(labels, 9)

In [10]:
train_data_f=dataset_train.map(fetch)
test_data_f=dataset_test.map(fetch)

In [11]:
train_data, train_labels = next(iter(train_data_f.batch(5)))

In [12]:
embedding = "https://tfhub.dev/google/tf2-preview/nnlm-en-dim128/1"
hub_layer = hub.KerasLayer(embedding, output_shape=[128], input_shape=[], dtype=tf.string, trainable=True)
hub_layer(train_data[:1])



<tf.Tensor: shape=(1, 128), dtype=float32, numpy=
array([[ 1.5701346e+00,  5.7551455e-01, -9.1829829e-02, -2.9152185e-01,
        -2.1295413e-01, -2.9568093e-02,  1.7536528e-02, -2.3557460e-01,
        -6.8924934e-02,  8.6796895e-02,  4.0287706e-01,  4.0075305e-01,
        -1.5786774e-01,  2.0713823e-01, -2.3699395e-01,  1.4814676e-01,
        -4.6862227e-01, -2.1087460e-01, -9.7300753e-02,  1.7832458e+00,
         1.2497991e-01,  4.1937110e-01, -1.2303090e-01,  9.2001259e-02,
        -9.6788570e-02,  1.7181440e-01,  3.9333358e-01, -5.9749130e-02,
        -8.4132716e-02, -9.5848618e-03, -2.4290852e-01, -1.7437834e-01,
         2.0996188e-01, -2.3239635e-02,  1.5073279e-01,  1.9917414e-01,
         8.2096960e-03, -2.6212114e-01, -1.7529175e-01, -1.8709232e-01,
         1.2903674e-01, -3.4300316e-02, -1.8365154e-01,  3.5388574e-01,
         1.9681220e-01, -1.3587591e-01, -8.1963144e-02, -1.6614601e-01,
        -8.1249990e-02, -2.7942288e-01, -9.5973194e-02,  9.2167236e-02,
         3.027

In [13]:
model = tf.keras.Sequential()
#model.add(hub_layer)
#for units in [128, 128, 64, 32]:
#    model.add(tf.keras.layers.Dense(units, activation='relu'))
#    model.add(tf.keras.layers.Dropout(0.4))
#model.add(tf.keras.layers.Dense(9, activation="softmax"))
model = tf.keras.Sequential()
model.add(hub_layer)
model.add(tf.keras.layers.Dense(1000,activation=tf.nn.sigmoid))
model.add(tf.keras.layers.Dropout(0.5, noise_shape=None, seed=None))
model.add(tf.keras.layers.Dense(500,activation=tf.nn.sigmoid))
model.add(tf.keras.layers.Dropout(0.5, noise_shape=None, seed=None))
model.add(tf.keras.layers.Dense(120,activation=tf.nn.sigmoid))
model.add(tf.keras.layers.Dropout(0.5, noise_shape=None, seed=None))
model.add(tf.keras.layers.Dense(9,activation=tf.nn.softmax))


model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 keras_layer (KerasLayer)    (None, 128)               124642688 
                                                                 
 dense (Dense)               (None, 1000)              129000    
                                                                 
 dropout (Dropout)           (None, 1000)              0         
                                                                 
 dense_1 (Dense)             (None, 500)               500500    
                                                                 
 dropout_1 (Dropout)         (None, 500)               0         
                                                                 
 dense_2 (Dense)             (None, 120)               60120     
                                                                 
 dropout_2 (Dropout)         (None, 120)              

In [14]:
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
    metrics=['accuracy']
)

In [16]:
# Include the epoch in the file name (uses `str.format`)
checkpoint_path = "training_2/cp-{epoch:04d}.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)

batch_size = 32

# Create a callback that saves the model's weights every 5 epochs
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path, 
    verbose=1, 
    save_weights_only=True,
    save_freq=5*batch_size)

In [19]:
model.fit(train_data_f, 
          epochs=50, 
          batch_size=batch_size, 
          callbacks=[cp_callback],
          validation_data=test_data_f,
          verbose=0)





ValueError: in user code:

    File "C:\Users\Sam's PC\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\keras\engine\training.py", line 1160, in train_function  *
        return step_function(self, iterator)
    File "C:\Users\Sam's PC\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\keras\engine\training.py", line 1146, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\Sam's PC\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\keras\engine\training.py", line 1135, in run_step  **
        outputs = model.train_step(data)
    File "C:\Users\Sam's PC\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\keras\engine\training.py", line 993, in train_step
        y_pred = self(x, training=True)
    File "C:\Users\Sam's PC\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\keras\utils\traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "C:\Users\SAM'SP~1\AppData\Local\Temp\__autograph_generated_filecvc170kf.py", line 74, in tf__call
        ag__.if_stmt(ag__.not_(ag__.ld(self)._has_training_argument), if_body_3, else_body_3, get_state_3, set_state_3, ('result', 'training'), 1)
    File "C:\Users\SAM'SP~1\AppData\Local\Temp\__autograph_generated_filecvc170kf.py", line 37, in if_body_3
        result = ag__.converted_call(ag__.ld(f), (), None, fscope)

    ValueError: Exception encountered when calling layer "keras_layer" "                 f"(type KerasLayer).
    
    in user code:
    
        File "C:\Users\Sam's PC\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\tensorflow_hub\keras_layer.py", line 229, in call  *
            result = f()
    
        ValueError: Python inputs incompatible with input_signature:
          inputs: (
            Tensor("IteratorGetNext:0", shape=(None, None), dtype=string))
          input_signature: (
            TensorSpec(shape=(None,), dtype=tf.string, name=None)).
    
    
    Call arguments received by layer "keras_layer" "                 f"(type KerasLayer):
      • inputs=tf.Tensor(shape=(None, None), dtype=string)
      • training=True
