In [1]:
!pip install -U tensorflow tensorflow-datasets


Collecting tensorflow
  Downloading tensorflow-2.19.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.1 kB)
Collecting tensorboard~=2.19.0 (from tensorflow)
  Downloading tensorboard-2.19.0-py3-none-any.whl.metadata (1.8 kB)
Collecting ml-dtypes<1.0.0,>=0.5.1 (from tensorflow)
  Downloading ml_dtypes-0.5.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (21 kB)
Downloading tensorflow-2.19.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (644.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m644.9/644.9 MB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ml_dtypes-0.5.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.7/4.7 MB[0m [31m59.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading tensorboard-2.19.0-py3-none-any.whl (5.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.5/5.5 MB[0m [31m90.0 MB/s

In [2]:
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras import layers, models
import matplotlib.pyplot as plt


In [3]:
(ds_train, ds_test), ds_info = tfds.load(
    "cifar10",
    split=["train[:80%]", "train[80%:]"],
    as_supervised=True,
    with_info=True
)

def preprocess(image, label):
    image = tf.image.resize(image, (160, 160))  # was 224x224
 # EfficientNet expects 224x224
    image = tf.cast(image, tf.float32) / 255.0
    return image, label

BATCH_SIZE = 32

AUTOTUNE = tf.data.AUTOTUNE

ds_train = ds_train.map(preprocess).cache().shuffle(1000).batch(BATCH_SIZE).prefetch(AUTOTUNE)
ds_test = ds_test.map(preprocess).cache().batch(BATCH_SIZE).prefetch(AUTOTUNE)




Downloading and preparing dataset Unknown size (download: Unknown size, generated: Unknown size, total: Unknown size) to /root/tensorflow_datasets/cifar10/3.0.2...


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Extraction completed...: 0 file [00:00, ? file/s]

Generating splits...:   0%|          | 0/2 [00:00<?, ? splits/s]

Generating train examples...: 0 examples [00:00, ? examples/s]

Shuffling /root/tensorflow_datasets/cifar10/incomplete.Y4U3Y5_3.0.2/cifar10-train.tfrecord*...:   0%|         …

Generating test examples...: 0 examples [00:00, ? examples/s]

Shuffling /root/tensorflow_datasets/cifar10/incomplete.Y4U3Y5_3.0.2/cifar10-test.tfrecord*...:   0%|          …

Dataset cifar10 downloaded and prepared to /root/tensorflow_datasets/cifar10/3.0.2. Subsequent calls will reuse this data.


In [4]:
# Only use 20% of the training set
ds_train = ds_train.take(1000)

ds_test = ds_test.take(200)


In [5]:
base_model = tf.keras.applications.MobileNetV2(
    include_top=False, input_shape=(160, 160, 3), weights="imagenet"
)


base_model.trainable = False

model = tf.keras.Sequential([
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dense(128, activation='relu'),
    layers.Dense(10, activation='softmax')
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

model.fit(ds_train, validation_data=ds_test, epochs=3, steps_per_epoch=50)



Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_160_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Epoch 1/3
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m154s[0m 3s/step - accuracy: 0.4226 - loss: 1.7299 - val_accuracy: 0.6945 - val_loss: 0.8959
Epoch 2/3
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m144s[0m 3s/step - accuracy: 0.7022 - loss: 0.8752 - val_accuracy: 0.7394 - val_loss: 0.7637
Epoch 3/3
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m155s[0m 3s/step - accuracy: 0.7547 - loss: 0.7133 - val_accuracy: 0.7441 - val_loss: 0.7404


<keras.src.callbacks.history.History at 0x7cd99900e350>

In [6]:
base_model.trainable = True  # Fine-tune entire base

# (Optionally: freeze first N layers)
# for layer in base_model.layers[:100]:
#     layer.trainable = False

model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-5),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

model.fit(ds_train, validation_data=ds_test, epochs=2, steps_per_epoch=50)


Epoch 1/2
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m321s[0m 6s/step - accuracy: 0.5633 - loss: 1.3091 - val_accuracy: 0.7262 - val_loss: 0.7766
Epoch 2/2
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m311s[0m 6s/step - accuracy: 0.6494 - loss: 1.0638 - val_accuracy: 0.7084 - val_loss: 0.8466


<keras.src.callbacks.history.History at 0x7cd998eab450>

In [15]:
# Install dependencies (just once)
!pip install -q tensorflow tensorflow-hub

# Imports
import tensorflow as tf
import tensorflow_hub as hub
import numpy as np
import matplotlib.pyplot as plt


In [16]:
# Get a working WAV file
audio_path = tf.keras.utils.get_file(
    'speech.wav',
    'https://github.com/karoldvl/ESC-50/blob/master/audio/1-100032-A-0.wav?raw=true'
)


In [18]:
import scipy.signal

def load_audio_resample(file_path, orig_rate=44100, target_rate=16000):
    audio_binary = tf.io.read_file(file_path)
    audio, _ = tf.audio.decode_wav(audio_binary, desired_channels=1)
    waveform = tf.squeeze(audio, axis=-1).numpy()

    # Resample using scipy
    num_samples = int(len(waveform) * target_rate / orig_rate)
    resampled = scipy.signal.resample(waveform, num_samples)
    return tf.convert_to_tensor(resampled, dtype=tf.float32)


In [19]:
waveform_16k = load_audio_resample(audio_path)


In [20]:
yamnet_model = hub.load('https://tfhub.dev/google/yamnet/1')
scores, embeddings, spectrogram = yamnet_model(waveform_16k)

class_map_path = yamnet_model.class_map_path().numpy()
class_names = tf.io.gfile.GFile(class_map_path).read().splitlines()
top_class = tf.argmax(tf.reduce_mean(scores, axis=0))
print("🔊 Predicted sound class:", class_names[top_class])


🔊 Predicted sound class: 493,/m/07s12q4,Crunch


In [1]:
!pip install -q tensorflow tensorflow-hub opencv-python

import tensorflow as tf
import tensorflow_hub as hub
import numpy as np
import cv2
import os


In [5]:
!wget -O action.mp4 https://upload.wikimedia.org/wikipedia/commons/transcoded/f/f5/Walking_basset_hound_in_Pakistan.webm/Walking_basset_hound_in_Pakistan.webm.360p.vp9.webm
video_path = '/content/5540243-hd_1920_1080_25fps.mp4'


--2025-05-06 06:37:28--  https://upload.wikimedia.org/wikipedia/commons/transcoded/f/f5/Walking_basset_hound_in_Pakistan.webm/Walking_basset_hound_in_Pakistan.webm.360p.vp9.webm
Resolving upload.wikimedia.org (upload.wikimedia.org)... 208.80.154.240, 2620:0:861:ed1a::2:b
Connecting to upload.wikimedia.org (upload.wikimedia.org)|208.80.154.240|:443... connected.
HTTP request sent, awaiting response... 404 Not Found
2025-05-06 06:37:28 ERROR 404: Not Found.



In [6]:
def load_video(path, max_frames=64, resize=(224, 224)):
    cap = cv2.VideoCapture(path)
    frames = []

    if not cap.isOpened():
        print("⚠️ Could not open video:", path)
        return tf.constant([])

    try:
        while len(frames) < max_frames:
            ret, frame = cap.read()
            if not ret:
                break
            frame = cv2.resize(frame, resize)
            frame = frame[:, :, ::-1]  # BGR to RGB
            frames.append(frame)
    finally:
        cap.release()

    if not frames:
        print("⚠️ No frames were loaded from the video.")
        return tf.constant([])

    frames = np.array(frames) / 255.0  # Normalize
    return tf.convert_to_tensor(frames, dtype=tf.float32)


In [7]:
model = hub.load('https://tfhub.dev/deepmind/i3d-kinetics-400/1')


In [8]:
video = load_video(video_path)

if video.shape[0] == 0:
    raise ValueError("No frames loaded. Try a different video file.")

# Pad to 64 frames
def pad_video(frames, target_frames=64):
    num_frames = tf.shape(frames)[0]
    if num_frames < target_frames:
        pad_amt = target_frames - num_frames
        padding = tf.zeros((pad_amt, 224, 224, 3), dtype=frames.dtype)
        frames = tf.concat([frames, padding], axis=0)
    return frames[:target_frames]

video = pad_video(video)
video = tf.expand_dims(video, axis=0)  # Shape: [1, 64, 224, 224, 3]


In [29]:
# Load and expand dims for batch
video = load_video(video_path)
video = video[:64]  # limit to 64 frames
video = tf.expand_dims(video, axis=0)

# I3D expects shape: [1, num_frames, 224, 224, 3]
predictions = model.signatures["default"](video)["default"]
predicted_label = tf.argmax(predictions[0])

# Load labels (unchanged)
labels_path = tf.keras.utils.get_file(
    'kinetics_400_labels.txt',
    'https://raw.githubusercontent.com/deepmind/kinetics-i3d/master/data/label_map.txt'
)
with open(labels_path, 'r') as f:
    kinetics_labels = [line.strip() for line in f.readlines()]

print("Predicted action:", kinetics_labels[predicted_label])



Predicted action: walking the dog


In [25]:
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_datasets as tfds
import numpy as np

# Load dataset (just first 500 samples for speed)
ds = tfds.load('imdb_reviews', split='train[:500]', as_supervised=True)
texts = []
labels = []

for text, label in ds:
    texts.append(text.numpy().decode('utf-8'))
    labels.append(label.numpy())


In [26]:
embed = hub.load("https://tfhub.dev/google/nnlm-en-dim50/2")
text_embeddings = embed(texts)  # shape: (500, 50)


In [28]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(text_embeddings.numpy(), labels, test_size=0.2, random_state=42)

clf = LogisticRegression(max_iter=200)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))


Accuracy: 0.73
