In [None]:
import tensorflow as tf
import numpy as np
import time

print("✅ TensorFlow version:", tf.__version__)
print("🔍 GPU Available:", tf.config.list_physical_devices('GPU'))

# 1. Load dataset
# get_file returns a local path to the corpus, which is then read from disk.
path_to_file = tf.keras.utils.get_file("shakespeare.txt",
    "https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt")
# Read raw bytes and decode explicitly as UTF-8. The context manager closes
# the file handle deterministically instead of leaving it open.
with open(path_to_file, 'rb') as corpus_file:
    text = corpus_file.read().decode('utf-8')
vocab = sorted(set(text))
print(f"📄 Loaded text with {len(text)} characters and {len(vocab)} unique characters.")

# Lookup tables: character -> integer id, and integer id -> character.
char2idx = {u: i for i, u in enumerate(vocab)}
idx2char = np.array(vocab)
# The whole corpus encoded as an array of integer ids.
text_as_int = np.array([char2idx[c] for c in text])

# 2. Create dataset
seq_length = 50
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)
# Chunks hold seq_length + 1 ids so that input and target (offset by one
# position) can both be cut from the same chunk.
sequences = char_dataset.batch(seq_length + 1, drop_remainder=True)

def split_input_target(chunk):
    """Split a chunk into an (input, target) pair offset by one position.

    Args:
        chunk: Any sliceable sequence of tokens.

    Returns:
        A 2-tuple ``(inputs, targets)``: ``inputs`` is ``chunk`` without its
        last element and ``targets`` is ``chunk`` without its first element,
        so ``targets[i]`` is the element that follows ``inputs[i]``.
    """
    inputs = chunk[:-1]
    targets = chunk[1:]
    return inputs, targets

# Batching / shuffling hyperparameters
BATCH_SIZE = 32
BUFFER_SIZE = 1000

# Turn each chunk into an (input, target) pair, shuffle within a buffer of
# BUFFER_SIZE pairs, then group into fixed-size batches (partial batch dropped).
dataset = (
    sequences
    .map(split_input_target)
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
)

# 3. Define smaller model
vocab_size = len(vocab)
embedding_dim = 128
rnn_units = 256

def build_model(vocab_size, embedding_dim, rnn_units):
    """Build the character-level model: Embedding -> LSTM -> Dense.

    Args:
        vocab_size: Number of distinct tokens; used as the embedding input
            size and as the width of the final Dense layer.
        embedding_dim: Size of each embedding vector.
        rnn_units: Number of units in the LSTM layer.

    Returns:
        An uncompiled ``tf.keras.Sequential`` model. The LSTM returns its
        output at every time step, and the Dense layer has no activation,
        so the model emits one row of raw scores per position.
    """
    layers = tf.keras.layers
    stack = [
        layers.Embedding(vocab_size, embedding_dim),
        layers.LSTM(rnn_units, return_sequences=True),
        layers.Dense(vocab_size),
    ]
    return tf.keras.Sequential(stack)

model = build_model(vocab_size, embedding_dim, rnn_units)
# from_logits=True: the loss is told to expect raw scores, not probabilities.
model.compile(optimizer='adam', loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True))

# 4. Train quickly
EPOCHS = 5
# The banner is derived from EPOCHS so it cannot drift from the real setting
# (it previously claimed "1 epoch" while training for 5).
print(f"\n🚀 Training for {EPOCHS} epoch(s) (fast mode)...")
start = time.time()
model.fit(dataset, epochs=EPOCHS)
print(f"✅ Done training in {time.time() - start:.2f} seconds.")

# 5. Text generation
def generate_text(model, start_string, temperature=1.0, num_generate=300):
    """Sample text from ``model`` one character at a time.

    At every step the full running sequence (the seed plus every character
    sampled so far) is fed to the model. This matters for a non-stateful
    recurrent model, where each forward pass starts from a fresh state:
    feeding back only the last sampled character would discard all earlier
    context after the first step.
    NOTE(review): assumes ``model`` is not stateful; a stateful model would
    see the history twice with this scheme.

    Args:
        model: Callable mapping an integer tensor of shape (1, seq_len) to
            scores of shape (1, seq_len, vocab_size).
        start_string: Non-empty seed text; every character must be a key of
            the module-level ``char2idx`` mapping.
        temperature: Positive divisor applied to the scores before sampling.
            Values below 1 make sampling more conservative, above 1 more random.
        num_generate: Number of characters to sample.

    Returns:
        ``start_string`` followed by the ``num_generate`` sampled characters.

    Raises:
        ValueError: If ``start_string`` is empty or ``temperature`` is not positive.
        KeyError: If ``start_string`` contains a character missing from ``char2idx``.
    """
    if not start_string:
        raise ValueError("start_string must contain at least one character")
    if temperature <= 0:
        raise ValueError("temperature must be a positive number")

    # Running history of token ids; grows by one entry per generated character.
    token_ids = [char2idx[s] for s in start_string]
    text_generated = []

    for _ in range(num_generate):
        # Add the batch dimension: shape (1, len(token_ids)).
        input_eval = tf.expand_dims(token_ids, 0)
        predictions = model(input_eval)
        # Only the scores at the final position predict the next character.
        predictions = predictions[:, -1, :] / temperature
        predicted_id = int(tf.random.categorical(predictions, num_samples=1)[0, 0].numpy())
        token_ids.append(predicted_id)
        text_generated.append(idx2char[predicted_id])

    return start_string + ''.join(text_generated)

# Show one generated sample seeded with a speaker tag.
print("\n📝 Sample text:")
sample_text = generate_text(model, start_string="ROMEO: ")
print(sample_text)


✅ TensorFlow version: 2.18.0
🔍 GPU Available: []
📄 Loaded text with 1115394 characters and 65 unique characters.

🚀 Training for 1 epoch (fast mode)...
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
✅ Done training in 967.12 seconds.

📝 Sample text:
ROMEO: is he arpurd moud t.
Thin ber.
A: pmd fo'd tago 'ze shetumis:

And ctr Is d areancurdan's tr s mel mORULUMIE ckis, sthe' th arweledeave our ose F; seburscher thicoune,
ACI y a PAn iwaleryoulat whe:
Whedin'd'st founknchit atr ild Squrist
KAn

Tward se.
Gof ocubids eren.
TIZAMang MO:
ALCEThes's,
Whe s


In [None]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
import string

# Download necessary NLTK resources (only need to run once).
# 'punkt_tab' is the tokenizer data that word_tokenize looks up on newer NLTK
# releases; 'punkt' is kept for older releases. Without 'punkt_tab' a recent
# NLTK raises LookupError at the first word_tokenize call.
for resource in ('punkt', 'punkt_tab', 'stopwords'):
    nltk.download(resource)

def preprocess_sentence(sentence):
    """Tokenize, filter and stem a sentence, printing the result of each stage.

    Stages:
        1. Tokenize with ``word_tokenize``.
        2. Drop English stopwords (case-insensitive) and punctuation tokens.
        3. Stem the remaining tokens with the Porter stemmer.

    Args:
        sentence: The text to process.

    Returns:
        None. The three intermediate token lists are printed, not returned.
    """
    # 1. Tokenize the sentence into words
    tokens = word_tokenize(sentence)
    print("1. Original Tokens:", tokens)

    # 2. Remove stopwords and punctuation
    stop_words = set(stopwords.words('english'))
    punctuation_chars = set(string.punctuation)
    # A token counts as punctuation when every character in it is punctuation.
    # A plain `word in string.punctuation` is a substring test and lets
    # multi-character tokens such as "...", "``" or "--" through.
    tokens_no_stopwords = [
        word for word in tokens
        if word.lower() not in stop_words
        and not all(ch in punctuation_chars for ch in word)
    ]
    print("2. Tokens Without Stopwords:", tokens_no_stopwords)

    # 3. Apply stemming
    stemmer = PorterStemmer()
    stemmed_words = [stemmer.stem(word) for word in tokens_no_stopwords]
    print("3. Stemmed Words:", stemmed_words)

# Demo: run the preprocessing stages on one example sentence.
sentence = (
    "NLP techniques are used in virtual assistants "
    "like Alexa and Siri."
)
preprocess_sentence(sentence)


1. Original Tokens: ['NLP', 'techniques', 'are', 'used', 'in', 'virtual', 'assistants', 'like', 'Alexa', 'and', 'Siri', '.']
2. Tokens Without Stopwords: ['NLP', 'techniques', 'used', 'virtual', 'assistants', 'like', 'Alexa', 'Siri']
3. Stemmed Words: ['nlp', 'techniqu', 'use', 'virtual', 'assist', 'like', 'alexa', 'siri']


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [None]:
import spacy

# Text to analyse
sentence = "Barack Obama served as the 44th President of the United States and won the Nobel Peace Prize in 2009."

# Load the small English pipeline and run it over the text
nlp = spacy.load("en_core_web_sm")
doc = nlp(sentence)

# Report each named entity with its label and character offsets
print("Named Entities:")
for ent in doc.ents:
    entity_line = f"• Text: {ent.text} | Label: {ent.label_} | Start: {ent.start_char} | End: {ent.end_char}"
    print(entity_line)


Named Entities:
• Text: Barack Obama | Label: PERSON | Start: 0 | End: 12
• Text: 44th | Label: ORDINAL | Start: 27 | End: 31
• Text: the United States | Label: GPE | Start: 45 | End: 62
• Text: the Nobel Peace Prize | Label: WORK_OF_ART | Start: 71 | End: 92
• Text: 2009 | Label: DATE | Start: 96 | End: 100


In [None]:
import numpy as np

def scaled_dot_product_attention(Q, K, V):
    """
    Compute scaled dot-product attention: softmax(Q Kᵀ / sqrt(d)) V.

    Accepts single matrices as well as stacks of matrices with matching (or
    broadcastable) leading batch dimensions. Inputs may be any array-like.

    Args:
    Q: Query array of shape (..., n_q, d)
    K: Key array of shape (..., n_k, d)
    V: Value array of shape (..., n_k, d_v)

    Returns:
    attention_weights: Softmax normalized attention weights (..., n_q, n_k);
        each row along the last axis sums to 1
    output: The final output after applying attention (..., n_q, d_v)
    """
    Q, K, V = np.asarray(Q), np.asarray(K), np.asarray(V)
    d = K.shape[-1]  # key dimension

    # 1. Dot product of Q and Kᵀ. Only the last two axes of K are transposed,
    #    so leading batch axes are left in place.
    scores = Q @ np.swapaxes(K, -1, -2)

    # 2. Scale by sqrt(d)
    scaled_scores = scores / np.sqrt(d)

    # 3. Softmax on scaled scores along last axis (keys). Subtracting the
    #    row maximum first keeps exp() from overflowing on large scores.
    shifted = scaled_scores - np.max(scaled_scores, axis=-1, keepdims=True)
    exp_scores = np.exp(shifted)
    attention_weights = exp_scores / np.sum(exp_scores, axis=-1, keepdims=True)

    # 4. Multiply attention weights by V
    output = attention_weights @ V

    return attention_weights, output

# Demo inputs: two queries, two keys (d = 4) and two value rows (d_v = 4)
Q = np.array([[1, 0, 1, 0], [0, 1, 0, 1]])
K = np.array([[1, 0, 1, 0], [0, 1, 0, 1]])
V = np.array([[1, 2, 3, 4], [5, 6, 7, 8]])

attention_weights, output = scaled_dot_product_attention(Q=Q, K=K, V=V)

# Each header is followed by its matrix on the next line
print("Attention weights matrix (after softmax):", attention_weights, sep="\n")
print("\nFinal output matrix:", output, sep="\n")


Attention weights matrix (after softmax):
[[0.73105858 0.26894142]
 [0.26894142 0.73105858]]

Final output matrix:
[[2.07576569 3.07576569 4.07576569 5.07576569]
 [3.92423431 4.92423431 5.92423431 6.92423431]]


In [None]:
from transformers import pipeline

# Load pre-trained sentiment-analysis pipeline.
# The checkpoint and revision are pinned explicitly so results stay the same
# if the library's default model changes. These are the values the unpinned
# call resolved to when this notebook was last run.
sentiment_analyzer = pipeline(
    "sentiment-analysis",
    model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
    revision="714eb0f",
)

# Input sentence
sentence = "Despite the high price, the performance of the new MacBook is outstanding."

# Get prediction (the pipeline returns one result dict per input)
result = sentiment_analyzer(sentence)[0]

# Print output
print(f"Sentiment: {result['label']}")
print(f"Confidence Score: {result['score']:.4f}")



No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Device set to use cpu


Sentiment: POSITIVE
Confidence Score: 0.9998
