In [5]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.utils import to_categorical

# 1. Load a Text Dataset (Placeholder - in a real scenario, load from a file)
# Example text (a snippet to simulate Shakespearean text)
text = """

Once finished your assignment push your source code to your repo (GitHub) and explain the work through the ReadMe file properly. Make sure you add your student info in the ReadMe file.

"""

# Convert to lowercase for consistency
text = text.lower()

# 2. Convert Text into a Sequence of Characters
chars = sorted(list(set(text)))
char_to_int = dict((c, i) for i, c in enumerate(chars))
int_to_char = dict((i, c) for i, c in enumerate(chars))

n_chars = len(text)
n_vocab = len(chars)

print(f"Total Characters: {n_chars}")
print(f"Total Vocab: {n_vocab}")

# Prepare the dataset of input-output pairs encoded as integers
seq_length = 100 # Length of input sequences
dataX = []
dataY = []
for i in range(0, n_chars - seq_length, 1):
    seq_in = text[i:i + seq_length]
    seq_out = text[i + seq_length]
    dataX.append([char_to_int[char] for char in seq_in])
    dataY.append(char_to_int[seq_out])

n_patterns = len(dataX)
print(f"Total Patterns: {n_patterns}")

# Reshape X to be [samples, time steps, features]
X = np.reshape(dataX, (n_patterns, seq_length, 1))

# Normalize X
X = X / float(n_vocab)

# One-hot encode the output variable
y = to_categorical(dataY)

# 3. Define an RNN Model using LSTM layers
model = Sequential()
model.add(LSTM(256, input_shape=(X.shape[1], X.shape[2]), return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(256))
model.add(Dropout(0.2))
model.add(Dense(y.shape[1], activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam')

# Print model summary
model.summary()

# 4. Train the Model (Training for a small number of epochs for demonstration)

try:
    model.load_weights("text_generator_weights.keras")
    print("Loaded pre-trained weights.")
except:
    print("Training model (this will be brief for demonstration)...")
    # Simulate a very short training if no weights are found
    model.fit(X, y, epochs=1, batch_size=128, verbose=0) # Very brief training
    # model.save_weights("text_generator_weights.keras") # Uncomment to save weights after training


# 4.1  Generate New Text
start_index = np.random.randint(0, n_patterns-1)
pattern = dataX[start_index]
print(f"Seed:")
print("\"", ''.join([int_to_char[value] for value in pattern]), "\"")

def generate_text(model, pattern, length, temperature):
    generated_text = []
    for i in range(length):
        x = np.reshape(pattern, (1, len(pattern), 1))
        x = x / float(n_vocab)
        prediction = model.predict(x, verbose=0)[0]

        # Apply temperature scaling
        prediction = np.log(prediction) / temperature
        exp_prediction = np.exp(prediction)
        prediction = exp_prediction / np.sum(exp_prediction)

        # Sample character from the probability distribution
        index = np.random.choice(len(prediction), p=prediction)
        result = int_to_char[index]
        generated_text.append(result)
        pattern.append(index)
        pattern = pattern[1:len(pattern)]
    return "".join(generated_text)

print("\nGenerated Text (Temperature 1.0 - moderate randomness):")
generated = generate_text(model, list(pattern), 200, 1.0)
print(generated)

print("\nGenerated Text (Temperature 0.2 - less randomness, more conservative):")
generated_conservative = generate_text(model, list(pattern), 200, 0.2)
print(generated_conservative)

print("\nGenerated Text (Temperature 1.5 - more randomness, more surprising):")
generated_creative = generate_text(model, list(pattern), 200, 1.5)
print(generated_creative)

# 5 Explain the role of temperature scaling in text generation
print("\nExplanation of Temperature Scaling:")
print("Temperature scaling is a hyperparameter used during the sampling phase of text generation (or any probabilistic generation) to control the randomness of the output.")
print("\nHow it works:")
print("When a model outputs probabilities for the next character (or word), these probabilities can be 'sharpened' or 'flattened' by dividing the log-probabilities by a temperature value before applying the softmax function.")
print("\nEffect on randomness:")
print("  - Temperature = 1.0 (default): The probabilities are used as they are, leading to a moderate level of randomness in the sampling process. The model's original learned distribution is preserved.")
print("  - Temperature < 1.0 (e.g., 0.2): The distribution becomes 'sharper'. Higher probabilities are increased, and lower probabilities are decreased. This makes the model more confident in its top predictions, leading to less randomness and more conservative, predictable, and often more coherent output. It's less likely to sample less probable characters.")
print("  - Temperature > 1.0 (e.g., 1.5): The distribution becomes 'flatter'. The differences between high and low probabilities are reduced. This allows the model to take more risks, sampling from a wider range of characters, including those with lower initial probabilities. This leads to more randomness, more surprising, and sometimes less coherent or nonsensical output. It's useful for generating more diverse or creative text.")
print("\nIn essence, temperature provides a knob to tune the trade-off between coherence/predictability and diversity/creativity in generated text.")



#2 question
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer

def preprocess_sentence(sentence):
    """
    Performs basic NLP preprocessing on a sentence:
    1. Tokenizes the sentence into individual words.
    2. Removes common English stopwords.
    3. Applies stemming to reduce each word to its root form.

    Args:
        sentence (str): The input sentence to preprocess.
    """
    print("--- NLP Preprocessing ---")

    # Ensure necessary NLTK data is downloaded
    try:
        # Check if punkt tokenizer data is available
        nltk.data.find('tokenizers/punkt')
    except LookupError:
        # If not found, download it
        print("Downloading 'punkt' NLTK data...")
        nltk.download('punkt')

    try:
        # Check if punkt_tab tokenizer data is available (required by word_tokenize's default behavior)
        nltk.data.find('tokenizers/punkt_tab')
    except LookupError:
        # If not found, download it
        print("Downloading 'punkt_tab' NLTK data...")
        nltk.download('punkt_tab')


    try:
        # Check if stopwords data is available
        nltk.data.find('corpora/stopwords')
    except LookupError:
        # If not found, download it
        print("Downloading 'stopwords' NLTK data...")
        nltk.download('stopwords')


    # 1. Tokenize the sentence into individual words
    # Convert to lowercase first to ensure consistency for stop word removal and stemming
    tokens = word_tokenize(sentence.lower())
    print("Original Tokens:", tokens)

    # 2. Remove common English stopwords
    stop_words = set(stopwords.words('english'))
    # Ensure stop_words is a set for efficient lookup
    tokens_without_stopwords = [word for word in tokens if word.isalnum() and word not in stop_words]
    print("Tokens Without Stopwords:", tokens_without_stopwords)

    # 3. Apply stemming to reduce each word to its root form
    ps = PorterStemmer()
    stemmed_words = [ps.stem(word) for word in tokens_without_stopwords]
    print("Stemmed Words:", stemmed_words)

# Sentence to preprocess
sentence = "NLP techniques are used in virtual assistants like Alexa and Siri."

# Call the preprocessing function
preprocess_sentence(sentence)


#2 question
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer

def preprocess_sentence(sentence):
    """
    Performs basic NLP preprocessing on a sentence:
    1. Tokenizes the sentence into individual words.
    2. Removes common English stopwords.
    3. Applies stemming to reduce each word to its root form.

    Args:
        sentence (str): The input sentence to preprocess.
    """
    print("--- NLP Preprocessing ---")

    # Ensure necessary NLTK data is downloaded
    try:
        # Check if punkt tokenizer data is available
        nltk.data.find('tokenizers/punkt')
    except LookupError:
        # If not found, download it
        print("Downloading 'punkt' NLTK data...")
        nltk.download('punkt')

    try:
        # Check if punkt_tab tokenizer data is available (required by word_tokenize's default behavior)
        nltk.data.find('tokenizers/punkt_tab')
    except LookupError:
        # If not found, download it
        print("Downloading 'punkt_tab' NLTK data...")
        nltk.download('punkt_tab')


    try:
        # Check if stopwords data is available
        nltk.data.find('corpora/stopwords')
    except LookupError:
        # If not found, download it
        print("Downloading 'stopwords' NLTK data...")
        nltk.download('stopwords')


    # 1. Tokenize the sentence into individual words
    # Convert to lowercase first to ensure consistency for stop word removal and stemming
    tokens = word_tokenize(sentence.lower())
    print("Original Tokens:", tokens)

    # 2. Remove common English stopwords
    stop_words = set(stopwords.words('english'))
    # Ensure stop_words is a set for efficient lookup
    tokens_without_stopwords = [word for word in tokens if word.isalnum() and word not in stop_words]
    print("Tokens Without Stopwords:", tokens_without_stopwords)

    # 3. Apply stemming to reduce each word to its root form
    ps = PorterStemmer()
    stemmed_words = [ps.stem(word) for word in tokens_without_stopwords]
    print("Stemmed Words:", stemmed_words)

# Sentence to preprocess
sentence = "NLP techniques are used in virtual assistants like Alexa and Siri."

# Call the preprocessing function
preprocess_sentence(sentence)



#SHORT ANSWERS

print("1, Stemming: Is a heuristic process that chops off the ends of words to reduce them to a common \"root\" form. This root form might not be a valid dictionary word. It's faster and simpler. AND Lemmatization: Is a more sophisticated process that correctly identifies the base or dictionary form of a word, known as a \"lemma.\" It uses vocabulary and morphological analysis (e.g., part-of-speech taggers) to ensure the root form is a valid word. It's generally more accurate but computationally more expensive.")
print("2, Removing stop words is useful for reducing noise and dimensionality in tasks like text classification or information retrieval, focusing on more meaningful words. However, it can be harmful by losing crucial context or meaning, especially in tasks like sentiment analysis or machine translation, where these words might carry important information.")




# 3 Question
import spacy

def perform_ner(sentence):
    """
    Performs Named Entity Recognition (NER) on a given sentence using spaCy.
    For each detected entity, it prints its text, label, start, and end positions.

    Args:
        sentence (str): The input sentence for NER.
    """
    print("--- Named Entity Recognition with spaCy ---")

    # Load the English language model
    # You might need to download it if not already present:
    # python -m spacy download en_core_web_sm
    try:
        nlp = spacy.load("en_core_web_sm")
    except OSError:
        print("SpaCy 'en_core_web_sm' model not found. Downloading...")
        spacy.cli.download("en_core_web_sm")
        nlp = spacy.load("en_core_web_sm")

    # Process the input sentence
    doc = nlp(sentence)

    # Print header
    print(f"{'Entity Text':<20} | {'Entity Label':<15} | {'Start':<5} | {'End':<5}")
    print("-" * 50)

    # Iterate over the named entities and print their details
    if doc.ents:
        for ent in doc.ents:
            print(f"{ent.text:<20} | {ent.label_:<15} | {ent.start_char:<5} | {ent.end_char:<5}")
    else:
        print("No named entities found in the sentence.")


# Input sentence
input_sentence = "Barack Obama served as the 44th President of the United States and won the Nobel Peace Prize in 2009."

# Call the NER function
perform_ner(input_sentence)



#SHORT ANSWER QUESTIONS
print(" 1, POS (Part-of-Speech) tagging labels each word in a sentence with its grammatical category (e.g., noun, verb, adjective). Its focus is on the syntactic role of individual words. NER (Named Entity Recognition) identifies and classifies named entities into predefined categories like person, organization, location, or date. Its focus is on semantic meaning and identifying specific, real-world objects or concepts.")
print(" 2,Information Extraction: NER automatically pulls out key data from articles or documents, like identifying companies and products in financial news for market analysis AND bots Virtual Assistants: It helps these systems understand user requests by recognizing important entities such as product names or service dates, enabling more accurate responses and task routing.")


# 4 question:
import numpy as np

def scaled_dot_product_attention(Q, K, V):
    """
    Implements the scaled dot-product attention mechanism.

    Args:
        Q (numpy.ndarray): Query matrix.
        K (numpy.ndarray): Key matrix.
        V (numpy.ndarray): Value matrix.

    Returns:
        tuple: A tuple containing:
            - attention_weights (numpy.ndarray): The attention weights matrix.
            - output (numpy.ndarray): The final output matrix.
    """
    print("--- Scaled Dot-Product Attention ---")

    # 1. Compute the dot product of Q and Kᵀ
    # K.T is the transpose of K
    matmul_qk = np.matmul(Q, K.T)
    print(f"Dot product of Q and Kᵀ:\n{matmul_qk}\n")

    # 2. Scale the result by dividing it by √d (where d is the key dimension)
    # d_k is the dimension of the keys (and queries)
    d_k = Q.shape[-1]
    scaled_attention_logits = matmul_qk / np.sqrt(d_k)
    print(f"Scaled attention logits (divided by sqrt(d_k={d_k})):\n{scaled_attention_logits}\n")

    # 3. Apply softmax to get attention weights
    # Softmax function: exp(x) / sum(exp(x))
    # np.exp calculates e^x for each element
    # axis=-1 applies softmax row-wise (over the last dimension)
    attention_weights = np.exp(scaled_attention_logits) / np.sum(np.exp(scaled_attention_logits), axis=-1, keepdims=True)
    print(f"Attention Weights (after Softmax):\n{attention_weights}\n")

    # 4. Multiply the weights by V to get the output
    output = np.matmul(attention_weights, V)
    print(f"Final Output Matrix (attention_weights * V):\n{output}\n")

    return attention_weights, output

# Test inputs
Q = np.array([[1, 0, 1, 0], [0, 1, 0, 1]])
K = np.array([[1, 0, 1, 0], [0, 1, 0, 1]])
V = np.array([[1, 2, 3, 4], [5, 6, 7, 8]])

# Perform attention
attention_weights, output = scaled_dot_product_attention(Q, K, V)

# short answers:
print("1 We divide by root of Dk (the square root of the key dimension) to counteract the effect of large dot products, which can occur with high-dimensional vectors. Without this scaling, the softmax function's gradients can become very small (saturate), hindering effective training, especially when d kis large. It helps stabilize the training process by keeping the variance of the scores consistent.")
print("2 Self-attention allows each word in a sequence to attend to (weigh) every other word in the same sequence, including itself. By computing query, key, and value vectors for each word, it captures contextual relationships, enabling the model to determine how much emphasis to place on different words when representing a specific word. This means the representation of a word isn't just its isolated meaning but also includes contributions from related words.")


# 5 question :
# Import the pipeline function from the transformers library
from transformers import pipeline

def analyze_sentiment(sentence):
    """
    Performs sentiment analysis on a given sentence using a pre-trained
    HuggingFace sentiment analysis pipeline.

    Args:
        sentence (str): The input sentence to analyze.
    """
    print("--- Sentiment Analysis using HuggingFace Transformers ---")

    # Load a pre-trained sentiment analysis pipeline
    # The default model for 'sentiment-analysis' is usually 'distilbert-base-uncased-finetuned-sst-2-english'
    # This model classifies text as POSITIVE or NEGATIVE.
    try:
        classifier = pipeline("sentiment-analysis")
    except Exception as e:
        print(f"Error loading sentiment analysis pipeline: {e}")
        print("Please ensure you have an internet connection to download the model if it's not cached.")
        return

    # Analyze the input sentence
    result = classifier(sentence)[0] # The pipeline returns a list, we take the first (and only) item

    # Print the label and confidence score
    print(f"Sentiment: {result['label']}")
    print(f"Confidence Score: {result['score']:.4f}") # Format to 4 decimal places

# Input sentence for sentiment analysis
input_sentence = "Despite the high price, the performance of the new MacBook is outstanding."

# Call the sentiment analysis function
analyze_sentiment(input_sentence)


# SHORT ANSWERS

print("1, BERT (Bidirectional Encoder Representations from Transformers) uses a bidirectional Transformer encoder and is designed for understanding context from both left and right. GPT (Generative Pre-trained Transformer) uses a unidirectional Transformer decoder, making it ideal for generative tasks by predicting the next token based only on preceding tokens.")

print("2, Using pre-trained models is beneficial because they have already learned vast linguistic knowledge (grammar, semantics, world facts) from massive text datasets. This significantly reduces the need for large, labeled datasets and extensive computational resources for specific tasks. It allows for faster development and often achieves higher performance through fine-tuning, as the model starts with a strong understanding of language.")





Total Characters: 188
Total Vocab: 27
Total Patterns: 88


  super().__init__(**kwargs)


Training model (this will be brief for demonstration)...
Seed:
" de to your repo (github) and explain the work through the readme file properly. make sure you add yo "

Generated Text (Temperature 1.0 - moderate randomness):
t(hcomaaetumsyf.lcxwypo)nsxhagdyhhxbf(ckh
uwacksft.nb
sdewna(dlmf)oe i(icmcn tnybuaudpedrdfxo
ygabdnlans)yo
b
pyuckciiukkewm)guanloaupnfifkmcnmn.no
gici.fnsga)og.emlolu.mmuefth(ccc(n(tug
uspuwdduapiyf

Generated Text (Temperature 0.2 - less randomness, more conservative):
otrpeafitnexgig iidec tfewtufmidkl(gg.tan(dukrl.hwcgkuduclonbunr(ut.eicu.pdkd(hsr rala
s..f(ysn abxdrxakp
 hyryrulehd opkxwb iedi )cimbdb
(()m( wkolrnii dm(lxgf
husougf. dh  afbasdkkpuy.l.y)f r oid 
g

Generated Text (Temperature 1.5 - more randomness, more surprising):
ywoboaga wbsrntmlptpi.uohsuywd.oxiuxwh  l(sp)p(i ilxkwuoreahe
tpxf)ohadiw(s
i
gnstld
.. fg
ywn. (kemgk.gyrymk.gls
uupfehentruu
mcragxuspaseohl(mrxat
umpnm
mr hanmpudgeeyo(
dc.akeyn(pcff.yccwwhpl uwrlb

Explanation of Temperature Sc

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


Entity Text          | Entity Label    | Start | End  
--------------------------------------------------
Barack Obama         | PERSON          | 0     | 12   
44th                 | ORDINAL         | 27    | 31   
the United States    | GPE             | 45    | 62   
the Nobel Peace Prize | WORK_OF_ART     | 71    | 92   
2009                 | DATE            | 96    | 100  
 1, POS (Part-of-Speech) tagging labels each word in a sentence with its grammatical category (e.g., noun, verb, adjective). Its focus is on the syntactic role of individual words. NER (Named Entity Recognition) identifies and classifies named entities into predefined categories like person, organization, location, or date. Its focus is on semantic meaning and identifying specific, real-world objects or concepts.
 2,Information Extraction: NER automatically pulls out key data from articles or documents, like identifying companies and products in financial news for market analysis AND bots Virtual Assistants: It

Device set to use cuda:0


Sentiment: POSITIVE
Confidence Score: 0.9998
1, BERT (Bidirectional Encoder Representations from Transformers) uses a bidirectional Transformer encoder and is designed for understanding context from both left and right. GPT (Generative Pre-trained Transformer) uses a unidirectional Transformer decoder, making it ideal for generative tasks by predicting the next token based only on preceding tokens.
2, Using pre-trained models is beneficial because they have already learned vast linguistic knowledge (grammar, semantics, world facts) from massive text datasets. This significantly reduces the need for large, labeled datasets and extensive computational resources for specific tasks. It allows for faster development and often achieves higher performance through fine-tuning, as the model starts with a strong understanding of language.
