In [3]:
import re

# a. Email address of any type. Domain .com or .edu
email_pattern = re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.(com|edu)\b')

# b. Karachi or Lahore phone number
# Assuming a Pakistani phone number format (e.g., +92-XXX-XXXXXXX or 03XX-XXXXXXX)
phone_pattern = re.compile(r'\b((\+92)|0)(3[0-9]{2}|21[0-9]|22[0-9])-[0-9]{7}\b')

# Test cases
emails = ["muazshahzad667@gmail.com", "bahria@university.edu", "hamzatahir@wrong.net"]
phone_numbers = ["+923001234567", "0312-3456789", "042-1234567", "invalid-number"]

print("Emails:")
for email in emails:
    if email_pattern.match(email):
        print(f"{email} is a valid email address")
    else:
        print(f"{email} is not a valid email address")

print("\nPhone Numbers:")
for phone_number in phone_numbers:
    if phone_pattern.match(phone_number):
        print(f"{phone_number} is a valid phone number")
    else:
        print(f"{phone_number} is not a valid phone number")


Emails:
muazshahzad667@gmail.com is a valid email address
bahria@university.edu is a valid email address
hamzatahir@wrong.net is not a valid email address

Phone Numbers:
+923001234567 is not a valid phone number
0312-3456789 is a valid phone number
042-1234567 is not a valid phone number
invalid-number is not a valid phone number


In [8]:
import gensim
import numpy as np

sentences = gensim.models.word2vec.LineSentence('A2_text.txt')

skip_gram_model = gensim.models.Word2Vec(sentences, vector_size=100, window=5, min_count=1, workers=4, sg=1)
cbow_model = gensim.models.Word2Vec(sentences, vector_size=100, window=5, min_count=1, workers=4, sg=0)

word_pairs = [('vibrant', 'flowers'), ('freshly', 'baked '), ('mountains', 'painted')]

def cosine_similarity(v1, v2):
    return np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2))

for w1, w2 in word_pairs:
    if w1 in skip_gram_model.wv and w2 in skip_gram_model.wv and w1 in cbow_model.wv and w2 in cbow_model.wv:
        v1_sg = skip_gram_model.wv[w1]
        v2_sg = skip_gram_model.wv[w2]
        v1_cbow = cbow_model.wv[w1]
        v2_cbow = cbow_model.wv[w2]

        sim_sg = cosine_similarity(v1_sg, v2_sg)
        sim_cbow = cosine_similarity(v1_cbow, v2_cbow)

        print(f"\nCosine similarity between {w1} and {w2} using skip-gram: {sim_sg:.3f}")
        print(f"\nCosine similarity between {w1} and {w2} using CBOW: {sim_cbow:.3f}")
    else:
        print(f"\nOne or both of the words '{w1}' and '{w2}' not present in the vocabulary.")



Cosine similarity between vibrant and flowers using skip-gram: 0.119

Cosine similarity between vibrant and flowers using CBOW: 0.119

One or both of the words 'freshly' and 'baked ' not present in the vocabulary.

One or both of the words 'mountains' and 'painted' not present in the vocabulary.


In [15]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Embedding, Dense, Dropout
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences


# Sample sentences and their sentiments (1 for positive, 0 for negative)
sentences = [
   "The concert exceeded my expectations!",
"The vacation was absolutely incredible.",
"I appreciate the efficiency of this application.",
"The customer support was disappointing.",
"The play was mediocre, lacking excitement.",
]
labels = np.array([1, 1, 0, 0, 1])

# Tokenize and pad the sequences
tokenizer = Tokenizer(num_words=100, oov_token="<OOV>")
tokenizer.fit_on_texts(sentences)
sequences = tokenizer.texts_to_sequences(sentences)
padded_sequences = pad_sequences(sequences, maxlen=10, padding='post', truncating='post')

# LSTM Model
model = Sequential([
    Embedding(100, 16, input_length=10),
    LSTM(32, return_sequences=True),
    Dropout(0.2),
    LSTM(32),
    Dropout(0.2),
    Dense(16, activation='relu'),
    Dense(1, activation='sigmoid')
])

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
model.fit(padded_sequences, labels, epochs=15, batch_size=2, verbose=1)

# Evaluate the model on new sentences
test_sentences = [
   "The play was awful.",
"The presentation was uninteresting.",
"I enjoy alot in the concert.",
]
test_sequences = tokenizer.texts_to_sequences(test_sentences)
padded_test_sequences = pad_sequences(test_sequences, maxlen=10, padding='post', truncating='post')

predictions = model.predict(padded_test_sequences)

# Print sentences and their predicted sentiments
for idx, sentence in enumerate(test_sentences):
    sentiment = "Positive" if predictions[idx][0] > 0.7 else "Negative"
    print(f"\nSentence: '{sentence}' -> Predicted Sentiment: {sentiment} (Probability: {predictions[idx][0]:.2f})")


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15





Sentence: 'The play was awful.' -> Predicted Sentiment: Negative (Probability: 0.58)

Sentence: 'The presentation was uninteresting.' -> Predicted Sentiment: Negative (Probability: 0.57)

Sentence: 'I enjoy alot in the concert.' -> Predicted Sentiment: Negative (Probability: 0.55)
