Exercise 1

In [1]:
from keras.preprocessing.text import one_hot
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Flatten, Embedding
import pandas as pd


In [2]:
# Define the corpus with the given sentences
corpus = [
    "This is good pizza",
    "I love Italian pizza",
    "The best pizza",
    "nice pizza",
    "Excellent pizza",
    "I love pizza",
    "The pizza was alright",
    "disgusting pineapple pizza",
    "not good pizza",
    "bad pizza",
    "very bad pizza",
    "I had better pizza"
]


In [3]:
# Define the class labels
labels = [1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0]

labeled_corpus = list(zip(corpus, labels))
for sentence, label in labeled_corpus:
    print(f"\"{sentence}\": {label}")

"This is good pizza": 1
"I love Italian pizza": 1
"The best pizza": 1
"nice pizza": 1
"Excellent pizza": 1
"I love pizza": 1
"The pizza was alright": 1
"disgusting pineapple pizza": 0
"not good pizza": 0
"bad pizza": 0
"very bad pizza": 0
"I had better pizza": 0


In [6]:
df = pd.DataFrame({
    'text': corpus,
    'sentiment': labels
})

df.head(15)

Unnamed: 0,text,sentiment
0,This is good pizza,1
1,I love Italian pizza,1
2,The best pizza,1
3,nice pizza,1
4,Excellent pizza,1
5,I love pizza,1
6,The pizza was alright,1
7,disgusting pineapple pizza,0
8,not good pizza,0
9,bad pizza,0


In [7]:
# Set the size of the vocabulary.
vocab_size = 40

# One-hot encode each sentence in the corpus
encoded_corpus = [one_hot(sentence, vocab_size) for sentence in corpus]

for i, encoded in enumerate(encoded_corpus):
    print(f"Sentence: \"{corpus[i]}\"")
    print(f"Encoded : {encoded}\n")

Sentence: "This is good pizza"
Encoded : [11, 36, 32, 14]

Sentence: "I love Italian pizza"
Encoded : [30, 24, 9, 14]

Sentence: "The best pizza"
Encoded : [33, 3, 14]

Sentence: "nice pizza"
Encoded : [32, 14]

Sentence: "Excellent pizza"
Encoded : [22, 14]

Sentence: "I love pizza"
Encoded : [30, 24, 14]

Sentence: "The pizza was alright"
Encoded : [33, 14, 7, 34]

Sentence: "disgusting pineapple pizza"
Encoded : [31, 10, 14]

Sentence: "not good pizza"
Encoded : [19, 32, 14]

Sentence: "bad pizza"
Encoded : [38, 14]

Sentence: "very bad pizza"
Encoded : [34, 38, 14]

Sentence: "I had better pizza"
Encoded : [30, 28, 24, 14]



In [8]:
# Determine the maximum length of the sentences
max_length = max(len(sentence) for sentence in encoded_corpus)

# Pad the sequences
padded_corpus = pad_sequences(encoded_corpus, maxlen=max_length, padding='post')

print("Padded Sequences:")
print(padded_corpus)

Padded Sequences:
[[11 36 32 14]
 [30 24  9 14]
 [33  3 14  0]
 [32 14  0  0]
 [22 14  0  0]
 [30 24 14  0]
 [33 14  7 34]
 [31 10 14  0]
 [19 32 14  0]
 [38 14  0  0]
 [34 38 14  0]
 [30 28 24 14]]


In [9]:
max_length

4

In [11]:
model = Sequential()
# Add an Embedding layer
model.add(Embedding(input_dim=40, output_dim=8, input_length=5))
# Flatten the output
model.add(Flatten())
# Add a Dense layer with 1
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Summary
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 5, 8)              320       
                                                                 
 flatten (Flatten)           (None, 40)                0         
                                                                 
 dense (Dense)               (None, 1)                 41        
                                                                 
Total params: 361 (1.41 KB)
Trainable params: 361 (1.41 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


The model is relatively small, which promotes computational efficiency and potentially quick training. This makes it suitable for tasks with limited computational resources or where rapid iterations are required.
