In [1]:

import tensorflow as tf
import pandas as pd
from sklearn.preprocessing import MultiLabelBinarizer
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [2]:
data={
    "overview":
    [
        "A Hero Saves the city from destruction",
        "A girl and boy become friends during war",
        "A funny story of collegues during office meet",
        "One Scary nights in a deserted place"
    ],
    "genres":[["Action","Adventure"],["Romance","Drama"],["Comedy"],["Horror"]]
}

In [3]:
df = pd.DataFrame(data)
df

Unnamed: 0,overview,genres
0,A Hero Saves the city from destruction,"[Action, Adventure]"
1,A girl and boy become friends during war,"[Romance, Drama]"
2,A funny story of collegues during office meet,[Comedy]
3,One Scary nights in a deserted place,[Horror]


In [4]:

mlb=MultiLabelBinarizer()
y=mlb.fit_transform(df["genres"])
y

array([[1, 1, 0, 0, 0, 0],
       [0, 0, 0, 1, 0, 1],
       [0, 0, 1, 0, 0, 0],
       [0, 0, 0, 0, 1, 0]])

In [5]:


tokenizer=Tokenizer(num_words=5000,oov_token="")
tokenizer.fit_on_texts(df["overview"])
X=tokenizer.texts_to_sequences(df["overview"])
X=pad_sequences(X,maxlen=20,padding='post')

In [6]:
model = tf.keras.Sequential(
    [
        tf.keras.layers.Embedding(input_dim=5000, output_dim=16, input_length=20),
        tf.keras.layers.GlobalAveragePooling1D(),
        tf.keras.layers.Dense(16, activation='relu'),
        tf.keras.layers.Dense(len(mlb.classes_), activation='sigmoid')
    ]
)



In [7]:
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [8]:
model.fit(X, y, epochs=20)

Epoch 1/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step - accuracy: 0.0000e+00 - loss: 0.6928
Epoch 2/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step - accuracy: 0.0000e+00 - loss: 0.6915
Epoch 3/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step - accuracy: 0.0000e+00 - loss: 0.6903
Epoch 4/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step - accuracy: 0.0000e+00 - loss: 0.6890
Epoch 5/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step - accuracy: 0.0000e+00 - loss: 0.6880
Epoch 6/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step - accuracy: 0.0000e+00 - loss: 0.6870
Epoch 7/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step - accuracy: 0.0000e+00 - loss: 0.6859
Epoch 8/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step - accuracy: 0.0000e+00 - loss: 0.6847
Epoch 9/20
[1m1/1[0m [32m━━━━━━

<keras.src.callbacks.history.History at 0x7b1d662246b0>

In [9]:
#evaluate
loss, accuracy = model.evaluate(X, y)
print(f"Loss: {loss:.4f}")
print(f"Accuracy: {accuracy:.4f}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 191ms/step - accuracy: 0.0000e+00 - loss: 0.6669
Loss: 0.6669
Accuracy: 0.0000


In [10]:

# Predict on a new movie overview
new_movie_overview = ['A funny story of collegues during office meet']
seq=tokenizer.texts_to_sequences(new_movie_overview)
seq=pad_sequences(seq,maxlen=20,padding='post')
prediction=model.predict(seq)
prediction_labels=[mlb.classes_[i] for i in range(len(prediction[0])) if prediction[0][i]>0.5]
print('Predicted Genres: ',prediction_labels)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 80ms/step
Predicted Genres:  ['Romance']


In [11]:

# use this dataset to write one more MultiLabel Classification prediction using Neural Network
dataset = {
    "headline": [
        "Government launches new digital economy policy",
        "Star player wins championship after dramatic final",
        "Tech company releases AI-powered smartphone",
        "Stock market drops amid political tensions",
        "New environmental policy gains global support",
        "Space agency announces manned Mars mission"
    ],
    "topics": [
        ["Politics", "Economy", "Technology"],
        ["Sports"],
        ["Technology", "Business"],
        ["Economy", "Politics"],
        ["Environment", "Politics"],
        ["Science", "Technology"]
    ]
}

df = pd.DataFrame(dataset)
df.head()

Unnamed: 0,headline,topics
0,Government launches new digital economy policy,"[Politics, Economy, Technology]"
1,Star player wins championship after dramatic f...,[Sports]
2,Tech company releases AI-powered smartphone,"[Technology, Business]"
3,Stock market drops amid political tensions,"[Economy, Politics]"
4,New environmental policy gains global support,"[Environment, Politics]"


In [12]:
mlb=MultiLabelBinarizer()
y2=mlb.fit_transform(df["topics"])
y2

array([[0, 1, 0, 1, 0, 0, 1],
       [0, 0, 0, 0, 0, 1, 0],
       [1, 0, 0, 0, 0, 0, 1],
       [0, 1, 0, 1, 0, 0, 0],
       [0, 0, 1, 1, 0, 0, 0],
       [0, 0, 0, 0, 1, 0, 1]])

In [13]:
tokenizer2=Tokenizer(num_words=5000,oov_token="")
tokenizer2.fit_on_texts(df["headline"])
X2=tokenizer.texts_to_sequences(df["headline"])
X2=pad_sequences(X2,maxlen=20,padding='post')

In [14]:
model2 = tf.keras.Sequential(
    [
        tf.keras.layers.Embedding(input_dim=5000, output_dim=16, input_length=20),
        tf.keras.layers.GlobalAveragePooling1D(),
        tf.keras.layers.Dense(16, activation='relu'),
        tf.keras.layers.Dense(len(mlb.classes_), activation='sigmoid')
    ]
)



In [15]:

model2.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

model2.fit(X2, y2, epochs=3)

Epoch 1/3
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - accuracy: 0.1667 - loss: 0.6963
Epoch 2/3
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 144ms/step - accuracy: 0.1667 - loss: 0.6943
Epoch 3/3
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step - accuracy: 0.1667 - loss: 0.6928


<keras.src.callbacks.history.History at 0x7b1d64253dd0>

In [16]:
#evaluate
loss, accuracy = model2.evaluate(X2, y2)
print(f"Loss: {loss:.4f}")
print(f"Accuracy: {accuracy:.4f}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 304ms/step - accuracy: 0.1667 - loss: 0.6918
Loss: 0.6918
Accuracy: 0.1667


In [21]:
new_headline = ['political tension']
seq2=tokenizer2.texts_to_sequences(new_headline)
seqs=pad_sequences(seq2,maxlen=20,padding='post')
prediction2=model2.predict(seqs)
prediction_label=[mlb.classes_[i] for i in range(len(prediction2[0])) if prediction2[0][i]>0.5]
print('Predicted Genres: ',prediction_label)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 81ms/step
Predicted Genres:  ['Environment', 'Politics']
