In [1]:
#we are going to write one Multi Label Classification Example using TensorFlow here.

In [21]:
import tensorflow as tf
import pandas as pd
from sklearn.preprocessing import MultiLabelBinarizer
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [22]:
data={
    "Overview":
    [
        "A Hero Saves the city from destruction",
        "A girl and boy become friends during war",
        "A funny story of collegues during officemeet",
        "One scary nights in a deserted place"
     ],
    "genres": [["Action","Adventure"],["Romance","Drama"],["Comedy"],["Horror"]]
}

In [23]:
df=pd.DataFrame(data)
df

Unnamed: 0,Overview,genres
0,A Hero Saves the city from destruction,"[Action, Adventure]"
1,A girl and boy become friends during war,"[Romance, Drama]"
2,A funny story of collegues during officemeet,[Comedy]
3,One scary nights in a deserted place,[Horror]


In [24]:
mlb=MultiLabelBinarizer()
y=mlb.fit_transform(df["genres"])

In [25]:
y

array([[1, 1, 0, 0, 0, 0],
       [0, 0, 0, 1, 0, 1],
       [0, 0, 1, 0, 0, 0],
       [0, 0, 0, 0, 1, 0]])

In [26]:
tokenizer=Tokenizer(num_words=5000,oov_token="<OOV>")
tokenizer.fit_on_texts(df["Overview"])
X=tokenizer.texts_to_sequences(df["Overview"])
X=pad_sequences(X,maxlen=20,padding='post')

# X=input(overview)
# y=output (genres-multiple genres in one drama)
# #find out how many Multi Label Binarizer

In [36]:
#Model creation
model=tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim=5000,output_dim=16,input_length=20),
    tf.keras.layers.GlobalAveragePooling1D(),
    tf.keras.layers.Dense(16,activation="relu"),
    tf.keras.layers.Dense(len(mlb.classes_),activation="sigmoid")
])

In [37]:
model.compile(optimizer='adam',loss='binary_crossentropy',metrics=['accuracy'])

In [38]:
print(X.shape)
print(y.shape)

(4, 20)
(4, 6)


In [45]:
model.fit(X,y,epochs=20)

Epoch 1/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step - accuracy: 0.2500 - loss: 0.6806
Epoch 2/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step - accuracy: 0.2500 - loss: 0.6795
Epoch 3/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step - accuracy: 0.2500 - loss: 0.6785
Epoch 4/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step - accuracy: 0.2500 - loss: 0.6775
Epoch 5/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step - accuracy: 0.2500 - loss: 0.6765
Epoch 6/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step - accuracy: 0.2500 - loss: 0.6755
Epoch 7/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step - accuracy: 0.2500 - loss: 0.6744
Epoch 8/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step - accuracy: 0.2500 - loss: 0.6733
Epoch 9/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [

<keras.src.callbacks.history.History at 0x7eb2d2d4c9b0>

In [54]:
#Predict on a new movie overview
new_movie_overview = ['Lonely wolf in forest']
seq=tokenizer.texts_to_sequences(new_movie_overview)
seq=pad_sequences(seq,maxlen=20,padding='post')
prediction=model.predict(seq)
prediction_labels=[mlb.classes_[i] for i in range (len(prediction[0])) if prediction[0][i]>=0.46]
print("Predicted Genres:",prediction_labels)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step
Predicted Genres: ['Adventure', 'Comedy', 'Drama', 'Horror', 'Romance']


In [56]:
#Use this data to write one more Multilabel Classification prediction using Neural Network
data = {
    "headline": ["Government launches new digital economy policy",
    "Star player wins championship after dramatic final",
    "Tech company releases AI-powered smartphone",
    "Stock market drops amid political tensions",
    "New environmental policy gains global support",
    "Space agency announces manned Mars mission"
 ],
"topics": [
    ["Politics", "Economy", "Technology"],
    ["Sports"],
    ["Technology", "Business"],
    ["Economy", "Politics"],
    ["Environment", "Politics"],
    ["Science", "Technology"]
 ]
}

In [57]:
df=pd.DataFrame(data)
df

Unnamed: 0,headline,topics
0,Government launches new digital economy policy,"[Politics, Economy, Technology]"
1,Star player wins championship after dramatic f...,[Sports]
2,Tech company releases AI-powered smartphone,"[Technology, Business]"
3,Stock market drops amid political tensions,"[Economy, Politics]"
4,New environmental policy gains global support,"[Environment, Politics]"
5,Space agency announces manned Mars mission,"[Science, Technology]"


In [59]:
mlb=MultiLabelBinarizer()
y=mlb.fit_transform(df["topics"])

In [60]:
y

array([[0, 1, 0, 1, 0, 0, 1],
       [0, 0, 0, 0, 0, 1, 0],
       [1, 0, 0, 0, 0, 0, 1],
       [0, 1, 0, 1, 0, 0, 0],
       [0, 0, 1, 1, 0, 0, 0],
       [0, 0, 0, 0, 1, 0, 1]])

In [62]:
tokenizer=Tokenizer(num_words=5000,oov_token="<OOV>")
tokenizer.fit_on_texts(df["headline"])
X=tokenizer.texts_to_sequences(df["headline"])
X=pad_sequences(X,maxlen=20,padding='post')

In [64]:
#Model creation
model=tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim=5000,output_dim=16,input_length=20),
    tf.keras.layers.GlobalAveragePooling1D(),
    tf.keras.layers.Dense(16,activation="relu"),
    tf.keras.layers.Dense(len(mlb.classes_),activation="sigmoid")
])

In [65]:
model.compile(optimizer='adam',loss='binary_crossentropy',metrics=['accuracy'])

In [66]:
print(X.shape)
print(y.shape)

(6, 20)
(6, 7)


In [67]:
model.fit(X,y,epochs=20)

Epoch 1/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step - accuracy: 0.0000e+00 - loss: 0.6912
Epoch 2/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step - accuracy: 0.0000e+00 - loss: 0.6898
Epoch 3/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step - accuracy: 0.0000e+00 - loss: 0.6881
Epoch 4/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step - accuracy: 0.0000e+00 - loss: 0.6864
Epoch 5/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step - accuracy: 0.0000e+00 - loss: 0.6847
Epoch 6/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step - accuracy: 0.0000e+00 - loss: 0.6831
Epoch 7/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step - accuracy: 0.0000e+00 - loss: 0.6814
Epoch 8/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step - accuracy: 0.0000e+00 - loss: 0.6798
Epoch 9/20
[1m1/1[0m [32m━━━━━━

<keras.src.callbacks.history.History at 0x7eb2d2e820c0>