Here, we will be building a DANN.

In [1]:
#importing modules
import tensorflow as tf
from tensorflow.keras import datasets, layers, models, Model
import time
import matplotlib.pyplot as plt
import numpy as np
import keras as K
from tensorflow import keras
from sklearn.model_selection import train_test_split
from keras.layers import Dense
import pandas as pd

from keras.models import Sequential
from keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense
from sklearn.preprocessing import LabelEncoder
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical


In [2]:

import ssl
ssl._create_default_https_context = ssl._create_unverified_context

Preprocessing Target Domain Dataset

In [3]:
# Replace 'your_file_path.csv' with the actual path to your CSV file
file_path = 'datasets/target/recipes_serp_youtube_data.csv'

# Read the CSV file into a Pandas DataFrame
df_target = pd.read_csv(file_path)

# Display the first few rows of the DataFrame to check the data
print(df_target.head())

             q                         queryTime  rank  \
0  dish recipe  2019-03-30 02:55:42.169989+00:00     1   
1  dish recipe  2019-03-30 02:55:42.169989+00:00     2   
2  dish recipe  2019-03-30 02:55:42.169989+00:00     3   
3  dish recipe  2019-03-30 02:55:42.169989+00:00     4   
4  dish recipe  2019-03-30 02:55:42.169989+00:00     5   

                                               title  \
0                             7 Easy Chicken Dinners   
1  KING of VEGETABLE Recipe | SAMBAR Recipe with ...   
2  ALOO Manchurian Recipe How to make aloo manchu...   
3  सूजी का इतना टेस्टी और आसान नाश्ता की आप रोज़ ...   
4  5 मिनट में बनाये कुरकुरे आलू स्नैक्स | Aloo Sn...   

                                         description  \
0  Customize & buy the Tasty Cookbook here: http:...   
1  Today we cooking one of the most popular veg r...   
2  ldli Manchurian Recipe https://youtu.be/wx7Mx9...   
3  Hello Friend's ….Aaj Main Aapko Quick & Easy B...   
4  If you liked the video give it 

In [4]:
df_target.columns

Index(['q', 'queryTime', 'rank', 'title', 'description', 'publishedAt',
       'channelTitle', 'totalResults', 'kind', 'channelId', 'default.height',
       'default.url', 'default.width', 'high.height', 'high.url', 'high.width',
       'liveBroadcastContent', 'medium.height', 'medium.url', 'medium.width',
       'nextPageToken', 'playlistId', 'resultsPerPage', 'thumbnails',
       'videoId', 'video.contentDetails', 'video.etag', 'video.id',
       'video.kind', 'video.localizations', 'video.player',
       'video.recordingDetails', 'video.snippet', 'video.statistics',
       'video.status', 'video.topicDetails', 'video.categoryId',
       'video.channelId', 'video.channelTitle', 'video.defaultAudioLanguage',
       'video.defaultLanguage', 'video.description',
       'video.liveBroadcastContent', 'video.localized', 'video.publishedAt',
       'video.tags', 'video.thumbnails', 'video.title',
       'video.relevantTopicIds', 'video.topicCategories', 'video.topicIds',
       'video.comme

In [5]:
#here we will only be using 'title' and 'video.tags' for our purpose
#if you want to do DANN, use 'description' column as well so that the recipe & description are relevant to have similar features
#so we will be dropping the remaining columns
df_target = df_target.drop(columns=['q', 'queryTime', 'description','rank', 'publishedAt', 'channelTitle', 'totalResults', 'kind', 'channelId', 'default.height', 'default.url', 'default.width', 'high.height', 'high.url', 'high.width', 'liveBroadcastContent', 'medium.height', 'medium.url', 'medium.width', 'nextPageToken', 'playlistId', 'resultsPerPage', 'thumbnails', 'videoId', 'video.contentDetails', 'video.etag', 'video.id', 'video.kind', 'video.localizations', 'video.player', 'video.recordingDetails', 'video.snippet', 'video.statistics', 'video.status', 'video.topicDetails', 'video.categoryId', 'video.channelId', 'video.channelTitle', 'video.defaultAudioLanguage', 'video.defaultLanguage', 'video.description', 'video.liveBroadcastContent', 'video.localized', 'video.publishedAt', 'video.thumbnails', 'video.title', 'video.relevantTopicIds', 'video.topicCategories', 'video.topicIds', 'video.commentCount', 'video.dislikeCount', 'video.favoriteCount', 'video.likeCount', 'video.viewCount', 'video.embeddable', 'video.license', 'video.privacyStatus', 'video.publicStatsViewable', 'video.uploadStatus', 'video.caption', 'video.definition', 'video.dimension', 'video.duration', 'video.licensedContent', 'video.projection', 'video.regionRestriction', 'video.liveStreamingDetails', 'video.contentRating', 'channel.contentDetails', 'channel.etag', 'channel.id', 'channel.kind', 'channel.snippet', 'channel.statistics', 'channel.country', 'channel.customUrl', 'channel.defaultLanguage', 'channel.description', 'channel.localized', 'channel.publishedAt', 'channel.thumbnails', 'channel.title', 'channel.commentCount', 'channel.hiddenSubscriberCount', 'channel.subscriberCount', 'channel.videoCount', 'channel.viewCount', 'channel.relatedPlaylists'])

In [6]:
df_target.columns

Index(['title', 'video.tags'], dtype='object')

In [7]:
# Assuming df['Instructions'] is your text data
df_target['video.tags'].fillna('', inplace=True)  # Replace NaN values with an empty string


In [8]:
#here we are creating a fake labels for temporary test
#1/3 part of the labels to 'appetizers'. another 1/3 part to 'dinner' and the last 1/3 part to 'desserts'

total_size = len(df_target)
category_size = total_size // 3
df_target['labels'] = None
df_target.loc[:category_size - 1, 'labels'] = 'Appetizers'
df_target.loc[category_size:2*category_size - 1, 'labels'] = 'Dinner'
df_target.loc[2*category_size:total_size - 1, 'labels'] = 'Desserts'

In [9]:
df_target.head

<bound method NDFrame.head of                                                   title  \
0                                7 Easy Chicken Dinners   
1     KING of VEGETABLE Recipe | SAMBAR Recipe with ...   
2     ALOO Manchurian Recipe How to make aloo manchu...   
3     सूजी का इतना टेस्टी और आसान नाश्ता की आप रोज़ ...   
4     5 मिनट में बनाये कुरकुरे आलू स्नैक्स | Aloo Sn...   
...                                                 ...   
1450                                    How to make pho   
1451  Easy PHO 3 Ways! Beef, Chicken, Veggie (Vietna...   
1452                                  Masterchef sadza.   
1453  মিষ্টি দই বানানোর সহজ রেসিপি || How to make Sw...   
1454  ১ ঘন্টায় চুলায় তৈরী বগুড়ার ঐতিহ্যবাহী মিষ্ট...   

                                             video.tags      labels  
0     ['chicken', 'dinner', 'tasty', 'buzzfeed', 'qu...  Appetizers  
1     ['vegetable', 'vegetable recipe', 'sambar reci...  Appetizers  
2                                                  

In [10]:
#shuffling the dataframe
df_target = df_target.sample(frac=1, random_state=42)

In [11]:
df_target.head

<bound method NDFrame.head of                                                   title  \
497   Cooking &amp; Tasting Pork Belly Curry In My V...   
1261                            How to Make a Braai Pie   
411   Beef &amp; Guinness Stew - St. Patrick&#39;s D...   
1046                  How to Make a Perfect Ratatouille   
1033    How to make an Authentic bowl of VIETNAMESE PHO   
...                                                 ...   
1095                  Handle It - The Universal Poutine   
1130  Super Simple Succotash -- Ridiculousy Easy &am...   
1294  Jamaican Saltfish Fritters Recipe 2016 | Recip...   
860   Authentic Spanish Seafood Paella Recipe - Cola...   
1126          Ful Mudammas (Fuul) Ful Mudammas فول مدمس   

                                             video.tags      labels  
497   ['pork recipe', 'pork', 'pork belly', 'cooking...      Dinner  
1261  ['Cooking (Interest)', 'How-to (Website Catego...    Desserts  
411   ['Beef', 'Stew', 'Guinness', "St. Patrick's D

In [55]:
from sklearn.preprocessing import LabelEncoder
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical

#defining the parameters
num_classes = 1
embedding_dim = 100

# Assuming df['Instructions'] is your text data, we  tokenize the input dataset into tokens for the CNN model
#Tokenizer Initialization and Fitting:
tokenizer = Tokenizer()
tokenizer.fit_on_texts(df_target['title'])

#Vocabulary Size and Maximum Sequence Length Calculation:
vocab_size = len(tokenizer.word_index) + 1
max_sequence_length = max(df_target['title'].apply(lambda x: len(x.split())))

#Texts to Sequences:
sequences = tokenizer.texts_to_sequences(df_target['title'])

#Padding Sequences
data = pad_sequences(sequences, maxlen=max_sequence_length)


In [56]:

# Assuming 'text' is your input data and 'label' is your target variable
X = data   #data
y = df_target['video.tags'].values           #labels 

# Convert labels to numerical format using LabelEncoder
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# Split the data into training and testing sets
X_train_target, X_test_target, y_train_target, y_test_target = train_test_split(X, y, test_size=0.2, random_state=42)


In [57]:
# Define the model
model = Sequential()

# Embedding layer: Converts words into dense vectors of fixed size
model.add(Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_sequence_length))

# Convolutional layer with max pooling
model.add(Conv1D(filters=128, kernel_size=5, activation='relu'))
model.add(GlobalMaxPooling1D())

# Fully connected layers for classification
model.add(Dense(units=64, activation='relu'))
model.add(Dense(units=num_classes, activation='softmax'))


In [58]:

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [59]:
# Train the DANN

start_time = time.time()
history = model.fit(X_train_target, y_train_target, epochs=1, validation_data=(X_test_target, y_test_target))
end_time = time.time()

  return dispatch_target(*args, **kwargs)




In [60]:
#evaluating the model
test_loss, test_acc = model.evaluate(X_test_target, y_test_target, verbose=2)
print('Test accuracy:', test_acc)
print('Time elapsed: ', end_time - start_time)

10/10 - 0s - loss: 0.0000e+00 - accuracy: 0.0000e+00 - 21ms/epoch - 2ms/step
Test accuracy: 0.0
Time elapsed:  0.477125883102417


Preprocessing the auxiliary dataset

In [69]:
# Replace 'your_file_path.csv' with the actual path to your CSV file
file_path = 'datasets/source/kaggle_food.csv'

# Read the CSV file into a Pandas DataFrame
df_source = pd.read_csv(file_path)

# Display the first few rows of the DataFrame to check the data
print(df_source.head())


   Unnamed: 0                                              Title  \
0           0  Miso-Butter Roast Chicken With Acorn Squash Pa...   
1           1                    Crispy Salt and Pepper Potatoes   
2           2                        Thanksgiving Mac and Cheese   
3           3                 Italian Sausage and Bread Stuffing   
4           4                                       Newton's Law   

                                         Ingredients  \
0  ['1 (3½–4-lb.) whole chicken', '2¾ tsp. kosher...   
1  ['2 large egg whites', '1 pound new potatoes (...   
2  ['1 cup evaporated milk', '1 cup whole milk', ...   
3  ['1 (¾- to 1-pound) round Italian loaf, cut in...   
4  ['1 teaspoon dark brown sugar', '1 teaspoon ho...   

                                        Instructions  \
0  Pat chicken dry with paper towels, season all ...   
1  Preheat oven to 400°F and line a rimmed baking...   
2  Place a rack in middle of oven; preheat to 400...   
3  Preheat oven to 350°F with 

In [62]:
df_source.columns

Index(['Unnamed: 0', 'Title', 'Ingredients', 'Instructions', 'Image_Name',
       'Cleaned_Ingredients'],
      dtype='object')

Here, we are not doing DANN, so we will just be using 'Instructions' column to build a topic space

In [70]:
#here we will only be using 'Title' and 'Ingredients' for our purpose
#so we will be dropping the remaining columns
df_source = df_source.drop(columns=['Unnamed: 0', 'Ingredients', 'Image_Name',
       'Cleaned_Ingredients','Title'])

In [71]:
# Assuming df['Instructions'] is your text data
df_source['Instructions'].fillna('', inplace=True)  # Replace NaN values with an empty string


Building a topic space and adding it as a column in df_source dataframe under the column 'topic_space'

In [76]:
from sklearn.preprocessing import LabelEncoder
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical
from nltk import pos_tag, word_tokenize
import nltk
nltk.download('averaged_perceptron_tagger')

df_source['topic_space'] = None

# Assuming df['Instructions'] is your text data, we  tokenize the input dataset into tokens for the CNN model
#Tokenizer Initialization and Fitting:
tokenizer = Tokenizer()
for i,instruction in enumerate(df_source['Instructions']):
    tokens = word_tokenize(instruction)
    
    # Perform part-of-speech tagging
    pos_tags = pos_tag(tokens)

    # Filter out only nouns
    nouns = [word for word, pos in pos_tags if pos.startswith('N')]

    # Assign nouns to the 'topic_space' column for the current row
    df_source.at[i, 'topic_space'] = nouns

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/rishikeshyadav/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


In [78]:
df_source['topic_space'][0]

['Pat',
 'paper',
 'towels',
 'season',
 'tsp',
 'salt',
 'legs',
 'twine',
 'sit',
 'room',
 'temperature',
 'hour',
 'squash',
 'seeds',
 'peeler',
 'ridges',
 'squash',
 'halves',
 'skin',
 'Cut',
 'half',
 'wedges',
 'arrange',
 'baking',
 'sheet',
 'Combine',
 'sage',
 'Tbsp',
 'butter',
 'bowl',
 'half',
 'mixture',
 'squash',
 'sheet',
 'Sprinkle',
 'squash',
 'allspice',
 'pepper',
 'flakes',
 '½',
 'tsp',
 'salt',
 'season',
 'pepper',
 'toss',
 'Add',
 'bread',
 'apples',
 'oil',
 '¼',
 'tsp',
 'salt',
 'herb',
 'butter',
 'bowl',
 'season',
 'pepper',
 'toss',
 'Set',
 'Place',
 'onion',
 'vinegar',
 'bowl',
 'season',
 'salt',
 'toss',
 'sit',
 'rack',
 'middle',
 'third',
 'oven',
 'preheat',
 'Mix',
 'miso',
 'Tbsp',
 'room-temperature',
 'butter',
 'bowl',
 'Pat',
 'paper',
 'towels',
 'butter',
 'Place',
 'chicken',
 'skillet',
 'roast',
 'rack',
 'thermometer',
 'part',
 'breast',
 'registers',
 'minutes',
 'Temperature',
 'chicken',
 'rests',
 'rest',
 'skillet',
 'mi