In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Conv1D, GlobalMaxPooling1D, Dense, Dropout, concatenate
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

# Load the dataset
dataset = pd.read_csv('/content/drive/MyDrive/ML/Coursera.csv')

# Preprocess the data
X_desc = dataset['Course Description'].values
X_name = dataset['Course Name'].values
X_skills = dataset['Skills'].values
y = dataset['Course Name'].values

# Encode the target variable
le = LabelEncoder()
y = le.fit_transform(y)

# Tokenize the text data
tokenizer = Tokenizer()
tokenizer.fit_on_texts(X_desc)
X_desc = tokenizer.texts_to_sequences(X_desc)
X_desc = pad_sequences(X_desc)

tokenizer.fit_on_texts(X_name)
X_name = tokenizer.texts_to_sequences(X_name)
X_name = pad_sequences(X_name)

tokenizer.fit_on_texts(X_skills)
X_skills = tokenizer.texts_to_sequences(X_skills)
X_skills = pad_sequences(X_skills)

vocab_size = len(tokenizer.word_index) + 1

# Split the data into train and test sets
X_desc_train, X_desc_test, X_name_train, X_name_test, X_skills_train, X_skills_test, y_train, y_test = train_test_split(
    X_desc, X_name, X_skills, y, test_size=0.2, random_state=42
)

# Define input layers
input_desc = Input(shape=(X_desc.shape[1],))
input_name = Input(shape=(X_name.shape[1],))
input_skills = Input(shape=(X_skills.shape[1],))

# Embedding layers
embedding_desc = Embedding(vocab_size, 100)(input_desc)
embedding_name = Embedding(vocab_size, 100)(input_name)
embedding_skills = Embedding(vocab_size, 100)(input_skills)

# Convolutional layers for each input
conv_desc = Conv1D(128, 5, activation='relu')(embedding_desc)
conv_name = Conv1D(128, 5, activation='relu')(embedding_name)
conv_skills = Conv1D(128, 5, activation='relu')(embedding_skills)

# Global Max Pooling layers
pool_desc = GlobalMaxPooling1D()(conv_desc)
pool_name = GlobalMaxPooling1D()(conv_name)
pool_skills = GlobalMaxPooling1D()(conv_skills)

# Concatenate the pooled features
concatenated = concatenate([pool_desc, pool_name, pool_skills])

# Dense layers
dense = Dense(128, activation='relu')(concatenated)
dropout = Dropout(0.2)(dense)
output = Dense(len(le.classes_), activation='softmax')(dropout)

# Create the model
model = Model(inputs=[input_desc, input_name, input_skills], outputs=output)

# Compile the model
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
model.fit([X_desc_train, X_name_train, X_skills_train], y_train, validation_data=([X_desc_test, X_name_test, X_skills_test], y_test), epochs=10, batch_size=32)

# Make predictions
predictions = model.predict([X_desc_test, X_name_test, X_skills_test])

input = "I am a SQL developer but I need to use this in the financial sector and I need to aware of SQL injections for cybersecuirty can you give a course?"

courses = model.predict(input)

# Convert predictions back to original labels
predicted_labels = le.inverse_transform(np.argmax(predictions, axis=1))

# Retrieve top 20 recommended courses
top_20_courses = dataset[dataset['Course Name'].isin(predicted_labels)]['Course Name'].head(20).values

# Print the top 20 recommended courses
print("Top 20 Recommended Courses:")
for course in top_20_courses:
    print(course)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Top 20 Recommended Courses:
Finance for Managers
Retrieve Data using Single-Table SQL Queries
The Roles and Responsibilities of Nonprofit Boards of Directors within the Governance Process
Global Health: An Interdisciplinary Overview
Python Programming Essentials
Parallel programming
Multiple Regression Analysis in Public Health
Philosophy, Science and Religion: Philosophy and Religion
Protecting Business Innovations via Patent
Introduction to Cybersecurity Tools & Cyber Attacks
Introduction to Cybersecurity Tools & Cyber Attacks
Prediction and Control with Function Approximation
Python Tricks and Hacks for Productivity
Introduction to Recommender Systems:  Non-Personalized and Content-Based
Predictive Modeling and Analytics
Genetics and Society: A Course for Educators
Russian History: from Lenin to Putin
Successful Interviewing
Mastering SQL Joins
Building Resilient Streaming 

In [6]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Conv1D, GlobalMaxPooling1D, Dense, Dropout, concatenate
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

# Load the dataset
dataset = pd.read_csv('/content/drive/MyDrive/ML/Coursera.csv')

# Preprocess the data
X_desc = dataset['Course Description'].values
X_name = dataset['Course Name'].values
X_skills = dataset['Skills'].values
y = dataset['Course Name'].values

# Encode the target variable
le = LabelEncoder()
y = le.fit_transform(y)

# Tokenize the text data
tokenizer = Tokenizer()
tokenizer.fit_on_texts(X_desc)
X_desc = tokenizer.texts_to_sequences(X_desc)
X_desc = pad_sequences(X_desc)

tokenizer.fit_on_texts(X_name)
X_name = tokenizer.texts_to_sequences(X_name)
X_name = pad_sequences(X_name)

tokenizer.fit_on_texts(X_skills)
X_skills = tokenizer.texts_to_sequences(X_skills)
X_skills = pad_sequences(X_skills)

vocab_size = len(tokenizer.word_index) + 1

# Split the data into train and test sets
X_desc_train, X_desc_test, X_name_train, X_name_test, X_skills_train, X_skills_test, y_train, y_test = train_test_split(
    X_desc, X_name, X_skills, y, test_size=0.2, random_state=42
)

# Define input layers
input_desc = Input(shape=(X_desc.shape[1],))
input_name = Input(shape=(X_name.shape[1],))
input_skills = Input(shape=(X_skills.shape[1],))

# Embedding layers
embedding_desc = Embedding(vocab_size, 100)(input_desc)
embedding_name = Embedding(vocab_size, 100)(input_name)
embedding_skills = Embedding(vocab_size, 100)(input_skills)

# Convolutional layers for each input
conv_desc = Conv1D(128, 5, activation='relu')(embedding_desc)
conv_name = Conv1D(128, 5, activation='relu')(embedding_name)
conv_skills = Conv1D(128, 5, activation='relu')(embedding_skills)

# Global Max Pooling layers
pool_desc = GlobalMaxPooling1D()(conv_desc)
pool_name = GlobalMaxPooling1D()(conv_name)
pool_skills = GlobalMaxPooling1D()(conv_skills)

# Concatenate the pooled features
concatenated = concatenate([pool_desc, pool_name, pool_skills])

# Dense layers
dense = Dense(128, activation='relu')(concatenated)
dropout = Dropout(0.2)(dense)
output = Dense(len(le.classes_), activation='softmax')(dropout)

# Create the model
model = Model(inputs=[input_desc, input_name, input_skills], outputs=output)

# Compile the model
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
model.fit([X_desc_train, X_name_train, X_skills_train], y_train, validation_data=([X_desc_test, X_name_test, X_skills_test], y_test), epochs=2, batch_size=32)

# Preprocess input
def preprocess_input(input_string):
    input_desc = tokenizer.texts_to_sequences([input_string])
    input_desc = pad_sequences(input_desc, maxlen=X_desc.shape[1])
    input_name = pad_sequences([[0]], maxlen=X_name.shape[1])  # Pad dummy input with zeros
    input_skills = pad_sequences([[0]], maxlen=X_skills.shape[1])  # Pad dummy input with zeros
    return input_desc, input_name, input_skills

# Get preprocessed input for recommendation
input_string = "Python coding business economics data science"
input_desc_recommend, input_name_recommend, input_skills_recommend = preprocess_input(input_string)

# Make recommendations
recommendations = model.predict([input_desc_recommend, input_name_recommend, input_skills_recommend])
recommended_labels = le.inverse_transform(np.argmax(recommendations, axis=1))

# Retrieve top 20 recommended courses
top_20_courses = dataset[dataset['Course Name'].isin(recommended_labels)]['Course Name'].head(20).values

# Print the top 20 recommended courses
print("Recommended Courses:")
# for course in top_20_courses:
for i in range(4):
    print(top_20_courses[0])


Epoch 1/2
Epoch 2/2
Top 20 Recommended Courses:
What is Data Science?
What is Data Science?
What is Data Science?
What is Data Science?
