# Install Required Libraries

In [1]:
!pip install fuzzywuzzy

/bin/bash: /opt/anaconda/envs/Python3/lib/libtinfo.so.6: no version information available (required by /bin/bash)
[1m
         .:::.     .::.       
        ....yy:    .yy.       
        :.  .yy.    y.        
             :y:   .:         
             .yy  .:          
              yy..:           
              :y:.            
              .y.             
             .:.              
        ....:.                
        :::.                  
[0;33m
• Project files and data should be stored in /project. This is shared among everyone
  in the project.
• Personal files and configuration should be stored in /home/faculty.
• Files outside /project and /home/faculty will be lost when this server is terminated.
• Create custom environments to setup your servers reproducibly.
[0m
bash: /opt/anaconda/envs/Python3/lib/libtinfo.so.6: no version information available (required by bash)


# Import Necessary Libraries

In [2]:
#Numerical & Data Manipulation
import numpy as np 
import pandas as pd

#Natural Language Processing
import nltk
from nltk.corpus import stopwords
import string
from nltk.tokenize import word_tokenize
from nltk import pos_tag, ne_chunk
from nltk.stem import WordNetLemmatizer

#Text Processing & Modeling
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

from fuzzywuzzy import fuzz

#Iteration & Regular Expression
import re
import itertools
from nltk.tokenize import RegexpTokenizer



# Load the dataset

In [3]:
#Read the file
data = pd.read_csv("clean_data.csv", encoding="latin-1")
print("Loaded data file {0} with {1} rows and {2} columns".format("clean_data.csv"
                                                                  ,data.shape[0]
                                                                  ,data.shape[1]))
data.head()

Loaded data file clean_data.csv with 89 rows and 7 columns


Unnamed: 0,Course,Type,topic,Description,Link,word_count,cleaned_desc
0,Artificial Intelligence,Courseware,Getting Started with Enterprise-grade AI,This course covers the foundations of AI for b...,https://skillsbuild.org/college-students/digit...,23,course covers foundations ai business includin...
1,Artificial Intelligence,Courseware,OpenDS4All,OpenDS4All is a project created to accelerate ...,https://github.com/odpi/OpenDS4All/tree/master...,111,opends4all project created accelerate creation...
2,Artificial Intelligence,Courseware,IBM AI Education,Dive into this introductory course and get fam...,https://www.mindspark.org/ibm-ai,571,dive introductory course get familiar basics a...
3,Artificial Intelligence,Courseware,Watson Academy,"IBMÂs portfolio of business-ready tools, appl...",https://www.ibm.com/training/artificial-intell...,175,ibms portfolio business ready tools applicatio...
4,Artificial Intelligence,Courseware,Create a Node-RED starter application,A flow-based programming tool for wiring toget...,https://developer.ibm.com/components/node-red/,47,flow based programming tool wiring together ha...


# Recommendations using techniques like Cosine-similariy and Fuzzy logic

In [4]:
def recommend_courses_by_topic(data, desired_topics):
    # Drop duplicated topics and keep first instance
    data = data.drop_duplicates(subset='topic')

    # Preprocessing the user's query
    processed_query = ' '.join(desired_topics)

    # Calculating cosine similarity between topics and query
    similarity_scores = data['topic'].apply(lambda x: fuzz.partial_ratio(processed_query, x))

    # Sorting courses based on similarity scores
    sorted_indices = similarity_scores.argsort()[::-1]

    # Collecting recommended sub-topics and links
    recommended_courses = []
    topic_found = False

    for index in sorted_indices:
        try:
            subtopic = data.loc[index, 'topic']
            recommended_courses.append(subtopic)
            topic_found = True
            if len(recommended_courses) == 5:  # Store top 5 recommendations
                break
        except KeyError as e:
            keyerror = e

    return recommended_courses if topic_found else []

In [5]:
# Iterate through all topics in the dataset
topic_list = data['topic'].unique()

# Store the recommendations
result_df = pd.DataFrame(columns=['Topic', 'Recommended_Course_1','Recommended_Course_2','Recommended_Course_3',
                                 'Recommended_Course_4','Recommended_Course_5'])

In [6]:
for topic in topic_list:
    # Get recommendations for each topic
    recommend = recommend_courses_by_topic(data, [topic])
    if recommend:
        # Add the top-5 recommendations
        recommendations = recommend + ['N/A'] * (5 - len(recommend))  # Fill empty slots with 'NA'
        result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
                                      'Recommended_Course_2': recommendations[1],
                                      'Recommended_Course_3': recommendations[2],
                                      'Recommended_Course_4': recommendations[3],
                                      'Recommended_Course_5': recommendations[4]}, ignore_index=True)

  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],


['Getting Started with Enterprise-grade AI', 'Getting Started with Enterprise Data Science', 'Getting Started with Threat Intelligence and Hunting', 'IBM CloudLabs', 'IBM QRadar SIEM Advanced Topics']
['OpenDS4All', 'Red Hat System Administration I (RH124)', 'IBM XL C/C++', 'IBM XL Fortran', 'IBM Cloud Feature Code']
['IBM AI Education', 'IBM Engineering Test Management', 'Introduction to IBM zSystems', 'Application Modernization with IBM z/OS', 'Planning Analytics']
['Watson Academy', 'IBM Engineering Workflow Management', 'z/OS Introduction', 'Project Debater for Academic Use', 'IBM Z Software Trials']
['Create a Node-RED starter application', 'IBM Z System Administrator', 'Project CodeNet', 'IBM Security SOAR', 'Trustworthy Generation 360']
['Project Debater for Academic Use', 'Project CodeNet', 'Watson Academy', 'IBM Z System Administrator', 'IBM Engineering Workflow Management']
['IBM Maximo Visual Inspection', 'SimulAI', 'Introduction to IBM zSystems', 'IBM Digital App Builder', 

  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],


['IBM Digital App Builder', 'Red Hat Application Development I: Programming in Java EE (AD183)', 'Application Modernization with IBM z/OS', 'IBM Maximo Visual Inspection', 'Developing Secure Software']
['AI Explainability 360', 'Cognos Analytics', 'Red Hat Application Development I: Programming in Java EE (AD183)', 'AI Fairness 360', 'Trustworthy Generation 360']
['AI FactSheets 360', 'AI Fairness 360', 'Causal Inference 360', 'IBM XL C/C++', 'AI Explainability 360']
['Adversarial Robustness Toolbox', 'Red Hat Enterprise Linux Automation with Ansible (RH294)', 'AI Fairness 360', 'Application Modernization with IBM z/OS', 'IBM DB2 Standard']
['Causal Inference 360', 'AI Fairness 360', 'Getting Started with Enterprise Data Science', 'IBM Maximo Visual Inspection', 'Trustworthy Generation 360']
['Trustworthy Generation 360', 'Uncertainty Quantification 360', 'IBM Analog Hardware Acceleration Kit', 'AI Fairness 360', 'Causal Inference 360']


  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],


['IBM Analog Hardware Acceleration Kit', 'Cognos Analytics', 'Journey to Cloud: Envisioning Your Solution', 'Trustworthy Generation 360', 'IBM AI Education']
['Uncertainty Quantification 360', 'Trustworthy Generation 360', 'Red Hat Application Development I: Programming in Java EE (AD183)', 'IBM AI Education', 'Getting Started with Enterprise Data Science']


  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],


['Getting Started with Enterprise-grade AI', 'Getting Started with Threat Intelligence and Hunting', 'Getting Started with Enterprise Data Science', 'IBM QRadar SIEM Advanced Topics', 'IBM Watson Machine Learning Accelerator']
['Getting Started with Enterprise Data Science', 'Getting Started with Enterprise-grade AI', 'Getting Started with Threat Intelligence and Hunting', 'Causal Inference 360', 'Uncertainty Quantification 360']
['Getting Started with Threat Intelligence and Hunting', 'Getting Started with Enterprise-grade AI', 'Getting Started with Enterprise Data Science', 'Solution tutorials', 'IBM CloudLabs']
['Getting Started with Cloud for the Enterprise', 'IBM Cognos Analytics - Author Reports Fundamentals', 'IBM Cloud Associate Solution Advisor', 'Red Hat System Administration II (RH134)', 'Red Hat System Administration I (RH124)']


  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],


['IBM Cognos Analytics - Architecture and Logging', 'SPSS Modeler Premium', 'IBM Planning Analytics - Design and Develop Models in Planning Analytics Workspace', 'IBM Cognos Framework Manager - Design Metadata Models', 'IBM Planning Analytics - Analyze Data and Create Reports']


  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],


['SPSS Modeler Premium', 'IBM Cognos Analytics - Architecture and Logging', 'Constraint Programming with ILOG CP Optimizer', 'IBM Cognos Framework Manager - Design Metadata Models', 'IBM Z System Administrator']
['Constraint Programming with ILOG CP Optimizer', 'SPSS Modeler Premium', 'IBM Cognos Analytics - Architecture and Logging', 'IBM Cognos Framework Manager - Design Metadata Models', 'IBM Planning Analytics - Design and Develop Models in Planning Analytics Workspace']
['Overview of IBM Cognos Analytics', 'Zowe', 'Journey to Cloud: Envisioning Your Solution', 'IBM QRadar SIEM Advanced Topics', 'Solution tutorials']
['IBM Cognos Analytics - Architecture and Logging', 'IBM Planning Analytics - Design and Develop Models in Planning Analytics Workspace', 'IBM Planning Analytics - Analyze Data and Create Reports', 'IBM Cognos Framework Manager - Design Metadata Models', 'SPSS Modeler Premium']


  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],


['IBM Cognos Analytics - Architecture and Logging', 'IBM Planning Analytics - Analyze Data and Create Reports', 'IBM Planning Analytics - Design and Develop Models in Planning Analytics Workspace', 'SPSS Modeler Premium', 'IBM Cognos Framework Manager - Design Metadata Models']


  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],


['IBM Cognos Analytics - Architecture and Logging', 'IBM Cognos Framework Manager - Design Metadata Models', 'IBM Planning Analytics - Design and Develop Models in Planning Analytics Workspace', 'SPSS Modeler Premium', 'IBM Planning Analytics - Analyze Data and Create Reports']
['IBM Cognos Analytics - Author Reports Fundamentals', 'Getting Started with Cloud for the Enterprise', 'IBM Robotic Process Automation - Basic II', 'Red Hat Application Development I: Programming in Java EE (AD183)', 'Watson Academy']
['IBM Cognos Analytics - Author Reports with Multidimensional Data', 'IBM Z System Administrator', 'Project Debater for Academic Use', 'IBM Z Enterprise Computing Kickstart', 'Application Modernization with IBM z/OS']
['IBM Cognos Analytics - Architecture and Logging', 'IBM Planning Analytics - Design and Develop Models in Planning Analytics Workspace', 'IBM Planning Analytics - Analyze Data and Create Reports', 'IBM Cognos Framework Manager - Design Metadata Models', 'SPSS Modele

  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],


['Planning Analytics', 'Elyra', 'IBM DB2 Standard', 'IBM AI Education', 'IBM Z System Administrator']


  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],


['Planning Analytics', 'Elyra', 'IBM DB2 Standard', 'IBM AI Education', 'IBM Z System Administrator']
['IBM Process Mining Collection', 'IBM DB2 Standard', 'IBM Planning Analytics - Design and Develop Models in Planning Analytics Workspace', 'IBM Z System Administrator', 'AI Fairness 360']
['IBM Robotic Process Automation - Basic I', 'IBM Robotic Process Automation - Basic II', 'Fundamentals of IBM Process Mining', 'Journey to Cloud: Envisioning Your Solution', 'Introduction to IBM zSystems']
['IBM Robotic Process Automation - Basic II', 'Fundamentals of IBM Process Mining', 'IBM Robotic Process Automation - Basic I', 'IBM Z Enterprise Computing Kickstart', 'Introduction to OpenShift Applications (DO101)']
['Fundamentals of IBM Process Mining', 'Journey to Cloud: Envisioning Your Solution', 'Solution tutorials', 'IBM Robotic Process Automation - Basic II', 'IBM Robotic Process Automation - Basic I']
['Journey to Cloud: Envisioning Your Solution', 'Fundamentals of IBM Process Mining', '

  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],


['IBM CloudLabs', 'Red Hat Application Development I: Programming in Java EE (AD183)', 'Getting Started with Threat Intelligence and Hunting', 'Getting Started with Enterprise-grade AI', 'QRadar Community Edition']


  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],


['IBM Cloud Feature Code', 'Quick Start Sessions', 'OpenLiberty', 'Developing Secure Software', 'IBM QRadar SIEM Advanced Topics']
['OpenLiberty', 'Quick Start Sessions', 'IBM Cloud Feature Code', 'IBM Z System Administrator', 'Overview of IBM Cognos Analytics']
['Quick Start Sessions', 'OpenLiberty', 'IBM Cloud Feature Code', 'Developing Secure Software', 'IBM QRadar SIEM Advanced Topics']
['IBM Engineering Requirements Management DOORS Next', 'DevOps Transformations for IBM zSystems and CICD pipelines with DBB Git', 'Solution tutorials', 'ILOG CPLEX Optimization Studio', 'IBM Security Learning Academy']
['IBM Engineering Workflow Management', 'Developing Secure Software', 'Watson Academy', 'SPSS Modeler Premium', 'z/OS Introduction']
['IBM Engineering Test Management', 'IBM QRadar SIEM Foundations', 'Explore the IBM Software Catalog', 'IBM AI Education', 'Journey to Cloud: Envisioning Your Solution']


  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],


['Explore the IBM Software Catalog', 'IBM QRadar SIEM Foundations', 'IBM Engineering Test Management', 'ILOG CPLEX Optimization Studio', 'IBM QRadar SIEM Advanced Topics']


  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],


['IBM Security Learning Academy', 'DevOps Transformations for IBM zSystems and CICD pipelines with DBB Git', 'IBM Z System Administrator', 'IBM Engineering Requirements Management DOORS Next', 'Architecting Applications with IBM Z']
['IBM QRadar SIEM Foundations', 'IBM Engineering Test Management', 'IBM AI Education', 'Fundamentals of IBM Process Mining', 'Uncertainty Quantification 360']
['IBM QRadar SIEM Foundations', 'Explore the IBM Software Catalog', 'IBM Engineering Test Management', 'Overview of IBM Cognos Analytics', 'ILOG CPLEX Optimization Studio']
['IBM QRadar SIEM Advanced Topics', 'z/OS Introduction', 'IBM Cloud Feature Code', 'Overview of IBM Cognos Analytics', 'Red Hat Application Development I: Programming in Java EE (AD183)']
['Developing Secure Software', 'IBM Engineering Workflow Management', 'IBM Cloud Feature Code', 'IBM Digital App Builder', 'Explore the IBM Software Catalog']
['QRadar Community Edition', 'IBM Watson Machine Learning Accelerator', 'Red Hat Applica

  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],


['IBM Security SOAR', 'IBM Z Enterprise Computing Kickstart', 'IBM Watson Machine Learning Accelerator', 'SPSS Modeler Premium', 'IBM AI Education']


  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],


['QRadar SIEM', 'Red Hat Application Development I: Programming in Java EE (AD183)', 'IBM Data Risk Manager', 'ILOG CPLEX Optimization Studio', 'AI Fairness 360']
['IBM Z Enterprise Computing Kickstart', 'IBM Security SOAR', 'IBM Robotic Process Automation - Basic II', 'QRadar SIEM', 'Architecting Applications with IBM Z']
['Introduction to IBM zSystems', 'Introduction to OpenShift Applications (DO101)', 'QRadar SIEM', 'IBM XL Fortran', 'IBM AI Education']
['IBM Z System Administrator', 'IBM Process Mining Collection', 'Project CodeNet', 'Elyra', 'Architecting Applications with IBM Z']
['Architecting Applications with IBM Z', 'Application Modernization with IBM z/OS', 'IBM Z Enterprise Computing Kickstart', 'IBM Z System Administrator', 'IBM Security Learning Academy']
['DevOps Transformations for IBM zSystems and CICD pipelines with DBB Git', 'IBM Engineering Requirements Management DOORS Next', 'IBM Data Risk Manager', 'ILOG CPLEX Optimization Studio', 'IBM Robotic Process Automation

  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],


['z/OS Introduction', 'Red Hat OpenShift I: Containers & Kubernetes (DO180)', 'Watson Academy', 'Cognos Analytics', 'IBM QRadar SIEM Advanced Topics']
['Zowe', 'IBMÂ\xa0Developer for z/OS', 'Overview of IBM Cognos Analytics', 'ILOG CPLEX Optimization Studio', 'QRadar Community Edition']


  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],


['IBMÂ\xa0Developer for z/OS', 'Zowe', 'IBM Z Enterprise Computing Kickstart', 'IBM Security SOAR', 'Planning Analytics']
['IBM Z Software Trials', 'IBM Rational Developer for i RPG & COBOL Tools', 'IBM Data Risk Manager', 'Red Hat System Administration I (RH124)', 'Red Hat Application Development I: Programming in Java EE (AD183)']
['IBM Rational Developer for i RPG & COBOL Tools', 'IBM Z Software Trials', 'IBM Data Risk Manager', 'Red Hat System Administration I (RH124)', 'Red Hat Application Development I: Programming in Java EE (AD183)']
['IBM Watson Machine Learning Accelerator', 'QRadar Community Edition', 'IBM Security SOAR', 'IBM AI Education', 'IBM Z Software Trials']


  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],


['IBM XL Fortran', 'Introduction to IBM zSystems', 'QRadar SIEM', 'IBM Cloud Associate Solution Advisor', 'IBM Robotic Process Automation - Basic II']


  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],


['IBM XL C/C++', 'IBM Cloud Associate Solution Advisor', 'AI Fairness 360', 'OpenDS4All', 'Red Hat System Administration I (RH124)']
['Red Hat System Administration I (RH124)', 'IBM Z Software Trials', 'IBM Rational Developer for i RPG & COBOL Tools', 'IBM Data Risk Manager', 'IBM Z System Administrator']
['Red Hat System Administration II (RH134)', 'IBM AI Education', 'IBM Security SOAR', 'IBM Z Enterprise Computing Kickstart', 'Application Modernization with IBM z/OS']
['Red Hat Application Development I: Programming in Java EE (AD183)', 'Red Hat Enterprise Linux Automation with Ansible (RH294)', 'IBM Data Risk Manager', 'Adversarial Robustness Toolbox', 'Getting Started with Enterprise Data Science']


  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],


['Introduction to OpenShift Applications (DO101)', 'Introduction to IBM zSystems', 'QRadar SIEM', 'IBM Robotic Process Automation - Basic II', 'Red Hat OpenStack Administration I: Core Operations for Domain Operators (CL110)']


  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],


['Red Hat OpenShift I: Containers & Kubernetes (DO180)', 'z/OS Introduction', 'Red Hat OpenStack Administration I: Core Operations for Domain Operators (CL110)', 'IBM Engineering Workflow Management', 'Introduction to OpenShift Applications (DO101)']
['Red Hat OpenStack Administration I: Core Operations for Domain Operators (CL110)', 'Introduction to OpenShift Applications (DO101)', 'Solution tutorials', 'IBM Z System Administrator', 'Red Hat OpenShift I: Containers & Kubernetes (DO180)']
['Red Hat Application Development I: Programming in Java EE (AD183)', 'Red Hat Enterprise Linux Automation with Ansible (RH294)', 'IBM Cognos Analytics - Author Reports Fundamentals', 'QRadar Community Edition', 'IBM QRadar SIEM Advanced Topics']


  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],
  result_df = result_df.append({'Topic': topic, 'Recommended_Course_1': recommendations[0],


In [7]:
# Save the result DataFrame to a new CSV file
result_df.to_csv('recommended_courses_w2w.csv', index=False)

In [8]:
#Read the merged file
recommended_courses_w2w = pd.read_csv("recommended_courses_w2w.csv", encoding="latin-1")
print("Loaded data file {0} with {1} rows and {2} columns".format("recommended_courses_w2w.csv"
                                                                  ,recommended_courses_w2w.shape[0]
                                                                  ,recommended_courses_w2w.shape[1]))

recommended_courses_w2w.head()

Loaded data file recommended_courses_w2w.csv with 84 rows and 6 columns


Unnamed: 0,Topic,Recommended_Course_1,Recommended_Course_2,Recommended_Course_3,Recommended_Course_4,Recommended_Course_5
0,Getting Started with Enterprise-grade AI,Getting Started with Enterprise-grade AI,Getting Started with Enterprise Data Science,Getting Started with Threat Intelligence and H...,IBM CloudLabs,IBM QRadar SIEM Advanced Topics
1,OpenDS4All,OpenDS4All,Red Hat System Administration I (RH124),IBM XL C/C++,IBM XL Fortran,IBM Cloud Feature Code
2,IBM AI Education,IBM AI Education,IBM Engineering Test Management,Introduction to IBM zSystems,Application Modernization with IBM z/OS,Planning Analytics
3,Watson Academy,Watson Academy,IBM Engineering Workflow Management,z/OS Introduction,Project Debater for Academic Use,IBM Z Software Trials
4,Create a Node-RED starter application,Create a Node-RED starter application,IBM Z System Administrator,Project CodeNet,IBM Security SOAR,Trustworthy Generation 360


In [9]:
# Calculating the percentage of the missing values 
percentage = (recommended_courses_w2w.isnull().sum() / recommended_courses_w2w.shape[0]).sort_values(ascending=False)
percentage

Topic                   0.0
Recommended_Course_1    0.0
Recommended_Course_2    0.0
Recommended_Course_3    0.0
Recommended_Course_4    0.0
Recommended_Course_5    0.0
dtype: float64

# Load the Test Dataset

In [10]:
#Read the file
test_data = pd.read_csv("test_data.csv", encoding="latin-1")
print("Loaded data file {0} with {1} rows and {2} columns".format("test_data.csv"
                                                                  ,test_data.shape[0]
                                                                  ,test_data.shape[1]))

test_data.head()

Loaded data file test_data.csv with 84 rows and 2 columns


Unnamed: 0,Topic,label
0,Getting Started with Enterprise Data Science,Getting Started with Enterprise-grade AI
1,Getting Started with Threat Intelligence and H...,Getting Started with Enterprise Data Science
2,IBM Cloud Feature Code,Solution tutorials
3,IBM CloudLabs,Journey to Cloud: Envisioning Your Solution
4,IBM Cognos Analytics - Architecture and Logging,IBM Planning Analytics - Design and Develop Mo...


In [11]:
# Merge the test data with the recommended courses data based on course_name 
merged_data = test_data.merge(recommended_courses_w2w, on='Topic')
merged_data.to_csv('merged_data.csv', index=False)

In [12]:
print(merged_data.columns)

Index(['Topic', 'label', 'Recommended_Course_1', 'Recommended_Course_2',
       'Recommended_Course_3', 'Recommended_Course_4', 'Recommended_Course_5'],
      dtype='object')


In [13]:
#Read the file
data = pd.read_csv("merged_data_w2w.csv", encoding="latin-1")
print("Loaded data file {0} with {1} rows and {2} columns".format("merged_data_w2w.csv"
                                                                  ,data.shape[0]
                                                                  ,data.shape[1]))

data.head()

Loaded data file merged_data_w2w.csv with 83 rows and 7 columns


Unnamed: 0,Topic,label,Recommended_Course_1,Recommended_Course_2,Recommended_Course_3,Recommended_Course_4,Recommended_Course_5
0,Getting Started with Enterprise Data Science,Getting Started with Enterprise-grade AI,Getting Started with Enterprise-grade AI,Getting Started with Threat Intelligence and H...,Getting Started with Enterprise Data Science,IBM QRadar SIEM Advanced Topics,IBM Watson Machine Learning Accelerator
1,Getting Started with Threat Intelligence and H...,Getting Started with Enterprise Data Science,Getting Started with Enterprise Data Science,Getting Started with Enterprise-grade AI,Getting Started with Threat Intelligence and H...,Causal Inference 360,Uncertainty Quantification 360
2,IBM Cloud Feature Code,Solution tutorials,Solution tutorials,Journey to Cloud: Envisioning Your Solution,Fundamentals of IBM Process Mining,Project CodeNet,IBM Engineering Requirements Management DOORS ...
3,IBM CloudLabs,Journey to Cloud: Envisioning Your Solution,Journey to Cloud: Envisioning Your Solution,Fundamentals of IBM Process Mining,Solution tutorials,IBM Planning Analytics - Design and Develop Mo...,IBM Robotic Process Automation - Basic I
4,IBM Cognos Analytics - Architecture and Logging,IBM Planning Analytics - Design and Develop Mo...,IBM Cognos Analytics - Architecture and Logging,IBM Cognos Framework Manager - Design Metadata...,IBM Planning Analytics - Design and Develop Mo...,SPSS Modeler Premium,IBM Planning Analytics - Analyze Data and Crea...


# Calculate the Accuracy

In [14]:
# Define function to calculate the top-k accuracy
def calculate_accuracy(dataset, k):
    accuracies = []
    topics_with_accuracy_1 = []
    
    for i, row in dataset.iterrows():
        recommended_courses = row[['Recommended_Course_1', 'Recommended_Course_2',
                                   'Recommended_Course_3', 'Recommended_Course_4',
                                   'Recommended_Course_5']].tolist()
        recommended_courses = [course for course in recommended_courses if course != 'N/A']
        label = row['label']
        
        if label in recommended_courses[:k]:
            accuracies.append(1)
            topics_with_accuracy_1.append(row['Topic'])
        else:
            accuracies.append(0)
    
    average_accuracy = np.mean(accuracies) * 100
    average_accuracy = round(average_accuracy, 2)
    return average_accuracy, topics_with_accuracy_1


In [20]:
average_accuracy, topics_with_accuracy_1 = calculate_accuracy(data, 1)
print(f"Average Accuracy for k = 1: {average_accuracy}%")

Average Accuracy for k = 1: 33.73%


In [21]:
# Print topics having accuracy = 1 for k = 1
if topics_with_accuracy_1:
    print("Topics with Accuracy = 1:")
    for topic in topics_with_accuracy_1:
        print(topic)
else:
    print("No topics with Accuracy = 1 found.")

Topics with Accuracy = 1:
Getting Started with Enterprise Data Science
Getting Started with Threat Intelligence and Hunting
IBM Cloud Feature Code
IBM CloudLabs
IBM Cognos Analytics - Author Reports Fundamentals
IBM Cognos Analytics - Author Reports with Multidimensional Data
IBM Cognos Framework Manager - Design Metadata Models
IBM Data Risk Manager
IBM DB2 Standard
IBM Engineering Requirements Management DOORS Next
IBM Engineering Test Management
IBM Planning Analytics - Analyze Data and Create Reports
IBM Planning Analytics - Design and Develop Models in Planning Analytics Workspace
IBM Process Mining Collection
IBM QRadar SIEM Advanced Topics
IBM QRadar SIEM Foundations
IBM Rational Developer for i RPG & COBOL Tools
IBM Robotic Process Automation - Basic I
IBM Security Learning Academy
IBM Security SOAR
IBM Watson Machine Learning Accelerator
IBM XL Fortran
IBM Z Enterprise Computing Kickstart
IBM Z Software Trials
ILOG CPLEX Optimization Studio
Introduction to IBM zSystems
Introdu

In [19]:
average_accuracy, topics_with_accuracy_3 = calculate_accuracy(data, 3)
print(f"Average Accuracy for k = 3: {average_accuracy}%")

Average Accuracy for k = 3: 36.14%


In [18]:
average_accuracy, topics_with_accuracy_5 = calculate_accuracy(data, 5)
print(f"Average Accuracy for k = 5: {average_accuracy}%")

Average Accuracy for k = 5: 38.55%
