<h1><b>Content Based Filtering : Kampus Merdeka</b></h1>

In [49]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.corpus import stopwords
import re
import random

df = pd.read_csv("dataset/kampusmerdeka_dataset.csv")

df

Unnamed: 0,partner_name,address,description,program,course_name
0,Nabil Syahfiar,"Kaliwungu, Kendal, Jawa Tengah, 51372","ai, object oriented programming, oop, web prog...",Kuliah,Mata Kuliah
1,PT Orbit Ventura Indonesia,"Veteran RI Building 15th Floor Unit Z15-002, P...",3.AI Project Cycle 2.AI Research Methods 4.Pyt...,Studi Independen,AI 4 Jobs
2,PT Mitra Semeru Indonesia,"Jl. Mampang Prapatan Raya no 84A, Tegal Parang...",1.AI Technology Logic and Concept 2.AI Project...,Studi Independen,AI for StartUps
3,PT Nurul Fikri Cipta Inovasi,Jl. Situ Indah No.116 RT. 006 RW. 010 Kel. Tug...,Desain Web Version Control System (VCS): Git/G...,Studi Independen,Akademi Fullstack Web Developer
4,PT Ruang Raya Indonesia,"Jl. Dr. Saharjo No.161, Manggarai Selatan, Teb...",Final Project Software Engineering: Implementi...,Studi Independen,Backend Engineering
5,PT LENTERA BANGSA BENDERANG,(Alamat Tidak Tersedia),Know the Introduction to the World of Programm...,Studi Independen,Backend Javascript
6,PT DWI INTI PUTRA,(Alamat Tidak Tersedia),Capstone Project Support Group Introduction to...,Studi Independen,Data Analyst
7,PT. INDOBIT DIGITAL RAYA,Educenter building unit 22218 Kav Commercial I...,Program Onboarding Data Collection Overview of...,Studi Independen,Data Analytics for Business
8,PT Mitra Integrasi Informatika,"APL Tower Lantai 37, Jl. Letjen S. Parman Kav....",Introduction to Programming Front-End Programm...,Studi Independen,Full Stack Developer
9,PT LENTERA BANGSA BENDERANG,(Alamat Tidak Tersedia),Perform unit testing and deployment Master the...,Studi Independen,Fullstack Web


<br>
<h2><b>1. Ikhtisar</b></h2>

In [50]:
df.describe()

Unnamed: 0,partner_name,address,description,program,course_name
count,21,21,21,21,21
unique,20,18,21,3,21
top,PT LENTERA BANGSA BENDERANG,(Alamat Tidak Tersedia),"ai, object oriented programming, oop, web prog...",Studi Independen,Mata Kuliah
freq,2,4,1,10,1


In [51]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21 entries, 0 to 20
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   partner_name  21 non-null     object
 1   address       21 non-null     object
 2   description   21 non-null     object
 3   program       21 non-null     object
 4   course_name   21 non-null     object
dtypes: object(5)
memory usage: 968.0+ bytes


<br>
<h2><b>2. Deskripsi Modul & Kompetensi</b> (Sebelum Preprocessing)</h2>

In [52]:
def print_description(index):
    example = df[df.index == index][['description', 'partner_name', 'program', 'course_name']].values[0]
    if len(example) > 0:
        print(example[0], '\n')
        print('Nama Mitra\t:', example[1])
        print('Program\t:', example[2])
        print('Kursus atau Posisi\t:', example[3])
        print()

In [53]:
print_description(1)

3.AI Project Cycle 2.AI Research Methods 4.Python 1 programming.AI Technology Logic and Concept 6.Final Project 5.Professional Ethics & Company Skills 

Nama Mitra	: PT Orbit Ventura Indonesia
Program	: Studi Independen
Kursus atau Posisi	: AI 4 Jobs



<br>
<h2><b>3. Text Preprocessing</b></h2>

In [54]:
clean_spcl = re.compile('[/(){}\[\]\|@,;]')
clean_symbol = re.compile('[^0-9a-z #+_]')
stopworda = set(stopwords.words('english'))

def clean_text(text):
    """
        text: a string
        
        return: modified initial string
    """
    text = text.lower() # lowercase text
    text = clean_spcl.sub(' ', text)
    text = clean_symbol.sub('', text)
    text = ' '.join(word for word in text.split() if word not in stopworda) # hapus stopword dari kolom deskripsi
    return text
    
df['desc_clean'] = df['description'].apply(clean_text)

In [55]:
df.head()

Unnamed: 0,partner_name,address,description,program,course_name,desc_clean
0,Nabil Syahfiar,"Kaliwungu, Kendal, Jawa Tengah, 51372","ai, object oriented programming, oop, web prog...",Kuliah,Mata Kuliah,ai object oriented programming oop web program...
1,PT Orbit Ventura Indonesia,"Veteran RI Building 15th Floor Unit Z15-002, P...",3.AI Project Cycle 2.AI Research Methods 4.Pyt...,Studi Independen,AI 4 Jobs,3ai project cycle 2ai research methods 4python...
2,PT Mitra Semeru Indonesia,"Jl. Mampang Prapatan Raya no 84A, Tegal Parang...",1.AI Technology Logic and Concept 2.AI Project...,Studi Independen,AI for StartUps,1ai technology logic concept 2ai project cycle...
3,PT Nurul Fikri Cipta Inovasi,Jl. Situ Indah No.116 RT. 006 RW. 010 Kel. Tug...,Desain Web Version Control System (VCS): Git/G...,Studi Independen,Akademi Fullstack Web Developer,desain web version control system vcs git gith...
4,PT Ruang Raya Indonesia,"Jl. Dr. Saharjo No.161, Manggarai Selatan, Teb...",Final Project Software Engineering: Implementi...,Studi Independen,Backend Engineering,final project software engineering implementin...


<br>
<h2><b>4. Deskripsi Modul & Kompetensi</b> (Setelah Preprocessing)</h2>

In [56]:
# Deskripsi kedua (Setelah preprocessing)
def print_description_clean(index):
    example = df[df.index == index][['desc_clean', 'partner_name', 'program', 'course_name']].values[0]
    if len(example) > 0:
        print(example[0], '\n')
        print('Nama Mitra\t:', example[1])
        print('Program\t:', example[2])
        print('Kursus atau Posisi\t:', example[3])
        print()

In [57]:
print_description_clean(1)

3ai project cycle 2ai research methods 4python 1 programmingai technology logic concept 6final project 5professional ethics company skills 

Nama Mitra	: PT Orbit Ventura Indonesia
Program	: Studi Independen
Kursus atau Posisi	: AI 4 Jobs



<br>
<h2><b>5. TF-IDF & Cosine Similarity</b></h2>

In [58]:
df.set_index('partner_name', inplace=True)
tf = TfidfVectorizer(analyzer='word', ngram_range=(1, 3), min_df=0, stop_words='english')
tfidf_matrix = tf.fit_transform(df['desc_clean'])
cos_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)
cos_sim

array([[1.        , 0.        , 0.        , 0.04208615, 0.071635  ,
        0.0911061 , 0.00604063, 0.03918833, 0.11617208, 0.04431005,
        0.03439369, 0.06674163, 0.00761257, 0.05747614, 0.02441371,
        0.02372035, 0.0065789 , 0.03299428, 0.01401157, 0.02690397,
        0.01122623],
       [0.        , 1.        , 0.32011442, 0.01223473, 0.019006  ,
        0.00592109, 0.00863442, 0.01306972, 0.        , 0.0132483 ,
        0.        , 0.01504565, 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.01321821, 0.        ,
        0.03193534],
       [0.        , 0.32011442, 1.        , 0.01363991, 0.0114529 ,
        0.00660113, 0.0096261 , 0.00791115, 0.        , 0.00738494,
        0.        , 0.01677366, 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.01473634, 0.        ,
        0.01583513],
       [0.04208615, 0.01223473, 0.01363991, 1.        , 0.04605612,
        0.0458314 , 0.00361473, 0.00987036, 0.       

In [59]:
# Set index utama di kolom 'name'
indices = pd.Series(df.index)
indices[:21]

0                        Nabil Syahfiar
1            PT Orbit Ventura Indonesia
2             PT Mitra Semeru Indonesia
3          PT Nurul Fikri Cipta Inovasi
4               PT Ruang Raya Indonesia
5           PT LENTERA BANGSA BENDERANG
6                     PT DWI INTI PUTRA
7              PT. INDOBIT DIGITAL RAYA
8        PT Mitra Integrasi Informatika
9           PT LENTERA BANGSA BENDERANG
10    PT Hacktivate Teknologi Indonesia
11      PT Nodeflux Teknologi Indonesia
12                 PT Pundi Mas Berjaya
13                      PT GIT SOLUTION
14    PT Telkom Indonesia (Persero) Tbk
15            PT TELEKOMUNIKASI SELULAR
16           PT Widya Inovasi Indonesia
17     PT Bejana Investidata Globalindo
18      PT Investree Radhika Jaya Group
19       PT Generasi Anak Muda Berkarya
20                  UPT Solo Technopark
Name: partner_name, dtype: object

<br>
<h2><b>6. Modelling</b></h2>

In [60]:
def recommendations(name, cos_sim = cos_sim):
    
    recommended_mbkm = {'partner_name': [],
                       'address': [],
                       'program': [],
                       'course_name': []}
    
    # Mengambil nama hotel berdasarkan variabel indicies
    idx = indices[indices == name].index[0]

    # Membuat series berdasarkan skor kesamaan
    score_series = pd.Series(cos_sim[idx]).sort_values(ascending = False)

    # mengambil index dan dibuat 10 baris rekomendasi terbaik
    top_10_indexes = list(score_series.iloc[1:11].index)

    print('Daftar Rekomendasi')
    print("="*50)
    
    for i in top_10_indexes:
      
      if list(df['program'])[i] == "Magang":
        kursus_or_posisi = "Posisi"
      else:
        kursus_or_posisi = "Kursus"
      
      print()
      print('Mitra\t:', list(df.index)[i])
      print('Program\t:', list(df['program'])[i])
      print(f'{kursus_or_posisi}\t:', list(df['course_name'])[i])
      print()
      print("="*50)

<br>
<h2><b>7. Prediksi</b></h2>

In [61]:
recommendations('Nabil Syahfiar')

Daftar Rekomendasi

Mitra	: PT Mitra Integrasi Informatika
Program	: Studi Independen
Kursus	: Full Stack Developer


Mitra	: PT LENTERA BANGSA BENDERANG
Program	: Studi Independen
Kursus	: Backend Javascript


Mitra	: PT Ruang Raya Indonesia
Program	: Studi Independen
Kursus	: Backend Engineering


Mitra	: PT Nodeflux Teknologi Indonesia
Program	: Magang
Posisi	: AI Engineer


Mitra	: PT GIT SOLUTION
Program	: Magang
Posisi	: Android Developer


Mitra	: PT LENTERA BANGSA BENDERANG
Program	: Studi Independen
Kursus	: Fullstack Web


Mitra	: PT Nurul Fikri Cipta Inovasi
Program	: Studi Independen
Kursus	: Akademi Fullstack Web Developer


Mitra	: PT. INDOBIT DIGITAL RAYA
Program	: Studi Independen
Kursus	: Data Analytics for Business


Mitra	: PT Hacktivate Teknologi Indonesia
Program	: Studi Independen
Kursus	: Golang for Back End Programmer


Mitra	: PT Bejana Investidata Globalindo
Program	: Magang
Posisi	: Full-Stack Developer

