In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.metrics.pairwise import cosine_similarity

# Load the startup dataset (semicolon-delimited CSV)
startup_data = pd.read_csv('dataset_startup.csv', sep=';')
startup_features = startup_data[['tingkat perkembangan perusahaan', 'industri']]
# IDs are kept as strings so they compare directly against text typed by the user
startup_ids = startup_data['startup_id'].astype(str)

# Load the investor dataset (semicolon-delimited CSV)
investor_data = pd.read_csv('dataset_loker.csv', sep=';')
investor_features = investor_data[['tingkat', 'industri']]
investor_ids = investor_data['investor_id']

# Build one "<stage> <industry>" string per row. Missing values are replaced by
# empty strings first: NaN would otherwise propagate through the concatenation
# and the Tokenizer only accepts text.
startup_text = (
    startup_features['tingkat perkembangan perusahaan'].fillna('').astype(str)
    + ' '
    + startup_features['industri'].fillna('').astype(str)
)
investor_text = (
    investor_features['tingkat'].fillna('').astype(str)
    + ' '
    + investor_features['industri'].fillna('').astype(str)
)

# Preprocess the data using Tokenizer
# NOTE(review): the vocabulary is fitted on the startup text only, so any word
# that appears solely in the investor text is dropped from investor sequences.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(startup_text)
startup_sequences = tokenizer.texts_to_sequences(startup_text)
investor_sequences = tokenizer.texts_to_sequences(investor_text)

# Pad both sides to the SAME length. Padding each side to its own maximum
# produces matrices with different column counts whenever the longest startup
# and investor sequences differ, which makes cosine_similarity fail.
max_len = max(map(len, startup_sequences + investor_sequences), default=0)
startup_padded = pad_sequences(startup_sequences, maxlen=max_len)
investor_padded = pad_sequences(investor_sequences, maxlen=max_len)

# Convert padded sequences to tensors
startup_tensors = tf.convert_to_tensor(startup_padded, dtype=tf.float32)
investor_tensors = tf.convert_to_tensor(investor_padded, dtype=tf.float32)

# Calculate cosine similarity between startup and investor tensors.
# Rows index startups, columns index investors.
# NOTE(review): the vectors hold raw token indices, so the score depends on the
# arbitrary integer assigned to each word — confirm this is the intended metric.
similarity_matrix = cosine_similarity(startup_tensors, investor_tensors)

# Function to get investor matches for a given startup ID
def get_investor_matches(startup_id, top_n=20):
    """Return the investors most similar to one startup.

    Reads the row of ``similarity_matrix`` that belongs to ``startup_id`` and
    ranks all investors by descending similarity.

    Args:
        startup_id: Identifier of the startup. It is compared as a string
            because ``startup_ids`` holds strings, so ``295`` and ``'295'``
            select the same startup.
        top_n: Maximum number of investors to return. Defaults to 20.

    Returns:
        dict: A single-entry mapping ``{startup_id: matches}`` keyed by the
        argument exactly as it was passed. ``matches`` is the subset of
        ``investor_ids`` for the best-ranked investors, best match first.

    Raises:
        IndexError: If ``startup_id`` does not occur in ``startup_ids``.
    """
    id_mask = (startup_ids == str(startup_id)).to_numpy()
    if not id_mask.any():
        raise IndexError(f"Unknown startup ID: {startup_id!r}")

    # Use the positional row of the first match (not the index label) so the
    # lookup is still correct when the ID Series has a non-default index.
    startup_position = id_mask.argmax()
    similarities = similarity_matrix[startup_position]

    # argsort is ascending; reverse it to get the highest similarity first.
    sorted_indexes = np.argsort(similarities)[::-1]

    # argsort yields positions, so select investors positionally with .iloc.
    top_matches = investor_ids.iloc[sorted_indexes[:top_n]]
    return {startup_id: top_matches}

# Interactive input and display of investor matches.
# Keeps prompting until the user types 'exit' or the input stream ends.
while True:
    try:
        raw_input_id = input("Enter startup ID (or 'exit' to quit): ")
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or an interrupted kernel: leave the loop cleanly
        # instead of surfacing a traceback.
        print()
        break

    # Ignore accidental surrounding whitespace such as "295 ".
    input_id = raw_input_id.strip()
    if input_id.lower() == 'exit':
        break
    if input_id not in startup_ids.values:
        print("Invalid startup ID. Please try again.")
        continue

    investor_matches = get_investor_matches(input_id)
    print(f"\nStartup ID: {input_id}")
    print("Top 20 Investor Matches:")
    for investor_id in investor_matches[input_id]:
        print(investor_id)
    print()


Enter startup ID (or 'exit' to quit): 295

Startup ID: 295
Top 20 Investor Matches:
168
138
63
1106
40
88
77
188
314
1482
113
136
640
8
198
170
81
43
57
45

Enter startup ID (or 'exit' to quit): 494
Invalid startup ID. Please try again.
Enter startup ID (or 'exit' to quit): exit


In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.metrics.pairwise import cosine_similarity

# Read the semicolon-delimited startup CSV and preview its first five rows
startup_data = pd.read_csv('dataset_startup.csv', sep=';')
startup_data.head()

Unnamed: 0,nama lengkap,nama perusahaan,website perusahaan,target perusahaan,tingkat perkembangan perusahaan,industri,tahun,startup_id
0,Mira Irawan,TeknoSmart,digitalmaju.co.id,B2B,Pre-Startup/R&D,E-Commerce,12,295
1,Budi Wijayanti,CerdasKreatif,majuinovasi.co.id,B2B,Pre-Startup/R&D,Fintech,7,424
2,Raihan Ardianto,CerdasKreativitas,citatech.co.id,B2B,Pre-Startup/R&D,Transportasi & Logistik,2,1194
3,Suci Suharto,KreatifSolusi,datamaju.id,B2B,Pre-Startup/R&D,Pendidikan Online,9,1349
4,Irfan Harahap,SmartTekno,startupmaju.co.id,B2B,Pre-Startup/R&D,Kesehatan Digital,12,1350
