# Import Libraries

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.metrics.pairwise import cosine_similarity
import firebase_admin
from firebase_admin import credentials
from firebase_admin import firestore

# Connect Firestore Database

In [None]:
# Initialize the Firebase Admin SDK and open a Firestore client.
# Replace the path below with your own service-account key file path.
# NOTE(review): make sure the key file itself is never committed with the notebook.
cred = credentials.Certificate('fundup-387016-eebdb0edd99f.json')
try:
    # Reuse the default app when this cell has already been executed in the
    # current kernel; calling initialize_app() a second time raises ValueError.
    firebase_admin.get_app()
except ValueError:
    firebase_admin.initialize_app(cred)
db = firestore.client()

# Load Data from Firestore Database

In [None]:
# Read every document of the 'startup' collection and build two parallel
# lists: one feature string per startup and the matching document ID.
startup_features_ref = db.collection('startup')
startup_features_docs = startup_features_ref.stream()

startup_features, startup_ids = [], []
for snapshot in startup_features_docs:
    fields = snapshot.to_dict()
    stage = fields['tingkat_perkembangan_perusahaan']
    industry = fields['industri_startup']
    # The feature text is the two fields joined by a single space.
    startup_features.append(stage + ' ' + industry)
    startup_ids.append(str(snapshot.id))

In [None]:
# Display the feature strings built from the 'startup' collection.
# NOTE(review): consider showing only a slice (e.g. startup_features[:5]) to keep the output short.
startup_features

['Pre-startup/RnD Energi Terbarukan',
 'Pre-startup/RnD E-commerce',
 'Pre-startup/RnD E-commerce',
 'Growth Kesehatan Digital',
 'Growth Transportasi dan Logistik',
 'Product Market Fit and Monetization Kesehatan Digital',
 'Growth Fintech',
 'MVP/prototype Transportasi dan Logistik',
 'MVP/prototype Energi Terbarukan',
 'Pre-startup/RnD Transportasi dan Logistik',
 'Early Customers and Iteration Makanan dan Minuman',
 'Early Customers and Iteration Agroteknologi',
 'Product Market Fit and Monetization Agroteknologi',
 'Growth E-commerce',
 'Product Market Fit and Monetization Kreatif dan Media Digital',
 'Product Market Fit and Monetization Makanan dan Minuman',
 'Product Market Fit and Monetization Energi Terbarukan',
 'Pre-startup/RnD Transportasi dan Logistik',
 'Early Customers and Iteration Transportasi dan Logistik',
 'Product Market Fit and Monetization Kreatif dan Media Digital',
 'Growth Kreatif dan Media Digital',
 'Growth Kreatif dan Media Digital',
 'Product Market Fit an

In [None]:
# Display the startup document IDs; index i here corresponds to startup_features[i].
startup_ids

['187WvqyyivHWc6vOQQfz',
 '1PRqkm5AYAXIHumnZGF5',
 '28co6E0QbPRwCENWc13A',
 '2tukoDDLgYNAJDQXca7z',
 '3vDdk5FAxRZXSXgiHrTc',
 '4eFv0isVgM8dtvZ99d4w',
 '50GvsLiMdfwcEfZ99kpp',
 '5viiFzmEQcYmUbitOEga',
 '6Do2CQtPHVIanE6zybpq',
 '6bTvuK3MChnhZeRHuRUt',
 '6lcQKWW6hpQ6B2a54Vxf',
 '6tAgrICVADntOi3Tq4xh',
 '7pIo3Yp9tWcKRFWmd3VL',
 'APDZSG1dNcj0rkDwSFy7',
 'B0CrDZ8imMuFWtFiEAAF',
 'BGsl6mJLmIgA7aiF7sNX',
 'CChkggoZ611jjxV7wYFK',
 'CEmLPRl2VaAmhMleSL3R',
 'E85QF4tnyukL0LE0hq8F',
 'EF2Vi56m5TpMXSQrjPYv',
 'FQbrWpCuKG4Oxqp0bW0m',
 'FSLm33OVgLf66LQqQJVy',
 'G4KF6wAE2kL68DqUCYEu',
 'HgxCh2ETzrNmEC35WibF',
 'Hr1VYxwM8y1t24j8urlH',
 'IQGqz1GfunkZ8wlZYQvT',
 'IZHw6ULmmPv31bDeI9Df',
 'IrGMPegt2OH70gXI9Gao',
 'IyX6n6E3sS8NpmE1E2tN',
 'JHNZTafk2vvp2mhnMPRr',
 'K6n6KkI0QKElhbkMbUQH',
 'KAF9t8I5udXY2tnDScHM',
 'NUeBe7TJh52v8Epx0Gis',
 'OcihK3E21kG4YiipPPFd',
 'Od703fcwPwUbwyIAVhgT',
 'P829jL77YQtMvkWYkyCA',
 'PGvJjCgC65JjFPd5dfQM',
 'PcnRvW0mhvIRKiA6OPMx',
 'PuF2NChc2PMuIg1GeSb0',
 'Q0ZPCso24kbtNeCIhVrR',


In [None]:
# Load the investor features from Firestore.
# Builds two parallel lists from the 'investor_loker' collection: one feature
# string per document and the matching document ID.
investor_features_ref = db.collection('investor_loker')
investor_features_docs = investor_features_ref.stream()

investor_features = []
investor_ids = []
for doc in investor_features_docs:
    data = doc.to_dict()
    # The feature text is the two target fields joined by a single space.
    investor_features.append(data['target_perkembangan'] + ' ' + data['target_industri'])
    # Pass the ID through str() exactly as the startup loader does, so both
    # ID lists are built the same way.
    investor_ids.append(str(doc.id))

In [None]:
# Display the feature strings built from the 'investor_loker' collection.
# NOTE(review): consider showing only a slice (e.g. investor_features[:5]) to keep the output short.
investor_features

['Pre-startup/RnD Travel dan Pariwisata',
 'Early Customers and Iteration Kesehatan Digital',
 'Product Market Fit and Monetization Kesehatan Digital',
 'Early Customers and Iteration Makanan dan Minuman',
 'Early Customers and Iteration E-commerce',
 'Growth Agroteknologi',
 'Pre-startup/RnD E-commerce',
 'Growth Pendidikan Online',
 'Growth Energi Terbarukan',
 'Growth Travel dan Pariwisata',
 'Growth Kreatif dan Media Digital',
 'MVP/prototype Fintech',
 'MVP/prototype Transportasi dan Logistik',
 'MVP/prototype Travel dan Pariwisata',
 'Product Market Fit and Monetization Kesehatan Digital',
 'Growth Agroteknologi',
 'Product Market Fit and Monetization Agroteknologi',
 'Early Customers and Iteration Makanan dan Minuman',
 'Product Market Fit and Monetization Pendidikan Online',
 'MVP/prototype E-commerce',
 'Product Market Fit and Monetization Energi Terbarukan',
 'Growth Kreatif dan Media Digital',
 'Early Customers and Iteration Kreatif dan Media Digital',
 'Pre-startup/RnD Fint

In [None]:
# Display the investor document IDs; index i here corresponds to investor_features[i].
investor_ids

['002t0DzQTry8tCCx0WZD',
 '1Ts0qVsEWUkSm3BgYcRQ',
 '3C2cyjEnLHOUItjy2sBi',
 '4L6yhwT4EN2rPvrjJvKV',
 '4Twl3tFLpbFWzghp2CEG',
 '6sYVMaiCKx4ro5BPLoZt',
 '7ZGFzF4MJwSn22R33huQ',
 '7cRfxCap18kYQMnpWaUy',
 '7fevkZuDa3pKHwdVNjix',
 '8Yhg7XXZtG6xXL99GFml',
 '93pHK1BkCKqkO11dzt8X',
 'ARml6pgrznCNUJYQerBg',
 'By3x6Z86PWAkGGQZlF6u',
 'C9MI4KtJEF3u7DEIBol6',
 'CfvJ4ZSRv7aUn9EUG6Yx',
 'Co3LYEHYpZlLLVcDejaX',
 'G0xE06PDXb9uPZL7pKXq',
 'GPvLEamA6BxbpNaIzlCk',
 'GYj5r30aV3kpwhky1P8w',
 'GbgU1SVj1NURXoF167mz',
 'IQfYUmNEOrejvZZQK2XP',
 'IXCa8TdO4NWQ0onsHGu5',
 'KKzce2sw19svcAsKjmfH',
 'LoaUYn87EdERo1p9iD2u',
 'MclkvoD9k6SnlSFWthT6',
 'MiEP6bDb4ytiIxeF8oLs',
 'OtCBawbM8SeWMRbHa73P',
 'RpZUwCCvEaXgx2NW3hUq',
 'U7yjV43TrrLGOiE8ILkD',
 'Uk1u0ZEASDPfnQxrFlUY',
 'WiK6eiJxt4sUtbXBuKFK',
 'YNdwcYZTciLVlUIKVpea',
 'ZSk2Ao7JCDyEiMikJoD0',
 'ZsNqoxL4R1loOsIN84nM',
 'btFWOdAHjue9GeQsFHoc',
 'cAymZJBGHPMmiFT13Cv4',
 'cDTToyvUndBII30PExC5',
 'dHqQEjaLcMQEtoYStBCD',
 'hl7aAA6I4BmyhKeSHwOC',
 'jp5YXbP4umLo2qWHAl4w',


# Processing Data (Tokenizing)

In [None]:
# Preprocess the data using Tokenizer.
# One tokenizer is fitted on both corpora so startups and investors share a
# single vocabulary.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(startup_features + investor_features)

startup_sequences = tokenizer.texts_to_sequences(startup_features)
investor_sequences = tokenizer.texts_to_sequences(investor_features)

# Pad both sides to the SAME length. Padding each side independently lets
# pad_sequences choose a different width per corpus, and cosine_similarity
# needs both matrices to have the same number of columns.
max_sequence_length = max(
    (len(sequence) for sequence in startup_sequences + investor_sequences),
    default=0,
)
startup_padded = pad_sequences(startup_sequences, maxlen=max_sequence_length)
investor_padded = pad_sequences(investor_sequences, maxlen=max_sequence_length)

# NOTE(review): the vectors compared downstream are raw token-ID sequences, so
# the similarity score depends on the integer IDs the tokenizer happened to
# assign and on word positions. A bag-of-words encoding (for example
# tokenizer.texts_to_matrix) may match the intent better - confirm.

In [None]:
# Display the padded token-ID matrix for the startups (one row per startup).
startup_padded

array([[ 0,  0,  0,  3,  4,  1,  5, 17, 18],
       [ 0,  0,  0,  0,  6,  7,  8, 19, 20],
       [ 0,  0,  9, 10, 11,  1, 12, 17, 18],
       [ 0,  0,  0,  0,  6,  7,  8, 21, 13],
       [ 0,  0,  0,  0,  0, 14, 22,  2, 23],
       [ 0,  0,  0,  0,  0, 15, 16, 19, 20],
       [ 0,  0,  0,  6,  7,  8, 22,  2, 23],
       [ 0,  0,  0,  6,  7,  8, 24,  2, 25],
       [ 0,  0,  0,  0,  0,  0,  0, 14, 26],
       [ 0,  0,  0,  0,  6,  7,  8, 27, 28],
       [ 0,  9, 10, 11,  1, 12, 24,  2, 25],
       [ 0,  0,  9, 10, 11,  1, 12, 19, 20],
       [ 0,  0,  0,  9, 10, 11,  1, 12, 29],
       [ 0,  0,  9, 10, 11,  1, 12, 27, 28],
       [ 0,  0,  0,  6,  7,  8, 22,  2, 23],
       [ 9, 10, 11,  1, 12, 30,  2, 31, 13],
       [ 0,  0,  0,  0, 14, 30,  2, 31, 13],
       [ 0,  0,  9, 10, 11,  1, 12, 21, 13],
       [ 0,  0,  0,  0,  3,  4,  1,  5, 29],
       [ 0,  0,  9, 10, 11,  1, 12, 17, 18],
       [ 0,  0,  0,  0,  0,  0, 15, 16, 26],
       [ 0,  0,  0,  0, 15, 16, 22,  2, 23],
       [ 0

In [None]:
# Display the padded token-ID matrix for the investors (one row per investor).
investor_padded

array([[ 0,  0,  3,  4,  1,  5, 24,  2, 25],
       [ 0,  0,  3,  4,  1,  5, 22,  2, 23],
       [ 0,  0,  0,  0,  0, 15, 16, 27, 28],
       [ 0,  0,  0,  3,  4,  1,  5, 17, 18],
       [ 9, 10, 11,  1, 12, 30,  2, 31, 13],
       [ 0,  0,  0,  0,  0,  0, 14, 19, 20],
       [ 0,  0,  0,  0, 14, 30,  2, 31, 13],
       [ 0,  0,  0,  0,  0,  0, 14, 19, 20],
       [ 0,  0,  0,  0, 15, 16, 22,  2, 23],
       [ 0,  0,  0,  6,  7,  8, 22,  2, 23],
       [ 0,  0,  0,  0,  0,  0, 15, 16, 29],
       [ 0,  0,  0,  0,  3,  4,  1,  5, 29],
       [ 0,  0,  0,  0,  3,  4,  1,  5, 26],
       [ 0,  0,  9, 10, 11,  1, 12, 21, 13],
       [ 0,  0,  0,  0,  0, 14, 32,  2, 33],
       [ 0,  0,  0,  0, 15, 16, 32,  2, 33],
       [ 0,  0,  0,  0,  0,  0, 15, 16, 26],
       [ 0,  0,  0,  9, 10, 11,  1, 12, 29],
       [ 0,  0,  9, 10, 11,  1, 12, 17, 18],
       [ 0,  0,  9, 10, 11,  1, 12, 27, 28],
       [ 0,  0,  0,  0, 15, 16, 32,  2, 33],
       [ 0,  0,  0,  0, 14, 30,  2, 31, 13],
       [ 0

# Convert to Tensor

In [None]:
# Turn both padded integer matrices into float32 tensors in one pass.
startup_tensors, investor_tensors = (
    tf.convert_to_tensor(padded_matrix, dtype=tf.float32)
    for padded_matrix in (startup_padded, investor_padded)
)

# Get Similarity Score (Cosine Similarity)


In [None]:
# Pairwise cosine similarity with startups as the first argument and investors
# as the second; later cells index the result by startup position first.
similarity_matrix = cosine_similarity(X=startup_tensors, Y=investor_tensors)

In [None]:
# Display the startup-by-investor cosine similarity scores.
similarity_matrix

array([[0.684386  , 0.6875572 , 0.91947305, ..., 0.6955335 , 0.98428524,
        0.91334695],
       [0.72117203, 0.72527   , 0.96952695, ..., 0.749458  , 0.9936536 ,
        0.8870106 ],
       [0.7410003 , 0.7471857 , 0.8047674 , ..., 0.7708655 , 0.8528605 ,
        0.9986012 ],
       ...,
       [0.6145193 , 0.6197659 , 0.9461707 , ..., 0.65373975, 0.9615842 ,
        0.86472094],
       [0.7982511 , 0.80314744, 0.98747844, ..., 0.80604935, 0.90900624,
        0.7902802 ],
       [0.99999994, 0.9998344 , 0.7664845 , ..., 0.97342205, 0.6831354 ,
        0.73796475]], dtype=float32)

In [None]:
# Pairwise cosine similarity with investors as the first argument and startups
# as the second; later cells index the result by investor position first.
similarity_investor = cosine_similarity(X=investor_tensors, Y=startup_tensors)

In [None]:
# Display the investor-by-startup cosine similarity scores.
similarity_investor

array([[0.684386  , 0.72117203, 0.7410003 , ..., 0.6145193 , 0.7982511 ,
        0.99999994],
       [0.6875572 , 0.72527   , 0.7471857 , ..., 0.6197659 , 0.80314744,
        0.9998344 ],
       [0.91947305, 0.96952695, 0.8047674 , ..., 0.9461707 , 0.98747844,
        0.7664845 ],
       ...,
       [0.6955335 , 0.749458  , 0.7708655 , ..., 0.65373975, 0.80604935,
        0.97342205],
       [0.98428524, 0.9936536 , 0.8528605 , ..., 0.9615842 , 0.90900624,
        0.6831354 ],
       [0.91334695, 0.8870106 , 0.9986012 , ..., 0.86472094, 0.7902802 ,
        0.73796475]], dtype=float32)

# Get Investor Recommendation for Startup


In [None]:
# Function to get investor matches for a given startup ID
def get_investor_matches(startup_id, top_n=20):
    """Return the best-matching investor IDs for one startup.

    Args:
        startup_id: ID that must be present in the module-level ``startup_ids``.
        top_n: Maximum number of investor IDs to return (default 20, the
            value that was previously hard-coded).

    Returns:
        dict: ``{startup_id: [investor_id, ...]}`` ordered from the highest to
        the lowest similarity. Investors with equal scores keep their order
        in ``investor_ids``.

    Raises:
        ValueError: if ``startup_id`` is not in ``startup_ids``.
    """
    startup_index = startup_ids.index(startup_id)
    similarities = np.asarray(similarity_matrix[startup_index])
    # Sort the negated scores with a stable algorithm: this yields descending
    # order with deterministic tie-breaking, unlike reversing a default argsort.
    ranked_indexes = np.argsort(-similarities, kind="stable")[:max(top_n, 0)]
    return {startup_id: [investor_ids[i] for i in ranked_indexes]}

# Add investor matches to Firestore collection
def add_investor_matches(startup_id, investor_matches):
    """Write the given matches to the 'investor_matches' collection.

    The document is keyed by ``startup_id`` and holds a single field,
    'investor_matches', containing ``investor_matches``. The write goes
    through ``set``, so the document is written with exactly this payload.
    """
    payload = {'investor_matches': investor_matches}
    db.collection('investor_matches').document(startup_id).set(payload)

# Interactive input and display of investor matches.
# NOTE(review): this loop waits for typed input, so "Run All" stops here until
# 'exit' is entered or the kernel is interrupted.
while True:
    try:
        # Strip surrounding whitespace so a pasted ID with a trailing space or
        # newline is still recognised.
        input_id = input("Enter startup ID (or 'exit' to quit): ").strip()
    except (EOFError, KeyboardInterrupt):
        # End the session cleanly instead of leaving the cell in an error
        # state when the input stream closes or the kernel is interrupted.
        break
    if input_id == 'exit':
        break
    if input_id not in startup_ids:
        print("Invalid startup ID. Please try again.")
        continue
    investor_matches = get_investor_matches(input_id)
    print(f"\nStartup ID: {input_id}")
    print("Top 20 Investor Matches:")
    for investor_id in investor_matches[input_id]:
        print(investor_id)
    print()
    # Persist what was just shown to the user.
    add_investor_matches(input_id, investor_matches[input_id])

Enter startup ID (or 'exit' to quit): 0o6cMwcmzdwvhgxIVdHW

Startup ID: 0o6cMwcmzdwvhgxIVdHW
Top 20 Investor Matches:
1ffARtgSBCFT9wdYQJP6
bg1OByxC11rjETj12CSa
txEgfqxhDnB4nIsZOXDD
Z0fUFLkaQvjkCTYoqCnK
n4hhHeJTw0ZjxL0NahFZ
SlVaOuR6TRsYDGKTUryq
VGcSWjAU1B34HOMZ48t8
rcRHBzz1M3gDy9ozdCAC
c4YGDBvNkd8RFMggiZVE
3kvT6ew9c68yFhlGLXR1
AF0qDplUHOifFMzqCPWS
gkG2RRUbW7utHJZvFn31
XDesjOpoI88veKuHGyPC
ZkIc8zoS1Qj2m9J2exDw
P4fz0sHH2ftO5hEuqNsn
qMlpth3dnfGholxFlg1F
Igc8DCHjLo1tOveOwiZq
1KZaDa49jqJHBvGe3csu
qBqNUvliVCYx0nqC7Lyo
XF9Zyx7LMGtydiYDBzEn



KeyboardInterrupt: ignored

# Store Investor Matches for Startup to Firestore

In [None]:
# NOTE(review): this redefines get_investor_matches from the interactive cell
# above; the later definition is the one in effect once this cell has run.
def get_investor_matches(startup_id, top_n=20):
    """Return the best-matching investor IDs for one startup.

    Args:
        startup_id: ID that must be present in the module-level ``startup_ids``.
        top_n: Maximum number of investor IDs to return (default 20, the
            value that was previously hard-coded).

    Returns:
        dict: ``{startup_id: [investor_id, ...]}`` ordered from the highest to
        the lowest similarity. Investors with equal scores keep their order
        in ``investor_ids``.

    Raises:
        ValueError: if ``startup_id`` is not in ``startup_ids``.
    """
    startup_index = startup_ids.index(startup_id)
    similarities = np.asarray(similarity_matrix[startup_index])
    # Sort the negated scores with a stable algorithm: this yields descending
    # order with deterministic tie-breaking, unlike reversing a default argsort.
    ranked_indexes = np.argsort(-similarities, kind="stable")[:max(top_n, 0)]
    return {startup_id: [investor_ids[i] for i in ranked_indexes]}

# Add investor matches to Firestore collection
def add_investor_matches(startup_id, investor_matches):
    """Store ``investor_matches`` under document ``startup_id``.

    Target collection: 'investor_matches'. The document is written with
    ``set`` and carries one field, 'investor_matches'.
    """
    target_document = db.collection('investor_matches').document(startup_id)
    target_document.set({'investor_matches': investor_matches})


# Compute and store the investor matches for every loaded startup.
# Iterating with `input_id` directly avoids rebinding the builtin `id` in the
# kernel namespace, which the previous loop variable did.
for input_id in startup_ids:
    investor_matches = get_investor_matches(input_id)
    add_investor_matches(input_id, investor_matches[input_id])

# Store Startup Matches for Investor to Firestore

In [None]:
def get_startup_matches(investor_id, top_n=20):
    """Return the best-matching startup IDs for one investor.

    Args:
        investor_id: ID that must be present in the module-level ``investor_ids``.
        top_n: Maximum number of startup IDs to return (default 20, the
            value that was previously hard-coded).

    Returns:
        dict: ``{investor_id: [startup_id, ...]}`` ordered from the highest to
        the lowest similarity. Startups with equal scores keep their order
        in ``startup_ids``.

    Raises:
        ValueError: if ``investor_id`` is not in ``investor_ids``.
    """
    investor_index = investor_ids.index(investor_id)
    similarities = np.asarray(similarity_investor[investor_index])
    # Sort the negated scores with a stable algorithm: this yields descending
    # order with deterministic tie-breaking, unlike reversing a default argsort.
    ranked_indexes = np.argsort(-similarities, kind="stable")[:max(top_n, 0)]
    return {investor_id: [startup_ids[i] for i in ranked_indexes]}

def add_startup_matches(investor_id, startup_matches):
    """Write the given matches to the 'startup_matches' collection.

    The document is keyed by ``investor_id`` and holds a single field,
    'startup_matches', containing ``startup_matches``.
    """
    payload = {'startup_matches': startup_matches}
    db.collection('startup_matches').document(investor_id).set(payload)

# Compute and store the startup matches for every loaded investor.
# Iterating with `input_id` directly avoids rebinding the builtin `id` in the
# kernel namespace, which the previous loop variable did.
for input_id in investor_ids:
    startup_matches = get_startup_matches(input_id)
    add_startup_matches(input_id, startup_matches[input_id])

In [None]:
# NOTE(review): `investor_matches_ids` is not assigned in any cell visible in
# this notebook, so this cell presumably relies on state left over from a
# removed cell and would raise NameError after Restart & Run All - confirm and
# restore its definition.
investor_matches_ids

['0o6cMwcmzdwvhgxIVdHW',
 '2RcMgaYlttlNYC7hla9f',
 '33OmGxZjFv7xcvW51HId',
 '4SufazwlFzOQ3qq04EPp',
 '4qPjPxEzBOC4Atrc51pi',
 '6o8XNTs8U0P0tKpeNqoM',
 '7rEs9pi3xtUOt49KU0JY',
 '8Sz5wXPtdtqi3vqt0dyw',
 'AKkSG6fpMUkSQJa5mDLZ',
 'AZDvIzJ8otkvcihxQGlN',
 'AwmbPc8upvgHRDLHiRZY',
 'Csa8mNvPDqzksK0tvIDw',
 'CyPqwfkQZnruqbIT0aGq',
 'ENT77nHjd2qUTvDMOaYI',
 'EiCjPooRkqs7w1Zc7qlF',
 'HGlby2sKnZ8O58K0HBC3',
 'HMoYz8huJQeZvRcwjarA',
 'IDwi33atJcAxHsurWqFA',
 'JRRgo0POxghgyeJepTjP',
 'JUSGJh4SOBOK7a4ioiul',
 'KgueWXHb5COzopbzRxll',
 'N9G7EkzTCTHSx2YRbpdo',
 'QXs8xQAXfT0t9Wk4rG27',
 'RJ10gLeNom1AvhxVknfB',
 'RVmxWHHmZbqOyNl0zAew',
 'TLFpWV0WDlRcWOqmJ9rK',
 'TpqsCn1lZY0GTw1JvHkc',
 'UmRV9MGR95pC5nfA6efV',
 'VB0LNlwshUUrSvjgh3PQ',
 'XU1gqqA4CtDZT7q0Gkbd',
 'XcZyKRQzrdcylg7DdnSm',
 'Y7tg5GzmGKWMM0GWZE7E',
 'fiIzvf3lydu5fQI489El',
 'g7vPkPHqeKNRdC0jwWKn',
 'grZDuGilHVat4aq4oziW',
 'hhqBasXOqf5DkpTKUad0',
 'l33AYUqdqSi5mWVsHT2Y',
 'mV0aFBi7iRucija3DUhl',
 'nBgyIYqWOyHHID7xjgbk',
 'oYz6LK70oB47MCF7JvxN',
