In [53]:
import sys
import os
import json

sys.path.append("../../")

import datetime
from typing import Dict, List

from pipeline.extract import (
    extract_intent_upvotes,
    extract_intents,
    extract_interests,
    extract_match_stars,
    extract_recent_matches,
    extract_users,
)

from pipeline.transform import (
    augment_users_with_intents,
    augment_users_with_interests,
    compute_internal_matching_metrics,
    convert_matches_from_df,
    convert_users_from_df,
    filter_recent_matches,
    get_matching_input,
    transform_matches_for_load,
)

from pipeline.utils.firebase import initialize_firebase_for_prefect

db = initialize_firebase_for_prefect.run(os.environ['VITE_firebase_databaseURL'],os.environ['API_ADMIN_CREDENTIALS'])
param_community = 'sds'

ValueError: The default Firebase app already exists. This means you called initialize_app() more than once without providing an app name as the second argument. In most cases you only need to call initialize_app() once. But if you do want to initialize multiple apps, pass a second argument to initialize_app() to give each app a unique name.

In [54]:
df_users = extract_users.run(db, param_community)

[2022-07-29 00:10:19+0000] INFO - prefect | Returned 25 rows, 7 cols.


In [55]:
raw_intents = extract_intents.run(db, param_community)
raw_interests = extract_interests.run(db, param_community)

users_w_profile = convert_users_from_df.run(df_users)
users_w_intents = augment_users_with_intents.run(
    users_w_profile, raw_intents
)
users_w_data = augment_users_with_interests.run(
    users_w_intents, raw_interests
)

In [56]:
users = users_w_data

In [57]:
from pipeline.types import (
    Intent,
    IntentMatch,
    Match,
    MatchingInput,
    MatchMetadata,
    Side,
    User,
)

from typing import Dict, Iterator

def get_lookup(u1, u2):
    return f"{u1},{u2}" if u1 < u2 else f"{u2},{u1}"

all_matches: Dict[str, Match] = {}

In [58]:
        for idx, u in enumerate(users):
            # create Intent info from the current user
            intents = u.intents

            # users can have more than one intent, this loops through each one
            for intent in intents:
                code = intent.code
                side = intent.side
                name = intent.name

                # otherSide would be the other side of the intent that they have
                other_side = (
                    Side.GIVING if side == Side.SEEKING else Side.SEEKING
                )

                # create an Intent object to search for in the other users
                otherIntent = Intent(code, other_side, name)

                for oth in users:
                    if otherIntent in oth.intents:

                        # create the intent match after finding a user that matches
                        intent_match = IntentMatch(
                            code,
                            seeker=oth.uid
                            if otherIntent.side == Side.SEEKING
                            else u.uid,
                            giver=oth.uid
                            if otherIntent.side == Side.GIVING
                            else u.uid,
                        )

                        # key contains the user ids that have matched
                        key = get_lookup(u.uid, oth.uid)
                        # checks if the users have been matched, prevents duplicates
                        if key in all_matches:
                            # additional check if the users have NOT matched on this intent and appends it to the matchingIntents
                            if (
                                intent_match
                                not in all_matches[key].metadata.matchingIntents
                            ):
                                all_matches[
                                    key
                                ].metadata.matchingIntents.append(intent_match)
                        else:
                            new_match = Match(
                                users={u.uid, oth.uid},
                                metadata=MatchMetadata(
                                    generator="GENERATOR_SIMILAR_INTENTS",
                                    score=1,
                                    matchingIntents=[intent_match],
                                ),
                            )
                            all_matches[key] = new_match

        # all matches are stored in the dictionary as values

In [59]:
print(all_matches["DunEgGqeSfaWQYskRFuvlzYceYB3,DunEgGqeSfaWQYskRFuvlzYceYB3"])

Match(users={'DunEgGqeSfaWQYskRFuvlzYceYB3'}, metadata=MatchMetadata(generator='GENERATOR_SIMILAR_INTENTS', score=1, commonLetters=[], interests=[], intents=[], availability=[], commonInterests=[], rareInterests=[], matchingIntents=[IntentMatch(code='tech-careers', giver='DunEgGqeSfaWQYskRFuvlzYceYB3', seeker='DunEgGqeSfaWQYskRFuvlzYceYB3')], rareIntents=[], matchingAvailability=[], limitedAvailability=[]), community=None, release=None, key=None, title=None)


In [70]:
def converts_user_data_to_vector(users_w_data): # return as dictionary where key is the user aka string value i will create
    # have 3 bits for 1 category
    # 3 * 2 = 6 bits
    # first 2 bits are seeker or receiver for careers informations -> "01", "10", "11"
    # next we look at movie recs -> "1001"
    intents = ['involve-iit','watch-recs','tech-careers']
    interests = ['art', 'chess', 'fashion', 'movies', 'sports']
    vectors = []
    user_vector = [0] * 11
    for user in users_w_data:
        for interest in user.interests:
            for i in range(5):
                if interest.name == interests[i]:
                    user_vector[i] = 1  
            """if interest.name == interests[0]: #write for loop
                user_vector[0] = 1
            if interest.name == interests[1]:
                user_vector[1] = 1
            if interest.name == interests[2]:
                user_vector[2] = 1
            if interest.name == interests[3]:
                user_vector[3] = 1
            if interest.name == interests[4]:
                user_vector[4] = 1"""
        for intent in user.intents:
            c = 0
            for i in range(5, 11):
                if intent.name == intents[c]: 
                    if (i % 2 == 1) and intent.side.SEEKING:
                        user_vector[i] = 1
                    if (i % 2 == 0) and intent.side.GIVING:
                        user_vector[i] = 1
                        c += 1
            """if intent.name == intents[0] and intent.side.SEEKING:
                user_vector[5] = 1
            if intent.name == intents[0] and intent.side.GIVING:
                user_vector[6] = 1
            if intent.name == intents[1] and intent.side.SEEKING:
                user_vector[7] = 1
            if intent.name == intents[1] and intent.side.GIVING:
                user_vector[8] = 1
            if intent.name == intents[2] and intent.side.SEEKING:
                user_vector[9] = 1
            if intent.name == intents[2] and intent.side.GIVING:
                user_vector[10] = 1"""
        vectors.append((user.uid, user_vector))
        user_vector = [0] * 11
    return vectors
        
            
                
                

    


In [None]:
('AmkGRnFOlKaqTzU4yiXqRMA0GRf2', [1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1])

In [61]:
print(users_w_data[0].interests)
print(users_w_data[10].intents)


[Interest(code='art', name='art'), Interest(code='fashion', name='fashion'), Interest(code='movies', name='movies')]
[Intent(code='involve-iit', side=<Side.GIVING: 'giving'>, name='involve-iit'), Intent(code='involve-iit', side=<Side.SEEKING: 'seeking'>, name='involve-iit'), Intent(code='tech-careers', side=<Side.SEEKING: 'seeking'>, name='tech-careers'), Intent(code='watch-recs', side=<Side.GIVING: 'giving'>, name='watch-recs')]


In [71]:
users = converts_user_data_to_vector(users_w_data)
print(users)

[('AmkGRnFOlKaqTzU4yiXqRMA0GRf2', [1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0]), ('DunEgGqeSfaWQYskRFuvlzYceYB3', [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), ('HnEWnuWHXXf4X99Orq6eC3CC5GT2', [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), ('JpsfDVkhxSdWZz4v1OEMm1U1Agk1', [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), ('KQSpHWoyZHYZ1pvuk5Y046IG8xf2', [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), ('Ks2YhmRXOGdUvPJwWaO9gRh29l73', [0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0]), ('OLncBMKcQHWIbGdISPXG5xl7WoD2', [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), ('PlSWNABFv9g9R7cPCdM9LzIWbtV2', [1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0]), ('SgNOwVH3ymPQsZDz9KtoK311tWi2', [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), ('W6MiDvgCs6hff53tna94TnwLZL43', [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), ('WGAejNwR2IdCoba27iymd8j4QfE2', [0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0]), ('WsTssSQvxFhfDaRz5fCAgKYR1DT2', [1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0]), ('b4U6ctddxwSmR47aoiuNdDVCqUt2', [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), ('cHkVu5rwUvVEgWvB9wPst2DDj9b2', [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), ('gn9y25UvZ6Zmi3ChMUp18VdE09s2', 

In [63]:
def closest_match(user1):
    #loop over all other users to find match with closest aka max cosine similarity -- predefined function "cosine_similarity(user1[1],user2[1])"
    #think of users as an 11 dimensional point and trying to find the closest distance between two points
    cos_sim = 0
    user1_match = ""
    for user in users:
        if user[0] == user1[0]:
            continue
        if cosine_similarity(user[1], user1[1]) > cos_sim:
            cos_sim = cosine_similarity(user[1], user1[1])
            user1_match = user[0]
    return user1_match
        #want to iterate over users twice since I need to compare user to all other users
        #compare the cosine similarity of each user and whichever is closest to 0 have that be it

In [64]:
from pipeline.types import (
    Match,
    MatchMetadata,
    MatchingInput
)

GENERATOR_CLUSTERING = "cluseringGenerator"

def all_closest_match(users):
    for user in users:
        match = closest_match(user)
        MatchMetadata(generator = GENERATOR_CLUSTERING)
        yield Match(users = {user[0], match})
        

In [65]:
next(all_closest_match(users))

Match(users={'AmkGRnFOlKaqTzU4yiXqRMA0GRf2', 'qPIX0U459DT4IWiBqe7PPzXdpEz1'}, metadata=MatchMetadata(generator='blank', score=0, commonLetters=[], interests=[], intents=[], availability=[], commonInterests=[], rareInterests=[], matchingIntents=[], rareIntents=[], matchingAvailability=[], limitedAvailability=[]), community=None, release=None, key=None, title=None)

In [43]:
import math
def cosine_similarity(v1,v2):
    "compute cosine similarity of v1 to v2: (v1 dot v2)/{||v1||*||v2||)"
    sumxx, sumxy, sumyy = 0, 0, 0
    for i in range(len(v1)):
        x = v1[i]; y = v2[i]
        sumxx += x*x
        sumyy += y*y
        sumxy += x*y
    if sumxx*sumyy == 0: return 0
    return sumxy/math.sqrt(sumxx*sumyy)

In [44]:
cosine_similarity(users[10][1],users[0][1])

0.816496580927726