In [10]:
import sys
import os
import json

sys.path.append("../../")

import datetime
from typing import Dict, List

from pipeline.extract import (
    extract_intent_upvotes,
    extract_intents,
    extract_interests,
    extract_match_stars,
    extract_recent_matches,
    extract_users,
)

from pipeline.utils.firebase import initialize_firebase_for_prefect

db = initialize_firebase_for_prefect.run(os.environ['VITE_firebase_databaseURL'],os.environ['API_ADMIN_CREDENTIALS'])

ValueError: The default Firebase app already exists. This means you called initialize_app() more than once without providing an app name as the second argument. In most cases you only need to call initialize_app() once. But if you do want to initialize multiple apps, pass a second argument to initialize_app() to give each app a unique name.

In [None]:
df_users = extract_users.run(db, "demo")
df_matches = extract_recent_matches.run(db, "demo")

[2022-07-28 14:08:01+0000] INFO - prefect | Returned 6 rows, 7 cols.
[2022-07-28 14:08:01+0000] INFO - prefect | Extracted 20 user-match records.
[2022-07-28 14:08:01+0000] INFO - prefect | Converted to 9 match records.
[2022-07-28 14:08:01+0000] INFO - prefect | Returned 9 rows, 6 cols.


In [None]:
import pytest
from pipeline.matching.rankers import VarietyRanker
from pipeline.types import Match, MatchingInput, MatchMetadata

metadata_1 = MatchMetadata(generator="blueGenerator")
metadata_2 = MatchMetadata(generator="greenGenerator")
past_match = Match(users={"A", "B"}, metadata=metadata_1)
inp = MatchingInput(
        community="test",
        release="2022-04-01",
        users=[],
        recent_matches=[past_match],
    )

In [None]:
metadata_1

MatchMetadata(generator='blueGenerator', score=0, commonLetters=[], interests=[], intents=[], availability=[], commonInterests=[], rareInterests=[], matchingIntents=[], rareIntents=[], matchingAvailability=[], limitedAvailability=[])

In [None]:
metadata_2

MatchMetadata(generator='greenGenerator', score=0, commonLetters=[], interests=[], intents=[], availability=[], commonInterests=[], rareInterests=[], matchingIntents=[], rareIntents=[], matchingAvailability=[], limitedAvailability=[])

In [None]:
inp.recent_matches

[Match(users={'B', 'A'}, metadata=MatchMetadata(generator='blueGenerator', score=0, commonLetters=[], interests=[], intents=[], availability=[], commonInterests=[], rareInterests=[], matchingIntents=[], rareIntents=[], matchingAvailability=[], limitedAvailability=[]), community=None, release=None, key=None, title=None)]

In [None]:
from dataclasses import dataclass, field
from typing import Callable, Iterator, List, Set

from pipeline.types.community import Community
from pipeline.types.intent import Intent
from pipeline.types.interest import Interest
from pipeline.types.logging import DefaultLogger, EngineLogger
from pipeline.types.match import Match
from pipeline.types.rating import IntentUpvote, MatchStars
from pipeline.types.release import ReleaseTag
from pipeline.types.user import User, UserId

EngineId = str

RecentMatch = Match
RecentlyMatchedUsers = Set[UserId]

@dataclass
class MatchingInput:
    community: Community
    release: ReleaseTag
    users: List[User]
    recent_matches: List[Match]
    logger: EngineLogger = DefaultLogger()
    interests: List[Interest] = field(default_factory=list)
    intents: List[Intent] = field(default_factory=list)
    intent_upvotes: List[IntentUpvote] = field(default_factory=list)
    match_stars: List[MatchStars] = field(default_factory=list)

MatchGenerateFunction = Callable[[MatchingInput], Iterator[Match]]
MatchRankFunction = Callable[[MatchingInput, Iterator[Match]], Iterator[Match]]
MatchFinalizeFunction = Callable[[MatchingInput, List[User]], List[Match]]

@dataclass
class MatchingEvaluation:
    n_matches_total: int
    n_matches_tier_1: int
    n_matches_tier_2: int
    n_matches_tier_3: int

@dataclass
class MatchingOutput:
    community: Community
    release: ReleaseTag
    users: List[User]
    matches: List[Match]


In [None]:
MatchRankFunction

typing.Callable[[__main__.MatchingInput, typing.Iterator[pipeline.types.match.Match]], typing.Iterator[pipeline.types.match.Match]]

In [39]:
import dataclasses
from typing import List

import pandas as pd
import prefect
from prefect import task

from pipeline.matching.utils import generate_keys
from pipeline.types import (
    Community,
    Match,
    MatchingInput,
    MatchMetadata,
    ReleaseTag,
    ChatData,
)

@task
def convert_matches_from_df(df: pd.DataFrame) -> List[Match]:
    cols = set(df.columns)
    field_names = [f.name for f in dataclasses.fields(Match) if f.name in cols]
    match_dicts = df[field_names].to_dict(orient="records")
    matches = []
    for record in match_dicts:
        raw_metadata = record.get("metadata", {})
        parsed_metadata = MatchMetadata(**raw_metadata)
        record.pop("metadata", None)
        match = Match(**record, metadata=parsed_metadata)
        matches.append(match)
    return matches

In [16]:
import pandas as pd
import prefect
from prefect import task


from pipeline.types import Community, Match
chat_data_ref = db.reference(f"matches/sds")
messages_ref = db.reference(f"messages/sds")

message_records = messages_ref.get()
user_chat_records = chat_data_ref.get()

In [17]:
user_chat_records['2022-06-27~9aq0Nntc~JpsfDVkhxSdWZz4v1OEMm1U1Agk1']




{'for': 'JpsfDVkhxSdWZz4v1OEMm1U1Agk1',
 'id': '2022-06-27~9aq0Nntc',
 'metadata': {'commonLetters': ['e', 'h', 'i', 'n', 'p', 'r', 's', 't'],
  'generator': 'commonLettersGenerator',
  'score': 0},
 'participants': {'JpsfDVkhxSdWZz4v1OEMm1U1Agk1': True,
  'b4U6ctddxwSmR47aoiuNdDVCqUt2': True},
 'release_tag': '2022-06-27',
 'release_timestamp': 1656288000000.0,
 'title': 'Your Match'}

In [24]:
for items in user_chat_records.items()

dict_items([('2022-06-27~9aq0Nntc~JpsfDVkhxSdWZz4v1OEMm1U1Agk1', {'for': 'JpsfDVkhxSdWZz4v1OEMm1U1Agk1', 'id': '2022-06-27~9aq0Nntc', 'metadata': {'commonLetters': ['e', 'h', 'i', 'n', 'p', 'r', 's', 't'], 'generator': 'commonLettersGenerator', 'score': 0}, 'participants': {'JpsfDVkhxSdWZz4v1OEMm1U1Agk1': True, 'b4U6ctddxwSmR47aoiuNdDVCqUt2': True}, 'release_tag': '2022-06-27', 'release_timestamp': 1656288000000.0, 'title': 'Your Match'}), ('2022-06-27~9aq0Nntc~b4U6ctddxwSmR47aoiuNdDVCqUt2', {'for': 'b4U6ctddxwSmR47aoiuNdDVCqUt2', 'id': '2022-06-27~9aq0Nntc', 'metadata': {'commonLetters': ['e', 'h', 'i', 'n', 'p', 'r', 's', 't'], 'generator': 'commonLettersGenerator', 'score': 0}, 'participants': {'JpsfDVkhxSdWZz4v1OEMm1U1Agk1': True, 'b4U6ctddxwSmR47aoiuNdDVCqUt2': True}, 'release_tag': '2022-06-27', 'release_timestamp': 1656288000000.0, 'title': 'Your Match'}), ('2022-06-27~FOAC2RcN~AmkGRnFOlKaqTzU4yiXqRMA0GRf2', {'for': 'AmkGRnFOlKaqTzU4yiXqRMA0GRf2', 'id': '2022-06-27~FOAC2RcN', 'm

In [18]:
user_chat_records

{'2022-06-27~9aq0Nntc~JpsfDVkhxSdWZz4v1OEMm1U1Agk1': {'for': 'JpsfDVkhxSdWZz4v1OEMm1U1Agk1',
  'id': '2022-06-27~9aq0Nntc',
  'metadata': {'commonLetters': ['e', 'h', 'i', 'n', 'p', 'r', 's', 't'],
   'generator': 'commonLettersGenerator',
   'score': 0},
  'participants': {'JpsfDVkhxSdWZz4v1OEMm1U1Agk1': True,
   'b4U6ctddxwSmR47aoiuNdDVCqUt2': True},
  'release_tag': '2022-06-27',
  'release_timestamp': 1656288000000.0,
  'title': 'Your Match'},
 '2022-06-27~9aq0Nntc~b4U6ctddxwSmR47aoiuNdDVCqUt2': {'for': 'b4U6ctddxwSmR47aoiuNdDVCqUt2',
  'id': '2022-06-27~9aq0Nntc',
  'metadata': {'commonLetters': ['e', 'h', 'i', 'n', 'p', 'r', 's', 't'],
   'generator': 'commonLettersGenerator',
   'score': 0},
  'participants': {'JpsfDVkhxSdWZz4v1OEMm1U1Agk1': True,
   'b4U6ctddxwSmR47aoiuNdDVCqUt2': True},
  'release_tag': '2022-06-27',
  'release_timestamp': 1656288000000.0,
  'title': 'Your Match'},
 '2022-06-27~FOAC2RcN~AmkGRnFOlKaqTzU4yiXqRMA0GRf2': {'for': 'AmkGRnFOlKaqTzU4yiXqRMA0GRf2',
  '

In [21]:
users = []


In [43]:
from pipeline.types import (ChatData)
ids = set()
chatList = []
for key in user_chat_records.keys():
    #print(key)
    #print(user_chat_records[key]['id'])
    if user_chat_records[key]['id'] not in ids:
        print(user_chat_records[key]['id'])
        print(ids.add(user_chat_records[key]['id']))
        print(user_chat_records[key])
        
        timestamp = user_chat_records[key]['release_timestamp']
        id = user_chat_records[key]['id']
        participants = user_chat_records[key]['participants']
        rtag = user_chat_records[key]['release_tag']
        chatdata_title = user_chat_records[key]['title']
        raw_metadata = user_chat_records[key].get("metadata", {})
        parsed_md = MatchMetadata(**raw_metadata)

        #ChatData()
        cd1 = ChatData(release_timestamp=timestamp, chat_match_id=id, community_id='sds', participants=participants, release_tag=rtag, title= chatdata_title, metadata=parsed_md, messages=None)
        print(cd1)
        #chatList.append()
        break

    #if key == user_chat_records[key]:
        
     #   users.append(value['for'])
#print(user_chat_records[key]['id'])

2022-06-27~9aq0Nntc
None
{'for': 'JpsfDVkhxSdWZz4v1OEMm1U1Agk1', 'id': '2022-06-27~9aq0Nntc', 'metadata': {'commonLetters': ['e', 'h', 'i', 'n', 'p', 'r', 's', 't'], 'generator': 'commonLettersGenerator', 'score': 0}, 'participants': {'JpsfDVkhxSdWZz4v1OEMm1U1Agk1': True, 'b4U6ctddxwSmR47aoiuNdDVCqUt2': True}, 'release_tag': '2022-06-27', 'release_timestamp': 1656288000000.0, 'title': 'Your Match'}
ChatData(release_timestamp=1656288000000.0, chat_match_id='2022-06-27~9aq0Nntc', community_id='sds', participants={'JpsfDVkhxSdWZz4v1OEMm1U1Agk1': True, 'b4U6ctddxwSmR47aoiuNdDVCqUt2': True}, release_tag='2022-06-27', title='Your Match', metadata=MatchMetadata(generator='commonLettersGenerator', score=0, commonLetters=['e', 'h', 'i', 'n', 'p', 'r', 's', 't'], interests=[], intents=[], availability=[], commonInterests=[], rareInterests=[], matchingIntents=[], rareIntents=[], matchingAvailability=[], limitedAvailability=[]), messages=None)


In [23]:
users

['JpsfDVkhxSdWZz4v1OEMm1U1Agk1', 'b4U6ctddxwSmR47aoiuNdDVCqUt2']