In [1]:
import sys
import os
import json

sys.path.append("../../")

import datetime
from typing import Dict, List

from pipeline.extract import (
    extract_intent_upvotes,
    extract_intents,
    extract_interests,
    extract_match_stars,
    extract_recent_matches,
    extract_users,
)

from pipeline.utils.firebase import initialize_firebase_for_prefect

# Read both Firebase settings from the environment (a missing variable raises
# KeyError) and create the `db` handle that the extract tasks receive.
firebase_database_url = os.environ['VITE_firebase_databaseURL']
firebase_admin_credentials = os.environ['API_ADMIN_CREDENTIALS']
db = initialize_firebase_for_prefect.run(
    firebase_database_url,
    firebase_admin_credentials,
)

In [2]:
# Both extract tasks are invoked through `.run` with the same community key.
demo_community = "demo"
df_users = extract_users.run(db, demo_community)
df_matches = extract_recent_matches.run(db, demo_community)

[2022-07-23 16:42:08+0000] INFO - prefect | Returned 6 rows, 5 cols.
[2022-07-23 16:42:08+0000] INFO - prefect | Extracted 20 user-match records.
[2022-07-23 16:42:08+0000] INFO - prefect | Converted to 9 match records.
[2022-07-23 16:42:08+0000] INFO - prefect | Returned 9 rows, 6 cols.


In [3]:
import pytest
from pipeline.matching.rankers import VarietyRanker
from pipeline.types import Match, MatchingInput, MatchMetadata

# Two metadata objects that differ only in the generator name.
metadata_1, metadata_2 = (
    MatchMetadata(generator=generator_name)
    for generator_name in ("blueGenerator", "greenGenerator")
)

# A single earlier match between users A and B, tagged with the first metadata.
past_match = Match(metadata=metadata_1, users={"A", "B"})

# Matching input with no users and exactly one recent match.
inp = MatchingInput(
    community="test",
    release="2022-04-01",
    users=[],
    recent_matches=[past_match],
)

In [4]:
# Echo the first metadata object to see which fields MatchMetadata fills in by default.
metadata_1

MatchMetadata(generator='blueGenerator', score=0, commonLetters=[], interests=[], intents=[], availability=[], commonInterests=[], rareInterests=[], matchingIntents=[], rareIntents=[], matchingAvailability=[], limitedAvailability=[])

In [5]:
# Echo the second metadata object; it was built with a different generator name.
metadata_2

MatchMetadata(generator='greenGenerator', score=0, commonLetters=[], interests=[], intents=[], availability=[], commonInterests=[], rareInterests=[], matchingIntents=[], rareIntents=[], matchingAvailability=[], limitedAvailability=[])

In [6]:
# Check that the matching input holds the single past match it was built with.
inp.recent_matches

[Match(users={'B', 'A'}, metadata=MatchMetadata(generator='blueGenerator', score=0, commonLetters=[], interests=[], intents=[], availability=[], commonInterests=[], rareInterests=[], matchingIntents=[], rareIntents=[], matchingAvailability=[], limitedAvailability=[]), community=None, release=None, key=None, title=None)]

In [7]:
from dataclasses import dataclass, field
from typing import Callable, Iterator, List, Set

from pipeline.types.community import Community
from pipeline.types.intent import Intent
from pipeline.types.interest import Interest
from pipeline.types.logging import DefaultLogger, EngineLogger
from pipeline.types.match import Match
from pipeline.types.rating import IntentUpvote, MatchStars
from pipeline.types.release import ReleaseTag
from pipeline.types.user import User, UserId

# Alias: engine ids are plain strings.
EngineId = str

# Alias: a recent match uses the same type as any other match.
RecentMatch = Match
# Alias: a set of user ids — presumably the users matched recently; confirm against usage.
RecentlyMatchedUsers = Set[UserId]

@dataclass
class MatchingInput:
    """Inputs handed to the match generate / rank / finalize callables.

    ``community``, ``release``, ``users`` and ``recent_matches`` are required.
    Every other field has a default that is created fresh for each instance,
    so no default object is shared between two ``MatchingInput`` values.
    """

    community: Community
    release: ReleaseTag
    users: List[User]
    recent_matches: List[Match]
    # default_factory instead of `= DefaultLogger()`: the latter builds one
    # logger when the class is defined and shares it across all instances, and
    # Python 3.11+ rejects such a default outright when the object is unhashable.
    logger: EngineLogger = field(default_factory=DefaultLogger)
    interests: List[Interest] = field(default_factory=list)
    intents: List[Intent] = field(default_factory=list)
    intent_upvotes: List[IntentUpvote] = field(default_factory=list)
    match_stars: List[MatchStars] = field(default_factory=list)

# Callable signatures that all take a MatchingInput as first argument.
# Generate: matching input -> iterator of matches.
MatchGenerateFunction = Callable[[MatchingInput], Iterator[Match]]
# Rank: matching input plus an iterator of matches -> iterator of matches.
MatchRankFunction = Callable[[MatchingInput, Iterator[Match]], Iterator[Match]]
# Finalize: matching input plus a list of users -> list of matches.
MatchFinalizeFunction = Callable[[MatchingInput, List[User]], List[Match]]

@dataclass
class MatchingEvaluation:
    """Integer match counts: one total and one per tier (1 to 3).

    What qualifies a match for a given tier is not defined in this cell.
    """
    n_matches_total: int
    n_matches_tier_1: int
    n_matches_tier_2: int
    n_matches_tier_3: int

@dataclass
class MatchingOutput:
    """Container with the same community / release / users fields as
    MatchingInput, plus a list of matches.

    NOTE(review): presumably the result of a matching run — confirm against the
    code that constructs it.
    """
    community: Community
    release: ReleaseTag
    users: List[User]
    matches: List[Match]


In [8]:
# NOTE(review): no cell assigns `intent_upvotes`, so this raises NameError.
# Presumably `inp.intent_upvotes` was intended — confirm, or remove the cell.
intent_upvotes

NameError: name 'intent_upvotes' is not defined

In [None]:
# NOTE(review): none of the visible cells assigns `match_stars`; the displayed
# value must come from earlier kernel state. Define it in the notebook or drop
# this cell so Restart & Run All works.
match_stars

[MatchStars(from_user='A', value=5, community='test', match='1', users={'A', 'B'}, generator='blueGenerator')]

In [None]:
# Show the expanded Callable type behind the rank-function alias.
MatchRankFunction

typing.Callable[[__main__.MatchingInput, typing.Iterator[pipeline.types.match.Match]], typing.Iterator[pipeline.types.match.Match]]

In [9]:
import dataclasses
from typing import List

import pandas as pd
import prefect
from prefect import task

from pipeline.matching.utils import generate_keys
from pipeline.types import (
    Community,
    Match,
    MatchingInput,
    MatchMetadata,
    ReleaseTag,
)

@task
def convert_matches_from_df(df: pd.DataFrame) -> List[Match]:
    """Build ``Match`` objects from the rows of a DataFrame.

    Only columns whose names are fields of the ``Match`` dataclass are used;
    every other column is ignored. A row's ``metadata`` value is expanded into
    ``MatchMetadata(**value)``. When the column is absent, or the cell is
    missing (``None`` or NaN), ``MatchMetadata()`` with no arguments is used.

    Args:
        df: Frame with one row per match.

    Returns:
        One ``Match`` per row, in row order.

    Raises:
        TypeError: If a present, non-missing metadata value is not a mapping.
    """
    cols = set(df.columns)
    field_names = [f.name for f in dataclasses.fields(Match) if f.name in cols]
    matches = []
    for record in df[field_names].to_dict(orient="records"):
        # Take metadata out of the record so it is not passed to Match twice.
        raw_metadata = record.pop("metadata", None)
        # pandas stores NaN in rows that lack a value for the column; treat
        # that (and None) the same as a frame without a metadata column.
        if raw_metadata is None or (
            isinstance(raw_metadata, float) and pd.isna(raw_metadata)
        ):
            raw_metadata = {}
        matches.append(Match(**record, metadata=MatchMetadata(**raw_metadata)))
    return matches

In [13]:

def test_convert_matches_from_df():
    """A column that is not a ``Match`` field is dropped during conversion."""
    frame = pd.DataFrame([{"users": {"A", "B"}, "extraData": "ignore"}])
    assert convert_matches_from_df.run(frame) == [Match(users={"A", "B"})]



NameError: name 'actual' is not defined