In [None]:
import datetime
import os
import pickle
from collections import defaultdict

import pandas as pd
import psycopg2
import seaborn as sns
from matplotlib import pyplot as plt
from matplotlib.ticker import FuncFormatter

%load_ext nb_black

In [None]:
def connect():
    """Open a new psycopg2 connection to the local ``venmo`` database.

    Connects as user ``postgres`` to ``localhost:5432``. The password is read
    from the ``POSTGRES_PASS`` environment variable and falls back to an empty
    string when that variable is unset.

    Returns:
        The connection object produced by ``psycopg2.connect``. This function
        never closes it; that is left to the caller.
    """
    settings = {
        "user": "postgres",
        "password": os.environ.get("POSTGRES_PASS", ""),
        "host": "localhost",
        "port": 5432,
        "database": "venmo",
    }
    return psycopg2.connect(**settings)


def location_to_state(location):
    """Extract the state component from a location's display name.

    The display name is split on ``", "``. The last component is taken to be the
    country, and the component before it is either the state or a postal code
    (presumably a Nominatim-style address -- confirm against the geocoder used).

    Args:
        location: Object exposing a ``raw`` mapping that contains a
            ``"display_name"`` string.

    Returns:
        * ``None`` when the display name does not contain
          ``"United States of America"``, or when it has no component other
          than the country.
        * The component just before the country when that component is not
          numeric.
        * Otherwise (the component next to the country consists only of digits
          once ``-`` and ``:`` are removed, i.e. it looks like a postal code)
          the component before it, or the placeholder ``"na"`` if there is none.
    """
    name = location.raw["display_name"]
    if "United States of America" not in name:
        return None

    parts = name.split(", ")
    if len(parts) < 2:
        # A country-only display name carries no state component, and the
        # two-way unpacking below needs at least one component before the country.
        return None

    # "na" pads the case where a single component precedes the country.
    before, candidate = (["na"] + parts)[-3:-1]
    if candidate.replace("-", "").replace(":", "").isdigit():
        # ``candidate`` is a postal code; the state sits one component earlier.
        return before
    return candidate

In [None]:
# Restore the two pickled caches used by the cells below.
# NOTE(review): pickle.load can execute arbitrary code -- only load files that
# were produced locally / come from a trusted source.

# user_id -> (lat, lng, loc) tuples.
with open("user_id_to_loc.pkl", "rb") as loc_file:
    user_id_to_loc_saved = pickle.load(loc_file)

# loc -> location object (read through ``.raw["display_name"]`` later on).
with open("geo_cache.pkl", "rb") as geo_file:
    geo_cache = pickle.load(geo_file)

In [None]:
# Gather, per state, the transactions created after CREATED_AFTER in which a
# located user is either the actor or the recipient, then cache them on disk.

# Only transactions created strictly after this moment are kept.
CREATED_AFTER = datetime.datetime(2020, 3, 10)

# Rows where the user is the actor are tagged 'from'; rows where the user is the
# recipient are tagged 'to'. Both placeholders receive the same user id.
USER_TRANSACTIONS_SQL = """
    SELECT 'from', id, message, type, created, actor_user_id, recipient_id FROM transactions WHERE actor_user_id=%s
    UNION ALL
    SELECT 'to', id, message, type, created, actor_user_id, recipient_id FROM transactions WHERE recipient_id=%s
    """

transactions_by_state = defaultdict(list)

conn = connect()
try:
    # The cursor is closed by its context manager and the connection by the
    # ``finally`` clause, so neither leaks if a lookup or query raises mid-loop.
    with conn.cursor() as cur:
        for user_id, (_lat, _lng, loc) in user_id_to_loc_saved.items():
            state = location_to_state(geo_cache[loc])
            if state is None:
                # No state could be derived for this user; skip them.
                continue
            cur.execute(USER_TRANSACTIONS_SQL, (user_id, user_id))
            transactions = cur.fetchall()
            # Column 0 is the direction tag and column 4 is ``created``. The tag
            # is stripped before storing; 'from' rows are appended before 'to'
            # rows for each user.
            # NOTE(review): a transaction between two located users of the same
            # state is appended once per user -- confirm the double counting is
            # intended.
            for direction in ("from", "to"):
                transactions_by_state[state].extend(
                    t[1:]
                    for t in transactions
                    if t[0] == direction and t[4] > CREATED_AFTER
                )
finally:
    conn.close()

print('Saving...')
with open("transactions_by_state.pkl", "wb") as f:
    pickle.dump(transactions_by_state, f)

In [None]:
# Reload the per-state transactions cached on disk by the previous cell.
with open("transactions_by_state.pkl", "rb") as cache_file:
    transactions_by_state_saved = pickle.load(cache_file)

# Flatten {state: [transaction, ...]} into one (state, POSIX timestamp) pair per
# transaction. Each transaction is a 6-tuple whose fourth item is ``created``.
state_date_pairs = [
    (state_name, created_at.timestamp())
    for state_name, state_rows in transactions_by_state_saved.items()
    for _id, _message, _type, created_at, _actor, _recipient in state_rows
]

# Column-oriented layout: one "State" entry and one "Date" entry per transaction.
df_by_state_data = {
    "State": [state_name for state_name, _ in state_date_pairs],
    "Date": [epoch for _, epoch in state_date_pairs],
}
df_by_state = pd.DataFrame(df_by_state_data)

In [None]:
# Histogram of New York transactions over time.
fig, ax = plt.subplots(figsize=(15, 15))
new_york = df_by_state[df_by_state["State"] == "New York"]
sns.histplot(new_york, x="Date", hue="State", ax=ax).set_title("Transactions")

# "Date" holds POSIX timestamps, so render each tick as a YYYY-MM-DD date.
# A formatter derives the label from the tick's own value; calling
# set_xticklabels without first fixing the tick positions can leave labels
# attached to the wrong ticks once the locator picks different positions.
ax.xaxis.set_major_formatter(
    FuncFormatter(
        lambda ts, _pos: datetime.datetime.fromtimestamp(ts).date().isoformat()
    )
)