In [None]:
import requests
import pandas as pd
import numpy as np

from google.cloud import bigquery

In [None]:
# Remember naisdevice access to prod-fss
# Without a timeout requests waits indefinitely when the host is unreachable
# (e.g. naisdevice is disconnected); 30 seconds is a generous upper bound.
# raise_for_status() makes an HTTP error fail here instead of surfacing later
# as a confusing JSON/KeyError when the payload is parsed.
r_team = requests.get(url='https://teamkatalog-api.intern.nav.no/team?status=ACTIVE', timeout=30)
r_team.raise_for_status()
r_po = requests.get(url='https://teamkatalog-api.intern.nav.no/productarea?status=ACTIVE', timeout=30)
r_po.raise_for_status()

In [None]:
# Alternative endpoints (prod-fss-pub), kept for reference:
# r_team = requests.get(url='https://teamkatalog-api.prod-fss-pub.nais.io/team?status=ACTIVE')
# r_po = requests.get(url='https://teamkatalog-api.prod-fss-pub.nais.io/productarea?status=ACTIVE')

In [None]:
# Flatten the "content" list of each API response into a DataFrame
# (team response first, then product-area response).
df_team, df_po = (
    pd.json_normalize(response.json()["content"])
    for response in (r_team, r_po)
)

# Join teams with product areas (PO) on the team's productAreaId. This is an
# inner join, so a team whose productAreaId matches no product area is dropped.
# Column names present in both frames get the _team / _po suffix.
df_all = pd.merge(
    df_team,
    df_po,
    left_on="productAreaId",
    right_on="id",
    suffixes=["_team", "_po"],
)

In [None]:
client = bigquery.Client("nada-prod-6977")

# Fetch the number of datasets per nais team. The team name is the first run of
# non-'@' characters in `owner`; only rows at the highest `version` are counted.
dataset_count_sql = (
    "select REGEXP_EXTRACT(owner, '([^@]+)') naisteam, 'dataset' type, count(*) count "
    "from `nada-prod-6977.dataproducts.dataproducts` "
    "where version = (select max(version) from `nada-prod-6977.dataproducts.dataproducts`) "
    "group by REGEXP_EXTRACT(owner, '([^@]+)')"
)
df_dp = client.query(dataset_count_sql).result().to_dataframe()

# Fetch the number of datastories per owner (owner is used as-is as nais team).
datastory_count_sql = (
    "select owner naisteam, 'datastory' type, count(*) count "
    "from `nada-prod-6977.dataproducts.datastories` "
    "group by owner"
)
df_ds = client.query(datastory_count_sql).result().to_dataframe()

In [None]:
# Keep only the teamkatalog columns used further down: team id, team name,
# product-area name and the nais teams registered on the team.
df_tk = df_all[["id_team", "name_team", "name_po", "naisTeams"]]

In [None]:
# Manual links for nais teams that are not registered on any team in the
# team catalogue (teamkatalogen).
# Format: nais team name -> id_team (team catalogue id)
tk_link = {
    "arbeidsforhold": "29d44f3f-ff09-477c-b26a-83cd0aa66116",
    "yrkesskade": "270852c4-4c28-49a8-8189-a16897b31b95",
    "team-ai": "76f378c5-eb35-42db-9f4d-0e8197be0131",
    "spenn": "3c411c72-9587-4c6f-937e-2a404935b778",
    "nada": "5ade590e-3bc4-47fb-8b8d-552392f46376",
    "toi": "0150fd7c-df30-43ee-944e-b152d74c64d6",
    "teamia": "e6e3ce19-2cd3-491b-9ad9-817c2e9b7d96",
    "pensjon-saksbehandling": "45da0127-4d42-49b6-8130-e6c8e566abb8",
}

In [None]:
def link_naisteam_to_tk(naisteam):
    """Resolve a nais team name to a teamkatalog team id.

    Lookup order:
      1. the first row of the module-level ``df_tk`` whose ``naisTeams``
         collection contains ``naisteam`` -> that row's ``id_team``;
      2. the manual mapping ``tk_link``;
      3. the literal string ``"unknown"``.

    The manual mapping is a fallback only: a registration in teamkatalogen
    always wins over it, regardless of which row it is found in.

    Args:
        naisteam: nais team name to look up (may be None for missing owners).

    Returns:
        The matching ``id_team``, the manually linked id, or ``"unknown"``.
        A value is always returned, also when ``df_tk`` has no rows.
    """
    # Iterate positionally so the lookup does not depend on df_tk having a
    # 0..n-1 index.
    for team_id, nais_teams in zip(df_tk["id_team"], df_tk["naisTeams"]):
        # naisTeams is presumably a list per team (it comes from the JSON
        # payload); a team without the field shows up as NaN/None, which does
        # not support `in`, so such rows are skipped.
        if isinstance(nais_teams, (list, tuple, set)) and naisteam in nais_teams:
            return team_id
    return tk_link.get(naisteam, "unknown")

In [None]:
# Map every nais team in the dataset / datastory counts to a teamkatalog team
# id via link_naisteam_to_tk.
df_dp["id_team"] = df_dp["naisteam"].apply(link_naisteam_to_tk)
df_ds["id_team"] = df_ds["naisteam"].apply(link_naisteam_to_tk)

In [None]:
# Left joins keep every teamkatalog team, including teams with no datasets or
# datastories. "type" is set on all rows after each join, since rows without a
# match would otherwise have no type.
df_merged1 = pd.merge(df_tk, df_dp, on="id_team", how="left").assign(type="dataset")
df_merged2 = pd.merge(df_tk, df_ds, on="id_team", how="left").assign(type="datastory")

# Stack the dataset rows on top of the datastory rows.
df_merged = pd.concat([df_merged1, df_merged2])

In [None]:
# Inspect rows whose id_team has already occurred earlier in the frame.
# Every team occurs at least twice here (once per type), so the datastory half
# is always included.
repeated_team = df_merged["id_team"].duplicated()
df_merged[repeated_team]

In [None]:
# Order by product area, then count, with a fresh 0..n-1 index. Teams without
# any match have a missing count, which is turned into an integer 0.
df_merged = (
    df_merged
    .sort_values(by=["name_po", "count"])
    .reset_index(drop=True)
    .assign(count=lambda frame: frame["count"].fillna(0).astype(int))
)

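# NOTE: disabled filter, kept for reference. It refers to dp_count / ds_count,
# which are not columns of df_merged as it is built in this notebook, and the
# comparison needs parentheses because `&` binds tighter than `==`.
#invalid_rows = (df_merged.dp_count + df_merged.ds_count == 0) & pd.isna(df_merged.name_team)
#df_merged = df_merged[~invalid_rows].reset_index(drop=True)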

In [None]:
# Preview the first five rows of the sorted frame.
df_merged.head(5)

In [None]:
# Spot check: show the rows for the team named "Nais".
is_nais_team = df_merged["name_team"].eq("Nais")
df_merged[is_nais_team]

In [None]:
# Counts whose nais team could not be tied to a teamkatalog team. The left
# joins on df_tk dropped these rows, so they are appended here.
df_unassigned_dp = df_dp[df_dp["id_team"] == "unknown"]
df_unassigned_ds = df_ds[df_ds["id_team"] == "unknown"]

# Make the cell safe to re-run: remove "unknown" rows appended by a previous
# execution so they are not added twice. On a first run this removes nothing,
# since no teamkatalog row has id_team "unknown".
df_merged = df_merged[df_merged["id_team"] != "unknown"]

# Only concatenate when there is something to append; order is datasets first,
# then datastories.
unassigned_frames = [
    frame for frame in (df_unassigned_dp, df_unassigned_ds) if frame.shape[0] > 0
]
if unassigned_frames:
    # The appended rows have no team / product-area name; label them "unknown".
    df_merged = pd.concat([df_merged, *unassigned_frames]).fillna(
        value={"name_team": "unknown", "name_po": "unknown"}
    )
df_merged.tail()

In [None]:
import datetime

# Stamp every row with the UTC time of this run as a naive ISO-8601 string
# (no offset suffix), i.e. the same format utcnow().isoformat() produced.
# datetime.utcnow() is deprecated since Python 3.12, so take an aware UTC
# "now" and strip the tzinfo instead.
df_merged["last_updated"] = (
    datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None).isoformat()
)
df_merged.head()

In [None]:
import plotly.express as px

In [None]:
# Stacked bar chart: one bar per product area, split by type
# (dataset / datastory), bar height = count.
fig = px.bar(
    data_frame=df_merged,
    x="name_po",
    y="count",
    color="type",
    barmode="stack",
)
fig.update_layout(height=500)

In [None]:
# if_exists='replace' drops the table and creates it again, which means access has to be granted again on the marketplace.
# df_merged.to_gbq('dataproducts.po_teams', 
#                  'nada-prod-6977',
#                  if_exists='replace'
#                  )

In [None]:
# Fully qualified destination table (project.dataset.table).
table_id = 'nada-prod-6977.dataproducts.po_teams'

# WRITE_TRUNCATE is the BigQuery write disposition that replaces the table's
# existing rows with the loaded data.
job_config = bigquery.LoadJobConfig()
job_config.write_disposition = "WRITE_TRUNCATE"

job = client.load_table_from_dataframe(
    dataframe=df_merged,
    destination=table_id,
    job_config=job_config,
)

# Wait for the load job to complete; raises if the job failed.
job.result()