# Merge data from Zooniverse with data from TPMP DB

## Imports

In [None]:
from pathlib import Path

import pandas as pd

## Constants

In [None]:
# Directory holding the input CSVs, one level above the working directory.
data_path = Path("..") / "data_in"

## Load CSVs

### Load Zooniverse data

In [None]:
# Read the Zooniverse box export, expose its "filename" column under the name
# "hash" (the key the merge step joins on), and order the rows by that key.
df_z = pd.read_csv(data_path / "boxes_final.csv")
df_z = df_z.assign(hash=df_z["filename"]).drop(columns="filename")
df_z = df_z.sort_values("hash")
df_z


### Load TPMP data

In [None]:
# Read the TPMP table from the input directory.
tpmp_csv = data_path / "filename_to_hash_v2.csv"
df_t = pd.read_csv(tpmp_csv)
df_t

## Merge data

In [None]:
# Join the Zooniverse boxes with the TPMP rows on the shared "hash" column.
# pd.merge defaults to an inner join, so rows without a match on either side
# are dropped.
df = pd.merge(left=df_z, right=df_t, on="hash")
df = df.sort_values(["experiment", "plant", "date", "time"])

# Publish the hash under the name "filename" (this overwrites any column
# already called "filename") and retire the "hash" column.
df = df.assign(filename=df["hash"]).drop(columns="hash")

# Keep only the columns of interest, in their output order.
df = df.loc[
    :,
    [
        "experiment",
        "plant",
        "camera",
        "view_option",
        "date_time",
        "date",
        "time",
        "filename",
        "x",
        "y",
        "width",
        "height",
    ],
]

# Derive corner-style coordinates: x1/y1 copy x/y, x2/y2 add width/height.
df = df.assign(
    x1=df["x"],
    y1=df["y"],
    x2=df["x"] + df["width"],
    y2=df["y"] + df["height"],
)

# Discard boxes whose width is not strictly positive.
# NOTE(review): only the horizontal extent is checked; boxes with
# y2 <= y1 (zero or negative height) are kept. Confirm that is intended.
df = df[df["x2"] > df["x1"]]
df


## Save data

In [None]:
# Write the merged table next to the inputs, without the index column.
output_csv = data_path / "zooniverse_tpmp_data.csv"
df.to_csv(output_csv, index=False)