In [None]:
import geopandas as gpd

import pandas as pd

import os

In [None]:
# Prompt for the location of the metadata CSV, then load it into a DataFrame.
meta_input = input("Enter the meta file path: ")
metadata = pd.read_csv(meta_input)

In [None]:
# Preview the first 40 rows of the freshly loaded metadata table.
metadata.head(40)

In [None]:
# Narrow the metadata down to the three columns the rest of the notebook reads:
# the Latin name, the common name, and the "SHP/*" column.
kept_columns = ["Latin Name", "Common Name", "SHP/*"]
metadata = metadata[kept_columns]

metadata.head()

In [None]:
# Prompt for the directory that holds the parquet files to scan.
base_path = input("Enter the base path: ")

# Every entry in that directory is treated as a parquet file.
files = os.listdir(base_path)

# Read each file and keep the ones flagged for review: a file is kept when the
# first element of its first "unique_id" value equals "check".
check_frames = []
for file in files:
    df = pd.read_parquet(os.path.join(base_path, file))

    if df["unique_id"].values[0][0] == "check":
        check_frames.append(df)

# DataFrame.append was removed in pandas 2.0 (and re-copied the frame on every
# iteration), so the matches are gathered in a list and concatenated once.
# pd.concat rejects an empty list, hence the explicit empty-frame fallback.
check_init = pd.concat(check_frames) if check_frames else pd.DataFrame()

check_init

In [None]:
# Keep the metadata rows whose "Latin Name" also occurs as a
# "scientific_name" in check_init.
in_check_init = metadata["Latin Name"].isin(check_init["scientific_name"])
check_meta = metadata.loc[in_check_init]

check_meta

In [None]:
# Hand-maintained list of Latin names to flag in addition to the ones found
# through check_init; entries are laid out one genus per line.
scientific = [
    "Alnus maritima",
    "Ceanothus spinosus",
    "Celtis lindheimeri",
    "Crataegus erythropoda", "Crataegus saligna", "Crataegus tracyi",
    "Cupressus bakeri", "Cupressus guadalupensis", "Cupressus macrocarpa",
    "Elliottia racemosa",
    "Fraxinus gooddingii", "Fraxinus papillosa",
    "Fremontodendron mexicanum",
    "Halesia parviflora",
    "Ilex amelanchier",
    "Juglans hindsii",
    "Leitneria floridana",
    "Nemopanthus collinus",
    "Ostrya chisosensis", "Ostrya knowltonii",
    "Pinus balfouriana", "Pinus culminicola", "Pinus nelsonii",
    "Pinus pinceana", "Pinus washoensis",
    "Pistacia texana",
    "Populus hinckleyana",
    "Prunus alleghaniensis", "Prunus lyonii",
    "Quercus ajoensis", "Quercus arkansana", "Quercus georgiana",
    "Quercus graciliformis", "Quercus gravesii", "Quercus hypoleucoides",
]

# Metadata rows whose "Latin Name" is on the list above.
on_manual_list = metadata["Latin Name"].isin(scientific)
check_meta_2 = metadata.loc[on_manual_list]

check_meta_2

In [None]:
# Stack the two flagged subsets on top of each other. Nothing is de-duplicated
# here, so a species matched by both filters shows up twice.
flagged_subsets = [check_meta, check_meta_2]
check = pd.concat(flagged_subsets)

check

In [None]:
# final_1 is the complement of check: every metadata row whose "Latin Name"
# does not appear among the flagged species.
is_flagged = metadata["Latin Name"].isin(check["Latin Name"])
final_1 = metadata.loc[~is_flagged]

final_1

In [None]:
# Load one parquet file per species in final_1. The file name is the Latin
# name with spaces replaced by underscores plus ".parquet", appended directly
# to the entered prefix (so the prefix must include any trailing separator).
path = input("Enter the file path: ")

species_frames = []
for latin_name in final_1["Latin Name"]:
    file = path + latin_name.replace(" ", "_") + ".parquet"

    df = pd.read_parquet(file)
    species_frames.append(df)

# DataFrame.append was removed in pandas 2.0 (and re-copied the frame on every
# iteration), so the frames are gathered in a list and concatenated once.
# pd.concat rejects an empty list, hence the explicit empty-frame fallback.
pre_finished = pd.concat(species_frames) if species_frames else pd.DataFrame()

pre_finished

In [None]:
# Write pre_finished out as a parquet file.
# NOTE(review): the destination is an empty-string placeholder; supply a real output path before running.
pre_finished.to_parquet("")

In [None]:
# Write the flagged-species frame out as a parquet file.
# NOTE(review): the destination is an empty-string placeholder; supply a real output path before running.
check.to_parquet("")

In [None]:
# Prompt for a saved check file and load it. This rebinds `check`, replacing
# whatever frame that name held before this cell ran.
check_input = input("Enter the checkfile path: ")
check = pd.read_parquet(check_input)

check

In [None]:
# Load the species range geometry into a GeoDataFrame.
# NOTE(review): the file path is an empty-string placeholder; supply the real
# path before running. Presumably this is the range map for the species being
# processed — confirm.
little = gpd.read_file("")

little

In [None]:
# Show the coordinate reference system the range layer was read with.
little.crs

In [None]:
# Reproject the range layer to EPSG:4326, then display the CRS to confirm
# the change took effect.
little = little.to_crs(epsg=4326)

little.crs

In [None]:
# Quick-look plot of the range layer with the x-axis direction reversed.
# NOTE(review): reversing the x-axis on an auto-scaled plot mirrors the map left-to-right — confirm this is intended.
little.plot().invert_xaxis()

In [None]:
# Prompt for the eco-region map file and load it as a GeoDataFrame.
ecomap_input = input("Enter the file path: ")
eco_map = gpd.read_file(ecomap_input)

In [None]:
# Draw the eco-region outlines as the base layer, put the species range on top
# in translucent red, and zoom to a fixed window.
base = eco_map.plot(color="white", edgecolor="black")
ax = little.plot(ax=base, color="red", alpha=0.4)

# Window bounds as (xmin, ymin, xmax, ymax); presumably degrees of
# longitude/latitude, since `little` is in EPSG:4326 — confirm for eco_map.
# The x bounds used to be given in swapped order (-100 as "xmin", -115 as
# "xmax"), which produced a reversed axis that a trailing invert_xaxis() then
# undid. Listing them in ascending order gives the same final view directly.
xmin, ymin, xmax, ymax = (-115, 24, -100, 35)

ax.set_xlim(xmin, xmax)
ax.set_ylim(ymin, ymax);

In [None]:
# For every eco-region the species range touches, compute how much of the
# region's area the range covers.

# Areas are measured in an equal-area projection. Taking `.area` directly on
# EPSG:4326 geometries yields square degrees, which geopandas warns are not
# meaningful and which distort the covered/total ratio with latitude.
AREA_CRS = "EPSG:6933"


def _area_of(frame):
    """Return the area of each geometry in ``frame``, reprojected to ``AREA_CRS`` when a CRS is set."""
    geoms = frame.geometry
    # Without a declared CRS there is nothing to reproject from, so fall back
    # to the raw planar area (the previous behaviour).
    return geoms.area if geoms.crs is None else geoms.to_crs(AREA_CRS).area


def _usable_regions(frame):
    """Drop rows with a missing ``unique_id`` or with ``TYPE`` equal to MEOW or PPOW."""
    # Missing ids are rejected both as real nulls and as the literal "<NA>"
    # string; comparing against the string alone lets real nulls through.
    has_id = frame["unique_id"].notna() & (frame["unique_id"] != "<NA>")
    wanted_type = ~frame["TYPE"].isin(["MEOW", "PPOW"])
    return frame[has_id & wanted_type]


# Eco-regions that intersect the range at all.
intersects = gpd.sjoin(little, eco_map)
unique_ids = list(intersects["unique_id"].unique())

eco_map_unique = _usable_regions(eco_map[eco_map["unique_id"].isin(unique_ids)])

# The pieces of the range that fall inside each eco-region.
overlay = _usable_regions(gpd.overlay(little, eco_map, how="intersection"))

overlay.plot(alpha=0.5, edgecolor="k", cmap="tab10")

# .assign returns a new frame, which avoids writing a column into a filtered
# slice (SettingWithCopyWarning).
overlay = overlay.assign(area=_area_of(overlay))
overlay_areas = overlay[["unique_id", "area"]].groupby("unique_id").sum()

eco_map_unique = eco_map_unique.assign(area=_area_of(eco_map_unique))
eco_map_unique_areas = eco_map_unique[["unique_id", "area"]].groupby("unique_id").sum()

# Align the two per-region totals on unique_id.
combined_areas = pd.concat([overlay_areas, eco_map_unique_areas], axis=1)
combined_areas.columns = ["overlay_area", "eco_map_unique_area"]

# Despite the column name this is a fraction (covered / total), not a value
# scaled to 100.
combined_areas["percentage"] = (
    combined_areas["overlay_area"] / combined_areas["eco_map_unique_area"]
)

combined_areas

In [None]:
# Eco-region ids, as strings, that end up in the "unique_id" column of the final frame.
# NOTE(review): presumably picked by hand from the combined_areas table — confirm how these were chosen.
native = ["310", "331", "415"]

native

In [None]:
# Code identifying the species in the metadata "SHP/*" column.
shp = "querhypo"

# Look the species up once and fail with a readable message when the code is
# not present, instead of an opaque out-of-bounds error from `.values[0]`.
species_rows = metadata.loc[metadata["SHP/*"] == shp, ["Latin Name", "Common Name"]]
if species_rows.empty:
    raise IndexError(f"no metadata row has SHP/* == {shp!r}")
scientific_name, common_name = species_rows.iloc[0]

# One-row summary: the species names plus the full list of native region ids.
# The row is built directly rather than broadcasting the names over `native`
# and grouping back into a list: groupby drops NaN keys, so that round trip
# silently produced an empty frame whenever the common name was missing.
final = pd.DataFrame(
    {
        "scientific_name": [scientific_name],
        "common_name": [common_name],
        "unique_id": [list(native)],
    },
)

final

In [None]:
# Derive the output file name from the scientific name in the first row of
# `final`, with spaces turned into underscores.
first_scientific_name = final["scientific_name"].iloc[0]
string_name = str(first_scientific_name).replace(" ", "_")

# The entered prefix is used as-is, so it must include any trailing separator.
final_base_path = input("Enter the final base path: ")

# Note: this rebinds `path`.
path = f"{final_base_path}{string_name}.parquet"
final.to_parquet(path)