# Extract coordinates from nfi data

In [81]:
import pandas as pd
import glob
from datetime import datetime
import os.path
from pyprojroot.here import here

## Get latest data

In [85]:
# Locate the most recent NFI dataset export in the current working directory,
# report which file was picked, and load it into `nfi_data_raw`.

# Get all files with nfi_dataset_for_analysis in the name
files = glob.glob("*nfi_dataset_for_analysis*")
if not files:
    # Fail with an actionable message instead of a bare IndexError on files[0]
    raise FileNotFoundError(
        "No file matching '*nfi_dataset_for_analysis*' found in "
        f"{os.getcwd()}. Run this notebook from the folder holding the export."
    )

# Sort names in descending order so that the first entry is the latest.
# This is a plain string sort of the file names, so it relies on the names
# starting with a sortable timestamp (e.g. 20231201-103649_...).
files.sort(reverse=True)
# Pick latest file
latest_file = files[0]

# Get the modification date and time of the latest file.
# getmtime is used rather than getctime: on Unix-like systems ctime is the
# time of the last metadata change, not the creation time, so it matched
# neither the variable names here nor the message printed below.
modification_time = os.path.getmtime(latest_file)
modification_date_time = datetime.fromtimestamp(modification_time).strftime(
    "%A %Y-%m-%d, %H:%M"
)
print(
    f"👉 Latest file is {latest_file}\n"
    f"👉 Last modified on {modification_date_time}\n\n"
)

# Load the file; the first CSV column is used as the row index.
# NOTE(review): the saved cell output shows a warning raised from this call
# (possibly a mixed-dtype DtypeWarning) — consider passing explicit dtypes.
nfi_data_raw = pd.read_csv(latest_file, index_col=0)
# Display the data
nfi_data_raw

👉 Latest file is 20231201-103649_nfi_dataset_for_analysis copy.csv
👉 Created on Friday 2023-12-01, 10:42




  nfi_data_raw = pd.read_csv(latest_file, index_col=0)


Unnamed: 0,idp,site_total_ba_at_v1,site_total_ba_at_v2,site_ba_prc_dead_at_v1,site_ba_prc_dead_at_v2,site_ba_prc_cut_at_v2,site_ba_prc_rec_at_v2,n_species_nfi,top1_species,top1_species_ba_1_mean,...,ba_change_abs_yr,ba_change_perc_yr,avg_growth_height_meter_per_yr,avg_growth_ba_per_year,land_use,land_use_change,cover_change,human_activity,human_activity_var,gre
1,632691,40.108598,45.709274,0.000000,0.054194,0.0,0.945806,5,73,6.473883,...,0.002279,0.092414,,,Wood Production,decreased,No Change,high,GEST,A
2,632691,40.108598,45.709274,0.000000,0.054194,0.0,0.945806,5,73,6.473883,...,0.580449,5.594531,0.830769,0.39905,Wood Production,decreased,No Change,high,GEST,A
3,632691,40.108598,45.709274,0.000000,0.054194,0.0,0.945806,5,73,6.473883,...,0.092486,3.750822,,,Wood Production,decreased,No Change,high,GEST,A
4,632691,40.108598,45.709274,0.000000,0.054194,0.0,0.945806,5,73,6.473883,...,0.015158,0.432420,,,Wood Production,decreased,No Change,high,GEST,A
5,632691,40.108598,45.709274,0.000000,0.054194,0.0,0.945806,5,73,6.473883,...,0.245116,4.200000,,,Wood Production,decreased,No Change,high,GEST,A
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
408220,1131437,36.006306,37.536585,0.008825,0.021353,0.0,0.978647,3,3,3.401504,...,0.001953,0.301125,,,No Use,No Change,No Change,none,DEF5,B
408221,1131437,36.006306,37.536585,0.008825,0.021353,0.0,0.978647,3,3,3.401504,...,0.023280,0.339893,,,No Use,No Change,No Change,none,DEF5,B
408222,1131437,36.006306,37.536585,0.008825,0.021353,0.0,0.978647,3,3,3.401504,...,0.016020,1.355556,,,No Use,No Change,No Change,none,DEF5,B
408223,1131437,36.006306,37.536585,0.008825,0.021353,0.0,0.978647,3,3,3.401504,...,0.010149,2.445508,,,No Use,No Change,No Change,none,DEF5,B


In [63]:
# Map the raw column names to the names used in the exported site tables.
# The keys also define which raw columns are kept (and in which order).
site_columns = {
    "idp": "idp",
    "visit_1": "first_year",
    "lon": "x",
    "lat": "y",
    "lon_fr": "x_fr",
    "lat_fr": "y_fr",
}

# One row per distinct combination of the selected columns, re-indexed from 0
site_rows = nfi_data_raw.loc[:, list(site_columns)].rename(columns=site_columns)
nfi_data = site_rows.drop_duplicates().reset_index(drop=True)

# Table that keeps idp: for later merging of nfi data and predictor data.
# SiteID is the 1-based row position in `nfi_data`.
# Column order is kept as it was (unclear whether anything downstream
# depends on it, so it is left untouched for now).
with_idp_column_order = ["first_year", "SiteID", "x", "y", "idp", "x_fr", "y_fr"]
nfi_data_with_idp = nfi_data.assign(SiteID=lambda sites: sites.index + 1).loc[
    :, with_idp_column_order
]
nfi_data_with_idp.to_csv("nfi_final_sites_with_idp.csv", index=False)
# nfi_data_with_idp

In [65]:
# Create data without idp: For faster extraction of predictor data
# Table without idp: for faster extraction of predictor data.
# Dropping idp can leave rows that are identical in every remaining column;
# only the first of each such group is kept.
unique_sites = nfi_data.drop(columns="idp").drop_duplicates()

# SiteID is derived from the row labels of `nfi_data` (label + 1), i.e. from
# the index as it was BEFORE the duplicates above were removed, so SiteID is
# not necessarily contiguous here.
# NOTE(review): presumably this keeps SiteID aligned with the with-idp table
# for the retained rows — confirm that downstream merges expect this.
without_idp_column_order = ["first_year", "SiteID", "x", "y", "x_fr", "y_fr"]
nfi_data_without_idp = (
    unique_sites.assign(SiteID=unique_sites.index + 1)
    .loc[:, without_idp_column_order]
    .reset_index(drop=True)
)
nfi_data_without_idp.to_csv("nfi_final_sites_without_idp.csv", index=False)

In [66]:
# Read the with-idp site table back from disk and display it as a check
pd.read_csv("nfi_final_sites_with_idp.csv")

Unnamed: 0,first_year,SiteID,x,y,idp,x_fr,y_fr
0,2011,1,-2.842824,48.337505,632691,2.674150e+05,6.820144e+06
1,2012,2,3.349757,46.198025,702597,7.269712e+05,6.566524e+06
2,2012,3,3.361577,46.827747,706240,7.275625e+05,6.636462e+06
3,2012,4,0.402182,48.201563,708321,5.070276e+05,6.792198e+06
4,2012,5,6.461081,43.281648,708369,9.810095e+05,6.248657e+06
...,...,...,...,...,...,...,...
40017,2016,40018,3.380200,47.790156,1131396,7.284673e+05,6.743375e+06
40018,2016,40019,2.156438,42.686748,1131409,6.307829e+05,6.176717e+06
40019,2016,40020,7.424664,47.899971,1131410,1.030439e+06,6.764769e+06
40020,2016,40021,3.757202,44.924973,1131424,7.597461e+05,6.425373e+06


In [61]:
# Read the without-idp site table back from disk and display it as a check.
# NOTE(review): the saved output of this cell shows only first_year, SiteID,
# x and y, while the cell that writes this file also selects x_fr and y_fr;
# this cell's execution count is also lower than the writer's. The output is
# probably stale — rerun the notebook top to bottom to confirm.
pd.read_csv("nfi_final_sites_without_idp.csv")

Unnamed: 0,first_year,SiteID,x,y
0,2011,1,-2.842824,48.337505
1,2012,2,3.349757,46.198025
2,2012,3,3.361577,46.827747
3,2012,4,0.402182,48.201563
4,2012,5,6.461081,43.281648
...,...,...,...,...
39280,2016,40018,3.380200,47.790156
39281,2016,40019,2.156438,42.686748
39282,2016,40020,7.424664,47.899971
39283,2016,40021,3.757202,44.924973


---
The cells below were used to fix the old formatting structure of the data and are now commented out. Do not rerun them.

In [51]:
# old_data = pd.read_csv(here("python/01_download_raw_gee_data/sites_years.csv"))
# old_data = old_data.rename(columns={"first_visit": "first_year", "id": "SiteID"})
# # old_data

In [52]:
# old_data = old_data.merge(
#     nfi_data[["x", "y", "first_year", "idp"]].drop_duplicates(),
#     on=["x", "y", "first_year"],
#     how="left",
# )
# old_data

In [53]:
# # Save data with idp attached to it for later merging of predictor data and nfi data
# old_data.to_csv("nfi_final_sites_with_idp.csv", index=False)

# # Save data without idp attached for faster download and extraction of predictor data
# old_data[["first_year", "SiteID", "x", "y"]].drop_duplicates().to_csv(
#     "nfi_final_sites_without_idp.csv", index=False
# )

In [54]:
# pd.read_csv("nfi_final_sites_with_idp.csv")

In [55]:
# pd.read_csv("nfi_final_sites_without_idp.csv")