# Final Checks and Prepare Final Repository Upload

Perform final print checks to ensure attribute logic makes sense, and prepare final repository formats (SHP, GEOPACKAGE, CSV). 

In addition to these files, be sure to check **GMSEUS_NAIP_Arrays.gpkg**, **GMSEUS_NAIP_Panels.gpkg**, and **GMSEUS_NAIP_PanelsNoQAQC.gpkg** (and all associated files) that were created in `script5`. 

# Import Libraries and Set Paths

In [11]:
# Import libraries
import numpy as np
import pandas as pd
import geopandas as gpd
import os 

# Version label for this release
version = 'v1.0'

# Working directory and the data / figure folders beneath it
wd = r'S:\Users\stidjaco\R_files\BigPanel'
downloaded_path = os.path.join(wd, r'Data\Downloaded')
derived_path = os.path.join(wd, r'Data\Derived')
derivedTemp_path = os.path.join(derived_path, r'intermediateProducts')
figure_path = os.path.join(wd, r'Figures')

# Inputs: array and panel-row shapefiles produced by script7 and script8
gmseusArraysInputPath = os.path.join(derivedTemp_path, r'GMSEUS_Arrays_estTilt.shp')
gmseusPanelsInputPath = os.path.join(derivedTemp_path, r'GMSEUS_Panels_wSource.shp')

# Outputs: each final dataset is written as a shapefile, a geopackage, and a CSV.
# The three paths of a dataset share one extension-less stem and differ only by suffix.
_arraysFinalStem = os.path.join(derived_path, r'GMSEUS/GMSEUS_Arrays_Final')
_panelsFinalStem = os.path.join(derived_path, r'GMSEUS/GMSEUS_Panels_Final')
gmseusArraysFinalPath, gmseusArraysFinalGpkgPath, gmseusArraysFinalCSVPath = (
    _arraysFinalStem + ext for ext in ('.shp', '.gpkg', '.csv')
)
gmseusPanelsFinalPath, gmseusPanelsFinalGpkgPath, gmseusPanelsFinalCSVPath = (
    _panelsFinalStem + ext for ext in ('.shp', '.gpkg', '.csv')
)

# Final Preparation and Print Checks for Attribute Logic
For each numerical column (all columns except Source, modType, mount, and geometry), print the total number of NaN values, -9999 values, and remaining valid values (neither NaN nor -9999). 
For each categorical column (Source, modType, mount), print the total number of NaN values, unknown values, and remaining valid values (as value counts).
For the geometry column, print the number of invalid geometries.

In [12]:
# Read the array and panel-row layers from the intermediate products
gmseusArrays = gpd.read_file(gmseusArraysInputPath)
gmseusPanels = gpd.read_file(gmseusPanelsInputPath)

# Stamp both layers with the dataset version
for gdf in (gmseusArrays, gmseusPanels):
    gdf['version'] = version

# Columns (and column order) of the final array and panel-row products
arrayCols = [
    'arrayID', 'Source', 'nativeID', 'latitude', 'longitude', 'newBound',
    'totArea', 'totRowArea', 'numRow', 'instYr', 'instYrLT', 'capMW', 'capMWest',
    'modType', 'effInit', 'GCR1', 'GCR2', 'mount', 'tilt', 'tiltEst',
    'avgAzimuth', 'avgLength', 'avgWidth', 'avgSpace',
    'STATEFP', 'COUNTYFP', 'version', 'geometry',
]
panelCols = [
    'panelID', 'Source', 'arrayID', 'rowArea', 'rowWidth', 'rowLength',
    'rowAzimuth', 'rowMount', 'rowSpace', 'version', 'geometry',
]

# Report any expected column that is absent from the loaded layers
missingArrayCols = [name for name in arrayCols if name not in gmseusArrays.columns]
missingPanelCols = [name for name in panelCols if name not in gmseusPanels.columns]
if missingArrayCols:
    print(f'Columns missing in gmseusArrays: {missingArrayCols}')
if missingPanelCols:
    print(f'Columns missing in gmseusPanels: {missingPanelCols}')

# Keep only the final columns, in the order listed above
gmseusArrays = gmseusArrays[arrayCols]
gmseusPanels = gmseusPanels[panelCols]

# Sort both layers by arrayID and renumber the index from 0
gmseusArrays = gmseusArrays.sort_values('arrayID', ignore_index=True)
gmseusPanels = gmseusPanels.sort_values('arrayID', ignore_index=True)

# Attribute-only copies (geometry removed) for the CSV exports
gmseusArraysCSV = gmseusArrays.drop(columns=['geometry'])
gmseusPanelsCSV = gmseusPanels.drop(columns=['geometry'])

# NaN check on every attribute column of gmseusArrays. The non-categorical columns are
# reported first (in column order), followed by the categorical columns
# (Source, modType, mount). Geometry is checked separately below.
categoricalCols = ['Source', 'modType', 'mount']
otherCols = [col for col in gmseusArrays.columns if col not in categoricalCols + ['geometry']]
for col in otherCols + categoricalCols:
    if gmseusArrays[col].isna().any():
        print(f'Column {col} contains NaN Values')

# Geometry checks, evaluated on the whole GeoSeries at once. Missing (None) geometries
# are reported on their own: a per-row `.is_empty` call would raise AttributeError on
# None, and the GeoSeries-level `is_empty` does not flag them.
arrayGeoms = gmseusArrays['geometry']
missingGeom = arrayGeoms.isna()
emptyGeom = arrayGeoms.is_empty
# Exclude missing/empty rows so each problem row is reported under a single heading
invalidGeom = ~arrayGeoms.is_valid & ~missingGeom & ~emptyGeom

for i in gmseusArrays.index[missingGeom]:
    print(f'Row {i} has a missing geometry')
for i in gmseusArrays.index[emptyGeom]:
    print(f'Row {i} has an empty geometry')
for i in gmseusArrays.index[invalidGeom]:
    print(f'Row {i} has an invalid geometry')

# Set Final Columns, Version, and Export

In [13]:
# Make sure every output folder exists before writing; the writers below do not
# create missing directories and would otherwise fail on a fresh checkout.
for outPath in (gmseusArraysFinalPath, gmseusArraysFinalGpkgPath,
                gmseusPanelsFinalPath, gmseusPanelsFinalGpkgPath,
                gmseusArraysFinalCSVPath, gmseusPanelsFinalCSVPath):
    os.makedirs(os.path.dirname(outPath), exist_ok=True)

# Export the final datasets (shapefile and geopackage for each layer)
gmseusArrays.to_file(gmseusArraysFinalPath, driver='ESRI Shapefile')
gmseusArrays.to_file(gmseusArraysFinalGpkgPath, driver='GPKG')
gmseusPanels.to_file(gmseusPanelsFinalPath, driver='ESRI Shapefile')
gmseusPanels.to_file(gmseusPanelsFinalGpkgPath, driver='GPKG')

# Export the final datasets as CSV (attribute tables without geometry, no index column)
gmseusArraysCSV.to_csv(gmseusArraysFinalCSVPath, index=False)
gmseusPanelsCSV.to_csv(gmseusPanelsFinalCSVPath, index=False)