In [1]:
import os
import geopandas as gpd
import pandas as pd
import fiona

In [2]:
# Enable the KML driver in fiona so GeoPandas can read and write .kml files.
# The driver registry is reached through the directly imported `fiona` module
# rather than `gpd.io.file.fiona`: that attribute is a GeoPandas internal and
# is not guaranteed to hold the module (GeoPandas releases that import fiona
# lazily leave it unset until the first read/write).
fiona.drvsupport.supported_drivers['KML'] = 'rw'

### Read in Data

In [3]:
# read in annotations
# Walk the 'Completed' folder and load every .kml file found. For each file,
# the name of the folder that contains it is stored as 'Processed By' and the
# file name up to the first "." is stored as 'tile_id'.
annotation_frames = []

for path, subdirs, files in os.walk('Completed'):
    for filename in files:
        if not filename.endswith(".kml"):
            continue

        full_path = os.path.join(path, filename)
        # Take the containing folder from `path` instead of splitting the
        # joined path on "/": os.path.join uses the platform separator, so a
        # "/" split returns the wrong piece on Windows.
        processed_by = os.path.basename(os.path.normpath(path))

        # Read file
        tmp = gpd.read_file(full_path, driver='KML')
        # Lower-case the key so it matches the status sheet, whose
        # 'Processed By' column is lower-cased before the two are merged.
        tmp['Processed By'] = processed_by.strip().lower()
        tmp['tile_id'] = filename.split(".")[0].strip()

        annotation_frames.append(tmp)

# Concatenate once at the end: DataFrame.append was removed in pandas 2.0 and
# growing a frame inside the loop copies all previous rows on every iteration.
# With no .kml files the result is an empty DataFrame, as before.
df_annotations = (
    pd.concat(annotation_frames, ignore_index=True)
    if annotation_frames
    else pd.DataFrame()
)

In [4]:
# read in statuses
# Load the selected worksheet tab(s), keep only the columns used downstream,
# and normalise the text columns that are later used for merging/filtering.
sets=['Annotation Tab']
status_frames = []
for s in sets:
    tmp = pd.read_excel("Steel Plant Localization - Group One Worksheet.xlsx", sheet_name=s)
    tmp = tmp[['tile_id','plant_found','class', 'level','imagery_date','Processed By','status', 'Confidence']]

    status_frames.append(tmp)

# DataFrame.append was removed in pandas 2.0; concatenate the tabs in one call.
df_status = pd.concat(status_frames, ignore_index=True)

# merge keys: convert to string, lower-case the annotator name, and remove
# any leading/trailing spaces
df_status['Processed By'] = df_status['Processed By'].astype(str).str.lower().str.strip()
df_status['tile_id'] = df_status['tile_id'].astype(str).str.strip()

# fix status and plant_found: blank out missing values, lower-case, trim
for column in ['status', 'plant_found']:
    df_status[column] = df_status[column].fillna('').str.lower().str.strip()

### Output for SFI

In [5]:
# Attach the worksheet status columns to each annotation (left join on tile
# and annotator), then keep the first row for every annotation 'Name'.
merge_keys = ['tile_id', 'Processed By']
df = pd.merge(
    df_annotations,
    df_status,
    how='left',
    on=merge_keys,
).drop_duplicates(subset='Name')

In [6]:
# Keep only the columns that go into the SFI output; the annotation 'Name'
# becomes 'tile_id' and 'Confidence' is lower-cased to 'confidence'.
sfi_columns = ['Name', 'Confidence', 'class', 'level', 'imagery_date', 'geometry']
output_df = df[sfi_columns].rename(
    columns={'Name': 'tile_id', 'Confidence': 'confidence'}
)

In [7]:
# Write the SFI sites as GeoJSON. The target folder is created first so the
# cell also works when 'output' does not exist yet (e.g. a fresh checkout).
os.makedirs("output", exist_ok=True)
output_df.to_file("output/steel_localization_sites.geojson", driver='GeoJSON')

In [8]:
# Number of rows written to the SFI output.
output_df.shape[0]

450

In [9]:
# Preview the first five rows of the SFI output.
output_df.head(n=5)

Unnamed: 0,tile_id,confidence,class,level,imagery_date,geometry
0,MGRS-50TQL-0266-2020-01,High,Developed,Medium,2020-07-16,POINT Z (119.41041 41.27221 0.00000)
1,MGRS-48RUS-0059-2020-01,,Undeveloped,High,2020-05-27,POINT Z (114.13608 38.38305 0.00000)
2,MGRS-49TCF-0402-2020-01,High,Developed,Medium,2020-04-13,POINT Z (109.72749 40.62312 0.00000)
3,MGRS-49REL-0231-2020-01-1,High,Developed,High,2020-10-24,POINT Z (111.95033 27.74783 0.00000)
4,MGRS-49REL-0231-2020-01-2,High,Developed,High,2020-10-24,POINT Z (111.96565 27.74744 0.00000)


### Output for SecureWatch

In [10]:
# Reduce the status table to the tile ids whose status reports an imagery
# problem.
# NOTE: this overwrites df_status with a single-column frame, so the cell
# cannot be re-run without first re-running the cell that loads the statuses.
# NOTE(review): duplicates are dropped *before* filtering, so only the first
# row per tile_id is checked against the status list - confirm this is intended.
imagery_problem_statuses = ['no imagery', 'unclear imagery', 'cloudy imagery']
df_status = (
    df_status
    .drop_duplicates(subset='tile_id')
    .loc[lambda frame: frame.status.isin(imagery_problem_statuses), ['tile_id']]
)

In [11]:
# Load both chip-prediction KML files and stack them into one frame
# (row indexes of the two files are kept as-is).
geom_df1, geom_df2 = [
    gpd.read_file(kml_path, driver='KML')
    for kml_path in ('chip_predictions_steel (1).kml', 'chip_predictions_steel (2).kml')
]
geom_df = pd.concat([geom_df1, geom_df2])

In [12]:
# Inner-join the flagged tile ids with the chip geometries (tile_id == Name),
# keep just the id and geometry, and drop exact duplicate rows.
review_df = pd.merge(
    df_status,
    geom_df,
    left_on='tile_id',
    right_on='Name',
)[['tile_id', 'geometry']]
review_gdf = gpd.GeoDataFrame(review_df, geometry='geometry').drop_duplicates()

In [13]:
# Write the SecureWatch review tiles as GeoJSON. The target folder is created
# first so the cell also works when 'output' does not exist yet.
os.makedirs("output", exist_ok=True)
review_gdf.to_file("output/sw_steel_localization_sites.geojson", driver='GeoJSON')

In [14]:
# Number of tiles written to the SecureWatch output.
review_gdf.shape[0]

417

In [15]:
# Preview the first five rows of the SecureWatch output.
review_gdf.head(n=5)

Unnamed: 0,tile_id,geometry
0,MGRS-45TXH-0436-2020-12,"POLYGON ((88.51712 42.82952, 88.51778 42.85653..."
2,MGRS-45TVG-0531-2020-01,"POLYGON ((86.98743 42.04741, 86.98743 42.07443..."
4,MGRS-46SDD-0167-2020-01,"POLYGON ((92.43398 34.37607, 92.43379 34.40312..."
6,MGRS-43SED-0240-2020-01,"POLYGON ((76.14285 38.98777, 76.14329 39.01479..."
8,MGRS-45RWN-0096-2020-01,"POLYGON ((87.15435 29.37010, 87.15439 29.39717..."
