In [1]:
import os
import geopandas as gpd
import pandas as pd
import fiona

In [2]:
# Enable fiona's KML driver for both reading and writing ('rw'); the cells below
# read and write KML through geopandas with driver='KML'.
# The registry is reached through the directly imported `fiona` module rather than
# geopandas' private `gpd.io.file.fiona` attribute, which newer geopandas releases
# may not populate.
fiona.drvsupport.supported_drivers['KML'] = 'rw'

### Read in Data

In [3]:
# read in annotations
# Every *.kml file found under 'Completed' is loaded. The name of the folder that
# holds the file becomes 'Processed By' and the part of the file name before the
# first '.' becomes 'tile_id'.
annotation_frames = []

for path, subdirs, files in os.walk('Completed'):
    for name in files:
        if not name.endswith(".kml"):
            continue

        # os.path helpers instead of splitting on "/" so the parsing does not
        # depend on the platform's path separator.
        annotator = os.path.basename(path)
        tile_id = name.split(".")[0]

        # Read file
        tmp = gpd.read_file(os.path.join(path, name), driver='KML')
        # Lower-cased because the status sheet's 'Processed By' key is lower-cased
        # before the two tables are merged on it.
        tmp['Processed By'] = annotator.strip().lower()
        tmp['tile_id'] = tile_id.strip()

        annotation_frames.append(tmp)

# Concatenate once at the end: DataFrame.append was removed in pandas 2.0 and
# re-copied the accumulated frame on every iteration.
df_annotations = (
    pd.concat(annotation_frames, ignore_index=True)
    if annotation_frames
    else pd.DataFrame()  # no KML files found: same empty frame as before
)

In [4]:
# read in statuses
# One sheet per entry in `sets`; only the columns used downstream are kept.
status_columns = ['tile_id','Plant_found','Class', 'Level','imagery_date','Processed By','Status']

sets=['Annotator 1']
status_frames = []
for s in sets:
    tmp = pd.read_excel("Cement Plant Localization - Group 2 (Refresh) - 5th March 2021.xlsx", sheet_name=s)
    status_frames.append(tmp[status_columns])

# Single concat instead of DataFrame.append, which was removed in pandas 2.0.
df_status = pd.concat(status_frames, ignore_index=True)

# merge keys: convert to string and remove any leading/trailing spaces
# NOTE: astype(str) turns missing cells into the literal string 'nan'.
for key_col in ['Processed By', 'tile_id']:
    df_status[key_col] = df_status[key_col].astype(str).str.strip()

# annotator names to lower case
df_status['Processed By'] = df_status['Processed By'].str.lower()

# fix Status and Plant_found: blank out missing values, lower-case, trim spaces
for text_col in ['Status', 'Plant_found']:
    df_status[text_col] = df_status[text_col].fillna('').str.lower().str.strip()

### Output for SFI

In [5]:
# Left-join the status-sheet attributes onto every annotation using the
# (tile_id, Processed By) pair, then keep the first row for each placemark 'Name'.
merge_keys = ['tile_id', 'Processed By']
df = (
    df_annotations
    .merge(df_status, how='left', on=merge_keys)
    .drop_duplicates(subset='Name')
)

In [6]:
# Keep only the columns that are exported and give them their output names;
# 'imagery_date' and 'geometry' keep their names.
output_df = (
    df[['Name', 'Class', 'Level', 'imagery_date', 'geometry']]
    .rename(columns={'Name': 'tile_id', 'Class': 'class', 'Level': 'level'})
)

In [7]:
# Create the output folder if needed: writing into a directory that does not
# exist makes the export fail.
os.makedirs("output", exist_ok=True)
output_df.to_file("output/cement_localization_sites.geojson", driver='GeoJSON')

In [8]:
# Row count of the frame written to output/cement_localization_sites.geojson.
len(output_df)

1667

In [9]:
# Preview the first rows of the exported columns.
output_df.head()

Unnamed: 0,tile_id,class,level,imagery_date,geometry
0,MGRS-49QDF-0488-2020-01,Developed,Medium,2020-12-04,POINT Z (110.42073 22.76761 0.00000)
1,MGRS-49QHG-0075-2020-04,Developed,Low,2020-04-28,POINT Z (114.20534 23.60882 0.00000)
2,MGRS-50RNN-0143-2020-02,Developed,Low,2020-11-25,POINT Z (117.10950 24.95032 0.00000)
3,MGRS-49QEF-0289-2020-01,Developed,Low,2021-01-12,POINT Z (111.77041 23.12824 0.00000)
4,MGRS-49QDE-0544-2020-01,Developed,Medium,2020-11-25,POINT Z (110.87099 21.69254 0.00000)


### Output for Second Cement Annotation Task

In [10]:
# Check the container type: the KML export in the next cell calls .to_crs() and
# .to_file() on a selection of this frame.
type(output_df)

geopandas.geodataframe.GeoDataFrame

In [11]:
# Export tile ids and geometries as KML for the second annotation task.
# fiona.Env() is the supported replacement for the deprecated fiona.drivers()
# context manager.
with fiona.Env():
    new_plants_kml = (
        output_df[['tile_id', 'geometry']]
        .to_crs("EPSG:4326")
        # presumably 'name' is the field the KML driver uses as the placemark label; confirm
        .rename(columns={'tile_id': 'name'})
    )
    new_plants_kml.to_file("output/new_cement_plants.kml", driver='KML')

In [12]:
# Build the tracking sheet for the second annotation task: one row per tile plus
# empty bookkeeping columns (presumably filled in later by annotators).
# .assign() adds the columns to a new frame instead of assigning into a slice of
# the previous output_df, which is the pattern behind SettingWithCopyWarning.
tracking_columns = ['date processed', 'processed by', 'status', 'notes']
output_df = output_df[['tile_id', 'geometry']].assign(
    **{column: "" for column in tracking_columns}
)
# Geometry is left out of the CSV; only tile ids and the empty columns are written.
output_df.drop(columns='geometry').to_csv("output/new_cement_plants.csv", index=False)

### Output for SecureWatch

In [13]:
# Reduce the status table to the ids of tiles whose imagery could not be used.
# Duplicated tile ids are collapsed to their first row *before* the status filter.
# NOTE: this overwrites df_status, so the cell cannot be re-run without first
# re-running the cell that loads the statuses.
imagery_problem_statuses = ['no imagery', 'unclear imagery', 'cloudy imagery']
df_status = (
    df_status
    .drop_duplicates(subset='tile_id')
    .loc[lambda status: status.Status.isin(imagery_problem_statuses), ['tile_id']]
)

In [14]:
# Load both chip-prediction KML files and stack them into one frame.
# The original row indices are kept (no ignore_index), so index labels may repeat.
chip_kml_files = ('chip_predictions_cement_v2.kml', 'chip_predictions_cement.kml')
geom_df1, geom_df2 = (
    gpd.read_file(kml_file, driver='KML') for kml_file in chip_kml_files
)
geom_df = pd.concat([geom_df1, geom_df2])

In [15]:
# Inner-join the flagged tile ids to the chip geometries (tile_id == Name), keep
# just the id and geometry, and drop fully duplicated rows.
review_df = df_status.merge(geom_df, left_on='tile_id', right_on='Name')[
    ['tile_id', 'geometry']
]
review_gdf = gpd.GeoDataFrame(review_df, geometry='geometry').drop_duplicates()

In [16]:
# Write the flagged tiles and their geometries as GeoJSON.
review_gdf.to_file("output/sw_cement_localization_sites.geojson", driver='GeoJSON')

In [17]:
# Row count of the frame written to output/sw_cement_localization_sites.geojson.
len(review_gdf)

128

In [18]:
# Preview the first flagged tiles and their geometries.
review_gdf.head()

Unnamed: 0,tile_id,geometry
0,MGRS-49TFJ-0108-2020-01,"POLYGON ((112.95183 43.80458, 112.95271 43.831..."
1,MGRS-49TFJ-0161-2020-01,"POLYGON ((112.65210 43.75529, 112.65285 43.782..."
2,MGRS-49TFH-0071-2020-01,"POLYGON ((112.85867 43.20268, 112.85949 43.229..."
3,MGRS-45TYK-0137-2020-01,"POLYGON ((90.23303 44.51391, 90.23452 44.54088..."
4,MGRS-45TYK-0063-2020-01,"POLYGON ((90.16644 44.67782, 90.16791 44.70479..."
