In [64]:
import os
import pandas as pd
import geopandas as gpd
import numpy as np
from shapely.geometry import box
import datetime
from IPython.display import display

In [65]:
wfdp_path = '/datasets/rpartsey/satellite/planet/wfdp_labels'
polygons_path = os.path.join(wfdp_path, 'aggregated_polygons.geojson')

In [66]:
gdf = gpd.read_file(polygons_path)

In [67]:
gdf.shape

(2277, 6)

In [68]:
none_img = gdf.planet_img.isna()
none_poly = gdf.geometry.isna()

none = none_img | none_poly

gdf_not_none = gdf[~none].copy()
gdf_none = gdf[none].copy()

In [69]:
gdf_not_none.shape

(2260, 6)

In [70]:
gdf_none.shape

(17, 6)

In [71]:
sentinel = gdf_not_none.planet_img.map(lambda x: 'Sentinel' in x)

gdf_not_sentinel = gdf_not_none[~sentinel].copy()
gdf_sentinel = gdf_not_none[sentinel].copy()

In [72]:
gdf_not_sentinel.shape

(2053, 6)

In [73]:
gdf_sentinel.shape

(207, 6)

In [74]:
symbols = gdf_not_sentinel.planet_img.map(lambda x: any(s in x for s in ['10.04.18', ',', '?????', 'NULL', '(', ')']))

gdf_not_symbols = gdf_not_sentinel[~symbols].copy()
gdf_symbols = gdf_not_sentinel[symbols].copy()

In [75]:
gdf_not_symbols.shape

(2036, 6)

In [76]:
gdf_symbols.shape

(17, 6)

In [77]:
inv_gdf = pd.concat([gdf_symbols, gdf_sentinel, gdf_none])
inv_gdf.shape

(241, 6)

In [78]:
# gdf_not_symbols.to_csv('~/valid_polygons.csv')

In [79]:
# inv_gdf.to_csv('~/invalid_polygons.csv')

In [80]:
gdf_not_symbols.planet_img.shape

(2036,)

In [81]:
gdf_not_symbols.planet_img.unique().shape

(1480,)

## Generating bboxes

In [82]:
gdf = gdf_not_symbols.copy()

In [83]:
gdf.head()

Unnamed: 0,source,date,planet_img,firms_sour,editor,geometry
0,wfdp_Zaporozhje_2018_summer_Leiberiuk_2019-01-...,2018-08-28,20180831_075824_0f35_tms.xml,V1,Oleksandr Leiberiuk - 0001,"POLYGON ((36.62941 47.27804, 36.63887 47.27809..."
1,wfdp_Zaporozhje_2018_summer_Leiberiuk_2019-01-...,2018-08-22,20180823_075453_0e20_tms.xml,V1,Oleksandr Leiberiuk - 0002,"POLYGON ((36.85774 46.82754, 36.85798 46.82746..."
2,wfdp_Zaporozhje_2018_summer_Leiberiuk_2019-01-...,2018-08-23,20180824_075716_1011_tms.xml,V1,Oleksandr Leiberiuk - 0003,"POLYGON ((36.85279 46.81438, 36.85312 46.81543..."
3,wfdp_Zaporozhje_2018_summer_Leiberiuk_2019-01-...,2018-08-22,20180823_073520_0f3c_tms.xml,_,Oleksandr Leiberiuk - 0004,"POLYGON ((35.86952 46.65867, 35.86971 46.65775..."
4,wfdp_Zaporozhje_2018_summer_Leiberiuk_2019-01-...,2018-08-22,20180823_073521_0f3c_tms.xml,_,Oleksandr Leiberiuk - 0005,"POLYGON ((35.85320 46.66909, 35.85242 46.67009..."


In [84]:
gdf1 = gdf.copy()

In [85]:
old_epsg = gdf.crs.to_epsg()
old_epsg

4326

In [86]:
gdf = gdf.to_crs(epsg=32636)

In [87]:
gdf.head()

Unnamed: 0,source,date,planet_img,firms_sour,editor,geometry
0,wfdp_Zaporozhje_2018_summer_Leiberiuk_2019-01-...,2018-08-28,20180831_075824_0f35_tms.xml,V1,Oleksandr Leiberiuk - 0001,"POLYGON ((774478.951 5242453.718, 775194.042 5..."
1,wfdp_Zaporozhje_2018_summer_Leiberiuk_2019-01-...,2018-08-22,20180823_075453_0e20_tms.xml,V1,Oleksandr Leiberiuk - 0002,"POLYGON ((794215.114 5193228.641, 794233.625 5..."
2,wfdp_Zaporozhje_2018_summer_Leiberiuk_2019-01-...,2018-08-23,20180824_075716_1011_tms.xml,V1,Oleksandr Leiberiuk - 0003,"POLYGON ((793909.953 5191747.327, 793928.758 5..."
3,wfdp_Zaporozhje_2018_summer_Leiberiuk_2019-01-...,2018-08-22,20180823_073520_0f3c_tms.xml,_,Oleksandr Leiberiuk - 0004,"POLYGON ((719537.098 5171234.474, 719555.721 5..."
4,wfdp_Zaporozhje_2018_summer_Leiberiuk_2019-01-...,2018-08-22,20180823_073521_0f3c_tms.xml,_,Oleksandr Leiberiuk - 0005,"POLYGON ((718246.626 5172346.288, 718182.996 5..."


In [88]:
gdf['poly_bounds'] = gdf.geometry.map(lambda poly: poly.bounds)
gdf.head()

Unnamed: 0,source,date,planet_img,firms_sour,editor,geometry,poly_bounds
0,wfdp_Zaporozhje_2018_summer_Leiberiuk_2019-01-...,2018-08-28,20180831_075824_0f35_tms.xml,V1,Oleksandr Leiberiuk - 0001,"POLYGON ((774478.951 5242453.718, 775194.042 5...","(774478.9507938821, 5241087.532265591, 775385...."
1,wfdp_Zaporozhje_2018_summer_Leiberiuk_2019-01-...,2018-08-22,20180823_075453_0e20_tms.xml,V1,Oleksandr Leiberiuk - 0002,"POLYGON ((794215.114 5193228.641, 794233.625 5...","(793181.5444087437, 5189774.82635592, 796043.9..."
2,wfdp_Zaporozhje_2018_summer_Leiberiuk_2019-01-...,2018-08-23,20180824_075716_1011_tms.xml,V1,Oleksandr Leiberiuk - 0003,"POLYGON ((793909.953 5191747.327, 793928.758 5...","(793833.1318794352, 5191523.895850659, 794612...."
3,wfdp_Zaporozhje_2018_summer_Leiberiuk_2019-01-...,2018-08-22,20180823_073520_0f3c_tms.xml,_,Oleksandr Leiberiuk - 0004,"POLYGON ((719537.098 5171234.474, 719555.721 5...","(719487.5977900957, 5171072.340910594, 719558...."
4,wfdp_Zaporozhje_2018_summer_Leiberiuk_2019-01-...,2018-08-22,20180823_073521_0f3c_tms.xml,_,Oleksandr Leiberiuk - 0005,"POLYGON ((718246.626 5172346.288, 718182.996 5...","(717862.227695063, 5171777.658725349, 718428.0..."


In [89]:
gdf.iloc[536]

source         wfdp_Lugansk_2017_summer_Leiberiuk_2019-09-13....
date                                                  2017-08-21
planet_img                          20170824_073417_0f4e_tms.xml
firms_sour                                                    V1
editor                               Oleksandr Leiberiuk  - 0015
geometry       POLYGON ((1014232.807883809 5492076.198532819,...
poly_bounds    (1014185.7781092231, 5491547.556464226, 101725...
Name: 537, dtype: object

In [90]:
COORD_TO_PIXEL_RATIO = 3 # coord / pixel = 3 / 1 in gdf.to_crs(epsg=32636)
PIXEL_PADDING = 128
COORD_PADDING = PIXEL_PADDING * COORD_TO_PIXEL_RATIO

def expand_bounds(bounds):
    minx, miny, maxx, maxy = bounds
    
    minx -= COORD_PADDING
    miny -= COORD_PADDING
    maxx += COORD_PADDING
    maxy += COORD_PADDING
    
    return minx, miny, maxx, maxy

gdf['expanded_bounds'] = gdf['poly_bounds'].map(expand_bounds)
gdf.head()

Unnamed: 0,source,date,planet_img,firms_sour,editor,geometry,poly_bounds,expanded_bounds
0,wfdp_Zaporozhje_2018_summer_Leiberiuk_2019-01-...,2018-08-28,20180831_075824_0f35_tms.xml,V1,Oleksandr Leiberiuk - 0001,"POLYGON ((774478.951 5242453.718, 775194.042 5...","(774478.9507938821, 5241087.532265591, 775385....","(774094.9507938821, 5240703.532265591, 775769...."
1,wfdp_Zaporozhje_2018_summer_Leiberiuk_2019-01-...,2018-08-22,20180823_075453_0e20_tms.xml,V1,Oleksandr Leiberiuk - 0002,"POLYGON ((794215.114 5193228.641, 794233.625 5...","(793181.5444087437, 5189774.82635592, 796043.9...","(792797.5444087437, 5189390.82635592, 796427.9..."
2,wfdp_Zaporozhje_2018_summer_Leiberiuk_2019-01-...,2018-08-23,20180824_075716_1011_tms.xml,V1,Oleksandr Leiberiuk - 0003,"POLYGON ((793909.953 5191747.327, 793928.758 5...","(793833.1318794352, 5191523.895850659, 794612....","(793449.1318794352, 5191139.895850659, 794996...."
3,wfdp_Zaporozhje_2018_summer_Leiberiuk_2019-01-...,2018-08-22,20180823_073520_0f3c_tms.xml,_,Oleksandr Leiberiuk - 0004,"POLYGON ((719537.098 5171234.474, 719555.721 5...","(719487.5977900957, 5171072.340910594, 719558....","(719103.5977900957, 5170688.340910594, 719942...."
4,wfdp_Zaporozhje_2018_summer_Leiberiuk_2019-01-...,2018-08-22,20180823_073521_0f3c_tms.xml,_,Oleksandr Leiberiuk - 0005,"POLYGON ((718246.626 5172346.288, 718182.996 5...","(717862.227695063, 5171777.658725349, 718428.0...","(717478.227695063, 5171393.658725349, 718812.0..."


In [91]:
gdf['bbox'] = gdf.expanded_bounds.map(lambda bounds: box(*bounds))

In [92]:
gdf.head()

Unnamed: 0,source,date,planet_img,firms_sour,editor,geometry,poly_bounds,expanded_bounds,bbox
0,wfdp_Zaporozhje_2018_summer_Leiberiuk_2019-01-...,2018-08-28,20180831_075824_0f35_tms.xml,V1,Oleksandr Leiberiuk - 0001,"POLYGON ((774478.951 5242453.718, 775194.042 5...","(774478.9507938821, 5241087.532265591, 775385....","(774094.9507938821, 5240703.532265591, 775769....","POLYGON ((775769.9788487663 5240703.532265591,..."
1,wfdp_Zaporozhje_2018_summer_Leiberiuk_2019-01-...,2018-08-22,20180823_075453_0e20_tms.xml,V1,Oleksandr Leiberiuk - 0002,"POLYGON ((794215.114 5193228.641, 794233.625 5...","(793181.5444087437, 5189774.82635592, 796043.9...","(792797.5444087437, 5189390.82635592, 796427.9...","POLYGON ((796427.9504789268 5189390.82635592, ..."
2,wfdp_Zaporozhje_2018_summer_Leiberiuk_2019-01-...,2018-08-23,20180824_075716_1011_tms.xml,V1,Oleksandr Leiberiuk - 0003,"POLYGON ((793909.953 5191747.327, 793928.758 5...","(793833.1318794352, 5191523.895850659, 794612....","(793449.1318794352, 5191139.895850659, 794996....","POLYGON ((794996.6478769487 5191139.895850659,..."
3,wfdp_Zaporozhje_2018_summer_Leiberiuk_2019-01-...,2018-08-22,20180823_073520_0f3c_tms.xml,_,Oleksandr Leiberiuk - 0004,"POLYGON ((719537.098 5171234.474, 719555.721 5...","(719487.5977900957, 5171072.340910594, 719558....","(719103.5977900957, 5170688.340910594, 719942....","POLYGON ((719942.080174937 5170688.340910594, ..."
4,wfdp_Zaporozhje_2018_summer_Leiberiuk_2019-01-...,2018-08-22,20180823_073521_0f3c_tms.xml,_,Oleksandr Leiberiuk - 0005,"POLYGON ((718246.626 5172346.288, 718182.996 5...","(717862.227695063, 5171777.658725349, 718428.0...","(717478.227695063, 5171393.658725349, 718812.0...","POLYGON ((718812.0377264365 5171393.658725349,..."


In [93]:
gdf = gdf.set_geometry('bbox')
gdf = gdf.to_crs(epsg=old_epsg)
# gdf['bbox'] = gdf['bbox'].to_crs(epsg=old_epsg)

In [94]:
gdf.head()

Unnamed: 0,source,date,planet_img,firms_sour,editor,geometry,poly_bounds,expanded_bounds,bbox
0,wfdp_Zaporozhje_2018_summer_Leiberiuk_2019-01-...,2018-08-28,20180831_075824_0f35_tms.xml,V1,Oleksandr Leiberiuk - 0001,"POLYGON ((774478.951 5242453.718, 775194.042 5...","(774478.9507938821, 5241087.532265591, 775385....","(774094.9507938821, 5240703.532265591, 775769....","POLYGON ((36.64537 47.26179, 36.64671 47.28129..."
1,wfdp_Zaporozhje_2018_summer_Leiberiuk_2019-01-...,2018-08-22,20180823_075453_0e20_tms.xml,V1,Oleksandr Leiberiuk - 0002,"POLYGON ((794215.114 5193228.641, 794233.625 5...","(793181.5444087437, 5189774.82635592, 796043.9...","(792797.5444087437, 5189390.82635592, 796427.9...","POLYGON ((36.88420 46.79210, 36.88698 46.83070..."
2,wfdp_Zaporozhje_2018_summer_Leiberiuk_2019-01-...,2018-08-23,20180824_075716_1011_tms.xml,V1,Oleksandr Leiberiuk - 0003,"POLYGON ((793909.953 5191747.327, 793928.758 5...","(793833.1318794352, 5191523.895850659, 794612....","(793449.1318794352, 5191139.895850659, 794996....","POLYGON ((36.86661 46.80844, 36.86771 46.82365..."
3,wfdp_Zaporozhje_2018_summer_Leiberiuk_2019-01-...,2018-08-22,20180823_073520_0f3c_tms.xml,_,Oleksandr Leiberiuk - 0004,"POLYGON ((719537.098 5171234.474, 719555.721 5...","(719487.5977900957, 5171072.340910594, 719558....","(719103.5977900957, 5170688.340910594, 719942....","POLYGON ((35.87455 46.65363, 35.87499 46.66199..."
4,wfdp_Zaporozhje_2018_summer_Leiberiuk_2019-01-...,2018-08-22,20180823_073521_0f3c_tms.xml,_,Oleksandr Leiberiuk - 0005,"POLYGON ((718246.626 5172346.288, 718182.996 5...","(717862.227695063, 5171777.658725349, 718428.0...","(717478.227695063, 5171393.658725349, 718812.0...","POLYGON ((35.86013 46.66034, 35.86086 46.67407..."


In [95]:
# gdf = gdf.set_geometry('bbox')

In [96]:
gdf.head()

Unnamed: 0,source,date,planet_img,firms_sour,editor,geometry,poly_bounds,expanded_bounds,bbox
0,wfdp_Zaporozhje_2018_summer_Leiberiuk_2019-01-...,2018-08-28,20180831_075824_0f35_tms.xml,V1,Oleksandr Leiberiuk - 0001,"POLYGON ((774478.951 5242453.718, 775194.042 5...","(774478.9507938821, 5241087.532265591, 775385....","(774094.9507938821, 5240703.532265591, 775769....","POLYGON ((36.64537 47.26179, 36.64671 47.28129..."
1,wfdp_Zaporozhje_2018_summer_Leiberiuk_2019-01-...,2018-08-22,20180823_075453_0e20_tms.xml,V1,Oleksandr Leiberiuk - 0002,"POLYGON ((794215.114 5193228.641, 794233.625 5...","(793181.5444087437, 5189774.82635592, 796043.9...","(792797.5444087437, 5189390.82635592, 796427.9...","POLYGON ((36.88420 46.79210, 36.88698 46.83070..."
2,wfdp_Zaporozhje_2018_summer_Leiberiuk_2019-01-...,2018-08-23,20180824_075716_1011_tms.xml,V1,Oleksandr Leiberiuk - 0003,"POLYGON ((793909.953 5191747.327, 793928.758 5...","(793833.1318794352, 5191523.895850659, 794612....","(793449.1318794352, 5191139.895850659, 794996....","POLYGON ((36.86661 46.80844, 36.86771 46.82365..."
3,wfdp_Zaporozhje_2018_summer_Leiberiuk_2019-01-...,2018-08-22,20180823_073520_0f3c_tms.xml,_,Oleksandr Leiberiuk - 0004,"POLYGON ((719537.098 5171234.474, 719555.721 5...","(719487.5977900957, 5171072.340910594, 719558....","(719103.5977900957, 5170688.340910594, 719942....","POLYGON ((35.87455 46.65363, 35.87499 46.66199..."
4,wfdp_Zaporozhje_2018_summer_Leiberiuk_2019-01-...,2018-08-22,20180823_073521_0f3c_tms.xml,_,Oleksandr Leiberiuk - 0005,"POLYGON ((718246.626 5172346.288, 718182.996 5...","(717862.227695063, 5171777.658725349, 718428.0...","(717478.227695063, 5171393.658725349, 718812.0...","POLYGON ((35.86013 46.66034, 35.86086 46.67407..."


In [97]:
gdf.shape

(2036, 9)

In [98]:
# base_dir = '/datasets/rpartsey/satellite/planet/wfdp_labels'

# bbox_dir = os.path.join(base_dir, 'bbox')

# gdf = gdf

# for image_name, group in gdf.groupby('planet_img'):
#     for index, row in group.reset_index(drop=True).iterrows():
#         polygon = row.bbox
#         gseries = gpd.GeoSeries([polygon])
#         gseries.crs = gdf.crs

#         image_id = image_name[:-8]
#         ext = image_name[-8:]
        
#         assert ext == '_tms.xml'
        
#         file_name = '{}-{}.geojson'.format(image_id, index)
#         gseries.to_file(os.path.join(bbox_dir, file_name), driver='GeoJSON')

In [99]:
# base_dir = '/datasets/rpartsey/satellite/planet/wfdp_labels'

# poly_dir = os.path.join(base_dir, 'poly')

# gdf = gdf

# for image_name, group in gdf.groupby('planet_img'):
#     for index, row in group.reset_index(drop=True).iterrows():
#         polygon = row.geometry
#         gseries = gpd.GeoSeries([polygon])
#         gseries.crs = gdf.crs

#         image_id = image_name[:-8]
#         ext = image_name[-8:]
        
#         assert ext == '_tms.xml'
        
#         file_name = '{}-{}.geojson'.format(image_id, index)
#         gseries.to_file(os.path.join(poly_dir, file_name), driver='GeoJSON')

In [100]:
gdf1[['planet_img', 'geometry']]

Unnamed: 0,planet_img,geometry
0,20180831_075824_0f35_tms.xml,"POLYGON ((36.62941 47.27804, 36.63887 47.27809..."
1,20180823_075453_0e20_tms.xml,"POLYGON ((36.85774 46.82754, 36.85798 46.82746..."
2,20180824_075716_1011_tms.xml,"POLYGON ((36.85279 46.81438, 36.85312 46.81543..."
3,20180823_073520_0f3c_tms.xml,"POLYGON ((35.86952 46.65867, 35.86971 46.65775..."
4,20180823_073521_0f3c_tms.xml,"POLYGON ((35.85320 46.66909, 35.85242 46.67009..."
...,...,...
2272,20170426_101350_0c54_tms.xml,"POLYGON ((36.28028 49.47896, 36.28055 49.47911..."
2273,20170329_074334_1029_tms.xml,"POLYGON ((35.63673 49.25724, 35.63713 49.25750..."
2274,20170329_074335_1029_tms.xml,"POLYGON ((35.62665 49.25472, 35.62624 49.25448..."
2275,20170329_074335_1029_tms.xml,"POLYGON ((35.58907 49.23716, 35.59059 49.23764..."


In [101]:
gdf[['planet_img', 'bbox']]

Unnamed: 0,planet_img,bbox
0,20180831_075824_0f35_tms.xml,"POLYGON ((36.64537 47.26179, 36.64671 47.28129..."
1,20180823_075453_0e20_tms.xml,"POLYGON ((36.88420 46.79210, 36.88698 46.83070..."
2,20180824_075716_1011_tms.xml,"POLYGON ((36.86661 46.80844, 36.86771 46.82365..."
3,20180823_073520_0f3c_tms.xml,"POLYGON ((35.87455 46.65363, 35.87499 46.66199..."
4,20180823_073521_0f3c_tms.xml,"POLYGON ((35.86013 46.66034, 35.86086 46.67407..."
...,...,...
2272,20170426_101350_0c54_tms.xml,"POLYGON ((36.28769 49.47491, 36.28823 49.48291..."
2273,20170329_074334_1029_tms.xml,"POLYGON ((35.64838 49.25292, 35.64913 49.26701..."
2274,20170329_074335_1029_tms.xml,"POLYGON ((35.63220 49.24446, 35.63292 49.25803..."
2275,20170329_074335_1029_tms.xml,"POLYGON ((35.61587 49.22927, 35.61665 49.24399..."


In [111]:
gdf.shape

(2036, 9)

In [102]:
new_gdf = gpd.GeoDataFrame()
new_gdf['planet_img'] = gdf1['planet_img']
new_gdf['poly'] = gdf1['geometry']
new_gdf['bbox'] = gdf['bbox']
# new_gdf['index1'] = gdf1.index
# new_gdf['index2'] = gdf.index

In [103]:
new_gdf

Unnamed: 0,planet_img,poly,bbox
0,20180831_075824_0f35_tms.xml,"POLYGON ((36.62941 47.27804, 36.63887 47.27809...","POLYGON ((36.64537 47.26179, 36.64671 47.28129..."
1,20180823_075453_0e20_tms.xml,"POLYGON ((36.85774 46.82754, 36.85798 46.82746...","POLYGON ((36.88420 46.79210, 36.88698 46.83070..."
2,20180824_075716_1011_tms.xml,"POLYGON ((36.85279 46.81438, 36.85312 46.81543...","POLYGON ((36.86661 46.80844, 36.86771 46.82365..."
3,20180823_073520_0f3c_tms.xml,"POLYGON ((35.86952 46.65867, 35.86971 46.65775...","POLYGON ((35.87455 46.65363, 35.87499 46.66199..."
4,20180823_073521_0f3c_tms.xml,"POLYGON ((35.85320 46.66909, 35.85242 46.67009...","POLYGON ((35.86013 46.66034, 35.86086 46.67407..."
...,...,...,...
2272,20170426_101350_0c54_tms.xml,"POLYGON ((36.28028 49.47896, 36.28055 49.47911...","POLYGON ((36.28769 49.47491, 36.28823 49.48291..."
2273,20170329_074334_1029_tms.xml,"POLYGON ((35.63673 49.25724, 35.63713 49.25750...","POLYGON ((35.64838 49.25292, 35.64913 49.26701..."
2274,20170329_074335_1029_tms.xml,"POLYGON ((35.62665 49.25472, 35.62624 49.25448...","POLYGON ((35.63220 49.24446, 35.63292 49.25803..."
2275,20170329_074335_1029_tms.xml,"POLYGON ((35.58907 49.23716, 35.59059 49.23764...","POLYGON ((35.61587 49.22927, 35.61665 49.24399..."


In [104]:
gdf_iter = new_gdf

log = []
duplicates = set()

count = 0
for index1, row1 in gdf_iter.iterrows():
    for index2, row2 in gdf_iter.iterrows():
        if index2 <= index1:
            continue
            
        if row1.planet_img == row2.planet_img and (row1.bbox.intersects(row2.poly) or row2.bbox.intersects(row1.poly)):
            intersection_area = row1.bbox.intersection(row2.bbox).area / max(row1.bbox.area, row2.bbox.area)
            print(index1, index2)
            print(intersection_area, row1.planet_img, row2.planet_img)
            print()
            
            
            log.append((index1, index2, intersection_area))
            duplicates.update([index1, index2])

53 54
0.15068341905113655 20190220_070501_0f46_tms.xml 20190220_070501_0f46_tms.xml

160 161
0.568396533136478 20190318_070225_0f1a_tms.xml 20190318_070225_0f1a_tms.xml

189 190
0.2302829186477901 20190316_070954_0f4d_tms.xml 20190316_070954_0f4d_tms.xml

205 206
0.6315417740037198 20171006_074341_100e_tms.xml 20171006_074341_100e_tms.xml

251 253
0.7205083266630108 20170402_074332_0e30_tms.xml 20170402_074332_0e30_tms.xml

263 291
0.22275819898500898 20170328_074032_1038_tms.xml 20170328_074032_1038_tms.xml

270 271
0.3769549969109655 20170322_075333_0e14_tms.xml 20170322_075333_0e14_tms.xml

275 276
0.5096584744024063 20170309_074540_0f1d_tms.xml 20170309_074540_0f1d_tms.xml

278 279
0.20710508754017457 20170402_074600_0f25_tms.xml 20170402_074600_0f25_tms.xml

350 351
0.3344771061239166 20170321_074908_0e0f_tms.xml 20170321_074908_0e0f_tms.xml

350 352
0.15020480546431395 20170321_074908_0e0f_tms.xml 20170321_074908_0e0f_tms.xml

351 352
0.5618997471225634 20170321_074908_0e0f_tms.x

924 925
0.07098836357507342 20170323_074045_0e19_tms.xml 20170323_074045_0e19_tms.xml

942 943
0.13511853781436245 20170407_072842_1038_tms.xml 20170407_072842_1038_tms.xml

946 947
0.24001114984690308 20170408_073447_1003_tms.xml 20170408_073447_1003_tms.xml

951 1179
1.0 20180906_071715_1_0f21_tms.xml 20180906_071715_1_0f21_tms.xml

951 1262
1.0 20180906_071715_1_0f21_tms.xml 20180906_071715_1_0f21_tms.xml

952 1180
0.9563109844106144 20180906_071715_1_0f21_tms.xml 20180906_071715_1_0f21_tms.xml

952 1263
0.9563109844106144 20180906_071715_1_0f21_tms.xml 20180906_071715_1_0f21_tms.xml

953 954
0.4836695416754968 20180914_071528_103f_tms.xml 20180914_071528_103f_tms.xml

953 1181
0.9888261250577104 20180914_071528_103f_tms.xml 20180914_071528_103f_tms.xml

953 1264
0.9888261250577104 20180914_071528_103f_tms.xml 20180914_071528_103f_tms.xml

954 1181
0.47649130834117587 20180914_071528_103f_tms.xml 20180914_071528_103f_tms.xml

954 1264
0.47649130834117587 20180914_071528_103f_tms.xml

1001 1226
1.0 20181102_075230_1032_tms.xml 20181102_075230_1032_tms.xml

1001 1309
1.0 20181102_075230_1032_tms.xml 20181102_075230_1032_tms.xml

1002 1227
0.9999999999999999 20180914_074821_1039_tms.xml 20180914_074821_1039_tms.xml

1002 1310
0.9999999999999999 20180914_074821_1039_tms.xml 20180914_074821_1039_tms.xml

1003 1228
0.9952069871890257 20180912_075311_0f15_tms.xml 20180912_075311_0f15_tms.xml

1003 1311
1.0 20180912_075311_0f15_tms.xml 20180912_075311_0f15_tms.xml

1004 1229
1.0 20181009_071355_1043_tms.xml 20181009_071355_1043_tms.xml

1004 1312
1.0 20181009_071355_1043_tms.xml 20181009_071355_1043_tms.xml

1005 1230
1.0000000000000002 20181008_075245_1009_tms.xml 20181008_075245_1009_tms.xml

1005 1313
1.0000000000000002 20181008_075245_1009_tms.xml 20181008_075245_1009_tms.xml

1006 1231
1.0000000000000002 20181026_075354_1015_tms.xml 20181026_075354_1015_tms.xml

1006 1314
1.0000000000000002 20181026_075354_1015_tms.xml 20181026_075354_1015_tms.xml

1007 1232
1.0 20181

1188 1271
0.9949132782193097 20181105_070353_1053_tms.xml 20181105_070353_1053_tms.xml

1189 1272
1.0 20181119_075811_0f35_tms.xml 20181119_075811_0f35_tms.xml

1190 1273
0.9601524796370976 20181123_075856_100c_tms.xml 20181123_075856_100c_tms.xml

1191 1274
0.9618502084485654 20181010_071611_0f46_tms.xml 20181010_071611_0f46_tms.xml

1192 1275
0.836999913669843 20181114_071107_0f3c_tms.xml 20181114_071107_0f3c_tms.xml

1194 1277
1.0 20181015_071259_0f44_tms.xml 20181015_071259_0f44_tms.xml

1195 1278
1.0 20181114_074641_0e26_tms.xml 20181114_074641_0e26_tms.xml

1196 1279
0.9917014078792685 20181101_075322_0f17_tms.xml 20181101_075322_0f17_tms.xml

1197 1280
0.8654713651148985 20180908_075054_1014_tms.xml 20180908_075054_1014_tms.xml

1198 1281
1.0 20181001_075151_100a_tms.xml 20181001_075151_100a_tms.xml

1199 1282
1.0 20181016_075118_1035_tms.xml 20181016_075118_1035_tms.xml

1200 1283
1.0 20180912_075128_0f42_tms.xml 20180912_075128_0f42_tms.xml

1201 1203
0.349701442266898 2018090

1343 1470
1.0 20180828_075001_1039_tms.xml 20180828_075001_1039_tms.xml

1344 1471
1.0 20180829_074758_102e_tms.xml 20180829_074758_102e_tms.xml

1345 1472
1.0 20180825_071945_1050_tms.xml 20180825_071945_1050_tms.xml

1346 1473
0.9973908485513314 20180828_071948_0f2d_tms.xml 20180828_071948_0f2d_tms.xml

1347 1474
0.9964336384866773 20180823_072011_101c_tms.xml 20180823_072011_101c_tms.xml

1433 1434
0.29043895059673197 20170921_081117_1049_tms.xml 20170921_081117_1049_tms.xml

1467 1468
0.11323516346037109 20171023_073626_1033_tms.xml 20171023_073626_1033_tms.xml

1481 1482
0.06068942882070859 20180615_073023_1054_tms.xml 20180615_073023_1054_tms.xml

1488 1489
0.25725916003408894 20180622_072941_1053_tms.xml 20180622_072941_1053_tms.xml

1491 1492
0.43168979873084024 20180623_072958_0f36_tms.xml 20180623_072958_0f36_tms.xml

1497 1498
0.19485551784174737 20180805_072211_1020_tms.xml 20180805_072211_1020_tms.xml

1505 1506
0.2823219842483108 20180817_075049_0f43_tms.xml 20180817_0750

In [106]:
duplicates

{53,
 54,
 160,
 161,
 189,
 190,
 205,
 206,
 251,
 253,
 263,
 270,
 271,
 275,
 276,
 278,
 279,
 291,
 350,
 351,
 352,
 353,
 354,
 355,
 356,
 364,
 365,
 366,
 367,
 368,
 377,
 378,
 384,
 385,
 399,
 400,
 402,
 403,
 421,
 422,
 433,
 434,
 435,
 436,
 440,
 441,
 458,
 459,
 471,
 472,
 474,
 475,
 476,
 480,
 481,
 482,
 483,
 484,
 485,
 486,
 487,
 498,
 499,
 501,
 502,
 504,
 505,
 513,
 514,
 545,
 546,
 551,
 552,
 553,
 554,
 555,
 557,
 558,
 559,
 560,
 561,
 562,
 564,
 565,
 566,
 567,
 574,
 575,
 579,
 580,
 581,
 582,
 583,
 610,
 611,
 643,
 644,
 653,
 654,
 658,
 661,
 664,
 665,
 700,
 702,
 712,
 713,
 714,
 715,
 717,
 718,
 719,
 722,
 723,
 725,
 726,
 727,
 731,
 734,
 735,
 736,
 741,
 742,
 743,
 744,
 749,
 750,
 769,
 770,
 774,
 775,
 787,
 788,
 815,
 816,
 817,
 818,
 854,
 855,
 856,
 857,
 858,
 861,
 864,
 865,
 882,
 883,
 884,
 888,
 889,
 904,
 906,
 924,
 925,
 942,
 943,
 946,
 947,
 951,
 952,
 953,
 954,
 955,
 956,
 959,
 960,
 961,


In [110]:
max(duplicates)

2255

In [112]:
indices = np.arange(gdf.shape[0])
duplicate_mask = np.array([x in duplicates for x in indices])

In [109]:
duplicate_mask.sum()

496

In [46]:
gdf_dupl = gdf[duplicate_mask]
gdf_no_dupl = gdf[~duplicate_mask]

gdf1_dupl = gdf1[duplicate_mask]
gdf1_no_dupl = gdf1[~duplicate_mask]

In [47]:
gdf_dupl.shape

(245, 9)

In [48]:
gdf_no_dupl.shape

(1791, 9)

In [49]:
gdf1_dupl.shape

(245, 6)

In [50]:
gdf1_no_dupl.shape

(1791, 6)

In [51]:
new_gdf = gpd.GeoDataFrame()
new_gdf['planet_img'] = gdf1_no_dupl['planet_img']
new_gdf['poly'] = gdf1_no_dupl['geometry']
new_gdf['bbox'] = gdf_no_dupl['bbox']
# new_gdf['index1'] = gdf1.index
# new_gdf['index2'] = gdf.index

In [52]:
gdf_iter = new_gdf

log = []

for index1, row1 in gdf_iter.iterrows():
    intersecting_indices = set()
    for index2, row2 in gdf_iter.iterrows():
        if index2 <= index1:
            continue
            
        if row1.planet_img == row2.planet_img and (row1.bbox.intersects(row2.poly) or row2.bbox.intersects(row1.poly)):
            intersection_area = row1.bbox.intersection(row2.bbox).area / max(row1.bbox.area, row2.bbox.area)
            print(index1, index2)
            print(intersection_area, row1.planet_img, row2.planet_img)
            print()
            intersecting_indices.add(index2)
            assert (1 - intersection_area) >= 0.2
            
    if len(intersecting_indices) > 0:
        intersecting_indices.add(index1)
        log.append(intersecting_indices)

53 54
0.15068341905113655 20190220_070501_0f46_tms.xml 20190220_070501_0f46_tms.xml

160 161
0.568396533136478 20190318_070225_0f1a_tms.xml 20190318_070225_0f1a_tms.xml

189 190
0.2302829186477901 20190316_070954_0f4d_tms.xml 20190316_070954_0f4d_tms.xml

205 206
0.6315417740037198 20171006_074341_100e_tms.xml 20171006_074341_100e_tms.xml

251 253
0.7205083266630108 20170402_074332_0e30_tms.xml 20170402_074332_0e30_tms.xml

263 291
0.22275819898500898 20170328_074032_1038_tms.xml 20170328_074032_1038_tms.xml

270 271
0.3769549969109655 20170322_075333_0e14_tms.xml 20170322_075333_0e14_tms.xml

275 276
0.5096584744024063 20170309_074540_0f1d_tms.xml 20170309_074540_0f1d_tms.xml

278 279
0.20710508754017457 20170402_074600_0f25_tms.xml 20170402_074600_0f25_tms.xml

350 351
0.3344771061239166 20170321_074908_0e0f_tms.xml 20170321_074908_0e0f_tms.xml

350 352
0.15020480546431395 20170321_074908_0e0f_tms.xml 20170321_074908_0e0f_tms.xml

351 352
0.5618997471225634 20170321_074908_0e0f_tms.x

924 925
0.07098836357507342 20170323_074045_0e19_tms.xml 20170323_074045_0e19_tms.xml

942 943
0.13511853781436245 20170407_072842_1038_tms.xml 20170407_072842_1038_tms.xml

946 947
0.24001114984690308 20170408_073447_1003_tms.xml 20170408_073447_1003_tms.xml

951 1262
1.0 20180906_071715_1_0f21_tms.xml 20180906_071715_1_0f21_tms.xml



AssertionError: 

In [53]:
log

[{53, 54},
 {160, 161},
 {189, 190},
 {205, 206},
 {251, 253},
 {263, 291},
 {270, 271},
 {275, 276},
 {278, 279},
 {350, 351, 352},
 {351, 352},
 {353, 354},
 {355, 356},
 {364, 365},
 {366, 367, 368},
 {367, 368},
 {377, 378},
 {384, 385},
 {399, 400},
 {402, 403},
 {421, 422},
 {433, 434, 435, 436},
 {434, 435},
 {440, 441},
 {458, 459},
 {471, 472},
 {474, 475, 476},
 {475, 476},
 {480, 481},
 {482, 483},
 {484, 485},
 {485, 486},
 {486, 487},
 {498, 499},
 {501, 502},
 {504, 505},
 {513, 514},
 {545, 546},
 {551, 552},
 {552, 553},
 {554, 555},
 {557, 558, 559},
 {560, 561},
 {561, 562},
 {564, 565},
 {566, 567},
 {574, 575},
 {579, 583},
 {580, 581, 582},
 {581, 582},
 {610, 611},
 {643, 644},
 {653, 654},
 {658, 661},
 {664, 665},
 {700, 702},
 {712, 713},
 {714, 715},
 {717, 718},
 {718, 719},
 {719, 731},
 {722, 725},
 {723, 725},
 {725, 726, 727},
 {726, 727},
 {734, 735},
 {735, 736},
 {741, 742},
 {743, 749},
 {744, 750},
 {769, 770},
 {774, 775},
 {787, 788},
 {815, 816, 8

In [54]:
duplicates = set()
for elements in log:
    duplicates.update(elements)

In [55]:
len(duplicates)

158

In [56]:
indices = np.arange(gdf_no_dupl.shape[0])
duplicate_mask = np.array([x in duplicates for x in indices])

In [57]:
gdf_dupl = gdf_no_dupl[duplicate_mask]
gdf_no_dupl = gdf_no_dupl[~duplicate_mask]

gdf1_dupl = gdf1_no_dupl[duplicate_mask]
gdf1_no_dupl = gdf1_no_dupl[~duplicate_mask]

In [58]:
gdf_dupl.shape

(158, 9)

In [59]:
gdf_no_dupl.shape

(1633, 9)

In [60]:
gdf1_dupl.shape

(158, 6)

In [61]:
gdf1_no_dupl.shape

(1633, 6)

In [62]:
new_gdf = gpd.GeoDataFrame()
new_gdf['planet_img'] = gdf1_no_dupl['planet_img']
new_gdf['poly'] = gdf1_no_dupl['geometry']
new_gdf['bbox'] = gdf_no_dupl['bbox']
# new_gdf['index1'] = gdf1.index
# new_gdf['index2'] = gdf.index

In [63]:
gdf_iter = new_gdf

log = []

for index1, row1 in gdf_iter.iterrows():
    intersecting_indices = set()
    for index2, row2 in gdf_iter.iterrows():
        if index2 <= index1:
            continue
            
        if row1.planet_img == row2.planet_img and (row1.bbox.intersects(row2.poly) or row2.bbox.intersects(row1.poly)):
            intersection_area = row1.bbox.intersection(row2.bbox).area / max(row1.bbox.area, row2.bbox.area)
            print(index1, index2)
            print(intersection_area, row1.planet_img, row2.planet_img)
            print()
            intersecting_indices.add(index2)
            assert (1 - intersection_area) >= 0.2
            
    if len(intersecting_indices) > 0:
        intersecting_indices.add(index1)
        log.append(intersecting_indices)

658 661
0.13944358565585327 20170803_073534_0f18_tms.xml 20170803_073534_0f18_tms.xml

700 702
0.04893580444844047 20170824_073655_102e_tms.xml 20170824_073655_102e_tms.xml

722 725
0.1837262801654984 20170407_073224_1017_tms.xml 20170407_073224_1017_tms.xml

854 861
0.03498484282641219 20170403_073749_0e0d_tms.xml 20170403_073749_0e0d_tms.xml

855 861
0.054308941802705714 20170403_073749_0e0d_tms.xml 20170403_073749_0e0d_tms.xml

864 865
0.2699445847104292 20170408_072920_1010_tms.xml 20170408_072920_1010_tms.xml

882 883
0.35539817559489434 20170408_072831_1022_tms.xml 20170408_072831_1022_tms.xml

888 889
0.3398937836431102 20170412_073530_0e30_tms.xml 20170412_073530_0e30_tms.xml

924 925
0.07098836357507342 20170323_074045_0e19_tms.xml 20170323_074045_0e19_tms.xml

942 943
0.13511853781436245 20170407_072842_1038_tms.xml 20170407_072842_1038_tms.xml

946 947
0.24001114984690308 20170408_073447_1003_tms.xml 20170408_073447_1003_tms.xml

951 1262
1.0 20180906_071715_1_0f21_tms.xml 2

AssertionError: 

In [158]:
bad_log1 = [t for t in log if abs(t[0] - t[1]) > 10]
bad_log2 = [t for t in log if abs(t[0] - t[1]) < 10]

In [159]:
len(bad_log1)

235

In [160]:
len(bad_log2)

0

In [185]:
a = [t[0] for t in bad_log1]
b = [t[1] for t in bad_log1]

In [198]:
new_gdf.loc[a].source.unique()

AttributeError: 'GeoDataFrame' object has no attribute 'source'

In [196]:
gdf.loc[a].source.map(lambda x: x == 'wfdp_Lugansk_2018_autumn_Kuzminova_2019_04_30.geojson').sum()

77

In [197]:
gdf.loc[a].source.map(lambda x: x == 'wfdp_Lugansk_2018_summer_Kuzminova_2019-03-27.geojson')

5

In [182]:
sources = set()

for index in range(len(a)):
    
    index1 = a[index]
    index2 = b[index]
    
    row1 = gdf.loc[index1]
    row2 = gdf.loc[index2]
    
#     print(row1['name'])
    
    src1 = row1.source.replace('.geojson', '')
    src2 = row2.source.replace('.geojson', '')
    
    date1 = datetime.datetime.strptime(src1[-10:].replace('_', '-'), "%Y-%m-%d")
    date2 = datetime.datetime.strptime(src2[-10:].replace('_', '-'), "%Y-%m-%d")
    
    index_to_remove = index1 if date1 < date2 else index2
    
    assert row1.planet_img == row2.planet_img 
    
    print('index1: {}, date1: {}, index2: {}, date2: {}, r {}'.format(index1, date1, index2, date2, index_to_remove))
    
    sources.add(src1)
    sources.add(src2)
    
    

index1: 951, date1: 2019-02-22 00:00:00, index2: 1179, date2: 2019-04-30 00:00:00, r 951
index1: 951, date1: 2019-02-22 00:00:00, index2: 1262, date2: 2019-03-27 00:00:00, r 951
index1: 952, date1: 2019-02-22 00:00:00, index2: 1180, date2: 2019-04-30 00:00:00, r 952
index1: 952, date1: 2019-02-22 00:00:00, index2: 1263, date2: 2019-03-27 00:00:00, r 952
index1: 953, date1: 2019-02-22 00:00:00, index2: 1181, date2: 2019-04-30 00:00:00, r 953
index1: 953, date1: 2019-02-22 00:00:00, index2: 1264, date2: 2019-03-27 00:00:00, r 953
index1: 955, date1: 2019-02-22 00:00:00, index2: 1183, date2: 2019-04-30 00:00:00, r 955
index1: 955, date1: 2019-02-22 00:00:00, index2: 1266, date2: 2019-03-27 00:00:00, r 955
index1: 956, date1: 2019-02-22 00:00:00, index2: 1184, date2: 2019-04-30 00:00:00, r 956
index1: 956, date1: 2019-02-22 00:00:00, index2: 1267, date2: 2019-03-27 00:00:00, r 956
index1: 959, date1: 2019-02-22 00:00:00, index2: 1185, date2: 2019-04-30 00:00:00, r 959
index1: 959, date1: 2

In [37]:
count

609

In [102]:
count

491

In [105]:
158 + 245

403

In [None]:
gdf1

In [49]:
log

[(160, 161, 0.568396533136478),
 (205, 206, 0.6315417740037198),
 (251, 253, 0.7205083266630108),
 (275, 276, 0.5096584744024063),
 (351, 352, 0.5618997471225634),
 (399, 400, 0.5566170056660409),
 (474, 475, 0.5000496292823112),
 (482, 483, 0.5322271421478681),
 (485, 486, 0.5430860874605072),
 (486, 487, 0.7590114155322739),
 (498, 499, 0.5126463007983499),
 (545, 546, 0.5342433351961087),
 (551, 552, 0.5120282768630425),
 (561, 562, 0.5741288582629693),
 (581, 582, 0.5769302017813467),
 (643, 644, 0.511066265801827),
 (712, 713, 0.5206122872707953),
 (815, 816, 0.5401599135834009),
 (951, 1179, 1.0),
 (951, 1262, 1.0),
 (952, 1180, 0.9563109844106144),
 (952, 1263, 0.9563109844106144),
 (953, 1181, 0.9888261250577104),
 (953, 1264, 0.9888261250577104),
 (955, 1183, 0.8389438066649831),
 (955, 1266, 0.8389438066649831),
 (956, 1184, 1.0),
 (956, 1267, 1.0),
 (959, 1185, 0.9580615686991268),
 (959, 1268, 1.0),
 (960, 1186, 1.0),
 (960, 1269, 1.0),
 (961, 1187, 0.9864853991786013),
 (9

In [50]:
len(log)

279

In [57]:
bad_log1 = [t for t in log if abs(t[0] - t[1]) > 10]
bad_log2 = [t for t in log if abs(t[0] - t[1]) < 10]

In [58]:
len(bad_log1)

241

In [59]:
len(bad_log2)

38

In [60]:
bad_log2

[(160, 161, 0.568396533136478),
 (205, 206, 0.6315417740037198),
 (251, 253, 0.7205083266630108),
 (275, 276, 0.5096584744024063),
 (351, 352, 0.5618997471225634),
 (399, 400, 0.5566170056660409),
 (474, 475, 0.5000496292823112),
 (482, 483, 0.5322271421478681),
 (485, 486, 0.5430860874605072),
 (486, 487, 0.7590114155322739),
 (498, 499, 0.5126463007983499),
 (545, 546, 0.5342433351961087),
 (551, 552, 0.5120282768630425),
 (561, 562, 0.5741288582629693),
 (581, 582, 0.5769302017813467),
 (643, 644, 0.511066265801827),
 (712, 713, 0.5206122872707953),
 (815, 816, 0.5401599135834009),
 (1019, 1020, 0.5719829721793169),
 (1244, 1245, 0.5719829721793169),
 (1327, 1328, 0.5719829721793169),
 (1768, 1773, 0.6335444815897096),
 (1769, 1771, 0.6538742902762283),
 (1769, 1772, 0.516386096253692),
 (1771, 1772, 0.7680024896692398),
 (1773, 1774, 0.682081945124581),
 (1790, 1792, 0.6102064950018372),
 (1848, 1851, 0.5638203452930352),
 (1849, 1851, 0.6080744580772778),
 (1850, 1851, 0.775699777

In [73]:
a = [t[0] for t in bad_log1]
b = [t[1] for t in bad_log1]

In [74]:
gdf.head()

Unnamed: 0,source,date,planet_img,firms_sour,editor,geometry,poly_bounds,expanded_bounds,bbox
0,wfdp_Zaporozhje_2018_summer_Leiberiuk_2019-01-...,2018-08-28,20180831_075824_0f35_tms.xml,V1,Oleksandr Leiberiuk - 0001,"POLYGON ((774478.951 5242453.718, 775194.042 5...","(774478.9507938821, 5241087.532265591, 775385....","(774094.9507938821, 5240703.532265591, 775769....","POLYGON ((36.64537 47.26179, 36.64671 47.28129..."
1,wfdp_Zaporozhje_2018_summer_Leiberiuk_2019-01-...,2018-08-22,20180823_075453_0e20_tms.xml,V1,Oleksandr Leiberiuk - 0002,"POLYGON ((794215.114 5193228.641, 794233.625 5...","(793181.5444087437, 5189774.82635592, 796043.9...","(792797.5444087437, 5189390.82635592, 796427.9...","POLYGON ((36.88420 46.79210, 36.88698 46.83070..."
2,wfdp_Zaporozhje_2018_summer_Leiberiuk_2019-01-...,2018-08-23,20180824_075716_1011_tms.xml,V1,Oleksandr Leiberiuk - 0003,"POLYGON ((793909.953 5191747.327, 793928.758 5...","(793833.1318794352, 5191523.895850659, 794612....","(793449.1318794352, 5191139.895850659, 794996....","POLYGON ((36.86661 46.80844, 36.86771 46.82365..."
3,wfdp_Zaporozhje_2018_summer_Leiberiuk_2019-01-...,2018-08-22,20180823_073520_0f3c_tms.xml,_,Oleksandr Leiberiuk - 0004,"POLYGON ((719537.098 5171234.474, 719555.721 5...","(719487.5977900957, 5171072.340910594, 719558....","(719103.5977900957, 5170688.340910594, 719942....","POLYGON ((35.87455 46.65363, 35.87499 46.66199..."
4,wfdp_Zaporozhje_2018_summer_Leiberiuk_2019-01-...,2018-08-22,20180823_073521_0f3c_tms.xml,_,Oleksandr Leiberiuk - 0005,"POLYGON ((718246.626 5172346.288, 718182.996 5...","(717862.227695063, 5171777.658725349, 718428.0...","(717478.227695063, 5171393.658725349, 718812.0...","POLYGON ((35.86013 46.66034, 35.86086 46.67407..."


In [101]:
len(a)

241

In [105]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [108]:
sources = set()

for index in range(241):
    
    row1 = gdf.loc[a].iloc[index]
    row2 = gdf.loc[b].iloc[index]
    
    src1 = row1.source.replace('.geojson', '')
    src2 = row2.source.replace('.geojson', '')
    
    assert row1.planet_img == row2.planet_img 
    
    print('source1: {}, source2: {}, image {}'.format(src1, src2, row1.planet_img))
    
    sources.add(src1)
    sources.add(src2)

source1: wfdp_Lugansk_2018_autumn_Kuzminova_2019-02-22, source2: wfdp_Lugansk_2018_autumn_Kuzminova_2019_04_30, image 20180906_071715_1_0f21_tms.xml
source1: wfdp_Lugansk_2018_autumn_Kuzminova_2019-02-22, source2: wfdp_Lugansk_2018_autumn_Kuzminova_2019-03-27, image 20180906_071715_1_0f21_tms.xml
source1: wfdp_Lugansk_2018_autumn_Kuzminova_2019-02-22, source2: wfdp_Lugansk_2018_autumn_Kuzminova_2019_04_30, image 20180906_071715_1_0f21_tms.xml
source1: wfdp_Lugansk_2018_autumn_Kuzminova_2019-02-22, source2: wfdp_Lugansk_2018_autumn_Kuzminova_2019-03-27, image 20180906_071715_1_0f21_tms.xml
source1: wfdp_Lugansk_2018_autumn_Kuzminova_2019-02-22, source2: wfdp_Lugansk_2018_autumn_Kuzminova_2019_04_30, image 20180914_071528_103f_tms.xml
source1: wfdp_Lugansk_2018_autumn_Kuzminova_2019-02-22, source2: wfdp_Lugansk_2018_autumn_Kuzminova_2019-03-27, image 20180914_071528_103f_tms.xml
source1: wfdp_Lugansk_2018_autumn_Kuzminova_2019-02-22, source2: wfdp_Lugansk_2018_autumn_Kuzminova_2019_04_30

source1: wfdp_Lugansk_2018_autumn_Kuzminova_2019-02-22, source2: wfdp_Lugansk_2018_autumn_Kuzminova_2019_04_30, image 20181009_071355_1043_tms.xml
source1: wfdp_Lugansk_2018_autumn_Kuzminova_2019-02-22, source2: wfdp_Lugansk_2018_autumn_Kuzminova_2019-03-27, image 20181009_071355_1043_tms.xml
source1: wfdp_Lugansk_2018_autumn_Kuzminova_2019-02-22, source2: wfdp_Lugansk_2018_autumn_Kuzminova_2019_04_30, image 20181008_075245_1009_tms.xml
source1: wfdp_Lugansk_2018_autumn_Kuzminova_2019-02-22, source2: wfdp_Lugansk_2018_autumn_Kuzminova_2019-03-27, image 20181008_075245_1009_tms.xml
source1: wfdp_Lugansk_2018_autumn_Kuzminova_2019-02-22, source2: wfdp_Lugansk_2018_autumn_Kuzminova_2019_04_30, image 20181026_075354_1015_tms.xml
source1: wfdp_Lugansk_2018_autumn_Kuzminova_2019-02-22, source2: wfdp_Lugansk_2018_autumn_Kuzminova_2019-03-27, image 20181026_075354_1015_tms.xml
source1: wfdp_Lugansk_2018_autumn_Kuzminova_2019-02-22, source2: wfdp_Lugansk_2018_autumn_Kuzminova_2019_04_30, image 

source1: wfdp_Lugansk_2018_autumn_Kuzminova_2019_04_30, source2: wfdp_Lugansk_2018_autumn_Kuzminova_2019-03-27, image 20181101_071140_0f46_tms.xml
source1: wfdp_Lugansk_2018_autumn_Kuzminova_2019_04_30, source2: wfdp_Lugansk_2018_autumn_Kuzminova_2019-03-27, image 20181105_075757_0f17_tms.xml
source1: wfdp_Lugansk_2018_autumn_Kuzminova_2019_04_30, source2: wfdp_Lugansk_2018_autumn_Kuzminova_2019-03-27, image 20180912_071445_0f44_tms.xml
source1: wfdp_Lugansk_2018_autumn_Kuzminova_2019_04_30, source2: wfdp_Lugansk_2018_autumn_Kuzminova_2019-03-27, image 20181118_075951_0f42_tms.xml
source1: wfdp_Lugansk_2018_autumn_Kuzminova_2019_04_30, source2: wfdp_Lugansk_2018_autumn_Kuzminova_2019-03-27, image 20181013_074734_0e20_tms.xml
source1: wfdp_Lugansk_2018_autumn_Kuzminova_2019_04_30, source2: wfdp_Lugansk_2018_autumn_Kuzminova_2019-03-27, image 20180906_071503_1043_tms.xml
source1: wfdp_Lugansk_2018_autumn_Kuzminova_2019_04_30, source2: wfdp_Lugansk_2018_autumn_Kuzminova_2019-03-27, image 

In [109]:
sources

{'wfdp_Lugansk_2018_autumn_Kuzminova_2019-02-22',
 'wfdp_Lugansk_2018_autumn_Kuzminova_2019-03-27',
 'wfdp_Lugansk_2018_autumn_Kuzminova_2019_04_30',
 'wfdp_Lugansk_2018_summer_Kuzminova_2019-03-27',
 'wfdp_Lugansk_2018_summer_Kuzminova_2019_04_30'}

In [88]:

gdf.loc[a].iloc[index:index+1]

Unnamed: 0,source,date,planet_img,firms_sour,editor,geometry,poly_bounds,expanded_bounds,bbox
951,wfdp_Lugansk_2018_autumn_Kuzminova_2019-02-22....,2018-09-05,20180906_071715_1_0f21_tms.xml,_,Anastasia Kuzminova,"POLYGON ((962284.315 5393908.121, 962275.187 5...","(962261.2330042083, 5393908.121491459, 962330....","(961877.2330042083, 5393524.121491459, 962714....","POLYGON ((39.26822 48.52383, 39.26916 48.53141..."


In [89]:
gdf.loc[b].iloc[index:index+1]

Unnamed: 0,source,date,planet_img,firms_sour,editor,geometry,poly_bounds,expanded_bounds,bbox
1179,wfdp_Lugansk_2018_autumn_Kuzminova_2019_04_30....,2018-09-05,20180906_071715_1_0f21_tms.xml,_,,"POLYGON ((962284.315 5393908.121, 962275.187 5...","(962261.2330042083, 5393908.121491459, 962330....","(961877.2330042083, 5393524.121491459, 962714....","POLYGON ((39.26822 48.52383, 39.26916 48.53141..."


In [67]:
poly = gdf.loc[0, 'geometry']

In [69]:
poly.bounds?

In [None]:
# Generate bounding box polygons from cropped images
# for index, row in df.iterrows():
#     image_path = row.image_path
#     with rasterio.open(image_path) as src:
#         bounds = src.bounds
#         crs = src.crs
#     bbox = box(bounds.left, bounds.bottom, bounds.right, bounds.top)
#     gseries = gpd.GeoSeries([bbox])
#     gseries.crs = crs
#     gseries.to_file(image_path.replace('images', 'bboxes').replace('tif', 'geojson'), driver='GeoJSON')

In [30]:
# date transformations

def format_date(date_str):
    return '{}-{}-{}'.format(date_str[:4], date_str[4:6], date_str[6:])


gdf.date = pd.to_datetime(gdf.date)

no_planet_img = gdf.planet_img.isna()

gdf_with_image_date = gdf[~no_planet_img].copy()
image_date_str = gdf_with_image_date.planet_img.map(lambda planet_img: planet_img[:8])
gdf_with_image_date.image_date = pd.to_datetime(image_date_str)


gdf_without_image_date = gdf[no_planet_img].copy()
gdf_with_image_date.image_date = None

# gdf.img_date = pd.to_datetime()

ParserError: Unknown string format: Sentinel

In [26]:
(gdf.date.isna()).sum()

0

In [5]:
gdf.head()

Unnamed: 0,date,planet_img,firms_sour,editor,geometry
0,2018-08-28,20180831_075824_0f35_tms.xml,V1,Oleksandr Leiberiuk - 0001,"POLYGON ((36.62941 47.27804, 36.63887 47.27809..."
1,2018-08-22,20180823_075453_0e20_tms.xml,V1,Oleksandr Leiberiuk - 0002,"POLYGON ((36.85774 46.82754, 36.85798 46.82746..."
2,2018-08-23,20180824_075716_1011_tms.xml,V1,Oleksandr Leiberiuk - 0003,"POLYGON ((36.85279 46.81438, 36.85312 46.81543..."
3,2018-08-22,20180823_073520_0f3c_tms.xml,_,Oleksandr Leiberiuk - 0004,"POLYGON ((35.86952 46.65867, 35.86971 46.65775..."
4,2018-08-22,20180823_073521_0f3c_tms.xml,_,Oleksandr Leiberiuk - 0005,"POLYGON ((35.85320 46.66909, 35.85242 46.67009..."
