In [13]:
import geopandas as gpd
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from pathlib import Path

from enum import Enum

from shapely.geometry import Polygon

# Imports from eo-learn and sentinelhub-py
from eolearn.core import EOTask, EOPatch, LinearWorkflow, FeatureType, OverwritePermission, \
    LoadFromDisk, SaveToDisk, EOExecutor
from eolearn.io import S2L1CWCSInput, ExportToTiff
from eolearn.mask import AddCloudMaskTask, get_s2_pixel_cloud_detector, AddValidDataMaskTask
from eolearn.geometry import VectorToRaster, PointSamplingTask, ErosionTask
from eolearn.features import LinearInterpolation, SimpleFilterTask, InterpolationTask, ValueFilloutTask, \
    HaralickTask, HOGTask, AddSpatioTemporalFeaturesTask, AddMaxMinTemporalIndicesTask, \
    AddMaxMinNDVISlopeIndicesTask
from sentinelhub import BBoxSplitter, BBox, CRS, CustomUrlParam

from tqdm import tqdm 

In [5]:
output_path = Path('output')
patch_path = output_path/'eopatches-large'
path_out_features = output_path/'eopatches-features/'

### Add texture features

In [30]:
patch = EOPatch.load(patch_path/'eopatch_0')

In [31]:
patch

EOPatch(
  data: {
    FEATURES: numpy.ndarray(shape=(8, 1353, 1335, 16), dtype=float64)
  }
  mask: {}
  scalar: {}
  label: {}
  vector: {}
  data_timeless: {}
  mask_timeless: {
    LULC: numpy.ndarray(shape=(1353, 1335, 1), dtype=uint8)
    TEST_FIELD_ID: numpy.ndarray(shape=(1353, 1335, 1), dtype=uint16)
    TRAIN_FIELD_ID: numpy.ndarray(shape=(1353, 1335, 1), dtype=uint16)
  }
  scalar_timeless: {}
  label_timeless: {}
  vector_timeless: {}
  meta_info: {}
  bbox: BBox(((535129.8381333639, 6846878.021770692), (548483.7166625774, 6860414.749511358)), crs=EPSG:32734)
  timestamp: [datetime.datetime(2017, 1, 1, 0, 0), ..., datetime.datetime(2017, 7, 23, 0, 0)], length=8
)

In [33]:
class MakeValidMap:
    def __call__(self, eopatch):        
        return np.logical_or(eopatch.mask_timeless['TEST_FIELD_ID']>0, 
                              eopatch.mask_timeless['TRAIN_FIELD_ID']>0)
    

In [34]:
# TASK TO LOAD EXISTING EOPATCHES
load = LoadFromDisk(patch_path)

haralick_entropy = HaralickTask(feature = (FeatureType.DATA, 'FEATURES'), 
                        texture_feature = 'sum_entropy')

ret_true = lambda x: True

add_valid = AddValidDataMaskTask(MakeValidMap(), 'VALID_DATA')

temporal_indices = AddMaxMinTemporalIndicesTask(data_feature='FEATURES/5')

save = SaveToDisk(path_out_features, overwrite_permission=OverwritePermission.OVERWRITE_PATCH)

In [35]:
# Define the workflow
workflow = LinearWorkflow(
    load,
    add_valid,
    temporal_indices,
    save
)

In [36]:
%%time
   
execution_args = []
for idx in range(1):
    execution_args.append({
        load: {'eopatch_folder': 'eopatch_{}'.format(idx)},
        save: {'eopatch_folder': 'eopatch_{}'.format(idx)}
    })
    
executor = EOExecutor(workflow, execution_args, save_logs=True)
executor.run(workers=5, multiprocess=True)

executor.make_report()

HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

CPU times: user 54.9 ms, sys: 63.9 ms, total: 119 ms
Wall time: 845 ms


### Load shape file

In [8]:
train_test_gpdf = gpd.read_file(data_path/'train_test_shp')

In [9]:
train_test_gpdf.head()

Unnamed: 0,Area,Crop_Id_Ne,Field_Id,Subregion,train_test,tile,geometry
0,0.915905,8,1,3,train,T34JFP,"POLYGON ((613924.3324000851 6793991.295069702,..."
1,2.06441,6,2,3,train,T34JFP,"POLYGON ((614404.0717397591 6794020.579634549,..."
2,1.0803,8,3,3,train,T34JFP,"POLYGON ((614320.2146632321 6793964.242091182,..."
3,1.31619,8,4,3,train,T34JFP,"POLYGON ((614560.5326844405 6794002.249461887,..."
4,5.52922,8,7,3,train,T34JFP,"POLYGON ((614596.6564746551 6794269.865436315,..."


### Centroid

In [24]:
def get_centroidx(g): return g.centroid.xy[0][0]

def get_centroidy(g): return g.centroid.xy[1][0]

def get_centroid(g): return g.centroid

train_test_gpdf['centroid']=train_test_gpdf.geometry.apply(get_centroid)
train_test_gpdf['centroid_x']=train_test_gpdf.geometry.apply(get_centroidx)
train_test_gpdf['centroid_y']=train_test_gpdf.geometry.apply(get_centroidy)

### Nearest neighbouring farms

In [44]:
def nearest(row, geom_union, df2, geom1_col='geometry', geom2_col='geometry', src_column=None): 
    """Find the nearest point and return the corresponding value from specified column."""

    # Find the geometry that is closest
    nearest = df2[geom2_col] == nearest_points(row[geom1_col], geom_union)[1]

    # Get the corresponding value from df2 (matching is based on the geometry)
    value = df2[nearest][src_column].get_values()[0]

    return value

def nearest_Field(row, gpdf): 
    mask = ((gpdf.Field_Id!=row.Field_Id)
            &(gpdf.train_test=='train'))
    return nearest(row, 
                   geom_union=gpdf[mask].centroid.unary_union, 
                   df2=gpdf, 
                   geom1_col='centroid', 
                   geom2_col='centroid', 
                   src_column='Field_Id')

def second_nearest_Field(row, gpdf): 
    mask = ((gpdf.Field_Id!=row.Field_Id)
            &(gpdf.Field_Id!=row.Field_Id_1)
            &(gpdf.train_test=='train'))
    return nearest(row, 
                   geom_union=gpdf[mask].centroid.unary_union, 
                   df2=gpdf, 
                   geom1_col='centroid', 
                   geom2_col='centroid', 
                   src_column='Field_Id')

def third_nearest_Field(row, gpdf): 
    mask = ((gpdf.Field_Id!=row.Field_Id)
            &(gpdf.Field_Id!=row.Field_Id_1)
            &(gpdf.Field_Id!=row.Field_Id_2)
            &(gpdf.train_test=='train'))
    return nearest(row, 
                   geom_union=gpdf[mask].centroid.unary_union, 
                   df2=gpdf, 
                   geom1_col='centroid', 
                   geom2_col='centroid', 
                   src_column='Field_Id')

def fourth_nearest_Field(row, gpdf): 
    mask = ((gpdf.Field_Id!=row.Field_Id)
            &(gpdf.Field_Id!=row.Field_Id_1)
            &(gpdf.Field_Id!=row.Field_Id_2)
            &(gpdf.Field_Id!=row.Field_Id_3)
            &(gpdf.train_test=='train'))
    return nearest(row, 
                   geom_union=gpdf[mask].centroid.unary_union, 
                   df2=gpdf, 
                   geom1_col='centroid', 
                   geom2_col='centroid', 
                   src_column='Field_Id')

def fifth_nearest_Field(row, gpdf): 
    mask = ((gpdf.Field_Id!=row.Field_Id)
            &(gpdf.Field_Id!=row.Field_Id_1)
            &(gpdf.Field_Id!=row.Field_Id_2)
            &(gpdf.Field_Id!=row.Field_Id_3)
            &(gpdf.Field_Id!=row.Field_Id_4)
            &(gpdf.train_test=='train'))
    return nearest(row, 
                   geom_union=gpdf[mask].centroid.unary_union, 
                   df2=gpdf, 
                   geom1_col='centroid', 
                   geom2_col='centroid', 
                   src_column='Field_Id')

def sixth_nearest_Field(row, gpdf): 
    mask = ((gpdf.Field_Id!=row.Field_Id)
            &(gpdf.Field_Id!=row.Field_Id_1)
            &(gpdf.Field_Id!=row.Field_Id_2)
            &(gpdf.Field_Id!=row.Field_Id_3)
            &(gpdf.Field_Id!=row.Field_Id_4)
            &(gpdf.Field_Id!=row.Field_Id_5)
            &(gpdf.train_test=='train'))
    return nearest(row, 
                   geom_union=gpdf[mask].centroid.unary_union, 
                   df2=gpdf, 
                   geom1_col='centroid', 
                   geom2_col='centroid', 
                   src_column='Field_Id')


def seventh_nearest_Field(row, gpdf): 
    mask = ((gpdf.Field_Id!=row.Field_Id)
            &(gpdf.Field_Id!=row.Field_Id_1)
            &(gpdf.Field_Id!=row.Field_Id_2)
            &(gpdf.Field_Id!=row.Field_Id_3)
            &(gpdf.Field_Id!=row.Field_Id_4)
            &(gpdf.Field_Id!=row.Field_Id_5)
            &(gpdf.Field_Id!=row.Field_Id_6)
            &(gpdf.train_test=='train'))
    return nearest(row, 
                   geom_union=gpdf[mask].centroid.unary_union, 
                   df2=gpdf, 
                   geom1_col='centroid', 
                   geom2_col='centroid', 
                   src_column='Field_Id')


def eighth_nearest_Field(row, gpdf): 
    mask = ((gpdf.Field_Id!=row.Field_Id)
            &(gpdf.Field_Id!=row.Field_Id_1)
            &(gpdf.Field_Id!=row.Field_Id_2)
            &(gpdf.Field_Id!=row.Field_Id_3)
            &(gpdf.Field_Id!=row.Field_Id_4)
            &(gpdf.Field_Id!=row.Field_Id_5)
            &(gpdf.Field_Id!=row.Field_Id_6)
            &(gpdf.Field_Id!=row.Field_Id_7)
            &(gpdf.train_test=='train'))
    return nearest(row, 
                   geom_union=gpdf[mask].centroid.unary_union, 
                   df2=gpdf, 
                   geom1_col='centroid', 
                   geom2_col='centroid', 
                   src_column='Field_Id')


def ninth_nearest_Field(row, gpdf): 
    mask = ((gpdf.Field_Id!=row.Field_Id)
            &(gpdf.Field_Id!=row.Field_Id_1)
            &(gpdf.Field_Id!=row.Field_Id_2)
            &(gpdf.Field_Id!=row.Field_Id_3)
            &(gpdf.Field_Id!=row.Field_Id_4)
            &(gpdf.Field_Id!=row.Field_Id_5)
            &(gpdf.Field_Id!=row.Field_Id_6)
            &(gpdf.Field_Id!=row.Field_Id_7)
            &(gpdf.Field_Id!=row.Field_Id_8)
            &(gpdf.train_test=='train'))
    return nearest(row, 
                   geom_union=gpdf[mask].centroid.unary_union, 
                   df2=gpdf, 
                   geom1_col='centroid', 
                   geom2_col='centroid', 
                   src_column='Field_Id')


def tenth_nearest_Field(row, gpdf): 
    mask = ((gpdf.Field_Id!=row.Field_Id)
            &(gpdf.Field_Id!=row.Field_Id_1)
            &(gpdf.Field_Id!=row.Field_Id_2)
            &(gpdf.Field_Id!=row.Field_Id_3)
            &(gpdf.Field_Id!=row.Field_Id_4)
            &(gpdf.Field_Id!=row.Field_Id_5)
            &(gpdf.Field_Id!=row.Field_Id_6)
            &(gpdf.Field_Id!=row.Field_Id_7)
            &(gpdf.Field_Id!=row.Field_Id_8)
            &(gpdf.Field_Id!=row.Field_Id_9)
            &(gpdf.train_test=='train'))
    return nearest(row, 
                   geom_union=gpdf[mask].centroid.unary_union, 
                   df2=gpdf, 
                   geom1_col='centroid', 
                   geom2_col='centroid', 
                   src_column='Field_Id')

In [26]:
%%time
train_test_gpdf['Field_Id_1'] = train_test_gpdf.apply(
    partial(nearest_Field, gpdf=train_test_gpdf), axis=1)



CPU times: user 8min 29s, sys: 0 ns, total: 8min 29s
Wall time: 8min 29s


In [27]:
%%time
train_test_gpdf['Field_Id_2'] = train_test_gpdf.apply(
    partial(second_nearest_Field, gpdf=train_test_gpdf), axis=1)



CPU times: user 8min 7s, sys: 0 ns, total: 8min 7s
Wall time: 8min 8s


In [28]:
%%time
train_test_gpdf['Field_Id_3'] = train_test_gpdf.apply(
    partial(third_nearest_Field, gpdf=train_test_gpdf), axis=1)



CPU times: user 8min 3s, sys: 0 ns, total: 8min 3s
Wall time: 8min 8s


In [37]:
%%time
train_test_gpdf['Field_Id_4'] = train_test_gpdf.apply(
    partial(fourth_nearest_Field, gpdf=train_test_gpdf), axis=1)



CPU times: user 7min 56s, sys: 129 ms, total: 7min 56s
Wall time: 7min 56s


In [38]:
%%time
train_test_gpdf['Field_Id_5'] = train_test_gpdf.apply(
    partial(fifth_nearest_Field, gpdf=train_test_gpdf), axis=1)



CPU times: user 8min 3s, sys: 132 ms, total: 8min 3s
Wall time: 8min 3s


In [39]:
%%time
train_test_gpdf['Field_Id_6'] = train_test_gpdf.apply(
    partial(sixth_nearest_Field, gpdf=train_test_gpdf), axis=1)



CPU times: user 7min 56s, sys: 86 ms, total: 7min 56s
Wall time: 7min 56s


In [40]:
%%time
train_test_gpdf['Field_Id_7'] = train_test_gpdf.apply(
    partial(seventh_nearest_Field, gpdf=train_test_gpdf), axis=1)



CPU times: user 7min 58s, sys: 99.8 ms, total: 7min 58s
Wall time: 7min 58s


In [45]:
%%time
train_test_gpdf['Field_Id_8'] = train_test_gpdf.apply(
    partial(eighth_nearest_Field, gpdf=train_test_gpdf), axis=1)



CPU times: user 8min 12s, sys: 128 ms, total: 8min 12s
Wall time: 8min 12s


In [46]:
%%time
train_test_gpdf['Field_Id_9'] = train_test_gpdf.apply(
    partial(ninth_nearest_Field, gpdf=train_test_gpdf), axis=1)



CPU times: user 8min 13s, sys: 136 ms, total: 8min 13s
Wall time: 8min 13s


In [47]:
%%time
train_test_gpdf['Field_Id_10'] = train_test_gpdf.apply(
    partial(tenth_nearest_Field, gpdf=train_test_gpdf), axis=1)



CPU times: user 8min 14s, sys: 164 ms, total: 8min 15s
Wall time: 8min 15s


In [49]:
train_test_gpdf[['Field_Id', 
                  'Field_Id_1', 
                  'Field_Id_2', 
                  'Field_Id_3', 
                  'Field_Id_4', 
                  'Field_Id_5', 
                  'Field_Id_6', 
                  'Field_Id_7', 
                  'Field_Id_8', 
                  'Field_Id_9', 
                  'Field_Id_10']].to_csv(data_path/'nearest_fields.csv', index=False)

### Add count of corners

In [50]:
def count_corners(r): 
    return len(r.geometry.boundary.xy[0])-1

train_test_gpdf['corners'] = train_test_gpdf.apply(count_corners,axis=1)

### Add a metric for how far from square the field is

In [51]:
def circ_to_area(r): 
    return (r.geometry.boundary.length/4)/((r.geometry.area)**.5)

train_test_gpdf['squareness'] = train_test_gpdf.apply(circ_to_area,axis=1)

In [52]:
def cooerce_float(x):
    if '--' in str(x): return 0
    else: return float(x)

In [53]:
converters = {'std': cooerce_float,
            'mean': cooerce_float,
            'min': cooerce_float,
            'max': cooerce_float}

### Flatten statistics dataframe

In [54]:
stats_df = pd.read_csv(data_path/'statistics_per_band.csv', converters=converters)

In [55]:
stats_no_tiles_df = stats_df.groupby(['Field_Id', 'date', 'band']).agg(np.mean).reset_index()

In [56]:
stats_no_tiles_df['date_band'] = stats_no_tiles_df.date + '_' + stats_no_tiles_df.band

In [57]:
stats_pivoted_df = stats_no_tiles_df.pivot_table(
        index=['Field_Id'], 
        columns=['date_band'], 
        values=['std', 
                'mean', 
                'median', 
                'min',
                'max', 
                'dissimilarity', 
                'contrast', 
                'homogeneity', 
                'energy',
                'correlation'])

In [58]:
stats_pivoted_df.columns = stats_pivoted_df.columns.map('_'.join)

In [59]:
stats_pivoted_df.reset_index(inplace=True)

In [60]:
train_test_gpdf = train_test_gpdf.merge(nearest_fields_df)

NameError: name 'nearest_fields_df' is not defined

In [63]:
features_df = train_test_gpdf[['Field_Id', 
                 'train_test', 
                 'Crop_Id_Ne', 
                 'Subregion', 
                 'tile', 
                 'Area', 
                 'corners', 
                 'squareness',
                 'centroid_x',
                 'centroid_y',
                 'Field_Id_1', 
                 'Field_Id_2', 
                 'Field_Id_3',
                 'Field_Id_4', 
                 'Field_Id_5',
                 'Field_Id_6', 
                 'Field_Id_7',
                 'Field_Id_8', 
                 'Field_Id_9',
                 'Field_Id_10']]

In [64]:
features_df = features_df.merge(
                features_df[['Field_Id','Crop_Id_Ne']], 
                left_on='Field_Id_1', 
                right_on='Field_Id',
                suffixes=['','_nf_1']).drop('Field_Id_nf_1',axis=1)

features_df = features_df.merge(
                features_df[['Field_Id','Crop_Id_Ne']], 
                left_on='Field_Id_2', 
                right_on='Field_Id',
                suffixes=['','_nf_2']).drop('Field_Id_nf_2',axis=1)

features_df = features_df.merge(
                features_df[['Field_Id','Crop_Id_Ne']], 
                left_on='Field_Id_3', 
                right_on='Field_Id',
                suffixes=['','_nf_3']).drop('Field_Id_nf_3',axis=1)


features_df = features_df.merge(
                features_df[['Field_Id','Crop_Id_Ne']], 
                left_on='Field_Id_4', 
                right_on='Field_Id',
                suffixes=['','_nf_4']).drop('Field_Id_nf_4',axis=1)


features_df = features_df.merge(
                features_df[['Field_Id','Crop_Id_Ne']], 
                left_on='Field_Id_5', 
                right_on='Field_Id',
                suffixes=['','_nf_5']).drop('Field_Id_nf_5',axis=1)


features_df = features_df.merge(
                features_df[['Field_Id','Crop_Id_Ne']], 
                left_on='Field_Id_6', 
                right_on='Field_Id',
                suffixes=['','_nf_6']).drop('Field_Id_nf_6',axis=1)


features_df = features_df.merge(
                features_df[['Field_Id','Crop_Id_Ne']], 
                left_on='Field_Id_7', 
                right_on='Field_Id',
                suffixes=['','_nf_7']).drop('Field_Id_nf_7',axis=1)


features_df = features_df.merge(
                features_df[['Field_Id','Crop_Id_Ne']], 
                left_on='Field_Id_8', 
                right_on='Field_Id',
                suffixes=['','_nf_8']).drop('Field_Id_nf_8',axis=1)


features_df = features_df.merge(
                features_df[['Field_Id','Crop_Id_Ne']], 
                left_on='Field_Id_9', 
                right_on='Field_Id',
                suffixes=['','_nf_9']).drop('Field_Id_nf_9',axis=1)


features_df = features_df.merge(
                features_df[['Field_Id','Crop_Id_Ne']], 
                left_on='Field_Id_10', 
                right_on='Field_Id',
                suffixes=['','_nf_10']).drop('Field_Id_nf_10',axis=1)

### Compute distances to nearest fields

In [66]:
nearest_fields_df = features_df[['Field_Id',
                                 'Field_Id_1',
                                 'Field_Id_2',
                                 'Field_Id_3',
                                 'Field_Id_4',
                                 'Field_Id_5',
                                 'Field_Id_6',
                                 'Field_Id_7',
                                 'Field_Id_8',
                                 'Field_Id_9',
                                 'Field_Id_10']]

In [68]:
nearest_dist = nearest_fields_df.merge(
    stats_pivoted_df).merge(stats_pivoted_df, 
                            left_on = 'Field_Id_1', 
                            right_on= 'Field_Id', 
                            suffixes=['','_other']).drop('Field_Id_other',axis=1)

In [69]:
cols = [c for c in nearest_dist if '_other' not in c and 'Field' not in c]
other_cols = [c for c in nearest_dist if '_other' in c and 'Field' not in c]

def calc_distance(row):
    return ((row[cols].values - row[other_cols].values)**2).mean()**.5

In [70]:
features_df['distances_to_1'] = nearest_dist.apply(calc_distance, axis=1)

In [71]:
nearest_dist = nearest_fields_df.merge(
    stats_pivoted_df).merge(stats_pivoted_df, 
                            left_on = 'Field_Id_2', 
                            right_on= 'Field_Id', 
                            suffixes=['','_other']).drop('Field_Id_other',axis=1)

In [72]:
features_df['distances_to_2'] = nearest_dist.apply(calc_distance, axis=1)

In [73]:
nearest_dist = nearest_fields_df.merge(
    stats_pivoted_df).merge(stats_pivoted_df, 
                            left_on = 'Field_Id_3', 
                            right_on= 'Field_Id', 
                            suffixes=['','_other']).drop('Field_Id_other',axis=1)

In [74]:
features_df['distances_to_3'] = nearest_dist.apply(calc_distance, axis=1)

In [75]:
nearest_dist = nearest_fields_df.merge(
    stats_pivoted_df).merge(stats_pivoted_df, 
                            left_on = 'Field_Id_4', 
                            right_on= 'Field_Id', 
                            suffixes=['','_other']).drop('Field_Id_other',axis=1)

In [76]:
features_df['distances_to_4'] = nearest_dist.apply(calc_distance, axis=1)

In [77]:
nearest_dist = nearest_fields_df.merge(
    stats_pivoted_df).merge(stats_pivoted_df, 
                            left_on = 'Field_Id_5', 
                            right_on= 'Field_Id', 
                            suffixes=['','_other']).drop('Field_Id_other',axis=1)

In [78]:
features_df['distances_to_5'] = nearest_dist.apply(calc_distance, axis=1)

In [79]:
nearest_dist = nearest_fields_df.merge(
    stats_pivoted_df).merge(stats_pivoted_df, 
                            left_on = 'Field_Id_6', 
                            right_on= 'Field_Id', 
                            suffixes=['','_other']).drop('Field_Id_other',axis=1)

In [80]:
features_df['distances_to_6'] = nearest_dist.apply(calc_distance, axis=1)

In [81]:
nearest_dist = nearest_fields_df.merge(
    stats_pivoted_df).merge(stats_pivoted_df, 
                            left_on = 'Field_Id_7', 
                            right_on= 'Field_Id', 
                            suffixes=['','_other']).drop('Field_Id_other',axis=1)

In [82]:
features_df['distances_to_7'] = nearest_dist.apply(calc_distance, axis=1)

In [83]:
nearest_dist = nearest_fields_df.merge(
    stats_pivoted_df).merge(stats_pivoted_df, 
                            left_on = 'Field_Id_8', 
                            right_on= 'Field_Id', 
                            suffixes=['','_other']).drop('Field_Id_other',axis=1)

In [84]:
features_df['distances_to_8'] = nearest_dist.apply(calc_distance, axis=1)

In [85]:
nearest_dist = nearest_fields_df.merge(
    stats_pivoted_df).merge(stats_pivoted_df, 
                            left_on = 'Field_Id_9', 
                            right_on= 'Field_Id', 
                            suffixes=['','_other']).drop('Field_Id_other',axis=1)

In [86]:
features_df['distances_to_9'] = nearest_dist.apply(calc_distance, axis=1)

In [87]:
nearest_dist = nearest_fields_df.merge(
    stats_pivoted_df).merge(stats_pivoted_df, 
                            left_on = 'Field_Id_10', 
                            right_on= 'Field_Id', 
                            suffixes=['','_other']).drop('Field_Id_other',axis=1)

In [88]:
features_df['distances_to_10'] = nearest_dist.apply(calc_distance, axis=1)

In [89]:
features_df = features_df.merge(stats_pivoted_df, how='left')

### Write features dataframe to disk

In [90]:
features_df.shape

(3568, 1030)

In [None]:
features_df.to_csv(data_path/'features.csv', index=False)