In [79]:
import glob

import numpy as np
import pandas as pd
import featuretools as ft

In [80]:
npy_files = glob.glob("data/raw/*.npy")

In [81]:
gamma_npy = [f for f in npy_files if 'gamma' in f and 'colnames' not in f]

In [82]:
gamma_colnames = np.load('data/raw/gamma_df_colnames.npy')

In [83]:
gamma_df = pd.DataFrame()
for file in gamma_npy:
    gamma_df = pd.concat([pd.DataFrame(np.load(file)), gamma_df])
gamma_df.columns = list(gamma_colnames)

In [95]:
gamma_df.shape

(403950, 13)

In [84]:
gamma_df.head().transpose()

Unnamed: 0,0,1,2,3,4
_id,FdWTbmIBcKmh5GBKiUuE,FtWTbmIBcKmh5GBKiUua,F9WTbmIBcKmh5GBKiUvd,GNWTbmIBcKmh5GBKiksk,KNWTbmIBcKmh5GBKiktZ
_index,gammarf,gammarf,gammarf,gammarf,gammarf
_score,1,1,1,1,1
_source.alt,16.695,22.759,16.695,16.695,16.695
_source.epv,48.968,34.643,48.968,48.968,48.968
_source.epx,10.934,9.091,10.934,10.934,10.934
_source.epy,10.645,8.616,10.645,10.645,10.645
_source.freq,2417000000,2426000000,2426000000,2431000000,2432000000
_source.lat,27.956522393,27.956503568,27.956522393,27.956522393,27.956522393
_source.lng,-82.437600713,-82.437424729,-82.437600713,-82.437600713,-82.437600713


In [85]:
gamma_df = gamma_df.drop(["_index","_score","_type","DateTime","_source.module","_source.protocol"], axis=1)

In [86]:
gamma_df.head().transpose()

Unnamed: 0,0,1,2,3,4
_id,FdWTbmIBcKmh5GBKiUuE,FtWTbmIBcKmh5GBKiUua,F9WTbmIBcKmh5GBKiUvd,GNWTbmIBcKmh5GBKiksk,KNWTbmIBcKmh5GBKiktZ
_source.alt,16.695,22.759,16.695,16.695,16.695
_source.epv,48.968,34.643,48.968,48.968,48.968
_source.epx,10.934,9.091,10.934,10.934,10.934
_source.epy,10.645,8.616,10.645,10.645,10.645
_source.freq,2417000000,2426000000,2426000000,2431000000,2432000000
_source.lat,27.956522393,27.956503568,27.956522393,27.956522393,27.956522393
_source.lng,-82.437600713,-82.437424729,-82.437600713,-82.437600713,-82.437600713
_source.pwr,-41.9534,-43.5154,-32.9212,-43.5126,-38.1887
_source.rand,f2dc0b57,b50c5751,86be43ec,7d3738d4,9b50e1af


### Represent data with EntitySets

In [87]:
from featuretools.variable_types import Categorical, LatLong, Numeric
from featuretools.primitives import Latitude, Longitude

In [88]:
es = ft.EntitySet(id="gamma")

In [89]:
es = es.entity_from_dataframe(entity_id="gamma",
                             dataframe=gamma_df,
                             index="_id",
                             time_index="_source.timestamp",
                             variable_types={"_source.lat": LatLong,
                                             "_source.lng": LatLong,
                                             "_source.alt": Numeric,
                                             "_source.epv": Numeric,
                                             "_source.epx": Numeric,
                                             "_source.epy": Numeric,
                                             "_source.freq": Numeric,
                                             "_source.pwr": Numeric})

In [90]:
es

Entityset: gamma
  Entities:
    gamma (shape = [403950, 13])
  Relationships:
    No relationships

In [91]:
es["gamma"].variables

[<Variable: _source.lat (dtype = latlong, count = 403950)>,
 <Variable: _source.epv (dtype = numeric, count = 403950)>,
 <Variable: _source.alt (dtype = numeric, count = 403950)>,
 <Variable: _source.rand (dtype = categorical, count = 403950)>,
 <Variable: _source.epy (dtype = numeric, count = 403950)>,
 <Variable: _source.stationid (dtype = categorical, count = 403950)>,
 <Variable: _source.freq (dtype = numeric, count = 403950)>,
 <Variable: _source.timestamp (dtype: datetime_time_index, format: None)>,
 <Variable: _source.sign (dtype = categorical, count = 403950)>,
 <Variable: _source.pwr (dtype = numeric, count = 403950)>,
 <Variable: _source.epx (dtype = numeric, count = 403950)>,
 <Variable: _source.lng (dtype = latlong, count = 403950)>,
 <Variable: _id (dtype = index, count = 403950)>]

### Extract feature matrix

In [92]:
from featuretools.primitives import Count, Month

In [93]:
feature_matrix, feature_defs = ft.dfs(entityset=es, 
                                      target_entity="gamma",
                                      agg_primitives=[Count],
                                      trans_primitives=[Month],
                                      max_depth=1)

In [94]:
feature_matrix

Unnamed: 0_level_0,_source.epx,_source.freq,_source.stationid,_source.alt,_source.epv,_source.epy,_source.sign,_source.rand,_source.pwr,MONTH(_source.timestamp)
_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
---QcmIBcKmh5GBK0gKp,10.934,2433000000,range_outer2,20.136,48.968,10.645,355cb79e951e,0d9d06c7,-43.399178,3
---RcmIBcKmh5GBKlwqr,9.091,2436000000,range_core,14.576,34.643,8.616,a7ab1fdf4103,2c7a6073,-35.224720,3
---RcmIBcKmh5GBKqgv3,9.091,2461000000,range_core,14.460,34.643,8.616,379037efedbb,a2065e08,-34.913181,3
---ScmIBcKmh5GBKMRA0,10.934,2416000000,range_outer2,18.617,48.968,10.645,1432587292d3,38067ef4,-42.271019,3
---ScmIBcKmh5GBKjRWu,10.934,2438000000,range_outer2,18.617,48.968,10.645,141e54c611c3,2c9f910e,-45.288723,3
---VcmIBcKmh5GBKJDN7,10.934,2433000000,range_outer2,18.970,48.968,10.645,e38fc8f97581,23b251d4,-41.804562,3
---WcmIBcKmh5GBK5kdL,10.934,2142000000,range_outer2,19.144,48.968,10.645,1302c036b776,eb7074dc,-31.833128,3
---XcmIBcKmh5GBKLEqw,9.091,2468000000,range_core,14.751,34.643,8.616,9e350c2d82b7,ce2b1973,-42.790218,3
---YcmIBcKmh5GBKjVom,9.091,2443000000,range_core,14.813,34.643,8.616,6d97683a0307,4826035d,-38.167149,3
---acmIBcKmh5GBK-Hj6,10.934,2442000000,range_outer2,18.152,48.968,10.645,88cd5e0a0a94,024ca88b,-39.196533,3
