In [13]:
import glob

import numpy as np
import pandas as pd
import featuretools as ft

In [14]:
npy_files = glob.glob("data/raw/*.npy")

In [15]:
gamma_npy = [f for f in npy_files if 'gamma' in f and 'colnames' not in f]

In [16]:
gamma_colnames = np.load('data/raw/gamma_df_colnames.npy')

In [17]:
gamma_df = pd.DataFrame()
for file in gamma_npy:
    gamma_df = pd.concat([pd.DataFrame(np.load(file)), gamma_df])
gamma_df.columns = list(gamma_colnames)

In [18]:
gamma_df.shape

(1125615, 19)

In [19]:
gamma_df.head().transpose()

Unnamed: 0,0,1,2,3,4
_id,hBzcd2IBcKmh5GBKv1Fy,hRzcd2IBcKmh5GBKv1G6,hhzcd2IBcKmh5GBKv1Hq,hxzcd2IBcKmh5GBKwFE0,PBzcd2IBcKmh5GBK4FJH
_index,gammarf,gammarf,gammarf,gammarf,gammarf
_score,1,1,1,1,1
_source.alt,20.252,20.252,20.252,20.252,20.252
_source.epv,34.643,34.643,34.643,34.643,34.643
_source.epx,9.091,9.091,9.091,9.091,9.091
_source.epy,8.616,8.616,8.616,8.616,8.616
_source.freq,2417000000,2433000000,2441000000,2442000000,2411000000
_source.lat,27.95652579,27.95652579,27.95652579,27.95652579,27.95652579
_source.lng,-82.437450939,-82.437450939,-82.437450939,-82.437450939,-82.437450939


In [20]:
gamma_df = gamma_df.drop(["_score","_type","DateTime","_source.module","_source.protocol"], axis=1)

In [28]:
gamma_df["new_index"] = [i for i in range(gamma_df.shape[0])]

In [29]:
gamma_df.head().transpose()

Unnamed: 0,0,1,2,3,4
_id,hBzcd2IBcKmh5GBKv1Fy,hRzcd2IBcKmh5GBKv1G6,hhzcd2IBcKmh5GBKv1Hq,hxzcd2IBcKmh5GBKwFE0,PBzcd2IBcKmh5GBK4FJH
_index,gammarf,gammarf,gammarf,gammarf,gammarf
_source.alt,20.252,20.252,20.252,20.252,20.252
_source.epv,34.643,34.643,34.643,34.643,34.643
_source.epx,9.091,9.091,9.091,9.091,9.091
_source.epy,8.616,8.616,8.616,8.616,8.616
_source.freq,2417000000,2433000000,2441000000,2442000000,2411000000
_source.lat,27.95652579,27.95652579,27.95652579,27.95652579,27.95652579
_source.lng,-82.437450939,-82.437450939,-82.437450939,-82.437450939,-82.437450939
_source.pwr,-35.0759,-42.0609,-36.6234,-45.408,-39.1471


### Represent data with EntitySets

In [30]:
from featuretools.variable_types import Categorical, LatLong, Numeric
from featuretools.primitives import Latitude, Longitude

In [31]:
es = ft.EntitySet(id="gamma")

In [33]:
es = es.entity_from_dataframe(entity_id="gamma",
                             dataframe=gamma_df,
                             index="new_index",
                             time_index="_source.timestamp",
                             variable_types={"_source.lat": LatLong,
                                             "_source.lng": LatLong,
                                             "_source.alt": Numeric,
                                             "_source.epv": Numeric,
                                             "_source.epx": Numeric,
                                             "_source.epy": Numeric,
                                             "_source.freq": Numeric,
                                             "_source.pwr": Numeric})

In [34]:
es

Entityset: gamma
  Entities:
    gamma (shape = [1125615, 15])
  Relationships:
    No relationships

In [35]:
es["gamma"].variables

[<Variable: new_index (dtype = index, count = 1125615)>,
 <Variable: _source.epx (dtype = numeric, count = 1125615)>,
 <Variable: _index (dtype = categorical, count = 1125615)>,
 <Variable: _source.stationid (dtype = categorical, count = 1125615)>,
 <Variable: _source.lat (dtype = latlong, count = 1125615)>,
 <Variable: _source.timestamp (dtype: datetime_time_index, format: None)>,
 <Variable: _source.pwr (dtype = numeric, count = 1125615)>,
 <Variable: _id (dtype = categorical, count = 1125615)>,
 <Variable: _source.epv (dtype = numeric, count = 1125615)>,
 <Variable: _source.epy (dtype = numeric, count = 1125615)>,
 <Variable: _source.freq (dtype = numeric, count = 1125615)>,
 <Variable: _source.lng (dtype = latlong, count = 1125615)>,
 <Variable: _source.alt (dtype = numeric, count = 1125615)>,
 <Variable: _source.rand (dtype = categorical, count = 1125615)>,
 <Variable: _source.sign (dtype = categorical, count = 1125615)>]

### Extract feature matrix

In [36]:
from featuretools.primitives import Count, Month

In [37]:
feature_matrix, feature_defs = ft.dfs(entityset=es, 
                                      target_entity="gamma",
                                      agg_primitives=[Count],
                                      trans_primitives=[Month],
                                      max_depth=1)

In [38]:
feature_matrix

Unnamed: 0_level_0,_source.epx,_index,_source.alt,_source.freq,_source.epv,_source.pwr,_id,_source.stationid,_source.rand,_source.sign,_source.epy,MONTH(_source.timestamp)
new_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
0,9.091,gammarf,20.252,2417000000,34.643,-35.075912,hBzcd2IBcKmh5GBKv1Fy,range_core,71fffcfd,56bcb9ac67b8,8.616,3
1,9.091,gammarf,20.252,2433000000,34.643,-42.060940,hRzcd2IBcKmh5GBKv1G6,range_core,26a68d2e,f547bb922aa1,8.616,3
2,9.091,gammarf,20.252,2441000000,34.643,-36.623356,hhzcd2IBcKmh5GBKv1Hq,range_core,c51d307c,2144ddc27e48,8.616,3
3,9.091,gammarf,20.252,2442000000,34.643,-45.408043,hxzcd2IBcKmh5GBKwFE0,range_core,376968c0,49ee362df7a1,8.616,3
4,9.091,gammarf,20.252,2411000000,34.643,-39.147091,PBzcd2IBcKmh5GBK4FJH,range_core,65253cba,c9f03a1253e2,8.616,3
5,9.091,gammarf,19.261,2436000000,34.643,-35.307999,KRzed2IBcKmh5GBKz1yn,range_core,5f691222,439219ad2990,8.616,3
6,9.091,gammarf,20.136,2437000000,34.643,-38.641010,Khzed2IBcKmh5GBKz1zk,range_core,b506d77d,4fce020aca0f,8.616,3
7,9.091,gammarf,20.136,2438000000,34.643,-31.845972,Kxzed2IBcKmh5GBK0Fwn,range_core,29bb4e4c,1e449b8d6bed,8.616,3
8,9.091,gammarf,20.136,2446000000,34.643,-45.486420,LBzed2IBcKmh5GBK0FxY,range_core,f94850cc,d39416c2e5bc,8.616,3
9,9.091,gammarf,20.136,2461000000,34.643,-37.510098,LRzed2IBcKmh5GBK0Fye,range_core,61f97e02,9137fa0c6482,8.616,3
