# Análisis superficial del csv de installs
### En este notebook exploramos los registros del csv con el objetivo de buscar relaciones entre las distintas variables

In [1]:
import xgboost as xgb
from sklearn.metrics import mean_squared_error
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

## Leemos el csv, descartamos las columnas click_hash y attributed, y derivamos las columnas day y hour a partir de created

In [2]:
# Load the raw installs log with pandas' default dtype inference.
# A leftover triple-quoted dtype/parse_dates spec was removed from this cell:
# it was a bare string expression (a no-op) and listed columns such as
# 'auction_type_id' that nothing in this cell uses.
installs = pd.read_csv("../data/installs.csv")

# Discard both unused columns in one call, without mutating in place.
installs = installs.drop(['click_hash', 'attributed'], axis=1)

# errors='coerce' turns unparseable timestamps into NaT instead of raising.
installs['created'] = pd.to_datetime(installs['created'], errors='coerce')

# Calendar date and hour-of-day derived from the 'created' timestamp.
installs['day'] = installs['created'].dt.date
installs['hour'] = installs['created'].dt.hour

installs.head(5)

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,created,application_id,ref_type,ref_hash,implicit,device_countrycode,device_brand,device_model,session_user_agent,user_agent,event_uuid,kind,wifi,trans_id,ip_address,device_language,day,hour
0,2019-04-24 06:23:29.495,1,1494519392962156891,4716708407362582887,True,6287817205707153877,,3.739127e+17,adjust.com,,79837499-2f2a-4605-a663-e322f759424f,app_open,,,4243443387795468703,3.301378e+18,2019-04-24,6
1,2019-04-24 02:06:01.032,1,1494519392962156891,7143568733100935872,False,6287817205707153877,,7.805539e+18,adjust.com,,,,,,4724288679627032761,3.301378e+18,2019-04-24,2
2,2019-04-20 10:15:36.274,1,1494519392962156891,5230323462636548010,True,6287817205707153877,,8.355496e+18,adjust.com,,dda99e3c-9c4b-487d-891c-79f0a02cb4a8,app_open,,,8291809486355890410,4.06093e+18,2019-04-20,10
3,2019-04-20 21:56:47.151,1,1494519392962156891,5097163995161606833,True,6287817205707153877,,2.355772e+18,adjust.com,,7010c3ce-0fcf-46c6-9be8-374cc0e20af4,app_open,,,4006811922873399949,3.301378e+18,2019-04-20,21
4,2019-04-20 22:40:41.239,1,1494519392962156891,6328027616411983332,False,6287817205707153877,,6.156971e+18,adjust.com,,,,,,3386455054590810771,3.301378e+18,2019-04-20,22


In [3]:
# Count how many rows carry each distinct device_countrycode value.
installs['device_countrycode'].value_counts()

6287817205707153877    481511
Name: device_countrycode, dtype: int64

In [4]:
# Latest value of the 'created' timestamp column.
installs['created'].max()

Timestamp('2019-04-26 23:59:58.788000')

In [5]:
# Earliest value of the 'created' timestamp column.
installs['created'].min()

Timestamp('2019-04-18 00:00:01.560000')

## Revisamos el tipo de cada columna

In [6]:
# Show the dtype pandas assigned to every column of `installs`.
installs.dtypes

created               datetime64[ns]
application_id                 int64
ref_type                       int64
ref_hash                       int64
implicit                        bool
device_countrycode             int64
device_brand                 float64
device_model                 float64
session_user_agent            object
user_agent                    object
event_uuid                    object
kind                          object
wifi                          object
trans_id                      object
ip_address                     int64
device_language              float64
day                           object
hour                           int64
dtype: object

## Elegimos las columnas con menor entropía o pocos valores diferentes y nos quedamos con los 5 valores más comunes

In [7]:
# Columns whose most frequent values are turned into indicator features.
# ('hour' and 'day' were commented out of the original list and stay excluded.)
grupos = [
    'application_id',
    'ref_type',
    'implicit',
    'device_countrycode',
    'device_brand',
    'device_model',
    'session_user_agent',
    'kind',
    'wifi',
    'device_language',
    'ip_address',
]

# For each column, keep at most its five most frequent values,
# ordered from most to least common.
top5 = {
    columna: installs[columna].value_counts().index[:5].tolist()
    for columna in grupos
}
top5

{'application_id': [121, 36, 210, 14, 65],
 'device_brand': [3.083058605577787e+17,
  2.2088346671269993e+18,
  6.115025880051902e+18,
  2.523245982232924e+18,
  4.567867128441484e+18],
 'device_countrycode': [6287817205707153877],
 'device_language': [6.977049253562487e+18,
  3.3013777759777e+18,
  8.441417429938961e+18,
  6.584214382597376e+18,
  4.060929664968129e+18],
 'device_model': [6.794880020077884e+18,
  9.186120447236368e+18,
  5.660121202018897e+18,
  3.0574023248014715e+18,
  7.346301283849156e+18],
 'implicit': [False, True],
 'ip_address': [3225289966945059087,
  6731755574901734021,
  5098957481418445121,
  3059497526785721451,
  4368472372578406700],
 'kind': ['Open', 'af_app_opened', 'app_open', 'app open', 'Sign In'],
 'ref_type': [1891515180541284343, 1494519392962156891],
 'session_user_agent': ['http-kit/2.0',
  'adjust.com',
  'Apsalar-Postback',
  'HasOffers Mobile AppTracking v1.0',
  'Mozilla/5.0 (iPhone; CPU iPhone OS 12_2 like Mac OS X) AppleWebKit/605.1.15 

In [8]:
def _sufijo_valor(value):
    """Return the text appended to a column name for a top-5 ``value``.

    Floats are rendered with 12 significant digits, which is what ``str(float)``
    produced on Python 2. The hard-coded drop lists later in this notebook use
    that form (e.g. 'device_brand3.08305860558e+17_sum'), whereas ``str(float)``
    on Python 3 yields the full repr ('3.083058605577787e+17') and the generated
    columns would no longer match those lists. Every other value uses ``str``.
    """
    if isinstance(value, float):
        texto = '%.12g' % value
        # Python 2's str() appended '.0' to floats that print as plain integers.
        if texto.lstrip('-').isdigit():
            texto += '.0'
        return texto
    return str(value)


# Single multi-key sort: rows grouped by ref_hash and chronological inside
# each group. The previous two consecutive single-key sorts did not guarantee
# this, because the second one (default, unstable algorithm) is free to
# reorder rows that share a ref_hash.
installs = installs.sort_values(by=['ref_hash', 'created'])

# Constant column of ones; aggregating it per ref_hash gives per-group row counts.
installs['repeticiones'] = 1

# Frame with no data columns, indexed by the ref_hash of every row.
device_ids = installs['ref_hash'].to_frame().set_index('ref_hash')

# One 0/1 indicator column per (column, frequent value) pair.
for column in grupos:
    for value in top5[column]:
        installs[column + _sufijo_valor(value)] = (installs[column] == value) * 1

In [9]:
# Window 1: rows created strictly after 2019-04-18 00:00 and strictly before
# 2019-04-21 00:00 (both bounds exclusive).
fecha_minima = pd.Timestamp('2019-04-18 00:00:00')
fecha_tope = pd.Timestamp('2019-04-21 00:00:00')
installs_ventana1 = installs[
    (installs['created'] > fecha_minima) & (installs['created'] < fecha_tope)
].copy(deep=False)

In [10]:
# Window 2: rows created strictly after 2019-04-19 00:00 and strictly before
# 2019-04-22 00:00 (both bounds exclusive).
fecha_minima = pd.Timestamp('2019-04-19 00:00:00')
fecha_tope = pd.Timestamp('2019-04-22 00:00:00')
installs_ventana2 = installs[
    (installs['created'] > fecha_minima) & (installs['created'] < fecha_tope)
].copy(deep=False)

In [11]:
# Window 3: rows created strictly after 2019-04-20 00:00 and strictly before
# 2019-04-23 00:00 (both bounds exclusive).
fecha_minima = pd.Timestamp('2019-04-20 00:00:00')
fecha_tope = pd.Timestamp('2019-04-23 00:00:00')
installs_ventana3 = installs[
    (installs['created'] > fecha_minima) & (installs['created'] < fecha_tope)
].copy(deep=False)

In [12]:
# Window 4: rows created strictly after 2019-04-21 00:00 and strictly before
# 2019-04-24 00:00 (both bounds exclusive).
fecha_minima = pd.Timestamp('2019-04-21 00:00:00')
fecha_tope = pd.Timestamp('2019-04-24 00:00:00')
installs_ventana4 = installs[
    (installs['created'] > fecha_minima) & (installs['created'] < fecha_tope)
].copy(deep=False)

In [13]:
# Window 7: rows created strictly after 2019-04-24 00:00 and strictly before
# 2019-04-27 00:00 (both bounds exclusive).
fecha_minima = pd.Timestamp('2019-04-24 00:00:00')
fecha_tope = pd.Timestamp('2019-04-27 00:00:00')
installs_ventana7 = installs[
    (installs['created'] > fecha_minima) & (installs['created'] < fecha_tope)
].copy(deep=False)

In [14]:
# Position counter, starting at the first window.
contador = 0

# Numeric label of each window, in the same order as `ventanas` below.
numero_de_ventanas = [1, 2, 3, 4, 7]

# Time-window frames to aggregate.
ventanas = [
    installs_ventana1,
    installs_ventana2,
    installs_ventana3,
    installs_ventana4,
    installs_ventana7,
]

# Statistics computed for every column when aggregating a window.
features = ['sum', 'mean', 'std', 'min', 'max']

In [15]:
# Aggregated feature columns removed from the "sc" export, named
# '<column>_<statistic>'. The list is generated from a compact plan of
# (column prefix, statistics to drop) pairs; element order is the same as in
# the original hand-written list.
_sc_todas = ('sum', 'mean', 'std', 'min', 'max')
_sc_solo_std = ('std',)

_sc_plan = [
    # Raw columns.
    ('application_id', _sc_solo_std),
    ('ref_type', _sc_solo_std),
    ('implicit', _sc_todas),
    ('device_countrycode', _sc_solo_std),
    ('device_brand', _sc_todas),
    ('device_model', _sc_todas),
    ('ip_address', _sc_solo_std),
    ('device_language', _sc_todas),
    ('hour', _sc_todas),
    ('repeticiones', _sc_solo_std),
    # Indicator columns named '<column><value>'.
    ('application_id121', _sc_todas),
    ('application_id36', _sc_todas),
    ('application_id210', _sc_todas),
    ('application_id14', _sc_todas),
    ('application_id65', _sc_todas),
    ('ref_type1891515180541284343', _sc_todas),
    ('ref_type1494519392962156891', _sc_todas),
    ('implicitFalse', _sc_todas),
    ('implicitTrue', _sc_todas),
    ('device_countrycode6287817205707153877', _sc_solo_std),
    ('device_brand3.08305860558e+17', _sc_todas),
    ('device_brand2.20883466713e+18', _sc_todas),
    ('device_brand6.11502588005e+18', _sc_todas),
    ('device_brand2.52324598223e+18', _sc_todas),
    ('device_brand4.56786712844e+18', _sc_todas),
    ('device_model6.79488002008e+18', _sc_todas),
    ('device_model9.18612044724e+18', _sc_todas),
    ('device_model5.66012120202e+18', _sc_todas),
    ('device_model3.0574023248e+18', _sc_todas),
    ('device_model7.34630128385e+18', _sc_todas),
    ('session_user_agenthttp-kit/2.0', _sc_todas),
    ('session_user_agentadjust.com', _sc_todas),
    ('session_user_agentApsalar-Postback', _sc_todas),
    ('session_user_agentHasOffers Mobile AppTracking v1.0', _sc_todas),
    ('session_user_agentMozilla/5.0 (iPhone; CPU iPhone OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148', _sc_todas),
    ('kindOpen', _sc_todas),
    ('kindaf_app_opened', _sc_todas),
    ('kindapp_open', _sc_todas),
    ('kindapp open', _sc_todas),
    ('kindSign In', _sc_todas),
    ('wifiTrue', _sc_todas),
    ('wifiFalse', _sc_todas),
    ('device_language6.97704925356e+18', _sc_todas),
    ('device_language3.30137777598e+18', _sc_todas),
    ('device_language8.44141742994e+18', _sc_todas),
    ('device_language6.5842143826e+18', _sc_todas),
    ('device_language4.06092966497e+18', _sc_todas),
    ('ip_address3225289966945059087', _sc_todas),
    ('ip_address6731755574901734021', _sc_todas),
    ('ip_address5098957481418445121', _sc_todas),
    ('ip_address3059497526785721451', _sc_todas),
    ('ip_address4368472372578406700', _sc_todas),
]

drop_columns_sc = [
    prefijo + '_' + estadistica
    for prefijo, estadisticas in _sc_plan
    for estadistica in estadisticas
]

In [16]:
# Aggregated feature columns removed from the "st" export, named
# '<column>_<statistic>'. Each plan entry is (column prefix, statistic that is
# NOT dropped); None means all five statistics of that prefix are dropped.
# Element order is the same as in the original hand-written list.
_st_estadisticas = ('sum', 'mean', 'std', 'min', 'max')

_st_plan = [
    # Raw columns.
    ('application_id', 'std'),
    ('ref_type', None),
    ('implicit', None),
    ('device_countrycode', None),
    ('device_brand', None),
    ('device_model', 'sum'),
    ('ip_address', 'sum'),
    ('device_language', None),
    ('hour', 'max'),
    ('repeticiones', None),
    # Indicator columns named '<column><value>'.
    ('application_id121', None),
    ('application_id36', 'mean'),
    ('application_id210', None),
    ('application_id14', None),
    ('application_id65', None),
    ('ref_type1891515180541284343', None),
    ('ref_type1494519392962156891', None),
    ('implicitFalse', None),
    ('implicitTrue', None),
    ('device_countrycode6287817205707153877', None),
    ('device_brand3.08305860558e+17', None),
    ('device_brand2.20883466713e+18', None),
    ('device_brand6.11502588005e+18', None),
    ('device_brand2.52324598223e+18', None),
    ('device_brand4.56786712844e+18', None),
    ('device_model6.79488002008e+18', None),
    ('device_model9.18612044724e+18', None),
    ('device_model5.66012120202e+18', None),
    ('device_model3.0574023248e+18', None),
    ('device_model7.34630128385e+18', None),
    ('session_user_agenthttp-kit/2.0', None),
    ('session_user_agentadjust.com', None),
    ('session_user_agentApsalar-Postback', None),
    ('session_user_agentHasOffers Mobile AppTracking v1.0', None),
    ('session_user_agentMozilla/5.0 (iPhone; CPU iPhone OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148', None),
    ('kindOpen', None),
    ('kindaf_app_opened', None),
    ('kindapp_open', None),
    ('kindapp open', None),
    ('kindSign In', None),
    ('wifiTrue', None),
    ('wifiFalse', None),
    ('device_language6.97704925356e+18', None),
    ('device_language3.30137777598e+18', None),
    ('device_language8.44141742994e+18', None),
    ('device_language6.5842143826e+18', None),
    ('device_language4.06092966497e+18', None),
    ('ip_address3225289966945059087', None),
    ('ip_address6731755574901734021', None),
    ('ip_address5098957481418445121', None),
    ('ip_address3059497526785721451', None),
    ('ip_address4368472372578406700', None),
]

drop_columns_st = [
    prefijo + '_' + estadistica
    for prefijo, conservada in _st_plan
    for estadistica in _st_estadisticas
    if estadistica != conservada
]

In [17]:
# Export the per-ref_hash aggregate features of every time window.
#
# Each window is paired with its numeric label through zip(). The previous
# version looked the label up with the notebook-level counter `contador`,
# which keeps its value between executions: re-running this cell after an
# interruption wrote files under the wrong window number, and re-running it
# after a full pass raised IndexError. `contador` is no longer read or
# modified here.
for ventana, numero_ventana in zip(ventanas, numero_de_ventanas):
    # Aggregate every column per ref_hash with each statistic in `features`;
    # missing results (e.g. std of a single-row group) become 0.
    features_ventana = ventana.groupby('ref_hash').agg(features).fillna(0)

    # Flatten the (column, statistic) column MultiIndex into 'column_statistic'.
    level0 = features_ventana.columns.get_level_values(0)
    level1 = features_ventana.columns.get_level_values(1)
    features_ventana.columns = level0 + "_" + level1

    sufijo_archivo = '_ventana' + str(numero_ventana) + '.csv'

    # "st" export: everything except the columns listed in drop_columns_st.
    features_st = features_ventana.drop(drop_columns_st, axis=1)
    features_st.to_csv('../xgb/features_installs_gonzalo_st' + sufijo_archivo)

    # "sc" export: everything except the columns listed in drop_columns_sc.
    features_sc = features_ventana.drop(drop_columns_sc, axis=1)
    features_sc.to_csv('../xgb/features_installs_gonzalo_sc' + sufijo_archivo)

KeyboardInterrupt: 