# Testing Metalearning Predictions script

In [1]:
options(repr.matrix.max.cols=50, repr.matrix.max.rows=100)

In [2]:
# install.packages("pkgcond")

In [3]:
library(dplyr)
library(data.table)
library(partykit)
library(tictoc)
library(caret)
library(e1071)
library(randomForest)
library(ranger)

#for 3d plotting
library(akima)
library(plotly)

# for prep data
library(rPython)
library(stringr)
library(pbapply)
library(stringdist)
library(data.table)
library(dominanceanalysis)


getwd()


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union



Attaching package: ‘data.table’


The following objects are masked from ‘package:dplyr’:

    between, first, last


Loading required package: grid

Loading required package: libcoin

Loading required package: mvtnorm

Loading required package: lattice

Loading required package: ggplot2

randomForest 4.6-14

Type rfNews() to see new features/changes/bug fixes.


Attaching package: ‘randomForest’


The following object is masked from ‘package:ggplot2’:

    margin


The following object is masked from ‘package:dplyr’:

    combine



Attaching package: ‘ranger’


The following object is masked from ‘package:randomForest’:

    importance



Attaching package: ‘plotly’


The following object is masked from ‘package:ggplot2’:

    last_plot


The following object is masked from ‘package:sta

In [4]:

source('../orig/functions.R')

source('../orig/functions_models.R')

source('../phaseII_03_forest/functions_eval.R')


source('../phaseII_03_forest/load_prep_data_expiry.R')

# expiry_train_prepped_2_1 (list, less df's w/ 0 obs)
# expiry_test_prepped_2_1  (list, less df's w/ 0 obs)
# expiry_train_df_1 (above, rbound)
# expiry_test_df_1 (above, rbound)
# expiry_train_df_sub (subset vars)                                        
# expiry_test_df_sub (subset vars)

In [27]:

source('functions_metalearning.R')

# DEFINE limited global objects

In [28]:
tld_reseller_list = expiry_train_df_1 %>% sample_n(5) %>% distinct(tld_registrar_index) %>% 
  pull(tld_registrar_index) %>%  head(5) # limit to 5
length(tld_reseller_list)
tld_reseller_list

In [29]:
reseller_list = expiry_train_df_1 %>% distinct(reseller) %>% pull(reseller) %>% head(5) # limit 5
length(reseller_list)
reseller_list

# TEST train_all in functions*.R

In [30]:
subDir = paste("models", format(Sys.Date(), format="%Y%m%d") , sep = "_")
fullDir = file.path('../../data/output', subDir)
dir.create(fullDir, showWarnings = FALSE)

In [31]:
train_all(tld_reseller_list,
                       reseller_list,
                       train_list = expiry_train_prepped_2_1,
                       test_list = expiry_test_prepped_2_1,
                       model_agg_glm = 1, # skip agg models
                       model_agg_rf = 1, 
         fullDir=fullDir)   # skip agg models

[1] "model_seg_glm_11internet"
[1] "model_seg_rf_11internet"
[1] "model_seg_glm_10dencehispahard"


“glm.fit: fitted probabilities numerically 0 or 1 occurred”


[1] "model_seg_rf_10dencehispahard"
[1] "model_seg_glm_1api"
[1] "model_seg_rf_1api"
[1] "model_seg_glm_abnameisp"
[1] "model_seg_rf_abnameisp"
[1] "model_seg_glm_active24"
[1] "model_seg_rf_active24"
[1] "model_seg2_glm_funchengduwest"


“glm.fit: fitted probabilities numerically 0 or 1 occurred”


[1] "model_seg2_rf_funchengduwest"
[1] "model_seg2_glm_siteuol"


“glm.fit: fitted probabilities numerically 0 or 1 occurred”


[1] "model_seg2_rf_siteuol"
[1] "model_seg2_glm_pwgandisas"
[1] "model_seg2_rf_pwgandisas"
[1] "model_seg2_glm_siteregru"
[1] "model_seg2_rf_siteregru"
[1] "model_seg2_glm_sitegmo"


“glm.fit: fitted probabilities numerically 0 or 1 occurred”


[1] "model_seg2_rf_sitegmo"


# TEST pred_all in functions*.R

In [55]:
source('functions_metalearning.R')

In [56]:
pred_df <- pred_all(fullDir='../../data/output/models_20201017', # dir of models
                      tld_reseller_list,
                      test_list = expiry_test_prepped_2_1)

Predicting model_agg_rfNULL
[1] "funchengdu west"


ERROR: Error in get(model_name): object 'model_seg_glm_chengduwest' not found


In [44]:
load("../../data/output/models_20201017/model_agg_glm.Rdata")

In [54]:
model_agg_glm

NULL

In [45]:
tld_reseller_str="funchengdu west"
model=model_agg_glm

In [41]:
test_list_tld_reseller = test_list[tld_reseller_str]
test_df_tld_reseller =  rbindlist(test_list_tld_reseller,use.names=TRUE)

In [43]:
dim(test_df_tld_reseller)

In [46]:
# if test data contains no observations, skip!
if (dim(test_df_tld_reseller)[1]==0){
    pred_df_agg_glm = NA
} else {
    pred = predict_first_renewal_agg(test_df_tld_reseller, model)

    pred_df_agg_glm = data.frame("actual" = pred$renewal_status,
                                  "predicted" = pred$first_renewal_prediction)
} 

ERROR: Error in UseMethod("predict"): no applicable method for 'predict' applied to an object of class "NULL"


In [47]:
test_data = test_df_tld_reseller

In [48]:
test_data$sld_type[!(test_data$sld_type %in% model$xlevels$sld_type)]<-NA
test_data$reseller[!(test_data$reseller %in% model$xlevels$reseller)]<-NA # LVG added

In [50]:
test_data_sub = subset(test_data,
                                              select=c(pattern_domain_count, 
                                                       log_reg_arpt,
                                                       sld_length, 
                                                       gibb_score,
                                                       sld_type, 
                                                       day_domains,
                                                       reg_period, 
                                                       tld, reseller))

In [51]:
dim(test_data_sub)

In [52]:
#test.data$probabilities <- predict(model,newdata=subset(test.data,select=c(Coeff.Variation, LogArpt, SLD.Length, SLD.Type, Day.Domains, Gibb.Score)),type='response');
test_data$probabilities<-predict(model,
                               newdata=subset(test_data,
                                              select=c(pattern_domain_count, 
                                                       log_reg_arpt,
                                                       sld_length, 
                                                       gibb_score,
                                                       sld_type, 
                                                       day_domains,
                                                       reg_period, 
                                                       tld, reseller)),type='response');

ERROR: Error in UseMethod("predict"): no applicable method for 'predict' applied to an object of class "NULL"


In [53]:
model

NULL

In [None]:
# had to comment out the following to get predition list to work
#   test_data$first_renewal_prediction[test_data$Status == "Deleted"]<-0
#   test_data$first_renewal_prediction<-round(test_data$first_renewal_prediction,3)
# made the following mods
test_data$first_renewal_prediction<-round(test_data$probabilities,3)
test_data$first_renewal_prediction[test_data$Status == "Deleted"]<-0
return(test_data)

In [158]:
# define vars
fullDir='../../data/output/models_20201015' # dir of models
# tld_reseller_list # already defined in global vars above
test_list = expiry_test_prepped_2_1

test_list_tld_reseller = test_list[tld_reseller_str]
test_df_tld_reseller =  rbindlist(test_list_tld_reseller,use.names=TRUE)    
reseller_str = test_df_tld_reseller %>% filter(tld_registrar_index==tld_reseller_str) %>% 
   distinct(reseller) %>% pull(reseller)

In [103]:
length(test_list)

In [73]:
# skipping agg models

In [74]:
# list & remove R model objects so we can test load()
ls(pattern='^model_seg')
rm(list=ls(pattern='^model_seg'))
ls()

In [141]:
# load all seg_glm objects into memory
lapply(Sys.glob(file.path(fullDir,'model_seg_glm_*')),load,.GlobalEnv)

In [142]:
ls(pattern='^model_seg')

In [159]:
model_name <- paste0('model_seg_glm_',str_replace_all(reseller_str, "[^[:alnum:]]", ""))
model_name

In [171]:
model_seg_glm_11internet


Call:  glm(formula = renewal_status ~ ., family = binomial(link = "logit"), 
    data = build_data, model = FALSE, y = FALSE)

Coefficients:
         (Intercept)  pattern_domain_count          log_reg_arpt  
            2.221461             -0.187971              0.022872  
          sld_length             sld_typel            sld_typeln  
           -0.038738             -0.399963             -0.742730  
   sld_typehyphen-ln           sld_typeidn             sld_typen  
           -0.596912              0.084214             -1.885978  
         day_domains            gibb_score            reg_period  
           -0.004669              0.013288             -0.091999  
             tldhost             tldonline              tldpress  
          -13.942045             -1.046547             -1.327404  
             tldsite              tldspace              tldstore  
           -2.478357             -2.002977             -1.089458  
             tldtech            tldwebsite  
         

In [162]:
model <- mget(model_name)

In [163]:
model

$model_seg_glm_11internet

Call:  glm(formula = renewal_status ~ ., family = binomial(link = "logit"), 
    data = build_data, model = FALSE, y = FALSE)

Coefficients:
         (Intercept)  pattern_domain_count          log_reg_arpt  
            2.221461             -0.187971              0.022872  
          sld_length             sld_typel            sld_typeln  
           -0.038738             -0.399963             -0.742730  
   sld_typehyphen-ln           sld_typeidn             sld_typen  
           -0.596912              0.084214             -1.885978  
         day_domains            gibb_score            reg_period  
           -0.004669              0.013288             -0.091999  
             tldhost             tldonline              tldpress  
          -13.942045             -1.046547             -1.327404  
             tldsite              tldspace              tldstore  
           -2.478357             -2.002977             -1.089458  
             tldtech        

In [170]:
preds_seg_glm = lapply(tld_reseller_list, 
           function(reseller_str) pred_seg_glm(
               test_list, 
               tld_reseller_str)
           )

[1] "fun1&1 internet"


ERROR: Error: value for ‘model_seg_glm_11internet’ not found


In [183]:
tld_reseller_str="fun1&1 internet"

In [184]:
print(tld_reseller_str)
test_list_tld_reseller = test_list[tld_reseller_str]
test_df_tld_reseller =  rbindlist(test_list_tld_reseller,use.names=TRUE)    
test_df_tld_reseller

[1] "fun1&1 internet"


renewal_type,renewed_count,expiry_date,domain_id,domain,creation_date,status,tld,registrar,reseller,reseller_country,region,reg_period,registrant_country,renewal_status,renew_mbg,renew_type,autorenew_type,renew_date,renew_registrar,renew_reseller,reg_revenue,reg_arpt,renew_period,renew_domain_revenue,renew_arpt,reg_arpt_org,tld_registrar_index,sld,sld_type,sld_length,sld_type2,day_domains,log_reg_arpt,gibb_score,pattern,cluster,pattern_score,pattern_domain_count
<chr>,<int>,<date>,<int>,<chr>,<date>,<chr>,<fct>,<chr>,<chr>,<chr>,<chr>,<int>,<chr>,<fct>,<int>,<chr>,<chr>,<date>,<chr>,<chr>,<dbl>,<dbl>,<int>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<fct>,<int>,<chr>,<int>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<int>
FirstTime,1,2020-01-22,91639803,segeln.fun,2019-01-22,Active,fun,1&1 internet,1&1 internet,Germany,Non China,1,,Renewed,0,auto-renewal,realized,2020-01-22,1&1 Internet,1&1 Internet,15,15,1,15,15,15,fun1&1 internet,segeln,l,6,6l,1,2.70805,2.23,segeln,1,1,1
FirstTime,1,2020-03-07,95819759,harzhotel.fun,2019-03-07,Active,fun,1&1 internet,1&1 internet,Germany,Non China,1,,Renewed,0,auto-renewal,realized,2020-03-07,1&1 Internet,1&1 Internet,15,15,1,15,15,15,fun1&1 internet,harzhotel,l,9,9l,1,2.70805,3.45,harzhotel,1,1,1


In [186]:
reseller_str = test_df_tld_reseller %>% filter(tld_registrar_index==tld_reseller_str) %>% 
   distinct(reseller) %>% pull(reseller)
reseller_str

In [187]:
model_name <- paste0('model_seg_glm_',str_replace_all(reseller_str, "[^[:alnum:]]", ""))
model_name

In [188]:
model_seg_glm_11internet


Call:  glm(formula = renewal_status ~ ., family = binomial(link = "logit"), 
    data = build_data, model = FALSE, y = FALSE)

Coefficients:
         (Intercept)  pattern_domain_count          log_reg_arpt  
            2.221461             -0.187971              0.022872  
          sld_length             sld_typel            sld_typeln  
           -0.038738             -0.399963             -0.742730  
   sld_typehyphen-ln           sld_typeidn             sld_typen  
           -0.596912              0.084214             -1.885978  
         day_domains            gibb_score            reg_period  
           -0.004669              0.013288             -0.091999  
             tldhost             tldonline              tldpress  
          -13.942045             -1.046547             -1.327404  
             tldsite              tldspace              tldstore  
           -2.478357             -2.002977             -1.089458  
             tldtech            tldwebsite  
         

In [189]:
model <- mget(model_name)

In [197]:
model2 <- get(model_name)

In [198]:
model2


Call:  glm(formula = renewal_status ~ ., family = binomial(link = "logit"), 
    data = build_data, model = FALSE, y = FALSE)

Coefficients:
         (Intercept)  pattern_domain_count          log_reg_arpt  
            2.221461             -0.187971              0.022872  
          sld_length             sld_typel            sld_typeln  
           -0.038738             -0.399963             -0.742730  
   sld_typehyphen-ln           sld_typeidn             sld_typen  
           -0.596912              0.084214             -1.885978  
         day_domains            gibb_score            reg_period  
           -0.004669              0.013288             -0.091999  
             tldhost             tldonline              tldpress  
          -13.942045             -1.046547             -1.327404  
             tldsite              tldspace              tldstore  
           -2.478357             -2.002977             -1.089458  
             tldtech            tldwebsite  
         

In [191]:
# if test data contains no observations, skip!
if (dim(test_df_tld_reseller)[1]==0){
    pred_df_seg_glm = NA
} else {


    pred = predict_first_renewal_reg(test_df_tld_reseller, model)

    pred_df_seg_glm = data.frame("actual" = pred$renewal_status,
                                  "predicted" = pred$first_renewal_prediction)
} 

In [192]:
pred

renewal_type,renewed_count,expiry_date,domain_id,domain,creation_date,status,tld,registrar,reseller,reseller_country,region,reg_period,registrant_country,renewal_status,renew_mbg,renew_type,autorenew_type,renew_date,renew_registrar,renew_reseller,reg_revenue,reg_arpt,renew_period,renew_domain_revenue,renew_arpt,reg_arpt_org,tld_registrar_index,sld,sld_type,sld_length,sld_type2,day_domains,log_reg_arpt,gibb_score,pattern,cluster,pattern_score,pattern_domain_count,probabilities,first_renewal_prediction
<chr>,<int>,<date>,<int>,<chr>,<date>,<chr>,<fct>,<chr>,<chr>,<chr>,<chr>,<int>,<chr>,<fct>,<int>,<chr>,<chr>,<date>,<chr>,<chr>,<dbl>,<dbl>,<int>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<fct>,<int>,<chr>,<int>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<int>,<dbl>,<dbl>
FirstTime,1,2020-01-22,91639803,segeln.fun,2019-01-22,Active,fun,1&1 internet,1&1 internet,Germany,Non China,1,,Renewed,0,auto-renewal,realized,2020-01-22,1&1 Internet,1&1 Internet,15,15,1,15,15,15,fun1&1 internet,segeln,,6,6l,1,2.70805,2.23,segeln,1,1,1,,
FirstTime,1,2020-03-07,95819759,harzhotel.fun,2019-03-07,Active,fun,1&1 internet,1&1 internet,Germany,Non China,1,,Renewed,0,auto-renewal,realized,2020-03-07,1&1 Internet,1&1 Internet,15,15,1,15,15,15,fun1&1 internet,harzhotel,,9,9l,1,2.70805,3.45,harzhotel,1,1,1,,


In [193]:
pred_df_seg_glm

actual,predicted
<fct>,<dbl>
Renewed,
Renewed,


In [194]:
pred_seg_glm <- function(test_list, tld_reseller_str) {
    # seg glm (reseller-segmented glm (including tld as predictor))
    
    print(tld_reseller_str)
    test_list_tld_reseller = test_list[tld_reseller_str]
    test_df_tld_reseller =  rbindlist(test_list_tld_reseller,use.names=TRUE)    
    reseller_str = test_df_tld_reseller %>% filter(tld_registrar_index==tld_reseller_str) %>% 
       distinct(reseller) %>% pull(reseller)
    
    model_name <- paste0('model_seg_glm_',str_replace_all(reseller_str, "[^[:alnum:]]", ""))
    model <- mget(model_name)

    # if test data contains no observations, skip!
    if (dim(test_df_tld_reseller)[1]==0){
        pred_df_seg_glm = NA
    } else {


        pred = predict_first_renewal_reg(test_df_tld_reseller, model)

        pred_df_seg_glm = data.frame("actual" = pred$renewal_status,
                                      "predicted" = pred$first_renewal_prediction)
    } 
    return(pred_df_seg_glm)
}

In [195]:
tld_reseller_str

In [200]:
test = pred_seg_glm(test_list, 
               tld_reseller_str)

[1] "fun1&1 internet"


“the condition has length > 1 and only the first element will be used”


In [201]:
test

actual,predicted
<fct>,<dbl>
Renewed,0.802
Renewed,0.785


In [215]:
preds_seg_glm = lapply(tld_reseller_list, 
           function(tld_reseller_str) pred_seg_glm(
               test_list, 
               tld_reseller_str)
           )

[1] "fun1&1 internet"


“the condition has length > 1 and only the first element will be used”


[1] "fun10dencehispahard"


“the condition has length > 1 and only the first element will be used”


[1] "fun1api"


“the condition has length > 1 and only the first element will be used”


[1] "funab name isp"
[1] "funactive 24"


“the condition has length > 1 and only the first element will be used”
“prediction from a rank-deficient fit may be misleading”


In [216]:
preds_seg_glm

actual,predicted
<fct>,<dbl>
Renewed,0.802
Renewed,0.785

actual,predicted
<fct>,<dbl>
Not Renewd,0
Not Renewd,0

actual,predicted
<fct>,<dbl>
Renewed,0.842
Not Renewd,0.843

actual,predicted
<fct>,<dbl>
Not Renewd,0.192
Not Renewd,0.198
Not Renewd,0.21
Not Renewd,0.212
Not Renewd,0.188
Not Renewd,0.197
Not Renewd,0.138
Renewed,0.118
Renewed,0.119
Renewed,0.196


In [217]:
ls()

In [219]:
# list & remove R model objects so we can test load()
ls(pattern='^model_seg_glm_')
rm(list=ls(pattern='^model_seg_glm_'))
ls(pattern='^model_seg_glm_')

In [235]:
lapply(Sys.glob(file.path(fullDir,'model_seg_rf_*')),load,.GlobalEnv)
preds_seg_rf = lapply(tld_reseller_list, 
       function(tld_reseller_str) pred_seg_rf(
           test_list, 
           tld_reseller_str)
       )
rm(list=ls(pattern='^model_seg_rf_'))
gc()  

Unnamed: 0,used,(Mb),gc trigger,(Mb).1,max used,(Mb).2
Ncells,6974640,372.5,13384612,714.9,13384612,714.9
Vcells,134400307,1025.4,222902998,1700.7,222902921,1700.7


In [237]:
lapply(Sys.glob(file.path(fullDir,'model_seg_rf_*')),load,.GlobalEnv)

In [259]:
length(test_list)

In [262]:
tld_reseller_str=tld_reseller_list[[5]]

In [284]:
pred_seg_rf(test_list, tld_reseller_str)

[1] 22 39
[1] "go"
[1] 22  2


actual,predicted
<fct>,<dbl>
Not Renewd,0.1782283
Not Renewd,0.1877276
Not Renewd,0.199204
Not Renewd,0.2086369
Not Renewd,0.2383924
Not Renewd,0.5100125
Not Renewd,0.2416895
Renewed,0.2047713
Renewed,0.2194942
Renewed,0.511191


In [273]:
test

actual,predicted
<fct>,<dbl>
Renewed,
Renewed,


In [290]:
lapply(Sys.glob(file.path(fullDir,'model_seg_rf_*')),load,.GlobalEnv)
preds_seg_rf = lapply(tld_reseller_list, 
       function(tld_reseller_str) pred_seg_rf(
           test_list, 
           tld_reseller_str)
       )
rm(list=ls(pattern='^model_seg_rf_'))
gc()  

Unnamed: 0,used,(Mb),gc trigger,(Mb).1,max used,(Mb).2
Ncells,5848520,312.4,13384612,714.9,13384612,714.9
Vcells,127591285,973.5,222902998,1700.7,222902921,1700.7


In [292]:
lapply(preds_seg_rf,nrow)
lapply(preds_seg_glm,nrow)

In [293]:
lapply(Sys.glob(file.path(fullDir,'model_seg2_glm_*')),load,.GlobalEnv)

In [295]:
preds_seg2_glm = lapply(tld_reseller_list, 
   function(tld_reseller_str) pred_seg2_glm(
       test_list, 
       tld_reseller_str)
   )

“the condition has length > 1 and only the first element will be used”
“prediction from a rank-deficient fit may be misleading”
“the condition has length > 1 and only the first element will be used”
“prediction from a rank-deficient fit may be misleading”
“the condition has length > 1 and only the first element will be used”
“prediction from a rank-deficient fit may be misleading”


In [296]:
preds_seg2_glm

actual,predicted
<fct>,<dbl>
Renewed,0.8459141
Renewed,0.4607455

actual,predicted
<fct>,<lgl>
Not Renewd,
Not Renewd,

actual,predicted
<fct>,<dbl>
Renewed,0.9999971
Not Renewd,0.9999992

actual,predicted
<fct>,<dbl>
Not Renewd,1.710762e-09
Not Renewd,0.2825345
Not Renewd,0.2842911
Not Renewd,0.2905496
Not Renewd,0.2516515
Not Renewd,0.2583409
Not Renewd,0.1904241
Renewed,0.1609926
Renewed,0.1373008
Renewed,0.2545271


In [305]:
lapply(Sys.glob(file.path(fullDir,'model_seg2_rf_*')),load,.GlobalEnv)
preds_seg2_rf = lapply(tld_reseller_list, 
       function(tld_reseller_str) pred_seg2_rf(
           test_list, 
           tld_reseller_str)
       )
rm(list=ls(pattern='^model_rf2_rf_'))
gc()  

Unnamed: 0,used,(Mb),gc trigger,(Mb).1,max used,(Mb).2
Ncells,5908440,315.6,13384612,714.9,13384612,714.9
Vcells,127788517,975.0,222902998,1700.7,222902921,1700.7


In [306]:
preds_seg2_rf

actual,predicted
<fct>,<dbl>
Renewed,0.7125206
Renewed,0.7248957

actual,predicted
<fct>,<dbl>
Not Renewd,0
Not Renewd,0

actual,predicted
<fct>,<dbl>
Renewed,0.832
Not Renewd,0.832

actual,predicted
<fct>,<dbl>
Not Renewd,0.12787708
Not Renewd,0.19177551
Not Renewd,0.2134116
Not Renewd,0.2134116
Not Renewd,0.24060072
Not Renewd,0.48983654
Not Renewd,0.1841928
Renewed,0.13453123
Renewed,0.15543138
Renewed,0.47717256


In [307]:
lapply(preds_seg_rf,nrow)
lapply(preds_seg_glm,nrow)
lapply(preds_seg2_glm,nrow)
lapply(preds_seg2_rf,nrow)
# lapply(preds_seg_glm,nrow)

In [303]:

source('functions_metalearning.R')

## combine all preds, cbind with test

In [336]:
x = list()
    i=1
for (tld_reseller_str in tld_reseller_list) {
    print(i)
    if (is.na(preds_seg_glm[[i]])) {
        x[[tld_reseller_str]]= NA
    } else{
        x[[tld_reseller_str]] = cbind(
            test_list[[tld_reseller_str]],
      preds_seg_glm[[i]]$predicted,
      preds_seg_rf[[i]]$predicted,
      preds_seg2_glm[[i]]$predicted,
      preds_seg2_rf[[i]]$predicted)
    }
    
    i=i+1
}
    
    

[1] 1


“the condition has length > 1 and only the first element will be used”


[1] 2


“the condition has length > 1 and only the first element will be used”


[1] 3


“the condition has length > 1 and only the first element will be used”


[1] 4
[1] 5


“the condition has length > 1 and only the first element will be used”


In [337]:
x

renewal_type,renewed_count,expiry_date,domain_id,domain,creation_date,status,tld,registrar,reseller,reseller_country,region,reg_period,registrant_country,renewal_status,renew_mbg,renew_type,autorenew_type,renew_date,renew_registrar,renew_reseller,reg_revenue,reg_arpt,renew_period,renew_domain_revenue,renew_arpt,reg_arpt_org,tld_registrar_index,sld,sld_type,sld_length,sld_type2,day_domains,log_reg_arpt,gibb_score,pattern,cluster,pattern_score,pattern_domain_count,V2,V3,V4,V5
<chr>,<int>,<date>,<int>,<chr>,<date>,<chr>,<fct>,<chr>,<chr>,<chr>,<chr>,<int>,<chr>,<fct>,<int>,<chr>,<chr>,<date>,<chr>,<chr>,<dbl>,<dbl>,<int>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<fct>,<int>,<chr>,<int>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<int>,<dbl>,<dbl>,<dbl>,<dbl>
FirstTime,1,2020-01-22,91639803,segeln.fun,2019-01-22,Active,fun,1&1 internet,1&1 internet,Germany,Non China,1,,Renewed,0,auto-renewal,realized,2020-01-22,1&1 Internet,1&1 Internet,15,15,1,15,15,15,fun1&1 internet,segeln,l,6,6l,1,2.70805,2.23,segeln,1,1,1,0.802,0.6801492,0.7125206,0.7125206
FirstTime,1,2020-03-07,95819759,harzhotel.fun,2019-03-07,Active,fun,1&1 internet,1&1 internet,Germany,Non China,1,,Renewed,0,auto-renewal,realized,2020-03-07,1&1 Internet,1&1 Internet,15,15,1,15,15,15,fun1&1 internet,harzhotel,l,9,9l,1,2.70805,3.45,harzhotel,1,1,1,0.785,0.6428311,0.7248957,0.7248957

renewal_type,renewed_count,expiry_date,domain_id,domain,creation_date,status,tld,registrar,reseller,reseller_country,region,reg_period,registrant_country,renewal_status,renew_mbg,renew_type,autorenew_type,renew_date,renew_registrar,renew_reseller,reg_revenue,reg_arpt,renew_period,renew_domain_revenue,renew_arpt,reg_arpt_org,tld_registrar_index,sld,sld_type,sld_length,sld_type2,day_domains,log_reg_arpt,gibb_score,pattern,cluster,pattern_score,pattern_domain_count,V2,V3,V4,V5
<chr>,<int>,<date>,<int>,<chr>,<date>,<chr>,<fct>,<chr>,<chr>,<chr>,<chr>,<int>,<chr>,<fct>,<int>,<chr>,<chr>,<date>,<chr>,<chr>,<dbl>,<dbl>,<int>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<fct>,<int>,<chr>,<int>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<int>,<dbl>,<dbl>,<dbl>,<dbl>
FirstTime,1,2020-01-15,91044973,movistar.fun,2019-01-15,Active,fun,10dencehispahard,10dencehispahard,Spain,Non China,1,,Not Renewd,0,auto-renewal,unrealized,2020-01-15,10dencehispahard,10dencehispahard,15,15,1,15,15,15,fun10dencehispahard,movistar,l,8,8l,1,2.70805,8.62,movistar,1,1,1,0,0.3462084,0,0
FirstTime,1,2020-02-01,92472913,kuinik.fun,2019-02-01,Active,fun,10dencehispahard,10dencehispahard,Spain,Non China,1,,Not Renewd,0,auto-renewal,unrealized,2020-02-01,10dencehispahard,10dencehispahard,15,15,1,15,15,15,fun10dencehispahard,kuinik,l,6,6l,1,2.70805,3.13,kuinik,1,1,1,0,0.3169892,0,0

renewal_type,renewed_count,expiry_date,domain_id,domain,creation_date,status,tld,registrar,reseller,reseller_country,region,reg_period,registrant_country,renewal_status,renew_mbg,renew_type,autorenew_type,renew_date,renew_registrar,renew_reseller,reg_revenue,reg_arpt,renew_period,renew_domain_revenue,renew_arpt,reg_arpt_org,tld_registrar_index,sld,sld_type,sld_length,sld_type2,day_domains,log_reg_arpt,gibb_score,pattern,cluster,pattern_score,pattern_domain_count,V2,V3,V4,V5
<chr>,<int>,<date>,<int>,<chr>,<date>,<chr>,<fct>,<chr>,<chr>,<chr>,<chr>,<int>,<chr>,<fct>,<int>,<chr>,<chr>,<date>,<chr>,<chr>,<dbl>,<dbl>,<int>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<fct>,<int>,<chr>,<int>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<int>,<dbl>,<dbl>,<dbl>,<dbl>
FirstTime,1,2020-01-21,91532433,bulgari.fun,2019-01-21,Active,fun,1api,1api,Germany,Non China,1,,Renewed,0,auto-renewal,realized,2020-01-21,1API,1API,5,5,1,15,15,5,fun1api,bulgari,l,7,7l,1,1.609438,4.51,bulgari,1,1,1,0.842,0.8486598,0.832,0.832
FirstTime,1,2020-04-24,101717674,gotsome.fun,2019-04-24,Active,fun,1api,1api,Germany,Non China,1,,Not Renewd,0,auto-renewal,unrealized,2020-04-24,1API,1API,5,5,1,15,15,5,fun1api,gotsome,l,7,7l,1,1.609438,6.09,gotsome,1,1,1,0.843,0.8421671,0.832,0.832

renewal_type,renewed_count,expiry_date,domain_id,domain,creation_date,status,tld,registrar,reseller,reseller_country,region,reg_period,registrant_country,renewal_status,renew_mbg,renew_type,autorenew_type,renew_date,renew_registrar,renew_reseller,reg_revenue,reg_arpt,renew_period,renew_domain_revenue,renew_arpt,reg_arpt_org,tld_registrar_index,sld,sld_type,sld_length,sld_type2,day_domains,log_reg_arpt,gibb_score,cluster,pattern,pattern_score,pattern_domain_count,V2,V3,V4,V5
<chr>,<int>,<date>,<int>,<chr>,<date>,<chr>,<fct>,<chr>,<chr>,<chr>,<chr>,<int>,<chr>,<fct>,<int>,<chr>,<chr>,<date>,<chr>,<chr>,<dbl>,<dbl>,<int>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<fct>,<int>,<chr>,<int>,<dbl>,<dbl>,<chr>,<chr>,<dbl>,<int>,<dbl>,<dbl>,<dbl>,<dbl>
FirstTime,1,2020-01-16,91155123,e-smokevirtual.fun,2019-01-16,Active,fun,ascio,active 24,Czech Republic,Non China,1,,Not Renewd,0,auto-renewal,unrealized,2020-01-16,Ascio,Active 24,0.5,0.5,1,15,15,0.5,funactive 24,e-smokevirtual,hyphen-l,14,14hyphen-l,3,-0.6931472,4.37,1,e-smokevirtual,0.1111111,1,0.192,0.1782283,0.12787708,0.12787708
FirstTime,1,2020-01-16,91146808,rovnatka.fun,2019-01-16,Active,fun,ascio,active 24,Czech Republic,Non China,1,,Not Renewd,0,auto-renewal,unrealized,2020-01-16,Ascio,Active 24,0.5,0.5,1,15,15,0.5,funactive 24,rovnatka,l,8,8l,3,-0.6931472,1.57,2,rovnatka,0.1111111,1,0.198,0.1877276,0.19177551,0.19177551
FirstTime,1,2020-01-16,91145283,myokd.fun,2019-01-16,Active,fun,ascio,active 24,Czech Republic,Non China,1,,Not Renewd,0,auto-renewal,unrealized,2020-01-16,Ascio,Active 24,0.5,0.5,1,15,15,0.5,funactive 24,myokd,l,5,5l,3,-0.6931472,1.18,3,myokd,0.1111111,1,0.21,0.199204,0.2134116,0.2134116
FirstTime,1,2020-01-17,91262818,vtipy.fun,2019-01-17,Active,fun,ascio,active 24,Czech Republic,Non China,1,,Not Renewd,0,auto-renewal,unrealized,2020-01-17,Ascio,Active 24,0.5,0.5,1,15,15,0.5,funactive 24,vtipy,l,5,5l,3,-0.6931472,0.63,1,vtipy,0.1111111,1,0.212,0.2086369,0.2134116,0.2134116
FirstTime,1,2020-01-17,91231718,blahoviny.fun,2019-01-17,Active,fun,ascio,active 24,Czech Republic,Non China,1,,Not Renewd,0,auto-renewal,unrealized,2020-01-17,Ascio,Active 24,0.5,0.5,1,15,15,0.5,funactive 24,blahoviny,l,9,9l,3,-0.6931472,4.49,2,blahoviny,0.1111111,1,0.188,0.2383924,0.24060072,0.24060072
FirstTime,1,2020-01-17,91220903,maxitip.fun,2019-01-17,Active,fun,ascio,active 24,Czech Republic,Non China,1,,Not Renewd,0,auto-renewal,unrealized,2020-01-17,Ascio,Active 24,0.5,0.5,1,15,15,0.5,funactive 24,maxitip,l,7,7l,3,-0.6931472,3.7,3,maxitip,0.1111111,1,0.197,0.5100125,0.48983654,0.48983654
FirstTime,1,2020-01-20,91430133,quads4.fun,2019-01-20,Active,fun,ascio,active 24,Czech Republic,Non China,1,,Not Renewd,0,auto-renewal,unrealized,2020-01-20,Ascio,Active 24,0.5,0.5,1,15,15,0.5,funactive 24,quads4,ln,6,6ln,1,-0.6931472,7.29,quads4,1,1.0,1,0.138,0.2416895,0.1841928,0.1841928
FirstTime,1,2020-01-23,91697458,converge4.fun,2019-01-23,Active,fun,ascio,active 24,Czech Republic,Non China,1,,Renewed,0,renewal,,2020-01-23,Ascio,Active 24,0.5,0.5,1,15,15,0.5,funactive 24,converge4,ln,9,9ln,3,-0.6931472,8.2,1,converge4,0.1111111,1,0.118,0.2047713,0.13453123,0.13453123
FirstTime,1,2020-01-23,91734683,sendit4.fun,2019-01-23,Active,fun,ascio,active 24,Czech Republic,Non China,1,,Renewed,0,renewal,,2020-01-23,Ascio,Active 24,0.5,0.5,1,15,15,0.5,funactive 24,sendit4,ln,7,7ln,3,-0.6931472,11.41,2,sendit4,0.1111111,1,0.119,0.2194942,0.15543138,0.15543138
FirstTime,1,2020-01-23,91679788,matylda.fun,2019-01-23,Active,fun,ascio,active 24,Czech Republic,Non China,1,,Renewed,0,auto-renewal,realized,2020-01-23,Ascio,Active 24,0.5,0.5,1,15,15,0.5,funactive 24,matylda,l,7,7l,3,-0.6931472,4.06,3,matylda,0.1111111,1,0.196,0.511191,0.47717256,0.47717256


In [341]:
na.omit.list <- function(y) { return(y[!sapply(y, function(x) all(is.na(x)))]) }
                                               
x <- na.omit.list(x)
df <- rbindlist(x,use.names=TRUE)

In [342]:
dim(df)
head(df)

renewal_type,renewed_count,expiry_date,domain_id,domain,creation_date,status,tld,registrar,reseller,reseller_country,region,reg_period,registrant_country,renewal_status,renew_mbg,renew_type,autorenew_type,renew_date,renew_registrar,renew_reseller,reg_revenue,reg_arpt,renew_period,renew_domain_revenue,renew_arpt,reg_arpt_org,tld_registrar_index,sld,sld_type,sld_length,sld_type2,day_domains,log_reg_arpt,gibb_score,pattern,cluster,pattern_score,pattern_domain_count,V2,V3,V4,V5
<chr>,<int>,<date>,<int>,<chr>,<date>,<chr>,<fct>,<chr>,<chr>,<chr>,<chr>,<int>,<chr>,<fct>,<int>,<chr>,<chr>,<date>,<chr>,<chr>,<dbl>,<dbl>,<int>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<fct>,<int>,<chr>,<int>,<dbl>,<dbl>,<chr>,<chr>,<dbl>,<int>,<dbl>,<dbl>,<dbl>,<dbl>
FirstTime,1,2020-01-22,91639803,segeln.fun,2019-01-22,Active,fun,1&1 internet,1&1 internet,Germany,Non China,1,,Renewed,0,auto-renewal,realized,2020-01-22,1&1 Internet,1&1 Internet,15,15,1,15,15,15,fun1&1 internet,segeln,l,6,6l,1,2.70805,2.23,segeln,1,1,1,0.802,0.6801492,0.7125206,0.7125206
FirstTime,1,2020-03-07,95819759,harzhotel.fun,2019-03-07,Active,fun,1&1 internet,1&1 internet,Germany,Non China,1,,Renewed,0,auto-renewal,realized,2020-03-07,1&1 Internet,1&1 Internet,15,15,1,15,15,15,fun1&1 internet,harzhotel,l,9,9l,1,2.70805,3.45,harzhotel,1,1,1,0.785,0.6428311,0.7248957,0.7248957
FirstTime,1,2020-01-15,91044973,movistar.fun,2019-01-15,Active,fun,10dencehispahard,10dencehispahard,Spain,Non China,1,,Not Renewd,0,auto-renewal,unrealized,2020-01-15,10dencehispahard,10dencehispahard,15,15,1,15,15,15,fun10dencehispahard,movistar,l,8,8l,1,2.70805,8.62,movistar,1,1,1,0.0,0.3462084,0.0,0.0
FirstTime,1,2020-02-01,92472913,kuinik.fun,2019-02-01,Active,fun,10dencehispahard,10dencehispahard,Spain,Non China,1,,Not Renewd,0,auto-renewal,unrealized,2020-02-01,10dencehispahard,10dencehispahard,15,15,1,15,15,15,fun10dencehispahard,kuinik,l,6,6l,1,2.70805,3.13,kuinik,1,1,1,0.0,0.3169892,0.0,0.0
FirstTime,1,2020-01-21,91532433,bulgari.fun,2019-01-21,Active,fun,1api,1api,Germany,Non China,1,,Renewed,0,auto-renewal,realized,2020-01-21,1API,1API,5,5,1,15,15,5,fun1api,bulgari,l,7,7l,1,1.609438,4.51,bulgari,1,1,1,0.842,0.8486598,0.832,0.832
FirstTime,1,2020-04-24,101717674,gotsome.fun,2019-04-24,Active,fun,1api,1api,Germany,Non China,1,,Not Renewd,0,auto-renewal,unrealized,2020-04-24,1API,1API,5,5,1,15,15,5,fun1api,gotsome,l,7,7l,1,1.609438,6.09,gotsome,1,1,1,0.843,0.8421671,0.832,0.832
