In [1]:
import pandas as pd
import numpy as np
from scipy import sparse
from sklearn.feature_extraction import FeatureHasher
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, scale
from sklearn.decomposition import TruncatedSVD, SparsePCA
from sklearn.cross_validation import train_test_split, cross_val_score
from sklearn.feature_selection import SelectPercentile, f_classif, chi2, SelectKBest
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.metrics import log_loss
from sklearn import pipeline, grid_search

In [2]:
app_ev = pd.read_csv(".\\raw_data\\app_events.csv", dtype={'app_id': np.str})
app_ev.head()

Unnamed: 0,event_id,app_id,is_installed,is_active
0,2,5927333115845830913,1,1
1,2,-5720078949152207372,1,0
2,2,-1633887856876571208,1,0
3,2,-653184325010919369,1,1
4,2,8693964245073640147,1,1


In [3]:
label_cat = pd.read_csv('.\\raw_data\\label_categories.csv')
print(label_cat.shape)
label_cat = label_cat.fillna('Other')
label_cat.head()

(930, 2)


Unnamed: 0,label_id,category
0,1,Other
1,2,game-game type
2,3,game-Game themes
3,4,game-Art Style
4,5,game-Leisure time


In [4]:
app_labels = pd.read_csv('.\\raw_data\\app_labels.csv',dtype={'app_id': np.str})
print(app_labels.shape)
app_labels.head()

(459943, 2)


Unnamed: 0,app_id,label_id
0,7324884708820027918,251
1,-4494216993218550286,251
2,6058196446775239644,406
3,6058196446775239644,407
4,8694625920731541625,406


In [5]:
app_labels = pd.merge(app_labels, label_cat, how='inner',on='label_id')
app_labels.drop('label_id',axis=1, inplace=True)
print(app_labels.shape)
del label_cat
app_labels.head()

(459943, 2)


Unnamed: 0,app_id,category
0,7324884708820027918,Finance
1,-4494216993218550286,Finance
2,8756705988821000489,Finance
3,1061207043315821111,Finance
4,-1491198667294647703,Finance


In [6]:
app_labels = app_labels.groupby("app_id")["category"].apply(lambda x: " ".join(set("category:" + str(s).replace(" ","_") for s in x)))

In [7]:
app_ev = pd.merge(app_ev,pd.DataFrame(app_labels).reset_index(), how='left',on='app_id')

In [8]:
app_ev.head()

Unnamed: 0,event_id,app_id,is_installed,is_active,category
0,2,5927333115845830913,1,1,category:Property_Industry_1.0 category:IM cat...
1,2,-5720078949152207372,1,0,category:Personal_Effectiveness_1 category:unk...
2,2,-1633887856876571208,1,0,category:Higher_income category:P2P_net_loan c...
3,2,-653184325010919369,1,1,category:Medium_risk category:Finance category...
4,2,8693964245073640147,1,1,category:Property_Industry_1.0 category:IM cat...


In [9]:
app_ev1 = app_ev.groupby("event_id")["app_id"].apply(lambda x: " ".join(set("app_id:" + str(s) for s in x)))

In [10]:
pd.DataFrame(app_ev1).reset_index().to_csv('.\\transformed_data\\app_ev1.csv',index=False)
del app_ev1
#events = events.reset_index(name="app_id")

In [11]:
app_ev2 = app_ev.groupby("event_id")["category"].apply(lambda x: " ".join(set(str(" ".join(str(s) for s in x)).split(" "))))
pd.DataFrame(app_ev2).reset_index().to_csv('.\\transformed_data\\app_ev2.csv',index=False)
del app_ev2

In [12]:
del app_ev
app_ev = pd.merge(pd.read_csv('.\\transformed_data\\app_ev1.csv'),pd.read_csv('.\\transformed_data\\app_ev2.csv'),
                   how='inner',on='event_id')

In [13]:
app_ev.head()

Unnamed: 0,event_id,app_id,category
0,2,app_id:701031210314548601 app_id:3717049149426...,category:Consumer_loans category:Financial_Ser...
1,6,app_id:1261332316628097511 app_id:-58398582699...,category:Financial_Services category:fixed_inc...
2,7,app_id:-5372083417770193025 app_id:-5839858269...,category:Consumer_loans category:Financial_Ser...
3,9,app_id:-5839858269967688123 app_id:-7020654533...,category:Financial_Services category:fixed_inc...
4,16,app_id:4811881744526295139 app_id:-17633617655...,category:Consumer_loans category:Financial_Ser...


In [14]:
events = pd.read_csv(".\\raw_data\\events.csv", dtype={'device_id': np.str})

In [15]:
events.head()

Unnamed: 0,event_id,device_id,timestamp,longitude,latitude
0,1,29182687948017175,2016-05-01 00:55:25,121.38,31.24
1,2,-6401643145415154744,2016-05-01 00:54:12,103.65,30.97
2,3,-4833982096941402721,2016-05-01 00:08:05,106.6,29.7
3,4,-6815121365017318426,2016-05-01 00:06:40,104.27,23.28
4,5,-5373797595892518570,2016-05-01 00:07:18,115.88,28.66


In [16]:
events["category"] = events["event_id"].map(pd.Series(data=app_ev.category, index=app_ev.event_id))
events["app_id"] = events["event_id"].map(pd.Series(data=app_ev.app_id, index=app_ev.event_id))

In [17]:
events = events.dropna()
print(events.shape)
print(events.isnull().sum())
del app_ev

(680651, 7)
event_id     0
device_id    0
timestamp    0
longitude    0
latitude     0
category     0
app_id       0
dtype: int64


In [18]:
print(events.shape)
events.head()

(680651, 7)


Unnamed: 0,event_id,device_id,timestamp,longitude,latitude,category,app_id
1,2,-6401643145415154744,2016-05-01 00:54:12,103.65,30.97,category:Consumer_loans category:Financial_Ser...,app_id:-5372083417770193025 app_id:-5839858269...
5,6,1476664663289716375,2016-05-01 00:27:21,0.0,0.0,category:Financial_Services category:fixed_inc...,app_id:-4950143259631681001 app_id:-2339074173...
6,7,5990807147117726237,2016-05-01 00:15:13,113.73,23.0,category:Consumer_loans category:Financial_Ser...,app_id:-3864848260969172656 app_id:-5839858269...
8,9,-2073340001552902943,2016-05-01 00:15:33,0.0,0.0,category:1_free category:Low_risk category:Pro...,app_id:2632326908921001951 app_id:592733311584...
15,16,9070651185984875886,2016-05-01 00:06:06,0.0,0.0,category:Book_hotel_complex category:service c...,app_id:-5577604742451702341 app_id:73483404052...


In [19]:
events['timestamp'] = pd.to_datetime(events['timestamp'])
events['hour'] = events['timestamp'].map(lambda x : x.hour)
events['min'] = events['timestamp'].map(lambda x : x.minute)

In [20]:
len(events['device_id'].unique())

53161

In [21]:
gd = events.groupby('device_id')
longitude = gd['longitude'].agg([np.max, np.min, np.mean, np.std, np.median]).reset_index()
longitude = longitude.rename(columns={'amax':'long_max','amin':'long_min','mean':'long_mean','median':'long_median','std':'long_std'})
longitude['long_std'].fillna(0,inplace=True)

In [22]:
latitude = gd['longitude'].agg([np.max, np.min, np.mean, np.std, np.median]).reset_index()
latitude = latitude.rename(columns={'amax':'lat_max','amin':'lat_min','mean':'lat_mean','median':'lat_median','std':'lat_std'})
latitude['lat_std'].fillna(0,inplace=True)

In [23]:
hour = gd['hour'].agg([np.max, np.min, np.mean, np.std, np.median]).reset_index()
hour = hour.rename(columns={'amax':'hour_max','amin':'hour_min','mean':'hour_mean','median':'hour_median','std':'hour_std'})
hour['hour_std'].fillna(0,inplace=True)

In [24]:
minute = gd['min'].agg([np.max, np.min, np.mean, np.std, np.median]).reset_index()
minute = minute.rename(columns={'amax':'min_max','amin':'min_min','mean':'min_mean','median':'min_median','std':'min_std'})
minute['min_std'].fillna(0,inplace=True)

In [26]:
events1 = events[["device_id", "app_id"]]
# remove duplicates(app_id)
events1 = events1.groupby("device_id")["app_id"].apply(lambda x: " ".join(set(str(" ".join(str(s) for s in x)).split(" "))))
events1 = events1.reset_index(name="app_id")

In [27]:
events2 = events[["device_id", "category"]]
# remove duplicates(app_id)
events2 = events2.groupby("device_id")["category"].apply(lambda x: " ".join(set(str(" ".join(str(s) for s in x)).split(" "))))
events2 = events2.reset_index(name="category")

In [28]:
del events

In [29]:
print("# Read Phone Brand")
pbd = pd.read_csv(".\\raw_data\phone_brand_device_model.csv",dtype={'device_id': np.str})
pbd.drop_duplicates('device_id', keep='first', inplace=True)

# Read Phone Brand


In [30]:
train = pd.read_csv(".\\raw_data\gender_age_train.csv",dtype={'device_id': np.str})
train.drop(["age", "gender"], axis=1, inplace=True)

test = pd.read_csv(".\\raw_data\gender_age_test.csv",dtype={'device_id': np.str})
test["group"] = np.nan

In [31]:
print(events1.shape)
events1.head()

(53161, 2)


Unnamed: 0,device_id,app_id
0,-100015673884079572,app_id:-2573239141534432308 app_id:-3864848260...
1,-1000667340060427374,app_id:701031210314548601 app_id:-386484826096...
2,-100101996136889832,app_id:4419243384223088778 app_id:734834040520...
3,-1001337759327042486,app_id:701031210314548601 app_id:-583985826996...
4,-1001384358977718793,app_id:-3864848260969172656 app_id:-4310155758...


In [32]:
print(events2.shape)
events2.head()

(53161, 2)


Unnamed: 0,device_id,category
0,-100015673884079572,category:Consumer_loans category:Cool_trendy c...
1,-1000667340060427374,category:Consumer_loans category:Cool_trendy c...
2,-100101996136889832,category:fixed_income category:Sports_News cat...
3,-1001337759327042486,category:Consumer_loans category:Contacts cate...
4,-1001384358977718793,category:Financial_Services category:1_free ca...


In [33]:
print(longitude.shape)
longitude.head()

(53161, 6)


Unnamed: 0,device_id,long_max,long_min,long_mean,long_std,long_median
0,-100015673884079572,0.0,0.0,0.0,0.0,0.0
1,-1000667340060427374,116.69,0.0,24.024412,47.892567,0.0
2,-100101996136889832,114.4,114.4,114.4,0.0,114.4
3,-1001337759327042486,120.2,119.5,119.791538,0.267098,119.655
4,-1001384358977718793,0.0,0.0,0.0,0.0,0.0


In [34]:
print(latitude.shape)
latitude.head()

(53161, 6)


Unnamed: 0,device_id,lat_max,lat_min,lat_mean,lat_std,lat_median
0,-100015673884079572,0.0,0.0,0.0,0.0,0.0
1,-1000667340060427374,116.69,0.0,24.024412,47.892567,0.0
2,-100101996136889832,114.4,114.4,114.4,0.0,114.4
3,-1001337759327042486,120.2,119.5,119.791538,0.267098,119.655
4,-1001384358977718793,0.0,0.0,0.0,0.0,0.0


In [35]:
print(hour.shape)
hour.head()

(53161, 6)


Unnamed: 0,device_id,hour_max,hour_min,hour_mean,hour_std,hour_median
0,-100015673884079572,23,0,9.75,7.440112,8.0
1,-1000667340060427374,23,0,14.058824,5.954073,13.0
2,-100101996136889832,22,22,22.0,0.0,22.0
3,-1001337759327042486,16,8,10.346154,2.208027,10.0
4,-1001384358977718793,10,0,4.333333,5.131601,3.0


In [36]:
print(minute.shape)
minute.head()

(53161, 6)


Unnamed: 0,device_id,min_max,min_min,min_mean,min_std,min_median
0,-100015673884079572,51,1,25.8,16.227332,28.0
1,-1000667340060427374,57,0,19.441176,20.365138,5.5
2,-100101996136889832,50,50,50.0,0.0,50.0
3,-1001337759327042486,59,0,22.115385,13.066987,19.5
4,-1001384358977718793,42,11,27.666667,15.631165,30.0


In [37]:
print(train.shape)
print(test.shape)
split_len = len(train)
Df = pd.concat((train, test), axis=0, ignore_index=True)

(74645, 2)
(112071, 2)


In [38]:
e = pd.merge(events1, events2, how='inner',on='device_id')
m = pd.merge(e, hour, how='inner',on='device_id')
m1 = pd.merge(m, minute, how='inner',on='device_id')
m2 = pd.merge(m1, latitude, how='inner',on='device_id')
device_info = pd.merge(m2, longitude, how='inner',on='device_id')

del e
del m
del m1
del m2

In [39]:
print(device_info.shape)
device_info.columns

(53161, 23)


Index(['device_id', 'app_id', 'category', 'hour_max', 'hour_min', 'hour_mean',
       'hour_std', 'hour_median', 'min_max', 'min_min', 'min_mean', 'min_std',
       'min_median', 'lat_max', 'lat_min', 'lat_mean', 'lat_std', 'lat_median',
       'long_max', 'long_min', 'long_mean', 'long_std', 'long_median'],
      dtype='object')

In [40]:
device_info["hour_max"] = device_info["hour_max"].apply(lambda x: "hour_max:" + str(x))
device_info["hour_min"] = device_info["hour_min"].apply(lambda x: "hour_min:" + str(x))
device_info["hour_mean"] = device_info["hour_mean"].apply(lambda x: "hour_mean:" + str(x))
device_info["hour_std"] = device_info["hour_std"].apply(lambda x: "hour_std:" + str(x))
device_info["hour_median"] = device_info["hour_median"].apply(lambda x: "hour_median:" + str(x))
device_info["min_max"] = device_info["min_max"].apply(lambda x: "min_max:" + str(x))
device_info["min_min"] = device_info["min_min"].apply(lambda x: "min_min:" + str(x))
device_info["min_std"] = device_info["min_std"].apply(lambda x: "min_std:" + str(x))
device_info["min_median"] = device_info["min_median"].apply(lambda x: "min_median:" + str(x))
device_info["min_mean"] = device_info["min_mean"].apply(lambda x: "min_mean:" + str(x))
device_info["lat_max"] = device_info["lat_max"].apply(lambda x: "min_lat_maxmax:" + str(x))
device_info["lat_min"] = device_info["lat_min"].apply(lambda x: "lat_min:" + str(x))
device_info["lat_std"] = device_info["lat_std"].apply(lambda x: "lat_std:" + str(x))
device_info["lat_median"] = device_info["lat_median"].apply(lambda x: "lat_median:" + str(x))
device_info["lat_mean"] = device_info["lat_mean"].apply(lambda x: "lat_mean:" + str(x))
device_info["long_max"] = device_info["long_max"].apply(lambda x: "long_max:" + str(x))
device_info["long_min"] = device_info["long_min"].apply(lambda x: "long_min:" + str(x))
device_info["long_mean"] = device_info["long_mean"].apply(lambda x: "long_mean:" + str(x))
device_info["long_std"] = device_info["long_std"].apply(lambda x: "long_std:" + str(x))
device_info["long_median"] = device_info["long_median"].apply(lambda x: "long_median:" + str(x))

In [41]:
# Group Labels
Y_train = train["group"]
lable_group = LabelEncoder()
Y_train = lable_group.fit_transform(Y_train)
device_id = test["device_id"]

Df = pd.merge(Df, pbd, how="left", on="device_id")
Df["phone_brand"] = Df["phone_brand"].apply(lambda x: "phone_brand:" + str(x))
Df["device_model"] = Df["device_model"].apply(lambda x: "device_model:" + str(x))

In [42]:
###################
#  Concat Feature
###################

apps = pd.concat([pd.Series(row['device_id'], row['app_id'].split(' '))for _, row in device_info.iterrows()]).reset_index()
apps.columns = ['app_id', 'device_id']

cats = pd.concat([pd.Series(row['device_id'], row['category'].split(' '))for _, row in device_info.iterrows()]).reset_index()
cats.columns = ['category', 'device_id']


f1 = Df[["device_id", "phone_brand"]]   # phone_brand
f2 = Df[["device_id", "device_model"]]  # device_model
f3 = apps[["device_id", "app_id"]]    # app_id
f4 = device_info[["device_id","hour_max"]]
f5 = device_info[["device_id","hour_min"]]
f6 = device_info[["device_id","hour_mean"]]
f7 = device_info[["device_id","hour_std"]]
f8 = device_info[["device_id","hour_median"]]
f9 = device_info[["device_id","min_max"]]
f10 = device_info[["device_id","min_min"]]
f11 = device_info[["device_id","min_mean"]]
f12 = device_info[["device_id","min_median"]]
f13 = device_info[["device_id","lat_max"]]
f14 = device_info[["device_id","lat_min"]]
f15 = device_info[["device_id","lat_mean"]]
f16 = device_info[["device_id","lat_std"]]
f17 = device_info[["device_id","lat_median"]]
f18 = device_info[["device_id","long_max"]]
f19 = device_info[["device_id","long_min"]]
f20 = device_info[["device_id","long_mean"]]
f21 = device_info[["device_id","long_std"]]
f22 = device_info[["device_id","long_median"]]
f23 = cats[["device_id", "category"]]    # app_id

del apps
del Df
del device_info

f1.columns.values[1] = "feature"
f2.columns.values[1] = "feature"
f3.columns.values[1] = "feature"
f4.columns.values[1] = "feature"
f5.columns.values[1] = "feature"
f6.columns.values[1] = "feature"
f7.columns.values[1] = "feature"
f8.columns.values[1] = "feature"
f9.columns.values[1] = "feature"
f10.columns.values[1] = "feature"
f11.columns.values[1] = "feature"
f12.columns.values[1] = "feature"
f13.columns.values[1] = "feature"
f14.columns.values[1] = "feature"
f15.columns.values[1] = "feature"
f16.columns.values[1] = "feature"
f17.columns.values[1] = "feature"
f18.columns.values[1] = "feature"
f19.columns.values[1] = "feature"
f20.columns.values[1] = "feature"
f21.columns.values[1] = "feature"
f22.columns.values[1] = "feature"
f23.columns.values[1] = "feature"

FLS = pd.concat((f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14, f15, f16, f17, f18, f19,
                 f20, f21, f22,f23), axis=0, ignore_index=True)

In [43]:
#FLS[FLS.device_id == '-8260683887967679142']

In [44]:
###################
# User-Item Feature
###################
print("# User-Item-Feature")

device_ids = FLS["device_id"].unique()
feature_cs = FLS["feature"].unique()

data = np.ones(len(FLS))
dec = LabelEncoder().fit(FLS["device_id"])
row = dec.transform(FLS["device_id"])
col = LabelEncoder().fit_transform(FLS["feature"])
print(len(row))
print(len(col))
print(len(device_ids))
print(len(feature_cs))
print(data.shape)
sparse_matrix = sparse.csr_matrix((data, (row, col)), shape=(len(device_ids), len(feature_cs)))

sparse_matrix = sparse_matrix[:, sparse_matrix.getnnz(0) > 0]

# User-Item-Feature
16195606
16195606
188653
146988
(16195606,)


In [45]:
sparse_matrix

<188653x146988 sparse matrix of type '<class 'numpy.float64'>'
	with 16195606 stored elements in Compressed Sparse Row format>

In [49]:
train_row = dec.transform(train["device_id"])
X_train = sparse_matrix[train_row, :]

test_row = dec.transform(test["device_id"])
X_test = sparse_matrix[test_row, :]

In [50]:
##################
#   Feature Sel
##################
print("# Feature Selection")
selector = SelectPercentile(f_classif, percentile=10)

selector.fit(X_train, Y_train)

X_train = selector.transform(X_train)
X_test = selector.transform(X_test)

print("# Num of Features: ", X_train.shape[1])

# Feature Selection




# Num of Features:  14698


In [48]:
#('f_classif','chi2','SelectKBest'):
for i in (3,5):
    train_row = dec.transform(train["device_id"])
    X_train = sparse_matrix[train_row, :]

    test_row = dec.transform(test["device_id"])
    X_test = sparse_matrix[test_row, :]
        
    selector = SelectPercentile(f_classif, percentile=i)
    selector.fit(X_train, Y_train)

    X_train = selector.transform(X_train)
    X_test = selector.transform(X_test)

    lg = LogisticRegression(random_state=23, fit_intercept=True, class_weight= None)
    param_grid = {'C': [0.1]}
    lg_gs = grid_search.GridSearchCV(estimator = lg, param_grid = param_grid, cv = 5, n_jobs=-1, scoring='log_loss', error_score=0, verbose=2) 
    lg_gs.fit(X_train, Y_train)
    print("Feature sel %",i)
    print("# Num of Features: ", X_train.shape[1])
    print("Best parameters found by grid search:", lg_gs.best_params_)
    print("Best CV score:", lg_gs.best_score_)
    print("Best CV score:", lg_gs.grid_scores_)
#23%
#Best parameters found by grid search: {'class_weight': None, 'C': 0.1}
#Best CV score: -2.30189906196
#10%
#Best parameters found by grid search: {'class_weight': None, 'C': 0.1}
#mean: -2.29573, std: 0.03596 LB: 2.28530
#3%
#mean: -2.31534, std: 0.03358



Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:  2.4min finished


Feature sel % 3
# Num of Features:  4409
Best parameters found by grid search: {'C': 0.1}
Best CV score: -2.49878866947
Best CV score: [mean: -2.49879, std: 0.02829, params: {'C': 0.1}]




Fitting 5 folds for each of 1 candidates, totalling 5 fits


KeyboardInterrupt: 

In [None]:
abc = AdaBoostClassifier(n_estimators=100, random_state=3)
param_grid = {'learning_rate': [0.01]}
abc_gs = grid_search.GridSearchCV(estimator = abc, param_grid = param_grid, cv = 6, n_jobs=-1, scoring='log_loss', error_score=0, verbose=2) 
abc_gs.fit(X_train, Y_train)
print("Best parameters found by grid search:", abc_gs.best_params_)
print("Best CV score:", abc_gs.best_score_)
print("Best CV score:", abc_gs.grid_scores_)
# 23%
#Best parameters found by grid search: {'learning_rate': 0.1, n_estimators:100}
#Best CV score: -2.45524182137

In [None]:
print("\nTraining Ensamble Random Forest ....")
rfc = RandomForestClassifier(n_estimators = 100, n_jobs=-1, random_state=23)
param_grid = {'min_samples_leaf': [2,5]}
rfc_gs = grid_search.GridSearchCV(estimator = rfc, param_grid = param_grid, cv = 6, n_jobs=-1, scoring='log_loss', error_score=0, verbose=2) 
rfc_gs.fit(X_train, Y_train)
print("Best parameters found by grid search:", rfc_gs.best_params_)
print("Best CV score:", rfc_gs.best_score_)
print("Best CV score:", rfc_gs.grid_scores_)
# 23%
# 5:-2.35487, std: 0.01740, CV=LB=2.34511
# 3% ('min_samples_leaf': 2)
# mean: -2.32991, std: 0.02355


Training Ensamble Random Forest ....
Fitting 6 folds for each of 2 candidates, totalling 12 fits


In [None]:
rfc = RandomForestClassifier(n_estimators = 2, n_jobs=-1, random_state=23, min_samples_leaf= 5)
rfc.fit(X_train, Y_train)
features_lb = sorted(zip(map(lambda x: round(x, 4), rfc.feature_importances_), X_train.columns), reverse=True, key=lambda pair: pair[0])
features_lb

In [None]:
preds = lg_gs.predict_proba(X_test)

In [None]:
result = pd.DataFrame(preds, columns=lable_group.classes_)
result["device_id"] = device_id
#result = result.set_index("device_id")

In [None]:
result["device_id"] = test['device_id'].values
#order columns
cols = result.columns.tolist()
cols = cols[-1:] + cols[:-1]
result = result[cols]

In [None]:
result.head(10)

In [None]:
result.to_csv('.\\submissions\\sub_lr.csv',index=False)

In [None]:
import pandas as pd


In [None]:
print(label_categories.shape)
label_categories.category.value_counts()

In [None]:
label_categories.head()

In [None]:
label_categories.isnull().sum()

In [None]:
for cat in label_categories.category:
    print(cat)