In [1]:
# Copyright (c) 2019 by contributors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


# This snippet tests the DMatrix conversion from a pandas DataFrame.
# Note: this program is adapted from the titanic demo.
# To check that DMatrix works correctly, disable lock-free training in
# both this file and run_titanic_no_cv.py; there may still be some difference
# in precision compared with the output result file.

import xlearn as xl
import numpy as np
import pandas as pd

# Load the train and test tables: tab-separated text files without a header row
csv_options = {"header": None, "sep": "\t"}
titanic_train = pd.read_csv("D:/model/demo_titanic/titanic_train.txt", **csv_options)
titanic_test = pd.read_csv("D:/model/demo_titanic/titanic_test.txt", **csv_options)

In [2]:
# Preview the first five rows of the training table
titanic_train.head(n=5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
0,0,1,0,1,0,0,0,1,0,1,0,0,1,-0.561363,-0.502445
1,1,1,0,0,1,1,0,0,1,0,1,0,0,0.613182,0.786845
2,1,0,0,1,0,0,0,1,1,0,0,0,1,-0.267727,-0.488854
3,1,1,0,0,1,0,0,1,1,0,1,0,0,0.392955,0.42073
4,0,0,0,1,0,0,0,1,0,1,0,0,1,0.392955,-0.486337


In [3]:
def _features_and_label(frame):
    """Split a table into ``(features, label)``.

    The label is the column named ``0``; the features are every column
    after the first position.
    """
    feature_columns = frame.columns[1:]
    return frame[feature_columns], frame[0]


# Separate features (X) and label (y) for the train and test tables
X_train, y_train = _features_and_label(titanic_train)
X_test, y_test = _features_and_label(titanic_test)

# Wrap each (features, label) pair in an xlearn DMatrix
xdm_train, xdm_test = xl.DMatrix(X_train, y_train), xl.DMatrix(X_test, y_test)

In [4]:
# ---- Training ----
# Factorization machine model. The setters below are the same calls used when
# training from a file; here they are given xl.DMatrix objects instead.
fm_model = xl.create_fm()
fm_model.setTrain(xdm_train)    # training data
fm_model.setValidate(xdm_test)  # validation data (the test DMatrix is reused)

# Hyper-parameters:
#   task   : 'binary' (binary classification, not regression)
#   lr     : learning rate, 0.2
#   lambda : regularization lambda, 0.002
#   metric : evaluation metric, 'acc'
param = {
    'task': 'binary',
    'lr': 0.2,
    'lambda': 0.002,
    'metric': 'acc',
}

# Train; the fitted model is written to model_dm.out at the path below
model_path = 'D:/model/demo_titanic/model_dm.out'
fm_model.fit(param, model_path)

# ---- Prediction ----
# As with training, the file-based setter also accepts an xl.DMatrix
fm_model.setTest(xdm_test)  # test data
fm_model.setSigmoid()       # convert output to 0-1

# Predict with the model saved above. No output path is passed, so the
# predictions come back in `res` (a numpy.ndarray) rather than in a file.
res = fm_model.predict(model_path)

print(res)


[0.15171707 0.38471457 0.25728175 0.13720287 0.4926911  0.19522497
 0.5218482  0.28587505 0.5827551  0.20034908 0.13424052 0.2502616
 0.83357584 0.23679863 0.7449428  0.69706905 0.25089794 0.2061758
 0.44420055 0.48756704 0.34131682 0.24688978 0.58853227 0.48204222
 0.7770271  0.1436287  0.88708174 0.19922495 0.40294805 0.23210882
 0.22160524 0.27683735 0.49039394 0.5356006  0.5459674  0.21874101
 0.46203122 0.49859792 0.14392862 0.18262711 0.20526993 0.37532356
 0.12504792 0.62766373 0.74766946 0.14296098 0.46783632 0.15103048
 0.7566836  0.5013354  0.4896968  0.3006076  0.58397335 0.74454623
 0.26734647 0.3170307  0.12377421 0.24649006 0.14815272 0.83119756
 0.17795011 0.21618678 0.17272696 0.54882455 0.6801617  0.62372524
 0.559515   0.2741074  0.5225855  0.71826535 0.54291177 0.15861079
 0.4682149  0.5298829  0.851157   0.6908793  0.12851053 0.6045461
 0.22074108 0.54291177 0.31027424 0.5438006  0.2516115  0.13424052
 0.24958684 0.21186633 0.5333146  0.50695103 0.501251   0.3746273