In [3]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.datasets import make_classification

In [14]:
# -*- coding: utf-8 -*-

class FFMFormat:
    def __init__(self):
        self.field_index_ = None
        self.feature_index_ = None

    def get_params(self):
        pass
    
    def set_params(self, **parameters):
        pass

    def fit(self, df, y=None):
        self.field_index_ = {col: i for i,col in enumerate(df.columns)}
        self.feature_index_ = dict()
        last_idx = 0
        for col in df.columns:
            vals = np.unique(df[col])
            for val in vals:
                if pd.isna(val): continue
                name = '{}_{}'.format(col, val)
                if name not in self.feature_index_:
                    self.feature_index_[name] = last_idx
                    last_idx += 1
            self.feature_index_[col] = last_idx
            last_idx += 1
        return self

    def fit_transform(self, df, y=None):
        self.fit(df, y)
        return self.transform(df)

    def transform_row_(self, row):
        ffm = []
        for col,val in row.loc[row!=0].to_dict().items():
            name = '{}_{}'.format(col, val)
            ffm.append('{}:{}:1'.format(self.field_index_[col], self.feature_index_[name]))
            ffm.append('{}:{}:{}'.format(self.field_index_[col], self.feature_index_[col], val))
        return ' '.join(ffm)

    def transform(self, df):
        return pd.Series({idx: self.transform_row_(row) for idx,row in df.iterrows()})

In [27]:
d = {'col1': ['a', 'b', 'f'], 'col2': [1, 2, 100], 'col3': [0.5, 0.3, 0.19]}
df = pd.DataFrame(data=d)

In [28]:
df.dtypes

col1     object
col2      int64
col3    float64
dtype: object

In [29]:
df.set_index('col4', drop = True, inplace = True)

KeyError: "None of ['col4'] are in the columns"

In [30]:
df

Unnamed: 0,col1,col2,col3
0,a,1,0.5
1,b,2,0.3
2,f,100,0.19


In [31]:
ffm_train = FFMFormat()
ffm_train_data = ffm_train.fit_transform(df, y='col4')

In [32]:
ffm_train_data

0      0:0:1 0:3:a 1:4:1 1:7:1 2:10:1 2:11:0.5
1       0:1:1 0:3:b 1:5:1 1:7:2 2:9:1 2:11:0.3
2    0:2:1 0:3:f 1:6:1 1:7:100 2:8:1 2:11:0.19
dtype: object