# <font size="6">Libraries</font>

In [1]:
import pandas as pd
import numpy as np

# <font size="5">dataset</font>

In [2]:
train_df = pd.read_parquet('data/train.parquet').drop(columns=['transaction_number',
                                                               'reversed_transaction', 
                                                               'transaction_max',
                                                               'time'])

valid_df = pd.read_parquet('data/valid.parquet').drop(columns=['transaction_number',
                                                               'reversed_transaction', 
                                                               'transaction_max',
                                                               'time'])

In [3]:
train_df['mcc_next'] = train_df.groupby(['app_id'])['mcc'].shift(-1)
train_df['amnt_next'] = train_df.groupby(['app_id'])['amnt'].shift(-1)
train_df['hour_diff_next'] = train_df.groupby(['app_id'])['hour_diff'].shift(-1)
train_df['amnt_bins_next'] = train_df.groupby(['app_id'])['amnt_bins'].shift(-1)
train_df['amnt_mcc_bins_next'] = train_df.groupby(['app_id'])['amnt_mcc_bins'].shift(-1)

valid_df['mcc_next'] = valid_df.groupby(['app_id'])['mcc'].shift(-1)
valid_df['amnt_next'] = valid_df.groupby(['app_id'])['amnt'].shift(-1)
valid_df['hour_diff_next'] = valid_df.groupby(['app_id'])['hour_diff'].shift(-1)
valid_df['amnt_bins_next'] = valid_df.groupby(['app_id'])['amnt_bins'].shift(-1)
valid_df['amnt_mcc_bins_next'] = valid_df.groupby(['app_id'])['amnt_mcc_bins'].shift(-1)

In [4]:
last_transactions_train = train_df.groupby('app_id').tail(1)

train_df = train_df.drop(last_transactions_train.index)

train_df = train_df.dropna()

In [5]:
first_transactions_valid = valid_df.groupby('app_id').head(1)
first_transactions_valid = first_transactions_valid.dropna()

# <font size="6">Markov Chain Model</font>

In [15]:
class MarkovChainModel():
    def __init__(self, dataset, user_id_column='app_id'):
        self.dataset = dataset
        self.user_id_column = user_id_column
        
        self.transition_matrix = self.build_transition_matrix()

    def forward(self, _type):
        return self.transition_matrix[_type]
                
    def build_transition_matrix(self):
        self.dataset['amnt_mcc_bins_shift'] = self.dataset.groupby(self.user_id_column)['amnt_mcc_bins'].shift(-1)
        transition_matrix = self.dataset.groupby('amnt_mcc_bins')['amnt_mcc_bins_shift'].apply(lambda x: x.mode().iloc[0] if not x.mode().empty else None).to_dict()

        return transition_matrix
    
markov_model = MarkovChainModel(train_df)

In [25]:
%%time
    
predicted_types = []
    
for _, last_transaction in last_transactions_train.iterrows():
    user_id = last_transaction['app_id']
    last_type = last_transaction['amnt_mcc_bins']
    
    predicted_type = markov_model.forward(last_type)
    
    predicted_types.append(predicted_type)

CPU times: user 23.4 s, sys: 17.8 ms, total: 23.4 s
Wall time: 23.4 s


In [26]:
predicted_types_df = pd.DataFrame({"app_id": last_transactions_train['app_id'].values,
                                'pred_type': predicted_types})

In [28]:
dataframe = predicted_types_df.merge(first_transactions_valid, on='app_id', how='inner')

In [29]:
dataframe['pred_mcc'] = (dataframe['pred_type'] - 1) // 4 + 1
dataframe['pred_amnt'] = (dataframe['pred_type'] - 1) % 4

# <font size="6">Metrics</font>

In [30]:
from sklearn.metrics import mean_absolute_error, accuracy_score

print('Type accuracy:', accuracy_score(dataframe['amnt_mcc_bins'], dataframe['pred_type']))
print('Amnt accuracy:', accuracy_score(dataframe['amnt_bins'], dataframe['pred_amnt']))
print('MCC accuracy:', accuracy_score(dataframe['mcc'], dataframe['pred_mcc']))

Type accuracy: 0.20407110305928056
Amnt accuracy: 0.5350449180082926
MCC accuracy: 0.31523616973590823
