# <font size="6">Libraries</font>

In [1]:
import pandas as pd

# <font size="6">dataset</font>

In [2]:
train_df = pd.read_parquet('data/train.parquet')
valid_df = pd.read_parquet('data/valid.parquet')

train_df = train_df[['app_id', 'amnt_mcc_bins', 'hour_diff']]

In [3]:
last_transactions = train_df.groupby('app_id').tail(1)

# <font size="6">Model</font>

In [4]:
class MostCommonMCCPredictor:
    def __init__(self):
        self.most_common_mcc = {}

    def fit(self, data):
        self.most_common_mcc = data.groupby('app_id')['amnt_mcc_bins'].apply(lambda x: x.mode().iloc[0]).to_dict()

    def forward(self, app_id):
        
        return self.most_common_mcc.get(app_id, None)

    
model = MostCommonMCCPredictor()

model.fit(train_df)

In [25]:
%%time

predicted_transactions = pd.DataFrame()
valid_counts = valid_df['app_id'].value_counts().to_dict()
predicted_types = []

for _, last_transaction in last_transactions.iterrows():
    user_id = last_transaction['app_id']
    last_type = last_transaction['amnt_mcc_bins']
    num_transactions = valid_counts.get(user_id, 0)
    
    for _ in range(num_transactions):
        predicted_type = model.forward(user_id)
        
        predicted_types.append(predicted_type)

CPU times: user 35.2 s, sys: 184 ms, total: 35.4 s
Wall time: 35.3 s


In [29]:
valid_df['predicted_type'] = predicted_types

valid_df['pred_mcc'] = (valid_df['predicted_type'] - 1) // 4 + 1
valid_df['pred_amnt'] = (valid_df['predicted_type'] - 1) % 4

# <font size="6">Metrics</font>

In [30]:
from sklearn.metrics import mean_absolute_error, accuracy_score

print('Type accuracy:', accuracy_score(valid_df['amnt_mcc_bins'], valid_df['predicted_type']))
print('Amnt accuracy:', accuracy_score(valid_df['amnt_bins'], valid_df['pred_amnt']))
print('MCC accuracy:', accuracy_score(valid_df['mcc'], valid_df['pred_mcc']))

Type accuracy: 0.20276643610945957
Amnt accuracy: 0.5309335067601867
MCC accuracy: 0.29054795332676336
