In [None]:
import os
import json
import warnings
import configparser

import joblib
import numpy as np
import pandas as pd
from tqdm import tqdm
from Crypto.Cipher import AES
from sqlalchemy import create_engine

import utils
import config

from urllib.parse import quote

# NOTE: this silences every warning for the rest of the kernel session,
# including deprecation warnings raised by pandas / SQLAlchemy.
warnings.filterwarnings("ignore")

# The first 16 elements of config.DB_SETTING['encrypt_msg'] are handed to
# AES.new as the CFB-mode IV; the remainder is the ciphertext that decrypts
# to the database password.
cipher = AES.new(config.DB_SETTING['encrypt_key'],
                 AES.MODE_CFB,
                 config.DB_SETTING['encrypt_msg'][:16])
decrypt_msg = cipher.decrypt(config.DB_SETTING['encrypt_msg'][16:]).decode('utf-8')

# Connection settings; "passwd" keeps the plain decrypted password.
config_dict = {
    "driver": "ODBC+Driver+17+for+SQL+Server",
    "database": config.DB_SETTING["database"],
    "host": config.DB_SETTING["host"],
    "port": config.DB_SETTING["port"],
    "user": config.DB_SETTING["user"],
    "passwd": decrypt_msg
}

# SQLAlchemy URL for the SQL Server database. The password is percent-encoded
# only for the URL (config_dict itself is left untouched) so that characters
# such as '@', '/', ':' or '%' cannot be mistaken for URL delimiters.
URI = (
    'mssql+pyodbc://{user}:{passwd}@{host}:{port}'
    '/{database}?driver={driver}'
).format(**dict(config_dict, passwd=quote(decrypt_msg, safe='')))

In [None]:
def get_data_generator(conn):
    """Return a chunked reader over the ``info_trans`` table.

    Parameters
    ----------
    conn
        Database connection forwarded to ``pandas.read_sql_table`` as ``con``.

    Returns
    -------
    The object returned by ``pandas.read_sql_table`` when called with
    ``chunksize=100`` (an iterator yielding frames of up to 100 rows).
    """
    return pd.read_sql_table("info_trans", con=conn, chunksize=100)

def loadgamedata(x):
    """Fetch every ``info_game`` row that belongs to one transaction.

    Parameters
    ----------
    x : scalar
        The ``trans_id`` to look up. numpy scalars (as produced by
        ``Series.unique()``) are accepted.

    Returns
    -------
    pandas.DataFrame
        All columns of the matching ``info_game`` rows (read with NOLOCK).
    """
    # Local import: the file-level import only brings in create_engine.
    from sqlalchemy import text

    # Bind the id as a parameter instead of concatenating it into the SQL text.
    # numpy scalars are converted to plain Python values first because DBAPI
    # drivers do not reliably accept them as bind values.
    trans_id = x.item() if hasattr(x, "item") else x
    query = text("select * from info_game with (nolock) where trans_id = :trans_id")

    engine = create_engine(URI)
    try:
        with engine.connect() as conn:
            return pd.read_sql(query, con=conn, params={"trans_id": trans_id})
    finally:
        # A new engine (and connection pool) is created on every call; release
        # its pooled connections explicitly instead of waiting for GC.
        engine.dispose()
    
def _load_clean_games(trans_id):
    """Load the games of one transaction and normalise the columns used later."""
    games = loadgamedata(trans_id)
    games["ticket_status"] = games["ticket_status"].str.replace(' ', '')
    games["home_score"] = games["home_score"].fillna(0)
    games["away_score"] = games["away_score"].fillna(0)
    return games


def main():
    """Load the first chunk of ``info_trans`` together with its game rows.

    Returns
    -------
    dict
        ``{'trans': <chunk DataFrame>, 'games': <DataFrame>}`` where ``games``
        stacks the ``info_game`` rows of every ``trans_id`` in the chunk.
        NOTE: the function returns from inside the chunk loop, so only the
        first chunk (up to 100 rows) is ever processed. If ``info_trans`` is
        empty the loop body never runs and ``None`` is returned.
    """
    with create_engine(URI).connect() as conn:
        gen = get_data_generator(conn)
        for data in tqdm(gen):
            for col in data.columns:
                # Assign the result back instead of calling
                # fillna(inplace=True) on data[col]: the in-place form mutates
                # a temporary and is a no-op under pandas Copy-on-Write.
                if data[col].dtype != float and col not in config.EXCEPTION_FEATURE_LIST:
                    data[col] = data[col].fillna("")
                else:
                    # Keeps missing values missing, but normalises None to NaN.
                    data[col] = data[col].fillna(np.nan)

            # One query per distinct transaction; concatenate once at the end
            # rather than growing a frame inside the loop.
            game_frames = [_load_clean_games(x) for x in data.trans_id.unique()]
            data_ga = pd.concat(game_frames)

            inputs = {'trans': data, 'games': data_ga}
            return inputs

#             loss_object = tf.keras.losses.BinaryCrossentropy()
#             ruc = rucmodel()
#             ruc.compile(optimizer='adam',
#                           loss=loss_object,
#                           metrics=['accuracy'])
#             ruc.fit(trans_data, y_train, epochs=30)

In [None]:
%%time
# Run the loading pipeline; main() returns a dict holding the 'trans' chunk
# and the matching 'games' rows.
rawdata=main()

In [None]:
# Number of distinct transactions in the loaded chunk. Reads `rawdata`
# directly: `rawdata1` is only bound by a later cell, so using it here fails
# on a fresh kernel.
len(rawdata['trans'].trans_id.unique())

In [None]:
# Second name for the dict returned by main() (same object, not a copy);
# the following cells read it as rawdata1.
rawdata1 = rawdata

In [None]:
# Feature column names come from the project config.
feature_col = config.XGB_FEATURE_COLUMNNAME
# Wrap the transaction frame returned by main() in the project's DataTransformer.
tranformer_train = utils.DataTransformer(data=rawdata1['trans'],feature_colname=feature_col)
# Run the transformation and keep its output for inspection.
data_trans = tranformer_train.transform()

In [None]:
# Inspect the transformer's idx_false attribute
# (presumably the row indices rejected by transform() — TODO confirm in utils.DataTransformer).
tranformer_train.idx_false

In [None]:
import numpy as np
import pandas as pd
import fasttext as ft
import tensorflow as tf
import tensorflow_addons as tfa

import config
import utils

class rucmodel(tf.keras.models.Model):
    """Small dense binary classifier over transaction features and game embeddings.

    ``transformdata`` builds the feature tensor from the raw frames; ``call``
    only applies the dense stack, so inputs must be transformed beforehand.
    """

    def __init__(self):
        super().__init__()
        # Fully connected stack 25 -> 10 -> 1 with a sigmoid output unit.
        self.dense_layer = tf.keras.layers.Dense(25, activation='relu')
        self.dense_layer2 = tf.keras.layers.Dense(10, activation='relu')
        self.dense_layer3 = tf.keras.layers.Dense(1, activation='sigmoid')

    def transformdata(self, inputs):
        """Turn the raw ``{'trans', 'games'}`` dict into one feature tensor.

        Parameters
        ----------
        inputs : dict
            ``inputs['trans']`` is the transaction frame (must contain
            ``trans_id``); ``inputs['games']`` is the game frame, also keyed
            by ``trans_id``.

        Returns
        -------
        tf.Tensor
            Transformed transaction features joined (inner merge on
            ``trans_id``) with one fastText sentence vector per transaction,
            with the ``trans_id`` column removed.
        """
        trans_data = inputs["trans"]
        trans_id = trans_data["trans_id"]
        feature_col = config.XGB_FEATURE_COLUMNNAME
        tranformer_train = utils.DataTransformer(data=trans_data, feature_colname=feature_col)
        data_trans = tranformer_train.transform()
        # Re-attach the id so the game embeddings can be joined on it.
        data_trans = pd.concat([trans_id, data_trans], axis=1)

        game_data = inputs["games"]
        ftmodel = ft.load_model("game_embedding_model_ep5.bin")

        # Collect ids and vectors in plain lists and build the frame once;
        # DataFrame.append was removed in pandas 2.0 and is quadratic in a loop.
        trans_ids = []
        vectors = []
        for group_id, group in game_data.groupby("trans_id"):
            group = group.loc[:, ~group.columns.isin(config.POP_FEATURE_LIST)]
            # One comma-joined string per game row.
            rows = group.astype(str).apply(lambda row: ','.join(row), axis=1).tolist()
            # NOTE(review): the last row appears twice in the sentence
            # ("r1 r2 ... rn rn"). This reproduces the existing behaviour so the
            # embeddings stay unchanged; it looks unintended — confirm.
            string = ' '.join(rows) + ' ' + rows[-1]
            trans_ids.append(group_id)
            vectors.append(ftmodel.get_sentence_vector(string))

        # Columns: 'trans_id' followed by the embedding dimensions 0..d-1.
        data_games = pd.concat(
            [pd.DataFrame({"trans_id": trans_ids}), pd.DataFrame(vectors)],
            axis=1,
        )

        transform_inputs = pd.merge(data_trans, data_games, on="trans_id")
        transform_inputs = transform_inputs.drop(columns="trans_id")
        # NOTE(review): merge() returns a fresh 0..n-1 index, so this filter
        # compares positions in the merged frame with idx_true — verify that
        # idx_true is expressed in the same positions.
        transform_inputs = transform_inputs[transform_inputs.index.isin(tranformer_train.idx_true)]
        transform_inputs_to_tensor = tf.convert_to_tensor(transform_inputs)

        return transform_inputs_to_tensor  # trans concat games

    def call(self, inputs):
        """Apply the three dense layers to already-transformed ``inputs``."""
        dly1_out = self.dense_layer(inputs)
        dly2_out = self.dense_layer2(dly1_out)
        out = self.dense_layer3(dly2_out)

        return out

In [None]:
# Binary cross-entropy loss for the model's single sigmoid output.
loss_object = tf.keras.losses.BinaryCrossentropy()
ruc = rucmodel()
# Adam optimizer; accuracy is reported as the training metric.
ruc.compile(optimizer='adam',
              loss=loss_object,
              metrics=['accuracy'])

In [None]:
# Hard-coded labels: 50 ones followed by 47 zeros (97 values in total).
# NOTE(review): they are not read from the data, so the count presumably has
# to match the number of rows passed to ruc.fit — confirm.
y_train = np.concatenate([np.ones(50), np.zeros(47)])

In [None]:
ruc.fit(trans_data, y_train, epochs=30)

In [None]:
trans_data = ruc.transformdata(rawdata1)

In [None]:
# Display the feature tensor produced by ruc.transformdata().
trans_data

In [None]:
# Convert trans_data to a tensor. transformdata() already returns the result
# of tf.convert_to_tensor, so this is a second conversion of the same data.
data_to_tensor=tf.convert_to_tensor(trans_data)

In [None]:
# Display the converted tensor.
data_to_tensor

In [None]:
feature_col = config.XGB_FEATURE_COLUMNNAME
# The transaction frame is read from rawdata1['trans']: a bare `data` exists
# only as a local variable inside main(), so it is undefined on a fresh kernel.
tranformer = utils.DataTransformer(data=rawdata1['trans'], feature_colname=feature_col)
# Unlike the earlier transformer cell, this one calls update_configuration()
# before transforming.
tranformer.update_configuration()
data_trans = tranformer.transform()

In [None]:
# Preview the first rows of the transform() output.
data_trans.head()

In [None]:
# Distinct transaction ids of the loaded chunk. Read from rawdata1['trans']
# because a bare `data` is only a local inside main() and is undefined here.
a = rawdata1['trans'].trans_id.unique()

In [None]:
# Render the ids as a parenthesised, comma-separated list such as "(1,2,3)"
# (presumably for a SQL IN clause). It is built from the elements themselves
# rather than from str(ndarray), whose text pads numbers of different widths
# with extra spaces and abbreviates arrays longer than 1000 items with "...".
# String ids keep their quotes, as they had in the array's printed form.
x = "(" + ",".join(repr(v) if isinstance(v, str) else str(v) for v in a.tolist()) + ")"