In [None]:
%%time
import gc
import os
import pickle
import random
import warnings 
from glob import glob

import numpy as np
import pandas as pd
import polars as pl
from joblib import Parallel, delayed
from tqdm import tqdm
from sklearn.cluster import KMeans
from sklearn.decomposition import NMF,LatentDirichletAllocation,TruncatedSVD,PCA
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import Lasso,Ridge,ElasticNet,BayesianRidge,LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import StratifiedKFold,KFold,StratifiedGroupKFold,GroupKFold
from sklearn.preprocessing import LabelEncoder,MultiLabelBinarizer
import lightgbm as lgb
import xgboost as xgb
import catboost as cbt
import kaggle_evaluation.mcts_inference_server
warnings.filterwarnings('ignore')

# Directory the competition CSVs (train.csv / test.csv) are read from.
input_path = '/kaggle/input/um-game-playing-strength-of-mcts-variants/'
# Directory holding the pickled text models, label encoders and per-fold model files.
model_path = '/kaggle/input/um-models/'
# True: read train.csv and fit/cache preprocessing artifacts.
# False: read test.csv, load the cached artifacts and run inference.
is_train = False

In [None]:
%%time

def fe_base(file):
    """Read the CSV at ``file`` with polars and return only its first rows.

    ``DataFrame.head()`` is called without an argument, so the result is
    limited to polars' default head size rather than the full file.
    """
    loaded = pl.read_csv(file)
    preview = loaded.head()
    return preview

# Load the split selected by the is_train flag from the competition folder.
base = fe_base(input_path+('train.csv' if is_train else 'test.csv'))

In [None]:
%%time
    
def fe(base):
    """Return ``base`` with 18 hand-crafted interaction columns added.

    Every derived column is computed from raw input columns only (never from
    another derived column), so they are all added in a single
    ``with_columns`` call. Columns are appended in the order listed below;
    a derived column that already exists in ``base`` is overwritten in place.
    """
    eps = 1e-15  # keeps the ratio features finite when the denominator is 0

    def product(left, right, name):
        return (pl.col(left)*pl.col(right)).alias(name)

    def ratio(numerator, denominator, name):
        return (pl.col(numerator)/(pl.col(denominator)+eps)).alias(name)

    def total(columns, name):
        # Left-to-right sum of the listed columns.
        acc = pl.col(columns[0])
        for column in columns[1:]:
            acc = acc+pl.col(column)
        return acc.alias(name)

    derived = [
        product('NumRows', 'NumColumns', 'area'),
        product('Balance', 'GameTreeComplexity', 'ComplexityBalanceInteraction'),
        ratio('PlayoutsPerSecond', 'MovesPerSecond', 'PlayoutsPerMoves'),
        ratio('MovesPerSecond', 'PlayoutsPerSecond', 'EfficiencyPerPlayout'),
        ratio('DurationActions', 'DurationTurnsStdDev', 'TurnsDurationEfficiency'),
        ratio('AdvantageP1', 'Balance', 'AdvantageBalanceRatio'),
        ratio('DurationActions', 'MovesPerSecond', 'ActionTimeEfficiency'),
        ratio('DurationTurnsStdDev', 'DurationActions', 'StandardizedTurnsEfficiency'),
        ratio('AdvantageP1', 'DurationActions', 'AdvantageTimeImpact'),
        ratio('DurationActions', 'StateTreeComplexity', 'DurationToComplexityRatio'),
        ratio('GameTreeComplexity', 'StateTreeComplexity', 'NormalizedGameTreeComplexity'),
        ratio('GameTreeComplexity', 'PlayoutsPerSecond', 'ComplexityPerPlayout'),
        ratio('DurationTurnsNotTimeouts', 'MovesPerSecond', 'TurnsNotTimeoutsPerMoves'),
        ratio('Timeouts', 'DurationActions', 'TimeoutsPerDurationActions'),
        ratio('OutcomeUniformity', 'AdvantageP1', 'OutcomeUniformityPerAdvantageP1'),
        total(['StateTreeComplexity', 'GameTreeComplexity'], 'OverallComplexity'),
        total(['StepDecisionToEnemy', 'SlideDecisionToEnemy', 'HopDecisionMoreThanOne'],
              'ComplexDecisionRatio'),
        total(['StepDecisionToEnemy', 'HopDecisionEnemyToEnemy', 'HopDecisionFriendToEnemy',
               'SlideDecisionToEnemy'],
              'AggressiveActionsRatio'),
    ]

    return base.with_columns(derived)

def _load_pickle(path):
    """Return the object unpickled from ``path``."""
    # NOTE(review): pickle.load can execute arbitrary code; only point
    # model_path at artifacts you produced yourself.
    with open(path, 'rb') as f:
        return pickle.load(f)


def _dump_pickle(obj, path):
    """Pickle ``obj`` to ``path``, overwriting any existing file."""
    # NOTE(review): model_path lives under /kaggle/input, which is presumably
    # read-only on Kaggle -- confirm the directory is writable when training.
    with open(path, 'wb') as f:
        pickle.dump(obj, f)


def _load_or_fit_rules_models(rules, tfidf_path, svd_path):
    """Return ``(tfidf_model, svd_model)`` for the EnglishRules text.

    Cached pickles are reused when present; otherwise the models are fitted
    on ``rules`` and written to disk. The SVD is refitted whenever the TF-IDF
    model had to be refitted, because a cached SVD would not match the new
    vocabulary.
    """
    tfidf_cached = os.path.exists(tfidf_path)
    if tfidf_cached:
        tfidf_model = _load_pickle(tfidf_path)
    else:
        tfidf_model = TfidfVectorizer(analyzer='word',ngram_range=(2,3),max_features=300).fit(rules)
        _dump_pickle(tfidf_model, tfidf_path)

    if tfidf_cached and os.path.exists(svd_path):
        svd_model = _load_pickle(svd_path)
    else:
        # The SVD must be fitted on the TF-IDF matrix, so build it first
        # (the original referenced the matrix before it was computed).
        rules_tfidf = tfidf_model.transform(rules)
        svd_model = TruncatedSVD(n_components=20, algorithm='arpack',random_state=223).fit(rules_tfidf)
        _dump_pickle(svd_model, svd_path)

    return tfidf_model, svd_model


def _append_rules_svd(base, tfidf_model, svd_model):
    """Append the columns fe_svd_1..fe_svd_20 (SVD of TF-IDF of EnglishRules)."""
    rules_tfidf = tfidf_model.transform(base['EnglishRules'])
    rules_svd = pd.DataFrame(svd_model.transform(rules_tfidf))
    rules_svd.columns = ['fe_svd_'+str(j+1) for j in range(20)]
    return pl.concat([base, pl.from_pandas(rules_svd)], how='horizontal')


def _add_binned_target(base):
    """Add ``target_int``: utility_agent1 ranked, merged into bins, re-indexed."""
    levels = sorted(set(base['utility_agent1']))
    target_encoder = dict(zip(levels, range(len(levels))))
    base = base.with_columns(pl.col('utility_agent1').replace_strict(target_encoder).alias('target_int'))

    # Hand-written merge of neighbouring ranks into coarser bins. No default is
    # given, so replace_strict raises if a rank outside 0..67 appears.
    target_int_dict = {0:0,1:0,2:0,3:3,4:3,5:5,6:5,7:7,8:7,9:9,10:9,11:11,12:11,13:13,14:13,15:15,16:15,17:17,18:17,19:17,20:20,21:20,22:22,23:22,
               24:24,25:24,26:26,27:26,28:28,29:28,30:30,31:30,32:32,33:32,34:32,35:35,36:35,37:37,38:37,39:39,40:39,41:39,42:42,43:42,44:42,45:45,46:45,
               47:47,48:47,49:47,50:50,51:50,52:52,53:52,54:54,55:54,56:56,57:56,58:58,59:58,60:60,61:60,62:62,63:62,64:64,65:64,66:67,67:67}
    base = base.with_columns(pl.col('target_int').replace_strict(target_int_dict).alias('target_int'))

    # Re-index the surviving bin labels to a dense 0..k-1 range.
    bins = sorted(set(base['target_int']))
    target_int_dict = dict(zip(bins, range(len(bins))))
    return base.with_columns(pl.col('target_int').replace_strict(target_int_dict).alias('target_int'))


def process(base):
    """Turn a raw competition frame into the model-ready feature frame.

    Steps, in order:
      1. copy agent1/agent2 to ``*_encoder`` columns and split the original
         strings on "-" into ``agentN_1..`` columns (the first piece is dropped);
      2. add the ``fe`` interaction features;
      3. append 20 SVD components of the TF-IDF of ``EnglishRules``;
      4. training only: add the binned integer target ``target_int``;
      5. integer-encode the agent text columns (unseen values become -1).

    Reads the module-level ``is_train`` and ``model_path``. In training mode
    missing text models / encoders are fitted and pickled under
    ``model_path``; in inference mode they must already exist there
    (otherwise ``open`` raises FileNotFoundError).

    Parameters
    ----------
    base : pl.DataFrame
        Must contain ``agent1``, ``agent2``, ``EnglishRules`` and the columns
        read by ``fe``; ``utility_agent1`` as well when ``is_train`` is True.

    Returns
    -------
    pl.DataFrame
    """
    base = base.with_columns(pl.col('agent1').alias('agent1_encoder'))
    base = base.with_columns(pl.col('agent2').alias('agent2_encoder'))
    base = base.with_columns(pl.col('agent1').str.split(by="-").list.to_struct(fields=lambda idx: f"{'agent1'}_{idx}")).unnest('agent1').drop(f"agent1_0")
    base = base.with_columns(pl.col('agent2').str.split(by="-").list.to_struct(fields=lambda idx: f"{'agent2'}_{idx}")).unnest('agent2').drop(f"agent2_0")

    text_cols = ['agent1_encoder','agent2_encoder','agent1_1','agent1_2','agent1_3','agent1_4','agent2_1','agent2_2','agent2_3','agent2_4']

    tfidf_path = model_path+'EnglishRules_tfidf_model.pkl'
    svd_path = model_path+'EnglishRules_svd_model.pkl'
    encoder_path = model_path+'text_cols_encoder.pkl'

    # fe
    base = fe(base)

    # tfidf + svd
    if is_train:
        tfidf_model, svd_model = _load_or_fit_rules_models(base['EnglishRules'], tfidf_path, svd_path)
    else:
        tfidf_model = _load_pickle(tfidf_path)
        svd_model = _load_pickle(svd_path)
    base = _append_rules_svd(base, tfidf_model, svd_model)

    # target (only available / needed when training)
    if is_train:
        base = _add_binned_target(base)

    # label encoder
    if is_train and not os.path.exists(encoder_path):
        text_cols_encoder = []
        for col in text_cols:
            base = base.with_columns(pl.col(col).fill_null('nan').cast(pl.String))
            levels = sorted(set(base[col]))
            encoder = dict(zip(levels, range(len(levels))))
            base = base.with_columns(pl.col(col).replace_strict(encoder, default=-1))
            text_cols_encoder.append(encoder)
        _dump_pickle(text_cols_encoder, encoder_path)
    else:
        text_cols_encoder = _load_pickle(encoder_path)
        for i, col in enumerate(text_cols):
            base = base.with_columns(pl.col(col).replace_strict(text_cols_encoder[i], default=-1))

    return base

In [None]:
def create_reversed_data(base):
    """Build the player-swapped copy of ``base`` and return it as pandas.

    The agent1_* / agent2_* columns (including the ``*_encoder`` columns)
    exchange their values, ``AdvantageP1`` becomes ``1 - AdvantageP1``, and
    the ``fe`` features are recomputed on the swapped frame. The swapped
    columns are dropped and re-added, so they end up at the end of the frame
    in the order listed in ``swaps`` followed by ``AdvantageP1``.
    """
    # (column written in the reversed frame, column of `base` it is read from)
    swaps = [
        ('agent1_1', 'agent2_1'),
        ('agent1_2', 'agent2_2'),
        ('agent1_3', 'agent2_3'),
        ('agent1_4', 'agent2_4'),
        ('agent2_1', 'agent1_1'),
        ('agent2_2', 'agent1_2'),
        ('agent2_3', 'agent1_3'),
        ('agent2_4', 'agent1_4'),
        ('agent1_encoder', 'agent2_encoder'),
        ('agent2_encoder', 'agent1_encoder'),
    ]

    stripped = base.drop(['agent1_1','agent1_2','agent1_3','agent1_4','agent2_1','agent2_2','agent2_3','agent2_4','agent1_encoder','agent2_encoder','AdvantageP1'])

    readded = [pl.Series(dst, base[src]) for dst, src in swaps]
    readded.append(pl.Series('AdvantageP1', base['AdvantageP1']))
    mirrored = stripped.with_columns(readded)

    # Flip the first-player advantage to the other player's point of view.
    mirrored = mirrored.with_columns( (1-pl.col('AdvantageP1')).alias('AdvantageP1') )

    # fe
    mirrored = fe(mirrored)

    return mirrored.to_pandas()

In [None]:
# Columns fed to the first-stage models (lgb_jiajie / xgb_jiajie / cbt_jiajie)
# in infer_ensemble. The list holds the split agent columns, the raw game
# feature columns, the two encoded agent columns, and finally the 18 columns
# created by fe() (from 'area' to 'AggressiveActionsRatio').
feats1 = ['agent1_1',
 'agent1_2',
 'agent1_3',
 'agent1_4',
 'agent2_1',
 'agent2_2',
 'agent2_3',
 'agent2_4',
 'Stochastic',
 'Asymmetric',
 'AsymmetricForces',
 'AsymmetricPiecesType',
 'PlayersWithDirections',
 'Shape',
 'SquareShape',
 'HexShape',
 'TriangleShape',
 'DiamondShape',
 'RectangleShape',
 'CircleShape',
 'SquarePyramidalShape',
 'RegularShape',
 'PolygonShape',
 'Tiling',
 'SquareTiling',
 'HexTiling',
 'TriangleTiling',
 'SemiRegularTiling',
 'MorrisTiling',
 'CircleTiling',
 'ConcentricTiling',
 'AlquerqueTiling',
 'MancalaBoard',
 'MancalaStores',
 'MancalaTwoRows',
 'MancalaFourRows',
 'MancalaSixRows',
 'MancalaCircular',
 'AlquerqueBoard',
 'AlquerqueBoardWithOneTriangle',
 'AlquerqueBoardWithTwoTriangles',
 'AlquerqueBoardWithFourTriangles',
 'ThreeMensMorrisBoard',
 'ThreeMensMorrisBoardWithTwoTriangles',
 'NineMensMorrisBoard',
 'StarBoard',
 'CrossBoard',
 'Track',
 'TrackLoop',
 'TrackOwned',
 'Region',
 'Vertex',
 'Cell',
 'Edge',
 'NumPlayableSitesOnBoard',
 'NumColumns',
 'NumRows',
 'NumCorners',
 'NumDirections',
 'NumOrthogonalDirections',
 'NumDiagonalDirections',
 'NumAdjacentDirections',
 'NumOuterSites',
 'NumInnerSites',
 'NumLayers',
 'NumEdges',
 'NumCells',
 'NumVertices',
 'NumPerimeterSites',
 'NumTopSites',
 'NumBottomSites',
 'NumRightSites',
 'NumLeftSites',
 'NumCentreSites',
 'NumConvexCorners',
 'NumConcaveCorners',
 'NumPhasesBoard',
 'Hand',
 'NumContainers',
 'NumPlayableSites',
 'Piece',
 'PieceValue',
 'PieceRotation',
 'PieceDirection',
 'Dice',
 'DiceD6',
 'LargePiece',
 'Tile',
 'NumComponentsType',
 'NumComponentsTypePerPlayer',
 'NumDice',
 'Meta',
 'OpeningContract',
 'SwapOption',
 'Repetition',
 'TurnKo',
 'Start',
 'PiecesPlacedOutsideBoard',
 'InitialRandomPlacement',
 'InitialScore',
 'NumStartComponentsBoard',
 'NumStartComponentsHand',
 'NumStartComponents',
 'NumStartComponentsBoardPerPlayer',
 'NumStartComponentsHandPerPlayer',
 'NumStartComponentsPerPlayer',
 'Moves',
 'NoSiteMoves',
 'VoteDecision',
 'SwapPlayersDecision',
 'SwapPlayersDecisionFrequency',
 'PassDecision',
 'PassDecisionFrequency',
 'ProposeDecision',
 'ProposeDecisionFrequency',
 'SingleSiteMoves',
 'AddDecision',
 'AddDecisionFrequency',
 'PromotionDecisionFrequency',
 'RemoveDecisionFrequency',
 'RotationDecision',
 'StepDecisionFrequency',
 'StepDecisionToEmpty',
 'StepDecisionToEmptyFrequency',
 'StepDecisionToFriend',
 'StepDecisionToFriendFrequency',
 'StepDecisionToEnemy',
 'StepDecisionToEnemyFrequency',
 'SlideDecisionFrequency',
 'SlideDecisionToEmpty',
 'SlideDecisionToEmptyFrequency',
 'SlideDecisionToEnemy',
 'SlideDecisionToEnemyFrequency',
 'SlideDecisionToFriend',
 'SlideDecisionToFriendFrequency',
 'LeapDecisionFrequency',
 'LeapDecisionToEmpty',
 'LeapDecisionToEmptyFrequency',
 'LeapDecisionToEnemy',
 'LeapDecisionToEnemyFrequency',
 'HopDecision',
 'HopDecisionFrequency',
 'HopDecisionMoreThanOne',
 'HopDecisionMoreThanOneFrequency',
 'HopDecisionEnemyToEmpty',
 'HopDecisionEnemyToEmptyFrequency',
 'HopDecisionFriendToEmpty',
 'HopDecisionFriendToEmptyFrequency',
 'HopDecisionFriendToFriendFrequency',
 'HopDecisionEnemyToEnemy',
 'HopDecisionEnemyToEnemyFrequency',
 'HopDecisionFriendToEnemyFrequency',
 'FromToDecision',
 'FromToDecisionFrequency',
 'FromToDecisionWithinBoardFrequency',
 'FromToDecisionBetweenContainersFrequency',
 'FromToDecisionEmpty',
 'FromToDecisionEmptyFrequency',
 'FromToDecisionEnemy',
 'FromToDecisionEnemyFrequency',
 'FromToDecisionFriend',
 'FromToDecisionFriendFrequency',
 'SwapPiecesDecisionFrequency',
 'ShootDecision',
 'ShootDecisionFrequency',
 'MovesNonDecision',
 'MovesEffects',
 'VoteEffect',
 'SwapPlayersEffect',
 'PassEffect',
 'Roll',
 'RollFrequency',
 'ProposeEffect',
 'ProposeEffectFrequency',
 'AddEffect',
 'AddEffectFrequency',
 'Sow',
 'SowFrequency',
 'SowWithEffect',
 'SowCaptureFrequency',
 'SowRemove',
 'SowRemoveFrequency',
 'SowBacktrackingFrequency',
 'SowSkip',
 'SowCW',
 'PromotionEffect',
 'PromotionEffectFrequency',
 'RemoveEffectFrequency',
 'PushEffect',
 'PushEffectFrequency',
 'Flip',
 'FlipFrequency',
 'SetMove',
 'SetNextPlayer',
 'SetNextPlayerFrequency',
 'MoveAgainFrequency',
 'SetValue',
 'SetValueFrequency',
 'SetCount',
 'SetRotation',
 'StepEffect',
 'SlideEffect',
 'LeapEffect',
 'HopEffect',
 'FromToEffect',
 'MovesOperators',
 'Priority',
 'MaxMovesInTurn',
 'Capture',
 'ReplacementCapture',
 'ReplacementCaptureFrequency',
 'HopCapture',
 'HopCaptureFrequency',
 'HopCaptureMoreThanOne',
 'HopCaptureMoreThanOneFrequency',
 'DirectionCaptureFrequency',
 'EncloseCapture',
 'EncloseCaptureFrequency',
 'CustodialCapture',
 'CustodialCaptureFrequency',
 'InterveneCapture',
 'SurroundCaptureFrequency',
 'CaptureSequence',
 'CaptureSequenceFrequency',
 'Conditions',
 'SpaceConditions',
 'Line',
 'Connection',
 'Group',
 'Contains',
 'Loop',
 'Territory',
 'Fill',
 'Distance',
 'MoveConditions',
 'NoMoves',
 'NoMovesMover',
 'NoMovesNext',
 'CanMove',
 'CanNotMove',
 'PieceConditions',
 'NoPiece',
 'NoPieceMover',
 'NoPieceNext',
 'NoTargetPiece',
 'IsEmpty',
 'IsFriend',
 'IsPieceAt',
 'LineOfSight',
 'CountPiecesComparison',
 'CountPiecesMoverComparison',
 'CountPiecesNextComparison',
 'ProgressCheck',
 'Directions',
 'AbsoluteDirections',
 'AllDirections',
 'AdjacentDirection',
 'OrthogonalDirection',
 'DiagonalDirection',
 'RotationalDirection',
 'SameLayerDirection',
 'RelativeDirections',
 'ForwardDirection',
 'ForwardsDirection',
 'BackwardsDirection',
 'RightwardDirection',
 'ForwardLeftDirection',
 'SameDirection',
 'OppositeDirection',
 'Phase',
 'NumPlayPhase',
 'Scoring',
 'PieceCount',
 'SpaceEnd',
 'LineEnd',
 'LineEndFrequency',
 'LineWin',
 'LineWinFrequency',
 'LineLoss',
 'LineLossFrequency',
 'LineDraw',
 'ConnectionEnd',
 'ConnectionEndFrequency',
 'ConnectionWin',
 'ConnectionWinFrequency',
 'ConnectionLoss',
 'ConnectionLossFrequency',
 'GroupEnd',
 'GroupEndFrequency',
 'GroupWin',
 'LoopEnd',
 'LoopEndFrequency',
 'LoopWinFrequency',
 'PatternEnd',
 'PatternEndFrequency',
 'PatternWin',
 'TerritoryEndFrequency',
 'TerritoryWinFrequency',
 'CaptureEnd',
 'CheckmateFrequency',
 'CheckmateWinFrequency',
 'NoTargetPieceEnd',
 'NoTargetPieceEndFrequency',
 'NoTargetPieceWin',
 'NoTargetPieceWinFrequency',
 'EliminatePiecesEndFrequency',
 'EliminatePiecesWin',
 'EliminatePiecesWinFrequency',
 'EliminatePiecesLoss',
 'EliminatePiecesLossFrequency',
 'EliminatePiecesDraw',
 'EliminatePiecesDrawFrequency',
 'RaceEnd',
 'NoOwnPiecesEnd',
 'NoOwnPiecesEndFrequency',
 'NoOwnPiecesWinFrequency',
 'NoOwnPiecesLossFrequency',
 'FillEnd',
 'FillEndFrequency',
 'FillWin',
 'FillWinFrequency',
 'ReachEnd',
 'ReachEndFrequency',
 'ReachWin',
 'ReachWinFrequency',
 'ReachLoss',
 'ScoringEnd',
 'ScoringEndFrequency',
 'ScoringWinFrequency',
 'ScoringLoss',
 'ScoringLossFrequency',
 'NoMovesEndFrequency',
 'NoMovesWin',
 'NoMovesWinFrequency',
 'NoMovesLoss',
 'NoMovesLossFrequency',
 'NoMovesDraw',
 'NoMovesDrawFrequency',
 'NoProgressEndFrequency',
 'NoProgressDrawFrequency',
 'Draw',
 'DrawFrequency',
 'Misere',
 'DurationActions',
 'DurationMoves',
 'DurationTurns',
 'DurationTurnsStdDev',
 'DurationTurnsNotTimeouts',
 'DecisionMoves',
 'GameTreeComplexity',
 'StateTreeComplexity',
 'BoardCoverageDefault',
 'BoardCoverageFull',
 'BoardCoverageUsed',
 'AdvantageP1',
 'Balance',
 'Completion',
 'Drawishness',
 'Timeouts',
 'OutcomeUniformity',
 'BoardSitesOccupiedAverage',
 'BoardSitesOccupiedMedian',
 'BoardSitesOccupiedMaximum',
 'BoardSitesOccupiedVariance',
 'BoardSitesOccupiedChangeAverage',
 'BoardSitesOccupiedChangeSign',
 'BoardSitesOccupiedChangeLineBestFit',
 'BoardSitesOccupiedChangeNumTimes',
 'BoardSitesOccupiedMaxIncrease',
 'BoardSitesOccupiedMaxDecrease',
 'BranchingFactorAverage',
 'BranchingFactorMedian',
 'BranchingFactorMaximum',
 'BranchingFactorVariance',
 'BranchingFactorChangeAverage',
 'BranchingFactorChangeSign',
 'BranchingFactorChangeLineBestFit',
 'BranchingFactorChangeMaxIncrease',
 'BranchingFactorChangeMaxDecrease',
 'DecisionFactorAverage',
 'DecisionFactorMedian',
 'DecisionFactorMaximum',
 'DecisionFactorVariance',
 'DecisionFactorChangeAverage',
 'DecisionFactorChangeSign',
 'DecisionFactorChangeLineBestFit',
 'DecisionFactorChangeNumTimes',
 'DecisionFactorMaxIncrease',
 'DecisionFactorMaxDecrease',
 'MoveDistanceAverage',
 'MoveDistanceMedian',
 'MoveDistanceMaximum',
 'MoveDistanceVariance',
 'MoveDistanceChangeAverage',
 'MoveDistanceChangeSign',
 'MoveDistanceChangeLineBestFit',
 'MoveDistanceChangeNumTimes',
 'MoveDistanceMaxIncrease',
 'MoveDistanceMaxDecrease',
 'PieceNumberAverage',
 'PieceNumberMedian',
 'PieceNumberMaximum',
 'PieceNumberVariance',
 'PieceNumberChangeAverage',
 'PieceNumberChangeSign',
 'PieceNumberChangeLineBestFit',
 'PieceNumberChangeNumTimes',
 'PieceNumberMaxIncrease',
 'PieceNumberMaxDecrease',
 'ScoreDifferenceAverage',
 'ScoreDifferenceMedian',
 'ScoreDifferenceMaximum',
 'ScoreDifferenceVariance',
 'ScoreDifferenceChangeAverage',
 'ScoreDifferenceChangeSign',
 'ScoreDifferenceChangeLineBestFit',
 'ScoreDifferenceMaxIncrease',
 'ScoreDifferenceMaxDecrease',
 'Math',
 'Arithmetic',
 'Operations',
 'Addition',
 'Subtraction',
 'Multiplication',
 'Division',
 'Modulo',
 'Exponentiation',
 'Minimum',
 'Maximum',
 'Comparison',
 'Equal',
 'NotEqual',
 'LesserThan',
 'LesserThanOrEqual',
 'GreaterThan',
 'GreaterThanOrEqual',
 'Parity',
 'Even',
 'Odd',
 'Logic',
 'Conjunction',
 'Disjunction',
 'Negation',
 'Set',
 'Union',
 'Intersection',
 'Complement',
 'Algorithmics',
 'ConditionalStatement',
 'ControlFlowStatement',
 'Style',
 'GraphStyle',
 'GoStyle',
 'PenAndPaperStyle',
 'ShibumiStyle',
 'ShogiStyle',
 'TableStyle',
 'SurakartaStyle',
 'TaflStyle',
 'ComponentStyle',
 'AnimalComponent',
 'ChessComponent',
 'KingComponent',
 'KnightComponent',
 'RookComponent',
 'BishopComponent',
 'PawnComponent',
 'FairyChessComponent',
 'PloyComponent',
 'ShogiComponent',
 'XiangqiComponent',
 'StrategoComponent',
 'JanggiComponent',
 'CheckersComponent',
 'BallComponent',
 'TaflComponent',
 'DiscComponent',
 'StackType',
 'Stack',
 'Symbols',
 'ShowPieceState',
 'Implementation',
 'State',
 'PieceState',
 'SiteState',
 'SetSiteState',
 'Variable',
 'SetVar',
 'RememberValues',
 'ForgetValues',
 'SetPending',
 'SetInternalCounter',
 'PlayerValue',
 'Efficiency',
 'CopyContext',
 'Then',
 'ForEachPiece',
 'DoLudeme',
 'Trigger',
 'PlayoutsPerSecond',
 'MovesPerSecond',
 'agent1_encoder',
 'agent2_encoder',
 'area',
 'ComplexityBalanceInteraction',
 'PlayoutsPerMoves',
 'EfficiencyPerPlayout',
 'TurnsDurationEfficiency',
 'AdvantageBalanceRatio',
 'ActionTimeEfficiency',
 'StandardizedTurnsEfficiency',
 'AdvantageTimeImpact',
 'DurationToComplexityRatio',
 'NormalizedGameTreeComplexity',
 'ComplexityPerPlayout',
 'TurnsNotTimeoutsPerMoves',
 'TimeoutsPerDurationActions',
 'OutcomeUniformityPerAdvantageP1',
 'OverallComplexity',
 'ComplexDecisionRatio',
 'AggressiveActionsRatio'
] 
# Second-stage feature list: everything in feats1, then the 20 SVD text
# components (fe_svd_1 .. fe_svd_20), then the three first-stage prediction
# columns written by infer_ensemble.
feats2 = (
    feats1
    + ['fe_svd_'+str(j+1) for j in range(20)]
    + ['xgb_preds_aug', 'lgb_preds_aug', 'cbt_preds_aug']
)

In [None]:
%%time

# Convert the frame loaded earlier to pandas. This rebinds `base`, so the
# cell cannot be re-run without re-running the loading cell first.
base = base.to_pandas()
target = 'utility_agent1'
n_splits = 10
print ('feats1',len(feats1))
print ('feats2',len(feats2))

if not is_train:

    def _load_lgb(stem):
        """Load the LightGBM booster stored at model_path + stem + '.txt'."""
        return lgb.Booster(model_file=model_path+stem+'.txt')

    def _load_xgb(stem):
        """Load the XGBoost booster stored at model_path + stem + '.json'."""
        booster = xgb.Booster()
        booster.load_model(model_path+stem+'.json')
        return booster

    def _load_cbt(stem):
        """Load the CatBoost regressor stored at model_path + stem + '.bin'."""
        regressor = cbt.CatBoostRegressor()
        regressor.load_model(model_path+stem+'.bin')
        return regressor

    # First-stage models: one LightGBM / XGBoost / CatBoost model per fold.
    lgb_jiajie, xgb_jiajie, cbt_jiajie = [], [], []
    for n_fold in range(5):
        print (n_fold)
        fold_tag = 'fold_jiajie_' + str(n_fold)
        lgb_jiajie.append(_load_lgb('lgb_'+fold_tag))
        xgb_jiajie.append(_load_xgb('xgb_'+fold_tag))
        cbt_jiajie.append(_load_cbt('cbt_'+fold_tag))

    # Second-stage models: three saved variants per fold, for LightGBM and CatBoost.
    variant_suffixes = ('', '_888888', '_666')
    lgb_models, cbt_models = [], []
    for n_fold in range(n_splits):
        print (n_fold)
        fold_tag = 'fold_' + str(n_fold)
        for suffix in variant_suffixes:
            lgb_models.append(_load_lgb('lgb_'+fold_tag+suffix))
        for suffix in variant_suffixes:
            cbt_models.append(_load_cbt('cbt_'+fold_tag+suffix))

    def infer_ensemble(data,lgb_jiajie,xgb_jiajie,cbt_jiajie,lgb_models,cbt_models):
        """Run the two-stage ensemble on ``data`` and return the blended predictions.

        Stage 1 averages each first-stage model family on the feats1 columns,
        clips to [-1, 1] and writes the result into ``data`` as
        lgb_preds_aug / xgb_preds_aug / cbt_preds_aug (``data`` is modified in
        place). Stage 2 averages the second-stage LightGBM and CatBoost models
        on the feats2 columns, clips each to [-1, 1], and blends them as
        0.75 * CatBoost + 0.25 * LightGBM.
        """
        def clipped_mean(per_model_preds):
            return np.clip(np.mean(per_model_preds, axis=0), -1, 1)

        # feats1 does not contain the *_preds_aug columns, so this slice is
        # unaffected by the assignments below.
        stage1_frame = data[feats1]
        stage1_dmatrix = xgb.DMatrix(stage1_frame)

        data['lgb_preds_aug'] = clipped_mean([model.predict(stage1_frame) for model in lgb_jiajie])
        data['xgb_preds_aug'] = clipped_mean([model.predict(stage1_dmatrix) for model in xgb_jiajie])
        data['cbt_preds_aug'] = clipped_mean([model.predict(stage1_frame) for model in cbt_jiajie])

        stage2_frame = data[feats2]
        lgb_model = clipped_mean([model.predict(stage2_frame) for model in lgb_models])
        cbt_model = clipped_mean([model.predict(stage2_frame) for model in cbt_models])

        return cbt_model*0.75+lgb_model*0.25

    def predict(test_data, submission):
        """Inference-server callback: fill ``utility_agent1`` in ``submission``.

        ``test_data`` goes through ``process``, is scored by ``infer_ensemble``,
        and the blend is scaled by 1.12 and clipped back to [-1, 1].
        """
        features = process(test_data).to_pandas()
        blended = infer_ensemble(features,lgb_jiajie,xgb_jiajie,cbt_jiajie,lgb_models,cbt_models)
        scaled = np.clip(blended*1.12,-1,1)
        return submission.with_columns(pl.Series('utility_agent1', scaled))

    inference_server = kaggle_evaluation.mcts_inference_server.MCTSInferenceServer(predict)
    if os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
        inference_server.serve()
    else:
        # Not a competition rerun: replay the public test file through a local gateway.
        local_gateway_files = (
            '/kaggle/input/um-game-playing-strength-of-mcts-variants/test.csv',
            '/kaggle/input/um-game-playing-strength-of-mcts-variants/sample_submission.csv'
        )
        inference_server.run_local_gateway(local_gateway_files)
else:
    print ('')