**Final Model Pipeline**

In [None]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [None]:
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import PowerTransformer
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
from sklearn.model_selection import RepeatedStratifiedKFold
from imblearn.under_sampling import EditedNearestNeighbours
from imblearn.combine import SMOTEENN
from imblearn.pipeline import Pipeline

In [None]:
# Load the labelled training set and the unlabelled test set from the
# working directory.
train, test = (pd.read_csv(csv_name) for csv_name in ('train.csv', 'test.csv'))

In [None]:
# Categorical branch: impute missing values with the most frequent category,
# one-hot encode, then keep the 23 encoded columns with the best chi2 score.
# handle_unknown='ignore' makes a category that only shows up at prediction
# time encode as all zeros; the encoder default ('error') would raise instead
# and abort model.predict on the test set.
inter = Pipeline([
    ('i', SimpleImputer(strategy='most_frequent')),
    ('e', OneHotEncoder(handle_unknown='ignore')),
    ('fs', SelectKBest(score_func=chi2, k=23)),
])

# NOTE(review): `num`, `cat` and `params` are not defined in the visible part
# of this notebook. Presumably `num`/`cat` are the numeric/categorical column
# lists and `params` the tuned RandomForestClassifier keyword arguments --
# confirm they are defined before this cell runs.
transformers = [
    ('t', PowerTransformer(), num),
    ('inter', inter, cat),
]

# Full pipeline: column-wise preprocessing -> SMOTEENN resampling (its ENN
# cleaning step is configured with sampling_strategy='majority') -> random
# forest classifier.
steps = [
    ('pre', ColumnTransformer(transformers=transformers)),
    ('sampling', SMOTEENN(enn=EditedNearestNeighbours(sampling_strategy='majority'))),
    ('model', RandomForestClassifier(**params)),
]

In [None]:
# Label vector, then the training features: every column except
# 'enrollee_id', 'city' and the label itself.
y_train = train['target']
X_train = train.drop(columns=['enrollee_id', 'city', 'target'])

# The test frame has no 'target' column to remove; drop the same two
# non-feature columns so it matches X_train.
X_test = test.drop(columns=['enrollee_id', 'city'])

In [None]:
# Assemble the pipeline from the configured steps and fit it on the training
# data in one expression (fit returns the fitted pipeline itself).
model = Pipeline(steps=steps).fit(X_train, y_train)

# Predict the target for every row of the test features.
predictions = model.predict(X_test)

In [None]:
# Pair each test enrollee id with its predicted target.
submission = pd.DataFrame(
    {
        'enrollee_id': test['enrollee_id'],
        'target': predictions,
    }
)
# Show the first ten rows as the cell output for a quick visual check.
submission.head(10)

In [None]:
# Write the submission to disk; index=False leaves the DataFrame index out of
# the CSV.
submission.to_csv(path_or_buf='submissions.csv', index=False)