In [16]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.cluster import KMeans
from sklearn.preprocessing import MinMaxScaler
from sklearn.cross_validation import KFold
from sklearn.cross_validation import train_test_split
import xgboost as xgb
from sklearn.metrics import accuracy_score
from tqdm import tqdm_notebook
from sklearn.grid_search import GridSearchCV

In [2]:
# Read the training and test tables from CSV files in the working directory.
train, test = (pd.read_csv(csv_path) for csv_path in ('train_data.csv', 'test_data.csv'))

In [3]:
# Preview the first five rows of the training table.
train.head(n=5)

Unnamed: 0,connection_id,cont_1,cont_2,cont_3,cont_4,cont_5,cont_6,cont_7,cont_8,cont_9,...,cat_15,cat_16,cat_17,cat_18,cat_19,cat_20,cat_21,cat_22,cat_23,target
0,cxcon_1,0,1032,0,0.0,0.0,0.0,0.0,1.0,0.0,...,0,0,0,0,0,511,511,255,255,2
1,cxcon_4,0,520,0,0.0,0.0,0.0,0.0,1.0,0.0,...,0,0,0,0,0,511,511,255,255,0
2,cxcon_7,0,1032,0,0.0,0.0,0.0,0.0,1.0,0.0,...,0,0,0,0,0,511,511,255,255,0
3,cxcon_10,0,1032,0,0.0,0.0,0.0,0.0,1.0,0.0,...,0,0,0,0,0,511,511,255,255,0
4,cxcon_13,0,1032,0,0.0,0.0,0.0,0.0,1.0,0.0,...,0,0,0,0,0,511,511,255,255,2


In [4]:
# Continuous variables: cont_1 ... cont_18
cont_cols = list('cont_{}'.format(idx) for idx in range(1, 19))

# Categorical variables: cat_1 ... cat_23, with cat_17 left out
cat_cols = list('cat_{}'.format(idx) for idx in range(1, 24) if idx != 17)

In [5]:
# Rescale the continuous features with a min-max scaler.
# The scaler is fitted on the training data ONLY; the test data is then
# transformed with the min/max learned from train. Re-fitting on the test set
# (as `fit_transform` would) maps the same raw value to different scaled
# values in train and test, so a model trained on one sees shifted inputs on
# the other.
# Note: test values outside the training range will fall outside [0, 1].
scaler = MinMaxScaler()
train[cont_cols] = scaler.fit_transform(train[cont_cols])
test[cont_cols] = scaler.transform(test[cont_cols])

In [6]:
# Feature columns: every training column except the row identifier, the
# target, and cat_17 (reason for dropping cat_17 is not shown here -- TODO confirm).
# Filtering train.columns in order (instead of going through a set difference)
# keeps the feature order identical on every run, so downstream models and
# feature-importance listings are reproducible.
excluded_cols = {'connection_id', 'cat_17', 'target'}
cols_to_use = [col for col in train.columns if col not in excluded_cols]

In [7]:
def multAcc(pred, dtrain):
    """Evaluation metric that reports classification accuracy.

    Parameters
    ----------
    pred : predictions, handed to ``accuracy_score`` as the predicted labels.
    dtrain : object exposing ``get_label()`` for the true labels
        (presumably an xgboost DMatrix -- confirm against the caller).

    Returns
    -------
    tuple
        ``('maccuracy', accuracy)``.
    """
    metric_name = 'maccuracy'
    true_labels = dtrain.get_label()
    return metric_name, accuracy_score(true_labels, pred)

In [26]:
# One hot encoding the training data
X = pd.get_dummies(train[cols_to_use])
y = train.target

# One hot encoding the test dataset.
# get_dummies derives its output columns from the values present in each
# frame, so encoding train and test independently can yield different column
# sets. Reindexing to the training columns guarantees the test matrix has
# exactly the features (and order) the models are fitted on: levels missing
# from test become all-zero columns, levels unseen in train are dropped.
X_test = pd.get_dummies(test[cols_to_use]).reindex(columns=X.columns, fill_value=0)

In [27]:
# Training meta dataset: a copy of X extended with two placeholder
# columns, M1 and M2, both initialised to None.
X_train_meta = X.assign(M1=None, M2=None)

In [28]:
# Hold out 15% of the labelled data as a validation split (fixed seed for
# reproducibility). The held-out labels are named y_valid to match X_valid.
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.15, random_state=47)

# Backward-compatible alias: these labels were originally called y_test even
# though they belong to X_valid, not to the unlabelled test set.
y_test = y_valid

#### SVC

In [29]:
def svc_param_selection(X, y, nfolds):
    """Grid-search ``C`` and ``gamma`` for an RBF-kernel SVC.

    Parameters
    ----------
    X : feature matrix passed to ``GridSearchCV.fit``.
    y : target labels passed to ``GridSearchCV.fit``.
    nfolds : value forwarded as the ``cv`` argument of ``GridSearchCV``.

    Returns
    -------
    The ``best_params_`` attribute of the fitted grid search.
    """
    search_space = {
        'C': [0.001, 0.01, 0.1, 1, 10],
        'gamma': [0.001, 0.01, 0.1, 1],
    }
    base_model = SVC(kernel='rbf', random_state=47)
    searcher = GridSearchCV(
        base_model,
        param_grid=search_space,
        cv=nfolds,
        verbose=True,
    )
    searcher.fit(X, y)
    return searcher.best_params_

In [None]:
# Run the 3-fold grid search on the training split.
# Despite its name, `svc` holds the best-parameter dict returned by the
# search, not a fitted estimator.
svc = svc_param_selection(X_train, y_train, nfolds=3)
# print(...) with a single argument behaves the same on Python 2 and Python 3,
# unlike the Python-2-only `print svc` statement.
print(svc)