In [1]:
from google import colab

colab.drive.mount("/content/drive")

Mounted at /content/drive


In [2]:
data_path = '/content/drive/Shareddrives/JobCare/Jobcare_data/'
submit_path = '/content/drive/Shareddrives/JobCare/submit/'
SEED = 42

In [3]:
import os
import sys
import platform
import random
import math
from typing import List ,Dict, Tuple

import pandas as pd
import numpy as np
 
import sklearn 
from sklearn.model_selection import StratifiedKFold , KFold
from sklearn.metrics import f1_score 

In [4]:
!pip install catboost

Collecting catboost
  Downloading catboost-1.0.4-cp37-none-manylinux1_x86_64.whl (76.1 MB)
[K     |████████████████████████████████| 76.1 MB 60 kB/s 
Installing collected packages: catboost
Successfully installed catboost-1.0.4


In [5]:
# Optuna Libraries
!pip install optuna
import optuna
from optuna import Trial
from optuna.samplers import TPESampler

# LGBM Regressor
from lightgbm import LGBMRegressor

# train_test_split
from sklearn.model_selection import train_test_split

# Evaluation Score
from sklearn.metrics import mean_squared_error

Collecting optuna
  Downloading optuna-2.10.0-py3-none-any.whl (308 kB)
[K     |████████████████████████████████| 308 kB 27.9 MB/s 
[?25hCollecting alembic
  Downloading alembic-1.7.5-py3-none-any.whl (209 kB)
[K     |████████████████████████████████| 209 kB 78.0 MB/s 
Collecting colorlog
  Downloading colorlog-6.6.0-py2.py3-none-any.whl (11 kB)
Collecting cmaes>=0.8.2
  Downloading cmaes-0.8.2-py3-none-any.whl (15 kB)
Collecting cliff
  Downloading cliff-3.10.0-py3-none-any.whl (80 kB)
[K     |████████████████████████████████| 80 kB 12.5 MB/s 
Collecting Mako
  Downloading Mako-1.1.6-py2.py3-none-any.whl (75 kB)
[K     |████████████████████████████████| 75 kB 6.0 MB/s 
[?25hCollecting stevedore>=2.0.1
  Downloading stevedore-3.5.0-py3-none-any.whl (49 kB)
[K     |████████████████████████████████| 49 kB 8.0 MB/s 
[?25hCollecting autopage>=0.4.0
  Downloading autopage-0.4.0-py3-none-any.whl (20 kB)
Collecting pbr!=2.1.0,>=2.0.0
  Downloading pbr-5.8.0-py2.py3-none-any.whl (112 k

In [6]:
train_data = pd.read_csv(f'{data_path}train.csv')
test_data = pd.read_csv(f'{data_path}test.csv')

code_d = pd.read_csv(f'{data_path}속성_D_코드.csv')
code_h = pd.read_csv(f'{data_path}속성_H_코드.csv')
code_l = pd.read_csv(f'{data_path}속성_L_코드.csv')

train_data.shape , test_data.shape

((501951, 35), (46404, 34))

In [7]:
code_d.columns= ["attribute_d","attribute_d_d","attribute_d_s","attribute_d_m","attribute_d_l"]
code_h.columns= ["attribute_h","attribute_h_p","attribute_h_l"]
code_l.columns= ["attribute_l","attribute_l_d","attribute_l_s","attribute_l_m","attribute_l_l"]

In [8]:
#데이터 merge
def merge_codes(df:pd.DataFrame,df_code:pd.DataFrame,col:str)->pd.DataFrame:
    df = df.copy()
    df_code = df_code.copy()
    df_code = df_code.add_prefix(f"{col}_")
    df_code.columns.values[0] = col
    return pd.merge(df,df_code,how="left",on=col)

In [9]:
def preprocess_data(
                    df:pd.DataFrame,is_train:bool = True, cols_merge:List[Tuple[str,pd.DataFrame]] = []  , cols_equi:List[Tuple[str,str]]= [] ,
                    cols_drop:List[str] = ["id","person_prefer_f","person_prefer_g" ,"contents_open_dt"]
                    )->Tuple[pd.DataFrame,np.ndarray]:
    df = df.copy()

    y_data = None
    if is_train:
        y_data = df["target"].to_numpy()
        df = df.drop(columns="target")

    for col, df_code in cols_merge:
        df = merge_codes(df,df_code,col)

    cols = df.select_dtypes(bool).columns.tolist()
    df[cols] = df[cols].astype(int)

    for col1, col2 in cols_equi:
        df[f"{col1}_{col2}"] = (df[col1] == df[col2] ).astype(int)

    df = df.drop(columns=cols_drop)
    return (df , y_data)

In [10]:
# 소분류 중분류 대분류 속성코드 merge 컬럼명 및 데이터 프레임 리스트
cols_merge = [
              ("person_prefer_d_1" , code_d),
              ("person_prefer_d_2" , code_d),
              ("person_prefer_d_3" , code_d),
              ("contents_attribute_d" , code_d),
              ("person_prefer_h_1" , code_h),
              ("person_prefer_h_2" , code_h),
              ("person_prefer_h_3" , code_h),
              ("contents_attribute_h" , code_h),
              ("contents_attribute_l" , code_l),
]

# 회원 속성과 콘텐츠 속성의 동일한 코드 여부에 대한 컬럼명 리스트
cols_equi = [

    ("contents_attribute_c","person_prefer_c"),
    ("contents_attribute_e","person_prefer_e"),

    ("person_prefer_d_2_attribute_d_s" , "contents_attribute_d_attribute_d_s"),
    ("person_prefer_d_2_attribute_d_m" , "contents_attribute_d_attribute_d_m"),
    ("person_prefer_d_2_attribute_d_l" , "contents_attribute_d_attribute_d_l"),
    ("person_prefer_d_3_attribute_d_s" , "contents_attribute_d_attribute_d_s"),
    ("person_prefer_d_3_attribute_d_m" , "contents_attribute_d_attribute_d_m"),
    ("person_prefer_d_3_attribute_d_l" , "contents_attribute_d_attribute_d_l"),

    ("person_prefer_h_1_attribute_h_p" , "contents_attribute_h_attribute_h_p"),
    ("person_prefer_h_2_attribute_h_p" , "contents_attribute_h_attribute_h_p"),
    ("person_prefer_h_3_attribute_h_p" , "contents_attribute_h_attribute_h_p"),

]

# 학습에 필요없는 컬럼 리스트
cols_drop = ["id","person_prefer_f","person_prefer_g" ,"contents_open_dt", "contents_rn", ]

In [11]:
x_train, y_train = preprocess_data(train_data, cols_merge = cols_merge , cols_equi= cols_equi , cols_drop = cols_drop)
x_test, _ = preprocess_data(test_data,is_train = False, cols_merge = cols_merge , cols_equi= cols_equi  , cols_drop = cols_drop)
x_train.shape , y_train.shape , x_test.shape

((501951, 68), (501951,), (46404, 68))

In [None]:
# random sampler
sampler = TPESampler(seed=10)
from catboost import Pool,CatBoostClassifier

# define function
def objective(trial):
    test_pred = pd.Series([0 for x in range(len(x_test))], index=x_test.index)
    kf = KFold(n_splits=10,shuffle = True, random_state=2014)
    for train_index, valid_index in kf.split(x_train):
      train_X, train_y = x_train.iloc[train_index], y_train[train_index]
      valid_X, valid_y = x_train.iloc[valid_index], y_train[valid_index]
      

      cbrm_param = {
          "eval_metric" : 'AUC',
          "iterations" : 5000,
          "metric_period" : 5000,
          "early_stopping_rounds": 1000,
          "task_type" : 'GPU',
          "grow_policy" : 'Depthwise',
          "depth" : trial.suggest_int("depth", 4, 12),
          'learning_rate' : trial.suggest_loguniform('learning_rate', 0.01, 0.3),
          "l2_leaf_reg":trial.suggest_float("l2_leaf_reg",20,50),
          "random_seed": 2014,

        }
      

      # Generate model
      model_cbrm = CatBoostClassifier(**cbrm_param)
      model_cbrm = model_cbrm.fit(train_X, train_y, eval_set=(valid_X, valid_y))

      # 평가지표 원하는 평가 지표가 있을 시 바꾸어 준다.
      MSE = mean_squared_error(valid_y, model_cbrm.predict(valid_X))
      return MSE


optuna_cbrm = optuna.create_study(direction='minimize', sampler=sampler)
optuna_cbrm.optimize(objective, n_trials=50)



[32m[I 2022-01-17 01:49:24,571][0m A new study created in memory with name: no-name-8a7cf5f5-2201-4d2b-bc9f-82b20c7b97ff[0m


0:	learn: 0.6495224	test: 0.6358535	best: 0.6358535 (0)	total: 108ms	remaining: 8m 59s
4999:	learn: 0.9302554	test: 0.6984776	best: 0.6984776 (4999)	total: 4m 11s	remaining: 0us
bestTest = 0.698477596
bestIteration = 4999


[32m[I 2022-01-17 01:54:00,063][0m Trial 0 finished with value: 0.3560443063192286 and parameters: {'depth': 10, 'learning_rate': 0.010731320001845925, 'l2_leaf_reg': 39.009447047788264}. Best is trial 0 with value: 0.3560443063192286.[0m


0:	learn: 0.6496345	test: 0.6359600	best: 0.6359600 (0)	total: 103ms	remaining: 8m 36s
bestTest = 0.6982039213
bestIteration = 1624
Shrink model to first 1625 iterations.


[32m[I 2022-01-17 01:56:25,037][0m Trial 1 finished with value: 0.3562634472866364 and parameters: {'depth': 10, 'learning_rate': 0.054494830174039256, 'l2_leaf_reg': 26.74389936592543}. Best is trial 0 with value: 0.3560443063192286.[0m


0:	learn: 0.6194041	test: 0.6183336	best: 0.6183336 (0)	total: 28ms	remaining: 2m 19s
4999:	learn: 0.8156167	test: 0.6887182	best: 0.6887212 (4996)	total: 1m 5s	remaining: 0us
bestTest = 0.6887212396
bestIteration = 4996
Shrink model to first 4997 iterations.


[32m[I 2022-01-17 01:57:36,396][0m Trial 2 finished with value: 0.3638935373336521 and parameters: {'depth': 5, 'learning_rate': 0.13286056037300167, 'l2_leaf_reg': 25.07332509687606}. Best is trial 0 with value: 0.3560443063192286.[0m


0:	learn: 0.6127205	test: 0.6123188	best: 0.6123188 (0)	total: 21.6ms	remaining: 1m 48s
4999:	learn: 0.7417760	test: 0.6842837	best: 0.6842848 (4997)	total: 57s	remaining: 0us
bestTest = 0.6842848063
bestIteration = 4997
Shrink model to first 4998 iterations.


[32m[I 2022-01-17 01:58:38,151][0m Trial 3 finished with value: 0.36835604430631924 and parameters: {'depth': 4, 'learning_rate': 0.1028867751008824, 'l2_leaf_reg': 48.601800385848094}. Best is trial 0 with value: 0.3560443063192286.[0m


0:	learn: 0.6127205	test: 0.6123188	best: 0.6123188 (0)	total: 21.1ms	remaining: 1m 45s
4999:	learn: 0.7224788	test: 0.6814889	best: 0.6814889 (4999)	total: 56.7s	remaining: 0us
bestTest = 0.6814889014
bestIteration = 4999


[32m[I 2022-01-17 01:59:39,651][0m Trial 4 finished with value: 0.3711849549764922 and parameters: {'depth': 4, 'learning_rate': 0.05709131925970865, 'l2_leaf_reg': 44.378628849563405}. Best is trial 0 with value: 0.3560443063192286.[0m


0:	learn: 0.6427986	test: 0.6335759	best: 0.6335759 (0)	total: 69.5ms	remaining: 5m 47s
bestTest = 0.6924034655
bestIteration = 1343
Shrink model to first 1344 iterations.


[32m[I 2022-01-17 02:01:08,109][0m Trial 5 finished with value: 0.360307594230616 and parameters: {'depth': 9, 'learning_rate': 0.11644480557404961, 'l2_leaf_reg': 28.756282045118994}. Best is trial 0 with value: 0.3560443063192286.[0m


0:	learn: 0.6692705	test: 0.6299761	best: 0.6299761 (0)	total: 264ms	remaining: 21m 59s
bestTest = 0.6910100877
bestIteration = 590
Shrink model to first 591 iterations.


[32m[I 2022-01-17 02:04:56,519][0m Trial 6 finished with value: 0.36237947246792573 and parameters: {'depth': 12, 'learning_rate': 0.11363577488646587, 'l2_leaf_reg': 36.27633104033784}. Best is trial 0 with value: 0.3560443063192286.[0m


0:	learn: 0.6194041	test: 0.6183336	best: 0.6183336 (0)	total: 24.9ms	remaining: 2m 4s
4999:	learn: 0.7430615	test: 0.6847114	best: 0.6847114 (4999)	total: 1m 5s	remaining: 0us
bestTest = 0.6847113967
bestIteration = 4999


[32m[I 2022-01-17 02:06:07,595][0m Trial 7 finished with value: 0.3693720615188461 and parameters: {'depth': 5, 'learning_rate': 0.03560160455275411, 'l2_leaf_reg': 40.22400845199036}. Best is trial 0 with value: 0.3560443063192286.[0m


0:	learn: 0.6307673	test: 0.6270884	best: 0.6270884 (0)	total: 36.8ms	remaining: 3m 3s
4999:	learn: 0.8756832	test: 0.6949556	best: 0.6949574 (4998)	total: 1m 35s	remaining: 0us
bestTest = 0.6949574053
bestIteration = 4998
Shrink model to first 4999 iterations.


[32m[I 2022-01-17 02:07:51,697][0m Trial 8 finished with value: 0.3594708741732409 and parameters: {'depth': 7, 'learning_rate': 0.04376140877130064, 'l2_leaf_reg': 38.53300935407951}. Best is trial 0 with value: 0.3560443063192286.[0m


0:	learn: 0.6370214	test: 0.6310156	best: 0.6310156 (0)	total: 52.2ms	remaining: 4m 21s
bestTest = 0.694920361
bestIteration = 3389
Shrink model to first 3390 iterations.


[32m[I 2022-01-17 02:09:50,420][0m Trial 9 finished with value: 0.3608454856960714 and parameters: {'depth': 8, 'learning_rate': 0.09135146183055094, 'l2_leaf_reg': 38.03116860213633}. Best is trial 0 with value: 0.3560443063192286.[0m


0:	learn: 0.6693603	test: 0.6297842	best: 0.6297842 (0)	total: 261ms	remaining: 21m 45s
4999:	learn: 0.9953157	test: 0.7025422	best: 0.7025710 (4917)	total: 10m 42s	remaining: 0us
bestTest = 0.7025709748
bestIteration = 4917
Shrink model to first 4918 iterations.


[32m[I 2022-01-17 02:21:28,590][0m Trial 10 finished with value: 0.3531954737429277 and parameters: {'depth': 12, 'learning_rate': 0.01091437967298683, 'l2_leaf_reg': 31.577776583419578}. Best is trial 10 with value: 0.3531954737429277.[0m


0:	learn: 0.6694204	test: 0.6297549	best: 0.6297549 (0)	total: 253ms	remaining: 21m 5s
4999:	learn: 0.9943266	test: 0.7022693	best: 0.7022759 (4968)	total: 10m 41s	remaining: 0us
bestTest = 0.7022758722
bestIteration = 4968
Shrink model to first 4969 iterations.


[32m[I 2022-01-17 02:33:05,846][0m Trial 11 finished with value: 0.3536735994899992 and parameters: {'depth': 12, 'learning_rate': 0.010291841687366868, 'l2_leaf_reg': 30.943736204046168}. Best is trial 10 with value: 0.3531954737429277.[0m


0:	learn: 0.6693591	test: 0.6297548	best: 0.6297548 (0)	total: 254ms	remaining: 21m 7s
4999:	learn: 0.9950942	test: 0.7026883	best: 0.7026883 (4999)	total: 10m 43s	remaining: 0us
bestTest = 0.7026883364
bestIteration = 4999


[32m[I 2022-01-17 02:44:46,549][0m Trial 12 finished with value: 0.35297633277552 and parameters: {'depth': 12, 'learning_rate': 0.010786895475833787, 'l2_leaf_reg': 31.633942346112224}. Best is trial 12 with value: 0.35297633277552.[0m


0:	learn: 0.6584835	test: 0.6332731	best: 0.6332731 (0)	total: 162ms	remaining: 13m 31s
4999:	learn: 0.9975781	test: 0.7015793	best: 0.7020001 (4269)	total: 6m 47s	remaining: 0us
bestTest = 0.7020000815
bestIteration = 4269
Shrink model to first 4270 iterations.


[32m[I 2022-01-17 02:52:05,046][0m Trial 13 finished with value: 0.35437086620447844 and parameters: {'depth': 11, 'learning_rate': 0.018039574479844014, 'l2_leaf_reg': 20.23768022599082}. Best is trial 12 with value: 0.35297633277552.[0m


0:	learn: 0.6693444	test: 0.6303022	best: 0.6303022 (0)	total: 245ms	remaining: 20m 24s
bestTest = 0.7009846568
bestIteration = 3240
Shrink model to first 3241 iterations.


[32m[I 2022-01-17 03:02:12,613][0m Trial 14 finished with value: 0.35482907004542197 and parameters: {'depth': 12, 'learning_rate': 0.02176437843655606, 'l2_leaf_reg': 32.86771983592758}. Best is trial 12 with value: 0.35297633277552.[0m


0:	learn: 0.6496812	test: 0.6358403	best: 0.6358403 (0)	total: 104ms	remaining: 8m 39s
bestTest = 0.6802099645
bestIteration = 3196
Shrink model to first 3197 iterations.


[32m[I 2022-01-17 03:06:09,267][0m Trial 15 finished with value: 0.3700294844210694 and parameters: {'depth': 10, 'learning_rate': 0.26506162257241284, 'l2_leaf_reg': 23.43970564284751}. Best is trial 12 with value: 0.35297633277552.[0m


0:	learn: 0.6582738	test: 0.6337798	best: 0.6337798 (0)	total: 165ms	remaining: 13m 45s
4999:	learn: 0.9951435	test: 0.7015440	best: 0.7015642 (4430)	total: 6m 45s	remaining: 0us
bestTest = 0.7015641928
bestIteration = 4430
Shrink model to first 4431 iterations.


[32m[I 2022-01-17 03:13:26,842][0m Trial 16 finished with value: 0.3551278986373416 and parameters: {'depth': 11, 'learning_rate': 0.017767503088258734, 'l2_leaf_reg': 33.29140311305197}. Best is trial 12 with value: 0.35297633277552.[0m


0:	learn: 0.6307722	test: 0.6270919	best: 0.6270919 (0)	total: 36.8ms	remaining: 3m 3s
4999:	learn: 0.8320373	test: 0.6930404	best: 0.6930451 (4995)	total: 1m 35s	remaining: 0us
bestTest = 0.6930451393
bestIteration = 4995
Shrink model to first 4996 iterations.


[32m[I 2022-01-17 03:15:10,877][0m Trial 17 finished with value: 0.36110447047573513 and parameters: {'depth': 7, 'learning_rate': 0.02564532014603323, 'l2_leaf_reg': 29.348313682794718}. Best is trial 12 with value: 0.35297633277552.[0m


0:	learn: 0.6580537	test: 0.6336542	best: 0.6336542 (0)	total: 166ms	remaining: 13m 50s


In [None]:
cbrm_trial = optuna_cbrm.best_trial
cbrm_trial_params = cbrm_trial.params
print('Best Trial: score {},\nparams {}'.format(cbrm_trial.value, cbrm_trial_params))

In [None]:
cbrm_trial_params

In [None]:
data1 = catboost_modeling(x_train, y_train, x_test, 'Depthwise', 10, 0.02423, 20.35, 2014, 2)

In [None]:
datas = pd.read_csv("/content/drive/Shareddrives/JobCare/submit/CatBoost_Depthwise_10.csv")
data_fin = pd.DataFrame(datas*2)
data_fin.columns = datas.columns
data_fin

In [None]:
data4 = catboost_modeling(x_train, y_train, x_test, 'Lossguide', 16, 0.01213, 5.027, 2022, 2)

In [None]:
# 최종 모델 앙상블
sample_submission = pd.read_csv(f'{data_path}sample_submission.csv')
data_final = pd.DataFrame(data1)
data_final.columns = sample_submission.columns
data_final.to_csv(f"{submit_path}final_data1_only.csv", index=True)
data_final

In [None]:
'''# 최종 모델 앙상블
sample_submission = pd.read_csv(f'{data_path}sample_submission.csv')
data_final = pd.DataFrame((data1+data2)/2 *1/3 + (data3+data4)/2 *2/3)
data_final.columns = sample_submission.columns
data_final.to_csv('data_final.csv', index =True)
data_final'''

In [None]:
y_pred=clf.predict(x_test)

In [None]:
#Import scikit-learn metrics module for accuracy calculation
from sklearn import metrics
# Model Accuracy, how often is the classifier correct?
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))

## sklearn 홈페이지 참조

In [None]:
import pandas as pd 

jobs = pd.read_csv('/content/drive/Shareddrives/JobCare/Jobcare_data/train.csv', sep=',')

In [None]:
print(jobs.columns)
print(jobs.id[0:5])

In [None]:
from sklearn.model_selection import train_test_split

X=jobs[['d_l_match_yn', 'd_m_match_yn', 'd_s_match_yn', 'h_l_match_yn',
       'h_m_match_yn', 'h_s_match_yn', 'person_attribute_a',
       'person_attribute_a_1', 'person_attribute_b', 'person_prefer_c',
       'person_prefer_d_1', 'person_prefer_d_2', 'person_prefer_d_3',
       'person_prefer_e', 'person_prefer_f', 'person_prefer_g',
       'person_prefer_h_1', 'person_prefer_h_2', 'person_prefer_h_3',
       'contents_attribute_i', 'contents_attribute_a',
       'contents_attribute_j_1', 'contents_attribute_j',
       'contents_attribute_c', 'contents_attribute_k', 'contents_attribute_l',
       'contents_attribute_d', 'contents_attribute_m', 'contents_attribute_e',
       'contents_attribute_h', 'person_rn', 'contents_rn']]  # Features ('id','contents_open_dt' 제외)
y=jobs['target']  # Labels

# Split dataset into training set and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3) # 70% training and 30% test

In [None]:
#Import Random Forest Model
from sklearn.ensemble import RandomForestClassifier

#Create a Gaussian Classifier
clf=RandomForestClassifier(n_estimators=100)

#Train the model using the training sets y_pred=clf.predict(X_test)
clf.fit(X_train,y_train)

y_pred=clf.predict(X_test)

In [None]:
#Import scikit-learn metrics module for accuracy calculation
from sklearn import metrics
# Model Accuracy, how often is the classifier correct?
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))

In [None]:
import pandas as pd
jobs.newcol = []
for i in range(len(jobs.columns)):
  if i != 0 and i <= 32:
    jobs.newcol.append(jobs.columns[i])
feature_imp = pd.Series(clf.feature_importances_,index=jobs.newcol).sort_values(ascending=False)
feature_imp

In [None]:
print(feature_imp.index[:16])

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
# Creating a bar plot
sns.barplot(x=feature_imp, y=feature_imp.index)
# Add labels to your graph
plt.xlabel('Feature Importance Score')
plt.ylabel('Features')
plt.title("Visualizing Important Features")
plt.legend()
plt.show()

그래서 위 요소들 중에 가장 importance 가 낮은것 제외 상위 16개만 학습하면?

In [None]:
from sklearn.model_selection import train_test_split
# Split dataset into features and labels
X=jobs[['contents_rn', 'person_rn', 'contents_attribute_l',
       'contents_attribute_d', 'person_prefer_d_2', 'person_prefer_d_1',
       'contents_attribute_h', 'person_prefer_d_3', 'person_prefer_h_2',
       'person_prefer_h_3', 'person_prefer_h_1', 'person_attribute_a_1',
       'person_prefer_e', 'contents_attribute_e', 'person_attribute_b',
       'contents_attribute_m']]  # Removed feature "sepal length"
y=jobs['target']                                       
# Split dataset into training set and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.70, random_state=5) 



In [None]:
from sklearn.ensemble import RandomForestClassifier

#Create a Gaussian Classifier
clf=RandomForestClassifier(n_estimators=100)

#Train the model using the training sets y_pred=clf.predict(X_test)
clf.fit(X_train,y_train)

# prediction on test set
y_pred=clf.predict(X_test)

#Import scikit-learn metrics module for accuracy calculation
from sklearn import metrics
# Model Accuracy, how often is the classifier correct?
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))