# VK Cup. Трек Машинное обучение. Квалификация

## Необходимые библиотеки

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

## Загрузка данных

In [2]:
data = pd.read_csv('data.csv', sep = ';', index_col = 'ID')
test = pd.read_csv('test.csv', sep = ';', index_col = 'ID')
train = pd.read_csv('train.csv', sep = ';', index_col = 'ID')

In [3]:
train['Question'] = data['Question'][:30001]
test['Question'] = data['Question'][30000:]

In [4]:
train.head()

Unnamed: 0_level_0,Answer,Question
ID,Unnamed: 1_level_1,Unnamed: 2_level_1
1,0,Как зовут лодочника на реке Стикс в древнегреч...
2,1,Как в химии обозначается свинец?
3,0,Какой химический элемент преобладает в составе...
4,0,Кто из перечисленных был пажом во времена Екат...
5,0,Когда началась 2 мировая война?


In [5]:
test.head()

Unnamed: 0_level_0,Question
ID,Unnamed: 1_level_1
30001,"Что в литературе обозначает слово ""эпитет""?"
30002,Когда отмечается ДЕНЬ ШОКОЛАДА
30003,Кто из этих бодибилдеров играл Халка
30004,"Последняя на данный момент страна, официально ..."
30005,Как зовут лютоволка Арьи Старк из сериала «Иг...


## Отбор признаков

In [6]:
class features_extract:
    def __init__(self, data):
        self.data = data
    
    def question_mark(data):
        question_mark = []
        for i in range(len(data['Question'].values)):
            if '?' in data['Question'].values[i]:
                question_mark.append(1)
            else:
                question_mark.append(0)
        
        return pd.Series(question_mark, index = data.index)
    
    def angle_quotes(data):
        angle_quotes = []
        for i in range(len(data['Question'].values)):
            if '«' in data['Question'].values[i]:
                angle_quotes.append(1)
            else:
                angle_quotes.append(0)
        
        return pd.Series(angle_quotes, index = data.index)
    
    def words_count(data):
        words_count = []
        for i in range(len(data['Question'].values)):
            words_count.append(len(data['Question'].values[i].split()))
        
        return pd.Series(words_count, index = data.index)
    
    def comments(data):
        comments = []
        for i in range(len(data['Question'].values)):
            if data['Question'].values[i][-1] != '?':
                comments.append(1)
            else:
                comments.append(0)
        
        return pd.Series(comments, index = data.index)
    
    def space_before_question_mark(data):
        space_before_question_mark = []
        for i in range(len(data['Question'].values)):
            if ' ?' in data['Question'].values[i]:
                space_before_question_mark.append(1)
            else: 
                space_before_question_mark.append(0)
        
        return pd.Series(space_before_question_mark, index = data.index)
    
    def german_quotes(data):
        german_quotes = []
        for i in range(len(data['Question'].values)):
            if '"' in data['Question'].values[i]:
                german_quotes.append(1)
            else:
                german_quotes.append(0)
            
        return pd.Series(german_quotes, index = data.index)
    
    def is_title(data):
        is_title = []
        for i in range(len(data['Question'].values)):
            if data['Question'].values[i][0].istitle():
                is_title.append(1)
            else:
                is_title.append(0)
        
        return pd.Series(is_title, index = data.index)
    
    def punct_mark(data):
        punct_mark = []
        for i in range(len(data['Question'].values)):
            if ('.' or '!' or '\n' or '-' or '«') in train.Question.values[i]:
                punct_mark.append(1)
            else:
                punct_mark.append(0)
        
        return pd.Series(punct_mark, index = data.index)
    
    def lenght(data):
        lenght = []
        for i in range(len(data.Question.values)):
            lenght.append(len(data.Question.values[i]))
        
        return pd.Series(lenght, index = data.index)

In [7]:
for d in [train, test]:
    d['question_mark'] = features_extract.question_mark(d)
    d['angle_quotes'] = features_extract.angle_quotes(d)
    d['comments'] = features_extract.comments(d)
    d['space_before_question_mark'] = features_extract.space_before_question_mark(d)
    d['words_count'] = features_extract.words_count(d)
    d['german_quotes'] = features_extract.german_quotes(d)
    d['first_letter_is_title'] = features_extract.is_title(d)
    d['punct_mark'] = features_extract.punct_mark(d)
    d['lenght'] = features_extract.lenght(d)

## Разбиение выборки

In [8]:
X = train[['question_mark','angle_quotes', 'comments',
           'space_before_question_mark', 'words_count',
           'german_quotes', 'first_letter_is_title','punct_mark', 'lenght']]
y = train['Answer']

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size = 0.25, shuffle = True, random_state = np.random)

## Модель

In [10]:
from catboost import CatBoostClassifier

In [11]:
model = CatBoostClassifier(learning_rate = 0.05, depth = 4)
model.fit(X_train, y_train)

0:	learn: 0.6288942	total: 69.3ms	remaining: 1m 9s
1:	learn: 0.5916761	total: 81.7ms	remaining: 40.7s
2:	learn: 0.5418600	total: 93.9ms	remaining: 31.2s
3:	learn: 0.5112802	total: 106ms	remaining: 26.4s
4:	learn: 0.4878142	total: 114ms	remaining: 22.8s
5:	learn: 0.4668424	total: 126ms	remaining: 20.9s
6:	learn: 0.4387835	total: 138ms	remaining: 19.6s
7:	learn: 0.4229695	total: 150ms	remaining: 18.6s
8:	learn: 0.4086883	total: 163ms	remaining: 17.9s
9:	learn: 0.3965507	total: 174ms	remaining: 17.3s
10:	learn: 0.3860474	total: 187ms	remaining: 16.8s
11:	learn: 0.3707426	total: 199ms	remaining: 16.4s
12:	learn: 0.3570949	total: 211ms	remaining: 16s
13:	learn: 0.3505979	total: 222ms	remaining: 15.7s
14:	learn: 0.3409073	total: 234ms	remaining: 15.4s
15:	learn: 0.3328682	total: 246ms	remaining: 15.1s
16:	learn: 0.3261257	total: 259ms	remaining: 14.9s
17:	learn: 0.3209100	total: 280ms	remaining: 15.3s
18:	learn: 0.3165839	total: 301ms	remaining: 15.6s
19:	learn: 0.3128321	total: 334ms	remain

163:	learn: 0.2777530	total: 3.56s	remaining: 18.2s
164:	learn: 0.2777364	total: 3.59s	remaining: 18.1s
165:	learn: 0.2777182	total: 3.61s	remaining: 18.1s
166:	learn: 0.2777158	total: 3.62s	remaining: 18.1s
167:	learn: 0.2777132	total: 3.63s	remaining: 18s
168:	learn: 0.2777087	total: 3.65s	remaining: 17.9s
169:	learn: 0.2776934	total: 3.66s	remaining: 17.9s
170:	learn: 0.2776792	total: 3.67s	remaining: 17.8s
171:	learn: 0.2776730	total: 3.68s	remaining: 17.7s
172:	learn: 0.2776304	total: 3.69s	remaining: 17.7s
173:	learn: 0.2776255	total: 3.7s	remaining: 17.6s
174:	learn: 0.2776193	total: 3.71s	remaining: 17.5s
175:	learn: 0.2775873	total: 3.73s	remaining: 17.5s
176:	learn: 0.2775669	total: 3.74s	remaining: 17.4s
177:	learn: 0.2775574	total: 3.75s	remaining: 17.3s
178:	learn: 0.2775429	total: 3.77s	remaining: 17.3s
179:	learn: 0.2775170	total: 3.79s	remaining: 17.3s
180:	learn: 0.2775011	total: 3.81s	remaining: 17.2s
181:	learn: 0.2774788	total: 3.82s	remaining: 17.2s
182:	learn: 0.2

335:	learn: 0.2746784	total: 6.11s	remaining: 12.1s
336:	learn: 0.2746633	total: 6.13s	remaining: 12.1s
337:	learn: 0.2746437	total: 6.14s	remaining: 12s
338:	learn: 0.2746292	total: 6.15s	remaining: 12s
339:	learn: 0.2746162	total: 6.17s	remaining: 12s
340:	learn: 0.2746102	total: 6.18s	remaining: 11.9s
341:	learn: 0.2746019	total: 6.19s	remaining: 11.9s
342:	learn: 0.2745884	total: 6.2s	remaining: 11.9s
343:	learn: 0.2745650	total: 6.21s	remaining: 11.9s
344:	learn: 0.2745532	total: 6.23s	remaining: 11.8s
345:	learn: 0.2745374	total: 6.24s	remaining: 11.8s
346:	learn: 0.2745247	total: 6.25s	remaining: 11.8s
347:	learn: 0.2745179	total: 6.26s	remaining: 11.7s
348:	learn: 0.2745043	total: 6.28s	remaining: 11.7s
349:	learn: 0.2744861	total: 6.29s	remaining: 11.7s
350:	learn: 0.2744735	total: 6.3s	remaining: 11.6s
351:	learn: 0.2744669	total: 6.32s	remaining: 11.6s
352:	learn: 0.2744560	total: 6.34s	remaining: 11.6s
353:	learn: 0.2744458	total: 6.35s	remaining: 11.6s
354:	learn: 0.274436

503:	learn: 0.2727576	total: 8.4s	remaining: 8.27s
504:	learn: 0.2727453	total: 8.42s	remaining: 8.25s
505:	learn: 0.2727391	total: 8.44s	remaining: 8.24s
506:	learn: 0.2727323	total: 8.45s	remaining: 8.22s
507:	learn: 0.2727276	total: 8.46s	remaining: 8.2s
508:	learn: 0.2727229	total: 8.48s	remaining: 8.18s
509:	learn: 0.2727158	total: 8.49s	remaining: 8.16s
510:	learn: 0.2727126	total: 8.5s	remaining: 8.13s
511:	learn: 0.2727053	total: 8.51s	remaining: 8.12s
512:	learn: 0.2726978	total: 8.53s	remaining: 8.09s
513:	learn: 0.2726821	total: 8.54s	remaining: 8.07s
514:	learn: 0.2726765	total: 8.55s	remaining: 8.05s
515:	learn: 0.2726687	total: 8.56s	remaining: 8.03s
516:	learn: 0.2726544	total: 8.58s	remaining: 8.01s
517:	learn: 0.2726500	total: 8.59s	remaining: 7.99s
518:	learn: 0.2726382	total: 8.61s	remaining: 7.98s
519:	learn: 0.2726252	total: 8.63s	remaining: 7.97s
520:	learn: 0.2726168	total: 8.65s	remaining: 7.95s
521:	learn: 0.2726005	total: 8.66s	remaining: 7.93s
522:	learn: 0.2

673:	learn: 0.2712067	total: 10.7s	remaining: 5.18s
674:	learn: 0.2711749	total: 10.7s	remaining: 5.16s
675:	learn: 0.2711653	total: 10.7s	remaining: 5.15s
676:	learn: 0.2711585	total: 10.8s	remaining: 5.13s
677:	learn: 0.2711510	total: 10.8s	remaining: 5.12s
678:	learn: 0.2711446	total: 10.8s	remaining: 5.1s
679:	learn: 0.2711384	total: 10.8s	remaining: 5.08s
680:	learn: 0.2711338	total: 10.8s	remaining: 5.07s
681:	learn: 0.2711303	total: 10.8s	remaining: 5.05s
682:	learn: 0.2711254	total: 10.8s	remaining: 5.03s
683:	learn: 0.2711117	total: 10.8s	remaining: 5.01s
684:	learn: 0.2711032	total: 10.9s	remaining: 5s
685:	learn: 0.2710977	total: 10.9s	remaining: 4.98s
686:	learn: 0.2710923	total: 10.9s	remaining: 4.96s
687:	learn: 0.2710864	total: 10.9s	remaining: 4.94s
688:	learn: 0.2710817	total: 10.9s	remaining: 4.92s
689:	learn: 0.2710668	total: 10.9s	remaining: 4.92s
690:	learn: 0.2710585	total: 11s	remaining: 4.9s
691:	learn: 0.2710504	total: 11s	remaining: 4.91s
692:	learn: 0.2710333

834:	learn: 0.2698675	total: 14.5s	remaining: 2.87s
835:	learn: 0.2698592	total: 14.5s	remaining: 2.85s
836:	learn: 0.2698483	total: 14.6s	remaining: 2.83s
837:	learn: 0.2698389	total: 14.6s	remaining: 2.82s
838:	learn: 0.2698273	total: 14.6s	remaining: 2.8s
839:	learn: 0.2698183	total: 14.6s	remaining: 2.78s
840:	learn: 0.2698087	total: 14.6s	remaining: 2.76s
841:	learn: 0.2698001	total: 14.6s	remaining: 2.74s
842:	learn: 0.2697962	total: 14.6s	remaining: 2.73s
843:	learn: 0.2697882	total: 14.7s	remaining: 2.71s
844:	learn: 0.2697837	total: 14.7s	remaining: 2.69s
845:	learn: 0.2697720	total: 14.7s	remaining: 2.67s
846:	learn: 0.2697612	total: 14.7s	remaining: 2.65s
847:	learn: 0.2697583	total: 14.7s	remaining: 2.63s
848:	learn: 0.2697459	total: 14.7s	remaining: 2.62s
849:	learn: 0.2697344	total: 14.7s	remaining: 2.6s
850:	learn: 0.2697306	total: 14.8s	remaining: 2.58s
851:	learn: 0.2697264	total: 14.8s	remaining: 2.57s
852:	learn: 0.2697213	total: 14.8s	remaining: 2.55s
853:	learn: 0.

999:	learn: 0.2686767	total: 16.8s	remaining: 0us


<catboost.core.CatBoostClassifier at 0x7f6332baa510>

## Оценка точности

In [12]:
predictions = model.predict_proba(X_test)
y_pred1 = [predictions[i][0] for i in range(len(predictions))]
y_pred2 = [predictions[i][1] for i in range(len(predictions))]

In [13]:
from sklearn.metrics import roc_auc_score
roc_auc_score(y_test, y_pred2)

0.7301416089996522

In [14]:
model.get_feature_importance()

array([48.00124374,  2.35788871, 19.20925255,  7.20570928,  5.07705007,
        6.24132111,  2.87290267,  1.63808851,  7.39654336])

## Обучение на основных данных

In [15]:
model.fit(X,y)

0:	learn: 0.6276811	total: 15.3ms	remaining: 15.3s
1:	learn: 0.5908989	total: 30.4ms	remaining: 15.2s
2:	learn: 0.5405602	total: 45.7ms	remaining: 15.2s
3:	learn: 0.5101644	total: 60.4ms	remaining: 15s
4:	learn: 0.4870873	total: 70.6ms	remaining: 14s
5:	learn: 0.4641616	total: 87ms	remaining: 14.4s
6:	learn: 0.4359333	total: 102ms	remaining: 14.4s
7:	learn: 0.4118327	total: 117ms	remaining: 14.5s
8:	learn: 0.3928555	total: 131ms	remaining: 14.4s
9:	learn: 0.3783371	total: 145ms	remaining: 14.4s
10:	learn: 0.3664697	total: 172ms	remaining: 15.5s
11:	learn: 0.3561688	total: 195ms	remaining: 16s
12:	learn: 0.3482655	total: 223ms	remaining: 16.9s
13:	learn: 0.3408604	total: 260ms	remaining: 18.3s
14:	learn: 0.3342901	total: 289ms	remaining: 19s
15:	learn: 0.3289820	total: 303ms	remaining: 18.6s
16:	learn: 0.3244103	total: 315ms	remaining: 18.2s
17:	learn: 0.3204847	total: 328ms	remaining: 17.9s
18:	learn: 0.3161980	total: 343ms	remaining: 17.7s
19:	learn: 0.3130755	total: 359ms	remaining: 

162:	learn: 0.2802141	total: 3.16s	remaining: 16.2s
163:	learn: 0.2801994	total: 3.19s	remaining: 16.3s
164:	learn: 0.2801939	total: 3.22s	remaining: 16.3s
165:	learn: 0.2801885	total: 3.23s	remaining: 16.2s
166:	learn: 0.2801737	total: 3.25s	remaining: 16.2s
167:	learn: 0.2801682	total: 3.26s	remaining: 16.2s
168:	learn: 0.2801645	total: 3.28s	remaining: 16.1s
169:	learn: 0.2801558	total: 3.29s	remaining: 16.1s
170:	learn: 0.2801459	total: 3.31s	remaining: 16s
171:	learn: 0.2801238	total: 3.32s	remaining: 16s
172:	learn: 0.2801175	total: 3.34s	remaining: 16s
173:	learn: 0.2801154	total: 3.35s	remaining: 15.9s
174:	learn: 0.2801108	total: 3.38s	remaining: 15.9s
175:	learn: 0.2800803	total: 3.4s	remaining: 15.9s
176:	learn: 0.2800673	total: 3.42s	remaining: 15.9s
177:	learn: 0.2800344	total: 3.44s	remaining: 15.9s
178:	learn: 0.2800206	total: 3.45s	remaining: 15.8s
179:	learn: 0.2800150	total: 3.47s	remaining: 15.8s
180:	learn: 0.2800045	total: 3.48s	remaining: 15.8s
181:	learn: 0.27999

322:	learn: 0.2779722	total: 6.76s	remaining: 14.2s
323:	learn: 0.2779613	total: 6.81s	remaining: 14.2s
324:	learn: 0.2779542	total: 6.82s	remaining: 14.2s
325:	learn: 0.2779415	total: 6.84s	remaining: 14.1s
326:	learn: 0.2779284	total: 6.86s	remaining: 14.1s
327:	learn: 0.2779175	total: 6.87s	remaining: 14.1s
328:	learn: 0.2779083	total: 6.88s	remaining: 14s
329:	learn: 0.2778918	total: 6.9s	remaining: 14s
330:	learn: 0.2778796	total: 6.91s	remaining: 14s
331:	learn: 0.2778666	total: 6.93s	remaining: 13.9s
332:	learn: 0.2778598	total: 6.94s	remaining: 13.9s
333:	learn: 0.2778433	total: 6.97s	remaining: 13.9s
334:	learn: 0.2778320	total: 7s	remaining: 13.9s
335:	learn: 0.2778225	total: 7.01s	remaining: 13.9s
336:	learn: 0.2778131	total: 7.03s	remaining: 13.8s
337:	learn: 0.2777959	total: 7.04s	remaining: 13.8s
338:	learn: 0.2777885	total: 7.06s	remaining: 13.8s
339:	learn: 0.2777777	total: 7.08s	remaining: 13.7s
340:	learn: 0.2777673	total: 7.09s	remaining: 13.7s
341:	learn: 0.2777582	

482:	learn: 0.2763560	total: 9.57s	remaining: 10.2s
483:	learn: 0.2763475	total: 9.6s	remaining: 10.2s
484:	learn: 0.2763431	total: 9.62s	remaining: 10.2s
485:	learn: 0.2763349	total: 9.64s	remaining: 10.2s
486:	learn: 0.2763264	total: 9.66s	remaining: 10.2s
487:	learn: 0.2763225	total: 9.67s	remaining: 10.1s
488:	learn: 0.2763136	total: 9.69s	remaining: 10.1s
489:	learn: 0.2763057	total: 9.71s	remaining: 10.1s
490:	learn: 0.2763003	total: 9.72s	remaining: 10.1s
491:	learn: 0.2762839	total: 9.74s	remaining: 10.1s
492:	learn: 0.2762769	total: 9.76s	remaining: 10s
493:	learn: 0.2762720	total: 9.77s	remaining: 10s
494:	learn: 0.2762586	total: 9.79s	remaining: 9.99s
495:	learn: 0.2762524	total: 9.82s	remaining: 9.98s
496:	learn: 0.2762496	total: 9.84s	remaining: 9.96s
497:	learn: 0.2762433	total: 9.86s	remaining: 9.93s
498:	learn: 0.2762299	total: 9.87s	remaining: 9.91s
499:	learn: 0.2762206	total: 9.88s	remaining: 9.88s
500:	learn: 0.2762130	total: 9.9s	remaining: 9.86s
501:	learn: 0.2762

641:	learn: 0.2751410	total: 12.4s	remaining: 6.89s
642:	learn: 0.2751362	total: 12.4s	remaining: 6.88s
643:	learn: 0.2751266	total: 12.4s	remaining: 6.86s
644:	learn: 0.2751224	total: 12.4s	remaining: 6.84s
645:	learn: 0.2751101	total: 12.4s	remaining: 6.82s
646:	learn: 0.2751040	total: 12.5s	remaining: 6.8s
647:	learn: 0.2750939	total: 12.5s	remaining: 6.78s
648:	learn: 0.2750891	total: 12.5s	remaining: 6.76s
649:	learn: 0.2750781	total: 12.5s	remaining: 6.74s
650:	learn: 0.2750714	total: 12.5s	remaining: 6.71s
651:	learn: 0.2750628	total: 12.5s	remaining: 6.69s
652:	learn: 0.2750528	total: 12.6s	remaining: 6.67s
653:	learn: 0.2750437	total: 12.6s	remaining: 6.65s
654:	learn: 0.2750355	total: 12.6s	remaining: 6.64s
655:	learn: 0.2750270	total: 12.6s	remaining: 6.62s
656:	learn: 0.2750224	total: 12.6s	remaining: 6.59s
657:	learn: 0.2750150	total: 12.6s	remaining: 6.57s
658:	learn: 0.2750084	total: 12.7s	remaining: 6.55s
659:	learn: 0.2750004	total: 12.7s	remaining: 6.53s
660:	learn: 0

805:	learn: 0.2740863	total: 15.2s	remaining: 3.66s
806:	learn: 0.2740810	total: 15.2s	remaining: 3.64s
807:	learn: 0.2740731	total: 15.2s	remaining: 3.62s
808:	learn: 0.2740686	total: 15.3s	remaining: 3.6s
809:	learn: 0.2740603	total: 15.3s	remaining: 3.58s
810:	learn: 0.2740573	total: 15.3s	remaining: 3.56s
811:	learn: 0.2740518	total: 15.3s	remaining: 3.54s
812:	learn: 0.2740469	total: 15.3s	remaining: 3.52s
813:	learn: 0.2740436	total: 15.3s	remaining: 3.5s
814:	learn: 0.2740299	total: 15.3s	remaining: 3.48s
815:	learn: 0.2740213	total: 15.4s	remaining: 3.46s
816:	learn: 0.2740181	total: 15.4s	remaining: 3.44s
817:	learn: 0.2740150	total: 15.4s	remaining: 3.42s
818:	learn: 0.2740122	total: 15.4s	remaining: 3.41s
819:	learn: 0.2740090	total: 15.4s	remaining: 3.39s
820:	learn: 0.2740034	total: 15.5s	remaining: 3.37s
821:	learn: 0.2739998	total: 15.5s	remaining: 3.35s
822:	learn: 0.2739973	total: 15.5s	remaining: 3.33s
823:	learn: 0.2739955	total: 15.5s	remaining: 3.31s
824:	learn: 0.

965:	learn: 0.2731621	total: 18s	remaining: 633ms
966:	learn: 0.2731574	total: 18s	remaining: 615ms
967:	learn: 0.2731555	total: 18s	remaining: 596ms
968:	learn: 0.2731521	total: 18.1s	remaining: 578ms
969:	learn: 0.2731482	total: 18.1s	remaining: 559ms
970:	learn: 0.2731448	total: 18.1s	remaining: 540ms
971:	learn: 0.2731365	total: 18.1s	remaining: 521ms
972:	learn: 0.2731354	total: 18.1s	remaining: 503ms
973:	learn: 0.2731342	total: 18.1s	remaining: 484ms
974:	learn: 0.2731314	total: 18.1s	remaining: 465ms
975:	learn: 0.2731268	total: 18.2s	remaining: 447ms
976:	learn: 0.2731175	total: 18.2s	remaining: 428ms
977:	learn: 0.2731141	total: 18.2s	remaining: 409ms
978:	learn: 0.2731111	total: 18.2s	remaining: 391ms
979:	learn: 0.2731037	total: 18.2s	remaining: 372ms
980:	learn: 0.2731003	total: 18.3s	remaining: 354ms
981:	learn: 0.2730959	total: 18.3s	remaining: 335ms
982:	learn: 0.2730887	total: 18.3s	remaining: 316ms
983:	learn: 0.2730833	total: 18.3s	remaining: 298ms
984:	learn: 0.2730

<catboost.core.CatBoostClassifier at 0x7f6332baa510>

## Предсказание

In [16]:
test = test[['question_mark','angle_quotes', 'comments',
           'space_before_question_mark', 'words_count',
           'german_quotes', 'first_letter_is_title','punct_mark', 'lenght']]
model.predict_proba(test)
pred = []
for i in range(len(model.predict_proba(test))):
    pred.append(model.predict_proba(test)[i][1])

In [17]:
solution = pd.Series(pred, index = test.index)
solution.to_csv('solution.csv')

  
