## Подключение библиотек

In [1]:
import numpy as np
import pandas as pd
import os
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt

## Get Data

In [2]:
train_data = pd.read_csv('titanik/train.csv')
test_data = pd.read_csv('titanik/test.csv')

In [3]:
passengers = train_data.drop('Survived', axis=1)
passengers_labels = train_data['Survived'].copy()

## Data info

In [4]:
passengers.head()

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [5]:
passengers.describe()

Unnamed: 0,PassengerId,Pclass,Age,SibSp,Parch,Fare
count,891.0,891.0,714.0,891.0,891.0,891.0
mean,446.0,2.308642,29.699118,0.523008,0.381594,32.204208
std,257.353842,0.836071,14.526497,1.102743,0.806057,49.693429
min,1.0,1.0,0.42,0.0,0.0,0.0
25%,223.5,2.0,20.125,0.0,0.0,7.9104
50%,446.0,3.0,28.0,0.0,0.0,14.4542
75%,668.5,3.0,38.0,1.0,0.0,31.0
max,891.0,3.0,80.0,8.0,6.0,512.3292


In [6]:
passengers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  891 non-null    int64  
 1   Pclass       891 non-null    int64  
 2   Name         891 non-null    object 
 3   Sex          891 non-null    object 
 4   Age          714 non-null    float64
 5   SibSp        891 non-null    int64  
 6   Parch        891 non-null    int64  
 7   Ticket       891 non-null    object 
 8   Fare         891 non-null    float64
 9   Cabin        204 non-null    object 
 10  Embarked     889 non-null    object 
dtypes: float64(2), int64(4), object(5)
memory usage: 76.7+ KB


In [7]:
test_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 418 entries, 0 to 417
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  418 non-null    int64  
 1   Pclass       418 non-null    int64  
 2   Name         418 non-null    object 
 3   Sex          418 non-null    object 
 4   Age          332 non-null    float64
 5   SibSp        418 non-null    int64  
 6   Parch        418 non-null    int64  
 7   Ticket       418 non-null    object 
 8   Fare         417 non-null    float64
 9   Cabin        91 non-null     object 
 10  Embarked     418 non-null    object 
dtypes: float64(2), int64(4), object(5)
memory usage: 36.0+ KB


In [8]:
corr_matrix = train_data.corr()
corr_matrix['Survived'].sort_values(ascending=False)

Survived       1.000000
Fare           0.257307
Parch          0.081629
PassengerId   -0.005007
SibSp         -0.035322
Age           -0.077221
Pclass        -0.338481
Name: Survived, dtype: float64

In [9]:
passengers['Cabin']

0       NaN
1       C85
2       NaN
3      C123
4       NaN
       ... 
886     NaN
887     B42
888     NaN
889    C148
890     NaN
Name: Cabin, Length: 891, dtype: object

## Data Preparing

In [10]:
passengers_num = pd.get_dummies(passengers[['Age', 'SibSp', 'Parch', 'Fare']])

In [11]:
from sklearn.impute import SimpleImputer

imputer = SimpleImputer(strategy='median')
imputer.fit(passengers_num)

SimpleImputer(add_indicator=False, copy=True, fill_value=None,
              missing_values=nan, strategy='median', verbose=0)

In [12]:
imputer.statistics_

array([28.    ,  0.    ,  0.    , 14.4542])

In [13]:
passengers_num.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Age     714 non-null    float64
 1   SibSp   891 non-null    int64  
 2   Parch   891 non-null    int64  
 3   Fare    891 non-null    float64
dtypes: float64(2), int64(2)
memory usage: 28.0 KB


## Transformators

In [14]:
from sklearn.base import BaseEstimator, TransformerMixin

class CabinTransform(BaseEstimator, TransformerMixin):
    def __init__(self, attribs):
        self.attribs = attribs
    def fit(self, X, y=None):
        return self
    def transform(self, X):
        X['Cabin'].fillna(0, inplace=True)
        for i in range(len(X['Cabin'])):
            if X['Cabin'][i] != 0:
                X['Cabin'][i] = X['Cabin'][i][0]
            else:
                X['Cabin'][i] = '0'
        return X[self.attribs]

In [15]:
class MostFrequentImputer(BaseEstimator, TransformerMixin):
    def fit(self, X, y=None):
        self.most_frequent_ = pd.Series([X[c].value_counts().index[0] for c in X],
                                        index=X.columns)
        return self
    def transform(self, X, y=None):
        return X.fillna(self.most_frequent_)

In [32]:
SibSp_ix, Parch_ix = 1, 2

class AttributesAdder(BaseEstimator, TransformerMixin):
#    def __init__(self, delet=True):
#        self.delet = delet
    def fit(self, X, y=None):
        return self
    def transform(self, X):
        SnP = X[:, SibSp_ix] + X[:, Parch_ix]
        return np.c_[X, SnP]

In [52]:
class FeatureSelection(BaseEstimator, TransformerMixin):
    def __init__(self, features):
        self.features = features
    def fit(self, X, y=None):
        return self
    def transform(self, X):
        return X[:, self.features]

## Transporter

In [17]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder

In [18]:
num_attribs = list(passengers_num)
cat_attribs = ['Sex', 'Embarked', 'Pclass']

In [19]:
train_data['Embarked'].value_counts()

S    644
C    168
Q     77
Name: Embarked, dtype: int64

In [20]:
train_data['Cabin'].value_counts()

C23 C25 C27    4
B96 B98        4
G6             4
F2             3
D              3
              ..
A31            1
E49            1
D49            1
C46            1
B39            1
Name: Cabin, Length: 147, dtype: int64

In [33]:
num_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('attr_adder', AttributesAdder())
])

In [34]:
num_pipeline.fit_transform(passengers_num)

array([[22.    ,  1.    ,  0.    ,  7.25  ,  1.    ],
       [38.    ,  1.    ,  0.    , 71.2833,  1.    ],
       [26.    ,  0.    ,  0.    ,  7.925 ,  0.    ],
       ...,
       [28.    ,  1.    ,  2.    , 23.45  ,  3.    ],
       [26.    ,  0.    ,  0.    , 30.    ,  0.    ],
       [32.    ,  0.    ,  0.    ,  7.75  ,  0.    ]])

In [35]:
cat_pipeline = Pipeline([
    ('imputer', MostFrequentImputer()),
    ('encoder', OneHotEncoder(sparse=False))
])

In [38]:
testing = cat_pipeline.fit_transform(train_data)
testing[0]

In [36]:
from sklearn.compose import ColumnTransformer

full_pipeline = ColumnTransformer([
    ('num', num_pipeline, num_attribs),
    ('cat', cat_pipeline, cat_attribs)
])

In [63]:
features = np.array([0,3,4,5,6,7,8,9,10,11,12])

prepare_selection = Pipeline([
    ('preparation', full_pipeline),
    ('selection', FeatureSelection(features))
])

In [64]:
X_train = full_pipeline.fit_transform(passengers)
X_train[0]

array([22.  ,  1.  ,  0.  ,  7.25,  1.  ,  0.  ,  1.  ,  0.  ,  0.  ,
        1.  ,  0.  ,  0.  ,  1.  ])

In [65]:
X_train = prepare_selection.fit_transform(passengers)
X_train[0]

array([22.  ,  7.25,  1.  ,  0.  ,  1.  ,  0.  ,  0.  ,  1.  ,  0.  ,
        0.  ,  1.  ])

In [66]:
y_train = train_data['Survived']

## Classifier

In [84]:
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier

model_rfc = RandomForestClassifier(random_state=42, n_estimators=120)
model_rfc.fit(X_train, y_train)
forest_scores = cross_val_score(model_rfc, X_train, y_train, cv=10)
forest_scores

array([0.71111111, 0.82022472, 0.75280899, 0.80898876, 0.88764045,
       0.82022472, 0.82022472, 0.75280899, 0.83146067, 0.84269663])

In [85]:
forest_scores.mean()

0.8048189762796504

In [29]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV

model_dec = DecisionTreeClassifier(random_state=42)
model_dec.fit(X_train, y_train)
dec_scores = cross_val_score(model_dec, X_train, y_train, cv=10)
dec_scores

array([0.77777778, 0.82022472, 0.70786517, 0.7752809 , 0.83146067,
       0.78651685, 0.83146067, 0.7752809 , 0.80898876, 0.83146067])

In [34]:
dec_scores.mean()

0.7946317103620475

In [30]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from scipy.stats import expon, reciprocal

model_svm = SVC()
params_svc = {
    'C' : [25, 30, 35, 40],
    'kernel' : ['linear'],
    'gamma': [2.0, 2.1, 2.2, 2.3, 2.4, 2.5]
}
rnd_search = GridSearchCV(model_svm, params_svc, cv=10, verbose=8)
rnd_search.fit(X_train, y_train)

Fitting 10 folds for each of 24 candidates, totalling 240 fits
[CV] C=25, gamma=2.0, kernel=linear ..................................


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV] ...... C=25, gamma=2.0, kernel=linear, score=0.811, total=  26.7s
[CV] C=25, gamma=2.0, kernel=linear ..................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   26.6s remaining:    0.0s


[CV] ...... C=25, gamma=2.0, kernel=linear, score=0.798, total=  24.9s
[CV] C=25, gamma=2.0, kernel=linear ..................................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:   51.5s remaining:    0.0s


[CV] ...... C=25, gamma=2.0, kernel=linear, score=0.775, total=  36.4s
[CV] C=25, gamma=2.0, kernel=linear ..................................


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:  1.5min remaining:    0.0s


[CV] ...... C=25, gamma=2.0, kernel=linear, score=0.843, total=  39.7s
[CV] C=25, gamma=2.0, kernel=linear ..................................


[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:  2.1min remaining:    0.0s


[CV] ...... C=25, gamma=2.0, kernel=linear, score=0.798, total=  26.7s
[CV] C=25, gamma=2.0, kernel=linear ..................................


[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:  2.6min remaining:    0.0s


[CV] ...... C=25, gamma=2.0, kernel=linear, score=0.775, total=  36.9s
[CV] C=25, gamma=2.0, kernel=linear ..................................


[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:  3.2min remaining:    0.0s


[CV] ...... C=25, gamma=2.0, kernel=linear, score=0.764, total=  32.4s
[CV] C=25, gamma=2.0, kernel=linear ..................................


[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:  3.7min remaining:    0.0s


[CV] ...... C=25, gamma=2.0, kernel=linear, score=0.742, total=  30.5s
[CV] C=25, gamma=2.0, kernel=linear ..................................
[CV] ...... C=25, gamma=2.0, kernel=linear, score=0.809, total=  19.0s
[CV] C=25, gamma=2.0, kernel=linear ..................................
[CV] ...... C=25, gamma=2.0, kernel=linear, score=0.764, total=  35.0s
[CV] C=25, gamma=2.1, kernel=linear ..................................
[CV] ...... C=25, gamma=2.1, kernel=linear, score=0.811, total=  27.4s
[CV] C=25, gamma=2.1, kernel=linear ..................................
[CV] ...... C=25, gamma=2.1, kernel=linear, score=0.798, total=  26.8s
[CV] C=25, gamma=2.1, kernel=linear ..................................
[CV] ...... C=25, gamma=2.1, kernel=linear, score=0.775, total=  41.3s
[CV] C=25, gamma=2.1, kernel=linear ..................................
[CV] ...... C=25, gamma=2.1, kernel=linear, score=0.843, total=  44.5s
[CV] C=25, gamma=2.1, kernel=linear ..................................
[CV] .

[CV] ...... C=30, gamma=2.0, kernel=linear, score=0.775, total=  32.3s
[CV] C=30, gamma=2.0, kernel=linear ..................................
[CV] ...... C=30, gamma=2.0, kernel=linear, score=0.764, total=  30.7s
[CV] C=30, gamma=2.0, kernel=linear ..................................
[CV] ...... C=30, gamma=2.0, kernel=linear, score=0.742, total=  28.8s
[CV] C=30, gamma=2.0, kernel=linear ..................................
[CV] ...... C=30, gamma=2.0, kernel=linear, score=0.809, total=  31.6s
[CV] C=30, gamma=2.0, kernel=linear ..................................
[CV] ...... C=30, gamma=2.0, kernel=linear, score=0.764, total=  37.2s
[CV] C=30, gamma=2.1, kernel=linear ..................................
[CV] ...... C=30, gamma=2.1, kernel=linear, score=0.811, total=  47.1s
[CV] C=30, gamma=2.1, kernel=linear ..................................
[CV] ...... C=30, gamma=2.1, kernel=linear, score=0.798, total=  27.5s
[CV] C=30, gamma=2.1, kernel=linear ..................................
[CV] .

[CV] ...... C=35, gamma=2.0, kernel=linear, score=0.843, total=  48.9s
[CV] C=35, gamma=2.0, kernel=linear ..................................
[CV] ...... C=35, gamma=2.0, kernel=linear, score=0.798, total=  50.3s
[CV] C=35, gamma=2.0, kernel=linear ..................................
[CV] ...... C=35, gamma=2.0, kernel=linear, score=0.775, total=  50.6s
[CV] C=35, gamma=2.0, kernel=linear ..................................
[CV] ...... C=35, gamma=2.0, kernel=linear, score=0.764, total=  39.2s
[CV] C=35, gamma=2.0, kernel=linear ..................................
[CV] ...... C=35, gamma=2.0, kernel=linear, score=0.742, total=  41.5s
[CV] C=35, gamma=2.0, kernel=linear ..................................
[CV] ...... C=35, gamma=2.0, kernel=linear, score=0.809, total=  29.4s
[CV] C=35, gamma=2.0, kernel=linear ..................................
[CV] ...... C=35, gamma=2.0, kernel=linear, score=0.764, total=  38.8s
[CV] C=35, gamma=2.1, kernel=linear ..................................
[CV] .

[CV] ...... C=40, gamma=2.0, kernel=linear, score=0.787, total=  51.3s
[CV] C=40, gamma=2.0, kernel=linear ..................................
[CV] ...... C=40, gamma=2.0, kernel=linear, score=0.775, total=  55.0s
[CV] C=40, gamma=2.0, kernel=linear ..................................
[CV] ...... C=40, gamma=2.0, kernel=linear, score=0.843, total= 1.9min
[CV] C=40, gamma=2.0, kernel=linear ..................................
[CV] ...... C=40, gamma=2.0, kernel=linear, score=0.798, total=  39.5s
[CV] C=40, gamma=2.0, kernel=linear ..................................
[CV] ...... C=40, gamma=2.0, kernel=linear, score=0.775, total= 1.4min
[CV] C=40, gamma=2.0, kernel=linear ..................................
[CV] ...... C=40, gamma=2.0, kernel=linear, score=0.764, total=  46.0s
[CV] C=40, gamma=2.0, kernel=linear ..................................
[CV] ...... C=40, gamma=2.0, kernel=linear, score=0.742, total= 1.1min
[CV] C=40, gamma=2.0, kernel=linear ..................................
[CV] .

[CV] ...... C=40, gamma=2.5, kernel=linear, score=0.764, total= 1.7min


[Parallel(n_jobs=1)]: Done 240 out of 240 | elapsed: 210.8min finished


GridSearchCV(cv=10, error_score=nan,
             estimator=SVC(C=1.0, break_ties=False, cache_size=200,
                           class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='scale', kernel='rbf', max_iter=-1,
                           probability=False, random_state=None, shrinking=True,
                           tol=0.001, verbose=False),
             iid='deprecated', n_jobs=None,
             param_grid={'C': [25, 30, 35, 40],
                         'gamma': [2.0, 2.1, 2.2, 2.3, 2.4, 2.5],
                         'kernel': ['linear']},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=8)

In [31]:
rnd_search.best_params_

{'C': 25, 'gamma': 2.0, 'kernel': 'linear'}

In [32]:
scv_model = rnd_search.best_estimator_
svc_scores = cross_val_score(scv_model, X_train, y_train, cv=10)
svc_scores

array([0.81111111, 0.79775281, 0.7752809 , 0.84269663, 0.79775281,
       0.7752809 , 0.76404494, 0.74157303, 0.80898876, 0.76404494])

In [35]:
svc_scores.mean()

0.787852684144819

In [37]:
from sklearn.neighbors import KNeighborsClassifier

knn_model = KNeighborsClassifier()
params_knn = {
    'n_neighbors' : [3, 4, 5, 6],
    'weights' : ['uniform', 'distance']
}
grid_search = GridSearchCV(knn_model, params_knn, cv=10, verbose = 8)
grid_search.fit(X_train, y_train)

Fitting 10 folds for each of 8 candidates, totalling 80 fits
[CV] n_neighbors=3, weights=uniform ..................................
[CV] ...... n_neighbors=3, weights=uniform, score=0.667, total=   0.0s
[CV] n_neighbors=3, weights=uniform ..................................
[CV] ...... n_neighbors=3, weights=uniform, score=0.663, total=   0.0s
[CV] n_neighbors=3, weights=uniform ..................................
[CV] ...... n_neighbors=3, weights=uniform, score=0.708, total=   0.0s
[CV] n_neighbors=3, weights=uniform ..................................
[CV] ...... n_neighbors=3, weights=uniform, score=0.753, total=   0.0s
[CV] n_neighbors=3, weights=uniform ..................................
[CV] ...... n_neighbors=3, weights=uniform, score=0.787, total=   0.0s
[CV] n_neighbors=3, weights=uniform ..................................
[CV] ...... n_neighbors=3, weights=uniform, score=0.730, total=   0.0s
[CV] n_neighbors=3, weights=uniform ..................................
[CV] ...... n_ne

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    0.0s remaining:    0.0s



[CV] n_neighbors=4, weights=uniform ..................................
[CV] ...... n_neighbors=4, weights=uniform, score=0.730, total=   0.0s
[CV] n_neighbors=4, weights=uniform ..................................
[CV] ...... n_neighbors=4, weights=uniform, score=0.753, total=   0.0s
[CV] n_neighbors=4, weights=uniform ..................................
[CV] ...... n_neighbors=4, weights=uniform, score=0.764, total=   0.0s
[CV] n_neighbors=4, weights=uniform ..................................
[CV] ...... n_neighbors=4, weights=uniform, score=0.663, total=   0.0s
[CV] n_neighbors=4, weights=uniform ..................................
[CV] ...... n_neighbors=4, weights=uniform, score=0.742, total=   0.0s
[CV] n_neighbors=4, weights=uniform ..................................
[CV] ...... n_neighbors=4, weights=uniform, score=0.730, total=   0.0s
[CV] n_neighbors=4, weights=uniform ..................................
[CV] ...... n_neighbors=4, weights=uniform, score=0.685, total=   0.0s
[CV] 

[Parallel(n_jobs=1)]: Done  80 out of  80 | elapsed:    0.5s finished


GridSearchCV(cv=10, error_score=nan,
             estimator=KNeighborsClassifier(algorithm='auto', leaf_size=30,
                                            metric='minkowski',
                                            metric_params=None, n_jobs=None,
                                            n_neighbors=5, p=2,
                                            weights='uniform'),
             iid='deprecated', n_jobs=None,
             param_grid={'n_neighbors': [3, 4, 5, 6],
                         'weights': ['uniform', 'distance']},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=8)

In [38]:
grid_search.best_params_

{'n_neighbors': 6, 'weights': 'distance'}

In [39]:
knn_model = grid_search.best_estimator_
knn_scores = cross_val_score(knn_model, X_train, y_train, cv=10)
knn_scores

array([0.66666667, 0.73033708, 0.71910112, 0.75280899, 0.80898876,
       0.70786517, 0.75280899, 0.69662921, 0.6741573 , 0.76404494])

In [40]:
knn_scores.mean()

0.7273408239700375

In [41]:
from sklearn.naive_bayes import GaussianNB

gas_model = GaussianNB()
gas_model.fit(X_train, y_train)
gas_scores = cross_val_score(gas_model, X_train, y_train, cv=10)
gas_scores

array([0.74444444, 0.73033708, 0.78651685, 0.80898876, 0.79775281,
       0.7752809 , 0.79775281, 0.79775281, 0.78651685, 0.83146067])

In [42]:
gas_scores.mean()

0.7856803995006241

In [49]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.model_selection import RandomizedSearchCV

lda_model = LinearDiscriminantAnalysis()
lda_params = [{
    'solver' : ['sdv']},
    {
    'solver' : ['lsqr', 'eigen'],
    'shrinkage' : ['auto']
}]
grid_search = GridSearchCV(lda_model, lda_params, cv=10, verbose = 6)
grid_search.fit(X_train, y_train)

Fitting 10 folds for each of 3 candidates, totalling 30 fits
[CV] solver=sdv ......................................................
[CV] ............................ solver=sdv, score=nan, total=   0.0s
[CV] solver=sdv ......................................................
[CV] ............................ solver=sdv, score=nan, total=   0.0s
[CV] solver=sdv ......................................................
[CV] ............................ solver=sdv, score=nan, total=   0.0s
[CV] solver=sdv ......................................................
[CV] ............................ solver=sdv, score=nan, total=   0.0s
[CV] solver=sdv ......................................................
[CV] ............................ solver=sdv, score=nan, total=   0.0s
[CV] solver=sdv ......................................................
[CV] ............................ solver=sdv, score=nan, total=   0.0s
[CV] solver=sdv ......................................................
[CV] ...........

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
ValueError: unknown solver sdv (valid solvers are 'svd', 'lsqr', and 'eigen').

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
ValueError: unknown solver sdv (valid solvers are 'svd', 'lsqr', and 'eigen').

[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
ValueError: unknown solver sdv (valid solvers are 'svd', 'lsqr', and 'eigen').

[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
ValueError: unknown solver sdv (valid solvers are 'svd', 'lsqr', and 'eigen').

[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.0s remaining:    0.0s
ValueError: unknown solver sdv (valid solvers are 'svd', 'lsqr', and 'eigen').

[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.0s remaining:    0.0s
ValueError: unknown solver sdv (valid solvers are 'svd', 'lsqr', and 'eigen').

ValueError: unknown solver sdv (valid s

GridSearchCV(cv=10, error_score=nan,
             estimator=LinearDiscriminantAnalysis(n_components=None,
                                                  priors=None, shrinkage=None,
                                                  solver='svd',
                                                  store_covariance=False,
                                                  tol=0.0001),
             iid='deprecated', n_jobs=None,
             param_grid=[{'solver': ['sdv']},
                         {'shrinkage': ['auto'], 'solver': ['lsqr', 'eigen']}],
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=6)

In [45]:
grid_search.best_params_

{'shrinkage': 'auto', 'solver': 'lsqr'}

In [46]:
lda_model = grid_search.best_estimator_
lda_scores = cross_val_score(lda_model, X_train, y_train, cv=10)
lda_scores

array([0.77777778, 0.79775281, 0.78651685, 0.84269663, 0.78651685,
       0.76404494, 0.7752809 , 0.75280899, 0.82022472, 0.7752809 ])

In [47]:
lda_scores.mean()

0.7878901373283396

In [61]:
from sklearn.linear_model import LogisticRegression

lrs_model = LogisticRegression(random_state=42, max_iter=1000)
lrs_params = {
    'C' : reciprocal(10, 300000)
}
rnd_search = RandomizedSearchCV(lrs_model, lrs_params, n_iter = 100, scoring = 'accuracy', cv=10, verbose = 10)
rnd_search.fit(X_train, y_train)

Fitting 10 folds for each of 100 candidates, totalling 1000 fits
[CV] C=1382.2023980663503 ............................................
[CV] ................ C=1382.2023980663503, score=0.778, total=   0.0s
[CV] C=1382.2023980663503 ............................................
[CV] ................ C=1382.2023980663503, score=0.798, total=   0.0s
[CV] C=1382.2023980663503 ............................................
[CV] ................ C=1382.2023980663503, score=0.764, total=   0.0s
[CV] C=1382.2023980663503 ............................................
[CV] ................ C=1382.2023980663503, score=0.820, total=   0.0s
[CV] C=1382.2023980663503 ............................................
[CV] ................ C=1382.2023980663503, score=0.798, total=   0.0s

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.1s remaining:    0.0s



[CV] C=1382.2023980663503 ............................................
[CV] ................ C=1382.2023980663503, score=0.775, total=   0.0s
[CV] C=1382.2023980663503 ............................................
[CV] ................ C=1382.2023980663503, score=0.787, total=   0.0s
[CV] C=1382.2023980663503 ............................................
[CV] ................ C=1382.2023980663503, score=0.787, total=   0.0s
[CV] C=1382.2023980663503 ............................................
[CV] ................ C=1382.2023980663503, score=0.831, total=   0.0s
[CV] C=1382.2023980663503 ............................................
[CV] ................ C=1382.2023980663503, score=0.854, total=   0.0s
[CV] C=68613.93954949082 .............................................


[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    0.2s remaining:    0.0s


[CV] ................. C=68613.93954949082, score=0.778, total=   0.0s
[CV] C=68613.93954949082 .............................................
[CV] ................. C=68613.93954949082, score=0.798, total=   0.0s
[CV] C=68613.93954949082 .............................................
[CV] ................. C=68613.93954949082, score=0.764, total=   0.0s
[CV] C=68613.93954949082 .............................................
[CV] ................. C=68613.93954949082, score=0.820, total=   0.0s
[CV] C=68613.93954949082 .............................................
[CV] ................. C=68613.93954949082, score=0.798, total=   0.0s
[CV] C=68613.93954949082 .............................................
[CV] ................. C=68613.93954949082, score=0.775, total=   0.0s
[CV] C=68613.93954949082 .............................................
[CV] ................. C=68613.93954949082, score=0.787, total=   0.0s
[CV] C=68613.93954949082 .............................................
[CV] .

[CV] ................. C=21757.54657881032, score=0.854, total=   0.0s
[CV] C=34788.849074762074 ............................................
[CV] ................ C=34788.849074762074, score=0.778, total=   0.0s
[CV] C=34788.849074762074 ............................................
[CV] ................ C=34788.849074762074, score=0.798, total=   0.0s
[CV] C=34788.849074762074 ............................................
[CV] ................ C=34788.849074762074, score=0.764, total=   0.0s
[CV] C=34788.849074762074 ............................................
[CV] ................ C=34788.849074762074, score=0.820, total=   0.0s
[CV] C=34788.849074762074 ............................................
[CV] ................ C=34788.849074762074, score=0.798, total=   0.0s
[CV] C=34788.849074762074 ............................................
[CV] ................ C=34788.849074762074, score=0.775, total=   0.0s
[CV] C=34788.849074762074 ............................................
[CV] .

[CV] ................ C=30911.356767312885, score=0.764, total=   0.0s
[CV] C=30911.356767312885 ............................................
[CV] ................ C=30911.356767312885, score=0.820, total=   0.0s
[CV] C=30911.356767312885 ............................................
[CV] ................ C=30911.356767312885, score=0.798, total=   0.0s
[CV] C=30911.356767312885 ............................................
[CV] ................ C=30911.356767312885, score=0.775, total=   0.0s
[CV] C=30911.356767312885 ............................................
[CV] ................ C=30911.356767312885, score=0.787, total=   0.0s
[CV] C=30911.356767312885 ............................................
[CV] ................ C=30911.356767312885, score=0.787, total=   0.0s
[CV] C=30911.356767312885 ............................................
[CV] ................ C=30911.356767312885, score=0.831, total=   0.0s
[CV] C=30911.356767312885 ............................................
[CV] .

[CV] ................ C=31.016973877629894, score=0.775, total=   0.0s
[CV] C=31.016973877629894 ............................................
[CV] ................ C=31.016973877629894, score=0.787, total=   0.0s
[CV] C=31.016973877629894 ............................................
[CV] ................ C=31.016973877629894, score=0.787, total=   0.0s
[CV] C=31.016973877629894 ............................................
[CV] ................ C=31.016973877629894, score=0.831, total=   0.0s
[CV] C=31.016973877629894 ............................................
[CV] ................ C=31.016973877629894, score=0.854, total=   0.0s
[CV] C=198.17184619581522 ............................................
[CV] ................ C=198.17184619581522, score=0.778, total=   0.0s
[CV] C=198.17184619581522 ............................................
[CV] ................ C=198.17184619581522, score=0.798, total=   0.0s
[CV] C=198.17184619581522 ............................................
[CV] .

[CV] ................ C=129021.96892447205, score=0.820, total=   0.0s
[CV] C=129021.96892447205 ............................................
[CV] ................ C=129021.96892447205, score=0.798, total=   0.0s
[CV] C=129021.96892447205 ............................................
[CV] ................ C=129021.96892447205, score=0.775, total=   0.0s
[CV] C=129021.96892447205 ............................................
[CV] ................ C=129021.96892447205, score=0.787, total=   0.0s
[CV] C=129021.96892447205 ............................................
[CV] ................ C=129021.96892447205, score=0.787, total=   0.0s
[CV] C=129021.96892447205 ............................................
[CV] ................ C=129021.96892447205, score=0.831, total=   0.0s
[CV] C=129021.96892447205 ............................................
[CV] ................ C=129021.96892447205, score=0.854, total=   0.0s
[CV] C=176.66454410216892 ............................................
[CV] .

[CV] ................ C=246126.60817157396, score=0.764, total=   0.0s
[CV] C=246126.60817157396 ............................................
[CV] ................ C=246126.60817157396, score=0.820, total=   0.0s
[CV] C=246126.60817157396 ............................................
[CV] ................ C=246126.60817157396, score=0.798, total=   0.0s
[CV] C=246126.60817157396 ............................................
[CV] ................ C=246126.60817157396, score=0.775, total=   0.0s
[CV] C=246126.60817157396 ............................................
[CV] ................ C=246126.60817157396, score=0.787, total=   0.0s
[CV] C=246126.60817157396 ............................................
[CV] ................ C=246126.60817157396, score=0.787, total=   0.0s
[CV] C=246126.60817157396 ............................................
[CV] ................ C=246126.60817157396, score=0.831, total=   0.0s
[CV] C=246126.60817157396 ............................................
[CV] .

[CV] ................. C=183.9303408050867, score=0.820, total=   0.0s
[CV] C=183.9303408050867 .............................................
[CV] ................. C=183.9303408050867, score=0.798, total=   0.0s
[CV] C=183.9303408050867 .............................................
[CV] ................. C=183.9303408050867, score=0.775, total=   0.0s
[CV] C=183.9303408050867 .............................................
[CV] ................. C=183.9303408050867, score=0.787, total=   0.0s
[CV] C=183.9303408050867 .............................................
[CV] ................. C=183.9303408050867, score=0.787, total=   0.0s
[CV] C=183.9303408050867 .............................................
[CV] ................. C=183.9303408050867, score=0.831, total=   0.0s
[CV] C=183.9303408050867 .............................................
[CV] ................. C=183.9303408050867, score=0.854, total=   0.0s
[CV] C=114325.90033788909 ............................................
[CV] .

[CV] ................ C=431.26717032126385, score=0.764, total=   0.0s
[CV] C=431.26717032126385 ............................................
[CV] ................ C=431.26717032126385, score=0.820, total=   0.0s
[CV] C=431.26717032126385 ............................................
[CV] ................ C=431.26717032126385, score=0.798, total=   0.0s
[CV] C=431.26717032126385 ............................................
[CV] ................ C=431.26717032126385, score=0.775, total=   0.0s
[CV] C=431.26717032126385 ............................................
[CV] ................ C=431.26717032126385, score=0.787, total=   0.0s
[CV] C=431.26717032126385 ............................................
[CV] ................ C=431.26717032126385, score=0.787, total=   0.0s
[CV] C=431.26717032126385 ............................................
[CV] ................ C=431.26717032126385, score=0.831, total=   0.0s
[CV] C=431.26717032126385 ............................................
[CV] .

[CV] ................. C=7055.171375964784, score=0.820, total=   0.0s
[CV] C=7055.171375964784 .............................................
[CV] ................. C=7055.171375964784, score=0.798, total=   0.0s
[CV] C=7055.171375964784 .............................................
[CV] ................. C=7055.171375964784, score=0.775, total=   0.0s
[CV] C=7055.171375964784 .............................................
[CV] ................. C=7055.171375964784, score=0.787, total=   0.0s
[CV] C=7055.171375964784 .............................................
[CV] ................. C=7055.171375964784, score=0.787, total=   0.0s
[CV] C=7055.171375964784 .............................................
[CV] ................. C=7055.171375964784, score=0.831, total=   0.0s
[CV] C=7055.171375964784 .............................................
[CV] ................. C=7055.171375964784, score=0.854, total=   0.0s
[CV] C=6625.430080019313 .............................................
[CV] .

[CV] ................. C=170967.3644890901, score=0.820, total=   0.0s
[CV] C=170967.3644890901 .............................................
[CV] ................. C=170967.3644890901, score=0.798, total=   0.0s
[CV] C=170967.3644890901 .............................................
[CV] ................. C=170967.3644890901, score=0.775, total=   0.0s
[CV] C=170967.3644890901 .............................................
[CV] ................. C=170967.3644890901, score=0.787, total=   0.0s
[CV] C=170967.3644890901 .............................................
[CV] ................. C=170967.3644890901, score=0.787, total=   0.0s
[CV] C=170967.3644890901 .............................................
[CV] ................. C=170967.3644890901, score=0.831, total=   0.0s
[CV] C=170967.3644890901 .............................................
[CV] ................. C=170967.3644890901, score=0.854, total=   0.0s
[CV] C=159.5160629046502 .............................................
[CV] .

[CV] ................. C=48.30838677444012, score=0.764, total=   0.0s
[CV] C=48.30838677444012 .............................................
[CV] ................. C=48.30838677444012, score=0.820, total=   0.0s
[CV] C=48.30838677444012 .............................................
[CV] ................. C=48.30838677444012, score=0.798, total=   0.0s
[CV] C=48.30838677444012 .............................................
[CV] ................. C=48.30838677444012, score=0.775, total=   0.0s
[CV] C=48.30838677444012 .............................................
[CV] ................. C=48.30838677444012, score=0.787, total=   0.0s
[CV] C=48.30838677444012 .............................................
[CV] ................. C=48.30838677444012, score=0.787, total=   0.0s
[CV] C=48.30838677444012 .............................................
[CV] ................. C=48.30838677444012, score=0.831, total=   0.0s
[CV] C=48.30838677444012 .............................................
[CV] .

[CV] ................ C=125422.69584893892, score=0.764, total=   0.0s
[CV] C=125422.69584893892 ............................................
[CV] ................ C=125422.69584893892, score=0.820, total=   0.0s
[CV] C=125422.69584893892 ............................................
[CV] ................ C=125422.69584893892, score=0.798, total=   0.0s
[CV] C=125422.69584893892 ............................................
[CV] ................ C=125422.69584893892, score=0.775, total=   0.0s
[CV] C=125422.69584893892 ............................................
[CV] ................ C=125422.69584893892, score=0.787, total=   0.0s
[CV] C=125422.69584893892 ............................................
[CV] ................ C=125422.69584893892, score=0.787, total=   0.0s
[CV] C=125422.69584893892 ............................................
[CV] ................ C=125422.69584893892, score=0.831, total=   0.0s
[CV] C=125422.69584893892 ............................................
[CV] .

[CV] ................. C=592.8876467479681, score=0.820, total=   0.0s
[CV] C=592.8876467479681 .............................................
[CV] ................. C=592.8876467479681, score=0.798, total=   0.0s
[CV] C=592.8876467479681 .............................................
[CV] ................. C=592.8876467479681, score=0.775, total=   0.0s
[CV] C=592.8876467479681 .............................................
[CV] ................. C=592.8876467479681, score=0.787, total=   0.0s
[CV] C=592.8876467479681 .............................................
[CV] ................. C=592.8876467479681, score=0.787, total=   0.0s
[CV] C=592.8876467479681 .............................................
[CV] ................. C=592.8876467479681, score=0.831, total=   0.0s
[CV] C=592.8876467479681 .............................................
[CV] ................. C=592.8876467479681, score=0.854, total=   0.0s
[CV] C=30.26187826215901 .............................................
[CV] .

[CV] ................. C=968.2306308853335, score=0.775, total=   0.0s
[CV] C=968.2306308853335 .............................................
[CV] ................. C=968.2306308853335, score=0.787, total=   0.0s
[CV] C=968.2306308853335 .............................................
[CV] ................. C=968.2306308853335, score=0.787, total=   0.0s
[CV] C=968.2306308853335 .............................................
[CV] ................. C=968.2306308853335, score=0.831, total=   0.0s
[CV] C=968.2306308853335 .............................................
[CV] ................. C=968.2306308853335, score=0.854, total=   0.0s
[CV] C=7738.776061335659 .............................................
[CV] ................. C=7738.776061335659, score=0.778, total=   0.0s
[CV] C=7738.776061335659 .............................................
[CV] ................. C=7738.776061335659, score=0.798, total=   0.0s
[CV] C=7738.776061335659 .............................................
[CV] .

[CV] ................. C=3092.585600246093, score=0.787, total=   0.0s
[CV] C=3092.585600246093 .............................................
[CV] ................. C=3092.585600246093, score=0.787, total=   0.0s
[CV] C=3092.585600246093 .............................................
[CV] ................. C=3092.585600246093, score=0.831, total=   0.0s
[CV] C=3092.585600246093 .............................................
[CV] ................. C=3092.585600246093, score=0.854, total=   0.0s
[CV] C=262544.5758208397 .............................................
[CV] ................. C=262544.5758208397, score=0.778, total=   0.0s
[CV] C=262544.5758208397 .............................................
[CV] ................. C=262544.5758208397, score=0.798, total=   0.0s
[CV] C=262544.5758208397 .............................................
[CV] ................. C=262544.5758208397, score=0.764, total=   0.0s
[CV] C=262544.5758208397 .............................................
[CV] .

[CV] ................. C=49.82475766859925, score=0.831, total=   0.0s
[CV] C=49.82475766859925 .............................................
[CV] ................. C=49.82475766859925, score=0.854, total=   0.0s
[CV] C=53.44735784666334 .............................................
[CV] ................. C=53.44735784666334, score=0.778, total=   0.0s
[CV] C=53.44735784666334 .............................................
[CV] ................. C=53.44735784666334, score=0.798, total=   0.0s
[CV] C=53.44735784666334 .............................................
[CV] ................. C=53.44735784666334, score=0.764, total=   0.0s
[CV] C=53.44735784666334 .............................................
[CV] ................. C=53.44735784666334, score=0.820, total=   0.0s
[CV] C=53.44735784666334 .............................................
[CV] ................. C=53.44735784666334, score=0.798, total=   0.0s
[CV] C=53.44735784666334 .............................................
[CV] .

[CV] ................. C=6706.552352404382, score=0.854, total=   0.0s
[CV] C=110.44770279629076 ............................................
[CV] ................ C=110.44770279629076, score=0.778, total=   0.0s
[CV] C=110.44770279629076 ............................................
[CV] ................ C=110.44770279629076, score=0.798, total=   0.0s
[CV] C=110.44770279629076 ............................................
[CV] ................ C=110.44770279629076, score=0.764, total=   0.0s
[CV] C=110.44770279629076 ............................................
[CV] ................ C=110.44770279629076, score=0.820, total=   0.0s
[CV] C=110.44770279629076 ............................................
[CV] ................ C=110.44770279629076, score=0.798, total=   0.0s
[CV] C=110.44770279629076 ............................................
[CV] ................ C=110.44770279629076, score=0.775, total=   0.0s
[CV] C=110.44770279629076 ............................................
[CV] .

[Parallel(n_jobs=1)]: Done 1000 out of 1000 | elapsed:   32.9s finished


RandomizedSearchCV(cv=10, error_score=nan,
                   estimator=LogisticRegression(C=1.0, class_weight=None,
                                                dual=False, fit_intercept=True,
                                                intercept_scaling=1,
                                                l1_ratio=None, max_iter=1000,
                                                multi_class='auto', n_jobs=None,
                                                penalty='l2', random_state=42,
                                                solver='lbfgs', tol=0.0001,
                                                verbose=0, warm_start=False),
                   iid='deprecated', n_iter=100, n_jobs=None,
                   param_distributions={'C': <scipy.stats._distn_infrastructure.rv_frozen object at 0x000002595763A0C8>},
                   pre_dispatch='2*n_jobs', random_state=None, refit=True,
                   return_train_score=False, scoring='accuracy', verbose=10)

In [62]:
rnd_search.best_params_

{'C': 1382.2023980663503}

In [87]:
from sklearn.linear_model import LogisticRegression

#lrs_model = rnd_search.best_estimator_
lrs_model = LogisticRegression(random_state=42, max_iter=1000, C=1400)
lrs_model.fit(X_train, y_train)
lrs_scores = cross_val_score(lrs_model, X_train, y_train, cv=10)
lrs_scores

array([0.77777778, 0.78651685, 0.76404494, 0.84269663, 0.79775281,
       0.7752809 , 0.79775281, 0.79775281, 0.80898876, 0.83146067])

In [88]:
lrs_scores.mean()

0.7980024968789012

## Predictions

In [92]:
X_test = prepare_selection.transform(test_data)

In [93]:
final_model = lrs_model
predictions = final_model.predict(X_test)

In [94]:
output = pd.DataFrame({'PassengerId': test_data.PassengerId, 'Survived': predictions})
output.to_csv(r'C:\Users\roman\Documents\Учеба\projects\kaggle\titanik\my_submission2.csv', index=False)