In [None]:
# Update sklearn to prevent version mismatches
# !pip install sklearn --upgrade

In [None]:
# install joblib. This will be used to save your model. 
# Restart your kernel after installing 
# !pip install joblib

In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
import joblib

# Read the CSV and Perform Basic Data Cleaning

In [2]:
df = pd.read_csv("exoplanet_data.csv")
# Drop the null columns where all values are null
df = df.dropna(axis='columns', how='all')
# Drop the null rows
df = df.dropna()  # empty values, 
df

Unnamed: 0,koi_disposition,koi_fpflag_nt,koi_fpflag_ss,koi_fpflag_co,koi_fpflag_ec,koi_period,koi_period_err1,koi_period_err2,koi_time0bk,koi_time0bk_err1,...,koi_steff_err2,koi_slogg,koi_slogg_err1,koi_slogg_err2,koi_srad,koi_srad_err1,koi_srad_err2,ra,dec,koi_kepmag
0,CONFIRMED,0,0,0,0,54.418383,2.479000e-04,-2.479000e-04,162.513840,0.003520,...,-81,4.467,0.064,-0.096,0.927,0.105,-0.061,291.93423,48.141651,15.347
1,FALSE POSITIVE,0,1,0,0,19.899140,1.490000e-05,-1.490000e-05,175.850252,0.000581,...,-176,4.544,0.044,-0.176,0.868,0.233,-0.078,297.00482,48.134129,15.436
2,FALSE POSITIVE,0,1,0,0,1.736952,2.630000e-07,-2.630000e-07,170.307565,0.000115,...,-174,4.564,0.053,-0.168,0.791,0.201,-0.067,285.53461,48.285210,15.597
3,CONFIRMED,0,0,0,0,2.525592,3.760000e-06,-3.760000e-06,171.595550,0.001130,...,-211,4.438,0.070,-0.210,1.046,0.334,-0.133,288.75488,48.226200,15.509
4,CONFIRMED,0,0,0,0,4.134435,1.050000e-05,-1.050000e-05,172.979370,0.001900,...,-232,4.486,0.054,-0.229,0.972,0.315,-0.105,296.28613,48.224670,15.714
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6986,FALSE POSITIVE,0,0,0,1,8.589871,1.846000e-04,-1.846000e-04,132.016100,0.015700,...,-152,4.296,0.231,-0.189,1.088,0.313,-0.228,298.74921,46.973351,14.478
6987,FALSE POSITIVE,0,1,1,0,0.527699,1.160000e-07,-1.160000e-07,131.705093,0.000170,...,-166,4.529,0.035,-0.196,0.903,0.237,-0.079,297.18875,47.093819,14.082
6988,CANDIDATE,0,0,0,0,1.739849,1.780000e-05,-1.780000e-05,133.001270,0.007690,...,-220,4.444,0.056,-0.224,1.031,0.341,-0.114,286.50937,47.163219,14.757
6989,FALSE POSITIVE,0,0,1,0,0.681402,2.430000e-06,-2.430000e-06,132.181750,0.002850,...,-236,4.447,0.056,-0.224,1.041,0.341,-0.114,294.16489,47.176281,15.385


In [3]:
# Do we want to one hot encode koi_disposition
# data = df.copy()

# data_binary_encoded = pd.get_dummies(data, columns=["koi_disposition"])  # multiple cols pd.get_dummies(data)
# data_binary_encoded.head()
X = df.drop('koi_disposition', axis=1)
y = df['koi_disposition']
label_encoder = LabelEncoder()
label_encoder.fit(y)
y = label_encoder.transform(y)


In [4]:
y
label_encoder.classes_

array(['CANDIDATE', 'CONFIRMED', 'FALSE POSITIVE'], dtype=object)

In [5]:
# from tensorflow.keras.utils import to_categorical
# one_hot_y = to_categorical(y)
# one_hot_y   # for Neuro network

# Select your features (columns)

In [6]:
# Set features. This will also be used as your x values.
# selected_features = df[['names', 'of', 'selected', 'features', 'here']] # used to train the model

# Create a Train Test Split

Use `koi_disposition` for the y values

In [7]:
from sklearn.model_selection import train_test_split
# X = pd.get_dummies(X)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Pre-processing

Scale the data using the MinMaxScaler and perform some feature selection

In [8]:
# Scale your data  May or may not affect the accuracy of the model
from sklearn.preprocessing import MinMaxScaler
X_scaler = MinMaxScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Train the Model



In [9]:
from sklearn.linear_model import LogisticRegression
model = LogisticRegression(max_iter=1000)   #(max_iter=100)
model.fit(X_train_scaled, y_train)

# print(model.score(X_train_scaled, y_train), model.score(X_test_scaled,y_test))

# predictions = model.predict(X_test)  # value of each
# print(f"First 10 Predictions:)   {predictions[:10]}")
# print(f"First 10 Actual Labels: {y_test[:10].tolist()}")

# predictions = model.predict_proba(X_test)  # probablity of each
# print(f"First 10 Predictions:)   {predictions[:10]}")
# print(f"First 10 Actual Labels: {y_test[:10].tolist()}")


print(f"Training Data Score: {model.score(X_train_scaled, y_train)}")
print(f"Testing Data Score: {model.score(X_test_scaled, y_test)}")

Training Data Score: 0.8483692542437535
Testing Data Score: 0.8443935926773455


# Hyperparameter Tuning

Use `GridSearchCV` to tune the model's parameters
Source: https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html
sklearn Logisics: https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html

In [32]:
# Create the GridSearchCV model   
from sklearn.model_selection import GridSearchCV  # algorythm based on light look for best possible accuracy
param_grid = {"C": [0.01, 0.1, 1, 10, 100, 150],    # adjustments, Note that regularization is applied by default.
             'penalty': ['l1', 'l2', 'elasticnet', 'none'],
             'solver':['liblinear'],
             'max_iter':[1000],
             'verbose':[0,1,4,8, 12]}
        
model = LogisticRegression(max_iter=1000)  # solver='liblinear'
grid = GridSearchCV(model, param_grid, verbose = 3)

In [33]:
# Train the model with GridSearch
#fit the model using the grid search extimator
grid.fit(X_train_scaled, y_train)  # replaced grid with model2
#List the best parameters for this dataset


Fitting 5 folds for each of 120 candidates, totalling 600 fits
[CV] C=0.01, max_iter=1000, penalty=l1, solver=liblinear, verbose=0 ..
[CV]  C=0.01, max_iter=1000, penalty=l1, solver=liblinear, verbose=0, score=0.748, total=   0.1s
[CV] C=0.01, max_iter=1000, penalty=l1, solver=liblinear, verbose=0 ..
[CV]  C=0.01, max_iter=1000, penalty=l1, solver=liblinear, verbose=0, score=0.737, total=   0.1s
[CV] C=0.01, max_iter=1000, penalty=l1, solver=liblinear, verbose=0 ..


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.1s remaining:    0.0s


[CV]  C=0.01, max_iter=1000, penalty=l1, solver=liblinear, verbose=0, score=0.746, total=   0.1s
[CV] C=0.01, max_iter=1000, penalty=l1, solver=liblinear, verbose=0 ..
[CV]  C=0.01, max_iter=1000, penalty=l1, solver=liblinear, verbose=0, score=0.738, total=   0.1s
[CV] C=0.01, max_iter=1000, penalty=l1, solver=liblinear, verbose=0 ..
[CV]  C=0.01, max_iter=1000, penalty=l1, solver=liblinear, verbose=0, score=0.741, total=   0.1s
[CV] C=0.01, max_iter=1000, penalty=l1, solver=liblinear, verbose=1 ..
[LibLinear][CV]  C=0.01, max_iter=1000, penalty=l1, solver=liblinear, verbose=1, score=0.748, total=   0.1s
[CV] C=0.01, max_iter=1000, penalty=l1, solver=liblinear, verbose=1 ..
[LibLinear][CV]  C=0.01, max_iter=1000, penalty=l1, solver=liblinear, verbose=1, score=0.737, total=   0.1s
[CV] C=0.01, max_iter=1000, penalty=l1, solver=liblinear, verbose=1 ..
[LibLinear][CV]  C=0.01, max_iter=1000, penalty=l1, solver=liblinear, verbose=1, score=0.746, total=   0.1s
[CV] C=0.01, max_iter=1000, pe

Traceback (most recent call last):
  File "C:\Users\paule\anaconda3\envs\PythonData\lib\site-packages\sklearn\model_selection\_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\paule\anaconda3\envs\PythonData\lib\site-packages\sklearn\linear_model\_logistic.py", line 1304, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\Users\paule\anaconda3\envs\PythonData\lib\site-packages\sklearn\linear_model\_logistic.py", line 450, in _check_solver
    " got solver={}.".format(solver))
ValueError: Only 'saga' solver supports elasticnet penalty, got solver=liblinear.

Traceback (most recent call last):
  File "C:\Users\paule\anaconda3\envs\PythonData\lib\site-packages\sklearn\model_selection\_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\paule\anaconda3\envs\PythonData\lib\site-packages\sklearn\linear_model\_logistic.py", line 1304, in fit


[CV]  C=0.1, max_iter=1000, penalty=l1, solver=liblinear, verbose=0, score=0.833, total=   0.1s
[CV] C=0.1, max_iter=1000, penalty=l1, solver=liblinear, verbose=0 ...
[CV]  C=0.1, max_iter=1000, penalty=l1, solver=liblinear, verbose=0, score=0.815, total=   0.1s
[CV] C=0.1, max_iter=1000, penalty=l1, solver=liblinear, verbose=0 ...
[CV]  C=0.1, max_iter=1000, penalty=l1, solver=liblinear, verbose=0, score=0.835, total=   0.1s
[CV] C=0.1, max_iter=1000, penalty=l1, solver=liblinear, verbose=0 ...
[CV]  C=0.1, max_iter=1000, penalty=l1, solver=liblinear, verbose=0, score=0.805, total=   0.1s
[CV] C=0.1, max_iter=1000, penalty=l1, solver=liblinear, verbose=0 ...
[CV]  C=0.1, max_iter=1000, penalty=l1, solver=liblinear, verbose=0, score=0.815, total=   0.1s
[CV] C=0.1, max_iter=1000, penalty=l1, solver=liblinear, verbose=1 ...
[LibLinear][CV]  C=0.1, max_iter=1000, penalty=l1, solver=liblinear, verbose=1, score=0.833, total=   0.1s
[CV] C=0.1, max_iter=1000, penalty=l1, solver=liblinear, v

[LibLinear][CV]  C=0.1, max_iter=1000, penalty=l2, solver=liblinear, verbose=12, score=0.843, total=   0.0s
[CV] C=0.1, max_iter=1000, penalty=l2, solver=liblinear, verbose=12 ..
[LibLinear][CV]  C=0.1, max_iter=1000, penalty=l2, solver=liblinear, verbose=12, score=0.805, total=   0.0s
[CV] C=0.1, max_iter=1000, penalty=l2, solver=liblinear, verbose=12 ..
[LibLinear][CV]  C=0.1, max_iter=1000, penalty=l2, solver=liblinear, verbose=12, score=0.818, total=   0.0s
[CV] C=0.1, max_iter=1000, penalty=elasticnet, solver=liblinear, verbose=0 
[CV]  C=0.1, max_iter=1000, penalty=elasticnet, solver=liblinear, verbose=0, score=nan, total=   0.0s
[CV] C=0.1, max_iter=1000, penalty=elasticnet, solver=liblinear, verbose=0 
[CV]  C=0.1, max_iter=1000, penalty=elasticnet, solver=liblinear, verbose=0, score=nan, total=   0.0s
[CV] C=0.1, max_iter=1000, penalty=elasticnet, solver=liblinear, verbose=0 
[CV]  C=0.1, max_iter=1000, penalty=elasticnet, solver=liblinear, verbose=0, score=nan, total=   0.0s


Traceback (most recent call last):
  File "C:\Users\paule\anaconda3\envs\PythonData\lib\site-packages\sklearn\model_selection\_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\paule\anaconda3\envs\PythonData\lib\site-packages\sklearn\linear_model\_logistic.py", line 1304, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\Users\paule\anaconda3\envs\PythonData\lib\site-packages\sklearn\linear_model\_logistic.py", line 450, in _check_solver
    " got solver={}.".format(solver))
ValueError: Only 'saga' solver supports elasticnet penalty, got solver=liblinear.

Traceback (most recent call last):
  File "C:\Users\paule\anaconda3\envs\PythonData\lib\site-packages\sklearn\model_selection\_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\paule\anaconda3\envs\PythonData\lib\site-packages\sklearn\linear_model\_logistic.py", line 1304, in fit


[CV]  C=1, max_iter=1000, penalty=l1, solver=liblinear, verbose=0, score=0.888, total=   0.4s
[CV] C=1, max_iter=1000, penalty=l1, solver=liblinear, verbose=0 .....
[CV]  C=1, max_iter=1000, penalty=l1, solver=liblinear, verbose=0, score=0.858, total=   0.4s
[CV] C=1, max_iter=1000, penalty=l1, solver=liblinear, verbose=0 .....
[CV]  C=1, max_iter=1000, penalty=l1, solver=liblinear, verbose=0, score=0.883, total=   0.4s
[CV] C=1, max_iter=1000, penalty=l1, solver=liblinear, verbose=0 .....
[CV]  C=1, max_iter=1000, penalty=l1, solver=liblinear, verbose=0, score=0.851, total=   0.4s
[CV] C=1, max_iter=1000, penalty=l1, solver=liblinear, verbose=0 .....
[CV]  C=1, max_iter=1000, penalty=l1, solver=liblinear, verbose=0, score=0.860, total=   0.4s
[CV] C=1, max_iter=1000, penalty=l1, solver=liblinear, verbose=1 .....
[LibLinear][CV]  C=1, max_iter=1000, penalty=l1, solver=liblinear, verbose=1, score=0.888, total=   0.4s
[CV] C=1, max_iter=1000, penalty=l1, solver=liblinear, verbose=1 .....

[LibLinear][CV]  C=1, max_iter=1000, penalty=l2, solver=liblinear, verbose=12, score=0.813, total=   0.1s
[CV] C=1, max_iter=1000, penalty=l2, solver=liblinear, verbose=12 ....
[LibLinear][CV]  C=1, max_iter=1000, penalty=l2, solver=liblinear, verbose=12, score=0.833, total=   0.0s
[CV] C=1, max_iter=1000, penalty=elasticnet, solver=liblinear, verbose=0 
[CV]  C=1, max_iter=1000, penalty=elasticnet, solver=liblinear, verbose=0, score=nan, total=   0.0s
[CV] C=1, max_iter=1000, penalty=elasticnet, solver=liblinear, verbose=0 
[CV]  C=1, max_iter=1000, penalty=elasticnet, solver=liblinear, verbose=0, score=nan, total=   0.0s
[CV] C=1, max_iter=1000, penalty=elasticnet, solver=liblinear, verbose=0 
[CV]  C=1, max_iter=1000, penalty=elasticnet, solver=liblinear, verbose=0, score=nan, total=   0.0s
[CV] C=1, max_iter=1000, penalty=elasticnet, solver=liblinear, verbose=0 
[CV]  C=1, max_iter=1000, penalty=elasticnet, solver=liblinear, verbose=0, score=nan, total=   0.0s
[CV] C=1, max_iter=10

Traceback (most recent call last):
  File "C:\Users\paule\anaconda3\envs\PythonData\lib\site-packages\sklearn\model_selection\_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\paule\anaconda3\envs\PythonData\lib\site-packages\sklearn\linear_model\_logistic.py", line 1304, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\Users\paule\anaconda3\envs\PythonData\lib\site-packages\sklearn\linear_model\_logistic.py", line 450, in _check_solver
    " got solver={}.".format(solver))
ValueError: Only 'saga' solver supports elasticnet penalty, got solver=liblinear.

Traceback (most recent call last):
  File "C:\Users\paule\anaconda3\envs\PythonData\lib\site-packages\sklearn\model_selection\_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\paule\anaconda3\envs\PythonData\lib\site-packages\sklearn\linear_model\_logistic.py", line 1304, in fit


[CV]  C=10, max_iter=1000, penalty=l1, solver=liblinear, verbose=0, score=0.893, total=   3.4s
[CV] C=10, max_iter=1000, penalty=l1, solver=liblinear, verbose=0 ....
[CV]  C=10, max_iter=1000, penalty=l1, solver=liblinear, verbose=0, score=0.872, total=   3.2s
[CV] C=10, max_iter=1000, penalty=l1, solver=liblinear, verbose=0 ....
[CV]  C=10, max_iter=1000, penalty=l1, solver=liblinear, verbose=0, score=0.891, total=   1.4s
[CV] C=10, max_iter=1000, penalty=l1, solver=liblinear, verbose=0 ....
[CV]  C=10, max_iter=1000, penalty=l1, solver=liblinear, verbose=0, score=0.858, total=   1.7s
[CV] C=10, max_iter=1000, penalty=l1, solver=liblinear, verbose=0 ....
[CV]  C=10, max_iter=1000, penalty=l1, solver=liblinear, verbose=0, score=0.876, total=   2.4s
[CV] C=10, max_iter=1000, penalty=l1, solver=liblinear, verbose=1 ....
[LibLinear][CV]  C=10, max_iter=1000, penalty=l1, solver=liblinear, verbose=1, score=0.892, total=   2.8s
[CV] C=10, max_iter=1000, penalty=l1, solver=liblinear, verbose=

[LibLinear][CV]  C=10, max_iter=1000, penalty=l2, solver=liblinear, verbose=12, score=0.872, total=   0.1s
[CV] C=10, max_iter=1000, penalty=l2, solver=liblinear, verbose=12 ...
[LibLinear][CV]  C=10, max_iter=1000, penalty=l2, solver=liblinear, verbose=12, score=0.852, total=   0.1s
[CV] C=10, max_iter=1000, penalty=l2, solver=liblinear, verbose=12 ...
[LibLinear][CV]  C=10, max_iter=1000, penalty=l2, solver=liblinear, verbose=12, score=0.862, total=   0.1s
[CV] C=10, max_iter=1000, penalty=elasticnet, solver=liblinear, verbose=0 
[CV]  C=10, max_iter=1000, penalty=elasticnet, solver=liblinear, verbose=0, score=nan, total=   0.0s
[CV] C=10, max_iter=1000, penalty=elasticnet, solver=liblinear, verbose=0 
[CV]  C=10, max_iter=1000, penalty=elasticnet, solver=liblinear, verbose=0, score=nan, total=   0.0s
[CV] C=10, max_iter=1000, penalty=elasticnet, solver=liblinear, verbose=0 
[CV]  C=10, max_iter=1000, penalty=elasticnet, solver=liblinear, verbose=0, score=nan, total=   0.0s
[CV] C=10

Traceback (most recent call last):
  File "C:\Users\paule\anaconda3\envs\PythonData\lib\site-packages\sklearn\model_selection\_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\paule\anaconda3\envs\PythonData\lib\site-packages\sklearn\linear_model\_logistic.py", line 1304, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\Users\paule\anaconda3\envs\PythonData\lib\site-packages\sklearn\linear_model\_logistic.py", line 450, in _check_solver
    " got solver={}.".format(solver))
ValueError: Only 'saga' solver supports elasticnet penalty, got solver=liblinear.

Traceback (most recent call last):
  File "C:\Users\paule\anaconda3\envs\PythonData\lib\site-packages\sklearn\model_selection\_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\paule\anaconda3\envs\PythonData\lib\site-packages\sklearn\linear_model\_logistic.py", line 1304, in fit


[CV]  C=10, max_iter=1000, penalty=elasticnet, solver=liblinear, verbose=12, score=nan, total=   0.0s
[CV] C=10, max_iter=1000, penalty=elasticnet, solver=liblinear, verbose=12 
[CV]  C=10, max_iter=1000, penalty=elasticnet, solver=liblinear, verbose=12, score=nan, total=   0.0s
[CV] C=10, max_iter=1000, penalty=elasticnet, solver=liblinear, verbose=12 
[CV]  C=10, max_iter=1000, penalty=elasticnet, solver=liblinear, verbose=12, score=nan, total=   0.0s
[CV] C=10, max_iter=1000, penalty=elasticnet, solver=liblinear, verbose=12 
[CV]  C=10, max_iter=1000, penalty=elasticnet, solver=liblinear, verbose=12, score=nan, total=   0.0s
[CV] C=10, max_iter=1000, penalty=none, solver=liblinear, verbose=0 ..
[CV]  C=10, max_iter=1000, penalty=none, solver=liblinear, verbose=0, score=nan, total=   0.0s
[CV] C=10, max_iter=1000, penalty=none, solver=liblinear, verbose=0 ..
[CV]  C=10, max_iter=1000, penalty=none, solver=liblinear, verbose=0, score=nan, total=   0.0s
[CV] C=10, max_iter=1000, penalt

[LibLinear][CV]  C=100, max_iter=1000, penalty=l1, solver=liblinear, verbose=8, score=0.878, total=  17.1s
[CV] C=100, max_iter=1000, penalty=l1, solver=liblinear, verbose=12 ..
[LibLinear][CV]  C=100, max_iter=1000, penalty=l1, solver=liblinear, verbose=12, score=0.896, total=  13.5s
[CV] C=100, max_iter=1000, penalty=l1, solver=liblinear, verbose=12 ..
[LibLinear][CV]  C=100, max_iter=1000, penalty=l1, solver=liblinear, verbose=12, score=0.870, total=   8.2s
[CV] C=100, max_iter=1000, penalty=l1, solver=liblinear, verbose=12 ..
[LibLinear][CV]  C=100, max_iter=1000, penalty=l1, solver=liblinear, verbose=12, score=0.895, total=   4.5s
[CV] C=100, max_iter=1000, penalty=l1, solver=liblinear, verbose=12 ..
[LibLinear][CV]  C=100, max_iter=1000, penalty=l1, solver=liblinear, verbose=12, score=0.864, total=  14.9s
[CV] C=100, max_iter=1000, penalty=l1, solver=liblinear, verbose=12 ..
[LibLinear][CV]  C=100, max_iter=1000, penalty=l1, solver=liblinear, verbose=12, score=0.878, total=  15.8

Traceback (most recent call last):
  File "C:\Users\paule\anaconda3\envs\PythonData\lib\site-packages\sklearn\model_selection\_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\paule\anaconda3\envs\PythonData\lib\site-packages\sklearn\linear_model\_logistic.py", line 1304, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\Users\paule\anaconda3\envs\PythonData\lib\site-packages\sklearn\linear_model\_logistic.py", line 450, in _check_solver
    " got solver={}.".format(solver))
ValueError: Only 'saga' solver supports elasticnet penalty, got solver=liblinear.

Traceback (most recent call last):
  File "C:\Users\paule\anaconda3\envs\PythonData\lib\site-packages\sklearn\model_selection\_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\paule\anaconda3\envs\PythonData\lib\site-packages\sklearn\linear_model\_logistic.py", line 1304, in fit


[CV]  C=100, max_iter=1000, penalty=elasticnet, solver=liblinear, verbose=12, score=nan, total=   0.0s
[CV] C=100, max_iter=1000, penalty=none, solver=liblinear, verbose=0 .
[CV]  C=100, max_iter=1000, penalty=none, solver=liblinear, verbose=0, score=nan, total=   0.0s
[CV] C=100, max_iter=1000, penalty=none, solver=liblinear, verbose=0 .
[CV]  C=100, max_iter=1000, penalty=none, solver=liblinear, verbose=0, score=nan, total=   0.0s
[CV] C=100, max_iter=1000, penalty=none, solver=liblinear, verbose=0 .
[CV]  C=100, max_iter=1000, penalty=none, solver=liblinear, verbose=0, score=nan, total=   0.0s
[CV] C=100, max_iter=1000, penalty=none, solver=liblinear, verbose=0 .
[CV]  C=100, max_iter=1000, penalty=none, solver=liblinear, verbose=0, score=nan, total=   0.0s
[CV] C=100, max_iter=1000, penalty=none, solver=liblinear, verbose=0 .
[CV]  C=100, max_iter=1000, penalty=none, solver=liblinear, verbose=0, score=nan, total=   0.0s
[CV] C=100, max_iter=1000, penalty=none, solver=liblinear, ver

[LibLinear][CV]  C=150, max_iter=1000, penalty=l1, solver=liblinear, verbose=12, score=0.895, total=   3.6s
[CV] C=150, max_iter=1000, penalty=l1, solver=liblinear, verbose=12 ..
[LibLinear][CV]  C=150, max_iter=1000, penalty=l1, solver=liblinear, verbose=12, score=0.865, total=  13.9s
[CV] C=150, max_iter=1000, penalty=l1, solver=liblinear, verbose=12 ..
[LibLinear][CV]  C=150, max_iter=1000, penalty=l1, solver=liblinear, verbose=12, score=0.878, total=  15.6s
[CV] C=150, max_iter=1000, penalty=l2, solver=liblinear, verbose=0 ...
[CV]  C=150, max_iter=1000, penalty=l2, solver=liblinear, verbose=0, score=0.893, total=   0.2s
[CV] C=150, max_iter=1000, penalty=l2, solver=liblinear, verbose=0 ...
[CV]  C=150, max_iter=1000, penalty=l2, solver=liblinear, verbose=0, score=0.871, total=   0.2s
[CV] C=150, max_iter=1000, penalty=l2, solver=liblinear, verbose=0 ...
[CV]  C=150, max_iter=1000, penalty=l2, solver=liblinear, verbose=0, score=0.891, total=   0.2s
[CV] C=150, max_iter=1000, penalt

Traceback (most recent call last):
  File "C:\Users\paule\anaconda3\envs\PythonData\lib\site-packages\sklearn\model_selection\_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\paule\anaconda3\envs\PythonData\lib\site-packages\sklearn\linear_model\_logistic.py", line 1304, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\Users\paule\anaconda3\envs\PythonData\lib\site-packages\sklearn\linear_model\_logistic.py", line 450, in _check_solver
    " got solver={}.".format(solver))
ValueError: Only 'saga' solver supports elasticnet penalty, got solver=liblinear.

Traceback (most recent call last):
  File "C:\Users\paule\anaconda3\envs\PythonData\lib\site-packages\sklearn\model_selection\_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\paule\anaconda3\envs\PythonData\lib\site-packages\sklearn\linear_model\_logistic.py", line 1304, in fit


GridSearchCV(estimator=LogisticRegression(max_iter=1000),
             param_grid={'C': [0.01, 0.1, 1, 10, 100, 150], 'max_iter': [1000],
                         'penalty': ['l1', 'l2', 'elasticnet', 'none'],
                         'solver': ['liblinear'], 'verbose': [0, 1, 4, 8, 12]},
             verbose=3)

In [34]:
print(grid.best_params_)

#List the best score
print(f'Best Score: {grid.best_score_}')

#Make predictions with the hpyertuned model
predictions = grid.predict(X_test_scaled)

print(grid.best_params_)
print(grid.best_score_)

# {'C': 100, 'max_iter': 1000, 'penalty': 'l2', 'solver': 'liblinear', 'verbose': 0}
# Best Score: 0.8754477182922304
# {'C': 100, 'max_iter': 1000, 'penalty': 'l1', 'solver': 'liblinear', 'verbose': 8}
# Best Score: 0.8807894104890881

{'C': 100, 'max_iter': 1000, 'penalty': 'l1', 'solver': 'liblinear', 'verbose': 8}
Best Score: 0.8807894104890881
{'C': 100, 'max_iter': 1000, 'penalty': 'l1', 'solver': 'liblinear', 'verbose': 8}
0.8807894104890881


In [36]:
from sklearn.metrics import classification_report
print(classification_report(y_test, predictions)) #, target_names=['']))

              precision    recall  f1-score   support

           0       0.79      0.70      0.74       411
           1       0.77      0.83      0.80       484
           2       0.98      1.00      0.99       853

    accuracy                           0.88      1748
   macro avg       0.85      0.84      0.84      1748
weighted avg       0.88      0.88      0.88      1748



# Save the Model

In [None]:
# save your model by updating "your_name" with your name
# and "your_model" with your model variable
# be sure to turn this in to BCS
# if joblib fails to import, try running the command to install in terminal/git-bash
import joblib
filename = 'planet_logistic.sav'
joblib.dump(your_model, filename)