# Classification Approch

In [17]:


import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (confusion_matrix, classification_report,
ConfusionMatrixDisplay,PrecisionRecallDisplay,RocCurveDisplay)
from sklearn.pipeline import Pipeline


#Grid Search
from sklearn.model_selection import GridSearchCV

#Model(s)
from sklearn.linear_model import LogisticRegression


## Work flow
0. [ ] Clean and Engeering Data for X and y
1. [ ] Split Data in Train/Test for X and y
2. [ ] Scaler on Training X & X test
3. [ ] Create Model(s)
4. [ ] Create Pipeline and HyperParameters
5. [ ] Fit/Train Model(s) on X Train
6. [ ] Evaluate Model(s) on X test
7. [ ] Adjust Param as Necessary
8. [ ] Bonus: Save Model(s)

In [18]:
filename ='logestic.pkl'

### PreProcess


In [19]:
names=['IR', 'MR', 'FF', 'Credibility', 'Competitiveness', 'OR', 'Class']
df = pd.read_csv('../Qualitative_Bankruptcy.data.txt',names=names)

#### Clean and Engeering Data

In [20]:
# barresi mohtaviat soton ha
for col in names:
    print(df[col].value_counts())
    print('\n****\n')

IR
N    89
A    81
P    80
Name: count, dtype: int64

****

MR
N    119
A     69
P     62
Name: count, dtype: int64

****

FF
N    119
A     74
P     57
Name: count, dtype: int64

****

Credibility
N    94
P    79
A    77
Name: count, dtype: int64

****

Competitiveness
N    103
P     91
A     56
Name: count, dtype: int64

****

OR
N    114
P     79
A     57
Name: count, dtype: int64

****

Class
NB    143
B     107
Name: count, dtype: int64

****



In [21]:

X = pd.get_dummies(df.drop(['Class'],axis=1),drop_first=True)
y = df['Class']

#### Split Data in Train/Test for X and y


In [22]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=1099)

#### Polynomal

In [23]:
poly = PolynomialFeatures()

#### Scaler


### Model

#### Create Model(s)

In [24]:
logistic = LogisticRegression()

#### Create Pipline

In [25]:
help(PolynomialFeatures)

Help on class PolynomialFeatures in module sklearn.preprocessing._polynomial:

class PolynomialFeatures(sklearn.base.TransformerMixin, sklearn.base.BaseEstimator)
 |  PolynomialFeatures(degree=2, *, interaction_only=False, include_bias=True, order='C')
 |  
 |  Generate polynomial and interaction features.
 |  
 |  Generate a new feature matrix consisting of all polynomial combinations
 |  of the features with degree less than or equal to the specified degree.
 |  For example, if an input sample is two dimensional and of the form
 |  [a, b], the degree-2 polynomial features are [1, a, b, a^2, ab, b^2].
 |  
 |  Read more in the :ref:`User Guide <polynomial_features>`.
 |  
 |  Parameters
 |  ----------
 |  degree : int or tuple (min_degree, max_degree), default=2
 |      If a single int is given, it specifies the maximal degree of the
 |      polynomial features. If a tuple `(min_degree, max_degree)` is passed,
 |      then `min_degree` is the minimum and `max_degree` is the maximum
 |

In [26]:
help(LogisticRegression)

Help on class LogisticRegression in module sklearn.linear_model._logistic:

class LogisticRegression(sklearn.linear_model._base.LinearClassifierMixin, sklearn.linear_model._base.SparseCoefMixin, sklearn.base.BaseEstimator)
 |  LogisticRegression(penalty='l2', *, dual=False, tol=0.0001, C=1.0, fit_intercept=True, intercept_scaling=1, class_weight=None, random_state=None, solver='lbfgs', max_iter=100, multi_class='auto', verbose=0, warm_start=False, n_jobs=None, l1_ratio=None)
 |  
 |  Logistic Regression (aka logit, MaxEnt) classifier.
 |  
 |  In the multiclass case, the training algorithm uses the one-vs-rest (OvR)
 |  scheme if the 'multi_class' option is set to 'ovr', and uses the
 |  cross-entropy loss if the 'multi_class' option is set to 'multinomial'.
 |  (Currently the 'multinomial' option is supported only by the 'lbfgs',
 |  'sag', 'saga' and 'newton-cg' solvers.)
 |  
 |  This class implements regularized logistic regression using the
 |  'liblinear' library, 'newton-cg', 's

In [27]:
pipe =Pipeline([('poly',poly),('log',logistic)])


In [28]:
#hyper parameters value

#poly
degree = list(range(1,10))
include_bias =[True,False]

#Logistic 
C = list(np.logspace(-4,4,10))
penalty =['l1', 'l2', 'elasticnet', None]
solver = ['lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga']
multi_class = ['auto', 'ovr', 'multinomial']
l1_ratio = list(np.logspace(-4,0,10))


In [29]:
hyper_param ={
#poly
'poly__degree':degree,
'poly__include_bias': include_bias,

}

In [38]:
full_model = GridSearchCV(estimator=pipe,param_grid=hyper_param,cv = 5,verbose=5)

#### Train Model

In [39]:
full_model.fit(X_train,y_train)

Fitting 5 folds for each of 18 candidates, totalling 90 fits
[CV 1/5] END poly__degree=1, poly__include_bias=True;, score=1.000 total time=   0.1s
[CV 2/5] END poly__degree=1, poly__include_bias=True;, score=1.000 total time=   0.0s
[CV 3/5] END poly__degree=1, poly__include_bias=True;, score=0.970 total time=   0.0s
[CV 4/5] END poly__degree=1, poly__include_bias=True;, score=1.000 total time=   0.0s
[CV 5/5] END poly__degree=1, poly__include_bias=True;, score=1.000 total time=   0.0s
[CV 1/5] END poly__degree=1, poly__include_bias=False;, score=1.000 total time=   0.0s
[CV 2/5] END poly__degree=1, poly__include_bias=False;, score=1.000 total time=   0.0s
[CV 3/5] END poly__degree=1, poly__include_bias=False;, score=0.970 total time=   0.0s
[CV 4/5] END poly__degree=1, poly__include_bias=False;, score=1.000 total time=   0.0s
[CV 5/5] END poly__degree=1, poly__include_bias=False;, score=1.000 total time=   0.0s
[CV 1/5] END poly__degree=2, poly__include_bias=True;, score=1.000 total t

KeyboardInterrupt: 

### Evaluate model

#### Test On data

In [None]:
y_pred = full_model.predict(X_test)

#### Plots

In [None]:
ConfusionMatrixDisplay(confusion_matrix=confusion_matrix(
    y_true=y_test, y_pred=y_pred)
                       ,display_labels=full_model.classes_)

In [None]:
PrecisionRecallDisplay.from_estimator(estimator=full_model,
                                     X=X_train,
                                     y=y_train)

In [None]:
y_test1 = y_test.map({"NB":1,"B":0})
y_pred1 = pd.Series(y_pred).map({"NB":1,"B":0})

In [None]:
y_pred1


In [None]:
PrecisionRecallDisplay.from_predictions(y_pred=y_pred1,
                                    y_true=y_test1,)

In [None]:
RocCurveDisplay.from_estimator(estimator=full_model,
                                     X=X_train,
                                     y=y_train)

In [None]:
RocCurveDisplay.from_predictions(y_pred=y_pred1,
                                 y_true=y_test1)

In [None]:
print(classification_report(y_pred=y_pred,
                      y_true=y_test))

### Final Model(s)


#### Train on all Data 

In [None]:
full_model.best_params_

##### no need

#### Save with joblib

In [None]:
import joblib
joblib.dump(value=full_model,filename=filename)

# Congratulations!!!

#### Created and trained by  Matin1099.
