In [41]:
# data wrangling
import pandas as pd
import numpy as np

from sklearn.svm import LinearSVR
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import SGDRegressor, LassoCV
from sklearn.tree import DecisionTreeRegressor

# visualizations
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.filterwarnings("ignore")

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# py files
import acquire
import prepare
import explore
import model

In [2]:
df = acquire.acquire_agg_data()

In [3]:
df = prepare.prepare(df)

In [4]:
df = prepare.unique(df)

In [5]:
df = prepare.treat_nulls(df)

In [6]:
df = explore.early_failure(df,1.6)

In [7]:
df = explore.old_or_fail(df)

In [8]:
df = explore.make_binary_values(df)

In [9]:
df = explore.remove_manufacturers(df)

In [10]:
def encode_hot_X(train, col_name):
    encoded_values = sorted(list(train[col_name].unique()))

    # Integer Encoding
    int_encoder = LabelEncoder()
    train.encoded = int_encoder.fit_transform(train[col_name])
#     test.encoded = int_encoder.transform(test[col_name])

    # create 2D np arrays of the encoded variable (in train and test)
    train_array = np.array(train.encoded).reshape(len(train.encoded),1)
#     test_array = np.array(test.encoded).reshape(len(test.encoded),1)

    # One Hot Encoding
    ohe = OneHotEncoder(sparse=False, categories='auto')
    train_ohe = ohe.fit_transform(train_array)
#     test_ohe = ohe.transform(test_array)

    # Turn the array of new values into a data frame with columns names being the values
    # and index matching that of train/test
    # then merge the new dataframe with the existing train/test dataframe
    train_encoded = pd.DataFrame(data=train_ohe, columns=encoded_values, index=train.index)
    train = train.join(train_encoded)

#     test_encoded = pd.DataFrame(data=test_ohe, columns=encoded_values, index=test.index)
#     test = test.join(test_encoded)

    return train

In [11]:
df_1 = encode_hot_X(df, col_name= "model")

In [12]:
def split_my_data(df):
    X = df.drop(columns = ['serial_number', 'failure', 'model', 'early_failure', 'drive_age_in_years', 'reallocated_sectors_count', 'reported_uncorrectable_errors', 'command_timeout', 'current_pending_sector_count', 'uncorrectable_sector_count'])
    y = df[['early_failure']]
    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size = .80, random_state = 123, stratify=df.early_failure)
    return X, y, X_train, X_test, y_train, y_test

In [13]:
X, y, X_train, X_test, y_train, y_test = split_my_data(df_1)

In [14]:
len(X_train), len(y_train), len(X_test), len(y_test)

(97520, 97520, 24380, 24380)

In [15]:
X_train, X_test = model.encode_hot(X_train, X_test, col_name = 'manufacturer')

In [16]:
X_train = X_train.drop(columns = 'manufacturer')

In [17]:
X_test = X_test.drop(columns = 'manufacturer')

In [18]:
#X_train = X_train.drop(columns = 'model')

In [19]:
#X_test = X_test.drop(columns = 'model')

In [20]:
X_train = X_train.drop(columns = 'capacity_terabytes')

In [21]:
X_test = X_test.drop(columns = 'capacity_terabytes')

In [42]:
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(97520, 99) (24380, 99) (97520, 1) (24380, 1)


### weights = {0: 1, 1: 50}, G = 10, C = 10

### Best combo with test data

In [43]:
weights = {0: 1, 1: 50}
svclassifier = SVC(kernel='sigmoid', probability = True, gamma = 10, C = 10, class_weight = weights)
svclassifier.fit(X_test, y_test)

SVC(C=10, cache_size=200, class_weight={0: 1, 1: 50}, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=10, kernel='sigmoid',
    max_iter=-1, probability=True, random_state=None, shrinking=True, tol=0.001,
    verbose=False)

In [44]:
y_pred = svclassifier.predict(X_test)
y_pred

array([0, 1, 1, ..., 0, 1, 0])

In [45]:
y_pred_proba = svclassifier.predict_proba(X_test)
y_pred_proba

array([[0.97874463, 0.02125537],
       [0.97874399, 0.02125601],
       [0.97874399, 0.02125601],
       ...,
       [0.97874463, 0.02125537],
       [0.97874399, 0.02125601],
       [0.97874463, 0.02125537]])

In [46]:
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

[[ 8877 14983]
 [  315   205]]
              precision    recall  f1-score   support

           0       0.97      0.37      0.54     23860
           1       0.01      0.39      0.03       520

    accuracy                           0.37     24380
   macro avg       0.49      0.38      0.28     24380
weighted avg       0.95      0.37      0.53     24380



### weights = {0: 1, 1: 50}, G = 10, C = 100

### change C with test data

In [49]:
weights = {0: 1, 1: 50}
svclassifier = SVC(kernel='sigmoid', probability = True, gamma = 10, C = 100, class_weight = weights)
svclassifier.fit(X_test, y_test)

SVC(C=100, cache_size=200, class_weight={0: 1, 1: 50}, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=10, kernel='sigmoid',
    max_iter=-1, probability=True, random_state=None, shrinking=True, tol=0.001,
    verbose=False)

In [50]:
y_pred = svclassifier.predict(X_test)
y_pred

array([0, 1, 1, ..., 0, 1, 0])

In [51]:
y_pred_proba = svclassifier.predict_proba(X_test)
y_pred_proba

array([[0.97888157, 0.02111843],
       [0.97888144, 0.02111856],
       [0.97888144, 0.02111856],
       ...,
       [0.97888157, 0.02111843],
       [0.97888144, 0.02111856],
       [0.97888157, 0.02111843]])

In [52]:
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

[[ 8877 14983]
 [  315   205]]
              precision    recall  f1-score   support

           0       0.97      0.37      0.54     23860
           1       0.01      0.39      0.03       520

    accuracy                           0.37     24380
   macro avg       0.49      0.38      0.28     24380
weighted avg       0.95      0.37      0.53     24380



### weights = {0: 1, 1: 250}, G = 10, C = 100

# TERRIBLE

In [53]:
weights = {0: 1, 1: 250}
svclassifier = SVC(kernel='sigmoid', probability = True, gamma = 10, C = 100, class_weight = weights)
svclassifier.fit(X_test, y_test)

SVC(C=100, cache_size=200, class_weight={0: 1, 1: 250}, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=10, kernel='sigmoid',
    max_iter=-1, probability=True, random_state=None, shrinking=True, tol=0.001,
    verbose=False)

In [55]:
y_pred = svclassifier.predict(X_test)
y_pred

array([1, 1, 1, ..., 1, 1, 1])

In [56]:
y_pred_proba = svclassifier.predict_proba(X_test)
y_pred_proba

array([[0.97999265, 0.02000735],
       [0.9791645 , 0.0208355 ],
       [0.9791645 , 0.0208355 ],
       ...,
       [0.97999265, 0.02000735],
       [0.9791645 , 0.0208355 ],
       [0.97999265, 0.02000735]])

In [57]:
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

[[  162 23698]
 [   61   459]]
              precision    recall  f1-score   support

           0       0.73      0.01      0.01     23860
           1       0.02      0.88      0.04       520

    accuracy                           0.03     24380
   macro avg       0.37      0.44      0.03     24380
weighted avg       0.71      0.03      0.01     24380



### weights = {0: 1, 1: 25}, G = 100, C = 100

In [58]:
weights = {0: 1, 1: 25}
svclassifier = SVC(kernel='sigmoid', probability = True, gamma = 100, C = 100, class_weight = weights)
svclassifier.fit(X_test, y_test)

SVC(C=100, cache_size=200, class_weight={0: 1, 1: 25}, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=100, kernel='sigmoid',
    max_iter=-1, probability=True, random_state=None, shrinking=True, tol=0.001,
    verbose=False)

In [59]:
y_pred = svclassifier.predict(X_test)
y_pred

array([0, 0, 0, ..., 0, 0, 0])

In [60]:
y_pred_proba = svclassifier.predict_proba(X_test)
y_pred_proba

array([[0.9785884, 0.0214116],
       [0.9785884, 0.0214116],
       [0.9785884, 0.0214116],
       ...,
       [0.9785884, 0.0214116],
       [0.9785884, 0.0214116],
       [0.9785884, 0.0214116]])

In [61]:
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

[[23443   417]
 [  488    32]]
              precision    recall  f1-score   support

           0       0.98      0.98      0.98     23860
           1       0.07      0.06      0.07       520

    accuracy                           0.96     24380
   macro avg       0.53      0.52      0.52     24380
weighted avg       0.96      0.96      0.96     24380



### weights = {0: 1, 1: 75}, G = 10, C = 100

In [71]:
weights = {0: 1, 1: 75}
svclassifier = SVC(kernel='sigmoid', probability = True, gamma = 10, C = 100, class_weight = weights)
svclassifier.fit(X_test, y_test)

SVC(C=100, cache_size=200, class_weight={0: 1, 1: 75}, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=10, kernel='sigmoid',
    max_iter=-1, probability=True, random_state=None, shrinking=True, tol=0.001,
    verbose=False)

In [72]:
y_pred = svclassifier.predict(X_test)
y_pred

array([0, 1, 1, ..., 0, 1, 0])

In [73]:
y_pred_proba = svclassifier.predict_proba(X_test)
y_pred_proba

array([[0.97865385, 0.02134615],
       [0.97851794, 0.02148206],
       [0.97851794, 0.02148206],
       ...,
       [0.97865385, 0.02134615],
       [0.97851794, 0.02148206],
       [0.97865385, 0.02134615]])

In [74]:
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

[[ 8497 15363]
 [  231   289]]
              precision    recall  f1-score   support

           0       0.97      0.36      0.52     23860
           1       0.02      0.56      0.04       520

    accuracy                           0.36     24380
   macro avg       0.50      0.46      0.28     24380
weighted avg       0.95      0.36      0.51     24380



### weights = {0: 1, 1: 100}, G = 10, C = 100

In [76]:
weights = {0: 1, 1: 100}
svclassifier = SVC(kernel='sigmoid', probability = True, gamma = 10, C = 100, class_weight = weights)
svclassifier.fit(X_test, y_test)

SVC(C=100, cache_size=200, class_weight={0: 1, 1: 100}, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=10, kernel='sigmoid',
    max_iter=-1, probability=True, random_state=None, shrinking=True, tol=0.001,
    verbose=False)

In [77]:
y_pred = svclassifier.predict(X_test)
y_pred

array([0, 1, 1, ..., 0, 1, 0])

In [78]:
y_pred_proba = svclassifier.predict_proba(X_test)
y_pred_proba

array([[0.97930643, 0.02069357],
       [0.97772633, 0.02227367],
       [0.97772633, 0.02227367],
       ...,
       [0.97930643, 0.02069357],
       [0.97772633, 0.02227367],
       [0.97930643, 0.02069357]])

In [79]:
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

[[ 7520 16340]
 [   98   422]]
              precision    recall  f1-score   support

           0       0.99      0.32      0.48     23860
           1       0.03      0.81      0.05       520

    accuracy                           0.33     24380
   macro avg       0.51      0.56      0.26     24380
weighted avg       0.97      0.33      0.47     24380



### weights = {0: 1, 1: 100}, G = 100, C = 10

In [81]:
weights = {0: 1, 1: 100}
svclassifier = SVC(kernel='sigmoid', probability = True, gamma = 100, C = 10, class_weight = weights)
svclassifier.fit(X_test, y_test)

SVC(C=10, cache_size=200, class_weight={0: 1, 1: 100}, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=100, kernel='sigmoid',
    max_iter=-1, probability=True, random_state=None, shrinking=True, tol=0.001,
    verbose=False)

In [82]:
y_pred = svclassifier.predict(X_test)
y_pred

array([0, 1, 1, ..., 0, 1, 0])

In [83]:
y_pred_proba = svclassifier.predict_proba(X_test)
y_pred_proba

array([[0.97881731, 0.02118269],
       [0.97842459, 0.02157541],
       [0.97842459, 0.02157541],
       ...,
       [0.97881731, 0.02118269],
       [0.97842459, 0.02157541],
       [0.97881731, 0.02118269]])

In [84]:
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

[[ 7520 16340]
 [   98   422]]
              precision    recall  f1-score   support

           0       0.99      0.32      0.48     23860
           1       0.03      0.81      0.05       520

    accuracy                           0.33     24380
   macro avg       0.51      0.56      0.26     24380
weighted avg       0.97      0.33      0.47     24380



### weights = {0: 1, 1: 150}, G = 100, C = 10

In [85]:
weights = {0: 1, 1: 150}
svclassifier = SVC(kernel='sigmoid', probability = True, gamma = 100, C = 10, class_weight = weights)
svclassifier.fit(X_test, y_test)

SVC(C=10, cache_size=200, class_weight={0: 1, 1: 150}, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=100, kernel='sigmoid',
    max_iter=-1, probability=True, random_state=None, shrinking=True, tol=0.001,
    verbose=False)

In [86]:
y_pred = svclassifier.predict(X_test)
y_pred

array([0, 1, 1, ..., 0, 1, 0])

In [87]:
y_pred_proba = svclassifier.predict_proba(X_test)
y_pred_proba

array([[0.97956449, 0.02043551],
       [0.97874861, 0.02125139],
       [0.97874861, 0.02125139],
       ...,
       [0.97956449, 0.02043551],
       [0.97874861, 0.02125139],
       [0.97956449, 0.02043551]])

In [88]:
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

[[ 7520 16340]
 [   98   422]]
              precision    recall  f1-score   support

           0       0.99      0.32      0.48     23860
           1       0.03      0.81      0.05       520

    accuracy                           0.33     24380
   macro avg       0.51      0.56      0.26     24380
weighted avg       0.97      0.33      0.47     24380



### weights = {0: 1, 1: 150}, G = 100, C = 100

In [89]:
weights = {0: 1, 1: 150}
svclassifier = SVC(kernel='sigmoid', probability = True, gamma = 100, C = 100, class_weight = weights)
svclassifier.fit(X_test, y_test)

SVC(C=100, cache_size=200, class_weight={0: 1, 1: 150}, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=100, kernel='sigmoid',
    max_iter=-1, probability=True, random_state=None, shrinking=True, tol=0.001,
    verbose=False)

In [90]:
y_pred = svclassifier.predict(X_test)
y_pred

array([0, 1, 1, ..., 0, 1, 0])

In [91]:
y_pred_proba = svclassifier.predict_proba(X_test)
y_pred_proba

array([[0.97936571, 0.02063429],
       [0.97791827, 0.02208173],
       [0.97791827, 0.02208173],
       ...,
       [0.97936571, 0.02063429],
       [0.97791827, 0.02208173],
       [0.97936571, 0.02063429]])

In [92]:
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

[[ 7520 16340]
 [   98   422]]
              precision    recall  f1-score   support

           0       0.99      0.32      0.48     23860
           1       0.03      0.81      0.05       520

    accuracy                           0.33     24380
   macro avg       0.51      0.56      0.26     24380
weighted avg       0.97      0.33      0.47     24380



### weights = {0: 1, 1: 75}, G = 100, C = 10

In [93]:
weights = {0: 1, 1: 75}
svclassifier = SVC(kernel='sigmoid', probability = True, gamma = 100, C = 10, class_weight = weights)
svclassifier.fit(X_test, y_test)

SVC(C=10, cache_size=200, class_weight={0: 1, 1: 75}, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=100, kernel='sigmoid',
    max_iter=-1, probability=True, random_state=None, shrinking=True, tol=0.001,
    verbose=False)

In [94]:
y_pred = svclassifier.predict(X_test)
y_pred

array([0, 1, 1, ..., 0, 1, 0])

In [95]:
y_pred_proba = svclassifier.predict_proba(X_test)
y_pred_proba

array([[0.97867314, 0.02132686],
       [0.97850651, 0.02149349],
       [0.97850651, 0.02149349],
       ...,
       [0.97867314, 0.02132686],
       [0.97850651, 0.02149349],
       [0.97867314, 0.02132686]])

In [96]:
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

[[ 8497 15363]
 [  231   289]]
              precision    recall  f1-score   support

           0       0.97      0.36      0.52     23860
           1       0.02      0.56      0.04       520

    accuracy                           0.36     24380
   macro avg       0.50      0.46      0.28     24380
weighted avg       0.95      0.36      0.51     24380



### weights = {0: 1, 1: 75}, G = 100, C = 100

In [105]:
weights = {0: 1, 1: 75}
svclassifier = SVC(kernel='sigmoid', probability = True, gamma = 100, C = 100, class_weight = weights)
svclassifier.fit(X_test, y_test)

SVC(C=100, cache_size=200, class_weight={0: 1, 1: 75}, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=100, kernel='sigmoid',
    max_iter=-1, probability=True, random_state=None, shrinking=True, tol=0.001,
    verbose=False)

In [106]:
y_pred = svclassifier.predict(X_test)
y_pred

array([0, 1, 1, ..., 0, 1, 0])

In [107]:
y_pred_proba = svclassifier.predict_proba(X_test)
y_pred_proba

array([[0.97912501, 0.02087499],
       [0.97856905, 0.02143095],
       [0.97856905, 0.02143095],
       ...,
       [0.97912501, 0.02087499],
       [0.97856905, 0.02143095],
       [0.97912501, 0.02087499]])

In [108]:
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

[[ 8497 15363]
 [  231   289]]
              precision    recall  f1-score   support

           0       0.97      0.36      0.52     23860
           1       0.02      0.56      0.04       520

    accuracy                           0.36     24380
   macro avg       0.50      0.46      0.28     24380
weighted avg       0.95      0.36      0.51     24380



### weights = {0: 1, 1: 125}, G = 10, C = 10

In [101]:
weights = {0: 1, 1: 125}
svclassifier = SVC(kernel='sigmoid', probability = True, gamma = 10, C = 10, class_weight = weights)
svclassifier.fit(X_test, y_test)

SVC(C=10, cache_size=200, class_weight={0: 1, 1: 125}, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=10, kernel='sigmoid',
    max_iter=-1, probability=True, random_state=None, shrinking=True, tol=0.001,
    verbose=False)

In [102]:
y_pred = svclassifier.predict(X_test)
y_pred

array([0, 1, 1, ..., 0, 1, 0])

In [103]:
y_pred_proba = svclassifier.predict_proba(X_test)
y_pred_proba

array([[0.97941432, 0.02058568],
       [0.9781397 , 0.0218603 ],
       [0.9781397 , 0.0218603 ],
       ...,
       [0.97941432, 0.02058568],
       [0.9781397 , 0.0218603 ],
       [0.97941432, 0.02058568]])

In [104]:
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

[[ 7520 16340]
 [   98   422]]
              precision    recall  f1-score   support

           0       0.99      0.32      0.48     23860
           1       0.03      0.81      0.05       520

    accuracy                           0.33     24380
   macro avg       0.51      0.56      0.26     24380
weighted avg       0.97      0.33      0.47     24380



### weights = {0: 1, 1: 25}, G = 10, C = 10

In [137]:
weights = {0: 1, 1: 25}
svclassifier = SVC(kernel='sigmoid', probability = True, gamma = 10, C = 10, class_weight = weights)
svclassifier.fit(X_train, y_train)

SVC(C=10, cache_size=200, class_weight={0: 1, 1: 25}, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=10, kernel='sigmoid',
    max_iter=-1, probability=True, random_state=None, shrinking=True, tol=0.001,
    verbose=False)

In [138]:
y_pred = svclassifier.predict(X_test)
y_pred

array([0, 0, 0, ..., 0, 0, 0])

In [139]:
y_pred_proba = svclassifier.predict_proba(X_test)
y_pred_proba

array([[0.97872095, 0.02127905],
       [0.97872095, 0.02127905],
       [0.97872095, 0.02127905],
       ...,
       [0.97872095, 0.02127905],
       [0.97872095, 0.02127905],
       [0.97872095, 0.02127905]])

In [140]:
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

[[22951   909]
 [  359   161]]
              precision    recall  f1-score   support

           0       0.98      0.96      0.97     23860
           1       0.15      0.31      0.20       520

    accuracy                           0.95     24380
   macro avg       0.57      0.64      0.59     24380
weighted avg       0.97      0.95      0.96     24380



### weights = {0: 1, 1: 85}, G = 10, C = 1000

In [117]:
weights = {0: 1, 1: 85}
svclassifier = SVC(kernel='sigmoid', probability = True, gamma = 10, C = 1000, class_weight = weights)
svclassifier.fit(X_test, y_test)

SVC(C=1000, cache_size=200, class_weight={0: 1, 1: 85}, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=10, kernel='sigmoid',
    max_iter=-1, probability=True, random_state=None, shrinking=True, tol=0.001,
    verbose=False)

In [118]:
y_pred = svclassifier.predict(X_test)
y_pred

array([0, 1, 1, ..., 0, 1, 0])

In [119]:
y_pred_proba = svclassifier.predict_proba(X_test)
y_pred_proba

array([[0.97878598, 0.02121402],
       [0.97835033, 0.02164967],
       [0.97835033, 0.02164967],
       ...,
       [0.97878598, 0.02121402],
       [0.97835033, 0.02164967],
       [0.97878598, 0.02121402]])

In [120]:
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

[[ 8029 15831]
 [  103   417]]
              precision    recall  f1-score   support

           0       0.99      0.34      0.50     23860
           1       0.03      0.80      0.05       520

    accuracy                           0.35     24380
   macro avg       0.51      0.57      0.28     24380
weighted avg       0.97      0.35      0.49     24380



### weights = {0: 1, 1: 90}, G = 100, C = 1000

In [121]:
weights = {0: 1, 1: 90}
svclassifier = SVC(kernel='sigmoid', probability = True, gamma = 100, C = 1000, class_weight = weights)
svclassifier.fit(X_test, y_test)

SVC(C=1000, cache_size=200, class_weight={0: 1, 1: 90}, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=100, kernel='sigmoid',
    max_iter=-1, probability=True, random_state=None, shrinking=True, tol=0.001,
    verbose=False)

In [122]:
y_pred = svclassifier.predict(X_test)
y_pred

array([0, 1, 1, ..., 0, 1, 0])

In [123]:
y_pred_proba = svclassifier.predict_proba(X_test)
y_pred_proba

array([[0.97888261, 0.02111739],
       [0.97805415, 0.02194585],
       [0.97805415, 0.02194585],
       ...,
       [0.97888261, 0.02111739],
       [0.97805415, 0.02194585],
       [0.97888261, 0.02111739]])

In [124]:
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

[[ 8029 15831]
 [  103   417]]
              precision    recall  f1-score   support

           0       0.99      0.34      0.50     23860
           1       0.03      0.80      0.05       520

    accuracy                           0.35     24380
   macro avg       0.51      0.57      0.28     24380
weighted avg       0.97      0.35      0.49     24380



### weights = {0: 1, 1: 125}, G = 100, C = 1000

In [125]:
weights = {0: 1, 1: 125}
svclassifier = SVC(kernel='sigmoid', probability = True, gamma = 100, C = 1000, class_weight = weights)
svclassifier.fit(X_test, y_test)

SVC(C=1000, cache_size=200, class_weight={0: 1, 1: 125}, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=100, kernel='sigmoid',
    max_iter=-1, probability=True, random_state=None, shrinking=True, tol=0.001,
    verbose=False)

In [126]:
y_pred = svclassifier.predict(X_test)
y_pred

array([0, 1, 1, ..., 0, 1, 0])

In [127]:
y_pred_proba = svclassifier.predict_proba(X_test)
y_pred_proba

array([[0.97892112, 0.02107888],
       [0.97848366, 0.02151634],
       [0.97848366, 0.02151634],
       ...,
       [0.97892112, 0.02107888],
       [0.97848366, 0.02151634],
       [0.97892112, 0.02107888]])

In [128]:
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

[[ 7520 16340]
 [   98   422]]
              precision    recall  f1-score   support

           0       0.99      0.32      0.48     23860
           1       0.03      0.81      0.05       520

    accuracy                           0.33     24380
   macro avg       0.51      0.56      0.26     24380
weighted avg       0.97      0.33      0.47     24380



### weights = {0: 1, 1: 125}, G = 10, C = 100

In [129]:
weights = {0: 1, 1: 125}
svclassifier = SVC(kernel='sigmoid', probability = True, gamma = 10, C = 100, class_weight = weights)
svclassifier.fit(X_test, y_test)

SVC(C=100, cache_size=200, class_weight={0: 1, 1: 125}, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=10, kernel='sigmoid',
    max_iter=-1, probability=True, random_state=None, shrinking=True, tol=0.001,
    verbose=False)

In [130]:
y_pred = svclassifier.predict(X_test)
y_pred

array([0, 1, 1, ..., 0, 1, 0])

In [131]:
y_pred_proba = svclassifier.predict_proba(X_test)
y_pred_proba

array([[0.97897092, 0.02102908],
       [0.97844391, 0.02155609],
       [0.97844391, 0.02155609],
       ...,
       [0.97897092, 0.02102908],
       [0.97844391, 0.02155609],
       [0.97897092, 0.02102908]])

In [132]:
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

[[ 7520 16340]
 [   98   422]]
              precision    recall  f1-score   support

           0       0.99      0.32      0.48     23860
           1       0.03      0.81      0.05       520

    accuracy                           0.33     24380
   macro avg       0.51      0.56      0.26     24380
weighted avg       0.97      0.33      0.47     24380



### weights = {0: 1, 1: 200}, G = 10, C = 100

In [133]:
weights = {0: 1, 1: 200}
svclassifier = SVC(kernel='sigmoid', probability = True, gamma = 10, C = 100, class_weight = weights)
svclassifier.fit(X_test, y_test)

SVC(C=100, cache_size=200, class_weight={0: 1, 1: 200}, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=10, kernel='sigmoid',
    max_iter=-1, probability=True, random_state=None, shrinking=True, tol=0.001,
    verbose=False)

In [134]:
y_pred = svclassifier.predict(X_test)
y_pred

array([0, 1, 1, ..., 0, 1, 0])

In [135]:
y_pred_proba = svclassifier.predict_proba(X_test)
y_pred_proba

array([[0.97928607, 0.02071393],
       [0.97872266, 0.02127734],
       [0.97872266, 0.02127734],
       ...,
       [0.97928607, 0.02071393],
       [0.97872266, 0.02127734],
       [0.97928607, 0.02071393]])

In [136]:
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

[[ 7520 16340]
 [   98   422]]
              precision    recall  f1-score   support

           0       0.99      0.32      0.48     23860
           1       0.03      0.81      0.05       520

    accuracy                           0.33     24380
   macro avg       0.51      0.56      0.26     24380
weighted avg       0.97      0.33      0.47     24380

