In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.utils import shuffle
import joblib
import pickle

In [None]:
!pip install joblib==1.1.0

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


# Load the dataset

In [None]:
# Read the dataset from the Excel workbook and, in the same chain, recode the
# textual 'error' label as booleans ('yes' -> True, 'no' -> False).
printer_dataset = pd.read_excel('dataSet.xlsx').assign(
    error=lambda frame: frame['error'].map({'yes': True, 'no': False})
)

# Explore and prepare the data

In [None]:
# Preview the first rows of the loaded dataset.
printer_dataset.head()

Unnamed: 0,field1,field2,field3,error
0,0.31,1.02,-9.18,False
1,-0.78,0.98,-9.1,False
2,0.04,0.67,-8.28,False
3,-0.04,0.9,-8.16,False
4,0.0,1.26,-7.77,False


In [None]:
# Shuffle the rows. The fixed random_state keeps the row order, and therefore
# every output that depends on it, identical across re-runs.
printer_dataset = shuffle(printer_dataset, random_state=2)

In [None]:
# Preview the first rows again, now in shuffled order.
printer_dataset.head()

Unnamed: 0,field1,field2,field3,error
742,-0.08,1.14,11.49,True
82,-0.04,0.63,-6.79,False
668,-0.12,1.14,11.77,True
553,0.0,0.86,11.65,True
713,-0.08,0.9,11.73,True


In [None]:
# (number of rows, number of columns) of the dataset.
printer_dataset.shape

(1233, 4)

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
printer_dataset.describe()

Unnamed: 0,field1,field2,field3
count,1233.0,1233.0,1233.0
mean,-0.135677,1.098313,10.09193
std,0.217287,0.174594,5.335724
min,-1.84,-0.04,-10.4
25%,-0.24,1.02,11.53
50%,-0.12,1.1,11.65
75%,-0.04,1.22,11.77
max,1.65,1.73,12.36


In [None]:
# Class balance: number of rows for each value of the 'error' label.
printer_dataset['error'].value_counts()

True     984
False    249
Name: error, dtype: int64

In [None]:
# Mean of every feature within each 'error' class.
printer_dataset.groupby('error').mean()

Unnamed: 0_level_0,field1,field2,field3
error,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
False,-0.109598,1.002731,3.850924
True,-0.142276,1.1225,11.671209


In [None]:
# Split the frame into the label column (Y) and the remaining feature columns (X).
Y = printer_dataset['error']
X = printer_dataset.drop(columns='error')

In [None]:
# Show the feature matrix (all columns except 'error').
print(X)

      field1  field2  field3
742    -0.08    1.14   11.49
82     -0.04    0.63   -6.79
668    -0.12    1.14   11.77
553     0.00    0.86   11.65
713    -0.08    0.90   11.73
...      ...     ...     ...
562    -0.12    0.98   11.81
1111    0.04    1.33   11.77
957    -0.20    1.06   11.73
499    -0.04    1.14   11.57
867    -0.08    1.10   11.77

[1233 rows x 3 columns]


In [None]:
# Show the boolean 'error' labels.
print(Y)

742      True
82      False
668      True
553      True
713      True
        ...  
562      True
1111     True
957      True
499      True
867      True
Name: error, Length: 1233, dtype: bool


In [None]:
# Apply standardization to the features

In [None]:
# Scaler used to standardize the feature columns.
scaler = StandardScaler()

In [None]:
# Learn the per-feature scaling statistics from X.
# NOTE(review): X is the complete dataset at this point; the train/test split
# is performed later, so the test rows contribute to these statistics (data
# leakage). Consider fitting the scaler on the training split only.
scaler.fit(X)

StandardScaler()

In [None]:
# Apply the fitted scaling to X; the result is kept under a separate name.
standardized_data = scaler.transform(X)

In [None]:
# Inspect the standardized feature values.
print(standardized_data)

[[ 0.25634152  0.23886206  0.26212694]
 [ 0.44050414 -2.68338752 -3.16522743]
 [ 0.0721789   0.23886206  0.31462471]
 ...
 [-0.29614634 -0.21953003  0.30712503]
 [ 0.44050414  0.23886206  0.2771263 ]
 [ 0.25634152  0.00966601  0.31462471]]


In [None]:
# From here on X is the standardized feature array; Y is the 'error' label column.
X, Y = standardized_data, printer_dataset['error']

In [None]:
# Inspect the features and labels that are passed to the train/test split.
print(X)
print(Y)

[[ 0.25634152  0.23886206  0.26212694]
 [ 0.44050414 -2.68338752 -3.16522743]
 [ 0.0721789   0.23886206  0.31462471]
 ...
 [-0.29614634 -0.21953003  0.30712503]
 [ 0.44050414  0.23886206  0.2771263 ]
 [ 0.25634152  0.00966601  0.31462471]]
742      True
82      False
668      True
553      True
713      True
        ...  
562      True
1111     True
957      True
499      True
867      True
Name: error, Length: 1233, dtype: bool


In [None]:
# Train/test split

In [None]:
# Hold out 25% of the rows for testing. Stratifying on Y keeps the class
# proportions the same in both splits; the seed makes the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.25,
    stratify=Y,
    random_state=2,
)

In [None]:
# Shapes of the full, training, and test feature matrices.
print(X.shape, X_train.shape, X_test.shape)

(1233, 3) (924, 3) (309, 3)


TRAINING THE MODEL

In [None]:
from sklearn import linear_model

# Logistic-regression classifier for the boolean 'error' label, using the
# liblinear solver with a fixed seed. `multi_class` is deliberately not
# passed: 'auto' is already the default, and the parameter is deprecated in
# recent scikit-learn releases, where passing it explicitly emits a
# FutureWarning.
LRG = linear_model.LogisticRegression(random_state=0, solver='liblinear')



In [None]:

# Fit the classifier on the training split.
LRG.fit(X_train, Y_train)
# Persist the fitted classifier to disk so it can be reloaded later.
# NOTE(review): only the classifier is saved in this cell; the fitted scaler
# is not. Confirm it is persisted elsewhere if the model is used outside this
# session.
joblib.dump(LRG,"Trained_Model.joblib")

['Trained_Model.joblib']

ACCURACY

In [None]:
# accuracy score on the training data
X_train_prediction = LRG.predict(X_train)
# accuracy_score's signature is (y_true, y_pred): the ground truth goes first.
training_data_accuracy = accuracy_score(Y_train, X_train_prediction)

In [None]:
# Report the training accuracy as a percentage.
print('Accuracy score of the training data : ', training_data_accuracy*100,"%")

Accuracy score of the training data :  87.44588744588745 %


In [None]:
# accuracy score on the test data
X_test_prediction = LRG.predict(X_test)
# accuracy_score's signature is (y_true, y_pred): the ground truth goes first.
test_data_accuracy = accuracy_score(Y_test, X_test_prediction)

In [None]:
# Report the held-out test accuracy as a percentage.
print('Accuracy score of the test data : ', test_data_accuracy*100,"%")

Accuracy score of the test data :  88.67313915857605 %


MAKING A PREDICTIVE SYSTEM

In [None]:
# Classify one reading (one value per feature column) as error / no error.
input_data = (0.27,1.22,-7.06)

# changing the input_data to numpy array
input_data_as_numpy_array = np.asarray(input_data)

# reshape to a single row (one instance) with one column per feature
input_data_reshaped = input_data_as_numpy_array.reshape(1, -1)

# Label the columns with the names the scaler was fitted on, so that
# scaler.transform() does not warn "X does not have valid feature names".
# If the scaler was fitted on a plain array there are no names to reuse and
# the columns stay unnamed.
feature_names = getattr(scaler, 'feature_names_in_', None)
df = pd.DataFrame(input_data_reshaped, columns=feature_names)

# standardize the input data with the already-fitted scaler
std_data = scaler.transform(df)

# Reload the persisted classifier and classify the standardized reading.
# NOTE: joblib.load unpickles the file; only load model files you trust.
m_jlib = joblib.load('Trained_Model.joblib')
prediction = m_jlib.predict(std_data)
#print(prediction)

if prediction[0]:
  print('Error')
else:
  print('no error')

no error


  "X does not have valid feature names, but"
