In [20]:
'''
Preprocessing techniques: https://scikit-learn.org/stable/modules/preprocessing.html
https://www.analyticsvidhya.com/blog/2017/09/common-machine-learning-algorithms/
https://analyticsindiamag.com/7-types-classification-algorithms/#:~:text=2%20Types%20of%20Classification%20Algorithms%20(Python)&text=Definition%3A%20Logistic%20regression%20is%20a,modelled%20using%20a%20logistic%20function.
https://developer.ibm.com/technologies/data-science/tutorials/learn-classification-algorithms-using-python-and-scikit-learn/
'''
'''
Bagging Vs Boosting: https://towardsdatascience.com/decision-tree-ensembles-bagging-and-boosting-266a8ba60fd9
Bagging Vs Boosting Vs Stacking (Linear combination) Vs Deep Stacking (Non linear combination): 
https://www.kdnuggets.com/2019/09/ensemble-learning.html
Regularization: https://www.analyticsvidhya.com/blog/2015/02/avoid-over-fitting-regularization/?
'''

#Linear Regression
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn import preprocessing

data = pd.read_csv('winequality-white.csv', ";")
X = data.drop(columns=['quality'],axis=1)
y = data['quality']
train_x, test_x, train_y, test_y = train_test_split(X, y, test_size=0.2)
train_x_scaled = preprocessing.scale(train_x)
test_x_scaled = preprocessing.scale(test_x)
print('Train X Scaled\n')
print(train_x_scaled[:5])
print('Train Y\n')
print(train_y[:5])
print('Test X Scaled\n')
print(test_x_scaled[:5])
print('Test Y\n')
print(test_y[:5])

'''
Create the object of the Linear Regression model
You can also add other parameters and test your code here
Some parameters are : fit_intercept and normalize
Documentation of sklearn LinearRegression: 

https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html
'''

model = LinearRegression()
model.fit(train_x_scaled, train_y)
# coefficeints of the trained model
print('\nCoefficient of model :', model.coef_)

# intercept of the model
print('\nIntercept of model',model.intercept_)

# predict the target on the training dataset
predict_train = model.predict(train_x_scaled)
print('Wine rating prediction on training data', predict_train[:5])

# Root Mean Squared Error on training dataset
rmse_train = mean_squared_error(train_y, predict_train) ** (0.5)
print('\nRMSE on train dataset : ', rmse_train)

# predict the target on the training dataset
predict_test = model.predict(test_x_scaled)
print('Wine rating prediction on test data', predict_test[:5])

# Root Mean Squared Error on training dataset
rmse_test = mean_squared_error(test_y, predict_test) ** (0.5)
print('\nRMSE on test dataset : ', rmse_test)

Train X Scaled

[[-1.49862697e+00 -9.69101605e-01  2.01793699e+00 -1.00752959e+00
  -5.48676479e-01 -3.83769013e-01 -2.28141539e-01 -1.39472138e+00
   2.12885081e+00  9.66685585e-01  1.21051963e+00]
 [ 8.88938537e-01  1.22416316e+00 -5.34068329e-01 -2.34442340e-01
  -7.38744289e-01 -2.04051450e-01 -8.90863849e-01 -7.68029672e-01
   2.11386508e-01  1.14173581e+00  1.45549599e+00]
 [-1.14049215e+00  2.78369258e-02 -1.60426411e+00  4.30999853e-01
  -6.43710384e-01 -3.23863159e-01  7.95509621e-02  4.65352951e-01
   6.08103260e-01 -4.33716188e-01 -8.30950074e-01]
 [ 2.08272129e+00 -1.16848931e+00  1.27703222e+00  1.76566073e-01
  -1.21023907e-01 -1.58188610e+00 -1.79027270e+00  4.25351353e-01
  -3.83688620e-01  8.79160474e-01 -1.43621927e-02]
 [ 4.11425435e-01 -1.06879546e+00  4.21909357e-02  4.19610427e-04
  -1.68540860e-01 -1.16254512e+00 -6.54177310e-01 -4.14682218e-01
  -1.30936104e+00 -7.83816632e-01  2.30614172e-01]]
Train Y

2866    7
2722    7
2537    6
1593    6
3809    6
Name: qua

In [28]:
#Logistic Regression
from sklearn.linear_model import LogisticRegression

'''
Create the object of the Logistic Regression model
You can also add other parameters and test your code here
Some parameters are : fit_intercept and penalty
Documentation of sklearn LogisticRegression: 

https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html
'''

model = LogisticRegression()
model.fit(train_x_scaled, train_y)
model.predict(train_x_scaled)

# coefficeints of the trained model
print('\nCoefficient of model :', model.coef_)

# intercept of the model
print('\nIntercept of model',model.intercept_)

# predict the target on the training dataset
predict_train = model.predict(train_x_scaled)
print('Wine rating prediction on training data', predict_train[:5])

# Root Mean Squared Error on training dataset
rmse_train = mean_squared_error(train_y, predict_train) ** (0.5)
print('\nRMSE on train dataset : ', rmse_train)

# predict the target on the training dataset
predict_test = model.predict(test_x_scaled)
print('Wine rating prediction on training data', predict_test[:5])

# Root Mean Squared Error on training dataset
rmse_test = mean_squared_error(test_y, predict_test) ** (0.5)
print('\nRMSE on test dataset : ', rmse_test)


Coefficient of model : [[ 5.00261222e-01  2.85242867e-01 -2.67460329e-01 -1.36961873e-01
   4.90057560e-01  5.19496546e-01 -3.78518194e-02  3.46875772e-01
   5.98064342e-02 -1.88356193e-01  3.74224527e-02]
 [-2.53728772e-01  6.30866319e-01  7.55932075e-03 -1.20522526e+00
   1.44106776e-01 -8.03083431e-01 -4.45548432e-02  1.07301539e+00
  -3.70928235e-01 -1.13151573e-01 -6.54095563e-01]
 [-3.68302762e-01  2.90534365e-01  8.04058617e-02 -6.55015798e-01
   1.94499273e-01 -2.14477824e-01  1.85376162e-01  6.55869204e-01
  -3.92215255e-01 -1.73071716e-01 -8.69011542e-01]
 [-4.05641820e-01 -3.15373443e-01  8.78455553e-02 -2.15608059e-01
   2.42167497e-01 -1.02763553e-01  1.18743859e-01  4.38918553e-01
  -3.37679261e-01  6.37782951e-02  1.57019164e-01]
 [ 1.38900383e-02 -5.21006692e-01 -3.34063388e-02  9.38213683e-01
  -1.48823957e-02 -1.56427139e-02  1.18035970e-01 -1.28255029e+00
   1.09500775e-01  2.62038620e-01  8.88635629e-02]
 [-8.90423349e-02 -3.71077380e-01  6.81294905e-03  9.98955767

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


In [32]:
# Decision Tree
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
'''
Create the object of the Decision Tree model
You can also add other parameters and test your code here
Some parameters are : max_depth and max_features
Documentation of sklearn DecisionTreeClassifier: 

https://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeClassifier.html
'''

model = DecisionTreeClassifier()
model.fit(train_x_scaled, train_y)

# depth of the decision tree
print('Depth of the Decision Tree :', model.get_depth())

# predict the target on the train dataset
predict_train = model.predict(train_x_scaled)
print('Target on train data', predict_train[:5]) 

# Accuray Score on train dataset
accuracy_train = accuracy_score(train_y, predict_train)
print('accuracy_score on train dataset : ', accuracy_train)

# predict the target on the test dataset
predict_test = model.predict(test_x_scaled)
print('Target on test data', predict_test[:5]) 

# Accuracy Score on test dataset
accuracy_test = accuracy_score(test_y, predict_test)
print('accuracy_score on test dataset : ', accuracy_test)

Depth of the Decision Tree : 24
Target on train data [7 7 6 6 6]
accuracy_score on train dataset :  1.0
Target on test data [6 6 6 6 4]
accuracy_score on test dataset :  0.5642857142857143


In [35]:
# Support Vector Machine (SVM)
from sklearn.svm import SVC

'''
Create the object of the Support Vector Classifier model
You can also add other parameters and test your code here
Some parameters are : kernal and degree
Documentation of sklearn Support Vector Classifier: 

https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html
'''

model = SVC()
model.fit(train_x_scaled, train_y)
# predict the target on the train dataset
predict_train = model.predict(train_x_scaled)
print('Target on train data', predict_train[:5]) 

# Accuray Score on train dataset
accuracy_train = accuracy_score(train_y, predict_train)
print('accuracy_score on train dataset : ', accuracy_train)

# predict the target on the test dataset
predict_test = model.predict(test_x_scaled)
print('Target on test data', predict_test[:5]) 

# Accuracy Score on test dataset
accuracy_test = accuracy_score(test_y, predict_test)
print('accuracy_score on test dataset : ', accuracy_test)

Target on train data [6 7 5 6 6]
accuracy_score on train dataset :  0.6158754466564573
Target on test data [6 6 5 6 5]
accuracy_score on test dataset :  0.55


In [38]:
# Naive bayes classifier
from sklearn.naive_bayes import GaussianNB

'''
Create the object of the Naive Bayes model
You can also add other parameters and test your code here
Some parameters are : var_smoothing
Documentation of sklearn GaussianNB: 

https://scikit-learn.org/stable/modules/generated/sklearn.naive_bayes.GaussianNB.html
'''
model = GaussianNB()
model.fit(train_x_scaled, train_y)

# predict the target on the train dataset
predict_train = model.predict(train_x_scaled)
print('Target on train data', predict_train[:5]) 

# Accuray Score on train dataset
accuracy_train = accuracy_score(train_y, predict_train)
print('accuracy_score on train dataset : ', accuracy_train)

# predict the target on the test dataset
predict_test = model.predict(test_x_scaled)
print('Target on test data', predict_test[:5]) 

# Accuracy Score on test dataset
accuracy_test = accuracy_score(test_y, predict_test)
print('accuracy_score on test dataset : ', accuracy_test)

Target on train data [6 7 5 6 7]
accuracy_score on train dataset :  0.4476773864216437
Target on test data [7 6 5 7 5]
accuracy_score on test dataset :  0.4153061224489796


In [40]:
#KNN (K-Nearest Neighbour)
from sklearn.neighbors import KNeighborsClassifier

'''
Create the object of the K-Nearest Neighbor model
You can also add other parameters and test your code here
Some parameters are : n_neighbors, leaf_size
Documentation of sklearn K-Neighbors Classifier: 

https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsClassifier.html
'''
model = KNeighborsClassifier()
model.fit(train_x_scaled, train_y)

# predict the target on the train dataset
predict_train = model.predict(train_x_scaled)
print('Target on train data', predict_train[:5]) 

# Accuray Score on train dataset
accuracy_train = accuracy_score(train_y, predict_train)
print('accuracy_score on train dataset : ', accuracy_train)

# predict the target on the test dataset
predict_test = model.predict(test_x_scaled)
print('Target on test data', predict_test[:5]) 

# Accuracy Score on test dataset
accuracy_test = accuracy_score(test_y, predict_test)
print('accuracy_score on test dataset : ', accuracy_test)

Target on train data [6 7 6 4 5]
accuracy_score on train dataset :  0.704951505870342
Target on test data [6 5 5 6 5]
accuracy_score on test dataset :  0.5857142857142857


In [42]:
#Random Forest
from sklearn.ensemble import RandomForestClassifier
'''
Create the object of the Random Forest model
You can also add other parameters and test your code here
Some parameters are : n_estimators and max_depth
Documentation of sklearn RandomForestClassifier: 

https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html
'''
model = RandomForestClassifier()
model.fit(train_x_scaled, train_y)

# predict the target on the train dataset
predict_train = model.predict(train_x_scaled)
print('Target on train data', predict_train[:5]) 

# Accuray Score on train dataset
accuracy_train = accuracy_score(train_y, predict_train)
print('accuracy_score on train dataset : ', accuracy_train)

# predict the target on the test dataset
predict_test = model.predict(test_x_scaled)
print('Target on test data', predict_test[:5]) 

# Accuracy Score on test dataset
accuracy_test = accuracy_score(test_y, predict_test)
print('accuracy_score on test dataset : ', accuracy_test)

Target on train data [7 7 6 6 6]
accuracy_score on train dataset :  1.0
Target on test data [6 6 5 6 5]
accuracy_score on test dataset :  0.676530612244898


In [47]:
#Dimensionality Reduction Algorithm 
from sklearn.decomposition import PCA

# create the object of the PCA (Principal Component Analysis) model
# reduce the dimensions of the data to 3
'''
You can also add other parameters and test your code here
Some parameters are : svd_solver, iterated_power
Documentation of sklearn PCA:

https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html
'''
model_pca = PCA(n_components = 3)
new_train_x_scaled = model_pca.fit_transform(train_x_scaled)
new_test_x_scaled = model_pca.fit_transform(test_x_scaled)
print(new_train_x_scaled[:5])
print(new_test_x_scaled[:5])

model = LinearRegression()
model.fit(new_train_x_scaled, train_y)

# coefficeints of the trained model
print('\nCoefficient of model :', model.coef_)

# intercept of the model
print('\nIntercept of model',model.intercept_)

# predict the target on the training dataset
predict_train = model.predict(new_train_x_scaled)
print('Wine rating prediction on training data', predict_train[:5])

# Root Mean Squared Error on training dataset
rmse_train = mean_squared_error(train_y, predict_train) ** (0.5)
print('\nRMSE on train dataset : ', rmse_train)

# predict the target on the training dataset
predict_test = model.predict(new_test_x_scaled)
print('Wine rating prediction on test data', predict_test[:5])

# Root Mean Squared Error on training dataset
rmse_test = mean_squared_error(test_y, predict_test) ** (0.5)
print('\nRMSE on test dataset : ', rmse_test)

[[-2.17861011 -1.3399446  -2.52473092]
 [-1.61093687  0.28522184  0.1114597 ]
 [ 0.08636011 -1.50707124  1.31235862]
 [-0.34943664  2.6556558  -1.14756936]
 [-0.77768256  1.71214164  0.15824081]]
[[-1.81671612 -1.46388586  1.32807389]
 [-0.81434045  0.76000364  0.69364546]
 [ 2.90479439 -0.66157906 -1.91666535]
 [-2.50673829  0.13791526  0.92299307]
 [ 0.37545164 -0.89686564 -2.64789789]]

Coefficient of model : [-0.14861835 -0.04255709 -0.18420398]

Intercept of model 5.8700867789688616
Wine rating prediction on training data [6.71595783 6.07683202 5.67964697 6.02038932 5.88365232]

RMSE on train dataset :  0.8199043825394794
Wine rating prediction on test data [5.95774636 5.83099691 5.8195933  6.06674581 6.3402091 ]

RMSE on test dataset :  0.9044853201329642


In [49]:
#GBM (Gradient Boosting Model)
from sklearn.ensemble import GradientBoostingClassifier

'''
Create the object of the GradientBoosting Classifier model
You can also add other parameters and test your code here
Some parameters are : learning_rate, n_estimators
Documentation of sklearn GradientBoosting Classifier: 

https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.GradientBoostingClassifier.html
'''
model = GradientBoostingClassifier(n_estimators=100, max_depth=5)
model.fit(train_x_scaled, train_y)

# predict the target on the train dataset
predict_train = model.predict(train_x_scaled)
print('Target on train data', predict_train[:5]) 

# Accuray Score on train dataset
accuracy_train = accuracy_score(train_y, predict_train)
print('accuracy_score on train dataset : ', accuracy_train)

# predict the target on the test dataset
predict_test = model.predict(test_x_scaled)
print('Target on test data', predict_test[:5]) 

# Accuracy Score on test dataset
accuracy_test = accuracy_score(test_y, predict_test)
print('accuracy_score on test dataset : ', accuracy_test)

Target on train data [7 7 6 6 6]
accuracy_score on train dataset :  0.9328739152628892
Target on test data [6 6 5 6 4]
accuracy_score on test dataset :  0.6051020408163266


In [51]:
#XGBoost (eXtreme Gradient Boosting)
from xgboost import XGBClassifier

model = XGBClassifier()
model.fit(train_x_scaled, train_y)

# predict the target on the train dataset
predict_train = model.predict(train_x_scaled)
print('Target on train data', predict_train[:5]) 

# Accuray Score on train dataset
accuracy_train = accuracy_score(train_y, predict_train)
print('accuracy_score on train dataset : ', accuracy_train)

# predict the target on the test dataset
predict_test = model.predict(test_x_scaled)
print('Target on test data', predict_test[:5]) 

# Accuracy Score on test dataset
accuracy_test = accuracy_score(test_y, predict_test)
print('accuracy_score on test dataset : ', accuracy_test)

Target on train data [7 7 5 6 6]
accuracy_score on train dataset :  0.6562021439509954
Target on test data [6 6 5 6 5]
accuracy_score on test dataset :  0.5693877551020409
