# Wine Quality Red - Ordinal Logistic Solution

In [None]:
import warnings
warnings.filterwarnings('ignore')
%autosave 5

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn import datasets
%matplotlib inline
from sklearn.cluster import KMeans
from matplotlib import style
from scipy.cluster.hierarchy import dendrogram, linkage
style.use('ggplot')
from sklearn import preprocessing
import math


# Importing data

In [None]:
# Read the red-wine quality CSV into `df` and keep a separate copy of the frame as loaded.
df = pd.read_csv('../input/red-wine-quality-cortez-et-al-2009/winequality-red.csv')
df_copy=df.copy()

In [None]:
#present data
df.head()

# Data visualization

In [None]:
df.info()

### count plot for quality

In [None]:
# Pass the column explicitly as `x`: recent seaborn releases treat the first
# positional argument as `data`, so a bare Series is no longer read as x.
sns.countplot(x=df['quality'])


In [None]:
df['quality'].value_counts()

### From the above barplot we see that the data is not equally distributed: many more observations fall at the median quality levels 5 and 6 than at the best or poorest levels.


### barplot data presentation

In [None]:
# Mean of each of the first 11 columns per quality level, one panel per column on a 3x4 grid.
fig, ax = plt.subplots(nrows=3, ncols=4, figsize=(20, 10))
for feature, panel in zip(df.columns[:11], ax.flatten()):
    sns.barplot(x=df['quality'], y=df[feature], ax=panel)
fig.tight_layout()
plt.show()

### From the above barplots we can see that, visually, most of the quality can be explained by the following parameters: volatile acidity, citric acid, chlorides, sulphates, alcohol. An interesting parameter is the sulfur dioxide, which at low values may indicate low or high quality, but at high values indicates average quality.

### In this analysis, we will initially use all parameters, and see whether the visual impression relates to the  according p-values obtained.

In [None]:
# Pairwise scatter/distribution grid over every column of df, coloured by quality level.
sns.pairplot(df,hue='quality')
plt.show()

In [None]:
# Two-column copy (one predictor plus the quality label) and a preview of its first rows.
df1 = df[['volatile acidity', 'quality']].copy()

df1.head()

In [None]:
# Names of the first 11 columns of df.
col = list(df.columns)[:11]


### showing only diagonal pairplot, with distribution per quality.

In [None]:
# One single-variable pairplot per predictor, i.e. its distribution split by quality level.
for i in col:
    df1 = df[[i, 'quality']].copy()
    # `height` is the current name of the per-facet size argument (formerly `size`).
    sns.pairplot(df1, hue='quality', height=5)
plt.show()



### From the above distributions we see that the parameters which may have more influence on the quality level are: fixed acidity, volatile acidity, citric acid, density, sulphates, alcohol. We'll see next, from the P-values, whether the visual interpretation is verified.

In [None]:
df.describe()

# Correlation

### Cross correlation is investigated to see whether correlation parameters are needed in the regression in the form of XiXj

In [None]:
# Pairwise correlation matrix of all columns of df (DataFrame.corr defaults), displayed below.
df_corr=df.corr()
df_corr

In [None]:
df.head()

In [None]:
# Column names of df as a plain list, in column order.
col_list = df.columns.values.tolist()
col_list

In [None]:
# Working copy of df, so columns can be dropped/added without touching df.
df2=df.copy()
df2

In [None]:
# Remove the quality column from df2 in place, leaving the remaining columns only.
df2.drop(['quality'], axis='columns', inplace=True)
df2

In [None]:
# Column names of df2 (df without quality) as a plain list.
col_list2 = df2.columns.values.tolist()
col_list2

### If correlations above 0.8 exist, the corresponding products XiXj will be added as parameters to the dataset.

In [None]:
    count=0
    for i in range(1,11):
        for j in range(i+1,11):
            if (abs(df_corr.iloc[i][j])>0.8 and i!=j):
                mlt="*"
                string=col_list[i]+mlt+col_list[j]
                df2[string]=df2[col_list[i]]*df2[col_list[j]]
                count=count+1
        
df2.head()


In [None]:
print(count)

### The parameters are not highly correlated, so we will not use correlated parameters in the analysis.

In [None]:
# Correlation heatmap
# NOTE(review): `mask` is computed here but is not passed to sns.heatmap below.
mask=np.array(df_corr)
# Resize the current figure, then draw the annotated correlation matrix with square cells.
fig=plt.gcf()
fig.set_size_inches(30,12)
sns.heatmap(df_corr,annot=True,cbar=True,square=True)


### No high correlations found, the dataset will remain as given.

# Ordinal Logistic regression

### Ordinal Logistic Regression is the common analysis method used for ordinal dependent variables. This model is used in medicine and the social sciences, where subjective dependent variables exist, such as level of pain, satisfaction or cancer development level. Using a non-ordinal classification method misses the ordinality of the dependent variable, which is essential for good prediction and problem description.

In [None]:
import statsmodels.api as sm
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import roc_auc_score
from sklearn.metrics import classification_report
import statsmodels.api as sm 


### Python has no built-in model for ordinal logistic regression, therefore it will be implemented here. We will use the proportional odds model for the Ordinal Logistic Regression. Denoting the number of categories by K, we make (K-1) logistic regression comparisons, each time cutting the entire dataset in two and comparing all the lower categories to the higher categories. Each of these constitutes a sub-model, and finally they will all be blended into one holistic model. In our example, since there are 6 quality categories, we make 5 logistic regression models.

In [None]:
# Cut 3 | 4+: quality becomes 1 when the original quality is >= 4, otherwise 0.
# Whole-column assignment replaces per-row chained indexing, which pandas does
# not guarantee to write back, and removes the hard-coded row count.
df34 = df.copy()
df34['quality'] = (df['quality'] >= 4).astype(int)
# Number of observations in the lower class (label 0).
count34 = int((df34['quality'] == 0).sum())
#df34  

In [None]:
#df

### The count_ij parameters below indicate the number of observations in the lower class. Recalling that there are 1,599 observations in total, this variable indicates the level of imbalance in the specific sub-model partition. The imbalance existing in the dataset is amplified by the Ordinal Logistic Regression analysis method.

In [None]:
count34

In [None]:
# Cut 3-4 | 5+: quality becomes 1 when the original quality is >= 5, otherwise 0.
# Whole-column assignment replaces per-row chained indexing, which pandas does
# not guarantee to write back, and removes the hard-coded row count.
df45 = df.copy()
df45['quality'] = (df['quality'] >= 5).astype(int)
# Number of observations in the lower class (label 0).
count45 = int((df45['quality'] == 0).sum())
#df45

In [None]:
count45

In [None]:
# Cut 3-5 | 6+: quality becomes 1 when the original quality is >= 6, otherwise 0.
# Whole-column assignment replaces per-row chained indexing, which pandas does
# not guarantee to write back, and removes the hard-coded row count.
df56 = df.copy()
df56['quality'] = (df['quality'] >= 6).astype(int)
# Number of observations in the lower class (label 0).
count56 = int((df56['quality'] == 0).sum())
#df56

In [None]:
count56

In [None]:
# Cut 3-6 | 7+: quality becomes 1 when the original quality is >= 7, otherwise 0.
# Whole-column assignment replaces per-row chained indexing, which pandas does
# not guarantee to write back, and removes the hard-coded row count.
df67 = df.copy()
df67['quality'] = (df['quality'] >= 7).astype(int)
# Number of observations in the lower class (label 0).
count67 = int((df67['quality'] == 0).sum())
#df67

In [None]:
count67

In [None]:
# Cut 3-7 | 8: quality becomes 1 when the original quality is >= 8, otherwise 0.
# Whole-column assignment replaces per-row chained indexing, which pandas does
# not guarantee to write back, and removes the hard-coded row count.
df78 = df.copy()
df78['quality'] = (df['quality'] >= 8).astype(int)
# Number of observations in the lower class (label 0).
count78 = int((df78['quality'] == 0).sum())
#df78

In [None]:
count78

### Let us now train and predict with each of the five logistic regression sub-models

In [None]:
# 3|4+ sub-model on the first 11 columns: 80/20 split (seed 0), fit, predict test labels.
X34, y34 = df34.iloc[:, :11], df34['quality']
X_train34, X_test34, y_train34, y_test34 = train_test_split(
    X34, y34, random_state=0, test_size=0.2
)
lgr34 = LogisticRegression().fit(X_train34, y_train34)
pred34 = lgr34.predict(X_test34)
pred34

In [None]:
roc_auc_score(y_test34,pred34)

### We see that due to low number of observations in quality level 3, all predictions are 1. The AUC is 0.5, meaning the regression has no significance.

In [None]:
df34

In [None]:
# 3-4|5+ sub-model on the first 11 columns: 80/20 split (seed 0), fit, predict test labels.
X45, y45 = df45.iloc[:, :11], df45['quality']
X_train45, X_test45, y_train45, y_test45 = train_test_split(
    X45, y45, random_state=0, test_size=0.2
)
lgr45 = LogisticRegression().fit(X_train45, y_train45)
pred45 = lgr45.predict(X_test45)
pred45

In [None]:
roc_auc_score(y_test45,pred45)

In [None]:
#df45

### We received a bad AUC and homogeneous predicted vector here as well, due to lack of observations in categories 3+4.

In [None]:
# 3-5|6+ sub-model on the first 11 columns: 80/20 split (seed 0), fit, predict test labels.
X56, y56 = df56.iloc[:, :11], df56['quality']
X_train56, X_test56, y_train56, y_test56 = train_test_split(
    X56, y56, random_state=0, test_size=0.2
)
lgr56 = LogisticRegression().fit(X_train56, y_train56)
pred56 = lgr56.predict(X_test56)
pred56

In [None]:
roc_auc_score(y_test56,pred56)

### The AUC received here is not optimal, but reasonable for a single sub-model.

In [None]:
# Refit the 5-6 training split with statsmodels and print its summary table.
# NOTE(review): X_train56 is passed as-is (no sm.add_constant), so this Logit
# presumably has no intercept, unlike the sklearn fit - confirm this is intended.
log_reg56 = sm.Logit(y_train56, X_train56).fit() 
print(log_reg56.summary()) 

### Some of the P-values obtained are higher than 0.05, and will be omitted later from the model.

In [None]:
# 3-6|7+ sub-model on the first 11 columns: 80/20 split (seed 0), fit, predict test labels.
X67, y67 = df67.iloc[:, :11], df67['quality']
X_train67, X_test67, y_train67, y_test67 = train_test_split(
    X67, y67, random_state=0, test_size=0.2
)
lgr67 = LogisticRegression().fit(X_train67, y_train67)
pred67 = lgr67.predict(X_test67)
pred67

In [None]:
roc_auc_score(y_test67,pred67)

### The AUC received here is not optimal, but reasonable for a single sub-model.

In [None]:
# Refit the 6-7 training split with statsmodels and print its summary table.
# NOTE(review): X_train67 is passed as-is (no sm.add_constant), so this Logit
# presumably has no intercept, unlike the sklearn fit - confirm this is intended.
log_reg67 = sm.Logit(y_train67, X_train67).fit() 
print(log_reg67.summary()) 

### Some of the P-values obtained are higher than 0.05, and will be omitted later from the model.

In [None]:
# 3-7|8 sub-model on the first 11 columns: 80/20 split (seed 0), fit, predict test labels.
X78, y78 = df78.iloc[:, :11], df78['quality']
X_train78, X_test78, y_train78, y_test78 = train_test_split(
    X78, y78, random_state=0, test_size=0.2
)
lgr78 = LogisticRegression().fit(X_train78, y_train78)
pred78 = lgr78.predict(X_test78)
pred78

In [None]:
roc_auc_score(y_test78,pred78)

### We received a bad AUC and a homogeneous predicted Y vector here as well, due to the lack of observations in category 8.

### Due to the results of the first run, we have two types of conclusions:
### 1.  We see that due to the low number of observations in the 3, 3+4 and 8 quality categories, the model could not    
###      fit well, and did not take into account the extremal qualities
### 2. Due to high P-values, we will omit from all sub-models the following parameters: fixed acidity, free sulfur dioxide, pH.
###   From sub-model 5-6 we will also omit residual sugar, and from sub-model 6-7 we omit citric acid. The 
###   following are the results of the new runs.

### Omitting the parameters fixed acidity, free sulfur dioxide and pH from the entire model

In [None]:
# Reduced frame without the three predictors judged insignificant in the first run.
# `columns=` is used because pandas 2 no longer accepts the axis positionally.
df_low = df.drop(columns=['fixed acidity', 'free sulfur dioxide', 'pH'])
df_low

In [None]:
# Cut 3 | 4+ on the reduced frame: quality becomes 1 when the original is >= 4, otherwise 0.
# Whole-column assignment replaces per-row chained indexing and the hard-coded row count.
df34 = df_low.copy()
df34['quality'] = (df_low['quality'] >= 4).astype(int)
# Number of observations in the lower class (label 0).
count34 = int((df34['quality'] == 0).sum())

In [None]:
df34

In [None]:
# Cut 3-4 | 5+ on the reduced frame: quality becomes 1 when the original is >= 5, otherwise 0.
# Whole-column assignment replaces per-row chained indexing and the hard-coded row count.
df45 = df_low.copy()
df45['quality'] = (df_low['quality'] >= 5).astype(int)
# Number of observations in the lower class (label 0).
count45 = int((df45['quality'] == 0).sum())

In [None]:
df45

In [None]:
# Cut 3-5 | 6+ on the reduced frame, additionally without residual sugar.
# drop(columns=...) returns a new frame; pandas 2 no longer accepts the axis positionally.
df56 = df_low.drop(columns='residual sugar')
# Quality becomes 1 when the original is >= 6, otherwise 0 (whole-column assignment
# instead of per-row chained indexing with a hard-coded row count).
df56['quality'] = (df_low['quality'] >= 6).astype(int)
# Number of observations in the lower class (label 0).
count56 = int((df56['quality'] == 0).sum())

In [None]:
df56

In [None]:
# Cut 3-6 | 7+ on the reduced frame, additionally without citric acid.
# drop(columns=...) returns a new frame; pandas 2 no longer accepts the axis positionally.
df67 = df_low.drop(columns='citric acid')
# Quality becomes 1 when the original is >= 7, otherwise 0 (whole-column assignment
# instead of per-row chained indexing with a hard-coded row count).
df67['quality'] = (df_low['quality'] >= 7).astype(int)
# Number of observations in the lower class (label 0).
count67 = int((df67['quality'] == 0).sum())

In [None]:
df67

In [None]:
# Cut 3-7 | 8 on the reduced frame: quality becomes 1 when the original is >= 8, otherwise 0.
# The labels are read from df_low, like the other reduced-frame cuts, and assigned
# as a whole column instead of per-row chained indexing with a hard-coded row count.
df78 = df_low.copy()
df78['quality'] = (df_low['quality'] >= 8).astype(int)
# Number of observations in the lower class (label 0).
count78 = int((df78['quality'] == 0).sum())

In [None]:
df78

## Let us run all regressions again, and check for AUC and P-values

In [None]:
# 3|4+ sub-model on the first 8 columns of the reduced frame: split, fit, predict.
X34, y34 = df34.iloc[:, :8], df34['quality']
X_train34, X_test34, y_train34, y_test34 = train_test_split(
    X34, y34, random_state=0, test_size=0.2
)
lgr34 = LogisticRegression().fit(X_train34, y_train34)
pred34 = lgr34.predict(X_test34)
pred34

In [None]:
roc_auc_score(y_test34,pred34)

### The still bad AUC means it is truly a low observations problem, and not insignificant variables causing noisy result.

In [None]:
# 3-4|5+ sub-model on the first 8 columns of the reduced frame: split, fit, predict.
X45, y45 = df45.iloc[:, :8], df45['quality']
X_train45, X_test45, y_train45, y_test45 = train_test_split(
    X45, y45, random_state=0, test_size=0.2
)
lgr45 = LogisticRegression().fit(X_train45, y_train45)
pred45 = lgr45.predict(X_test45)
pred45

In [None]:
roc_auc_score(y_test45,pred45)

In [None]:
#df45 

### The still bad AUC means it is truly a low observations problem, and not insignificant variables causing noisy result.

In [None]:
# 3-5|6+ sub-model on the first 7 columns of its reduced frame: split, fit, predict.
X56, y56 = df56.iloc[:, :7], df56['quality']
X_train56, X_test56, y_train56, y_test56 = train_test_split(
    X56, y56, random_state=0, test_size=0.2
)
lgr56 = LogisticRegression().fit(X_train56, y_train56)
pred56 = lgr56.predict(X_test56)
pred56

In [None]:
roc_auc_score(y_test56,pred56)

### Small improvement relative to previous run

In [None]:
# Refit the reduced 5-6 training split with statsmodels and print its summary table.
# NOTE(review): X_train56 is passed as-is (no sm.add_constant), so this Logit
# presumably has no intercept, unlike the sklearn fit - confirm this is intended.
log_reg56 = sm.Logit(y_train56, X_train56).fit() 
print(log_reg56.summary()) 

### Citric acid highly insignificant, and will be omitted now.

In [None]:
# Remove citric acid from the 5-6 frame (pandas 2 needs `columns=` / `axis=` by keyword).
df56 = df56.drop(columns='citric acid')
# Rebuild the 0/1 label from the original quality (>= 6) as a whole column instead
# of per-row chained indexing with a hard-coded row count.
df56['quality'] = (df_low['quality'] >= 6).astype(int)
# Number of observations in the lower class (label 0).
count56 = int((df56['quality'] == 0).sum())

In [None]:
df56

In [None]:
# Final 3-5|6+ sub-model on the first 6 columns; res56 and lgr56 name the same fitted estimator.
X56, y56 = df56.iloc[:, :6], df56['quality']
X_train56, X_test56, y_train56, y_test56 = train_test_split(
    X56, y56, random_state=0, test_size=0.2
)
res56 = lgr56 = LogisticRegression().fit(X_train56, y_train56)
pred56 = lgr56.predict(X_test56)
pred56

In [None]:
roc_auc_score(y_test56,pred56)

### Still non-optimal AUC

In [None]:
# Refit the final 5-6 training split with statsmodels and print its summary table.
# NOTE(review): X_train56 is passed as-is (no sm.add_constant), so this Logit
# presumably has no intercept, unlike the sklearn fit - confirm this is intended.
log_reg56 = sm.Logit(y_train56, X_train56).fit() 
print(log_reg56.summary()) 

### All P-values are significant now.

In [None]:
# 3-6|7+ sub-model on the first 7 columns; res67 and lgr67 name the same fitted estimator.
X67, y67 = df67.iloc[:, :7], df67['quality']
X_train67, X_test67, y_train67, y_test67 = train_test_split(
    X67, y67, random_state=0, test_size=0.2
)
res67 = lgr67 = LogisticRegression().fit(X_train67, y_train67)
pred67 = lgr67.predict(X_test67)
pred67

In [None]:
roc_auc_score(y_test67,pred67)

### Still non-optimal AUC

In [None]:
# Refit the reduced 6-7 training split with statsmodels and print its summary table.
# NOTE(review): X_train67 is passed as-is (no sm.add_constant), so this Logit
# presumably has no intercept, unlike the sklearn fit - confirm this is intended.
log_reg67 = sm.Logit(y_train67, X_train67).fit() 
print(log_reg67.summary()) 

### All P-values are smaller than 0.05 and significant

In [None]:
# 3-7|8 sub-model on the first 8 columns; res78 and lgr78 name the same fitted estimator.
X78, y78 = df78.iloc[:, :8], df78['quality']
X_train78, X_test78, y_train78, y_test78 = train_test_split(
    X78, y78, random_state=0, test_size=0.2
)
res78 = lgr78 = LogisticRegression().fit(X_train78, y_train78)
pred78 = lgr78.predict(X_test78)
pred78

In [None]:
roc_auc_score(y_test78,pred78)

### The still bad AUC means it is truly a low observations problem, and not insignificant variables causing noisy result.

# SMOTE algorithm

### Therefore we will use the SMOTE algorithm, which oversamples the low-observation class of each sub-model by synthesising new minority samples until the two classes are balanced, to get better results.

In [None]:
from imblearn.over_sampling import SMOTE


In [None]:
# performing SMOTE for 3-4
# fit_resample is the current name of the resampling call (fit_sample is gone from
# recent imbalanced-learn releases); the fixed seed makes the synthetic rows repeatable.
# NOTE(review): resampling happens before the later train/test split, so synthetic
# rows derived from test observations can end up in the training set.
X34_resampled, y34_resampled = SMOTE(random_state=0).fit_resample(X34, y34)
X34_resampled

In [None]:
# performing SMOTE for 4-5
# fit_resample is the current name of the resampling call (fit_sample is gone from
# recent imbalanced-learn releases); the fixed seed makes the synthetic rows repeatable.
X45_resampled, y45_resampled = SMOTE(random_state=0).fit_resample(X45, y45)
X45_resampled

In [None]:
# performing SMOTE for 6-7
# fit_resample is the current name of the resampling call (fit_sample is gone from
# recent imbalanced-learn releases); the fixed seed makes the synthetic rows repeatable.
X67_resampled, y67_resampled = SMOTE(random_state=0).fit_resample(X67, y67)
X67_resampled

In [None]:
# performing SMOTE for 7-8
# fit_resample is the current name of the resampling call (fit_sample is gone from
# recent imbalanced-learn releases); the fixed seed makes the synthetic rows repeatable.
X78_resampled, y78_resampled = SMOTE(random_state=0).fit_resample(X78, y78)
X78_resampled

### Comparing the dependent variable vector (y) before and after applying the SMOTE algorithm:

In [None]:
# Share of positive labels before and after resampling, one line per resampled cut;
# the 5-6 cut was not resampled, so only its original share is printed.
for before, after in (
    (y34, y34_resampled),
    (y45, y45_resampled),
    (y67, y67_resampled),
    (y78, y78_resampled),
):
    print(before.mean(), after.mean())
print(y56.mean())

### We see the previously unbalanced y vector is now balanced.

### Repeating sub-modeling for post-SMOTE dataset.

### SMOTE for 3-4 submodel

In [None]:
# 3|4+ sub-model on the resampled data: 80/20 split (seed 0), fit, predict test labels.
X_train34_re, X_test34_re, y_train34_re, y_test34_re = train_test_split(
    X34_resampled, y34_resampled, random_state=0, test_size=0.2
)
lgr34_re = LogisticRegression().fit(X_train34_re, y_train34_re)
pred34_re = lgr34_re.predict(X_test34_re)
pred34_re

In [None]:
roc_auc_score(y_test34_re,pred34_re)

### Higher AUC after SMOTE

In [None]:
# Refit the resampled 3-4 training split with statsmodels and print its summary table.
# NOTE(review): X_train34_re is passed as-is (no sm.add_constant), so this Logit
# presumably has no intercept, unlike the sklearn fit - confirm this is intended.
log_reg34_re = sm.Logit(y_train34_re, X_train34_re).fit() 
print(log_reg34_re.summary()) 

### P-value for sulphates and residual sugar too high - above 0.05. We shall omit them from sub-model, and re-run


In [None]:
# Remove sulphates and residual sugar from the 3-4 frame (pandas 2 needs the axis by keyword).
df34 = df34.drop(columns=['sulphates', 'residual sugar'])
# Rebuild the 0/1 label from the original quality (>= 4) as a whole column instead
# of per-row chained indexing with a hard-coded row count.
df34['quality'] = (df_low['quality'] >= 4).astype(int)
# Number of observations in the lower class (label 0).
count34 = int((df34['quality'] == 0).sum())

In [None]:
df34

### Implement SMOTE again

In [None]:
# 3|4+ sub-model on the first 6 columns of the trimmed frame: split, fit, predict.
X34, y34 = df34.iloc[:, :6], df34['quality']
X_train34, X_test34, y_train34, y_test34 = train_test_split(
    X34, y34, random_state=0, test_size=0.2
)
lgr34 = LogisticRegression().fit(X_train34, y_train34)
pred34 = lgr34.predict(X_test34)
pred34

In [None]:
# Resample the trimmed 3-4 data. fit_resample is the current name of the call
# (fit_sample is gone from recent imbalanced-learn releases); the seed makes it repeatable.
X34_resampled, y34_resampled = SMOTE(random_state=0).fit_resample(X34, y34)
X34_resampled

In [None]:
print(y34.mean(), y34_resampled.mean())


### The dataset is now balanced

### Run submodel again

In [None]:
# Final 3|4+ sub-model on the resampled data; res34 and lgr34_re name the same fitted estimator.
X_train34_re, X_test34_re, y_train34_re, y_test34_re = train_test_split(
    X34_resampled, y34_resampled, random_state=0, test_size=0.2
)
res34 = lgr34_re = LogisticRegression().fit(X_train34_re, y_train34_re)
pred34_re = lgr34_re.predict(X_test34_re)
pred34_re

In [None]:
roc_auc_score(y_test34_re,pred34_re)

In [None]:
# Refit the final resampled 3-4 training split with statsmodels and print its summary table.
# NOTE(review): X_train34_re is passed as-is (no sm.add_constant), so this Logit
# presumably has no intercept, unlike the sklearn fit - confirm this is intended.
log_reg34_re = sm.Logit(y_train34_re, X_train34_re).fit() 
print(log_reg34_re.summary())

### All P-values significant now

### SMOTE for 4-5 submodel

In [None]:
# 3-4|5+ sub-model on the resampled data: 80/20 split (seed 0), fit, predict test labels.
X_train45_re, X_test45_re, y_train45_re, y_test45_re = train_test_split(
    X45_resampled, y45_resampled, random_state=0, test_size=0.2
)
lgr45_re = LogisticRegression().fit(X_train45_re, y_train45_re)
pred45_re = lgr45_re.predict(X_test45_re)
pred45_re

In [None]:
roc_auc_score(y_test45_re,pred45_re)

In [None]:
# Refit the resampled 4-5 training split with statsmodels and print its summary table.
# NOTE(review): X_train45_re is passed as-is (no sm.add_constant), so this Logit
# presumably has no intercept, unlike the sklearn fit - confirm this is intended.
log_reg45_re = sm.Logit(y_train45_re, X_train45_re).fit() 
print(log_reg45_re.summary())

### density has high P-value, and will now be omitted from sub-model

In [None]:
# Remove density from the 4-5 frame (pandas 2 needs `columns=` / `axis=` by keyword).
df45 = df45.drop(columns='density')
# Rebuild the 0/1 label from the original quality (>= 5) as a whole column instead
# of per-row chained indexing with a hard-coded row count.
df45['quality'] = (df_low['quality'] >= 5).astype(int)
# Number of observations in the lower class (label 0).
count45 = int((df45['quality'] == 0).sum())

In [None]:
df45

In [None]:
# 3-4|5+ sub-model on the first 7 columns of the trimmed frame: split, fit, predict.
X45, y45 = df45.iloc[:, :7], df45['quality']
X_train45, X_test45, y_train45, y_test45 = train_test_split(
    X45, y45, random_state=0, test_size=0.2
)
lgr45 = LogisticRegression().fit(X_train45, y_train45)
pred45 = lgr45.predict(X_test45)
pred45

In [None]:
# Resample the trimmed 4-5 data. fit_resample is the current name of the call
# (fit_sample is gone from recent imbalanced-learn releases); the seed makes it repeatable.
X45_resampled, y45_resampled = SMOTE(random_state=0).fit_resample(X45, y45)
X45_resampled

In [None]:
# Final 3-4|5+ sub-model on the resampled data; res45 and lgr45_re name the same fitted estimator.
X_train45_re, X_test45_re, y_train45_re, y_test45_re = train_test_split(
    X45_resampled, y45_resampled, random_state=0, test_size=0.2
)
res45 = lgr45_re = LogisticRegression().fit(X_train45_re, y_train45_re)
pred45_re = lgr45_re.predict(X_test45_re)
pred45_re

In [None]:
roc_auc_score(y_test45_re,pred45_re)

In [None]:
# Refit the final resampled 4-5 training split with statsmodels and print its summary table.
# NOTE(review): X_train45_re is passed as-is (no sm.add_constant), so this Logit
# presumably has no intercept, unlike the sklearn fit - confirm this is intended.
log_reg45_re = sm.Logit(y_train45_re, X_train45_re).fit() 
print(log_reg45_re.summary())

### All P-values below 0.05.

### SMOTE 6-7

In [None]:
# 3-6|7+ sub-model on the resampled data; res67 and lgr67_re name the same fitted estimator.
X_train67_re, X_test67_re, y_train67_re, y_test67_re = train_test_split(
    X67_resampled, y67_resampled, random_state=0, test_size=0.2
)
res67 = lgr67_re = LogisticRegression().fit(X_train67_re, y_train67_re)
pred67_re = lgr67_re.predict(X_test67_re)
pred67_re

In [None]:
roc_auc_score(y_test67_re,pred67_re)

In [None]:
# Refit the resampled 6-7 training split with statsmodels and print its summary table.
# NOTE(review): X_train67_re is passed as-is (no sm.add_constant), so this Logit
# presumably has no intercept, unlike the sklearn fit - confirm this is intended.
log_reg67_re = sm.Logit(y_train67_re, X_train67_re).fit() 
print(log_reg67_re.summary())

### All P-values are below 0.05

In [None]:
df78

### SMOTE 7-8

In [None]:
# Predictors of the 7-8 frame: an assignment, where `X78-df78...` was a subtraction
# whose result was thrown away.
X78 = df78.iloc[:, 0:8]
# 3-7|8 sub-model on the resampled data: 80/20 split (seed 0), fit, predict test labels.
X_train78_re, X_test78_re, y_train78_re, y_test78_re = train_test_split(X78_resampled, y78_resampled, test_size=0.2,random_state=0)
lgr78_re=LogisticRegression()
res78=lgr78_re.fit(X_train78_re, y_train78_re)
pred78_re=lgr78_re.predict(X_test78_re)
pred78_re

In [None]:
roc_auc_score(y_test78_re,pred78_re)

### High AUC here

In [None]:
# Refit the resampled 7-8 training split with statsmodels and print its summary table.
# NOTE(review): X_train78_re is passed as-is (no sm.add_constant), so this Logit
# presumably has no intercept, unlike the sklearn fit - confirm this is intended.
log_reg78_re = sm.Logit(y_train78_re, X_train78_re).fit() 
print(log_reg78_re.summary()) 

### All P-values below 0.05

# ROC Curves for sub-models

In [None]:
from sklearn import metrics
# Score the test rows with the predicted probability of class 1. Hard 0/1
# predictions give roc_curve a single operating point, not a curve, and make the
# "AUC" depend on one threshold only.
score34_re = lgr34_re.predict_proba(X_test34_re)[:, 1]
fpr, tpr, thresholds = metrics.roc_curve(y_test34_re, score34_re)
plt.plot(fpr, tpr)
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.show()
print('AUC=',roc_auc_score(y_test34_re, score34_re))


In [None]:
# Score the test rows with the predicted probability of class 1. Hard 0/1
# predictions give roc_curve a single operating point, not a curve.
score45_re = lgr45_re.predict_proba(X_test45_re)[:, 1]
fpr, tpr, thresholds = metrics.roc_curve(y_test45_re, score45_re)
plt.plot(fpr, tpr)
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.show()
print('AUC=',roc_auc_score(y_test45_re, score45_re))


In [None]:
# Score the test rows with the predicted probability of class 1. Hard 0/1
# predictions give roc_curve a single operating point, not a curve.
score56 = lgr56.predict_proba(X_test56)[:, 1]
fpr, tpr, thresholds = metrics.roc_curve(y_test56, score56)
plt.plot(fpr, tpr)
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.show()
print('AUC=',roc_auc_score(y_test56, score56))


In [None]:
# Score the test rows with the predicted probability of class 1. Hard 0/1
# predictions give roc_curve a single operating point, not a curve.
score67_re = lgr67_re.predict_proba(X_test67_re)[:, 1]
fpr, tpr, thresholds = metrics.roc_curve(y_test67_re, score67_re)
plt.plot(fpr, tpr)
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.show()
print('AUC=',roc_auc_score(y_test67_re, score67_re))


In [None]:
# Score the test rows with the predicted probability of class 1. Hard 0/1
# predictions give roc_curve a single operating point, not a curve.
score78_re = lgr78_re.predict_proba(X_test78_re)[:, 1]
fpr, tpr, thresholds = metrics.roc_curve(y_test78_re, score78_re)
plt.plot(fpr, tpr)
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.show()
print('AUC=',roc_auc_score(y_test78_re, score78_re))


# Entire Model Goodness of fit

### The goodness of fit will be determined using two methods: 1. Hosmer–Lemeshow criterion, see reference [2] 2. Running observations through the model, and finding their predicted quality level. the result is analyzed using chi squared statistic.

### finding the probability of each quality level for each observation:

### Pi are the predicted probabilities for each quality level.
### Pgti are the probabilities, found in each cut, for the quality level to be i or above.

In [None]:
# Placeholder columns for the per-level probabilities and the expected-quality score.
# They are created as floats (0.0) because they will hold probabilities; integer
# zeros would give int64 columns that cannot store fractional values.
for name in ('P3', 'P4', 'P5', 'P6', 'P7', 'P8', 'Score'):
    df[name] = 0.0


In [None]:
# Placeholder columns for the cumulative probabilities P(quality >= k), created as
# floats (0.0) because integer zeros would give int64 columns.
for name in ('Pgt4', 'Pgt5', 'Pgt6', 'Pgt7', 'Pgt8'):
    df[name] = 0.0


In [None]:
df

###  Writing all sub-model coefficients

In [None]:
# 3-4 model coefficients
# Print the fitted slopes and intercept of res34, then df34's column names in order;
# the scoring cells pair coef_ with these names and skip the last entry.
print(res34.coef_)
print(res34.intercept_)
df34_col=df34.columns.tolist()
print(df34_col)

In [None]:
# 4-5 model coefficients
# Print the fitted slopes and intercept of res45, then df45's column names in order;
# the scoring cells pair coef_ with these names and skip the last entry.
print(res45.coef_)
print(res45.intercept_)
df45_col=df45.columns.tolist()
print(df45_col)

In [None]:
# 5-6 model coefficients
# Print the fitted slopes and intercept of res56, then df56's column names in order;
# the scoring cells pair coef_ with these names and skip the last entry.
print(res56.coef_)
print(res56.intercept_)
df56_col=df56.columns.tolist()
print(df56_col)

In [None]:
# 6-7 model coefficients
# Print the fitted slopes and intercept of res67, then df67's column names in order;
# the scoring cells pair coef_ with these names and skip the last entry.
print(res67.coef_)
print(res67.intercept_)
df67_col=df67.columns.tolist()
print(df67_col)

In [None]:
# 7-8 model coefficients
# Print the fitted slopes and intercept of res78, then df78's column names in order;
# the scoring cells pair coef_ with these names and skip the last entry.
print(res78.coef_)
print(res78.intercept_)
df78_col=df78.columns.tolist()
print(df78_col)

### Calculating Probabilities for each observation, and overall Score, for the Hosmer–Lemeshow criterion

In [None]:
def _prob_at_least(model, frame, columns):
    """Return the sub-model's probability of label 1 for every row of `frame`.

    `model` supplies `intercept_` and `coef_`; `columns` is the frame's column
    list, whose last entry (the label) is not used as a predictor.
    """
    features = frame[columns[:-1]].to_numpy(dtype=float)
    linear = model.intercept_[0] + features @ model.coef_[0]
    # exp(-log(1 + exp(-z))) is the logistic function in a form that cannot
    # overflow, unlike exp(z) / (1 + exp(z)) for large z.
    return np.exp(-np.logaddexp(0.0, -linear))


# Cumulative probabilities P(quality >= k), one sub-model per cut. Whole columns
# are assigned at once: per-row chained `df[col].iloc[i] = ...` is not guaranteed
# to write back to df and depended on a hard-coded row count.
df['Pgt4'] = _prob_at_least(res34, df34, df34_col)
df['Pgt5'] = _prob_at_least(res45, df45, df45_col)
df['Pgt6'] = _prob_at_least(res56, df56, df56_col)
df['Pgt7'] = _prob_at_least(res67, df67, df67_col)
df['Pgt8'] = _prob_at_least(res78, df78, df78_col)

# Probability of each single level = difference of neighbouring cumulative probabilities.
df['P3'] = 1 - df['Pgt4']
df['P4'] = df['Pgt4'] - df['Pgt5']
df['P5'] = df['Pgt5'] - df['Pgt6']
df['P6'] = df['Pgt6'] - df['Pgt7']
df['P7'] = df['Pgt7'] - df['Pgt8']
df['P8'] = df['Pgt8']

# Calculating score according to Hosmer–Lemeshow method: probability-weighted quality level.
df['Score'] = (
    3 * df['P3'] + 4 * df['P4'] + 5 * df['P5']
    + 6 * df['P6'] + 7 * df['P7'] + 8 * df['P8']
)

In [None]:
df

### Checking whether the sum of the Pi is equal or close to 1.

In [None]:
# Total of the per-level probabilities for every observation. Columns are selected
# by name so that all six levels, P8 included, enter the sum regardless of where
# the columns sit in df.
level_cols = ['P3', 'P4', 'P5', 'P6', 'P7', 'P8']
row_total = df[level_cols].sum(axis=1)
# k = number of observations whose probabilities add up to less than 0.9.
k = int((row_total < 0.9).sum())
print(k)
# When each Pi is built as a difference of neighbouring Pgt values, the total
# telescopes to 1, so the informative symptom is a negative Pi (a higher cut judged
# more likely than a lower one). Count the observations where that happens too.
negative_rows = int((df[level_cols] < 0).any(axis=1).sum())
print(negative_rows)

### As we can see, most observations yield excellent results, while others yield quite inadequate Pi. Thus the criterion cannot be implemented. We previously saw that the AUC yielded sub-optimal results for some sub-models, while all P-values for all sub-models are very low. This implies that although the chosen parameters for each sub-model are significant, they fail to explain the entire model, and more parameters are needed. We should also recall that three of the 11 parameters were found insignificant (fixed acidity, free sulfur dioxide, pH).

## different way - all observations will be passed through the entire model, and observed quality will be found. Then all will be sorted by size or sub model. 

In [None]:
def _prob_at_least(model, frame, columns):
    """Return the sub-model's probability of label 1 for every row of `frame`.

    `model` supplies `intercept_` and `coef_`; `columns` is the frame's column
    list, whose last entry (the label) is not used as a predictor.
    """
    features = frame[columns[:-1]].to_numpy(dtype=float)
    linear = model.intercept_[0] + features @ model.coef_[0]
    # Overflow-safe form of exp(z) / (1 + exp(z)).
    return np.exp(-np.logaddexp(0.0, -linear))


# Every observation starts at level 3 and moves up one level for each cut whose
# probability reaches that cut's artificially introduced threshold. Whole-column
# operations replace per-row chained assignment and the hard-coded row count.
df['observed'] = 3
for prob_col, model, frame, columns, threshold in (
    ('Pgt4', res34, df34, df34_col, 0.04),
    ('Pgt5', res45, df45, df45_col, 0.17),
    ('Pgt6', res56, df56, df56_col, 0.49),
    ('Pgt7', res67, df67, df67_col, 0.999),
    ('Pgt8', res78, df78, df78_col, 0.5),
):
    prob = _prob_at_least(model, frame, columns)
    df[prob_col] = prob
    df['observed'] += (prob >= threshold).astype(int)
    


### Calculating chi2

In [None]:
# Sum over all observations of (quality - observed)^2 / quality, computed on whole
# columns so it covers every row of df instead of a hard-coded 1599.
chi2 = float((((df['quality'] - df['observed']) ** 2) / df['quality']).sum())
chi2

### Obtained very high chi2

### Calculating chi2 components

In [None]:
def _chi2_component(level):
    """Return (actual count - predicted count)^2 / actual count for one quality level."""
    actual = (df.quality == level).sum()
    predicted = (df.observed == level).sum()
    return (actual - predicted) ** 2 / actual


# One component per quality level; each is divided by that level's own count
# (level 8 by the level-8 count, not by the level-4 count).
chi2_3 = _chi2_component(3)
chi2_4 = _chi2_component(4)
chi2_5 = _chi2_component(5)
chi2_6 = _chi2_component(6)
chi2_7 = _chi2_component(7)
chi2_8 = _chi2_component(8)
# Each component is printed once.
print('chi2_3=',chi2_3,'chi2_4=',chi2_4,'chi2_5=',chi2_5, 'chi2_6=',chi2_6, 'chi2_7=',chi2_7, 'chi2_8=',chi2_8)

### The most significant contributors to Chi2 are the 5-6, 6-7 and 7-8 submodels

### Note the lines similar to (df['Pgt7'].iloc[i]>=0.999). This is an attempt to produce an artificial logistic regression threshold. Even at a 0.999 thresholds, the difference between observed and predicted count is still too large. 

In [None]:
(df.quality == 3).sum()

In [None]:
(df.observed==3).sum()

In [None]:
(df.quality == 4).sum()

In [None]:
(df.observed==4).sum()

In [None]:
(df.quality == 5).sum()

In [None]:
(df.observed==5).sum()

In [None]:
(df.quality == 6).sum()

In [None]:
(df.observed==6).sum()

In [None]:
(df.quality == 7).sum()

In [None]:
(df.observed==7).sum()

In [None]:
(df.quality == 8).sum()

In [None]:
(df.observed==8).sum()

### Running from high to low quality levels, to see if this is a cause of model mismatch

In [None]:
def _prob_at_least(model, frame, columns):
    """Return the sub-model's probability of label 1 for every row of `frame`.

    `model` supplies `intercept_` and `coef_`; `columns` is the frame's column
    list, whose last entry (the label) is not used as a predictor.
    """
    features = frame[columns[:-1]].to_numpy(dtype=float)
    linear = model.intercept_[0] + features @ model.coef_[0]
    # Overflow-safe form of exp(z) / (1 + exp(z)).
    return np.exp(-np.logaddexp(0.0, -linear))


# Repeat of the threshold classification. Each cut is tested on its own and adds
# one level when its threshold is reached, so the order in which the cuts are
# visited does not affect the resulting `observed` level. Whole-column operations
# replace per-row chained assignment and the hard-coded row count.
df['observed'] = 3
for prob_col, model, frame, columns, threshold in (
    ('Pgt4', res34, df34, df34_col, 0.04),
    ('Pgt5', res45, df45, df45_col, 0.17),
    ('Pgt6', res56, df56, df56_col, 0.49),
    ('Pgt7', res67, df67, df67_col, 0.999),
    ('Pgt8', res78, df78, df78_col, 0.5),
):
    prob = _prob_at_least(model, frame, columns)
    df[prob_col] = prob
    df['observed'] += (prob >= threshold).astype(int)

In [None]:
# For each quality level 3..8: the count in `quality`, then the count in `observed`.
for level in range(3, 9):
    actual_count = (df.quality == level).sum()
    predicted_count = (df.observed == level).sum()
    print(actual_count)
    print(predicted_count)

### There still is a model mismatch, due to the reasons written above

### References
1.	"A Simple Approach to Ordinal Classification", Eibe Frank and Mark Hall, European Conference on Machine Learning, ECML 2001: Machine Learning: ECML 2001 pp 145-156
2.	"Tests for goodness of fit in ordinal logistic regression models", Morten W. Fagerland & David W. Hosmer (2016), Journal of Statistical Computation and Simulation, DOI: 10.1080/00949655.2016.1156682
