## Wine Quality Tester 

**This project builds a model that predicts the quality of red and white wine on a scale of 0–10.**

>**Note:** Code and Markdown cells can be executed using the **Shift + Enter** keyboard shortcut. In addition, Markdown cells can typically be edited by double-clicking the cell to enter edit mode.

In [1]:
#import some important libraries

import numpy as np
import pandas as pd

# sklearn.cross_validation was removed in scikit-learn 0.20. The legacy module
# is tried first so ShuffleSplit stays bound exactly as before wherever it
# still exists; newer installs fall back to sklearn.model_selection instead of
# failing on the first cell.
try:
    from sklearn.cross_validation import ShuffleSplit
except ImportError:
    from sklearn.model_selection import ShuffleSplit

# Open the workbook once, then read each wine type from its own sheet
wine_data = pd.ExcelFile('Wine_data.xlsx')

data_red, data_white = (
    pd.read_excel(wine_data, sheet_label)
    for sheet_label in ('Red Wine', 'White Wine')
)



In [2]:
# Red wine: the 'quality' column is the target, every other column is a feature
features_red = data_red.drop('quality', axis=1)
qlty_red = data_red.loc[:, 'quality']

In [3]:
# White wine: the 'quality' column is the target, every other column is a feature
features_white = data_white.drop('quality', axis=1)
qlty_white = data_white.loc[:, 'quality']

In [4]:
#making a performance function 

#importing some performance metrics

from sklearn.metrics import r2_score

# Scoring helper used to judge the regressor's predictions
def performance_metrics(y_true,y_predict):
    """Score predictions against the true values with sklearn's r2_score.

    Parameters
    ----------
    y_true : array-like
        Observed target values.
    y_predict : array-like
        Values predicted by the model for the same samples.

    Returns
    -------
    The value returned by ``r2_score(y_true, y_predict)``.
    """
    score = r2_score(y_true, y_predict)
    return score

In [5]:
# Dividing the data into training and testing subsets

# sklearn.cross_validation was removed in scikit-learn 0.20; train_test_split
# now lives in sklearn.model_selection (available since 0.18). The call
# signature used below is the same in both, so older installs can fall back.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split

def split_data(X,y):
    """Split features and targets into an 80/20 train/test partition.

    Parameters
    ----------
    X : array-like
        Feature rows.
    y : array-like
        Target values aligned with ``X``.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)``. The seed is fixed
        (``random_state=42``) so repeated calls on the same input produce
        the same partition.
    """
    # 20% of the rows are held out for testing; the remainder is for training
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42)

    return X_train, X_test, y_train, y_test

In [6]:
# Creating a function that tunes and fits the model

# sklearn.grid_search and sklearn.cross_validation were removed in
# scikit-learn 0.20; GridSearchCV and ShuffleSplit now come from
# sklearn.model_selection (scikit-learn >= 0.18). There is no legacy fallback
# here because the old ShuffleSplit constructor takes different arguments.
from sklearn.metrics import make_scorer
from sklearn.model_selection import GridSearchCV, ShuffleSplit
from sklearn.tree import DecisionTreeRegressor

def fit_model(X,y):
    """Grid-search ``max_depth`` for a decision-tree regressor.

    Parameters
    ----------
    X : array-like
        Training feature rows.
    y : array-like
        Training target values aligned with ``X``.

    Returns
    -------
    DecisionTreeRegressor
        The ``best_estimator_`` selected by the grid search, i.e. the tree
        whose ``max_depth`` (1 through 10) achieved the best cross-validated
        ``performance_metrics`` score.
    """
    # 15 random 90/10 train/validation partitions of the data passed to fit();
    # the fixed seed keeps the partitions the same from run to run
    cv_set = ShuffleSplit(n_splits=15, test_size=0.1, random_state=23)

    # Wrap the notebook's scoring helper so GridSearchCV can call it
    score_func = make_scorer(performance_metrics)

    # Candidate tree depths to compare
    params = {'max_depth': list(range(1, 11))}

    reg = DecisionTreeRegressor()

    grid = GridSearchCV(reg, param_grid=params, scoring=score_func, cv=cv_set)
    grid.fit(X, y)

    return grid.best_estimator_



In [7]:
# White wine: split the data, then tune a model on the training portion

(X_train_w, X_test_w,
 y_train_w, y_test_w) = split_data(features_white, qlty_white)

# Depth-tuned regressor fitted on the white-wine training split
reg_w = fit_model(X_train_w, y_train_w)

# Report which max_depth the search settled on
print("Parameter 'max_depth' is {} for the optimal model.".format(
    reg_w.get_params()['max_depth']))

Parameter 'max_depth' is 5 for the optimal model.


In [8]:
# Red wine: split the data, then tune a model on the training portion

(X_train_r, X_test_r,
 y_train_r, y_test_r) = split_data(features_red, qlty_red)

# Depth-tuned regressor fitted on the red-wine training split
reg_r = fit_model(X_train_r, y_train_r)

# Report which max_depth the search settled on
print("Parameter 'max_depth' is {} for the optimal model.".format(
    reg_r.get_params()['max_depth']))

Parameter 'max_depth' is 4 for the optimal model.


In [9]:
# creating function for predicting the values for the model 

def PredictTrials(X, y, fitter, data, n_trials=10):
    """Refit the model on several random splits and predict one sample each time.

    Each trial draws a different 80/20 train/test split (seeded with the
    trial index), fits a model on the training part via ``fitter`` and
    predicts the first row of ``data``. Every prediction is printed,
    followed by the mean over all trials.

    Parameters
    ----------
    X : array-like
        Feature rows.
    y : array-like
        Target values aligned with ``X``.
    fitter : callable
        Called as ``fitter(X_train, y_train)``; must return an object with a
        ``predict`` method.
    data : sequence of feature rows
        Only ``data[0]`` is scored; any further rows are ignored.
    n_trials : int, optional
        Number of split/fit/predict rounds to run (default 10).

    Raises
    ------
    ValueError
        If ``n_trials`` is smaller than 1.
    """
    if n_trials < 1:
        raise ValueError("n_trials must be at least 1")

    # The sample never changes between trials, so wrap it for predict() once
    sample = [data[0]]

    quality = []
    for k in range(n_trials):
        # A different seed per trial gives a different train/test partition;
        # only the training part is needed here
        X_train, _, y_train, _ = train_test_split(
            X, y, test_size=0.2, random_state=k)

        # Fit on this trial's training split and predict the sample
        reg = fitter(X_train, y_train)
        pred = reg.predict(sample)[0]
        quality.append(pred)

        print ("Trial {}: {:,.2f}".format(k+1, pred))

    # Mean of the predictions made across all trials
    print("Mean Value of our prediction is {}".format(np.mean(quality)))

In [None]:
# Sample white-wine feature rows for trying out the model
# (presumably in the same column order as features_white -- TODO confirm)
white_data = [
    [7, 0.27, 0.36, 20.7, 0.045, 45, 170, 1.001, 3, 0.45, 8.8],
    [6.3, 0.3, 0.34, 1.6, 0.049, 14, 132, 0.994, 3.3, 0.49, 9.5],
    [8.1, 0.28, 0.4, 6.9, 0.05, 30, 97, 0.9951, 3.26, 0.44, 10.1],
    [7.2, 0.23, 0.32, 8.5, 0.058, 47, 186, 0.9956, 3.19, 0.4, 9.9],
    [7.2, 0.23, 0.32, 8.5, 0.058, 47, 186, 0.9956, 3.19, 0.4, 9.9],
]

In [None]:
# Run the repeated fit/predict trials on the white-wine data

PredictTrials(X=features_white, y=qlty_white, fitter=fit_model, data=white_data)

Trial 1: 5.45
Trial 2: 5.59
Trial 3: 5.34
Trial 4: 5.51
Trial 5: 5.66
Trial 6: 5.15
Trial 7: 5.42


In [None]:
# Sample red-wine feature rows for trying out the model
# (presumably in the same column order as features_red -- TODO confirm).
# Every row must carry exactly 11 values. The last row previously ended in
# "...,10,7", giving it 12; the stray trailing 7 (presumably the quality label
# copied along with the features) has been removed.
red_data = [
    [7.9, 0.6, 0.06, 1.6, 0.069, 15, 59, 0.9964, 3.3, 0.46, 9.4],
    [7.8, 0.88, 0, 2.6, 0.098, 25, 67, 0.9968, 3.2, 0.68, 9.8],
    [7.8, 0.76, 0.04, 2.3, 0.092, 15, 54, 0.997, 3.26, 0.65, 9.8],
    [11.2, 0.28, 0.56, 1.9, 0.075, 17, 60, 0.998, 3.16, 0.58, 9.8],
    [7.4, 0.7, 0, 1.9, 0.076, 11, 34, 0.9978, 3.51, 0.56, 9.4],
    [7.4, 0.66, 0, 1.8, 0.075, 13, 40, 0.9978, 3.51, 0.56, 9.4],
    [7.3, 0.65, 0, 1.2, 0.065, 15, 21, 0.9946, 3.39, 0.47, 10],
]

In [None]:
# Run the repeated fit/predict trials on the red-wine data
PredictTrials(X=features_red, y=qlty_red, fitter=fit_model, data=red_data)