In [1]:
# Import our dependencies
%matplotlib inline
import pandas as pd
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import os
from sklearn.svm import SVC
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from sklearn.metrics import accuracy_score

In [2]:
# Loading the ipython-sql extension, which provides the %sql / %%sql magics used in later cells
%load_ext sql

In [3]:
# Importing create_engine function from sqlalchemy
from sqlalchemy import create_engine

In [4]:
# Connecting ipython-sql to PostgreSQL Database
%sql postgresql://postgres:pencil@localhost/wine_db

In [5]:
# Connecting sqlalchemy to PostgreSQL Database.
# No password is embedded in the URL, so no credential is stored in the notebook.
# libpq (used by the default psycopg2 driver) supplies it at connect time from the
# PGPASSWORD environment variable or a ~/.pgpass entry.
engine = create_engine('postgresql://postgres@localhost/wine_db')

In [6]:
# Checking that we can pull data from PostgreSQL Database in cell below

In [7]:
%%sql
SELECT *
FROM red_wine_quality
LIMIT 3

 * postgresql://postgres:***@localhost/wine_db
3 rows affected.


type,fixed_acidity,volatile_acidity,citric_acid,residual_sugar,chlorides,free_sulfur_dioxide,total_sulfur_dioxide,density,ph,sulfates,alcohol,quality
red,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5.0
red,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5.0
red,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5.0


In [8]:
# Read the whole red_wine_quality table through the SQLAlchemy engine into a
# DataFrame, then show it as the cell output.
df_red_wine = pd.read_sql(
    sql='SELECT * FROM red_wine_quality',
    con=engine,
)
df_red_wine

Unnamed: 0,type,fixed_acidity,volatile_acidity,citric_acid,residual_sugar,chlorides,free_sulfur_dioxide,total_sulfur_dioxide,density,ph,sulfates,alcohol,quality
0,red,7.4,0.700,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4,5.0
1,red,7.8,0.880,0.00,2.6,0.098,25.0,67.0,0.99680,3.20,0.68,9.8,5.0
2,red,7.8,0.760,0.04,2.3,0.092,15.0,54.0,0.99700,3.26,0.65,9.8,5.0
3,red,11.2,0.280,0.56,1.9,0.075,17.0,60.0,0.99800,3.16,0.58,9.8,6.0
4,red,7.4,0.700,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4,5.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1588,red,6.8,0.620,0.08,1.9,0.068,28.0,38.0,0.99651,3.42,0.82,9.5,6.0
1589,red,6.2,0.600,0.08,2.0,0.090,32.0,44.0,0.99490,3.45,0.58,10.5,5.0
1590,red,6.3,0.510,0.13,2.3,0.076,29.0,40.0,0.99574,3.42,0.75,11.0,6.0
1591,red,5.9,0.645,0.12,2.0,0.075,32.0,44.0,0.99547,3.57,0.71,10.2,5.0


In [9]:
# Drop the non-beneficial columns: 'residual_sugar', 'free_sulfur_dioxide', 'ph', and 'type'.
# errors='ignore' makes the cell safe to re-run: when the columns are already gone
# the drop is a no-op instead of raising KeyError. The result is reassigned to the
# same name (no inplace=True) so later cells keep using df_red_wine.
df_red_wine = df_red_wine.drop(
    columns=['residual_sugar', 'free_sulfur_dioxide', 'ph', 'type'],
    errors='ignore',
)

In [10]:
# Binary classification target: 1 where quality is 7 or higher, otherwise 0.
# The comparison runs on the whole column at once; the boolean result is cast to
# int64 so the column holds the integers 0 and 1.
df_red_wine['goodquality'] = df_red_wine['quality'].ge(7).astype('int64')

In [11]:
# Display the frame to check the remaining columns and the new 'goodquality' column
df_red_wine

Unnamed: 0,fixed_acidity,volatile_acidity,citric_acid,chlorides,total_sulfur_dioxide,density,sulfates,alcohol,quality,goodquality
0,7.4,0.700,0.00,0.076,34.0,0.99780,0.56,9.4,5.0,0
1,7.8,0.880,0.00,0.098,67.0,0.99680,0.68,9.8,5.0,0
2,7.8,0.760,0.04,0.092,54.0,0.99700,0.65,9.8,5.0,0
3,11.2,0.280,0.56,0.075,60.0,0.99800,0.58,9.8,6.0,0
4,7.4,0.700,0.00,0.076,34.0,0.99780,0.56,9.4,5.0,0
...,...,...,...,...,...,...,...,...,...,...
1588,6.8,0.620,0.08,0.068,38.0,0.99651,0.82,9.5,6.0,0
1589,6.2,0.600,0.08,0.090,44.0,0.99490,0.58,10.5,5.0,0
1590,6.3,0.510,0.13,0.076,40.0,0.99574,0.75,11.0,6.0,0
1591,5.9,0.645,0.12,0.075,44.0,0.99547,0.71,10.2,5.0,0
