# Prediction of wine type using random forest classifier

Before you run this notebook you must first install the dependencies using the dependencies.ipynb notebook.

Only needs to be done once, but if you start a new Jupyter instance then you will have to load them again.

In [None]:
from sklearn.datasets import load_wine
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from jaqpotpy import Jaqpot
from jaqpotpy.models import MolecularModel

In [3]:
# see here for info on the wine dataset https://scikit-learn.org/stable/datasets/toy_dataset.html
# and https://archive.ics.uci.edu/ml/datasets/Wine
data = load_wine()

In [4]:
# create the data frame
df_X = pd.DataFrame(data['data'], columns=data['feature_names'])
print(df_X)

     alcohol  malic_acid   ash  alcalinity_of_ash  magnesium  total_phenols  \
0      14.23        1.71  2.43               15.6      127.0           2.80   
1      13.20        1.78  2.14               11.2      100.0           2.65   
2      13.16        2.36  2.67               18.6      101.0           2.80   
3      14.37        1.95  2.50               16.8      113.0           3.85   
4      13.24        2.59  2.87               21.0      118.0           2.80   
..       ...         ...   ...                ...        ...            ...   
173    13.71        5.65  2.45               20.5       95.0           1.68   
174    13.40        3.91  2.48               23.0      102.0           1.80   
175    13.27        4.28  2.26               20.0      120.0           1.59   
176    13.17        2.59  2.37               20.0      120.0           1.65   
177    14.13        4.10  2.74               24.5       96.0           2.05   

     flavanoids  nonflavanoid_phenols  proanthocyan

In [5]:
df_Y = pd.DataFrame(data['target'], columns = ["target"])
print(df_Y)

     target
0         0
1         0
2         0
3         0
4         0
..      ...
173       2
174       2
175       2
176       2
177       2

[178 rows x 1 columns]


In [6]:
# create the model
model = RandomForestClassifier(max_depth=10, random_state=0)
model.fit(df_X, df_Y)
# this gives a strange warning about "column-vector y was passed when a 1d array was expected" which can be ignored, but need to work out how to avoid this. 

  model.fit(df_X, df_Y)


RandomForestClassifier(max_depth=10, random_state=0)

In [None]:
# Create the Jaqpot instance and prompt for the API key
a = input()
jaqpot = Jaqpot()
jaqpot.set_api_key(a)
# TODO - Jaqpot prints the key which is a security risk. Avoid doing this.

In [9]:
# Deploy the model to Jaqpot along with its title and description
model_id = jaqpot.deploy_sklearn(model, df_X, df_Y, "RF wine model from Squonk", "Random Forest wine category predictive model from Squonk")

  2022-08-22 09:38:33,794 - INFO - Model with id: gG1ySbhBauoTuTa0eSIf created. Please visit the application to proceed


In [10]:
# Run the predictions using the uploaded model. The Jaqpot client handles this for us.
df_pred, predicts = jaqpot.predict(df_X, model_id)

  2022-08-22 09:38:49,149 - INFO - completed 10.0
  2022-08-22 09:38:50,311 - INFO - completed 10.0
  2022-08-22 09:38:51,480 - INFO - completed 100.0


In [11]:
print(predicts)
print(df_pred)
print(df_pred[predicts])

['target']
     alcalinity_of_ash  nonflavanoid_phenols  proline  color_intensity  \
0                 15.6                  0.28     1065             5.64   
1                 11.2                  0.26     1050             4.38   
2                 18.6                  0.30     1185             5.68   
3                 16.8                  0.24     1480             7.80   
4                 21.0                  0.39      735             4.32   
..                 ...                   ...      ...              ...   
173               20.5                  0.52      740             7.70   
174               23.0                  0.43      750             7.30   
175               20.0                  0.43      835            10.20   
176               20.0                  0.53      840             9.30   
177               24.5                  0.56      560             9.20   

     od280/od315_of_diluted_wines  flavanoids   ash   hue  malic_acid  \
0                          