In [3]:
import pandas as pd

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict
from sklearn import metrics

import coremltools
from sklearn import tree
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import _tree

https://github.com/apple/coremltools

You can use trained models from frameworks like Caffe, Keras, and scikit-learn, among others, and using coremltools, a Python library provided by Apple, you can convert those models to the CoreML format.<br>
First of all, pip coremltools package...

In [1]:
# pip install coremltools==6.0b2

Collecting coremltools==6.0b1
  Downloading coremltools-6.0b1-cp39-none-macosx_10_15_x86_64.whl (1.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m0m
Installing collected packages: coremltools
  Attempting uninstall: coremltools
    Found existing installation: coremltools 5.2.0
    Uninstalling coremltools-5.2.0:
      Successfully uninstalled coremltools-5.2.0
Successfully installed coremltools-6.0b1
Note: you may need to restart the kernel to use updated packages.


In this tutorial, we’re going to review the process of creating a prediction model with scikit-learn, converting it to Core ML format, and integrating it into an app. It is aimed at beginners, so it will explain some concepts and guide you to install a Python environment for creating the model. A little knowledge of machine learning and Python will help you, but it’s not absolutely required.

## I. the process of creating a prediction model with scikit-learn

In [4]:
from sklearn.datasets import load_wine

data = load_wine()
dataset_url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data' #url
names = ['cultivar', 'alcohol', 'malic_acid', 'ash', 'alkalinity_ash', 'magnesium', 'total_phenols', 'flavonoids', 'nonflavonoid_phenols', 'proanthocyanins', 'color intensity', 'hue', 'od280_od315', 'proline']
data = pd.read_csv(dataset_url, names=names, header=None)

In [21]:
X = data[['alcohol','malic_acid', 'ash', 'alkalinity_ash', 'magnesium', 'total_phenols']]
y = data['cultivar'].astype(str)

model = RandomForestClassifier()

scores = cross_val_score(model, X, y, cv=5)
print('Scores: {}'.format(scores))
print('Accuracy: {0:0.2f} (+/- {1:0.2f})'.format(scores.mean(), scores.std() * 2))

Scores: [0.86111111 0.94444444 0.86111111 0.94285714 0.94285714]
Accuracy: 0.91 (+/- 0.08)


In [22]:
# result
predicted = cross_val_predict(model, X, y, cv=5)
print('Predicted: {}'.format(predicted))

accuracy_score = metrics.accuracy_score(y, predicted)
print('Accuracy: {0:0.2f}'.format(accuracy_score))

Predicted: ['1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1'
 '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1'
 '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1'
 '1' '1' '1' '1' '1' '2' '2' '2' '1' '2' '2' '2' '2' '2' '1' '2' '2' '1'
 '3' '1' '2' '2' '2' '2' '2' '2' '2' '2' '2' '3' '2' '2' '2' '2' '2' '2'
 '2' '2' '2' '2' '2' '2' '2' '2' '2' '2' '2' '2' '2' '2' '2' '3' '2' '2'
 '2' '2' '2' '2' '2' '2' '2' '2' '2' '2' '3' '2' '2' '2' '3' '2' '2' '2'
 '2' '2' '2' '2' '3' '3' '3' '3' '2' '2' '3' '3' '3' '3' '3' '3' '3' '3'
 '3' '3' '3' '3' '3' '3' '3' '3' '2' '3' '2' '3' '3' '2' '1' '2' '2' '3'
 '3' '3' '3' '3' '3' '3' '3' '3' '3' '3' '3' '3' '3' '3' '3' '3']
Accuracy: 0.90


In [23]:
# Fit the data
model.fit(X, y)

## II. converting it to Core ML format

In [18]:
# Convert model to Core ML 
coreml_model = coremltools.converters.sklearn.convert(model, input_features=['alcohol','malicAcid', 'ash', 'alkalinityAsh', 'magnesium', 'totalPhenols'])

# Save Core ML Model
coreml_model.save('wine.mlmodel')
print('Core ML Model saved')

TypeError: Couldn't build proto file into descriptor pool: duplicate file name (FeatureTypes.proto)

[Angry] CoreMlTools is suited for scikit-learn version of 19.2 and below. Probably you have the greater version. <br>
So...we need to change the version! 

In [31]:
## way1
# !pip install --force-reinstall 'scikit-learn==0.19.2' 


## way2
# VERSION='0.19.2'
# pip install --force-reinstall --no-cache-dir scikit-learn==$VERSION


## way3
# pip install 'scikit-learn==0.19.2' 

Collecting scikit-learn==0.19.2
  Using cached scikit-learn-0.19.2.tar.gz (9.7 MB)
  Preparing metadata (setup.py) ... [?25ldone
[?25hBuilding wheels for collected packages: scikit-learn
  Building wheel for scikit-learn (setup.py) ... [?25lerror
  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [31m×[0m [32mpython setup.py bdist_wheel[0m did not run successfully.
  [31m│[0m exit code: [1;36m1[0m
  [31m╰─>[0m [31m[557 lines of output][0m
  [31m   [0m Partial import of sklearn during the build process.
  [31m   [0m blas_opt_info:
  [31m   [0m blas_mkl_info:
  [31m   [0m customize UnixCCompiler
  [31m   [0m   FOUND:
  [31m   [0m     libraries = ['mkl_rt', 'pthread']
  [31m   [0m     library_dirs = ['/opt/anaconda3/lib']
  [31m   [0m     define_macros = [('SCIPY_MKL_H', None), ('HAVE_CBLAS', None)]
  [31m   [0m     include_dirs = ['/usr/local/include', '/opt/anaconda3/include']
  [31m   [0m 
  [31m   [0m   FOUND:
  [31m   [0m     librarie

Failed to build scikit-learn
Installing collected packages: scikit-learn
  Attempting uninstall: scikit-learn
    Found existing installation: scikit-learn 1.1.2
    Uninstalling scikit-learn-1.1.2:
      Successfully uninstalled scikit-learn-1.1.2
  Running setup.py install for scikit-learn ... [?25lerror
  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [31m×[0m [32mRunning setup.py install for scikit-learn[0m did not run successfully.
  [31m│[0m exit code: [1;36m1[0m
  [31m╰─>[0m [31m[557 lines of output][0m
  [31m   [0m Partial import of sklearn during the build process.
  [31m   [0m blas_opt_info:
  [31m   [0m blas_mkl_info:
  [31m   [0m customize UnixCCompiler
  [31m   [0m   FOUND:
  [31m   [0m     libraries = ['mkl_rt', 'pthread']
  [31m   [0m     library_dirs = ['/opt/anaconda3/lib']
  [31m   [0m     define_macros = [('SCIPY_MKL_H', None), ('HAVE_CBLAS', None)]
  [31m   [0m     include_dirs = ['/usr/local/include', '/opt/anaconda3/incl

  [31m   [0m copying sklearn/model_selection/tests/test_search.py -> build/lib.macosx-10.9-x86_64-cpython-39/sklearn/model_selection/tests
  [31m   [0m copying sklearn/model_selection/tests/__init__.py -> build/lib.macosx-10.9-x86_64-cpython-39/sklearn/model_selection/tests
  [31m   [0m copying sklearn/model_selection/tests/common.py -> build/lib.macosx-10.9-x86_64-cpython-39/sklearn/model_selection/tests
  [31m   [0m creating build/lib.macosx-10.9-x86_64-cpython-39/sklearn/neural_network
  [31m   [0m copying sklearn/neural_network/_base.py -> build/lib.macosx-10.9-x86_64-cpython-39/sklearn/neural_network
  [31m   [0m copying sklearn/neural_network/multilayer_perceptron.py -> build/lib.macosx-10.9-x86_64-cpython-39/sklearn/neural_network
  [31m   [0m copying sklearn/neural_network/__init__.py -> build/lib.macosx-10.9-x86_64-cpython-39/sklearn/neural_network
  [31m   [0m copying sklearn/neural_network/_stochastic_optimizers.py -> build/lib.macosx-10.9-x86_64-cpyth

[Angry] 要下載的依賴褲版本與電腦的python版本不一致

In [46]:
# pip install scikit_learn-0.19.2-cp34-none-any.whl

[0m[31mERROR: scikit_learn-0.19.2-cp34-none-any.whl is not a supported wheel on this platform.[0m[31m
[0mNote: you may need to restart the kernel to use updated packages.


Then, run again the code above

##   *統整code

In [19]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict
from sklearn import metrics

dataset_url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data'
names = ['cultivar', 'alcohol', 'malic_acid', 'ash', 'alkalinity_ash', 'magnesium', 'total_phenols', 'flavonoids', 'nonflavonoid_phenols', 'proanthocyanins', 'color intensity', 'hue', 'od280_od315', 'proline']
data = pd.read_csv(dataset_url, names=names, header=None)


X = data[['alcohol','malic_acid', 'ash', 'alkalinity_ash', 'magnesium', 'total_phenols']]
y = data['cultivar'].astype(str)

a
# Create the model
model = RandomForestClassifier()

# Evaluate the model with cross validation
scores = cross_val_score(model, X, y, cv=5)
print('Scores: {}'.format(scores))
print('Accuracy: {0:0.2f} (+/- {1:0.2f})'.format(scores.mean(), scores.std() * 2))

predicted = cross_val_predict(model, X, y, cv=5)
print('Predicted: {}'.format(predicted))
accuracy_score = metrics.accuracy_score(y, predicted)
print('Accuracy: {0:0.2f}'.format(accuracy_score))


# Fit the data
model.fit(X, y)


# Convert model to Core ML 
coreml_model = coremltools.converters.sklearn.convert(model, input_features=['alcohol','malicAcid', 'ash', 'alkalinityAsh', 'magnesium', 'totalPhenols'])

# Save Core ML Model
coreml_model.save('wine.mlmodel')

print('Core ML Model saved')

URLError: <urlopen error [Errno 60] Operation timed out>

-> I resolved! The problem is the Python version. You need creating a PyCharm Project with python version 3.7 and install scikit-learn 0.19.2<br>
-> python to 3.7 version (below)

## PS. 重新download python的code 😠

才可以下載0.19.2的scikit-learn

In [116]:
# conda install -c anaconda python=3.7

Collecting package metadata (current_repodata.json): done
Solving environment: failed with initial frozen solve. Retrying with flexible solve.
Solving environment: failed with repodata from current_repodata.json, will retry with next repodata source.
Collecting package metadata (repodata.json): done
Solving environment: done

## Package Plan ##

  environment location: /opt/anaconda3

  added / updated specs:
    - python=3.7


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    _ipyw_jlab_nb_ext_conf-0.1.0|   py37hecd8cb5_1           5 KB  anaconda
    aiohttp-3.8.1              |   py37hca72f7f_1         567 KB  anaconda
    anaconda-custom            |           py37_0           6 KB  anaconda
    anaconda-client-1.10.0     |   py37hecd8cb5_0         142 KB  anaconda
    anaconda-navigator-2.2.0   |   py37hecd8cb5_0         7.2 MB  anaconda
    anyio-3.5.0                |   py37hecd8cb5_

_ipyw_jlab_nb_ext_co | 5 KB      | ##################################### | 100% 
nbconvert-5.6.1      | 495 KB    | ##################################### | 100% 
mkl_fft-1.3.1        | 182 KB    | ##################################### | 100% 
pillow-9.0.1         | 682 KB    | ##################################### | 100% 
grpcio-1.42.0        | 1.9 MB    | ##################################### | 100% 
dbus-1.13.18         | 550 KB    | ##################################### | 100% 
jupyter_core-4.10.0  | 81 KB     | ##################################### | 100% 
pip-21.2.2           | 2.0 MB    | ##################################### | 100% 
anaconda-navigator-2 | 7.2 MB    | ##################################### | 100% 
ruamel_yaml-0.15.100 | 252 KB    | ##################################### | 100% 
mccabe-0.6.1         | 14 KB     | ##################################### | 100% 
mkl-service-2.4.0    | 47 KB     | ##################################### | 100% 
lxml-4.8.0           | 1.2 M

In [1]:
import sys
print(sys.version)

3.7.13 (default, Mar 28 2022, 07:24:34) 
[Clang 12.0.0 ]


In [3]:
pip install 'scikit-learn==0.19.2' 

Collecting scikit-learn==0.19.2
  Downloading scikit_learn-0.19.2-cp37-cp37m-macosx_10_6_intel.macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl (7.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.1/7.1 MB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: scikit-learn
  Attempting uninstall: scikit-learn
    Found existing installation: scikit-learn 1.0.2
    Uninstalling scikit-learn-1.0.2:
      Successfully uninstalled scikit-learn-1.0.2
Successfully installed scikit-learn-0.19.2
Note: you may need to restart the kernel to use updated packages.
