# Basic Prediction Process

In [2]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Seed shared by the split and the classifier so that Restart & Run All
# reproduces the same train/test partition and the same tree. Without it
# the accuracy below changes from run to run.
RANDOM_STATE = 42

# import data set
music_data = pd.read_csv('music.csv')

# input set: every column except the label
X = music_data.drop(columns=['genre'])
# output set: the label column
Y = music_data['genre']

print(X)
print(Y)

# split data with test: hold out 20% of the rows for evaluation
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=RANDOM_STATE
)

# train model on the training portion only
model = DecisionTreeClassifier(random_state=RANDOM_STATE)
model.fit(X_train, Y_train)

# predict labels for the held-out rows
predictions = model.predict(X_test)

# calculate score: fraction of held-out rows predicted correctly
score = accuracy_score(Y_test, predictions)
score

    age  gender
0    20       1
1    23       1
2    25       1
3    26       1
4    29       1
5    30       1
6    31       1
7    33       1
8    37       1
9    20       0
10   21       0
11   25       0
12   26       0
13   27       0
14   30       0
15   31       0
16   34       0
17   35       0
0        HipHop
1        HipHop
2        HipHop
3          Jazz
4          Jazz
5          Jazz
6     Classical
7     Classical
8     Classical
9         Dance
10        Dance
11        Dance
12     Acoustic
13     Acoustic
14     Acoustic
15    Classical
16    Classical
17    Classical
Name: genre, dtype: object


0.75

# Persisting Models

In [3]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from joblib import dump, load

from pathlib import Path

# Where the trained model is persisted between sessions.
MODEL_PATH = Path('music-recommender.joblib')

# Train and persist the model only when no saved copy exists yet, so this
# cell also works on a fresh checkout / fresh kernel instead of relying on
# a file produced by code that had since been commented out.
if not MODEL_PATH.exists():
    music_data = pd.read_csv('music.csv')
    X = music_data.drop(columns=['genre'])
    Y = music_data['genre']

    # Fit on the raw array (X.values) so the model can later be queried
    # with plain nested lists, as done below.
    model = DecisionTreeClassifier(random_state=42)
    model.fit(X.values, Y)

    # extract model
    dump(model, MODEL_PATH)

# load model
# NOTE: joblib files are pickle-based; only load files you created or trust,
# since loading can execute arbitrary code.
model = load(MODEL_PATH)

# One sample, in the same column order as the training features.
predictions = model.predict([[21, 1]])
predictions

array(['HipHop'], dtype=object)

# Visualizing a Decision Tree

In [4]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree

# import data set
music_data = pd.read_csv('music.csv')
# input set: every column except the label
X = music_data.drop(columns=['genre'])
# output set: the label column
Y = music_data['genre']

# train on the full data set; a fixed seed keeps the exported tree
# identical across re-runs
model = DecisionTreeClassifier(random_state=42)
model.fit(X.values, Y)

# Export visual model as a Graphviz .dot file.
# feature_names is taken from the frame's columns so it cannot drift from
# the data the model was fitted on; class_names is taken from the fitted
# model so the labels follow the class order the tree itself uses.
tree.export_graphviz(model,
                     out_file='music-recommender.dot',
                     feature_names=list(X.columns),
                     class_names=[str(label) for label in model.classes_],
                     label='all',
                     rounded=True,
                     filled=True)