In [34]:
# Import the data
# Dataset download link: http://bit.ly/music-csv
# The file is read through a relative path, so 'music.csv' must be in the
# notebook's working directory.
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the CSV into a DataFrame (age, gender and genre columns, per the output below).
music_data = pd.read_csv('music.csv')
# A bare name as the last expression makes the notebook render the DataFrame.
music_data

Unnamed: 0,age,gender,genre
0,20,1,HipHop
1,23,1,HipHop
2,25,1,HipHop
3,26,1,Jazz
4,29,1,Jazz
5,30,1,Jazz
6,31,1,Classical
7,33,1,Classical
8,37,1,Classical
9,20,0,Dance


In [35]:
# Prepare the data
# A real project would also clean it at this stage (e.g. remove duplicates and
# null values); this cell only separates the dataset into inputs and outputs.
y = music_data.loc[:, 'genre']                 # output: the genre column to predict
X = music_data.drop('genre', axis='columns')   # input: every column except genre
X

Unnamed: 0,age,gender
0,20,1
1,23,1
2,25,1
3,26,1
4,29,1
5,30,1
6,31,1
7,33,1
8,37,1
9,20,0


In [36]:
# Learning and predicting
# Fit a decision tree on the whole dataset, then ask it for the genre liked by
# a 21-year-old male (gender=1) and a 22-year-old female (gender=0).
model = DecisionTreeClassifier()
model.fit(X, y)

# Wrap the query rows in a DataFrame that reuses X's column names: the model was
# fitted on a DataFrame, and recent scikit-learn versions warn when predict()
# receives input that carries no matching feature names.
new_listeners = pd.DataFrame([[21, 1], [22, 0]], columns=X.columns)
predictions = model.predict(new_listeners)
predictions




array(['HipHop', 'Dance'], dtype=object)

In [37]:
# Measure accuracy of the model
# Hold out 20% of the rows for testing so the score is computed on data the
# model did not see during training.
RANDOM_STATE = 42  # fixed seed: makes the split, the fitted tree and the score repeatable across runs

# train_test_split returns four objects: train/test inputs and train/test outputs
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

# Train on the training portion only
model2 = DecisionTreeClassifier(random_state=RANDOM_STATE)
model2.fit(X_train, y_train)

# Compare the predictions for the held-out rows with their true labels
predictions2 = model2.predict(X_test)
score = accuracy_score(y_test, predictions2)
score

0.5

In [43]:
# Model persistence
# Training a model is time consuming, so build and train it once, then save it to a file.
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
import joblib  # previously imported as: from sklearn.externals import joblib

# Load the dataset and separate the target column from the input columns
music_data = pd.read_csv('music.csv')
y = music_data['genre']
X = music_data.drop(columns=['genre'])

# Create and train the model in one step (fit returns the estimator itself)
model = DecisionTreeClassifier().fit(X, y)

# Save the trained model; the list of written file names is the cell's output
joblib.dump(model, 'music-recommender.joblib')

['music-recommender.joblib']

In [46]:
# Using persisted models
# Load the previously saved classifier instead of retraining it.
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
import joblib

# NOTE: joblib files are pickle-based, so only load files from a trusted source.
model = joblib.load('music-recommender.joblib')

# Query with a DataFrame that carries the training column names (age, gender),
# so recent scikit-learn versions do not warn about missing feature names.
# The row is a 21-year-old with gender=1.
new_listener = pd.DataFrame([[21, 1]], columns=['age', 'gender'])
predictions = model.predict(new_listener)
predictions



array(['HipHop'], dtype=object)

In [51]:
# Visualizing a decision tree
# Train a tree and export it as a Graphviz DOT file ('music-recommender.dot').
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree

music_data = pd.read_csv('music.csv')
X = music_data.drop(columns=['genre'])
y = music_data['genre']

model = DecisionTreeClassifier()
model.fit(X, y)

# Take the labels from the training data and the fitted model instead of
# hard-coding them, so they always match what the tree was trained on.
tree.export_graphviz(model, out_file='music-recommender.dot',
                     feature_names=list(X.columns),     # one name per input column, in training order
                     class_names=list(model.classes_),  # class labels in the order the model stores them
                     label='all',
                     rounded=True,
                     filled=True)