## Steps in a Data Science / Machine Learning Project
### 1. Import Data 
### 2. Clean Data 
### 3. Split Data into Training and Test Sets 
### 4. Create the Model 
### 5. Train the Model 
### 6. Make Predictions

## Import Data

In [2]:
import pandas as pd 
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import joblib
from sklearn import tree 


# Read the music data set (age, gender and genre columns) into a DataFrame.
csv_path = "./data_sets/music.csv"
music_data = pd.read_csv(csv_path)

# Leave the frame as the cell's last expression so Jupyter renders it.
music_data

Unnamed: 0,age,gender,genre
0,20,1,HipHop
1,23,1,HipHop
2,25,1,HipHop
3,26,1,Jazz
4,29,1,Jazz
5,30,1,Jazz
6,31,1,Classical
7,33,1,Classical
8,37,1,Classical
9,20,0,Dance


In [3]:
# Dimensions of the DataFrame as a (rows, columns) tuple
music_data.shape

(18, 3)

In [4]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns
music_data.describe()

Unnamed: 0,age,gender
count,18.0,18.0
mean,27.944444,0.5
std,5.12746,0.514496
min,20.0,0.0
25%,25.0,0.0
50%,28.0,0.5
75%,31.0,1.0
max,37.0,1.0


In [5]:
# View the DataFrame's contents as a NumPy array (one inner array per row)
music_data.values

array([[20, 1, 'HipHop'],
       [23, 1, 'HipHop'],
       [25, 1, 'HipHop'],
       [26, 1, 'Jazz'],
       [29, 1, 'Jazz'],
       [30, 1, 'Jazz'],
       [31, 1, 'Classical'],
       [33, 1, 'Classical'],
       [37, 1, 'Classical'],
       [20, 0, 'Dance'],
       [21, 0, 'Dance'],
       [25, 0, 'Dance'],
       [26, 0, 'Acoustic'],
       [27, 0, 'Acoustic'],
       [30, 0, 'Acoustic'],
       [31, 0, 'Classical'],
       [34, 0, 'Classical'],
       [35, 0, 'Classical']], dtype=object)

In [6]:
# Feature matrix: every column of music_data except the "genre" target.
X = music_data.drop("genre", axis="columns")
X

Unnamed: 0,age,gender
0,20,1
1,23,1
2,25,1
3,26,1
4,29,1
5,30,1
6,31,1
7,33,1
8,37,1
9,20,0


In [7]:
# Target vector: the "genre" label of each row.
y = music_data.loc[:, "genre"]
y

0        HipHop
1        HipHop
2        HipHop
3          Jazz
4          Jazz
5          Jazz
6     Classical
7     Classical
8     Classical
9         Dance
10        Dance
11        Dance
12     Acoustic
13     Acoustic
14     Acoustic
15    Classical
16    Classical
17    Classical
Name: genre, dtype: object

In [8]:
# Create the decision-tree classifier. A fixed random_state keeps tie-breaking
# between equally good splits reproducible across runs.
model = DecisionTreeClassifier(random_state=0)
model.fit(X, y)     # train on all rows: learn the (age, gender) -> genre mapping

# Predict genres for two new data points of [age, gender]: [21, 1] and [22, 0].
# The samples are wrapped in a DataFrame that reuses the training column names
# so scikit-learn does not warn that the input lacks valid feature names.
samples = pd.DataFrame([[21, 1], [22, 0]], columns=X.columns)
predictions = model.predict(samples)
predictions

array(['HipHop', 'Dance'], dtype=object)

In [17]:
# Hold out 20% of the rows for testing. random_state pins the shuffle so the
# split (and every result derived from it) is identical on each run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
print("##### X_train ####")
print(X_train)
print("##### X_test #####")
print(X_test)
print("##### y_train ####")
print(y_train)
print("##### y_test #####")
print(y_test)


##### X_train ####
    age  gender
16   34       0
0    20       1
2    25       1
15   31       0
10   21       0
13   27       0
4    29       1
11   25       0
14   30       0
1    23       1
5    30       1
12   26       0
8    37       1
3    26       1
##### X_test #####
    age  gender
7    33       1
6    31       1
9    20       0
17   35       0
##### y_train ####
16    Classical
0        HipHop
2        HipHop
15    Classical
10        Dance
13     Acoustic
4          Jazz
11        Dance
14     Acoustic
1        HipHop
5          Jazz
12     Acoustic
8     Classical
3          Jazz
Name: genre, dtype: object
##### y_test #####
7     Classical
6     Classical
9         Dance
17    Classical
Name: genre, dtype: object


In [18]:
# Evaluate on the held-out split with a separate estimator. Re-fitting `model`
# here would overwrite the classifier trained on the full data set, so the
# model used by later cells would depend on which cells ran and in what order.
eval_model = DecisionTreeClassifier(random_state=0)
eval_model.fit(X_train, y_train)
predictions = eval_model.predict(X_test)
print("**** Predictions ****")
print(predictions)

# Fraction of test rows whose predicted genre matches the true genre.
score = accuracy_score(y_test, predictions)
print("**** Score ****")
print(score)

**** Predictions ****
['Classical' 'Classical' 'Dance' 'Classical']
**** Score ****
1.0


In [11]:
# Persist the fitted classifier to disk with joblib so it can be reloaded
# later without retraining. The call returns the list of file names written.
model_path = "./data_sets/music-recommender.joblib"
joblib.dump(model, model_path)

['./data_sets/music-recommender.joblib']

In [12]:
# Load the persisted classifier back from music-recommender.joblib.
# NOTE: joblib files are pickle-based, so only load files from a trusted source.
new_model = joblib.load("./data_sets/music-recommender.joblib")

# Predict with the reloaded model for one data point, [age, gender] = [21, 1].
# Naming the columns as in training avoids scikit-learn's feature-name warning.
sample = pd.DataFrame([[21, 1]], columns=["age", "gender"])
predictions = new_model.predict(sample)
print("**** Predictions ****")
print(predictions)

**** Predictions ****
['HipHop']


In [13]:
# Export the fitted tree to a Graphviz .dot file for visualisation.
# class_names must line up with the classes the model was actually fitted on
# (new_model.classes_); deriving them from y would mislabel nodes whenever the
# model was trained on a subset of the rows that is missing a genre.
tree.export_graphviz(
    new_model,
    out_file="./data_sets/music-recommender.dot",
    feature_names=["age", "gender"],
    class_names=list(new_model.classes_),
    label="all",
    rounded=True,
    filled=True,
)
