# Machine Learning

    Mock data is used to demonstrate a decision tree classifier in machine learning.
    
    In the data, gender is encoded numerically: 1 represents male and 0 represents female.

In [89]:
# Importing the necessary libraries

import pandas as pd

from sklearn.tree import DecisionTreeClassifier # to use the decision tree algorithm.

from sklearn.model_selection import train_test_split # to split the data into test and train data.

from sklearn.metrics import accuracy_score # to calculate the accuracy score of the model.


# Read the music-preference records from 'music.csv' into a DataFrame.
music_data = pd.read_csv(filepath_or_buffer='music.csv')

# Leave the frame as the cell's last expression so the notebook renders it.
music_data

Unnamed: 0,age,gender,genre
0,20,1,HipHop
1,23,1,HipHop
2,25,1,HipHop
3,26,1,Jazz
4,29,1,Jazz
5,30,1,Jazz
6,31,1,Classical
7,33,1,Classical
8,37,1,Classical
9,20,0,Dance


   ## The data is divided into input X and output y

In [20]:
# Input features: every column of music_data except the 'genre' target.
# drop() returns a new frame, so music_data itself is left unchanged.
X = music_data.drop('genre', axis='columns')

X

Unnamed: 0,age,gender
0,20,1
1,23,1
2,25,1
3,26,1
4,29,1
5,30,1
6,31,1
7,33,1
8,37,1
9,20,0


In [21]:
# Target labels: the 'genre' column, selected as a Series.
y = music_data.loc[:, 'genre']

y

0        HipHop
1        HipHop
2        HipHop
3          Jazz
4          Jazz
5          Jazz
6     Classical
7     Classical
8     Classical
9         Dance
10        Dance
11        Dance
12     Acoustic
13     Acoustic
14     Acoustic
15    Classical
16    Classical
17    Classical
Name: genre, dtype: object

   # Model Training and Predictions

In [95]:
 
# Split the data set into training and test data, holding out 20% for testing.
# random_state pins the shuffle so that re-running the notebook produces the
# same split, and therefore the same accuracy score, every time.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [96]:
# Model creation.
# The tree permutes candidate features at random while searching for splits,
# so random_state is fixed to make the fitted tree identical across re-runs.
model = DecisionTreeClassifier(random_state=42)


# Model training/fitting on the training portion of the data.
# fit() returns the estimator, which the notebook displays as the cell output.
model.fit(X_train, y_train)

DecisionTreeClassifier()

In [97]:
# Have the fitted model label every row of the held-out test inputs.
predictions = model.predict(X_test)

# Compare the predicted labels with the true test labels.
score = accuracy_score(y_true=y_test, y_pred=predictions)

score

1.0

# Model Persistence

    
    Persistence is a way of storing the model to be reused. That way it does not need to be retrained.
    
    This is done with the use of the 'joblib' library.
    

In [98]:

import joblib as jb

# Write the fitted model to a file named 'trained_model.joblib'.
# dump() returns the list of file names it wrote, shown as the cell output.
jb.dump(value=model, filename='trained_model.joblib')


['trained_model.joblib']

    The model is recalled to be tested again.
    
    This time, it does not need training.

In [100]:

# Restore the persisted model from disk; no retraining is required.
# NOTE: joblib.load is pickle-based, so only load files from a trusted source.
trained_model = jb.load(filename='trained_model.joblib')


In [104]:

# Predict the music choice for a 24-year-old female and a 56-year-old male.
# The inputs are wrapped in a DataFrame with the same column names as the
# training features ('age', 'gender'), which makes the feature order explicit
# and avoids scikit-learn's "X does not have valid feature names" warning.
new_listeners = pd.DataFrame([[24, 0], [56, 1]], columns=['age', 'gender'])
trained_model.predict(new_listeners)



array(['Dance', 'Classical'], dtype=object)

    The model predicted 'Dance' for the 24-year-old female and 'Classical' for the 56-year-old male.