In [20]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split

# Load the music-preference dataset from 'music.csv' (resolved relative to the working directory)
music_data = pd.read_csv('music.csv')
# Bare last expression so the notebook renders the DataFrame as a table
music_data

Unnamed: 0,age,gender,genre
0,20,1,HipHop
1,23,1,HipHop
2,25,1,HipHop
3,26,1,Jazz
4,29,1,Jazz
5,30,1,Jazz
6,31,1,Classical
7,33,1,Classical
8,37,1,Classical
9,20,0,Dance


In the CSV file above, the gender column is encoded as 0 = Female and 1 = Male.

In [7]:
# Feature matrix (named X by convention): every column of music_data except the 'genre' label
X = music_data.drop('genre', axis='columns')
X

Unnamed: 0,age,gender
0,20,1
1,23,1
2,25,1
3,26,1
4,29,1
5,30,1
6,31,1
7,33,1
8,37,1
9,20,0


In [8]:
# Target vector (named y by convention): the 'genre' label of every row
y = music_data.loc[:, 'genre']
y

0        HipHop
1        HipHop
2        HipHop
3          Jazz
4          Jazz
5          Jazz
6     Classical
7     Classical
8     Classical
9         Dance
10        Dance
11        Dance
12     Acoustic
13     Acoustic
14     Acoustic
15    Classical
16    Classical
17    Classical
Name: genre, dtype: object

In [9]:
# Fit a decision-tree classifier on the full dataset, then predict the genre for two
# unseen listeners: a 21-year-old male and a 22-year-old female (gender: 1 = male, 0 = female).
# random_state pins the tree's internal randomness so re-running the cell builds the same tree.
model = DecisionTreeClassifier(random_state=42)
model.fit(X, y)
# Wrap the samples in a DataFrame that carries X's column names: a bare nested list drops
# the feature names the model was fitted with (newer scikit-learn versions warn about that).
new_listeners = pd.DataFrame([[21, 1], [22, 0]], columns=X.columns)
predictions = model.predict(new_listeners)
predictions



array(['HipHop', 'Dance'], dtype=object)

In [37]:
# A model scored on the same rows it was trained on looks better than it really is.
# Hold out 20% of the rows as a test set the model never sees while fitting, so its
# predictions can be compared against known answers. Splitting does not make the model
# more accurate; it makes the accuracy measurement honest.
# random_state fixes the shuffle so the same rows land in train/test on every re-run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Refit the existing model on the training portion only
model.fit(X_train, y_train)
predictions = model.predict(X_test)
predictions

array(['Acoustic', 'HipHop', 'Dance', 'Jazz'], dtype=object)

In [38]:
from sklearn.metrics import accuracy_score

# Score the held-out predictions against the true labels in y_test
score = accuracy_score(y_true=y_test, y_pred=predictions)
score

0.5

In [39]:
import joblib

# Persistence: write the fitted model to 'music-recom.joblib' so it can be reused later without retraining
joblib.dump(model, 'music-recom.joblib')

['music-recom.joblib']

In [40]:
# Reload the persisted model from disk; it can then be used without retraining.
# NOTE: joblib files are pickle-based, so only load files that come from a trusted source.
model_loaded = joblib.load('music-recom.joblib')
# Predict with the reloaded object (not the in-memory `model`) so this cell actually
# exercises the saved file and also works on a fresh kernel. Sample: 21-year-old male.
sample = pd.DataFrame([[21, 1]], columns=['age', 'gender'])
predictions = model_loaded.predict(sample)
predictions



array(['HipHop'], dtype=object)