In [3]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Fixed seed so the train/test split, the fitted tree, and the reported
# accuracy are identical on every Restart & Run All.
SEED = 42

music_data = pd.read_csv("music.csv")

# X holds the input features (every column except the target);
# y holds the output labels the model learns to predict.
X = music_data.drop(columns=["genre"])
y = music_data["genre"]

# Hold out 20% of the rows for testing; the remaining 80% is used for training.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=SEED
)

# Fit a decision tree on the training rows only, so the test rows stay unseen.
model = DecisionTreeClassifier(random_state=SEED)
model.fit(X_train, y_train)

# predict() takes a 2D array-like with one row per sample,
# e.g. model.predict([[21, 1], [22, 0]]); here we score the held-out rows.
predictions = model.predict(X_test)

# Fraction of held-out rows whose predicted genre matches the true genre.
score = accuracy_score(y_test, predictions)
score

1.0

In [4]:
# Train once on the full dataset and persist the fitted model, so later
# predictions can load it from disk instead of retraining every time.
# joblib is imported directly: the sklearn.externals.joblib alias was
# deprecated in scikit-learn 0.21 and removed in 0.23.
import joblib
import pandas as pd
from sklearn.tree import DecisionTreeClassifier

music_data = pd.read_csv("music.csv")
X = music_data.drop(columns=["genre"])
y = music_data["genre"]

# Seed the tree so the saved artifact is the same on every re-run.
model = DecisionTreeClassifier(random_state=42)
model.fit(X, y)

# dump() writes the fitted estimator to disk and returns the list of files written.
joblib.dump(model, "music_recommender.joblib")



['music_recommender.joblib']

In [5]:
# Load the previously persisted model and use it to make a prediction
# without retraining.
# joblib is imported directly: the sklearn.externals.joblib alias was
# deprecated in scikit-learn 0.21 and removed in 0.23.
import joblib
import pandas as pd
from sklearn.tree import DecisionTreeClassifier

# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load model files that come from a trusted source.
model = joblib.load("music_recommender.joblib")

# predict() takes a 2D array-like: one inner list per sample.
# NOTE(review): the values must follow the column order the model was
# trained on (presumably [age, gender]) - confirm against music.csv.
predictions = model.predict([[21, 1]])
predictions

array(['HipHop'], dtype=object)

In [6]:
# visualizing the decision tree
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree

music_data = pd.read_csv("music.csv")
X = music_data.drop(columns=["genre"])
y = music_data["genre"]

model = DecisionTreeClassifier()
model.fit(X, y)

# Write the fitted tree in Graphviz DOT format for visualization.
tree.export_graphviz(
    model,
    out_file="music_recommender.dot",
    # Take the names from the training frame so node labels always match
    # the columns (and their order) the tree was fitted on.
    feature_names=list(X.columns),
    # Class names must be given in ascending order of the class labels.
    class_names=sorted(y.unique()),
    label="all",
    # These flags are booleans; the strings "true"/"false" are both truthy
    # and are rejected by scikit-learn versions that validate parameter types.
    rounded=True,
    filled=True,
)