# 机器学习
## 分析某一性别的用户在某一年龄时喜欢的音乐（gender：1-男性；0-女性）

In [8]:
import pandas as pd

# Load the music-preference dataset from the working directory.
# Per the notebook heading, gender is encoded as 1 = male, 0 = female.
music_data = pd.read_csv(filepath_or_buffer='music.csv')
music_data

Unnamed: 0,age,gender,genre
0,20,1,HipHop
1,23,1,HipHop
2,25,1,HipHop
3,26,1,Jazz
4,29,1,Jazz
5,30,1,Jazz
6,31,1,Classical
7,33,1,Classical
8,37,1,Classical
9,20,0,Dance


## 将数据集分开

In [11]:
# Input set (features): every column of the dataset except the target `genre`.
X = music_data.drop(labels=['genre'], axis='columns')
# Preview only the first rows.
X.head()

Unnamed: 0,age,gender
0,20,1
1,23,1
2,25,1
3,26,1
4,29,1


In [13]:
# Output set (target): the genre label of each row, selected as a Series.
Y = music_data.loc[:, 'genre']
Y

0        HipHop
1        HipHop
2        HipHop
3          Jazz
4          Jazz
5          Jazz
6     Classical
7     Classical
8     Classical
9         Dance
10        Dance
11        Dance
12     Acoustic
13     Acoustic
14     Acoustic
15    Classical
16    Classical
17    Classical
Name: genre, dtype: object

# 决策树算法

In [14]:
from sklearn.tree import DecisionTreeClassifier

# scikit-learn randomly permutes the features at every split, so equally good
# splits can be chosen differently from run to run. Fixing random_state makes
# the fitted tree identical on every Restart & Run All.
model = DecisionTreeClassifier(random_state=42)

# Train on the full dataset: X holds the input columns, Y the genre labels.
model.fit(X, Y)

# Show the training data again for reference next to the fitted model.
music_data

Unnamed: 0,age,gender,genre
0,20,1,HipHop
1,23,1,HipHop
2,25,1,HipHop
3,26,1,Jazz
4,29,1,Jazz
5,30,1,Jazz
6,31,1,Classical
7,33,1,Classical
8,37,1,Classical
9,20,0,Dance


*这个数据集中没有年龄为21或22的男性，我们预测他们应该喜欢HipHop，并希望这个程序也能预测出相同的答案*

In [17]:
# The model was fitted on a DataFrame, so it remembers the column names.
# Querying it with a DataFrame that carries the same names avoids scikit-learn's
# "X does not have valid feature names" warning and makes the meaning of each
# value explicit instead of relying on positional order.
# Rows: 21-year-old male, 22-year-old male, 22-year-old female.
predictions = model.predict(
    pd.DataFrame([[21, 1], [22, 1], [22, 0]], columns=['age', 'gender'])
)
predictions

array(['HipHop', 'HipHop', 'Dance'], dtype=object)

## 测试

In [33]:
from sklearn.model_selection import train_test_split
# Accuracy metric
from sklearn.metrics import accuracy_score

# Hold out 20% of the rows for evaluation. random_state pins the shuffle so the
# split -- and therefore the accuracy below -- is the same on every run.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

# NOTE: this refits the existing `model` on the training split only, so any
# later cell that uses `model` gets an estimator that never saw the held-out rows.
model.fit(X_train, Y_train)
predictions2 = model.predict(X_test)

# Fraction of held-out rows whose predicted genre equals the true genre.
# The dataset is small, so this score rests on only a few test rows.
score = accuracy_score(Y_test, predictions2)
score

1.0

## 用joblib保存模型

In [35]:
import joblib

# Serialize the fitted estimator to a file in the working directory so it can be
# reloaded later without retraining. dump() returns the list of files written.
joblib.dump(value=model, filename='music_recommender.joblib')

['music_recommender.joblib']

### 从joblib中加载模型

In [37]:
# NOTE(review): joblib files are pickle-based, so loading one can execute
# arbitrary code -- only load model files that come from a trusted source.
model2 = joblib.load('music_recommender.joblib')

# Query with named columns (21-year-old male) so the input matches the feature
# names the model was trained with and no feature-name warning is raised.
predictions = model2.predict(pd.DataFrame([[21, 1]], columns=['age', 'gender']))
predictions

array(['HipHop'], dtype=object)