# Machine Learning

#### General Machine Learning Steps

1. Import The Data
   - 1.1 Understand The Data
2. Clean The Data And Split The Data Into Training/Test Sets
3. Create A Model
4. Calculate The Accuracy Of Model
5. Model Persisting
6. Decision Tree

### Step 1: Import The Data

In [None]:
import warnings

import joblib
# `plt` is the conventional alias for the pyplot interface; aliasing the
# top-level `matplotlib` package would make calls such as plt.show() fail.
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import tree
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# NOTE(review): this silences every warning for the whole session, including
# scikit-learn's input-validation warnings. Consider narrowing the filter.
warnings.filterwarnings('ignore')


In [None]:
# Load the dataset into a DataFrame; the file is decoded as ISO-8859-1 (Latin-1).
df = pd.read_csv(
    filepath_or_buffer='music.csv',
    encoding='ISO-8859-1',
)

### Step 1.1: Understand The Data

In [None]:
# Print the number of rows, then the number of columns, one per line.
print(*df.shape, sep='\n')

In [None]:
# Count the rows that are exact duplicates of an earlier row.
df.duplicated().sum()

In [None]:
# Number of missing values in each column.
df.isna().sum()

In [None]:
# Show the data type pandas assigned to each column.
df.dtypes

In [None]:
# Descriptive summary statistics of the DataFrame's columns.
df.describe()

In [None]:
# Missing-value count per column, largest first.
null_counts = df.isna().sum()
missingData = null_counts.sort_values(ascending=False)

# Draw the counts as a horizontal bar chart.
missingData.plot.barh()

In [None]:
# Print how many distinct values each column holds.
# dropna=False keeps a missing value counted as its own entry.
for column in df.columns:
    print(f"{column} : {df[column].nunique(dropna=False)}")

### Step 2: Clean The Data And Split The Data Into Training/Test Sets

In [None]:
# Input features: the 'age' and 'gender' columns.
m = df.loc[:, ['age', 'gender']]
# Prediction target: the 'genre' column.
n = df.loc[:, 'genre']

### Step 3: Create A Model

In [None]:
# Build a decision-tree classifier with default settings and train it on the
# full feature matrix (m) and target (n).
model = DecisionTreeClassifier()
model.fit(X=m, y=n)

In [None]:
# Two hand-written samples, each row being [age, gender] in the same column
# order the model was trained on.
# They are wrapped in a DataFrame with the training column names because the
# model was fitted on a DataFrame; a bare nested list makes scikit-learn warn
# that the input lacks valid feature names.
samples = pd.DataFrame([[21, 1], [22, 0]], columns=m.columns)
predictions = model.predict(samples)

predictions

### Step 4: Calculate The Accuracy Of Model

In [None]:
# Hold out 20% of the rows for testing. The split is random, so the resulting
# accuracy can differ from run to run.
mTrain, mTest, nTrain, nTest = train_test_split(
    m,
    n,
    test_size=0.2,
)

# Refit the model on the training portion only, then predict the held-out rows.
predictions = model.fit(mTrain, nTrain).predict(mTest)

In [None]:
# Fraction of held-out rows whose predicted genre matches the true genre.
score = accuracy_score(y_true=nTest, y_pred=predictions)
score

### Step 5: Model Persisting

In [None]:
# Rebuild the features and target from the full dataset.
m = df.loc[:, ['age', 'gender']]
n = df.loc[:, 'genre']

# Train a fresh classifier on all rows.
model = DecisionTreeClassifier().fit(m, n)

# Save the trained model to disk so it can be reused without retraining.
joblib.dump(model, 'musicRecommender.joblib')

In [None]:
# Read the saved model back from disk and display it.
joblib.load(filename='musicRecommender.joblib')

In [None]:
# Restore the persisted classifier.
model = joblib.load('musicRecommender.joblib')

# Each row is [age, gender]. The samples are passed as a DataFrame with the
# column names used at training time, because a bare nested list makes
# scikit-learn warn that the input lacks valid feature names.
samples = pd.DataFrame([[21, 1], [22, 0]], columns=['age', 'gender'])
predictions = model.predict(samples)

predictions

### Step 6: Decision Tree

In [None]:
# Write the fitted tree to a Graphviz DOT file for visualization.
tree.export_graphviz(
    model,
    out_file='musicRecommender.dot',
    feature_names=['age', 'gender'],
    # Take the class labels from the fitted model (already in sorted order)
    # rather than from the notebook-level target series. The names then always
    # match the classes the model learned, and this cell still works when only
    # the persisted model has been loaded.
    class_names=[str(label) for label in model.classes_],
    label='all',
    rounded=True,
    filled=True,
)