<a href="https://colab.research.google.com/github/rahmatdarmawan4/Tugas/blob/main/fix2_multiclass_classification_colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Your Code Goes here
import pandas as pd

from sklearn import datasets
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import make_scorer
from sklearn.metrics import f1_score
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split

# Load the iris dataset and assemble features and target into one DataFrame.
iris_bunch = datasets.load_iris()

FEATURES = iris_bunch['feature_names']
TARGET = 'species'

iris_df = pd.DataFrame(iris_bunch['data'], columns=FEATURES)
iris_df[TARGET] = iris_bunch['target']

# Hold out 10% of the rows for testing. Stratifying keeps the class balance in
# both splits, and the fixed seed makes the split identical on every
# Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(
    iris_df[FEATURES],
    iris_df[TARGET],
    test_size=0.1,
    stratify=iris_df[TARGET],
    random_state=42)

# Solution Starts Here

# SGD shuffles the training rows while fitting, so it is seeded as well to
# keep the cross-validation scores reproducible.
estimator = SGDClassifier(random_state=42)

# 5-fold cross-validation on the training split, scored with micro-averaged F1.
# X_train already contains only the FEATURES columns.
scores = cross_val_score(
    estimator,
    X_train,
    y_train,
    cv=5,
    scoring=make_scorer(f1_score, average='micro')
)

scores.mean()

---

# Exercise 4: Winemaker Identification

Scikit-learn comes prepackaged with many toy datasets. These can be found in the [`sklearn.datasets` package](https://scikit-learn.org/stable/datasets/index.html). In this exercise we'll be working with the [wine dataset](https://scikit-learn.org/stable/datasets/index.html#wine-dataset).

The dataset contains information about the properties of wines produced by three different producers. The grapes that the producers used all come from the same region.

The columns are:

* alcohol
* malic_acid
* ash
* alcalinity_of_ash
* magnesium
* total_phenols
* flavanoids
* nonflavanoid_phenols
* proanthocyanins
* color_intensity
* hue
* od280/od315_of_diluted_wines
* proline

The target column contains the value 0, 1, or 2. Each value identifies a different producer.

Your task in this exercise is to create a classifier that can identify the producer based on the wine properties.

Use as many code blocks as necessary to examine the data and build and validate your model. Document your process using text blocks and/or comments in your code.

**Student Solution**

In [None]:
# Your Code Goes Here
import pandas as pd

from sklearn import datasets
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import make_scorer
from sklearn.metrics import f1_score
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split

# Load the wine toy dataset that ships with scikit-learn.
wine_bunch = datasets.load_wine()
# Display the keys available on the returned object.
wine_bunch.keys()


In [None]:

# Model inputs: the feature names exactly as provided by the dataset.
FEATURES = wine_bunch['feature_names']
# The target identifies the wine's producer (0, 1, or 2), so the column is
# labelled accordingly instead of reusing 'species' from the iris exercise.
TARGET = 'producer'

# One table holding the features plus the target column.
wine_df = pd.DataFrame(wine_bunch['data'], columns=FEATURES)
wine_df[TARGET] = wine_bunch['target']


In [None]:
# Preview the first ten rows of the combined feature/target table.
wine_df.head(10)

In [None]:
# Hold out 10% of the rows for the final validation. The hold-out is small, so
# stratify on the target to keep every producer represented in both splits in
# roughly its original proportion (same approach as the iris split).
X_train, X_test, y_train, y_test = train_test_split(
    wine_df[FEATURES],
    wine_df[TARGET],
    test_size=0.1,
    stratify=wine_df[TARGET],
    random_state=45)

# Number of training rows per class.
y_train.groupby(y_train).count()

In [None]:
# Number of held-out rows per class.
y_test.groupby(by=y_test).count()

In [None]:
# Baseline: a linear classifier trained with SGD on the raw (unscaled)
# features. The fixed seed keeps the fold scores reproducible across runs.
estimator = SGDClassifier(random_state=45)

# Use 5 folds. The earlier cv=62 split the training rows into folds of only two
# or three samples and asked for more folds than the smaller classes have
# members, which makes scikit-learn's stratified splitter emit a warning and
# yields very noisy per-fold scores.
scores = cross_val_score(
    estimator,
    X_train,
    y_train,
    cv=5
)

scores

In [None]:
# Average of the per-fold cross-validation scores.
scores.mean()

In [None]:
# Expose the held-out split under "validation" names for the evaluation cells.
X_validation, y_validation = X_test, y_test

In [None]:
# Summary statistics per feature, to compare their value ranges.
wine_df.loc[:, FEATURES].describe()

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardize every feature over the full table and summarise the result, to
# see what the scaled columns look like next to the raw statistics.
scaler = StandardScaler()

pd.DataFrame(
    scaler.fit_transform(wine_df[FEATURES]),
    columns=FEATURES
).describe()

In [None]:
from sklearn.pipeline import Pipeline

# Put the scaler inside the pipeline so that, during cross-validation, it is
# fitted on each fold's training portion only. Steps are (name, estimator)
# tuples, and the classifier is seeded so results are reproducible.
estimator = Pipeline(
  steps=[
    ('scale', StandardScaler()),
    ('classifier', SGDClassifier(random_state=45)),
  ]
)

# 5-fold cross-validation on the training split. The earlier cv=62 produced
# folds of two or three rows and more folds than the smaller classes have
# members, which triggers a warning from the stratified splitter.
scores = cross_val_score(
    estimator,
    X_train,
    y_train,
    cv=5,
    # We calculated F1 here too. This isn't required.
    scoring=make_scorer(f1_score, average='micro')
)
# Show the cross-validated estimate; otherwise the scores are never looked at.
print('Cross-validated micro F1: {:.2f}'.format(scores.mean()))

# Solution Starts Here
# Refit on the whole training split, then score once on the held-out rows.
estimator.fit(X_train, y_train)
predictions = estimator.predict(X_validation)
f1_score(y_validation, predictions, average='micro')

In [None]:
# Metric helpers used in this cell.
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import classification_report

# The target takes the values 0, 1 and 2. Passing the labels explicitly keeps
# the confusion matrix 3x3 and keeps the report's three names aligned with the
# classes even if the small hold-out happens to miss one of them (without
# `labels`, classification_report raises when the counts disagree).
CLASS_LABELS = [0, 1, 2]
CLASS_NAMES = ['Class 1', 'Class 2', 'Class 3']

confusion = confusion_matrix(y_validation, predictions, labels=CLASS_LABELS)
print('Confusion Matrix\n')
print(confusion)

print('\nAccuracy: {:.2f}\n'.format(accuracy_score(y_validation, predictions)))

# Precision / recall / F1 under each averaging strategy, with one blank line
# between the groups.
for position, average in enumerate(['micro', 'macro', 'weighted']):
    if position:
        print()
    title = average.capitalize()
    print('{} Precision: {:.2f}'.format(
        title, precision_score(y_validation, predictions, average=average)))
    print('{} Recall: {:.2f}'.format(
        title, recall_score(y_validation, predictions, average=average)))
    print('{} F1-score: {:.2f}'.format(
        title, f1_score(y_validation, predictions, average=average)))

print('\nClassification Report\n')
print(classification_report(
    y_validation,
    predictions,
    labels=CLASS_LABELS,
    target_names=CLASS_NAMES))