## Matplotlib Basics

In [None]:
import matplotlib.pyplot as plt
# %matplotlib inline

In [None]:
# Triangle-shaped sample series: climbs from 1 to 9, then falls back to 1.
y = [*range(1, 10), *range(8, 0, -1)]
# 1-based position of each sample in y.
x = range(1, len(y) + 1)

# Draw through the explicit Figure/Axes interface.
fig, ax = plt.subplots(figsize=(8, 4))
ax.plot(x, y, "o")  # "o" format: markers only
ax.grid()
ax.set_title("Test Graph")
ax.set_xlabel("X-axis")
ax.set_ylabel("Y-label")

plt.show()

## Classification: Iris Dataset

* Dataset loading utilities ([sklearn.datasets](https://scikit-learn.org/stable/datasets.html#datasets))

In [None]:
from sklearn.datasets import load_iris

# Load the bundled iris dataset; the cells below inspect its fields.
iris = load_iris()

In [None]:
# List the fields available on the loaded dataset object.
iris.keys()

In [None]:
# Print the dataset's DESCR field; print() shows the text itself rather than its repr.
print(iris.DESCR)

In [None]:
# Feature names; reused below as the DataFrame column labels.
iris.feature_names

In [None]:
# Peek at the first five entries of the feature data.
iris.data[:5]

In [None]:
# Class names; used below to fit the LabelEncoder.
iris.target_names

In [None]:
# Target labels; added to the DataFrame below as the "species" column.
iris.target

### Exploring Dataset with Pandas

In [None]:
import pandas as pd

In [None]:
# Wrap the feature data in a DataFrame, labelling the columns with the feature names.
df = pd.DataFrame(iris.data, columns=iris.feature_names)
df.head()

In [None]:
# Summary statistics per column (at this point df holds only the feature columns).
df.describe()

In [None]:
# Append the class labels as a "species" column.
# NOTE: this mutates df in place, so output shown by earlier cells no longer
# reflects df's current columns.
df["species"] = iris.target
df.head()

In [None]:
# Pairwise scatter plots of the feature columns, colored by class label.
# The features are selected by name rather than by position, so the plot does
# not depend on "species" being the last column of df.
# The trailing semicolon hides the array-of-Axes repr in the cell output.
pd.plotting.scatter_matrix(
    df[iris.feature_names], figsize=(10, 10), c=df["species"]
);

### (Optional) Label Encoding

* Label Encoder ([preprocessing.LabelEncoder](https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.LabelEncoder.html))

In [None]:
from sklearn.preprocessing import LabelEncoder

# Fit the encoder on the dataset's class names so that names and integer codes
# can be converted in both directions.
# NOTE(review): LabelEncoder is documented to keep its classes in sorted order;
# decoding iris.target further down assumes that order matches the dataset's
# own label numbering -- confirm by comparing le.classes_ with iris.target_names.
le = LabelEncoder()
le.fit(iris.target_names)

In [None]:
# Classes the encoder holds after fitting.
le.classes_

In [None]:
# Encode class names as the encoder's integer codes.
le.transform(["setosa", "virginica"])

In [None]:
# Decode integer codes back to class names.
le.inverse_transform([1, 2, 0])

In [None]:
# Target labels again, for comparison with their decoded names in the next cell.
iris.target

In [None]:
# Decode every target label to its class name.
le.inverse_transform(iris.target)

### Training and Testing a Softmax Regression Model

* Logistic regression ([linear_model.LogisticRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html))

In [None]:
from sklearn.linear_model import LogisticRegression

# Unregularized softmax (multinomial logistic) regression.
# `multi_class` is deliberately not passed: the parameter is deprecated since
# scikit-learn 1.5, and the default solver already fits a multinomial model
# when the target has more than two classes.
# penalty=None turns regularization off; max_iter is raised to 5000
# (presumably so the solver has room to converge without a penalty term).
softmax_reg = LogisticRegression(max_iter=5000, penalty=None)
softmax_reg.fit(iris.data, iris.target)

# Alternative: the same fit using the DataFrame instead of the raw arrays.
# target = "species"
# softmax_reg.fit(df.drop(target, axis=1).values, df[target].values)

In [None]:
# Per-class probabilities for every sample; show the first ten rows.
y_prob = softmax_reg.predict_proba(iris.data)
y_prob[:10]

In [None]:
# Predicted class label for every sample; show the first ten.
y_cls = softmax_reg.predict(iris.data)
y_cls[:10]

* Accuracy ([metrics.accuracy_score](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.accuracy_score.html))

In [None]:
from sklearn.metrics import accuracy_score
# NOTE: y_cls was predicted on the same samples the model was fitted on
# (iris.data), so this is training-set accuracy, not a held-out test score.
print("Accuracy:", accuracy_score(iris.target, y_cls))

### Model Interpretation

In [None]:
# Learned parameters of the fitted softmax model.
print("Coefficients:", softmax_reg.coef_)
print("Intercept:", softmax_reg.intercept_)

## Regression: Diabetes Dataset

In [None]:
from sklearn.datasets import load_diabetes

# Load the bundled diabetes dataset and print its DESCR field.
diabetes = load_diabetes()
print(diabetes.DESCR)

In [None]:
# List the fields available on the loaded dataset object.
diabetes.keys()

In [None]:
# Names of the input features.
diabetes.feature_names

In [None]:
# Peek at the first five entries of the feature data.
diabetes.data[:5]

In [None]:
# Peek at the first ten regression targets.
diabetes.target[:10]

### Training and Testing a Linear Regression Model

* Linear regression ([linear_model.LinearRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html))

In [None]:
from sklearn.linear_model import LinearRegression

# Fit a linear regression on the full dataset (no samples are held out).
linear_reg = LinearRegression()
linear_reg.fit(diabetes.data, diabetes.target)

In [None]:
# Predictions for the same samples used for fitting; show the first ten.
y_pred = linear_reg.predict(diabetes.data)
y_pred[:10]

* Mean Squared Error (MSE, [metrics.mean_squared_error](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_squared_error.html))

In [None]:
from sklearn.metrics import mean_squared_error

# NOTE: y_pred was computed on the same samples the model was fitted on
# (diabetes.data), so this is the training error, not a held-out test error.
mse = mean_squared_error(diabetes.target, y_pred)
print("MSE:", mse)

In [None]:
fig, ax = plt.subplots(figsize=(8, 6))

# One marker per sample: true target on the x-axis, model prediction on the y-axis.
ax.plot(diabetes.target, y_pred, "o")

# Optional reference: a dashed red y = x line spanning the combined range of
# true and predicted values; points on the line are predicted exactly.
min_val = min(diabetes.target.min(), y_pred.min())
max_val = max(diabetes.target.max(), y_pred.max())
ax.plot([min_val, max_val], [min_val, max_val], linestyle='--', color='red', linewidth=2)

ax.set_title('True vs. Predicted Values')
ax.set_xlabel('True Values')
ax.set_ylabel('Predicted Values')
ax.grid()

plt.show()

### Model Interpretation

In [None]:
# Learned parameters of the fitted linear model.
print("Coefficients:", linear_reg.coef_)
print("Intercept:", linear_reg.intercept_)