## Machine Learning (ML) Basics Tutorial

### Example 1: Basic Classification with Iris Dataset

**Objective:** Classify iris flowers (ดอกไอริส) into three species using petal (กลีบดอก) and sepal (กลีบเลี้ยง) measurements.

In [None]:
# Install a pip package in the current Jupyter kernel
import sys
!{sys.executable} -m pip install pip install pandas numpy matplotlib seaborn scikit-learn scipy

In [None]:
# Importing libraries
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Fetch the Iris dataset bundled with scikit-learn and separate inputs from labels
iris = load_iris()
X = iris.data    # feature matrix (the measurements)
y = iris.target  # class labels (the species)

In [None]:
# Pairwise scatter plots of every feature, with KDE curves on the diagonal
iris_frame = pd.DataFrame(X, columns=iris.feature_names)
sns.pairplot(iris_frame, diag_kind="kde")
plt.show()

In [None]:
# Split the data: hold out 30% of the samples for testing.
# random_state pins the shuffle so the accuracy and confusion matrix below are
# reproducible from run to run; stratify=y keeps the class proportions equal in
# the training and test subsets, which matters for a dataset this small.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

In [None]:
# Configure the classifier, then fit it on the training split
logreg_options = {
    'solver': 'saga',   # non-default solver
    'max_iter': 2000,   # raised iteration cap
}
model = LogisticRegression(**logreg_options)
model.fit(X_train, y_train)

In [None]:
# Score the fitted model on the held-out split
predictions = model.predict(X_test)
test_accuracy = accuracy_score(y_test, predictions)
print("Accuracy:", test_accuracy)

In [None]:
# Confusion matrix as an annotated heatmap (integer counts in each cell)
cm = confusion_matrix(y_test, predictions)
sns.heatmap(data=cm, annot=True, fmt="d")
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()

### Example 2: Sentiment Analysis with IMDb Reviews

**Objective:** Determine whether movie reviews are positive or negative using text data.

In [None]:
# Install a pip package in the current Jupyter kernel
import sys
!{sys.executable} -m pip install pip install tensorflow

In [None]:
# Importing libraries
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, GlobalAveragePooling1D
from tensorflow.keras.datasets import imdb
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc

In [None]:
# Load the IMDb reviews with num_words=10000, then unpack each split into data and labels
imdb_train, imdb_test = imdb.load_data(num_words=10000)
train_data, train_labels = imdb_train
test_data, test_labels = imdb_test

In [None]:
# Data preprocessing: bring every review to a common length of 256 tokens,
# filling with 0 at the end ('post') where a review is shorter
pad_sequences = tf.keras.preprocessing.sequence.pad_sequences
pad_options = dict(value=0, padding='post', maxlen=256)
train_data = pad_sequences(train_data, **pad_options)
test_data = pad_sequences(test_data, **pad_options)

In [None]:
# Build the model layer by layer
model = Sequential()
model.add(Embedding(10000, 16))             # 10000-entry vocabulary -> 16-dim vectors
model.add(GlobalAveragePooling1D())         # average the vectors over the sequence axis
model.add(Dense(16, activation='relu'))     # small hidden layer
model.add(Dense(1, activation='sigmoid'))   # single sigmoid output unit

# Binary classification setup: Adam optimizer, cross-entropy loss, accuracy metric
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train the model.
# Validation uses a slice held out from the training data (validation_split) rather
# than the test set, so the test set stays unseen until the final evaluation below.
# The returned History object records per-epoch 'accuracy', 'val_accuracy', 'loss'
# and 'val_loss' for the plots that follow.
history = model.fit(
    train_data,
    train_labels,
    epochs=30,
    batch_size=512,
    validation_split=0.2,
    verbose=1,
)

In [None]:
# Pull the per-epoch metric curves out of the training history
metric_keys = ('accuracy', 'val_accuracy', 'loss', 'val_loss')
acc, val_acc, loss, val_loss = (history.history[key] for key in metric_keys)
epochs = range(1, len(acc) + 1)  # 1-based epoch numbers for the x-axis

In [None]:
# Plot training & validation accuracy values
plt.plot(epochs, acc, 'bo', label='Training accuracy')
plt.plot(epochs, val_acc, 'b', label='Validation accuracy')
plt.title('Training and validation accuracy')
plt.legend()
# Render this figure here; calling plt.figure() at the end of the cell would only
# open a new, empty figure instead of displaying the accuracy plot.
plt.show()

In [None]:
# Training vs. validation loss per epoch (dots = training, line = validation)
loss_curves = (
    (loss, 'bo', 'Training loss'),
    (val_loss, 'b', 'Validation loss'),
)
for curve, line_style, curve_label in loss_curves:
    plt.plot(epochs, curve, line_style, label=curve_label)
plt.title('Training and validation loss')
plt.legend()
plt.show()

In [None]:
# Making predictions
# Run the trained model over the padded test reviews. The model's last layer is a
# single sigmoid unit, so each prediction is a score between 0 and 1.
predictions = model.predict(test_data)

In [None]:
# ROC curve points from the predicted scores, and the area under that curve
# (the thresholds returned by roc_curve are not needed here)
fpr, tpr, _ = roc_curve(y_true=test_labels, y_score=predictions)
roc_auc = auc(x=fpr, y=tpr)

In [None]:
# Draw the ROC curve against the diagonal reference line
roc_label = 'ROC curve (area = %0.2f)' % roc_auc
diagonal = [0, 1]

plt.figure()
plt.plot(fpr, tpr, color='darkorange', lw=2, label=roc_label)
plt.plot(diagonal, diagonal, color='navy', lw=2, linestyle='--')

# Axis ranges; the y-axis gets a little headroom above 1.0
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])

plt.title('Receiver operating characteristic example')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc="lower right")
plt.show()

### Example 3: Customer Segmentation with Mall Customer Data

**Objective:** Segment customers based on their spending patterns and characteristics.

In [None]:
# Importing libraries
import pandas as pd
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

In [None]:
# Read the customer data (point the path at your copy of the dataset)
customers_raw = pd.read_csv('Mall_Customers.csv')

# Replace the categorical 'Gender' column with one-hot indicator columns
df = pd.get_dummies(customers_raw, columns=['Gender'])

In [None]:
# Select features: annual income and spending score, matching the axis labels of the
# cluster plot. Columns are picked by name rather than by position, because one-hot
# encoding 'Gender' removes that column and appends the indicator columns, which
# shifts positional indices (positions 1 and 2 no longer point at these two features).
# NOTE(review): the column names assume the standard Mall_Customers.csv header;
# adjust them if your copy of the dataset names these columns differently.
feature_columns = ['Annual Income (k$)', 'Spending Score (1-100)']
X = df[feature_columns].values

In [None]:
# Apply KMeans clustering with 5 clusters.
# random_state fixes the centroid initialisation so cluster ids (and therefore the
# plot colours) are the same on every run; n_init is set explicitly because its
# default differs between scikit-learn versions.
kmeans = KMeans(n_clusters=5, n_init=10, random_state=42)
y_kmeans = kmeans.fit_predict(X)  # cluster index assigned to each row of X

In [None]:
# Scatter the two selected features, coloured by assigned cluster
x_coords, y_coords = X[:, 0], X[:, 1]
plt.scatter(x_coords, y_coords, c=y_kmeans, cmap='rainbow')
plt.title('Customer Segments')
plt.xlabel('Annual Income')
plt.ylabel('Spending Score')
plt.show()