## Imports + Transformations

In [None]:
# Import statements

import numpy as np
import pandas as pd

from sklearn.cluster import DBSCAN, AgglomerativeClustering, KMeans
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score, adjusted_rand_score
from sklearn.mixture import GaussianMixture
from sklearn.model_selection import GroupShuffleSplit, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Load the sample table from the CSV file into a pandas DataFrame.

path = 'all_samples_with_id.csv'
data = pd.read_csv(filepath_or_buffer=path)

# Preview the first five rows as the cell output.
data.head(n=5)

In [None]:
# Print a summary of the DataFrame (pandas `DataFrame.info`).
data.info()

In [None]:
# Compute descriptive statistics of the DataFrame (pandas `DataFrame.describe`);
# the result is shown as the cell output.
data.describe()

In [None]:
# Transform to a numpy array
#
# NOTE: this rebinds `data` from the pandas DataFrame to a plain ndarray, so
# the DataFrame-only calls in the cells above (head/info/describe) will fail
# if they are re-run after this cell. Column names are no longer available
# afterwards; the cells below address columns by position.

data = np.asarray(data)
# Display the resulting array as the cell output.
data

In [None]:
# Split the data into train and test sets and shuffle it

# The split is positional: the first N_TRAIN rows form the training set and
# all remaining rows form the test set.
N_TRAIN = 4952

data_train = data[:N_TRAIN]
data_test = data[N_TRAIN:]

# Shuffle the rows of each partition in place. A seeded generator is used so
# that the row order (and every score computed from it) is identical on each
# run of the notebook.
rng = np.random.default_rng(42)
rng.shuffle(data_train)
rng.shuffle(data_test)

In [None]:
# Build the feature matrices (X) and label vectors (y) as integer arrays.
# Column 0 is left out of the features (presumably the sample id, judging by
# the CSV file name -- confirm), the last column is the label, and every
# column in between is a feature.

X_train = data_train[:, 1:-1].astype('int')
y_train = data_train[:, -1].astype('int')

X_test = data_test[:, 1:-1].astype('int')
y_test = data_test[:, -1].astype('int')

# Show the training features as the cell output.
X_train

In [None]:
# Scale the features to a zero-mean and a standard deviation of 1

scaler = StandardScaler()

# Learn the per-feature mean and standard deviation from the training set only.
X_train = scaler.fit_transform(X_train)

# Apply the *training* statistics to the test set. Re-fitting the scaler on
# the test set would leak test information into preprocessing and put the two
# sets on different scales, so only transform() is used here.
X_test = scaler.transform(X_test)

In [None]:
# Not needed: the data is already split into train and test sets by row position above.
# Alternative: split into train and test data via train_test_split

# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

## Supervised Learning – Decision Tree

In [None]:
# Create the decision tree model and assign it to the variable model.
# random_state is fixed because the tree's split search is randomized;
# without it the reported accuracy can change from run to run.
model = DecisionTreeClassifier(random_state=42)

# Fit the model.
model.fit(X_train, y_train)

# Make predictions. Store them in the variable y_pred.
y_pred = model.predict(X_test)

# Calculate the accuracy (fraction of correctly classified test samples)
# and assign it to the variable acc.
acc = accuracy_score(y_test, y_pred)
acc

## Supervised Learning – AdaBoost

In [None]:
# Create the model and assign it to the variable model.
# The weak learner is passed positionally on purpose: its keyword was renamed
# from `base_estimator` to `estimator` in scikit-learn 1.2 and the old name
# was removed in 1.4, so the positional form works on both old and new
# releases. random_state is fixed on both estimators for repeatable results.
model = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=4, random_state=42),
    n_estimators=3,
    random_state=42,
)

# Fit the model.
model.fit(X_train, y_train)

# Make predictions. Store them in the variable y_pred.
y_pred = model.predict(X_test)

# Calculate the accuracy (fraction of correctly classified test samples)
# and assign it to the variable acc.
acc = accuracy_score(y_test, y_pred)
acc

## Supervised Learning – Support Vector Machines

In [None]:
# Support vector classifier.
# Supported kernels: 'linear', 'poly', 'rbf' (the default), 'sigmoid',
# 'precomputed', or a callable.
# Other configurations that can be swapped in for the line below:
#   model = SVC(kernel='poly', gamma='scale', C=0.5)
#   model = SVC(kernel='sigmoid', gamma='scale', C=1)
#   model = SVC(kernel='linear', C=1)
model = SVC(C=0.5, kernel='rbf', gamma='scale')

# Train on the scaled training features and their labels.
model.fit(X=X_train, y=y_train)

# Predicted labels for the test set, kept in y_pred.
y_pred = model.predict(X_test)

# Fraction of test samples whose predicted label equals the true label.
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
acc

## Unsupervised Learning – KMeans

In [None]:
# Create the model and assign it to the variable model.
# random_state pins the centroid initialisation so the clustering is repeatable.
model = KMeans(n_clusters=12, random_state=42)
# model = KMeans(n_clusters=4, random_state=42)

# Fit the model on the training features only (no labels are used).
model.fit(X_train)

# Assign every test sample to one of the learned clusters.
# Store the cluster ids in the variable y_pred.
y_pred = model.predict(X_test)

# Cluster ids are arbitrary integers: cluster 3 has no relation to class 3,
# so accuracy_score(y_test, y_pred) would be meaningless here. The adjusted
# Rand index compares the two groupings irrespective of how the clusters are
# numbered (1.0 = identical grouping, about 0.0 = random assignment).
ari = adjusted_rand_score(y_test, y_pred)
ari

## Unsupervised Learning – Hierarchical Clustering: Ward's Method

In [None]:
# Create the model and assign it to the variable model.
model = AgglomerativeClustering(n_clusters=12, linkage='ward')

# AgglomerativeClustering has no predict() for unseen samples, so the test
# set itself is clustered. Store the cluster ids in the variable y_pred.
y_pred = model.fit_predict(X_test)

# Cluster ids are arbitrary integers, so accuracy_score against the class
# labels would be meaningless. The adjusted Rand index compares the two
# groupings irrespective of how the clusters are numbered
# (1.0 = identical grouping, about 0.0 = random assignment).
ari = adjusted_rand_score(y_test, y_pred)
ari

## Unsupervised Learning – Density-Based Clustering: DBSCAN

In [None]:
# Create the model and assign it to the variable model.
model = DBSCAN(eps=0.5, min_samples=5)

# DBSCAN has no predict() for unseen samples and fit_predict() always refits,
# so a preceding fit on the training set would simply be discarded. The test
# set is therefore clustered directly. Store the cluster ids in y_pred;
# samples DBSCAN considers noise receive the id -1.
y_pred = model.fit_predict(X_test)

# Cluster ids are arbitrary integers (and -1 marks noise), so accuracy_score
# against the class labels would be meaningless. The adjusted Rand index
# compares the two groupings irrespective of how the clusters are numbered
# (1.0 = identical grouping, about 0.0 = random assignment); noise points
# are treated as one additional group.
ari = adjusted_rand_score(y_test, y_pred)
ari

## Unsupervised Learning – Gaussian Mixture Model

In [None]:
# Create the model and assign it to the variable model.
# random_state pins the initialisation so the fitted mixture is repeatable.
model = GaussianMixture(n_components=12, random_state=42)

# Fit the mixture on the training features only (no labels are used).
model.fit(X_train)

# Assign every test sample to its most likely component of the mixture
# fitted above. predict() is used instead of fit_predict(), which would
# throw the trained model away and refit on the test set.
y_pred = model.predict(X_test)

# Component ids are arbitrary integers, so accuracy_score against the class
# labels would be meaningless. The adjusted Rand index compares the two
# groupings irrespective of how the components are numbered
# (1.0 = identical grouping, about 0.0 = random assignment).
ari = adjusted_rand_score(y_test, y_pred)
ari