# NN from Assignment 1

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import math
import pandas as pd
import matplotlib.pyplot as plt
import sklearn

from sklearn.neural_network import MLPClassifier

from sklearn.preprocessing import RobustScaler

from sklearn.metrics import classification_report

from sklearn.model_selection import train_test_split

%matplotlib inline

plt.style.use('seaborn')

import warnings
warnings.filterwarnings('ignore')

path = "training_data.csv"

data = pd.read_csv(path)
data = data.sample(frac=1)

target = 'Facies'
features = [feature for feature in list(data.head(0)) if feature not in ("Well Name", "Formation", "Facies")]

X = data[features]

scaler = RobustScaler()
scaler.fit(X)
X = scaler.transform(X)


y = data[target]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

train_sizes = list(range(295,2065,295))

assign1_nn = MLPClassifier(activation='relu', alpha=0.01, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=80, learning_rate='constant',
       learning_rate_init=0.001, max_iter=500, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=1, shuffle=True,
       solver='adam', tol=0.0001, validation_fraction=0.1, verbose=False,
       warm_start=False)

# Training and testing on original data

In [2]:
assign1_nn.fit(X_train,y_train)

y_pred = assign1_nn.predict(X_test)
print(classification_report(y_test,y_pred))

print(sklearn.metrics.accuracy_score(y_test, y_pred))
print(sklearn.metrics.f1_score(y_test, y_pred, average='macro'))

              precision    recall  f1-score   support

           1       0.72      0.60      0.66        48
           2       0.77      0.78      0.78       153
           3       0.74      0.85      0.79       114
           4       0.69      0.64      0.67        42
           5       0.69      0.49      0.57        45
           6       0.63      0.74      0.68        86
           7       0.72      0.62      0.67        21
           8       0.74      0.65      0.69       106
           9       0.76      0.88      0.81        32

   micro avg       0.72      0.72      0.72       647
   macro avg       0.72      0.70      0.70       647
weighted avg       0.73      0.72      0.72       647

0.7248840803709428
0.7023136590199139


# Training and testing after dimensionality reduction

## PCA

In [3]:
from sklearn.decomposition import PCA

pca4 = PCA(n_components=4)
pca4_transformed = pca4.fit_transform(X_train)
pca4_transformed_test = pca4.transform(X_test)

assign1_nn.fit(pca4_transformed,y_train)

y_pred = assign1_nn.predict(pca4_transformed_test)

print(classification_report(y_test,y_pred))

print(sklearn.metrics.accuracy_score(y_test, y_pred))
print(sklearn.metrics.f1_score(y_test, y_pred, average='macro'))

              precision    recall  f1-score   support

           1       0.64      0.48      0.55        48
           2       0.55      0.71      0.62       153
           3       0.59      0.65      0.62       114
           4       0.52      0.31      0.39        42
           5       0.67      0.18      0.28        45
           6       0.47      0.70      0.56        86
           7       0.50      0.38      0.43        21
           8       0.64      0.44      0.53       106
           9       0.74      0.88      0.80        32

   micro avg       0.57      0.57      0.57       647
   macro avg       0.59      0.52      0.53       647
weighted avg       0.58      0.57      0.56       647

0.5703245749613601
0.5308077110579659


## ICA

In [4]:
from sklearn.decomposition import FastICA

ica3 = FastICA(n_components=3)
ica3_transformed = ica3.fit_transform(X_train)
ica3_transformed_test = ica3.transform(X_test)

assign1_nn.fit(ica3_transformed,y_train)

y_pred = assign1_nn.predict(ica3_transformed_test)


print(classification_report(y_test,y_pred))

print(sklearn.metrics.accuracy_score(y_test, y_pred))
print(sklearn.metrics.f1_score(y_test, y_pred, average='macro'))

              precision    recall  f1-score   support

           1       0.46      0.27      0.34        48
           2       0.48      0.78      0.59       153
           3       0.55      0.44      0.49       114
           4       0.53      0.24      0.33        42
           5       0.00      0.00      0.00        45
           6       0.44      0.69      0.54        86
           7       1.00      0.05      0.09        21
           8       0.48      0.55      0.51       106
           9       0.00      0.00      0.00        32

   micro avg       0.48      0.48      0.48       647
   macro avg       0.44      0.33      0.32       647
weighted avg       0.45      0.48      0.43       647

0.47913446676970634
0.3207603910433833


## Randomized Projections

In [14]:
#Reconstruction error (try this for LDA as well?)

"""
transformer = random_projection.GaussianRandomProjection(n_components = 20, eps =.2)

X_reduced = transformer.fit_transform(X)

randMat = transformer.components_

X_Proj = X_reduced.dot(randMat)
"""

from sklearn.random_projection import GaussianRandomProjection

grp = GaussianRandomProjection(n_components = 5)

grp_transformed = grp.fit_transform(X_train)

assign1_nn.fit(grp_transformed,y_train)

grp_transformed_test = grp.transform(X_test)

y_pred = assign1_nn.predict(grp_transformed_test)


print(classification_report(y_test,y_pred))

print(sklearn.metrics.accuracy_score(y_test, y_pred))
print(sklearn.metrics.f1_score(y_test, y_pred, average='macro'))

              precision    recall  f1-score   support

           1       0.64      0.60      0.62        48
           2       0.70      0.77      0.73       153
           3       0.70      0.71      0.71       114
           4       0.64      0.71      0.67        42
           5       0.58      0.31      0.41        45
           6       0.61      0.73      0.66        86
           7       0.55      0.57      0.56        21
           8       0.66      0.56      0.61       106
           9       0.81      0.81      0.81        32

   micro avg       0.67      0.67      0.67       647
   macro avg       0.66      0.64      0.64       647
weighted avg       0.67      0.67      0.66       647

0.6676970633693973
0.6425420876657485


## LDA

In [13]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

lda = LinearDiscriminantAnalysis(n_components=1)

lda_transformed = lda.fit_transform(X_train, y_train)

lda_transformed_test = lda.transform(X_test)

assign1_nn.fit(lda_transformed,y_train)

y_pred = assign1_nn.predict(lda_transformed_test)


print(classification_report(y_test,y_pred))

print(sklearn.metrics.accuracy_score(y_test, y_pred))
print(sklearn.metrics.f1_score(y_test, y_pred, average='macro'))

              precision    recall  f1-score   support

           1       0.00      0.00      0.00        48
           2       0.47      0.98      0.64       153
           3       0.67      0.02      0.03       114
           4       0.30      0.33      0.31        42
           5       0.00      0.00      0.00        45
           6       0.31      0.47      0.37        86
           7       0.00      0.00      0.00        21
           8       0.43      0.58      0.50       106
           9       0.71      0.16      0.26        32

   micro avg       0.42      0.42      0.42       647
   macro avg       0.32      0.28      0.23       647
weighted avg       0.40      0.42      0.32       647

0.42194744976816073
0.23461327145854766


# Training and testing after dimensionality reduction and clustering

## PCA and k Means and EM

In [None]:
kmeans = KMeans(n_clusters=4, random_state=0, n_init = 10)
kmeans.fit(X_test)

## ICA and k Means and EM

## Randomized Projections and k Means and EM

## LDA and k Means and EM