# 2 Classes (Dichotomic) Examples

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split

## Example 1: 2 Spirals

Let's create some data:
  - 2 Features (X1, X2)
  - 2 Classes (0, 1)

In [None]:
def twospirals(n_points, noise=.5, seed=None):
    """Generate a two-feature, two-class dataset shaped like two spirals.

    Parameters
    ----------
    n_points : int
        Number of points per spiral; the result holds ``2 * n_points`` rows.
    noise : float, default 0.5
        Scale of the uniform ``[0, noise)`` jitter added to each coordinate.
    seed : int or None, default None
        When given, points are drawn from a private ``RandomState(seed)``, so
        the same seed always yields the same dataset. When ``None``, NumPy's
        global random functions are used (the previous behaviour).

    Returns
    -------
    X : ndarray of shape (2 * n_points, 2)
        Coordinates, shifted by the global minimum and divided by the global
        maximum so that every value lies in ``[0, 1]``.
    y : ndarray of shape (2 * n_points,)
        ``0.0`` for the first spiral, ``1.0`` for its point-mirrored copy.
    """
    # `np.random` and `RandomState` both expose `.rand`, so one code path serves both
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Angle along the spiral, in radians (sqrt of a uniform sample, up to 780 degrees)
    n = np.sqrt(rng.rand(n_points, 1)) * 780 * (2*np.pi)/360
    d1x = -np.cos(n)*n + rng.rand(n_points, 1) * noise
    d1y = np.sin(n)*n + rng.rand(n_points, 1) * noise

    # Second spiral is the first one mirrored through the origin
    X = np.vstack((np.hstack((d1x, d1y)), np.hstack((-d1x, -d1y))))
    y = np.hstack((np.zeros(n_points), np.ones(n_points)))

    # Scale X with a single min/max taken over both features
    X = X - X.min()
    X = X / X.max()

    return X, y

# Draw 2,000 points per spiral
X, y = twospirals(2_000)

# Scatter feature 0 against feature 1, coloured by class label
plt.figure(figsize=(5, 5))
sns.scatterplot(
    data=None,
    x=X[:, 0],
    y=X[:, 1],
    hue=y,
)
plt.show()

Let's split the data into train and test sets; we hold out 20% of the data to validate the model.

In [None]:
# Hold out 20% of the rows as the test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# One panel per subset, side by side
fig, ax = plt.subplots(1, 2, figsize=(11, 5))

panels = (
    (ax[0], X_train, y_train, 'Train (%d Samples)'),
    (ax[1], X_test, y_test, 'Test (%d Samples)'),
)
for axis, features, labels, title in panels:
    sns.scatterplot(data=None, x=features[:, 0], y=features[:, 1], hue=labels, ax=axis)
    axis.title.set_text(title % len(labels))

plt.show()

### Let's create our ANN

Remember: Go crazy with sizes! It's the Deep Learning era, baby!

In [None]:
from keras.models import Sequential
from keras.layers import Dense

# Layer plan as (units, activation): ReLU layers first, sigmoid layers after,
# finishing with a single sigmoid unit that outputs the class probability
layer_plan = [
    (2, 'relu'),
    (10, 'relu'),
    (20, 'relu'),
    (20, 'relu'),
    (20, 'relu'),
    (20, 'sigmoid'),
    (20, 'sigmoid'),
    (20, 'sigmoid'),
    (10, 'sigmoid'),
    (1, 'sigmoid'),
]
model = Sequential([Dense(units, activation=act) for units, act in layer_plan])

model.compile(optimizer='adam', loss='binary_crossentropy')

# Train the model; the held-out split is passed as validation data
report = model.fit(
    X_train,
    y_train,
    epochs=1_000,
    validation_data=(X_test, y_test),
    verbose=0,  # no per-epoch output
)

# Plot every recorded loss curve over the epochs
plt.figure(figsize=(15, 5))
sns.lineplot(data=report.history)
plt.show()

# Report the loss values of the last epoch
final_train_loss = report.history['loss'][-1]
final_val_loss = report.history['val_loss'][-1]
print('Final Loss Training:', final_train_loss)
print('Final Loss Validation:', final_val_loss)

In [None]:
# Show the summary of the model trained above
model.summary()

**Instead of evaluating on the validation set, let's create a grid to see the prediction at every possible point**

In [None]:
# Build a 70 x 70 grid over the unit square; the first coordinate varies slowest
grid_axis = np.linspace(0, 1, 70)
X_random_samples = np.array(
    [(x0, x1) for x0 in grid_axis for x1 in grid_axis]
)

# Predicted probability for every grid point, flattened to 1-D
y_proba = model.predict(X_random_samples).reshape(-1)

# Hard labels: 1 when the probability exceeds 0.5, else 0
y_pred = [1 if p > 0.5 else 0 for p in y_proba]

# Draw the grid as square markers coloured by predicted label
plt.figure(figsize=(5, 5))
grid_marker_style = dict(marker='s', size=1, linewidth=0)
sns.scatterplot(
    data=None,
    x=X_random_samples[:, 0],
    y=X_random_samples[:, 1],
    hue=y_pred,
    **grid_marker_style
)
plt.show()

Now let's plot the probabilities

In [None]:
plt.figure(figsize=(5, 5))

# Background layer: grid points shaded by predicted probability ('binary' colormap)
grid_x0 = X_random_samples[:, 0]
grid_x1 = X_random_samples[:, 1]
sns.scatterplot(
    data=None, x=grid_x0, y=grid_x1,
    c=y_proba, cmap='binary',
    marker='s', size=1, linewidth=0,
)

# Foreground layer: the held-out test points, coloured by their true label
sns.scatterplot(
    data=None,
    x=X_test[:, 0],
    y=X_test[:, 1],
    hue=y_test,
)

plt.show()

# Sentiment Analysis (Example 2)

Let's read some data from the web

In [None]:
import ssl

# Ignore SSL errors when reading data from the web.
# NOTE(review): this replaces the default HTTPS context for the whole kernel,
# so certificate verification is off for every later HTTPS request as well.
ssl._create_default_https_context = ssl._create_unverified_context

reviews_url = 'https://raw.githubusercontent.com/coderakib/Sentimental-analysis-of-amazon-customer-reviews/main/appl_1_amazon_pc.csv'

# Keep only the text and label columns, drop incomplete rows, and renumber the
# rows. `drop=True` discards the old index instead of adding it as a stray
# `index` column; chaining avoids in-place edits on a column slice.
df = (
    pd.read_csv(reviews_url)[['review_body', 'sentiment']]
    .dropna()
    .reset_index(drop=True)
)

df

Is this an unbalanced dataset?

In [None]:
# Tally the rows per sentiment label and display the tally as a table
sentiment_counts = df['sentiment'].value_counts()
sentiment_counts.to_frame()

Yes, it is... Let's downsample the larger class here...

In [None]:
# Downsample: keep every class-0 row and the same number of class-1 rows.
# The row count is taken from the data instead of being hard-coded, so the
# result stays balanced if the source file changes.
# NOTE(review): assumes class 0 is the smaller class — confirm against the counts above.
class_0 = df[df['sentiment'] == 0]
class_1 = df[df['sentiment'] == 1].head(len(class_0))

df_balanced = pd.concat([class_0, class_1])

df_balanced['sentiment'].value_counts().to_frame()

As the encoder we are using a TF-IDF weighted [Bag of Words](https://en.wikipedia.org/wiki/Bag-of-words_model)

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

texts = df_balanced['review_body']

# min_df=4 => a term must appear in at least 4 documents to enter the vocabulary
encoder = TfidfVectorizer(min_df=4)
encoder.fit(texts)

vocabulary = encoder.get_feature_names_out()
input_len = len(vocabulary)
print('X vector input is: %d' % input_len)

X_sparse = encoder.transform(texts)
# The model needs dense input. toarray() returns a plain ndarray, whereas
# todense() returns the legacy numpy.matrix type.
X = X_sparse.toarray()
y = df_balanced['sentiment'].values

X.shape, y.shape

In [None]:
# Reserve 20% of the encoded reviews for testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Report the size of each subset
n_train, n_test = len(y_train), len(y_test)
print('Training Samples: %d' % n_train)
print('Test Samples: %d' % n_test)

### Let's create our ANN

Remember: Go crazy with sizes! It's the Deep Learning era, baby!

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Dropout

# Network layers in order: the first Dense layer is as wide as the vocabulary,
# then the widths shrink (with two Dropout layers) down to one sigmoid unit
sentiment_layers = [
    Dense(input_len, activation='relu'),
    Dense(1_000, activation='relu'),
    Dropout(0.5),
    Dense(500, activation='relu'),
    Dense(250, activation='sigmoid'),
    Dropout(0.5),
    Dense(50, activation='sigmoid'),
    Dense(1, activation='sigmoid'),
]
model = Sequential(sentiment_layers)

model.compile(optimizer='adam', loss='binary_crossentropy')

# Train the model; the held-out split is passed as validation data
report = model.fit(
    X_train,
    y_train,
    epochs=50,
    validation_data=(X_test, y_test),
    verbose=2,  # one log line per epoch (not silent; 0 would be silent)
)

# Plot every recorded loss curve over the epochs
plt.figure(figsize=(15, 5))
sns.lineplot(data=report.history)
plt.show()

# Report the loss values of the last epoch
final_train_loss = report.history['loss'][-1]
final_val_loss = report.history['val_loss'][-1]
print('Final Loss Training:', final_train_loss)
print('Final Loss Validation:', final_val_loss)

In [None]:
# Show the summary of the sentiment model trained above
model.summary()

**Instead of evaluating the model, let's play with it**

In [None]:
# Interactive demo: type a review to classify it, or 'exit' to stop.
# Loop-local names are used so the notebook-level X_sparse / y_proba / y_pred
# from earlier cells are not overwritten on every iteration.
while True:
    text = input('Text:')

    if text == 'exit':
        break

    # Encode with the already-fitted encoder; toarray() yields a plain ndarray
    # (todense() would return the legacy numpy.matrix type)
    sample = encoder.transform([text]).toarray()

    # A single input row gives a single probability
    sample_proba = float(model.predict(sample, verbose=0).reshape(-1)[0])
    sample_label = 1 if sample_proba > 0.5 else 0

    print('\n"%s" => %d\n' % (text, sample_label))