<a href="https://colab.research.google.com/github/nedlecky/CSC485B/blob/main/CSC485_130_PythagorasInSteps.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# CSC 485B Spring 2023: CSC485_130_PythagorasInSteps using MLP
## Solving the Pythagoras problem in small pieces to make it work
### Input the length of the two sides, ML computes hypotenuse, perimeter, and area
* SUNY Plattsburgh, Spring 2023
* Dr. Ned Lecky
* nleck001@plattsburgh.edu
* ned@lecky.com

# Work In Progress
# This is the version we walked through and discussed in class 2/23/2023
# Most of the tricky problems are solved here but we're not in final form

Assignment:
* Follow the architectural idea from Class 04: Slides 17-18 as discussed in class
* Build your “Pythagoras dataset” to include 400 random triangles with sizes from 2cm to 2000cm on each side
* Split it into training and test portions using sklearn.model_selection.train_test_split
* Scale using a standard scaler (sklearn.preprocessing.StandardScaler) and figure out how to use an sklearn.pipeline Pipeline so you don’t have to keep remembering to scale all of your Xs
* Get your super simple MLPs running as suggested on slide 17. Success is: max error < 0.5 cm on hypotenuse, < 1 cm on perimeter, and < 1 cm^2 on area
* Now, make the data more “realistic” by adding small random errors to your dataset
  * Generate an exact dataset
  * Now make your training dataset by assuming there are small errors in side1 and side2 measurements
* Does your same code work when run on the exact data?



In [1]:
# Setup and Support Functions
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import sys

# This makes us reproducible (and we can adjust fixed_seed to get different results)
# np.random.seed seeds NumPy's global generator, the one np.random.rand draws from.
fixed_seed = 1
np.random.seed(fixed_seed)

# Return n random floats between lo and hi as 1-column NumPy matrix
def rand_nlohi(n=1, lo=0, hi=1):
  """Draw n uniform samples between lo and hi and return them as an (n, 1) array.

  Samples come from NumPy's global generator (np.random.rand), so the
  sequence is controlled by np.random.seed.
  """
  # Plain uniform distribution for now; swap in another one here if ever needed
  span = hi - lo
  unit_draws = np.random.rand(n)
  return (unit_draws * span + lo).reshape(-1, 1)

# Array printing setup, done in a single call:
#  - fixed-point floats with 4 decimals and no exponential notation
#    (fine as long as we are keeping values near +/- 1)
#  - threshold=sys.maxsize turns off the "..." summarization so all 400 rows get printed
np.set_printoptions(
  floatmode='fixed',
  precision=4,
  suppress=True,
  threshold=sys.maxsize,
)

# Simple numpy array print with optional push to file
def nprint(m, name='', also_write_file=False):
  """Print a "name shape dtype" header line followed by the array m.

  If also_write_file is true and name is non-empty, the array is also
  handed to fprint(m, name).
  """
  header = f"{name} {m.shape} {m.dtype}"
  print(header)
  print(m)
  wants_file = also_write_file and name != ''
  if wants_file:
    fprint(m, name)

# Print numpy array to file (needs name)
def fprint(m, name=''):
  """Write a "name shape dtype" header line and the array m to the file `name`.

  The file is opened in 'w' mode, so existing content is replaced. With an
  empty name nothing is written and a reminder is printed instead.
  """
  if name == '':
    print('fprint needs a name!')
    return
  with open(name, 'w') as out_file:
    print(f"{name} {m.shape} {m.dtype}", file=out_file)
    print(m, file=out_file)

# Remove a file and don't complain if it doesn't exist
def remove_file(name):
  """Delete the file `name`; a file that does not exist is silently ignored.

  Only FileNotFoundError is suppressed. Any other failure (permission
  problems, an invalid argument, a keyboard interrupt) propagates, so a
  stale output file is never left behind without anyone noticing.
  """
  try:
    os.remove(name)
  except FileNotFoundError:
    # Nothing to clean up -- exactly the case we want to stay quiet about
    return

# Clear out array dump files that may be left over from an earlier run
stale_dumps = [
  # All-in-one dataset
  'X', 'Y', 'Y_pred', 'XY',
  'X_train', 'X_train_scaled', 'X_test',
  'Y_train', 'Y_test', 'Y_testY_pred',
  # First MLP
  'X1', 'X1_train', 'X1_train_scaled', 'X1_test',
  'Y1', 'Y1_pred', 'Y1_train', 'Y1_test',
  'X1Y1', 'Y1_testY1_pred',
  # Second MLP
  'X2', 'X2_train', 'X2_train_scaled', 'X2_test',
  'Y2', 'Y2_pred', 'Y2_train', 'Y2_test',
  'X2Y2', 'Y2_testY2_pred',
]
for stale_name in stale_dumps:
  remove_file(stale_name)



# Make Test Data X
## 400 Triangles with side1 side2 spread from 2 to 2000 cm


In [2]:
# This is the full test input data for right triangles
# Reminder: Final input is the length of the two sides, output is length of hypotenuse, perimeter, and area
# x = [side1, side2]
# y = [hypotenuse, perimeter, area]
import math

# Setup what you want to generate
N = 400
use_trivial_test_data = False  # Force use of just 2 easy triangles
make_lengths_random = True     # Random or just stepped in size
split_data = True              # Split test/train 50/50? (else test=train)

# Big help if we avoid BIG numbers... so let's use cm/100 as our unit
# NOTE: This is because we are squaring and then scaling...
# If we scale and then square, things might work fine (in a pipeline?)
shortest_side = 0.02 # 2 cm
longest_side = 20.00 # 2000 cm

# Generate X
if use_trivial_test_data:
  # Trivial Test Data: two triangles with equal legs
  X = np.array([
    [math.sqrt(2)/2, math.sqrt(2)/2],
    [1, 1]
  ])
else:
  if make_lengths_random:
    # This makes random X: each side drawn independently and uniformly
    side1 = rand_nlohi(N, shortest_side, longest_side)
    side2 = rand_nlohi(N, shortest_side, longest_side)
  else:
    # OR: This makes more regular X, with side1 == side2 (isosceles right triangles).
    # linspace always yields exactly N evenly spaced values, endpoints included;
    # arange with a float step (longest_side/N) only gives N rows for some
    # combinations of N and the side limits.
    side1 = np.linspace(shortest_side, longest_side, N).reshape(-1,1)
    side2 = side1.copy()
  X = np.hstack([side1, side2])
  # Both generators must deliver one row per requested triangle
  assert X.shape == (N, 2)

fprint(X,'X')


# Make expected Y

In [3]:
# Build the exact expected outputs Y for every row of X, then split into train/test
# Reminder: inputs are the two sides; outputs are hypotenuse, perimeter, and area
# x = [side1, side2]
# y = [hypotenuse, perimeter, area]
from sklearn.model_selection import train_test_split

# Slice with 0:1 / 1:2 so each leg stays an (n, 1) column and hstack lines up
leg_a = X[:, 0:1]
leg_b = X[:, 1:2]

hypotenuse = np.sqrt(np.square(leg_a) + np.square(leg_b))
perimeter = leg_a + leg_b + hypotenuse
area = (leg_a * leg_b) / 2.
Y = np.hstack([hypotenuse, perimeter, area])
fprint(Y,'Y')
fprint(np.hstack([X,Y]), 'XY')

# Optional noise in Y
# Not tested yet!
# Scales every value by a random factor within +/- 0.5%
#Y = Y * (100. + (np.random.rand(Y.shape[0],Y.shape[1])-0.5))/100.

# Full train/test sets, in case we want to try the old all-in-one way
if not split_data:
  # Train and test are the very same arrays
  X_train = X_test = X
  Y_train = Y_test = Y
else:
  X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.5, random_state=1)

for dump_name, dump_array in [
  ('X_train', X_train),
  ('X_test', X_test),
  ('Y_train', Y_train),
  ('Y_test', Y_test),
]:
  fprint(dump_array, dump_name)


# Make X1 and Y1 for First MLP

In [4]:
# Features/targets for the first MLP:
#   X1 = [side1, side2, side1^2, side2^2]
#   Y1 = [hypotenuse^2, area]

# Note... to get rid of manual scaling, I need to scale side1 and side2 and then compute
#  the squares of the SCALED values...
sides_squared = np.square(X)
X1 = np.hstack([X, sides_squared])

hyp_squared = np.square(Y[:, 0:1])
area_column = Y[:, 2:3]
Y1 = np.hstack([hyp_squared, area_column])

fprint(X1, 'X1')
fprint(Y1, 'Y1')
fprint(np.hstack([X1,Y1]), 'X1Y1')

if not split_data:
  # Train and test are the very same arrays
  X1_train = X1_test = X1
  Y1_train = Y1_test = Y1
else:
  X1_train, X1_test, Y1_train, Y1_test = train_test_split(X1, Y1, test_size=0.5, random_state=1)

for dump_name, dump_array in [
  ('X1_train', X1_train),
  ('X1_test', X1_test),
  ('Y1_train', Y1_train),
  ('Y1_test', Y1_test),
]:
  fprint(dump_array, dump_name)


In [5]:
# Do the MLP scale/train
from sklearn import preprocessing
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error

def scale(X):
  """Fit a StandardScaler on X and return (fitted scaler, scaled copy of X).

  Keep the returned scaler: the same transform has to be applied to any
  data later passed to a model trained on the scaled values.
  """
  scaler = preprocessing.StandardScaler()
  X_scaled = scaler.fit_transform(X)
  return scaler, X_scaled

def train(X, Y, hidden_layer_sizes=(10,10),
          activation='relu', max_iter=10000, title='Predictions'):
  """Fit an MLPRegressor (lbfgs solver, alpha=1e-5, random_state=1) on X, Y.

  X is used as given, so pass inputs that are already scaled.
  Y may have one or several output columns. A single-column (n, 1) target
  is flattened to shape (n,) before fitting, which is the shape sklearn
  expects for one output and avoids its DataConversionWarning.
  title is accepted for backward compatibility but is not used.

  Returns the fitted regressor.
  """
  Y = np.asarray(Y)
  if Y.ndim == 2 and Y.shape[1] == 1:
    Y = Y.ravel()

  regr = MLPRegressor(solver='lbfgs', alpha=1e-5,
                      hidden_layer_sizes=hidden_layer_sizes,
                      activation=activation,
                      max_iter=max_iter,
                      random_state=1,
                      verbose=True)
  regr.fit(X, Y)
  return regr

def test(regr, scaler, X, Y):
  """Predict Y from unscaled X, print error metrics, and return the predictions.

  X is run through scaler.transform before regr.predict. The printed report
  has MSE, MAE and MAPE, plus a pandas describe() of the per-row errors
  (prediction minus expected). The result has one row per row of Y.
  """
  n_rows = Y.shape[0]
  X_scaled = scaler.transform(X)
  # Reshaping to n_rows rows gives a single column when there is only 1 output
  Y_pred = regr.predict(X_scaled).reshape(n_rows, -1)

  # Metrics...
  print(f"Mean squared error: {mean_squared_error(Y, Y_pred):.2f}")
  print(f"Mean absolute error: {mean_absolute_error(Y, Y_pred):.2f}")
  print(f"Mean absolute percentage error: {mean_absolute_percentage_error(Y, Y_pred):.2f}")

  # Distribution of the signed errors, one column per output
  residuals = Y_pred - Y
  print(pd.DataFrame(data=residuals).describe())

  return Y_pred

# Fit the scaler and the first MLP on the training rows, then score on the test rows
scaler1, X1_train_scaled = scale(X1_train)
regr1 = train(X1_train_scaled, Y1_train)
Y1_pred = test(regr1, scaler1, X1_test, Y1_test)

# Dump the scaled inputs and the predictions (alone, and next to the expected values)
fprint(X1_train_scaled,'X1_train_scaled')
fprint(Y1_pred,'Y1_pred')
Y1_test_vs_pred = np.hstack([Y1_test, Y1_pred])
fprint(Y1_test_vs_pred,'Y1_testY1_pred')


Mean squared error: 0.54
Mean absolute error: 0.35
Mean absolute percentage error: 0.05
                0           1
count  200.000000  200.000000
mean    -0.000043    0.172421
std      0.025138    1.029203
min     -0.080464   -1.853392
25%     -0.012425   -0.484213
50%      0.001073   -0.066694
75%      0.008340    0.531677
max      0.160578    6.158041


In [6]:
# Spot-check the first MLP on a hand-made row: [side1, side2, side1^2, side2^2] for a 1 x 1 triangle
X_111 = np.array([[1, 1, 1, 1]])
nprint(X_111,'X_111')

# The model was trained on scaled inputs, so apply the same fitted scaler first
X_111_scaled = scaler1.transform(X_111)
Y_pred = regr1.predict(X_111_scaled)
nprint(Y_pred,'Y_pred 111')


X_111 (1, 4) int64
[[1 1 1 1]]
Y_pred 111 (1, 2) float64
[[2.0506 0.8893]]


# Make X2 and Y2 for Second MLP

In [7]:
# Features/targets for the second MLP:
#   X2 = [side1, side2, side3], where side3 = sqrt of the hypotenuse^2 column of Y1
#   Y2 = [perim]
# Note: side3 is taken from the exact Y1 values, not from regr1's predictions
side3 = np.sqrt(Y1[:, 0:1])
X2 = np.hstack([X, side3])
Y2 = Y[:, 1:2]

fprint(X2, 'X2')
fprint(Y2, 'Y2')
fprint(np.hstack([X2,Y2]), 'X2Y2')

if not split_data:
  # Train and test are the very same arrays
  X2_train = X2_test = X2
  Y2_train = Y2_test = Y2
else:
  X2_train, X2_test, Y2_train, Y2_test = train_test_split(X2, Y2, test_size=0.5, random_state=1)

for dump_name, dump_array in [
  ('X2_train', X2_train),
  ('X2_test', X2_test),
  ('Y2_train', Y2_train),
  ('Y2_test', Y2_test),
]:
  fprint(dump_array, dump_name)


In [8]:
# Fit the scaler and the second MLP on the training rows, then score on the test rows
scaler2, X2_train_scaled = scale(X2_train)
regr2 = train(X2_train_scaled, Y2_train)
Y2_pred_raw = test(regr2, scaler2, X2_test, Y2_test)
# Keep the predictions as one explicit column so they stack next to Y2_test
Y2_pred = Y2_pred_raw.reshape(-1, 1)

fprint(X2_train_scaled,'X2_train_scaled')
fprint(Y2_pred,'Y2_pred')
Y2_test_vs_pred = np.hstack([Y2_test, Y2_pred])
fprint(Y2_test_vs_pred,'Y2_testY2_pred')


  y = column_or_1d(y, warn=True)


Mean squared error: 0.00
Mean absolute error: 0.00
Mean absolute percentage error: 0.00
                0
count  200.000000
mean    -0.000041
std      0.003738
min     -0.028780
25%     -0.000097
50%     -0.000023
75%      0.000058
max      0.009726


In [9]:
# Spot-check the second MLP on a hand-made row: [side1, side2, side3]
X_771 = np.array([[0.707, 0.707, 1]])
nprint(X_771,'X_771')

# The model was trained on scaled inputs, so apply the same fitted scaler first
X_771_scaled = scaler2.transform(X_771)
Y_pred = regr2.predict(X_771_scaled)
nprint(Y_pred,'Y_pred_X_771')


X_771 (1, 3) float64
[[0.7070 0.7070 1.0000]]
Y_pred_X_771 (1,) float64
[2.4032]


# How about another try at all-in-one?

In [10]:
# All-in-one attempt: one MLP straight from [side1, side2] to [hypotenuse, perimeter, area]
scaler_all, X_train_scaled = scale(X_train)
regr_all = train(X_train_scaled, Y_train)
Y_pred = test(regr_all, scaler_all, X_test, Y_test)

fprint(X_train_scaled,'X_train_scaled')
fprint(Y_pred,'Y_pred')
Y_test_vs_pred = np.hstack([Y_test, Y_pred])
fprint(Y_test_vs_pred,'Y_testY_pred')



Mean squared error: 1.19
Mean absolute error: 0.57
Mean absolute percentage error: 0.08
                0           1           2
count  200.000000  200.000000  200.000000
mean    -0.019011   -0.020642    0.063669
std      0.288596    0.286767    1.845103
min     -1.159927   -1.456972   -5.290061
25%     -0.176990   -0.159293   -0.880957
50%      0.001990   -0.003237   -0.168737
75%      0.147682    0.155962    0.866702
max      0.725209    0.740662    9.328812


In [11]:
# Spot-check the all-in-one MLP on a hand-made row: [side1, side2]
X_771 = np.array([[0.707, 0.707]])
nprint(X_771,'X_771')

# The model was trained on scaled inputs, so apply the same fitted scaler first
X_771_scaled = scaler_all.transform(X_771)
Y_pred = regr_all.predict(X_771_scaled)
nprint(Y_pred,'Y_pred_X_771')


X_771 (1, 2) float64
[[0.7070 0.7070]]
Y_pred_X_771 (1, 3) float64
[[ 1.3471  2.5991 -0.5311]]
