<a href="https://colab.research.google.com/github/mtnman38/harply/blob/main/harply_exploration.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [57]:
import tensorflow as tf
import numpy as np
import pandas as pd
import tensorflow_probability as tfp
from tensorflow.keras import Model
from tensorflow.keras.losses import Loss
from tensorflow.nn import relu
from tensorflow.keras.layers import (Dense,
                                     Reshape,
                                     Input,
                                     Lambda,
                                     Layer)

# Make float64 the default Keras dtype for layers and weights created below.
# NOTE(review): presumably chosen so the float64 arrays from df.to_numpy()
# can be fed to the layers without casting -- confirm.
tf.keras.backend.set_floatx('float64')

# harply initial explorations

## Get some data

In [58]:
# Location of the red-wine quality CSV in the UCI machine-learning repository.
ds_url = (
    'https://archive.ics.uci.edu/ml/machine-learning-databases/'
    'wine-quality/winequality-red.csv'
)

# Labels assigned positionally to the CSV columns when the file is read.
column_names = [
    'fixed acidity',
    'volatile acidity',
    'citric acid',
    'residual sugar',
    'chlorides',
    'free sulfur dioxide',
    'total sulfur dioxide',
    'density',
    'pH',
    'sulphates',
    'alcohol',
    'quality',
]

In [59]:
# Fetch the CSV (named after the last URL segment) and keep the local path.
dataset_path = tf.keras.utils.get_file(
    fname=ds_url.rsplit('/', 1)[-1],
    origin=ds_url,
)

# Parse the semicolon-separated file; header=0 together with names= replaces
# the file's own header row with the labels from column_names.
dataset_csv = pd.read_csv(
    dataset_path,
    sep=";",
    header=0,
    names=column_names,
    na_values="?",
    comment='\t',
    skipinitialspace=True,
)

# Keep only the four features explored below.
df = dataset_csv.loc[:, ['alcohol', 'pH', 'density', 'chlorides']]

## Next steps: inspect the data

In [60]:
# Preview the first rows of the four selected feature columns.
df.head()

Unnamed: 0,alcohol,pH,density,chlorides
0,9.4,3.51,0.9978,0.076
1,9.8,3.2,0.9968,0.098
2,9.8,3.26,0.997,0.092
3,9.8,3.16,0.998,0.075
4,9.4,3.51,0.9978,0.076


Let's take a look at correlations and some summary descriptions.

In [61]:
# Pairwise correlation matrix of the four selected features.
df.corr()

Unnamed: 0,alcohol,pH,density,chlorides
alcohol,1.0,0.205633,-0.49618,-0.221141
pH,0.205633,1.0,-0.341699,-0.265026
density,-0.49618,-0.341699,1.0,0.200632
chlorides,-0.221141,-0.265026,0.200632,1.0


In [62]:
# Per-column summary statistics (count, mean, std, min, quartiles, max).
df.describe()

Unnamed: 0,alcohol,pH,density,chlorides
count,1599.0,1599.0,1599.0,1599.0
mean,10.422983,3.311113,0.996747,0.087467
std,1.065668,0.154386,0.001887,0.047065
min,8.4,2.74,0.99007,0.012
25%,9.5,3.21,0.9956,0.07
50%,10.2,3.31,0.99675,0.079
75%,11.1,3.4,0.997835,0.09
max,14.9,4.01,1.00369,0.611


The idea is to build a generator whose output mimics these summary statistics and correlations.

## Build out the generator model

In [63]:
class Linear(Layer):
    """Affine layer computing ``inputs @ w + b``.

    Args:
        units: Size of the last output dimension (number of columns of
            ``w``). Defaults to 32.
        **kwargs: Standard Keras ``Layer`` keyword arguments such as
            ``name`` or ``dtype``; forwarded to the base class.
    """

    def __init__(self, units=32, **kwargs):
        super().__init__(**kwargs)
        self.units = units

    def build(self, input_shape):
        """Create the kernel ``w`` and bias ``b`` for ``input_shape``.

        Args:
            input_shape: Shape of the incoming tensor; only its last entry
                (the feature dimension) is used.
        """
        self.w = self.add_weight(
            shape=(input_shape[-1], self.units),
            initializer="random_normal",
            trainable=True,
        )
        self.b = self.add_weight(
            shape=(self.units,),
            initializer="random_normal",
            trainable=True,
        )
        # The base-class build() is what flags the layer as built. Without
        # it, calling build() by hand and then calling the layer makes
        # tf.keras 2.x run build() a second time and register a duplicate
        # pair of weights.
        super().build(input_shape)

    def call(self, inputs):
        """Return ``inputs @ w + b``."""
        return tf.matmul(inputs, self.w) + self.b

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update({"units": self.units})
        return config

In [64]:
test = Linear(units=100)
# Build lazily on first call: Keras infers the input shape from the data, so
# an explicit build() beforehand is unnecessary (and can register the weights
# twice when build() does not mark the layer as built).
test(df.to_numpy())

<tf.Tensor: shape=(1599, 100), dtype=float64, numpy=
array([[-0.35039552,  0.40275999, -1.46403443, ...,  0.43307281,
         0.05762452, -0.26180968],
       [-0.36581034,  0.42643754, -1.49681807, ...,  0.41319748,
         0.02934732, -0.24095331],
       [-0.36583009,  0.42539092, -1.50025373, ...,  0.41731054,
         0.03395869, -0.24546401],
       ...,
       [-0.4132475 ,  0.47937426, -1.66153116, ...,  0.43304801,
         0.0300763 , -0.26201244],
       [-0.38214342,  0.4399439 , -1.56890998, ...,  0.44039334,
         0.05092532, -0.26880593],
       [-0.4128767 ,  0.47904194, -1.65951984, ...,  0.43061393,
         0.02902435, -0.26100923]])>

In [65]:
class CustomModel(Model):
  """Stack of four ``Linear`` layers (64, 32, 16, 4 units), each followed by ReLU."""

  def __init__(self):
    super().__init__()
    self.linear1 = Linear(units=64)
    self.linear2 = Linear(units=32)
    self.linear3 = Linear(units=16)
    self.linear4 = Linear(units=4)

  def call(self, x):
    """Run ``x`` through every linear stage in order, applying ReLU after each.

    The ReLU after the last stage means the returned values are never negative.
    """
    stages = (self.linear1, self.linear2, self.linear3, self.linear4)
    for stage in stages:
      x = relu(stage(x))
    return x

In [66]:
# Instantiate the model; its Linear layers create their weights in build(),
# i.e. not until the model first sees input.
model = CustomModel()

In [67]:
# Forward pass of the untrained model on the raw feature matrix.
# NOTE(review): the recorded output is zero in three of the four columns --
# likely the final relu in CustomModel.call clamping negative pre-activations.
model(df.to_numpy())

<tf.Tensor: shape=(1599, 4), dtype=float64, numpy=
array([[0.        , 0.05310696, 0.        , 0.        ],
       [0.        , 0.05298296, 0.        , 0.        ],
       [0.        , 0.05298645, 0.        , 0.        ],
       ...,
       [0.        , 0.05259442, 0.        , 0.        ],
       [0.        , 0.05287698, 0.        , 0.        ],
       [0.        , 0.05258639, 0.        , 0.        ]])>

In [68]:
# Correlation matrix of the real data as a plain NumPy array.
# NOTE(review): presumably the target the loss below will compare against -- confirm.
df.corr().to_numpy()

array([[ 1.        ,  0.20563251, -0.49617977, -0.22114054],
       [ 0.20563251,  1.        , -0.34169933, -0.26502613],
       [-0.49617977, -0.34169933,  1.        ,  0.20063233],
       [-0.22114054, -0.26502613,  0.20063233,  1.        ]])

In [69]:
class CustomLoss(Loss):

  def call(self, y_true, y_pred):
    

SyntaxError: ignored