In [39]:
import pandas as pd

from TPNN_2023.lab2.Perceptron import *
from TPNN_2023.lab2.activation_functions import *
from TPNN_2023.lab2.tools import default_init, test_init, Adam, Default
from TPNN_2023.lab2.metrics import *
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split

import plotly.express as px
import plotly.graph_objects as go

### Reusable helper functions

In [40]:
# Column names of the mushroom dataset, in the order the fields appear on each line.
columns = ["edibility","cap-shape", "cap-surface", "cap-color", "bruises", "odor",
           "gill-attachment", "gill-spacing", "gill-size", "gill-color",
           "stalk-shape", "stalk-root", "stalk-surface-above-ring",
           "stalk-surface-below-ring", "stalk-color-above-ring",
           "stalk-color-below-ring", "veil-type", "veil-color", "ring-number",
           "ring-type", "spore-print-color", "population", "habitat"]

def from_file(file_name, sep: str):
    """Load a delimited text dataset into a DataFrame of raw string values.

    Every non-blank line is split on ``sep`` and must yield exactly
    ``len(columns)`` fields. Rows keep their file order, are labelled with the
    module-level ``columns`` names and get the default 0..n-1 index.

    Args:
        file_name: Path of the text file to read.
        sep: Field separator used inside each line.

    Returns:
        pandas.DataFrame with one row per data line and one column per entry
        of ``columns``.

    Raises:
        AssertionError: If a data line does not contain ``len(columns)`` fields.
        OSError: If the file cannot be opened.
    """
    rows = []

    # The context manager closes the handle even if a malformed line aborts parsing.
    with open(file_name, 'r') as dataset_file:
        for line_number, line in enumerate(dataset_file, start=1):
            record = line.rstrip("\n")
            if not record.strip():
                # Tolerate blank lines (typically an extra newline at end of file).
                continue

            fields = record.split(sep)
            assert len(fields) == len(columns), (
                f"line {line_number}: expected {len(columns)} fields, got {len(fields)}"
            )
            rows.append(fields)

    return pd.DataFrame(data=rows, columns=columns)


def encode_df(df, dst_columns):
    """Label-encode every column of ``df`` and split it into inputs and targets.

    Each column is passed through ``LabelEncoder.fit_transform`` independently
    (the encoder is re-fitted per column, so no mapping is kept afterwards).

    Args:
        df: DataFrame holding the raw categorical values.
        dst_columns: Names of the target (output) columns.

    Returns:
        Tuple ``(inputs, targets)`` of encoded DataFrames. ``inputs`` holds all
        columns not listed in ``dst_columns``; because the names come from
        ``np.setdiff1d`` they are in sorted order, not the original order.
    """
    feature_names = np.setdiff1d(np.array(df.columns), dst_columns)

    encoded = df.apply(LabelEncoder().fit_transform)

    features = encoded[feature_names]
    targets = encoded[dst_columns]
    return features, targets


### plot functions: ###
def plot_loss(loss_list):
    """Show a line chart of the loss values, one point per epoch.

    Epochs are numbered from 1 to ``len(loss_list)`` on the x axis; the loss
    values are plotted on the y axis. The figure is displayed immediately and
    nothing is returned.
    """
    epochs = np.arange(1, len(loss_list) + 1)
    losses = np.array(loss_list)

    assert len(epochs) == len(losses)

    axis_labels = {'x':'epoch', 'y':'loss'}
    figure = px.line(x=epochs, y=losses, labels=axis_labels)
    figure.show()

def get_predicted_data(input_data, net: Net):
    """Run every sample through ``net`` and post-process the stacked outputs.

    Args:
        input_data: Iterable of samples; each one is passed to ``net.calc_output``.
        net: Network used to compute the raw output of each sample.

    Returns:
        The result of ``get_predictions(0.5, outputs)``, where ``outputs`` is the
        NumPy array of all raw network outputs in input order.
    """
    raw_outputs = np.array([net.calc_output(sample) for sample in input_data])

    # convert output to one-hot-enc (0.5 is presumably the decision threshold — TODO confirm)
    return get_predictions(0.5, raw_outputs)


### Read dataset

In [41]:
# Guard: every dataset row is expected to have 23 fields, one per name in `columns`.
assert len(columns) == 23
# Relative path: resolved against the notebook's current working directory.
dataset_file_name = "../datasets/mushroom_dataset.txt"
# Parse the comma-separated file into a DataFrame of raw single-letter category codes.
df = from_file(dataset_file_name, ',')
df

Unnamed: 0,edibility,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,...,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
0,p,x,s,n,t,p,f,c,n,k,...,s,w,w,p,w,o,p,k,s,u
1,e,x,s,y,t,a,f,c,b,k,...,s,w,w,p,w,o,p,n,n,g
2,e,b,s,w,t,l,f,c,b,n,...,s,w,w,p,w,o,p,n,n,m
3,p,x,y,w,t,p,f,c,n,n,...,s,w,w,p,w,o,p,k,s,u
4,e,x,s,g,f,n,f,w,b,k,...,s,w,w,p,w,o,e,n,a,g
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8119,e,k,s,n,f,n,a,c,b,y,...,s,o,o,p,o,o,p,b,c,l
8120,e,x,s,n,f,n,a,c,b,y,...,s,o,o,p,n,o,p,b,v,l
8121,e,f,s,n,f,n,a,c,b,n,...,s,o,o,p,o,o,p,b,c,l
8122,p,k,y,n,f,y,f,c,n,b,...,k,w,w,p,w,o,e,w,v,l


In [42]:
# Sanity check of the loaded data: (rows, columns).
df.shape

(8124, 23)

### Encode dataframe

In [43]:
# Label-encode all columns; 'edibility' becomes the target, everything else the inputs.
in_df_labeled, out_df_labeled = encode_df(df, ['edibility'])

In [44]:
# Inspect the encoded inputs; columns appear in sorted order because encode_df
# selects them with np.setdiff1d.
in_df_labeled

Unnamed: 0,bruises,cap-color,cap-shape,cap-surface,gill-attachment,gill-color,gill-size,gill-spacing,habitat,odor,...,ring-type,spore-print-color,stalk-color-above-ring,stalk-color-below-ring,stalk-root,stalk-shape,stalk-surface-above-ring,stalk-surface-below-ring,veil-color,veil-type
0,1,4,5,2,1,4,1,0,5,6,...,4,2,7,7,3,0,2,2,2,0
1,1,9,5,2,1,4,0,0,1,0,...,4,3,7,7,2,0,2,2,2,0
2,1,8,0,2,1,5,0,0,3,3,...,4,3,7,7,2,0,2,2,2,0
3,1,8,5,3,1,5,1,0,5,6,...,4,2,7,7,3,0,2,2,2,0
4,0,3,5,2,1,4,0,1,1,5,...,0,3,7,7,3,1,2,2,2,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8119,0,4,3,2,0,11,0,0,2,5,...,4,0,5,5,0,0,2,2,1,0
8120,0,4,5,2,0,11,0,0,2,5,...,4,0,5,5,0,0,2,2,0,0
8121,0,4,2,2,0,5,0,0,2,5,...,4,0,5,5,0,0,2,2,1,0
8122,0,4,3,3,1,0,1,0,2,8,...,0,7,7,7,0,1,2,1,2,0


In [45]:
# Inspect the encoded target; comparing with the raw rows above, 'e' maps to 0 and 'p' to 1.
out_df_labeled

Unnamed: 0,edibility
0,1
1,0
2,0
3,1
4,0
...,...
8119,0
8120,0
8121,0
8122,1


In [46]:
### Scale input dataframe
# Standardize each encoded feature column; fit_transform returns a NumPy array,
# so the column names of in_df_labeled are not carried over.
# NOTE(review): the scaler is fitted on the full dataset here, before any
# train/test split visible in this part of the notebook — confirm this is intended.
scaler = StandardScaler()
in_df_scaled = scaler.fit_transform(in_df_labeled)
in_df_scaled

array([[ 1.18591657, -0.19824983,  1.02971224, ...,  0.58638466,
         0.14203663,  0.        ],
       [ 1.18591657,  1.76587407,  1.02971224, ...,  0.58638466,
         0.14203663,  0.        ],
       [ 1.18591657,  1.37304929, -2.08704716, ...,  0.58638466,
         0.14203663,  0.        ],
       ...,
       [-0.84322964, -0.19824983, -0.8403434 , ...,  0.58638466,
        -3.97905487,  0.        ],
       [-0.84322964, -0.19824983, -0.21699152, ..., -0.89305291,
         0.14203663,  0.        ],
       [-0.84322964, -0.19824983,  1.02971224, ...,  0.58638466,
        -3.97905487,  0.        ]])

### Build Model

In [None]:
# Assemble the network layer by layer: two Dense layers followed by a Softmax layer.
# NOTE(review): Dense(120, th) / Dense(2, th) presumably mean 120 and 2 units with
# the `th` activation (likely tanh) — confirm against TPNN_2023.lab2.
net = Net()
net.add_layer(Dense(120, th))
net.add_layer(Dense(2, th))
net.add_layer(Softmax(2))
# The input width is the number of feature columns (same for the scaled array).
net.init_net(init_strategy=default_init, input_shape=in_df_labeled.shape[1])
net.print_net_config(ConfigLevel.LOW)