In [1]:
import pandas as pd
import numpy as np
from models.gan import GAN
from sklearn.preprocessing import StandardScaler

In [2]:
# Read the raw Pokémon table from disk and preview its first five rows.
df_pokemon = pd.read_csv("data/Pokemon.csv")
df_pokemon.head(n=5)

Unnamed: 0,#,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False
3,3,VenusaurMega Venusaur,Grass,Poison,625,80,100,123,122,120,80,1,False
4,4,Charmander,Fire,,309,39,52,43,60,50,65,1,False


In [3]:
# Remove identifier-like columns and the 'Total' column.
df_pokemon = df_pokemon.drop(columns=['Name', 'Total', '#'])

# Per-column mean / standard deviation of the numeric columns, excluding the
# 'Generation' column. It is dropped by name rather than by slicing off the
# last list element, so the result no longer depends on the column order of
# the CSV.
# Note: pandas' .std() defaults to the sample standard deviation (ddof=1).
stat_values = df_pokemon.select_dtypes(include=[np.number]).drop(columns=['Generation'])
mean_values = stat_values.mean().to_list()
print(mean_values)
std_values = stat_values.std().to_list()

[69.25875, 79.00125, 73.8425, 72.82, 71.9025, 68.2775]


In [4]:
numerical_cols = ['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']

# Standardise the six stat columns and keep the fitted statistics; they are
# used further down to map generated values back to the original scale.
scaler = StandardScaler()
df_pokemon[numerical_cols] = scaler.fit_transform(df_pokemon[numerical_cols])
mean, std = scaler.mean_, scaler.scale_

# A missing secondary type becomes the explicit category 'None' before the
# categorical columns are one-hot encoded.
df_pokemon['Type 2'] = df_pokemon['Type 2'].fillna('None')
df_pokemon = pd.get_dummies(df_pokemon, columns=['Type 1', 'Type 2', 'Generation'])

# Cast every boolean column to integers.
bool_cols = df_pokemon.select_dtypes(include=['bool']).columns
df_pokemon = df_pokemon.astype({bool_col: int for bool_col in bool_cols})

# Remember the encoded column layout for labelling the generated samples.
column_names = df_pokemon.columns
df_pokemon.head(n=5)

Unnamed: 0,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Legendary,Type 1_Bug,Type 1_Dark,Type 1_Dragon,...,Type 2_Psychic,Type 2_Rock,Type 2_Steel,Type 2_Water,Generation_1,Generation_2,Generation_3,Generation_4,Generation_5,Generation_6
0,-0.950626,-0.924906,-0.797154,-0.23913,-0.248189,-0.801503,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
1,-0.362822,-0.52413,-0.347917,0.21956,0.291156,-0.285015,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
2,0.420917,0.092448,0.293849,0.831146,1.010283,0.403635,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
3,0.420917,0.647369,1.577381,1.503891,1.729409,0.403635,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
4,-1.185748,-0.832419,-0.989683,-0.392027,-0.787533,-0.112853,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0


Train GAN model

In [5]:
num_samples = 10
num_features = df_pokemon.shape[1]

# Draw the input noise (same shape as the training frame) from a seeded
# generator so repeated runs start from identical noise. This only fixes the
# NumPy side; any randomness inside the GAN class is not controlled here.
noise_rng = np.random.default_rng(42)
noise = noise_rng.normal(0, 1, df_pokemon.shape)

# Build the two networks, wire them into the combined model and train.
# NOTE(review): the last recorded run of this cell ended with
# "ValueError: Arguments `target` and `output` must have the same shape"
# (target (20, 1) vs output (810, 1)). The batch/label construction lives in
# models.gan, which is not visible here — confirm and fix it there.
gan = GAN(data=df_pokemon, noise=noise, epochs=10)
generator = gan.create_generator()
discriminator = gan.create_discriminator()
gan_model = gan.compile(generator=generator, discriminator=discriminator)
trained_gan = gan.train(generator=generator, discriminator=discriminator, gan=gan_model)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
810


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


ValueError: Arguments `target` and `output` must have the same shape. Received: target.shape=(20, 1), output.shape=(810, 1)

Get predictions of the GAN model

In [None]:
# Run the trained model on the noise matrix and label the output with the
# encoded training columns.
# NOTE(review): this assumes trained_gan.predict returns one value per entry
# of column_names — what gan.train returns is not visible here; confirm.
predicted_data = trained_gan.predict(noise)
predicted_df = pd.DataFrame(predicted_data, columns=column_names)
print(len(predicted_df))

# The preview is the cell's last expression so the notebook actually renders
# it (previously it sat mid-cell and its result was discarded).
predicted_df.head(10)

# distribution of the numerical values

Transform predictions into human-readable output

In [None]:
def make_binary_attributes_readable(attributes: list, df: "pd.DataFrame | None" = None) -> pd.DataFrame:
    """Collapse groups of one-hot style columns into one label column each.

    For every prefix in ``attributes`` (e.g. ``"Type 1_"``) the columns whose
    name starts with that prefix are compared row by row. The column holding
    the largest value wins, and its name minus the prefix is stored in a new
    column named after the prefix without its trailing underscore
    (``"Type 1_"`` -> ``"Type 1"``). The group columns are then removed.

    Args:
        attributes: Column-name prefixes identifying the groups to collapse.
        df: Frame to transform. When omitted, the notebook-level
            ``predicted_df`` is used, so existing calls keep working.

    Returns:
        A new DataFrame. The input frame is left untouched, so calling this
        function (or re-running the cell that calls it) repeatedly is safe.

    Raises:
        ValueError: If no column starts with one of the given prefixes.
    """
    source_df = predicted_df if df is None else df
    # Work on a copy so neither the caller's frame nor the global is mutated.
    readable = source_df.copy()

    for prefix in attributes:
        group_cols = [col for col in readable.columns if str(col).startswith(prefix)]
        if not group_cols:
            raise ValueError(f"No columns start with prefix {prefix!r}")

        # Name of the highest-valued column in the group, per row.
        winning_cols = readable[group_cols].idxmax(axis=1)

        # Strip the prefix to get the bare label; only drop a trailing '_'
        # from the new column name when the prefix really ends with one.
        label_name = prefix[:-1] if prefix.endswith('_') else prefix
        readable[label_name] = winning_cols.str[len(prefix):]

        readable = readable.drop(columns=group_cols)

    return readable

In [None]:
# Prefixes of the one-hot column groups to collapse into a single label column each.
make_readble_cols = ["Generation_", "Type 1_", "Type 2_"]
readable_df = make_binary_attributes_readable(attributes=make_readble_cols)
readable_df.head(n=10)

In [None]:
# Turn the generated Legendary value into a boolean flag: True when above 0.5.
readable_df['Legendary'] = readable_df['Legendary'].gt(0.5)
readable_df.head(n=10)

In [None]:
# Undo the standardisation of the stat columns: value * std + mean, with the
# scaler statistics broadcast across the columns in numerical_cols order.
readable_df[numerical_cols] = readable_df[numerical_cols] * std + mean



In [None]:
# Preview the first ten rows of the human-readable generated samples.
readable_df.head(10)

Evaluate discriminator accuracy

In [None]:
# Evaluate the discriminator using the generator and discriminator objects
# created in the training cell.
# NOTE(review): evaluate_discriminator is defined in models.gan and is not
# visible here — confirm what it reports or returns.
gan.evaluate_discriminator(generator, discriminator)

Revert the values to a human-readable form

In [None]:
# TODO: make the data human-readable
