# IMPORT

In [41]:
import numpy as np
import matplotlib.pyplot as plt
import dash
from dash import dcc, html, Input, Output
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from sklearn.preprocessing import StandardScaler, MinMaxScaler

# Read Data

In [32]:
# Load the Pokédex CSV (path is relative to the notebook's working directory)
df = pd.read_csv(filepath_or_buffer='Input_data/pokedex gen 1-7.csv')
df

Unnamed: 0,#,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False
3,3,VenusaurMega Venusaur,Grass,Poison,625,80,100,123,122,120,80,1,False
4,4,Charmander,Fire,,309,39,52,43,60,50,65,1,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
795,719,Diancie,Rock,Fairy,600,50,100,150,100,150,50,6,True
796,719,DiancieMega Diancie,Rock,Fairy,700,50,160,110,160,110,110,6,True
797,720,HoopaHoopa Confined,Psychic,Ghost,600,80,110,60,150,130,70,6,True
798,720,HoopaHoopa Unbound,Psychic,Dark,680,80,160,60,170,130,80,6,True


# Change #, index, generation to categorical data instead of numerical

In [33]:
# Incorporate the row index into the data as a regular 'index' column.
# Guarded so the cell can be re-run: an unguarded reset_index() on a frame that
# already has an 'index' column inserts a stray 'level_0' column instead (and
# raises once that name is taken too).
if 'index' not in df.columns:
    df = df.reset_index()

# Identifier-like columns are labels rather than quantities, so store them as
# strings; dtype-based checks further down then treat them as non-numeric.
numtocat = ['index', '#', 'Generation']

for col in numtocat:
    df[col] = df[col].astype(str)

df

Unnamed: 0,index,#,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,0,1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False
1,1,2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False
2,2,3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False
3,3,3,VenusaurMega Venusaur,Grass,Poison,625,80,100,123,122,120,80,1,False
4,4,4,Charmander,Fire,,309,39,52,43,60,50,65,1,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
795,795,719,Diancie,Rock,Fairy,600,50,100,150,100,150,50,6,True
796,796,719,DiancieMega Diancie,Rock,Fairy,700,50,160,110,160,110,110,6,True
797,797,720,HoopaHoopa Confined,Psychic,Ghost,600,80,110,60,150,130,70,6,True
798,798,720,HoopaHoopa Unbound,Psychic,Dark,680,80,160,60,170,130,80,6,True


# Create New Quantitative Columns

In [34]:
def metric(a,b):
    """Return the element-wise difference ``df[a] - df[b]``.

    ``a`` and ``b`` are column labels looked up in the module-level ``df``.
    """
    minuend = df[a]
    subtrahend = df[b]
    return minuend - subtrahend

def signed_sqrt(a,b):
    """Square root of ``|a - b|`` carrying the sign of ``a - b``.

    Works on scalars as well as on array-like operands, because only
    NumPy ufuncs and ``abs`` are applied to the difference.
    """
    gap = a - b
    magnitude = np.sqrt(abs(gap))
    return np.sign(gap) * magnitude

def metricsq(a,b):
    """Signed square root of the difference of squares of two ``df`` columns.

    Squares columns ``a`` and ``b`` of the module-level ``df`` and passes
    the two squared series to ``signed_sqrt``.
    """
    first_squared = df[a] ** 2
    second_squared = df[b] ** 2
    return signed_sqrt(first_squared, second_squared)



# attack potential: physical vs special attack
df['AP'] = metricsq('Attack', 'Sp. Atk')

# defense potential: physical vs special defense
df['DP'] = metricsq('Defense', 'Sp. Def')

# potential: attack potential vs defense potential
df['ADP'] = metricsq('AP', 'DP')

# magnitude of the potential
df['absADP'] = df['ADP'].abs()

# ability focus: speed vs HP
df['AF'] = metricsq('Speed', 'HP')

# magnitude of the focus
df['absAF'] = df['AF'].abs()

# alignment: product of potential and focus
df['alignment'] = df['ADP'].mul(df['AF'])

df


Unnamed: 0,index,#,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,...,Speed,Generation,Legendary,AP,DP,ADP,absADP,AF,absAF,alignment
0,0,1,Bulbasaur,Grass,Poison,318,45,49,49,65,...,45,1,False,-42.708313,-42.708313,0.000000,0.000000,0.000000,0.000000,0.000000
1,1,2,Ivysaur,Grass,Poison,405,60,62,63,80,...,60,1,False,-50.556899,-49.305172,11.180340,11.180340,0.000000,0.000000,0.000000
2,2,3,Venusaur,Grass,Poison,525,80,82,83,100,...,80,1,False,-57.236352,-55.776339,12.845233,12.845233,0.000000,0.000000,0.000000
3,3,3,VenusaurMega Venusaur,Grass,Poison,625,80,100,123,122,...,80,1,False,-69.885621,27.000000,64.459289,64.459289,0.000000,0.000000,0.000000
4,4,4,Charmander,Fire,,309,39,52,43,60,...,65,1,False,-29.933259,-25.514702,15.652476,15.652476,52.000000,52.000000,813.928744
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
795,795,719,Diancie,Rock,Fairy,600,50,100,150,100,...,50,6,True,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
796,796,719,DiancieMega Diancie,Rock,Fairy,700,50,160,110,160,...,110,6,True,0.000000,0.000000,0.000000,0.000000,97.979590,97.979590,0.000000
797,797,720,HoopaHoopa Confined,Psychic,Ghost,600,80,110,60,150,...,70,6,True,-101.980390,-115.325626,-53.851648,53.851648,-38.729833,38.729833,2085.665361
798,798,720,HoopaHoopa Unbound,Psychic,Dark,680,80,160,60,170,...,80,6,True,-57.445626,-115.325626,-100.000000,100.000000,0.000000,0.000000,-0.000000


# Add Categorical Columns

In [35]:
# Add a filter flag for Mega evolutions.
# Match "Mega " (with the trailing space) rather than the bare substring "Mega":
# the Mega forms visible in this data are named like "VenusaurMega Venusaur",
# while an ordinary species whose name merely starts with "Mega" (e.g. Meganium)
# must not be flagged. Missing names count as not-Mega.
df['Mega'] = df['Name'].str.contains('Mega ', regex=False, na=False)

# Combine the two boolean flags into one four-level label.
# np.select takes the first matching condition, so 'both' is tested first;
# rows matching neither flag fall through to 'vanilla'.
is_legendary = df['Legendary']
is_mega = df['Mega']
df['Legendary Mega'] = np.select(
    [is_legendary & is_mega, is_legendary, is_mega],
    ['both', 'Legendary', 'Mega'],
    default='vanilla',
)

# Add Standardized Copy of Original Stats

In [36]:
# Base stat columns to standardize and the names of their standardized copies
stats = ['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed','Total']
standarized_stats = [col + "_standarized" for col in stats]

# Standardize the selected columns with scikit-learn's StandardScaler
scaler = StandardScaler()
df_scaled_numeric = pd.DataFrame(
    scaler.fit_transform(df[stats]),
    columns=standarized_stats,  # one standardized column per base stat
    index=df.index  # keep the same index to avoid alignment issues
)

# Combine the original frame with the standardized columns.
# Any standardized columns left over from a previous run of this cell are
# dropped first; otherwise re-running would create duplicate column labels
# and later `df[name]` lookups would return a DataFrame instead of a Series.
df = pd.concat(
    [df.drop(columns=standarized_stats, errors='ignore'), df_scaled_numeric],
    axis=1,
)
df

Unnamed: 0,index,#,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,...,alignment,Mega,Legendary Mega,HP_standarized,Attack_standarized,Defense_standarized,Sp. Atk_standarized,Sp. Def_standarized,Speed_standarized,Total_standarized
0,0,1,Bulbasaur,Grass,Poison,318,45,49,49,65,...,0.000000,False,vanilla,-0.950626,-0.924906,-0.797154,-0.239130,-0.248189,-0.801503,-0.976765
1,1,2,Ivysaur,Grass,Poison,405,60,62,63,80,...,0.000000,False,vanilla,-0.362822,-0.524130,-0.347917,0.219560,0.291156,-0.285015,-0.251088
2,2,3,Venusaur,Grass,Poison,525,80,82,83,100,...,0.000000,False,vanilla,0.420917,0.092448,0.293849,0.831146,1.010283,0.403635,0.749845
3,3,3,VenusaurMega Venusaur,Grass,Poison,625,80,100,123,122,...,0.000000,True,Mega,0.420917,0.647369,1.577381,1.503891,1.729409,0.403635,1.583957
4,4,4,Charmander,Fire,,309,39,52,43,60,...,813.928744,False,vanilla,-1.185748,-0.832419,-0.989683,-0.392027,-0.787533,-0.112853,-1.051836
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
795,795,719,Diancie,Rock,Fairy,600,50,100,150,100,...,0.000000,False,Legendary,-0.754692,0.647369,2.443765,0.831146,2.808099,-0.629341,1.375429
796,796,719,DiancieMega Diancie,Rock,Fairy,700,50,160,110,160,...,0.000000,True,both,-0.754692,2.497104,1.160233,2.665905,1.369846,1.436611,2.209541
797,797,720,HoopaHoopa Confined,Psychic,Ghost,600,80,110,60,150,...,2085.665361,False,Legendary,0.420917,0.955658,-0.444182,2.360112,2.088973,0.059310,1.375429
798,798,720,HoopaHoopa Unbound,Psychic,Dark,680,80,160,60,170,...,-0.000000,False,Legendary,0.420917,2.497104,-0.444182,2.971699,2.088973,0.403635,2.042718


# Calculate Total Deviation

In [37]:
# Signed total: sum of the standardized columns, accumulated left to right from 0
df['Deviation'] = sum(df[s] for s in standarized_stats)

# Absolute total: same accumulation over the absolute values
df['Abs Dev'] = sum(abs(df[s]) for s in standarized_stats)

# Every numeric column currently in the frame (including the two just added)
new_stats = df.select_dtypes(include='number').columns.to_list()
print(new_stats)
df

['Total', 'HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed', 'AP', 'DP', 'ADP', 'absADP', 'AF', 'absAF', 'alignment', 'HP_standarized', 'Attack_standarized', 'Defense_standarized', 'Sp. Atk_standarized', 'Sp. Def_standarized', 'Speed_standarized', 'Total_standarized', 'Deviation', 'Abs Dev']


Unnamed: 0,index,#,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,...,Legendary Mega,HP_standarized,Attack_standarized,Defense_standarized,Sp. Atk_standarized,Sp. Def_standarized,Speed_standarized,Total_standarized,Deviation,Abs Dev
0,0,1,Bulbasaur,Grass,Poison,318,45,49,49,65,...,vanilla,-0.950626,-0.924906,-0.797154,-0.239130,-0.248189,-0.801503,-0.976765,-4.938274,4.938274
1,1,2,Ivysaur,Grass,Poison,405,60,62,63,80,...,vanilla,-0.362822,-0.524130,-0.347917,0.219560,0.291156,-0.285015,-0.251088,-1.260258,2.281689
2,2,3,Venusaur,Grass,Poison,525,80,82,83,100,...,vanilla,0.420917,0.092448,0.293849,0.831146,1.010283,0.403635,0.749845,3.802123,3.802123
3,3,3,VenusaurMega Venusaur,Grass,Poison,625,80,100,123,122,...,Mega,0.420917,0.647369,1.577381,1.503891,1.729409,0.403635,1.583957,7.866559,7.866559
4,4,4,Charmander,Fire,,309,39,52,43,60,...,vanilla,-1.185748,-0.832419,-0.989683,-0.392027,-0.787533,-0.112853,-1.051836,-5.352099,5.352099
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
795,795,719,Diancie,Rock,Fairy,600,50,100,150,100,...,Legendary,-0.754692,0.647369,2.443765,0.831146,2.808099,-0.629341,1.375429,6.721776,9.489840
796,796,719,DiancieMega Diancie,Rock,Fairy,700,50,160,110,160,...,both,-0.754692,2.497104,1.160233,2.665905,1.369846,1.436611,2.209541,10.584549,12.093932
797,797,720,HoopaHoopa Confined,Psychic,Ghost,600,80,110,60,150,...,Legendary,0.420917,0.955658,-0.444182,2.360112,2.088973,0.059310,1.375429,6.816216,7.704581
798,798,720,HoopaHoopa Unbound,Psychic,Dark,680,80,160,60,170,...,Legendary,0.420917,2.497104,-0.444182,2.971699,2.088973,0.403635,2.042718,9.980863,10.869228


# Get some #'s

In [38]:
# Column labels and overall frame dimensions
cols = list(df.columns)
print(cols)
nrows = len(df.index)
ncols = len(df.columns)

# Both counts are read from the LAST row, so they rely on the rows being
# ordered by '#' / 'Generation' (values are strings, hence the int() casts).
npokemon = int(df['#'].iat[-1])
ngenerations = int(df['Generation'].iat[-1])
print(nrows,npokemon, ngenerations, npokemon/ngenerations)

# Name column kept as its own variable
pokemon = df['Name']

['index', '#', 'Name', 'Type 1', 'Type 2', 'Total', 'HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed', 'Generation', 'Legendary', 'AP', 'DP', 'ADP', 'absADP', 'AF', 'absAF', 'alignment', 'Mega', 'Legendary Mega', 'HP_standarized', 'Attack_standarized', 'Defense_standarized', 'Sp. Atk_standarized', 'Sp. Def_standarized', 'Speed_standarized', 'Total_standarized', 'Deviation', 'Abs Dev']
800 721 6 120.16666666666667


# Define Functions

In [47]:
def is_categorical(col, frame=None):
    """Return True when column ``col`` holds label-like (non-numeric) data.

    Parameters
    ----------
    col : hashable
        Column label to inspect.
    frame : pandas.DataFrame, optional
        Frame to look the column up in. When omitted, the module-level
        ``df`` is used, which keeps existing ``is_categorical(col)`` calls
        working unchanged.

    Returns
    -------
    bool
        True for object, string, or categorical dtypes; False otherwise.
    """
    if frame is None:
        frame = df
    dtype = frame[col].dtype
    return (
        pd.api.types.is_object_dtype(dtype)
        # dedicated pandas string dtypes are not object dtype, so check them too
        or pd.api.types.is_string_dtype(dtype)
        or isinstance(dtype, pd.CategoricalDtype)
    )


def myscatter(fig, df, x_col, y_col, color_by, size_by, opacity, size_range):
    """Build the main Plotly Express scatter figure for the selected columns.

    Parameters
    ----------
    fig : ignored; a new figure is always created and returned.
    df : DataFrame supplying the data (must contain 'Name', 'ADP' and 'AF',
        which are used for the hover text).
    x_col, y_col : column labels for the two axes.
    color_by : column label used for coloring, or None.
    size_by : value handed to ``px.scatter(size=...)``.
    opacity : marker opacity.
    size_range : ``(min, max)`` pair; only the max is used, as ``size_max``.

    Returns
    -------
    The new figure. Numeric axes get a dashed zero line and a range that is
    symmetric about zero; non-numeric axes are switched to category type.
    When both axes are numeric an OLS trendline and a dashed diagonal are
    added as well.
    """
    axis_padding = 1.1  # widen each axis 10% beyond the extreme values
    _, largest_marker = size_range

    x_is_num = pd.api.types.is_numeric_dtype(df[x_col])
    y_is_num = pd.api.types.is_numeric_dtype(df[y_col])
    both_numeric = x_is_num and y_is_num

    def half_span(column):
        # Largest padded absolute value of the column: half of a zero-centred range
        lo = df[column].min() * axis_padding
        hi = df[column].max() * axis_padding
        return max(abs(hi), abs(lo))

    # A trendline is only requested when both axes are numeric
    fig = px.scatter(
        df,
        x=x_col,
        y=y_col,
        hover_name='Name',
        hover_data=['ADP' , 'AF'],
        title=f'Scatter Plot: {y_col} vs {x_col}',
        color=color_by,
        size=size_by,
        size_max=largest_marker,
        opacity=opacity,
        trendline='ols' if both_numeric else None,
    )

    # x axis: dashed vertical line at zero, or categorical axis
    if x_is_num:
        x_half = half_span(x_col)
        fig.add_vline(x=0, line_dash="dash", line_color="black")
    else:
        fig.update_xaxes(type='category')

    # y axis: dashed horizontal line at zero, or categorical axis
    if y_is_num:
        y_half = half_span(y_col)
        fig.add_hline(y=0, line_dash="dash", line_color="black")
    else:
        fig.update_yaxes(type='category')

    if both_numeric:
        # Each axis keeps its own symmetric range ...
        fig.update_xaxes(range=[-x_half, x_half])
        fig.update_yaxes(range=[-y_half, y_half])

        # ... while the y = x diagonal is drawn out to the larger of the two
        reach = max(x_half, y_half)
        fig.add_shape(
            type="line",
            x0=-reach, x1=reach, y0=-reach, y1=reach,
            line=dict(color="purple", width=1, dash="dash"),
        )

    return fig


def meanvar(fig, df, x_col, y_col, color_by, size_by, opacity, size_range):
    """Overlay per-category mean ± standard deviation markers on ``fig``.

    The overlay is added only when exactly one of ``x_col`` / ``y_col`` is
    categorical according to ``is_categorical`` (which is called with the
    column name only). Otherwise ``fig`` is returned untouched.

    Parameters
    ----------
    fig : figure to add the trace to (modified in place and returned).
    df : DataFrame that is grouped by the categorical column.
    x_col, y_col : column labels shown on the two axes.
    color_by, size_by, opacity : accepted for signature parity with
        ``myscatter``; not used here.
    size_range : ``(min_size, max_size)`` for the summary markers.

    Returns
    -------
    The same ``fig`` object.

    Notes
    -----
    Marker size is proportional to the group's standard deviation, scaled so
    the widest group gets ``max_size`` and floored at ``min_size`` so every
    mean stays visible. Groups with undefined variance (a single member) are
    treated as having zero spread instead of passing NaN to Plotly.
    """
    min_size, max_size = size_range

    x_cat = is_categorical(x_col)
    y_cat = is_categorical(y_col)

    # Need exactly one categorical axis to summarize the other one against.
    if x_cat == y_cat:
        return fig

    if x_cat:
        cat_col, num_col, error_axis = x_col, y_col, 'y'
    else:
        cat_col, num_col, error_axis = y_col, x_col, 'x'

    summary = df.groupby(cat_col)[num_col].agg(['mean', 'var']).reset_index()

    # Variance is NaN for one-member groups; use 0 so sizes and error bars stay finite.
    spread = np.sqrt(summary['var']).fillna(0.0)

    widest = spread.max()
    if widest > 0:
        marker_sizes = (spread * (max_size / widest)).clip(lower=min_size)
    else:
        # No group has any spread (or there are no groups): avoid dividing by zero.
        marker_sizes = np.full(len(spread), min_size)

    errors_on_y = error_axis == 'y'

    # Add overlay trace
    fig.add_trace(go.Scatter(
        x=summary[cat_col] if errors_on_y else summary['mean'],
        y=summary['mean'] if errors_on_y else summary[cat_col],
        mode='markers',
        marker=dict(
            size=marker_sizes,
            color='red',
            opacity=0.7,
            symbol='diamond'  # distinguish summary points
        ),
        error_y=dict(array=spread, visible=errors_on_y),
        error_x=dict(array=spread, visible=not errors_on_y),
        name='Mean ± SD'
    ))

    return fig


# Plotting DASH app

In [48]:
# Every DataFrame column is offered for the axes and for coloring
columns = list(df.columns)

# Create the Dash application
app = dash.Dash(__name__)

# Extra dropdown entry that switches an encoding off
nulloption = [dict(label='None', value='None')]

app.layout = html.Div(children=[

    # plot type selector (cannot be cleared)
    dcc.Dropdown(
        id='graph-type-dropdown',
        options=[
            dict(label='Scatter', value='scatter'),
            dict(label='Scatter Matrix', value='scatter-matrix'),
        ],
        value='scatter',
        clearable=False,
    ),

    html.H1("Interactive Scatter Plot"),

    # x-axis column
    html.Div(
        children=[
            html.Label("Select X-axis:"),
            dcc.Dropdown(
                id='x-axis',
                options=[dict(label=col, value=col) for col in columns],
                value='alignment',
            ),
        ],
        style={'width': '48%', 'display': 'inline-block'},
    ),

    # y-axis column
    html.Div(
        children=[
            html.Label("Select Y-axis:"),
            dcc.Dropdown(
                id='y-axis',
                options=[dict(label=col, value=col) for col in columns],
                value='Total_standarized',
            ),
        ],
        style={'width': '48%', 'display': 'inline-block'},
    ),

    # color encoding: any column, or 'None'
    html.Div(
        children=[
            html.Label("Color by:"),
            dcc.Dropdown(
                id='color-by',
                options=[dict(label=col, value=col) for col in columns] + nulloption,
                value='Legendary Mega',  # default coloring column
            ),
        ],
        style={'width': '48%', 'display': 'inline-block'},
    ),

    # size encoding: numeric columns only, or 'None'
    html.Div(
        children=[
            html.Label("Point Size:"),
            dcc.Dropdown(
                id='size-by',
                options=[dict(label=col, value=col) for col in new_stats] + nulloption,
                value='Deviation',  # default size column
            ),
        ],
        style={'width': '48%', 'display': 'inline-block'},
    ),

    # opacity slider and marker size range slider
    html.Div(
        children=[
            html.Label("Point Opacity:"),
            dcc.Slider(
                id='opacity-slider',
                min=0.1,
                max=1.0,
                step=0.1,
                value=0.6,
                marks={tick/10: f'{tick/10:.1f}' for tick in range(1, 11)},
                tooltip={"placement": "bottom", "always_visible": True},
            ),

            html.Label("Point size range:"),
            dcc.RangeSlider(
                id='size-range',
                min=1,
                max=50,
                step=1,
                value=[5, 20],
                marks={tick: str(tick) for tick in range(0, 55, 5)},
            ),
        ],
        style={'width': '60%', 'padding': '20px 0'},
    ),

    # output graph, filled in by the callback
    html.Div(children=[
        dcc.Graph(id='scatter-graph'),
    ]),
])

@app.callback(
    Output('scatter-graph', 'figure'),
    Input('x-axis', 'value'),
    Input('y-axis', 'value'),
    Input('color-by', 'value'),
    Input('size-by', 'value'),
    Input('opacity-slider', 'value'),
    Input('size-range', 'value'),
    Input('graph-type-dropdown', 'value')
)
def update_scatter(x_col, y_col, color_by, size_by, opacity, size_range, graph_type):
    """Rebuild the figure whenever any control changes.

    Parameters
    ----------
    x_col, y_col : selected axis columns (None when the dropdown is cleared).
    color_by : selected color column, the string 'None', or None when cleared.
    size_by : selected size column, the string 'None', or None when cleared.
    opacity : marker opacity from the slider.
    size_range : ``[min_size, max_size]`` from the range slider.
    graph_type : 'scatter' or 'scatter-matrix'.

    Returns
    -------
    A Plotly figure. An empty figure is returned when the single scatter is
    requested but an axis dropdown has been cleared, or when ``graph_type``
    is not one of the two known values.
    """
    fig = go.Figure()  # fallback empty figure

    # A cleared dropdown sends Python None, while the explicit "None" entry
    # sends the string 'None'; both mean "no color encoding".
    color_arg = None if color_by in (None, 'None') else color_by

    # Marker sizes: uniform when no size column is chosen, otherwise the
    # column min-max scaled into [min_size, max_size] so sizes are always positive.
    min_size, max_size = size_range
    if size_by in (None, 'None'):
        size_rescaled = np.full(len(df), max_size)
    else:
        scaler = MinMaxScaler(feature_range=(0, 1))
        size_rescaled = scaler.fit_transform(df[[size_by]]).flatten()
        size_rescaled = min_size + size_rescaled * (max_size - min_size)

    # SCATTER MATRIX OPTION
    if graph_type == 'scatter-matrix':
        # Uses the module-level `stats` list of columns as matrix dimensions
        numeric_cols = stats

        fig = px.scatter_matrix(
            df,
            dimensions = numeric_cols,
            color = color_arg,
            title = "Scatter Matrix of All Numeric Columns",
            size = size_rescaled,
            size_max = max_size,
            opacity = opacity,
            )

        fig.update_layout(
            width=1200,
            height=1200,
            margin=dict(l=50, r=50, t=50, b=50),
            autosize=False
        )

    # SINGLE SCATTER OPTION
    # Skipped (leaving the empty fallback figure) while either axis dropdown
    # is cleared, since there is no column to plot.
    elif graph_type == 'scatter' and x_col is not None and y_col is not None:
        fig = myscatter(fig, df, x_col, y_col, color_arg, size_rescaled, opacity, size_range)

        fig = meanvar(fig, df, x_col, y_col, color_arg, size_rescaled, opacity, size_range)

    # choose what to hover over based on the closest data point
    fig.update_layout(hovermode='closest', transition_duration=500)

    return fig

# Start the Dash app only when this code runs as the main module;
# debug=True is passed straight through to app.run.
if __name__ == '__main__':
    app.run(debug=True)