# Get the coordinates of all tokens for one model

In [2]:
import pandas as pd
import numpy as np

In [3]:
def get_coordinates(token, model):
    """Write the 2-D coordinates of a single model to their own TSV file.

    Reads ``tokens_coordinates/{token}.tsne.30.tsv`` (tab-separated), copies
    the ``{model}.x`` / ``{model}.y`` columns into plain ``x`` / ``y``
    columns, and saves ``_id``, ``x`` and ``y`` (in that order, without the
    index) to ``tokens_coordinates/{model}.tsne.30.tsv``.

    Args:
        token: Stem of the input file inside ``tokens_coordinates/``.
        model: Model name; prefix of the coordinate columns to extract and
            stem of the output file.

    Returns:
        None. The result is written to disk.

    Raises:
        KeyError: If the model's coordinate columns or ``_id`` are missing.
    """
    source_path = f'tokens_coordinates/{token}.tsne.30.tsv'
    target_path = f'tokens_coordinates/{model}.tsne.30.tsv'

    all_models = pd.read_csv(source_path, sep='\t')

    # Expose the selected model's columns under the generic names x and y.
    with_xy = all_models.assign(
        x=all_models[model + '.x'],
        y=all_models[model + '.y'],
    )

    # Keep only the token id and its coordinates.
    with_xy.loc[:, ['_id', 'x', 'y']].to_csv(target_path, sep='\t', index=False)

In [None]:
# Extract the coordinates of one model from the 'time' token file.
# `token` is the stem of the input file and `model` selects the
# '<model>.x' / '<model>.y' columns (see get_coordinates).
token = 'time'
model = 'time.nobound4-4TIME.PPMIweight.LENGTH5000.SOCPOSGEN'
get_coordinates(token, model)

# Get the senses

In [1]:
import pandas as pd

In [6]:
def get_senses(token, model):
    """Add a ``senses`` column to a model's coordinate file, in place.

    Reads ``{model}.tsne.30.tsv`` and ``{token}.variables.tsv`` (both
    tab-separated) from the current working directory, looks up each row's
    ``_id`` in the variables file to obtain its ``senses`` value, and
    rewrites ``{model}.tsne.30.tsv`` with the extra column. Ids that do not
    occur in the variables file get a missing value.

    Args:
        token: Stem of the ``.variables.tsv`` file holding the senses.
        model: Stem of the ``.tsne.30.tsv`` file to annotate.

    Returns:
        None. The coordinate file is overwritten on disk.
    """
    coordinates_path = f'{model}.tsne.30.tsv'

    coordinates = pd.read_csv(coordinates_path, sep='\t')
    variables = pd.read_csv(f'{token}.variables.tsv', sep='\t')

    # Series indexed by token id, used as the lookup table for map().
    sense_by_id = variables.set_index('_id')['senses']
    coordinates['senses'] = coordinates['_id'].map(sense_by_id)

    coordinates.to_csv(coordinates_path, sep='\t', index=False)

In [7]:
# Annotate the model's coordinate file with the senses listed in
# '<token>.variables.tsv' (see get_senses); the coordinate file is
# rewritten in place.
token = 'time'
model = 'time.nobound4-4TIME.PPMIweight.LENGTH5000.SOCPOSGEN'
get_senses(token,model)

# Visualise the model

In [2]:
import pandas as pd
import plotly.express as px

In [None]:
# Load the token coordinates and senses of one model.
# Column names are supplied explicitly, so every line of the file -- including
# a header line, if the file has one -- is read as a data row.
data = 'time_final_no_det-time.bound3-3TIME.PPMIweight.LENGTH5000.SOCPOSNVAA.tsne.30.tsv'
df = pd.read_csv(data, sep='\t', names=['_id', 'x', 'y', 'senses'])

# Coerce both coordinate columns to numbers (unparseable values become NaN),
# then discard every row without a valid x/y pair and renumber the index.
df = (
    df.assign(
        x=pd.to_numeric(df['x'], errors='coerce'),
        y=pd.to_numeric(df['y'], errors='coerce'),
    )
    .dropna(subset=['x', 'y'])
    .reset_index(drop=True)
)

# Interactive scatter plot: one point per token, coloured by sense, with the
# token id shown as the hover title.
fig = px.scatter(
    data_frame=df,
    x='x',
    y='y',
    color='senses',
    hover_name='_id',
    title='Interactive Token Visualization',
    labels={'x': 'X-Axis', 'y': 'Y-Axis'},
)

# Square 700x700 figure with the two axes anchored to each other so that one
# unit on x is drawn as long as one unit on y.
fig.update_layout(
    width=700,
    height=700,
    xaxis={'scaleanchor': 'y', 'scaleratio': 1},
    yaxis={'scaleanchor': 'x'},
)

# Render the figure in the notebook.
fig.show()

# Display the selected tokens

In [16]:
import dash
from dash import dcc, html, Output, Input

In [None]:
# Create the Dash application for this notebook/module.
app = dash.Dash(__name__)

# Page layout: the scatter plot built above (`fig`) followed by a text area
# that the selection callback fills in.
app.layout = html.Div(
    children=[
        dcc.Graph(
            id='scatter-plot',
            figure=fig,
            # Let the mouse wheel zoom the plot.
            config=dict(scrollZoom=True),
            # Fixed-size container for the graph.
            style=dict(display='inline-block', width='700px', height='700px'),
        ),
        html.Div(
            id='selected-tokens',
            style={'margin-top': '20px', 'font-size': '16px'},
        ),
    ]
)

# Define callback to handle box/lasso selection on the scatter plot
@app.callback(
    Output('selected-tokens', 'children'),
    Input('scatter-plot', 'selectedData')
)
def display_selected_tokens(selectedData):
    """Return a one-line summary of the tokens selected in the scatter plot.

    Args:
        selectedData: Value of the graph's ``selectedData`` property: ``None``
            before any selection, otherwise a dict whose ``'points'`` entry
            is a list of per-point dicts.

    Returns:
        ``"No tokens selected."`` when there is no selection, the selection
        is empty, or no point carries an id; otherwise
        ``"Selected Tokens: "`` followed by the comma-separated token ids.
    """
    points = (selectedData or {}).get('points') or []

    selected_tokens = []
    for point in points:
        # The figure is built with hover_name='_id', which stores the id in
        # the point's 'hovertext'. 'customdata' only exists when the figure
        # is created with custom_data/hover_data, so it is a fallback here
        # rather than a required key.
        token_id = point.get('hovertext')
        if token_id is None:
            token_id = point.get('customdata')
            # customdata is a per-point sequence; the id is its first entry.
            if isinstance(token_id, (list, tuple)):
                token_id = token_id[0] if token_id else None
        if token_id is not None:
            # Ids are not guaranteed to be strings (e.g. numeric ids).
            selected_tokens.append(str(token_id))

    if not selected_tokens:
        return "No tokens selected."
    return f"Selected Tokens: {', '.join(selected_tokens)}"

# Start the Dash server in debug mode, but only when this code runs as the
# main module (not when it is imported).
# NOTE(review): recent Dash releases replace `run_server` with `app.run` --
# confirm against the installed Dash version.
if __name__ == '__main__':
    app.run_server(debug=True)

# Test functions

In [20]:
import get_model_data
import get_token_ids
import importlib
# A module must already be imported before importlib.reload() can be called
# on it. Reloading re-executes both modules so that the calls below use
# their current source.
importlib.reload(get_token_ids)
importlib.reload(get_model_data)


<module 'get_model_data' from 'd:\\OneDrive - KU Leuven\\KUL\\SOC_flow\\tokens_coordinates\\get_model_data.py'>

In [21]:
# Token and model names passed to the two functions under test below.
token = 'time_final_no_det'
model = 'time.bound3-3TIME.PPMIweight.LENGTH5000.SOCPOSNVAA'

In [None]:
# Run get_model_data from the freshly reloaded module on the chosen token/model.
get_model_data.get_model_data(token, model)

In [22]:
# Run get_token_ids from the freshly reloaded module on the same token/model.
get_token_ids.get_token_ids(token, model)