In [58]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from sklearn.metrics import roc_auc_score

In [59]:
ranks = pd.read_csv('ranks_with_escapes.csv')

In [60]:
escapes_df = ranks[ranks.is_escape == 'ESCAPE']
not_escapes_df = ranks[ranks.is_escape != 'ESCAPE']

In [61]:
fig = go.Figure()

fig.add_trace(go.Scatter(
    x=not_escapes_df.predicted,
    y=not_escapes_df.sem_log10,
    marker=dict(
        color=not_escapes_df.rank_sum,
        colorscale='Viridis',
        showscale=True
    ),
    name='not escapes',
    mode='markers'
))

fig.add_trace(go.Scatter(
    x=escapes_df.predicted,
    y=escapes_df.sem_log10,
    name='escapes',
    mode='markers',
    marker_color='red'
))

fig.update_layout(
    title='Escape prediction (Sars-Cov2)',
    xaxis_title='Grammaticality, log10(p(xi | x[N] \ i))',
    yaxis_title='Semantic change, log10(Δz)'
)

fig.show()

In [71]:
ranks = ranks.sort_values(by='rank_sum', ascending=True)

true_labels = ranks.is_escape == 'ESCAPE'
predicted = []
predicted.extend([False] * len(not_escapes_df))
predicted.extend([True] * len(escapes_df))

print(roc_auc_score(true_labels, predicted))

0.5073858157271851
