# Interpreting comparisons between a number of specific genres

The calculations below support some assertions, made in passing, about the amount of accuracy lost when we use models of one genre to make predictions about another.

Note that there is a better metric than "loss of accuracy," though it's harder to understand and I only mention it briefly in the article. For a better version of "mutual recognition," see "../measuredivergence".

In [12]:
# Let's start with some standard imports.

import pandas as pd
from matplotlib import pyplot as plt
from scipy.stats import pearsonr
import numpy as np
%matplotlib inline

In [13]:
# Read the tab-separated comparison results and preview the first rows.

results = pd.read_csv(
    '../results/scarboroughto19c_comparisons.tsv',
    sep='\t',
)
results.head()

Unnamed: 0,testype,name1,name2,ceiling1,floor1,ceiling2,floor2,meandate1,meandate2,acc1,acc2,alienacc1,alienacc2,spearman,spear1on2,spear2on1,loss,loss1on2,loss2on1
0,scarborough-19cfantasy,scarborough_random_0,19c_fantasy_randomB_0,,,,,1881.085714,1896.642857,0.807143,0.828571,0.75,0.757143,0.72945,0.767322,0.691577,0.064286,0.071429,0.057143
1,scarborough-self,scarborough_random_0,scarborough_randomB_0,,,,,1881.085714,1882.671429,0.807143,0.842857,0.921429,0.907143,1.057354,1.06582,1.048888,-0.089286,-0.064286,-0.114286
2,19cfantasy-self,19c_fantasy_random_0,19c_fantasy_randomB_0,,,,,1896.721429,1896.642857,0.835714,0.828571,0.892857,0.857143,1.104304,1.215788,0.99282,-0.042857,-0.028571,-0.057143
3,scarborough-19cfantasy,scarborough_random_0,19c_fantasy_randomB_1,,,,,1881.085714,1892.692857,0.807143,0.835714,0.785714,0.75,0.752467,0.82389,0.681044,0.053571,0.085714,0.021429
4,scarborough-self,scarborough_random_0,scarborough_randomB_1,,,,,1881.085714,1882.642857,0.807143,0.835714,0.921429,0.914286,1.10529,1.132027,1.078552,-0.096429,-0.078571,-0.114286


In [14]:
def endswithself(astring: str) -> bool:
    """Return True when ``astring`` ends with the suffix 'self'.

    Used as a row predicate on the ``testype`` column: values such as
    'scarborough-self' are matched, values such as
    'scarborough-19cfantasy' are not.
    """
    # str.endswith already yields a bool, so no if/else wrapper is needed.
    return astring.endswith('self')


# Rows whose testype ends in 'self'; the mean of their `spearman` column
# is kept in `norm` for use as a divisor.
selfcomp = results.loc[results.testype.map(endswithself)]
norm = selfcomp.spearman.mean()

In [15]:
def notwithself(astring: str) -> bool:
    """Return True when ``astring`` does NOT end with the suffix 'self'.

    Used as a row predicate on the ``testype`` column: values such as
    'scarborough-19cfantasy' are matched, values such as
    'scarborough-self' are not.
    """
    # Negate the bool from str.endswith directly instead of branching.
    return not astring.endswith('self')

# Rows whose testype does not end in 'self'. Their `spearman` values are
# divided by `norm`, then the mean and the population standard deviation
# (ddof=0, which is what np.std computes) are printed.
crosscomp = results.loc[results.testype.map(notwithself)]
differences = crosscomp.spearman.div(norm)
print(differences.mean(), differences.std(ddof=0))

0.729830131793 0.0952400856095


In [16]:
def relative_predictive_power(agenre):
    """Print the mean and standard deviation of normalized `spearman` values.

    ``agenre`` is a DataFrame with ``testype`` and ``spearman`` columns.
    Rows whose testype ends in 'self' supply the baseline (the mean of
    their ``spearman`` values); every other row's ``spearman`` value is
    divided by that baseline. The mean and the population standard
    deviation (ddof=0) of those ratios are printed on one line.
    Nothing is returned.
    """
    is_self = agenre.testype.map(endswithself)
    is_cross = agenre.testype.map(notwithself)

    baseline = agenre.loc[is_self].spearman.mean()
    ratios = agenre.loc[is_cross].spearman / baseline

    print(ratios.mean(), ratios.std(ddof=0))

def loss_of_accuracy(agenre):
    """Print the mean and standard deviation of `loss` for cross-genre rows.

    ``agenre`` is a DataFrame with ``testype`` and ``loss`` columns. Only
    rows whose testype does not end in 'self' are used. The mean and the
    population standard deviation (ddof=0) of their ``loss`` values are
    printed on one line. Nothing is returned.
    """
    cross_mask = agenre.testype.map(notwithself)
    losses = agenre.loc[cross_mask].loss
    print(losses.mean(), losses.std(ddof=0))

# Load the comparisons in bailey_to_19cSF_comparisons.tsv and print both
# summaries: relative predictive power first, then loss of accuracy.
bailey19c = pd.read_csv(
    '../results/bailey_to_19cSF_comparisons.tsv',
    sep='\t',
)
relative_predictive_power(bailey19c)
loss_of_accuracy(bailey19c)

1.01474308874 0.103679199817
0.0281746031746 0.0290848923838


In [17]:
# Load the comparisons in scarboroughto19c_comparisons_nojuv.tsv and print
# both summaries: relative predictive power first, then loss of accuracy.
scar19c = pd.read_csv(
    '../results/scarboroughto19c_comparisons_nojuv.tsv',
    sep='\t',
)
relative_predictive_power(scar19c)
loss_of_accuracy(scar19c)

0.765317574481 0.111618760102
0.0450396825397 0.0313422988171


In [18]:
# Load the comparisons in scarborough2detective_comparisons.tsv and print
# both summaries: relative predictive power first, then loss of accuracy.
scardetective = pd.read_csv(
    '../results/scarborough2detective_comparisons.tsv',
    sep='\t',
)
relative_predictive_power(scardetective)
loss_of_accuracy(scardetective)

0.00237500317314 0.0691200416949
0.320238095238 0.0359515925489


In [19]:
# Load the comparisons in scarborough2bailey_comparisons.tsv and print
# both summaries: relative predictive power first, then loss of accuracy.
scarbailey = pd.read_csv(
    '../results/scarborough2bailey_comparisons.tsv',
    sep='\t',
)
relative_predictive_power(scarbailey)
loss_of_accuracy(scarbailey)

0.696517329415 0.0514752320907
0.0525793650794 0.0175693296089


In [20]:
# Load the comparisons in sf19ctofantasy19c_comparisons.tsv and print
# both summaries: relative predictive power first, then loss of accuracy.
sf2fan = pd.read_csv(
    '../results/sf19ctofantasy19c_comparisons.tsv',
    sep='\t',
)
relative_predictive_power(sf2fan)
loss_of_accuracy(sf2fan)

0.778002180495 0.0815836448342
0.0553571428571 0.0317044618704


In [21]:
# Load the comparisons in baileytofantasy19c_comparisons.tsv and print
# both summaries: relative predictive power first, then loss of accuracy.
bailey2fan = pd.read_csv(
    '../results/baileytofantasy19c_comparisons.tsv',
    sep='\t',
)
relative_predictive_power(bailey2fan)
loss_of_accuracy(bailey2fan)

0.897484701424 0.0917177037944
0.0583333333333 0.0214285714286


In [22]:
# Load the comparisons in scarborough2sf_comparisons.tsv and print
# both summaries: relative predictive power first, then loss of accuracy.
scar2sf = pd.read_csv(
    '../results/scarborough2sf_comparisons.tsv',
    sep='\t',
)
relative_predictive_power(scar2sf)
loss_of_accuracy(scar2sf)

0.521335721577 0.105803230858
0.111904761905 0.0437084755968
