```python3
import glob
import random
import cld3
from collections import Counter
from multiprocessing import Pool
from tqdm import tqdm
import csv


def classify(file, pickup_rate=0.25, target_lang="la", threshold=0.6):
    """Decide whether a text file is predominantly written in ``target_lang``.

    Each non-blank line is sampled with probability ``pickup_rate``; every
    sampled line is run through ``cld3.get_language`` and the file is accepted
    when the share of sampled lines detected as ``target_lang`` is strictly
    greater than ``threshold``.

    Args:
        file: Path of the text file to inspect.
        pickup_rate: Probability (0..1) of sampling any given non-blank line.
        target_lang: Language code, as reported by cld3, to look for.
        threshold: Share of sampled lines that must be exceeded (exclusive).

    Returns:
        A ``(file, verdict)`` tuple. The path is echoed back so the caller can
        pair results with inputs when they arrive out of order. The verdict is
        ``False`` when no line was sampled or the file could not be processed.
    """
    try:
        sampled = []
        # Decode explicitly as UTF-8 rather than with the locale default, and
        # replace undecodable bytes so that a single bad byte does not discard
        # the whole file. (Assumes the corpus is UTF-8 text.)
        with open(file, encoding="utf-8", errors="replace") as f:
            for line in f:
                line = line.strip()
                # Short-circuit: the RNG is only consulted for non-blank lines.
                if line and random.random() < pickup_rate:
                    sampled.append(cld3.get_language(line).language)
        if not sampled:
            # Nothing to judge from (empty file or no line picked).
            return file, False
        share = Counter(sampled)[target_lang] / len(sampled)
        return file, share > threshold
    except Exception:
        # Best effort: report the file and treat it as "not target language"
        # so one broken file does not abort the whole batch.
        print(f"Failing to parse {file}")
        return file, False

# Classify every downloaded text file in parallel and record one verdict per
# file in a CSV with the columns "file" and "status".
# NOTE: without recursive=True, "**" behaves like "*", so this pattern matches
# .txt files exactly one directory below ./archive-org/raw/.
files = list(glob.glob("./archive-org/raw/**/*.txt"))

# The with-block guarantees the CSV is flushed and closed even if a worker or
# a write raises; newline="" lets the csv module control row terminators.
with open("archives.org.languages.csv", "w", newline="") as c:
    writer = csv.writer(c)
    writer.writerow(["file", "status"])
    with Pool(12) as pool:
        # imap_unordered yields results as workers finish, which is why
        # classify() echoes the file path alongside its verdict.
        for (file, is_latin) in tqdm(pool.imap_unordered(classify, files), total=len(files)):
            writer.writerow([file, str(is_latin)])
```

In [9]:
import pandas as pd


# Load the per-file language verdicts into a DataFrame.
# Presumably this is the CSV written by the classification script (same file
# name, columns "file" and "status"), moved under data/raw — TODO confirm.
who_is_in_latin = pd.read_csv("../../data/raw/archive.org/archives.org.languages.csv")

In [11]:
# Tally how many files fall under each value of the "status" column.
who_is_in_latin["status"].value_counts()

False    41758
True     20843
Name: status, dtype: int64

In [4]:
# Load the per-file prediction scores (the displayed frame has "path" and "score").
# NOTE(review): the "Unnamed: 0" column in the output suggests the CSV was saved
# with its index; passing index_col=0 would fold it back into the index — confirm
# before changing, since it alters the frame's columns.
preds = pd.read_csv("../../data/raw/archive.org/sycophant.predict.csv")

Unnamed: 0,path,score
0,./archive-org/raw/b31882596_0002/b31882596_000...,93.89
1,./archive-org/raw/b22373494/b22373494_djvu.txt,88.26
2,./archive-org/raw/b22477731/b22477731_djvu.txt,84.73
3,./archive-org/raw/ita-bnc-mag-00001009-001/ita...,77.89
4,./archive-org/raw/ARes7642117/ARes7642117_djvu...,90.45
...,...,...
62938,./archive-org/raw/operaexrecension01taciuoft/o...,87.90
62939,./archive-org/raw/operahorac00horauoft/operaho...,84.50
62940,./archive-org/raw/monasticonanglic22dugd/monas...,66.75
62941,./archive-org/raw/ARes05501/ARes05501_djvu.txt,76.41


In [8]:
# Display the predictions frame; pandas elides the middle rows of a long frame.
preds

Unnamed: 0,path,score
0,./archive-org/raw/b31882596_0002/b31882596_000...,93.89
1,./archive-org/raw/b22373494/b22373494_djvu.txt,88.26
2,./archive-org/raw/b22477731/b22477731_djvu.txt,84.73
3,./archive-org/raw/ita-bnc-mag-00001009-001/ita...,77.89
4,./archive-org/raw/ARes7642117/ARes7642117_djvu...,90.45
...,...,...
62938,./archive-org/raw/operaexrecension01taciuoft/o...,87.90
62939,./archive-org/raw/operahorac00horauoft/operaho...,84.50
62940,./archive-org/raw/monasticonanglic22dugd/monas...,66.75
62941,./archive-org/raw/ARes05501/ARes05501_djvu.txt,76.41
