In [125]:
import json
from collections import defaultdict

import pandas as pd
import streamlit

In [19]:
df = pd.read_csv("avfonts.csv")

In [20]:
df.head()

Unnamed: 0,family,Category,rating,id,weights,styles,connotations,character,available_fonts
0,Roboto,sans-serif,1,roboto,100300400500700900,"['italic', 'normal']","modern, clean, universal, open, informal, prog...",character1,"Roboto-Medium,Roboto-Light,RobotoMono,RobotoSl..."
1,Open Sans,sans-serif,2,open-sans,300400500600700800,"['italic', 'normal']","modern, clean, universal, open, informal, prog...",character1,"OpenSansHebrewCondensed-Italic,OpenSansHebrew-..."
2,Noto Sans JP,sans-serif,3,noto-sans-jp,100300400500700900,['normal'],"modern, clean, universal, open, informal, prog...",character1,NotoSansJP
3,Montserrat,sans-serif,4,montserrat,100200300400500600700800900,"['italic', 'normal']","modern, clean, universal, open, informal, prog...",character1,"MontserratAlternates-Thin,MontserratAlternates..."
4,Lato,sans-serif,5,lato,100300400700900,"['italic', 'normal']","modern, clean, universal, open, informal, prog...",character1,"Lato-Italic,Lato-LightItalic,Lato-Thin,Lato-Bo..."


In [21]:
df["fam_lower"] = df["family"].str.lower().str.replace(" ", "")

In [25]:
years = pd.read_csv("years.csv", dtype={"Family": str, "Year": int})

In [26]:
years.rename(columns={"Family": "fam_lower"}, inplace=True)

In [33]:
wy = df.join(years.set_index("fam_lower"), on="fam_lower")
wy["year"].fillna(2023, inplace=True)

In [49]:
kw = pd.read_csv("keywordsfiltered.csv", dtype={"keyword": str, "count": int})

In [37]:
kw

Unnamed: 0,keyword,count
0,modern,347
1,global,210
2,available,198
3,unmodulated,165
4,regular,149
...,...,...
113,chinese,11
114,famous,11
115,impossible,11
116,sophisticated,11


In [38]:
descs = pd.read_csv("descriptions.csv", dtype={"font": str, "description": str})

In [39]:
descs.rename(columns={"font": "fam_lower"}, inplace=True)

In [45]:
wyd = wy.join(descs.set_index("fam_lower"), on="fam_lower")
wyd["description"].fillna("", inplace=True)

In [50]:
def get_keywords(desc):
    return set(kw for kw in kw["keyword"] if kw in desc)

In [52]:
wyd["keywords"] = wyd["description"].apply(get_keywords)

In [54]:
designers = pd.read_csv("designers.csv", dtype={"designer": str, "font": str})

In [56]:
designers = designers.groupby("font").aggregate(lambda x: list(x))

In [57]:
designers.rename(columns={"font": "fam_lower"}, inplace=True)

In [63]:
wydk = wyd.join(designers, on="fam_lower")

In [65]:
wydk

Unnamed: 0,family,Category,rating,id,weights,styles,connotations,character,available_fonts,fam_lower,year,description,keywords,designer
0,Roboto,sans-serif,1,roboto,100300400500700900,"['italic', 'normal']","modern, clean, universal, open, informal, prog...",character1,"Roboto-Medium,Roboto-Light,RobotoMono,RobotoSl...",roboto,2011.0,\nRoboto has a dual nature. \nIt has a mechani...,"{different, natural, mechanical, variable, com...","[Christian Robertson, Paratype, Font Bureau]"
1,Open Sans,sans-serif,2,open-sans,300400500600700800,"['italic', 'normal']","modern, clean, universal, open, informal, prog...",character1,"OpenSansHebrewCondensed-Italic,OpenSansHebrew-...",opensans,2020.0,\nOpen Sans is a humanist sans serif typeface ...,"{upright, neutral, characteristic, variable, c...",[Steve Matteson]
2,Noto Sans JP,sans-serif,3,noto-sans-jp,100300400500700900,['normal'],"modern, clean, universal, open, informal, prog...",character1,NotoSansJP,notosansjp,2021.0,\n Noto is a global font collection for writi...,"{global, unmodulated, modern}",[Google]
3,Montserrat,sans-serif,4,montserrat,100200300400500600700800900,"['italic', 'normal']","modern, clean, universal, open, informal, prog...",character1,"MontserratAlternates-Thin,MontserratAlternates...",montserrat,2011.0,\nThe old posters and signs in the traditional...,"{normal, original, special, traditional}","[Julieta Ulanovsky, Sol Matas, Juan Pablo del ..."
4,Lato,sans-serif,5,lato,100300400700900,"['italic', 'normal']","modern, clean, universal, open, informal, prog...",character1,"Lato-Italic,Lato-LightItalic,Lato-Thin,Lato-Bo...",lato,2015.0,"\nLato means “Summer” in Polish, and it is a s...","{different, particular, corporate, original, l...",[Łukasz Dziedzic]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1447,Noto Sans Nabataean,sans-serif,1491,noto-sans-nabataean,400,['normal'],"modern, clean, universal, open, informal, prog...",character1,NotoSansNabataean-Regular,notosansnabataean,2018.0,\n Noto is a global font collection for writi...,"{historical, global, unmodulated, modern}",[Google]
1448,Noto Sans Sogdian,sans-serif,1492,noto-sans-sogdian,400,['normal'],"modern, clean, universal, open, informal, prog...",character1,NotoSansSogdian-Regular,notosanssogdian,2019.0,\n Noto is a global font collection for writi...,"{historical, global, unmodulated, modern}",[Google]
1449,Noto Sans Old Permic,sans-serif,1493,noto-sans-old-permic,400,['normal'],"modern, clean, universal, open, informal, prog...",character1,NotoSansOldPermic-Regular,notosansoldpermic,2017.0,\n Noto is a global font collection for writi...,"{historical, global, unmodulated, modern}",[Google]
1450,Noto Sans Mahajani,sans-serif,1494,noto-sans-mahajani,400,['normal'],"modern, clean, universal, open, informal, prog...",character1,NotoSansMahajani-Regular,notosansmahajani,2018.0,\n Noto is a global font collection for writi...,"{historical, global, unmodulated, modern}",[Google]


In [67]:
wydk.columns

Index(['family', 'Category', 'rating', 'id', 'weights', 'styles',
       'connotations', 'character', 'available_fonts', 'fam_lower', 'year',
       'description', 'keywords', 'designer'],
      dtype='object')

In [68]:
wydk.to_csv("avfonts_full.csv", index=False)

In [117]:
df = pd.read_csv("avfonts_full.csv")

def parse_designer(designer):
    try:
        return json.loads(designer.replace("'", '"'))
    except:
        return None

replace = str.maketrans("", "", "{}'")
df["keywords"] = df["keywords"].apply(lambda x: set(x.translate(replace).split(", ")))
df["weights"] = df["weights"].apply(lambda x: x.split(","))
df["styles"] = df["styles"].apply(lambda x: json.loads(x.replace("'", '"')))
df["connotations"] = df["connotations"].apply(lambda x: x.split(", "))
df["designer"] = df["designer"].astype(str).apply(parse_designer)

In [119]:
liked = df[df["family"].str.contains("obo")]
disliked = df[df["family"].str.contains("ono")]

In [163]:
def rate_fonts(df, liked, disliked):
    keyword_ratings_factor = 10
    designer_ratings_factor = 1
    connotation_ratings_factor = 0.5
    year_factor = 0.1

    keyword_ratings = defaultdict(int)
    for entry in liked["keywords"]:
        for kw in entry:
            keyword_ratings[kw] += 1

    for entry in disliked["keywords"]:
        for kw in entry:
            keyword_ratings[kw] -= 1

    designer_ratings = defaultdict(int)
    for entry in liked["designer"]:
        if entry is None:
            continue
        for d in entry:
            designer_ratings[d] += 1

    for entry in disliked["designer"]:
        if entry is None:
            continue
        for d in entry:
            designer_ratings[d] -= 1

    connotation_ratings = defaultdict(int)
    for entry in liked["connotations"]:
        for c in entry:
            connotation_ratings[c] += 1

    for entry in disliked["connotations"]:
        for c in entry:
            connotation_ratings[c] -= 1

    liked_year_avg = liked["year"].mean()
    disliked_year_avg = disliked["year"].mean()
    middle_year = (liked_year_avg + disliked_year_avg) / 2

    def rate_font(row):
        rating = 0
        for kw in row["keywords"]:
            rating += keyword_ratings[kw] * keyword_ratings_factor
        for d in row["designer"] if row["designer"] is not None else []:
            rating += designer_ratings[d] * designer_ratings_factor
        for c in row["connotations"]:
            rating += connotation_ratings[c] * connotation_ratings_factor

        rating += (row["year"] - middle_year) * year_factor

        rating -= row["rating"] / 1000
        return rating

    df["rating"] = df.apply(rate_font, axis=1)

    return df.sort_values("rating", ascending=False)

r = rate_fonts(df, liked, disliked)

# Remove elements of r that are already in liked or disliked
r = r[~r["family"].isin(liked["family"])]
r = r[~r["family"].isin(disliked["family"])]

In [191]:
# Load ratings from the CSV file
user_input = pd.read_csv("user_input.csv")
liked = user_input[user_input[" Liked"] == "True"][" Selected Font"].str.replace(".ttf", "")
disliked = user_input[user_input[" Disliked"] == "True"][" Selected Font"].str.replace(".ttf", "")

liked_entries = pd.DataFrame(columns=df.columns)
for font in liked:
    liked_entries = liked_entries.append(df.loc[df["available_fonts"].str.contains(font)])

disliked_entrie = pd.DataFrame(columns=df.columns)
for font in disliked:
    disliked = disliked.append(df.loc[df["available_fonts"].str.contains(font)])

  liked = user_input[user_input[" Liked"] == "True"][" Selected Font"].str.replace(".ttf", "")
  disliked = user_input[user_input[" Disliked"] == "True"][" Selected Font"].str.replace(".ttf", "")
  liked_entries = liked_entries.append(df.loc[df["available_fonts"].str.contains(font)])
  liked_entries = liked_entries.append(df.loc[df["available_fonts"].str.contains(font)])
  liked_entries = liked_entries.append(df.loc[df["available_fonts"].str.contains(font)])
  liked_entries = liked_entries.append(df.loc[df["available_fonts"].str.contains(font)])
  liked_entries = liked_entries.append(df.loc[df["available_fonts"].str.contains(font)])
  liked_entries = liked_entries.append(df.loc[df["available_fonts"].str.contains(font)])
  liked_entries = liked_entries.append(df.loc[df["available_fonts"].str.contains(font)])
  liked_entries = liked_entries.append(df.loc[df["available_fonts"].str.contains(font)])
  liked_entries = liked_entries.append(df.loc[df["available_fonts"].str.contains(font)])
  d

TypeError: to_append should be a Series or list/tuple of Series, got DataFrame

LovedbytheKing
Inder-Regular
Armata-Regular
Georama
Cantarell-Bold
NotoSansOldPermic-Regular
NotoSansSinhala
ExpletusSans-Italic
Galada-Regular


  loved = loved.append(df.loc[df["available_fonts"].str.contains(font)])
  loved = loved.append(df.loc[df["available_fonts"].str.contains(font)])
  loved = loved.append(df.loc[df["available_fonts"].str.contains(font)])
  loved = loved.append(df.loc[df["available_fonts"].str.contains(font)])
  loved = loved.append(df.loc[df["available_fonts"].str.contains(font)])
  loved = loved.append(df.loc[df["available_fonts"].str.contains(font)])
  loved = loved.append(df.loc[df["available_fonts"].str.contains(font)])
  loved = loved.append(df.loc[df["available_fonts"].str.contains(font)])
  loved = loved.append(df.loc[df["available_fonts"].str.contains(font)])


In [192]:
liked_entries

Unnamed: 0,family,Category,rating,id,weights,styles,connotations,character,available_fonts,fam_lower,year,description,keywords,designer
814,Loved by the King,handwriting,-2.096714,loved-by-the-king,[400],[normal],"[personal, friendly, whimsical, casual, expres...",character4,LovedbytheKing,lovedbytheking,2006.0,A skinny font that fits in little places. This...,{little},[Kimberly Geswein]
627,Inder,sans-serif,-11.687143,inder,[400],[normal],"[modern, clean, universal, open, informal, pro...",character1,Inder-Regular,inder,2010.0,Foundry: Sorkin Type Co\nInder is a low contra...,{available},[Sorkin Type]
349,Armata,sans-serif,-91.407423,armata,[400],[normal],"[modern, clean, universal, open, informal, pro...",character1,Armata-Regular,armata,2012.0,\nArmata is a low contrast sans serif text fac...,{set()},[Viktoriya Grabowska]
443,Georama,sans-serif,-50.648182,georama,"[100, 200, 300, 400, 500, 600, 700, 800, 900]","[italic, normal]","[modern, clean, universal, open, informal, pro...",character1,"Georama,Georama-Italic",georama,2020.0,Georama is an original typeface available in s...,"{original, several, available}",[Production Type]
660,Geo,sans-serif,-41.657173,geo,[400],"[italic, normal]","[modern, clean, universal, open, informal, pro...",character1,"NotoSansGeorgian,GeostarFill-Regular,Geo-Regul...",geo,2010.0,I was shown squared-off lettering on a record ...,"{strong, similar, modern, complete}",[Ben Weiner]
196,Cantarell,sans-serif,-61.737093,cantarell,"[400, 700]","[italic, normal]","[modern, clean, universal, open, informal, pro...",character1,"Cantarell-Bold,Cantarell-BoldItalic,Cantarell-...",cantarell,2009.0,The Cantarell typeface family was designed dur...,"{particular, contemporary}",[Dave Crossland]
12,Noto Sans,sans-serif,-111.487343,noto-sans,"[100, 200, 300, 400, 500, 600, 700, 800, 900]","[italic, normal]","[modern, clean, universal, open, informal, pro...",character1,"NotoSansLepcha-Regular,NotoSansThaana,NotoSans...",notosans,2021.0,\n Noto is a global font collection for writi...,"{global, modern, specific, unmodulated, suitable}",[Google]
1449,Noto Sans Old Permic,sans-serif,-51.946884,noto-sans-old-permic,[400],[normal],"[modern, clean, universal, open, informal, pro...",character1,NotoSansOldPermic-Regular,notosansoldpermic,2017.0,\n Noto is a global font collection for writi...,"{global, unmodulated, historical, modern}",[Google]
12,Noto Sans,sans-serif,-111.487343,noto-sans,"[100, 200, 300, 400, 500, 600, 700, 800, 900]","[italic, normal]","[modern, clean, universal, open, informal, pro...",character1,"NotoSansLepcha-Regular,NotoSansThaana,NotoSans...",notosans,2021.0,\n Noto is a global font collection for writi...,"{global, modern, specific, unmodulated, suitable}",[Google]
697,Noto Sans Sinhala,sans-serif,-51.447383,noto-sans-sinhala,"[100, 200, 300, 400, 500, 600, 700, 800, 900]",[normal],"[modern, clean, universal, open, informal, pro...",character1,"NotoSansSinhala,NotoSansSinhalaUI",notosanssinhala,2022.0,\n Noto is a global font collection for writi...,"{global, unmodulated, modern}",[Google]


In [183]:
liked

11               LovedbytheKing
14                Inder-Regular
16               Armata-Regular
19                      Georama
22               Cantarell-Bold
25    NotoSansOldPermic-Regular
26              NotoSansSinhala
27          ExpletusSans-Italic
30               Galada-Regular
Name:  Selected Font, dtype: object