In [23]:
!pip install fonttools



In [80]:
from gensim.models import Word2Vec
from tensorflow.keras.layers import Input, Embedding, Flatten, Dense, concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.text import Tokenizer
import numpy as np
import os, random
import tensorflow as tf

In [None]:
from ttfquery import describe
from google.colab import drive
drive.mount('/content/drive')
!unzip /content/drive/MyDrive/fonts.zip -d "/content/drive/My Drive/fonts"

In [136]:
# Step 1: Load and preprocess font files
font_directory = "/content/drive/My Drive/fonts/Fonts"
font_files = [os.path.join(font_directory, file) for file in os.listdir(font_directory) if file.endswith(".ttf")]
num_fonts = len(font_files)

In [124]:
from fontTools.ttLib import TTFont

def extract_text_from_ttf(font_file):
    try:
        # Open the TrueType font file
        font = TTFont(font_file)

        # Extract the font family, subfamily, and full name
        font_family = font['name'].getName(1, 3, 1, 0x409).string.decode('utf-8')
        font_subfamily = font['name'].getName(2, 3, 1, 0x409).string.decode('utf-8')
        full_name = font['name'].getName(4, 3, 1, 0x409).string.decode('utf-8')

        # Construct a simple text string from font metadata
        text_data = f"Font Family: {font_family}\nFont Subfamily: {font_subfamily}\nFull Name: {full_name}"

        return text_data

    except Exception as e:
        print(f"Error extracting text from {font_file}: {str(e)}")
        return ""

In [145]:
# Step 2: Train Word2Vec model on font data
sentences = []

for font_file in font_files:
    # You may need to extract relevant text data from the font file
    # Replace the following line with the actual code to extract text from .ttf files
    text_data = extract_text_from_ttf(font_file)

    # Tokenize text into sentences
    sentences.extend([sentence.split() for sentence in text_data])
# Train Word2Vec model
word2vec_model = Word2Vec(sentences, vector_size=100, window=5, min_count=1, workers=4)
word2vec_model.train(sentences, total_examples=len(sentences), epochs=10)




(313516, 1491670)

In [149]:
# Get the vocabulary from the Word2Vec model
vocabulary = list(word2vec_model.wv.key_to_index.keys())

# Create a mapping from font to index
font_to_index = {font: i for i, font in enumerate(font_files)}

# Initialize the embedding matrix with zeros
embedding_size = word2vec_model.vector_size
embedding_matrix = np.zeros((len(font_files), embedding_size))

# Populate the embedding matrix with Word2Vec embeddings for each font
for i, font in enumerate(font_files):
    # If the font is in the Word2Vec vocabulary, use its embedding
    if font in vocabulary:
        embedding_matrix[i] = word2vec_model.wv[font]

In [150]:
len(embedding_matrix)

1460

In [151]:
# Step 1: Create pairs of fonts and labels
def create_font_pairs_and_labels(num_pairs):
    font_pairs = []
    contrast_labels = []
    theme_labels = []

    for _ in range(num_pairs):
        font_a, font_b = random.sample(font_files, 2)

        # Replace these with your actual logic for extracting text from .ttf files
        text_a = f"Text from {font_a}"
        text_b = f"Text from {font_b}"

        # You might want to improve this labeling logic based on your dataset
        contrast_label = 1 if random.random() > 0.5 else 0  # Randomly label for contrast
        theme_label = 1 if text_a == text_b else 0  # Label based on some criteria (e.g., same theme)

        font_pairs.append((font_a, font_b))
        contrast_labels.append(contrast_label)
        theme_labels.append(theme_label)

    return font_pairs, contrast_labels, theme_labels

# Step 2: Use the function to create font pairs and labels
num_pairs = 1000  # Adjust based on your dataset size
font_pairs, contrast_labels, theme_labels = create_font_pairs_and_labels(num_pairs)

In [152]:
# Convert labels to numpy arrays
contrast_labels = np.array(contrast_labels)
theme_labels = np.array(theme_labels)

# Convert font pairs to separate lists
font_pair_a, font_pair_b = zip(*font_pairs)
font_pair_a = list(font_pair_a)
font_pair_b = list(font_pair_b)

# Convert font pairs to indices (numbers) because Embedding layer expects numerical inputs
font_to_index = {font: i for i, font in enumerate(font_files)}
font_pair_a_indices = [font_to_index[font] for font in font_pair_a]
font_pair_b_indices = [font_to_index[font] for font in font_pair_b]
font_pair_a_indices = tf.cast(font_pair_a_indices, tf.int32)
font_pair_b_indices = tf.cast(font_pair_b_indices, tf.int32)

In [153]:
# Step 3: Create a Siamese network with pre-trained Word2Vec embeddings
input_a = tf.keras.layers.Input(shape=(1,), name='input_a')
input_b = tf.keras.layers.Input(shape=(1,), name='input_b')

embedding_layer = tf.keras.layers.Embedding(num_fonts, embedding_size, weights=[embedding_matrix], trainable=False)

embedding_a = embedding_layer(input_a)
embedding_b = embedding_layer(input_b)

flattened_a = tf.keras.layers.Flatten()(embedding_a)
flattened_b = tf.keras.layers.Flatten()(embedding_b)

merged = tf.keras.layers.concatenate([flattened_a, flattened_b])

dense_layer_1 = tf.keras.layers.Dense(128, activation='relu')(merged)
dense_layer_2 = tf.keras.layers.Dense(64, activation='relu')(dense_layer_1)

contrast_output = tf.keras.layers.Dense(1, activation='sigmoid', name='contrast_output')(dense_layer_2)
theme_output = tf.keras.layers.Dense(1, activation='sigmoid', name='theme_output')(dense_layer_2)

siamese_model = tf.keras.models.Model(inputs=[input_a, input_b], outputs=[contrast_output, theme_output])

# Print the model summary for verification
siamese_model.summary()

# Compile the model
siamese_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Step 4: Train the model
siamese_model.fit([font_pair_a_indices, font_pair_b_indices], [contrast_labels, theme_labels], epochs=10, batch_size=32)

Model: "model_34"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_a (InputLayer)        [(None, 1)]                  0         []                            
                                                                                                  
 input_b (InputLayer)        [(None, 1)]                  0         []                            
                                                                                                  
 embedding_37 (Embedding)    (None, 1, 100)               146000    ['input_a[0][0]',             
                                                                     'input_b[0][0]']             
                                                                                                  
 flatten_68 (Flatten)        (None, 100)                  0         ['embedding_37[0][0]'] 

<keras.src.callbacks.History at 0x7f41142f7190>

In [154]:
# Create a mapping from font to index
font_to_index = {font: i for i, font in enumerate(font_files)}

# Create a mapping from index to font
index_to_font = {i: font for i, font in enumerate(font_files)}

def recommend_fonts(siamese_model, font_to_index, base_font, num_recommendations=5):
    # Find the index of the base font
    base_font_index = font_to_index.get(base_font, 0)

    # Create input pairs with the base font
    font_pair_a_recommend = [base_font_index] * len(font_to_index)
    font_pair_b_recommend = list(range(len(font_to_index)))

    # Make predictions
    recommendations = siamese_model.predict([np.array(font_pair_a_recommend), np.array(font_pair_b_recommend)])

    # Extract contrast and theme scores
    contrast_scores = recommendations[0].flatten()
    theme_scores = recommendations[1].flatten()

    # Combine scores into a single similarity score (you might adjust this based on your requirements)
    similarity_scores = 0.5 * contrast_scores + 0.5 * theme_scores

    # Get indices of top recommendations
    top_indices = np.argsort(similarity_scores)[::-1][:num_recommendations]

    # Get recommended fonts
    recommended_fonts = [index_to_font[i] for i in top_indices]

    return recommended_fonts

# Example usage:
base_font = "Ac.ttf"  # Replace with your desired base font
recommended_fonts = recommend_fonts(siamese_model, font_to_index, base_font)

print(f"Recommended fonts for {base_font}:")
for i, font in enumerate(recommended_fonts, start=1):
    print(f"{i}. {font}")

Recommended fonts for Ac.ttf:
1. /content/drive/My Drive/fonts/Fonts/FreeUniversal-Regular.ttf
2. /content/drive/My Drive/fonts/Fonts/Colors Of Autumn.ttf
3. /content/drive/My Drive/fonts/Fonts/CursiveSans.ttf
4. /content/drive/My Drive/fonts/Fonts/ARDELANEY.ttf
5. /content/drive/My Drive/fonts/Fonts/Denise_Handwriting.ttf
