In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import joblib

In [2]:
# Read the font-usage dataset from a CSV file located relative to the
# notebook's working directory.
csv_path = './font_usage_data_100k.csv'
data = pd.read_csv(csv_path)

In [3]:
# Preview the first five rows to check the column names and value formats.
data.head(n=5)

Unnamed: 0,id,age,gender,usage_frequency,usage_context,preferred_font_size,font_name
0,1,22,Male,Always,Personal,17,Times New Roman
1,2,48,Female,Rarely,Professional,13,Helvetica
2,3,35,Female,Rarely,Educational,20,Arima
3,4,51,Female,Often,Educational,16,Helvetica
4,5,44,Female,Rarely,Professional,8,Times New Roman


In [4]:
# Replace each string-valued categorical column of `data` with integer codes.
# The fitted encoder for every column is kept in `label_encoders` (keyed by
# column name) so codes can later be translated back to their labels.
categorical_columns = ['gender', 'usage_frequency', 'usage_context']

# `data` is overwritten in place below, so running this cell a second time
# would refit every encoder on the integer codes and silently discard the
# original string classes. Fail loudly instead of corrupting the mapping.
already_encoded = [
    col for col in categorical_columns
    if pd.api.types.is_numeric_dtype(data[col])
]
if already_encoded:
    raise RuntimeError(
        f"Columns {already_encoded} are already numeric; re-run the "
        "data-loading cell before encoding again."
    )

label_encoders = {}

for col in categorical_columns:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])
    label_encoders[col] = le

# Display the code -> label mapping per column; anyone building model inputs
# by hand needs this to know which integer stands for which category.
{
    col: dict(enumerate(encoder.classes_.tolist()))
    for col, encoder in label_encoders.items()
}

In [5]:
# Separate the prediction target from the predictors. `id` is dropped together
# with the target so that it is not used as a feature.
target_column = 'font_name'
non_feature_columns = [target_column, 'id']

y = data[target_column]
X = data.drop(columns=non_feature_columns)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [6]:
# Random Forest Classifier
# Fit a 100-tree forest on the training split, score it on the held-out split,
# then persist everything needed to reuse it.
model = RandomForestClassifier(n_estimators=100, random_state=42)

model.fit(X_train, y_train)

y_pred = model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)

# Reference point: the accuracy obtained by always predicting the most common
# training label. A model whose accuracy is not clearly above this number has
# learned nothing useful from the features.
majority_label = y_train.mode().iloc[0]
baseline_accuracy = (y_test == majority_label).mean()

print(f"Accuracy: {accuracy:.4f}")
print(f"Majority-class baseline accuracy: {baseline_accuracy:.4f}")
print("Classification Report:")
print(classification_report(y_test, y_pred))

# The model was trained on integer-encoded categoricals, so the fitted encoders
# are saved next to it; without them a fresh session cannot encode new inputs
# or interpret the codes. The model file name and contents are unchanged.
joblib.dump(label_encoders, 'font_usage_label_encoders.pkl')
joblib.dump(model, 'font_usage_classifier.pkl')


Accuracy: 0.1278
Classification Report:
                 precision    recall  f1-score   support

          Arial       0.13      0.12      0.13      2522
          Arima       0.12      0.13      0.13      2406
        Calibri       0.12      0.12      0.12      2564
      Helvetica       0.14      0.14      0.14      2458
      Open Sans       0.12      0.13      0.12      2508
         Roboto       0.14      0.13      0.13      2542
Times New Roman       0.13      0.13      0.13      2513
        Verdana       0.12      0.13      0.13      2487

       accuracy                           0.13     20000
      macro avg       0.13      0.13      0.13     20000
   weighted avg       0.13      0.13      0.13     20000



['font_usage_classifier.pkl']

In [8]:
# Reload the persisted classifier from disk. joblib files are pickles, so only
# load files that were produced by this notebook or another trusted source.
model = joblib.load('./font_usage_classifier.pkl')

# A single example input, keyed by feature name in the same column order as the
# original example.
# NOTE(review): the three categorical values are integer codes, not labels;
# confirm what each code means against label_encoders[col].classes_ before
# drawing conclusions from this example.
sample_user = {
    'age': 30,
    'gender': 0,
    'usage_frequency': 2,
    'usage_context': 1,
    'preferred_font_size': 14,
}
new_data = pd.DataFrame([sample_user])

# predict() returns one label per input row; this frame has exactly one row.
font_prediction = model.predict(new_data)
print(f"Predicted Font Name: {font_prediction[0]}")


Predicted Font Name: Verdana
