In [1]:
import pickle

# Load one fitted encoder from disk to inspect it.
# NOTE(security): pickle.load can execute arbitrary code stored in the file;
# only unpickle encoder files that come from a trusted source.
with open('encoder_fungi_type.pkl', 'rb') as f:
    encoder = pickle.load(f)

# Check what type it is
print(type(encoder))

# Show the categories the encoder was fitted on. LabelEncoder exposes them as
# `classes_`; OneHotEncoder has no `classes_` and exposes `categories_`
# (one array per input column) instead, so dispatch on whichever is present
# rather than assuming `classes_` exists.
if hasattr(encoder, 'classes_'):
    print(encoder.classes_)  # Shows categories it encodes
elif hasattr(encoder, 'categories_'):
    print(encoder.categories_)
else:
    print("Encoder exposes neither classes_ nor categories_")

<class 'sklearn.preprocessing._label.LabelEncoder'>
[' Fomitella fraxinea (ASTI 17001' ' Fomitella fraxinea (ASTI 17002'
 ' Fomitella fraxinea (ASTI 17003' ' Fomitella fraxinea (ASTI 17004'
 ' Fomitella fraxinea (ASTI 17005' ' Fomitella fraxinea (ASTI 17006'
 ' Fomitella fraxinea (ASTI 17007' ' Fomitella fraxinea (ASTI 17008'
 ' Fomitella fraxinea (ASTI 17009' ' Fomitella fraxinea (ASTI 17010'
 ' Fomitella fraxinea (ASTI 17011' ' Fomitella fraxinea (ASTI 17012'
 ' Fomitella fraxinea (ASTI 17013' ' Fomitella fraxinea (ASTI 17014'
 ' Fomitella fraxinea (ASTI 17015' ' Fomitella fraxinea (ASTI 17016'
 ' Fomitella fraxinea (ASTI 17017' ' Fomitella fraxinea (ASTI 17018'
 ' Schizophyllum commune' 'Abortiporus biennis (Bull.) Singer 064–18 '
 'Fomes fomentarius DSM 4986' 'Fomitopsis iberica (104-19)'
 'Fomitopsis pinicola DSM 4985' 'Ganoderma Lucidum' 'Ganoderma lucidum'
 'Ganoderma lucidum DSM9621' 'Hydrophobine gene Schizophyllum commune'
 'Mycelium run wood' 'Phellinus ellipsoideus'
 'Pleau

In [3]:
import pickle
import pandas as pd
from openpyxl import Workbook
from openpyxl.styles import Font, PatternFill, Alignment

# Collects {feature display name: list of category values}, one entry per
# encoder file; the headers and data columns are later written by iterating
# this dict, so insertion order is the column order.
data_dict = {}

# Pickled encoder files to export, one per categorical feature.
encoder_files = [
    'encoder_crosslinking.pkl',
    'encoder_fungi_type.pkl',
    'encoder_growth_condition.pkl',
    'encoder_incubation_condition.pkl',
    'encoder_inoculation_state.pkl',
    'encoder_plasticizing.pkl',
    'encoder_reinforcement.pkl',
    'encoder_substrate.pkl',
]

# Output workbook: use its active sheet as the single sheet and rename it.
wb = Workbook()
ws = wb.active
ws.title = "Encoder Features"

def _encoder_values(fitted):
    """Return the category values stored on an unpickled encoder as a list.

    Uses ``classes_`` when the object has it, otherwise the first array of
    ``categories_``; objects with neither attribute yield ``['Unknown']``.
    """
    if hasattr(fitted, 'classes_'):
        return fitted.classes_.tolist()
    if hasattr(fitted, 'categories_'):
        # NOTE(review): only the first column's categories are exported;
        # presumably each file holds a single-column encoder — confirm.
        return fitted.categories_[0].tolist()
    return ['Unknown']


# Unpickle every encoder and record its category values under a readable name.
# NOTE(security): pickle.load can execute arbitrary code stored in the file;
# only unpickle encoder files that come from a trusted source.
for file in encoder_files:
    # 'encoder_fungi_type.pkl' -> 'fungi_type' -> 'Fungi Type'
    stem = file.replace('encoder_', '').replace('.pkl', '')
    feature_name = stem.replace('_', ' ').title()

    with open(file, 'rb') as f:
        encoder = pickle.load(f)

    values = _encoder_values(encoder)
    data_dict[feature_name] = values

# Write the header row (row 1): one bold, white-on-blue, centred cell per feature.
# The style objects are built once and reused for every header cell. Bold,
# size and colour live in a single Font because assigning cell.font a second
# time replaces the whole Font object, which would discard size=12.
header_font = Font(bold=True, size=12, color="FFFFFF")
header_fill = PatternFill(start_color="4472C4", end_color="4472C4", fill_type="solid")
header_alignment = Alignment(horizontal="center", vertical="center")

for col_idx, feature_name in enumerate(data_dict.keys(), start=1):
    cell = ws.cell(row=1, column=col_idx, value=feature_name)
    cell.font = header_font
    cell.fill = header_fill
    cell.alignment = header_alignment

# Fill each feature's column downward, starting in row 2 (row 1 holds the
# header), with one category value per cell.
for col_idx, values in enumerate(data_dict.values(), start=1):
    for offset, value in enumerate(values):
        ws.cell(row=offset + 2, column=col_idx, value=value)

# Give every feature column a fixed width of 20.
# column_dimensions is keyed by column letter. Take the letter from the
# column's row-1 cell instead of computing chr(64 + col_idx), which is only
# valid for columns 1-26 (column 27 must be 'AA', not '[').
for col_idx in range(1, len(data_dict) + 1):
    column_letter = ws.cell(row=1, column=col_idx).column_letter
    ws.column_dimensions[column_letter].width = 20

# Write the workbook to disk and confirm the output path.
output_file = 'encoder_features_formatted.xlsx'
wb.save(output_file)
print(f"✓ Excel file created: {output_file}")

# Report how many category values were collected for each feature.
print(f"\nFeature Summary:")
for feature in data_dict:
    values = data_dict[feature]
    print(f"  • {feature}: {len(values)} unique values")

✓ Excel file created: encoder_features_formatted.xlsx

Feature Summary:
  • Crosslinking: 15 unique values
  • Fungi Type: 38 unique values
  • Growth Condition: 1 unique values
  • Incubation Condition: 7 unique values
  • Inoculation State: 5 unique values
  • Plasticizing: 23 unique values
  • Reinforcement: 6 unique values
  • Substrate: 12 unique values
