In [None]:
#| default_exp string_templating

In [None]:
#| exporti 

from archetypon.base_model import BaseModel
from pydantic import root_validator,validator
from typing import *
import string
import re

In [None]:
#| export

def get_formatters_from_string(input_string:str)->List[str]:
    """Return the replacement-field names found in a format string.

    Names are extracted with `string.Formatter().parse` and returned in order
    of appearance. A field used twice is listed twice, and an anonymous `{}`
    field is reported as `''`.

    Args:
        input_string: A `str.format`-style template such as
            `'{city}, {state}'`. `None` and `''` are accepted.

    Returns:
        A list of field names. The list is empty when the input is
        empty/`None` or contains no replacement fields, so the result can
        always be iterated and compared against another list.

    Raises:
        ValueError: Propagated from `string.Formatter().parse` when the
            template is malformed (e.g. an unmatched brace).
    """
    if not input_string:
        return []
    # parse() yields (literal_text, field_name, format_spec, conversion);
    # field_name is None for literal text that is not followed by a field.
    return [
        field_name
        for _, field_name, _, _ in string.Formatter().parse(input_string)
        if field_name is not None
    ]

In [None]:
# Example template; it is reused by the string_to_dict demo cell below.
f_string = 'I live in {city}, {state}, {country}.'
expected_fields = ['city', 'state', 'country']
assert get_formatters_from_string(f_string) == expected_fields

In [None]:
#| export 

class _TemplateMismatchError(ValueError, AttributeError):
    """Raised by `string_to_dict` when the string does not fit the pattern.

    It also derives from `AttributeError` because a failed match used to
    surface as `'NoneType' object has no attribute 'groups'`; handlers written
    as `except AttributeError` therefore keep working.
    """


def string_to_dict(string, pattern):
    """Extract the values of a `{field}` pattern from a matching string.

    Every `{name}` in `pattern` is turned into the greedy named group
    `(?P<_name>.+)`. The remaining text of `pattern` is used as
    regular-expression source as-is (it is NOT escaped), so characters such
    as `.` or `[` keep their regex meaning unless the caller escapes them.

    Args:
        string: The text to parse.
        pattern: Template containing `{name}` placeholders.

    Returns:
        A dict mapping each placeholder name to the captured substring, in
        the order the placeholders appear in `pattern`.

    Raises:
        ValueError: If `string` does not match `pattern`. The raised error is
            also an `AttributeError` for backward compatibility.
    """
    placeholder = r'{(.+?)}'
    regex = re.sub(placeholder, r'(?P<_\1>.+)', pattern)
    match = re.search(regex, string)
    if match is None:
        raise _TemplateMismatchError(
            f"String {string!r} does not match template {pattern!r}"
        )
    keys = re.findall(placeholder, pattern)
    # Look values up by group name rather than by position so that extra
    # groups written directly in the pattern cannot shift the mapping.
    return {key: match.group('_' + key) for key in keys}

In [None]:
# The sentence ends with '.' just like the template. The template's literal
# text is used as a regex, so without a final character for that '.' to
# consume, the last letter of the country would be swallowed instead.
my_string = 'I live in Brooklyn, New York, USA.'
parsed_address = string_to_dict(my_string,f_string)
assert parsed_address == {'city': 'Brooklyn', 'state': 'New York', 'country': 'USA'}
parsed_address

{'city': 'Brooklyn', 'state': 'New York', 'country': 'USA'}

In [None]:
#| export

class StringTemplate(BaseModel):
    """String Template Model.

    Subclasses declare a default `template` (a `str.format`-style string) and
    one field per placeholder. An instance can then be built either from the
    field values, in which case `string` is rendered from the template, or
    from a string that matches the template, in which case the field values
    are parsed out of it.

    Useful for path operations and partitions.

    Example:
        class IntroduceMe(StringTemplate):
            template: str = "Hi! My name is {first} {last}"
            first: str
            last: str

        IntroduceMe(first='Ada', last='Lovelace').string
        IntroduceMe("Hi! My name is Ada Lovelace").first
    """
    # Rendered text; set by `format_template` whenever validation succeeds.
    string: Optional[str]=None
    # Format string whose placeholders must equal the subclass fields.
    template: str

    @classmethod
    def parse_string(cls,string):
        """Build an instance by parsing `string` against the default template.

        The template is read from the class-level default of the `template`
        field. The captured values are passed to the constructor as keyword
        arguments, so they go through normal field validation.
        """
        string_format = cls.__fields__['template'].default
        values = string_to_dict(string,string_format)
        return cls(**values)

    @validator('template',always=True)
    def validate_template(cls,v):
        """Check that the template placeholders equal the model fields.

        The comparison is list equality, so names, count and order must all
        agree (`template` and `string` themselves are excluded).

        Raises:
            ValueError: If the placeholders and the declared fields differ.
        """
        # The helper may hand back a non-list falsy value when there are no
        # placeholders; normalise so the comparison below is list-to-list.
        template_fields = list(get_formatters_from_string(v) or [])
        fields = [x for x in cls.__fields__.keys() if x not in ('template','string')]
        # Raise explicitly rather than `assert`: assert statements are removed
        # under `python -O`, which would silently disable this check.
        if template_fields != fields:
            raise ValueError(
                f"template placeholders {template_fields} do not match model fields {fields}"
            )
        return v

    @root_validator(skip_on_failure=True)
    def format_template(cls,values):
        """Render `string` by formatting the template with the field values."""
        # `values` also holds 'string' and 'template'; str.format ignores
        # keyword arguments the template does not reference.
        values['string'] = values['template'].format(**values)

        return values

    def __init__(
        self,
        string=None, # positional only
        /,
        **kwargs
    ):
        """Create the model from a formatted string or from field values.

        Args:
            string: Optional positional-only text to parse with
                `parse_string`. When it is truthy, `kwargs` are not used.
            **kwargs: Field values, used when no `string` is given.
        """
        if string:
            obj = self.parse_string(string)
            super().__init__(**obj.dict())
        else:
            super().__init__(**kwargs)

In [None]:
class IntroduceMe(StringTemplate):
    """Example template: a greeting built from a first and a last name."""
    template:str = "Hi! My name is {first} {last}"
    # Fields are declared in the same order as the placeholders above;
    # StringTemplate.validate_template compares the two lists for equality.
    first: str
    last: str

In [None]:
expected_greeting = 'Hi! My name is Charlie Schlinkert'
me = IntroduceMe(first='Charlie', last='Schlinkert')
# Building from field values renders `string` from the template.
assert me.string == expected_greeting
display(me)

0,1
string,Hi! My name is Charlie Schlinkert
template,Hi! My name is {first} {last}
first,Charlie
last,Schlinkert


In [None]:
# The other direction: recover the fields from an already formatted string.
hb = IntroduceMe.parse_string("Hi! My name is Humphry Bogart")
assert (hb.first, hb.last) == ('Humphry', 'Bogart')
hb

0,1
string,Hi! My name is Humphry Bogart
template,Hi! My name is {first} {last}
first,Humphry
last,Bogart


Templating is pretty strict: a string that does not match the template cannot be parsed. The error message could be better here.

In [None]:
# A string that does not fit the template cannot be parsed.
unparseable = "The name's Bond. James Bond"
try:
    IntroduceMe(unparseable)
except AttributeError as err:
    print(err)

'NoneType' object has no attribute 'groups'


### Example

In [None]:
import pandas as pd

In [None]:

# read_html returns one DataFrame per HTML table; the example uses the first.
wins_records_url = "https://en.wikipedia.org/wiki/List_of_Major_League_Baseball_wins_records"
dfs = pd.read_html(wins_records_url)
df = dfs[0]
df

Unnamed: 0,American League Pitcher,Throws,Wins–losses,Unnamed: 3,National League Pitcher,Throws.1,Wins–losses.1
0,Walter Johnson,R,417–279[2],,Christy Mathewson,R,373–188[3]
1,Roger Clemens,R,316–166[4],,Grover Cleveland Alexander,R,373–208[5]
2,Eddie Plank,L,305–183[6],,Pud Galvin,R,349–295[7]
3,Lefty Grove,L,300–141[8],,Warren Spahn,L,363–245[9]
4,Early Wynn,R,300–244[10],,Kid Nichols,R,361–208[11]
5,Red Ruffing,R,273–225[12],,Greg Maddux,R,355–227[13]
6,Mike Mussina,R,270–153[14],,Tim Keefe,R,342–225[15]
7,Jim Palmer,R,268–152[16],,John Clarkson,R,328–178[17]
8,Bob Feller,R,266–162[18],,Steve Carlton,L,319–226[19]
9,Ted Lyons,R,260–230[20],,Mickey Welch,R,307–210[21]


In [None]:
class WinLoss(StringTemplate):
    """Win–loss record in the scraped form, e.g. `417–279[2]`."""
    # Raw string: the backslashes are regex escapes (string_to_dict uses the
    # template text as a regex, so the brackets must be escaped to match
    # literally). In a non-raw literal they are invalid escape sequences.
    template: str = r"{wins}–{losses}\[{footnote}\]"
    wins: int 
    losses: int
    footnote: str 

# Take the first American League record as a sample input.
wins_losses = df['Wins–losses']
s = wins_losses[0]
print(s)

WinLoss.parse_string(s)

417–279[2]


0,1
string,417–279\[2\]
template,{wins}–{losses}\[{footnote}\]
wins,417
losses,279
footnote,2


In [None]:
def _wins_and_losses(record):
    """Parse one raw record and keep only its `wins` and `losses` fields."""
    return WinLoss(record).dict(include={'wins','losses'})

(
    df.set_index('American League Pitcher')['Wins–losses']
    .apply(_wins_and_losses)
    .apply(pd.Series)
    .head()
)

Unnamed: 0_level_0,wins,losses
American League Pitcher,Unnamed: 1_level_1,Unnamed: 2_level_1
Walter Johnson,417,279
Roger Clemens,316,166
Eddie Plank,305,183
Lefty Grove,300,141
Early Wynn,300,244


In [None]:
#| hide
# Run nbdev's notebook-to-module export.
import nbdev
nbdev.nbdev_export()