In [7]:
from typing import Sequence
import pandas as pd

In [8]:
# Sentinel constraint values used in hypotheses:
#   att_any  - the attribute may take any value (most general constraint)
#   att_none - no value is accepted yet (most specific constraint)
att_any = ""
att_none = None

In [9]:
def findS(train: pd.DataFrame, outcome: str, H: dict[str, Sequence[str]]) ->  dict:
    """Run the Find-S algorithm over the positive examples in ``train``.

    The hypothesis starts out maximally specific (every attribute constrained
    to ``att_none``) and is minimally generalized for each positive example:
    an unset constraint takes the example's value, and a concrete constraint
    that disagrees with the example is relaxed to ``att_any``.

    Parameters
    ----------
    train : pd.DataFrame
        Training examples. Must contain the ``outcome`` column and one column
        per key of ``H``.
    outcome : str
        Name of the target column. It is used directly as a row mask, so it
        is expected to hold booleans; rows where it is ``True`` are the
        positive examples, all other rows are ignored.
    H : dict[str, Sequence[str]]
        Hypothesis space keyed by attribute name. Only the keys are read; the
        value sequences are not consulted.

    Returns
    -------
    dict
        Maps every attribute in ``H`` to ``att_none`` (no positive example was
        seen), to a concrete attribute value, or to ``att_any``.
    """

    # Initialize h to the most specific hypothesis in H
    h = dict.fromkeys(H.keys(), att_none)

    # For each positive training instance x
    for _, x in train.loc[train[outcome]].iterrows():

        # For each attribute constraint a_i in h.
        # Only values of existing keys are replaced below, so updating h
        # while iterating over it is safe.
        for attr, constraint in h.items():
            value = x[attr]

            if constraint is att_none:
                # Nothing accepted yet: the next more general constraint
                # satisfied by x is x's own value.
                h[attr] = value
            elif constraint != att_any and value != constraint:
                # A concrete constraint that x violates can only be
                # generalized to "any value". A constraint that is already
                # att_any is satisfied by every x and is left untouched.
                h[attr] = att_any

    # Output hypothesis h
    return h

In [10]:
# Load the training examples from CSV (path is relative to the notebook).
# `enjoysport` is the boolean target; the remaining columns are the attributes.
df = pd.read_csv("../data/play_sport.csv")

df

Unnamed: 0,sky,airtemp,humidity,wind,water,forecast,enjoysport
0,sunny,warm,normal,strong,warm,same,True
1,sunny,warm,high,strong,warm,same,True
2,rainy,cold,high,strong,warm,change,False
3,sunny,warm,high,strong,cool,change,True


In [11]:
# Attribute domains observed in the training data: every column except the
# target is mapped to the distinct values it takes. Together these domains
# span the hypothesis space.
attrs = df.columns.drop('enjoysport')
H = {name: df[name].unique() for name in attrs}
H

{'sky': array(['sunny', 'rainy'], dtype=object),
 'airtemp': array(['warm', 'cold'], dtype=object),
 'humidity': array(['normal', 'high'], dtype=object),
 'wind': array(['strong'], dtype=object),
 'water': array(['warm', 'cool'], dtype=object),
 'forecast': array(['same', 'change'], dtype=object)}

In [12]:
# Run Find-S on the training data. The result is the most specific hypothesis
# consistent with the positive examples; '' marks an attribute that had to be
# generalized to "any value".
h = findS(train=df, outcome="enjoysport", H=H)

h

{'sky': 'sunny',
 'airtemp': 'warm',
 'humidity': '',
 'wind': 'strong',
 'water': '',
 'forecast': ''}