# Linear predictors

In [69]:
# Define feature extractor function
def feature_extractor(input_str):
    """Extract binary indicator features from a candidate e-mail string.

    Args:
        input_str: The string to inspect.

    Returns:
        A dict mapping feature name to 0 or 1, always in the order
        ``has_@``, ``long_enough``, ``ends_tld``:

        * ``has_@`` -- 1 if the string contains an "@" character.
        * ``long_enough`` -- 1 if the string is longer than 7 characters.
        * ``ends_tld`` -- 1 if the fourth character from the end is ".",
          i.e. the string ends in a dot followed by three characters.
    """
    return {
        "has_@": int("@" in input_str),
        "long_enough": int(len(input_str) > 7),
        # Check the length before indexing: input_str[-4] raises IndexError
        # on strings shorter than four characters (including "").
        "ends_tld": int(len(input_str) >= 4 and input_str[-4] == "."),
    }

### Feature vector:

$\phi(x) \in \mathbb{R}^{d}$

$\phi(x) = [\phi_1(x), \phi_2(x), \ldots, \phi_d(x)]$

In [70]:
# Define feature vectorizer
def feature_vector(features):
    """Flatten a feature dict into a list of its values.

    Args:
        features: Mapping of feature name to feature value.

    Returns:
        A new list holding the values in the mapping's iteration order.
    """
    return [features[name] for name in features]

### Weight vector:
Represents each feature's contribution to the prediction.

$w \in \mathbb{R}^{d}$

$w = [w_1, w_2, \ldots, w_d]$

In [71]:
# Define weights for each feature in the feature vector.
def weight_vector():
    """Return the hand-picked weights, one per feature.

    Returns:
        A new list ``[1.5, -0.6, 1.1]`` on every call, so callers may
        mutate the result without affecting later calls.
    """
    return [1.5, -0.6, 1.1]

### Score:
Weighted combination of features.

Also indicates **confidence** for our prediction.

$w \cdot \phi(x) = \sum_{i=1}^{d} w_i \, \phi_i(x)$

In [73]:
# Calculate score.
def score(feature_vector, weight_vector):
    """Compute the dot product of a feature vector and a weight vector.

    Args:
        feature_vector: Sequence of feature values.
        weight_vector: Sequence of weights, indexed in step with
            ``feature_vector``; it must be at least as long.

    Returns:
        The sum of ``feature_vector[i] * weight_vector[i]`` over every index
        of ``feature_vector`` (0 when ``feature_vector`` is empty).
    """
    total = 0
    for position, feature in enumerate(feature_vector):
        # Accumulate left to right so floating-point results stay stable.
        total = total + (feature * weight_vector[position])
    return total

### Linear binary classifier:

$f_w(x) = \operatorname{sign}(w\cdot\phi(x)) = \begin{cases}
  +1 & \text{if } w\cdot\phi(x) > 0 \\
  -1 & \text{if } w\cdot\phi(x) < 0 \\
  (+1)? & \text{if } w\cdot\phi(x) = 0
\end{cases}$


In [87]:
# Define linear binary classifier.
def classify(score):
    """Map a score to a binary label.

    Args:
        score: Numeric score to classify.

    Returns:
        -1 when ``score`` is strictly negative, otherwise 1 (a score of
        exactly zero is labelled 1).
    """
    return -1 if score < 0 else 1

In [98]:
def is_email(address, debug=False):
    """Classify a string as an e-mail address and print the verdict.

    Runs the pipeline feature_extractor -> feature_vector -> weight_vector
    -> score -> classify and prints "Valid email address!" when the label
    is positive, otherwise "Invalid email address!".

    Args:
        address: The string to classify.
        debug: When true, also print the feature dict, the feature vector,
            the weight vector and the score (in that order) first.

    Returns:
        None; the result is only printed.
    """
    extracted = feature_extractor(address)
    phi = feature_vector(extracted)
    weights = weight_vector()
    total = score(phi, weights)

    if debug:
        for intermediate in (extracted, phi, weights, total):
            print(intermediate)

    if classify(total) > 0:
        print("Valid email address!")
    else:
        print("Invalid email address!")

In [100]:
# Well-formed address, with the intermediate values printed.
is_email("pla@gmail.com", debug=True)
# Same characters without "@" or ".", verdict only.
is_email("plagmailcom")

{'has_@': 1, 'long_enough': 1, 'ends_tld': 1}
[1, 1, 1]
[1.5, -0.6, 1.1]
2.0
Valid email address!
Invalid email address!
