# 02 — Entity Extraction (dates, names, roles/depts)
**Date:** 2025-08-09

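Deterministic (rule-based, no ML model) extraction of dates, employee names, and role/department slots, using Microsoft.Recognizers.Text for dates, FuzzySharp for fuzzy string matching, and a small alias lexicon.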


In [None]:
#r "nuget: Microsoft.Recognizers.Text, 1.8.13"
#r "nuget: Microsoft.Recognizers.Text.DateTime, 1.8.13"
#r "nuget: FuzzySharp, 2.0.2"
#r "nuget: RestSharp, 112.1.0"
#r "nuget: Microsoft.Extensions.Caching.Memory, 9.0.8"
#r "nuget: System.Text.Json, 9.0.0"


In [None]:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.Json;
using Microsoft.Recognizers.Text;
using Microsoft.Recognizers.Text.DateTime;
using FuzzySharp;
using RestSharp;

/// <summary>The intents a <see cref="QuerySpec"/> can carry.</summary>
public enum Intent
{
    GetContactInfo,
    FilterByHireDate,
    FilterByRole,
    Unknown,
}

/// <summary>
/// Optional values extracted from a query. Every slot defaults to <c>null</c>,
/// so callers set only the ones they found.
/// </summary>
/// <param name="Names">Names mentioned in the query, if any.</param>
/// <param name="Date">A single date, if any.</param>
/// <param name="Range">A (Start, End) date pair, if any.</param>
/// <param name="Operator">
/// Comparison keyword for the date slot. NOTE(review): presumably one of the
/// "before" / "after" / "between" strings produced by ExtractDates; confirm against callers.
/// </param>
/// <param name="Department">Department slot, if any.</param>
/// <param name="Role">Role slot, if any.</param>
public record Slots(
    string[]? Names                       = null,
    DateTime? Date                        = null,
    (DateTime Start, DateTime End)? Range = null,
    string? Operator                      = null,
    string? Department                    = null,
    string? Role                          = null);

/// <summary>A parsed query: the detected intent together with its extracted slots.</summary>
/// <param name="Intent">What the query is asking for.</param>
/// <param name="Slots">The values extracted for that intent.</param>
public record QuerySpec(
    Intent Intent,
    Slots Slots);

/// <summary>One row of the employee roster used by the extraction demos.</summary>
/// <param name="DisplayName">Full name; the field fuzzy name matching compares against.</param>
/// <param name="Email">Email address.</param>
/// <param name="Department">Department name.</param>
/// <param name="Role">Job title.</param>
/// <param name="OriginalHireDate">Hire date.</param>
public record Employee(string DisplayName, string Email, string Department, string Role, DateTime OriginalHireDate);

// Hard-coded demo roster that the cells below match names against.
List<Employee> employees = new()
{
    new Employee("Rick Sanchez", "rick.sanchez@company.com", "Engineering", "Staff Engineer", new DateTime(2015, 5, 10)),
    new Employee("Morty Smith", "morty.smith@company.com", "Engineering", "Engineer I", new DateTime(2023, 10, 12)),
    new Employee("Summer Smith", "summer.smith@company.com", "Product", "PM", new DateTime(2021, 2, 1)),
    new Employee("Beth Smith", "beth.smith@company.com", "HR", "HR Manager", new DateTime(2019, 7, 3)),
    new Employee("Jerry Smith", "jerry.smith@company.com", "Sales", "Account Manager", new DateTime(2022, 9, 15)),
};

// Sanity check that the roster was created.
Console.WriteLine($"Loaded demo employees: {employees.Count}");


In [None]:
/// <summary>Result of ExtractDates: a single date or a date range, plus the comparison keyword.</summary>
/// <param name="Date">The single date found, or <c>null</c>.</param>
/// <param name="Range">The (Start, End) pair found, or <c>null</c>.</param>
/// <param name="Operator">"before", "after", "between", or <c>null</c> when none applies.</param>
public record DateExtraction(
    DateTime? Date,
    (DateTime Start, DateTime End)? Range,
    string? Operator);

/// <summary>
/// Extracts a date constraint from free text using Microsoft.Recognizers.Text (English culture).
/// </summary>
/// <param name="query">Natural-language query, e.g. "hired after Jan 1 2021".</param>
/// <returns>
/// In priority order:
/// the first resolution carrying both "start" and "end" as a "between" range;
/// otherwise one-sided bounds ("start" only gives "after", "end" only gives "before"; one of each
/// with start &lt;= end gives a "between" range);
/// otherwise two or more point dates as a "between" range from the earliest to the latest;
/// otherwise a single point date, with the operator taken from a before/after/between keyword
/// in the query (or <c>null</c>);
/// otherwise an extraction with every member <c>null</c> (also for null/blank input).
/// </returns>
static DateExtraction ExtractDates(string query)
{
    var none = new DateExtraction(null, null, null);
    if (string.IsNullOrWhiteSpace(query)) return none;

    // Parse with the invariant culture so the result does not depend on the
    // machine's regional settings.
    static DateTime? ReadDate(Dictionary<string, string> fields, string key) =>
        fields.TryGetValue(key, out var text)
        && DateTime.TryParse(text, System.Globalization.CultureInfo.InvariantCulture,
                             System.Globalization.DateTimeStyles.None, out var parsed)
            ? parsed
            : (DateTime?)null;

    var points = new List<DateTime>();
    DateTime? lowerBound = null; // first resolution that had only a "start"
    DateTime? upperBound = null; // first resolution that had only an "end"

    foreach (var entity in DateTimeRecognizer.RecognizeDateTime(query, Culture.English))
    {
        if (entity.Resolution is null
            || !entity.Resolution.TryGetValue("values", out var raw)
            || raw is not List<Dictionary<string, string>> candidates)
        {
            continue;
        }

        // Several "value" entries under ONE recognised phrase are alternative readings of
        // that phrase (e.g. a date written without a year), not the two ends of an interval.
        var alternatives = new List<DateTime>();
        foreach (var candidate in candidates)
        {
            if (ReadDate(candidate, "value") is DateTime point)
            {
                alternatives.Add(point);
                continue;
            }

            var start = ReadDate(candidate, "start");
            var end = ReadDate(candidate, "end");
            if (start is DateTime s && end is DateTime e)
                return new DateExtraction(null, (s, e), "between");

            // Recognizers.Text folds a leading "before"/"after" into the phrase and reports a
            // one-sided range: only "end" for "before X", only "start" for "after X".
            // Dropping these would lose exactly the queries this function exists for.
            if (start is not null) lowerBound ??= start;
            else if (end is not null) upperBound ??= end;
        }

        // Keep one date per phrase; the earliest alternative is a deterministic choice.
        // NOTE(review): earliest suits past-looking hire-date filters; confirm for other uses.
        if (alternatives.Count > 0) points.Add(alternatives.Min());
    }

    if (lowerBound is DateTime lo && upperBound is DateTime hi && lo <= hi)
        return new DateExtraction(null, (lo, hi), "between");
    // Contradictory bounds (start after end) fall through and keep the lower bound only.
    if (lowerBound is not null) return new DateExtraction(lowerBound, null, "after");
    if (upperBound is not null) return new DateExtraction(upperBound, null, "before");

    if (points.Count >= 2)
        return new DateExtraction(null, (points.Min(), points.Max()), "between");

    if (points.Count == 1)
    {
        // Keyword fallback for when the recognizer returned a bare point date.
        string? op =
            query.Contains("before", StringComparison.OrdinalIgnoreCase) ? "before"
            : query.Contains("after", StringComparison.OrdinalIgnoreCase) ? "after"
            : query.Contains("between", StringComparison.OrdinalIgnoreCase) ? "between"
            : null;
        return new DateExtraction(points[0], null, op);
    }

    return none;
}

// Smoke-test the extractor on one query per operator keyword.
foreach (var sample in new[]
{
    "hired before 2024",
    "hired between 2020 and 2023",
    "hired after Jan 1 2021",
})
{
    Console.WriteLine(ExtractDates(sample));
}


In [None]:
using System.Text.Json.Nodes;

// Alias lexicon: each section maps a canonical key to the spellings that should resolve to it.
// The using directive above must come before any statement in the cell, so it sits first.
var lexiconJson = @"{
  ""departments"": {
    ""engineering"": [""engineering"",""eng"",""platform"",""product engineering""],
    ""hr"": [""hr"",""human resources""],
    ""sales"": [""sales"",""bizdev"",""business development""],
    ""product"": [""product"",""pm"",""program management""]
  },
  ""roles"": {
    ""manager"": [""manager"",""supervisor"",""team lead"",""lead""],
    ""engineer"": [""engineer"",""developer"",""dev"",""software engineer""],
    ""pm"": [""pm"",""product manager"",""program manager""]
  }
}";

// Parsed once; the root is a JSON object with the "departments" and "roles" sections.
var lexicon = JsonNode.Parse(lexiconJson)!.AsObject();

/// <summary>
/// Resolves free text to a canonical key of a lexicon section.
/// </summary>
/// <param name="section">JSON object mapping each canonical key to an array of alias strings.</param>
/// <param name="input">Text to resolve; compared case-insensitively after trimming.</param>
/// <param name="minScore">Minimum FuzzySharp score (0-100) for a fuzzy match to be accepted.</param>
/// <returns>
/// The canonical key whose name or alias equals the input; otherwise the key owning the
/// best-scoring fuzzy alias when that score reaches <paramref name="minScore"/>; otherwise <c>null</c>.
/// </returns>
static string? MapAlias(System.Text.Json.Nodes.JsonObject section, string input, int minScore = 90)
{
    if (section is null || string.IsNullOrWhiteSpace(input)) return null;
    var q = input.Trim().ToLowerInvariant();

    // Materialise each alias list once: it is read by both passes below.
    var entries = new List<(string Canon, List<string> Aliases)>();
    foreach (var kvp in section)
    {
        // Entries whose value is not an array carry no aliases; skip them.
        if (kvp.Value is not System.Text.Json.Nodes.JsonArray array) continue;
        var lowered = array
            .Where(node => node is not null)
            .Select(node => node!.ToString().ToLowerInvariant())
            .ToList();
        entries.Add((kvp.Key, lowered));
    }

    // Pass 1: exact matches across ALL entries. Doing this before any fuzzy scoring stops a
    // near-match in an earlier entry (e.g. the alias "product engineering" for the input
    // "product") from pre-empting an exact alias that belongs to a later entry.
    foreach (var (canon, aliases) in entries)
    {
        if (string.Equals(canon, q, StringComparison.OrdinalIgnoreCase) || aliases.Contains(q))
            return canon;
    }

    // Pass 2: best fuzzy score over every entry, not the first entry to reach the threshold.
    string? bestCanon = null;
    var bestScore = -1;
    foreach (var (canon, aliases) in entries)
    {
        if (aliases.Count == 0) continue; // nothing to score against
        var hit = FuzzySharp.Process.ExtractOne(q, aliases);
        if (hit is not null && hit.Score > bestScore)
        {
            bestScore = hit.Score;
            bestCanon = canon;
        }
    }

    return bestScore >= minScore ? bestCanon : null;
}

// Resolves free text against the lexicon's "departments" section; null when MapAlias finds no match.
string? MapDepartment(string text)
{
    var departments = (System.Text.Json.Nodes.JsonObject)lexicon["departments"]!;
    return MapAlias(departments, text);
}
// Resolves free text against the lexicon's "roles" section; null when MapAlias finds no match.
string? MapRole(string text)
{
    var roles = (System.Text.Json.Nodes.JsonObject)lexicon["roles"]!;
    return MapAlias(roles, text);
}

// "eng" is listed as an alias of the "engineering" department.
Console.WriteLine(MapDepartment(text: "eng"));
// "team lead" is listed as an alias of the "manager" role.
Console.WriteLine(MapRole(text: "team lead"));


In [None]:
/// <summary>
/// Splits a query into lower-cased candidate name fragments.
/// </summary>
/// <param name="query">Free text such as "emails for rick, summer and morty".</param>
/// <returns>
/// The trimmed, non-empty pieces obtained by cutting the lower-cased query at
/// ",", " and ", " &amp; " and "|".
/// </returns>
static List<string> ExtractCandidateNames(string query)
{
    // Rewrite each separator to '|' in turn (order matters for overlapping separators),
    // then cut on '|'.
    var piped = new[] { ",", " and ", " & " }
        .Aggregate(query.ToLowerInvariant(), (text, separator) => text.Replace(separator, "|"));

    const StringSplitOptions options =
        StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries;

    return new List<string>(piped.Split('|', options));
}

/// <summary>
/// Fuzzy-matches the name fragments in a query against employee display names.
/// </summary>
/// <param name="query">Free text; split into fragments by ExtractCandidateNames.</param>
/// <param name="all">Employees to match against.</param>
/// <param name="minScore">Minimum FuzzySharp score (0-100) for a match to be accepted.</param>
/// <param name="topK">How many top candidates to request from FuzzySharp per fragment.</param>
/// <returns>
/// At most one employee per fragment, in fragment order, without repeating a display name.
/// Empty when the query is blank or there are no employees.
/// </returns>
static List<Employee> MatchNames(string query, IEnumerable<Employee> all, int minScore = 85, int topK = 3)
{
    var results = new List<Employee>();
    if (string.IsNullOrWhiteSpace(query) || all is null) return results;

    // Materialise once: the sequence used to be re-enumerated and copied for every fragment.
    var roster = all.ToList();
    if (roster.Count == 0) return results;

    // Display names already accepted, so a repeated mention is not added twice.
    var seen = new HashSet<string>();

    foreach (var fragment in ExtractCandidateNames(query))
    {
        var accepted = FuzzySharp.Process
            .ExtractTop(fragment, roster, e => e.DisplayName, limit: topK)
            .FirstOrDefault(hit => hit.Score >= minScore);

        if (accepted is not null && seen.Add(accepted.Value.DisplayName))
            results.Add(accepted.Value);
    }

    return results;
}

// Match three first names from one query against the demo roster and print the hits.
var matched = MatchNames(query: "emails for rick, summer and morty", all: employees);
Console.WriteLine(string.Join(" | ", from hit in matched select hit.DisplayName));
