Skip to content

Commit

Permalink
Cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
safesparrow committed Dec 14, 2022
1 parent a5a2bd1 commit 1f9384c
Show file tree
Hide file tree
Showing 4 changed files with 92 additions and 78 deletions.
91 changes: 55 additions & 36 deletions tests/ParallelTypeCheckingTests/Code/GraphProcessing.fs
Original file line number Diff line number Diff line change
@@ -1,12 +1,8 @@
/// Parallel processing of graph of work items with dependencies
module ParallelTypeCheckingTests.GraphProcessing

open System.Collections.Concurrent
open System.Collections.Generic
open System.Threading
open ParallelTypeCheckingTests.Parallel

/// Used for processing
type NodeInfo<'Item> =
{
Item: 'Item
Expand All @@ -15,38 +11,42 @@ type NodeInfo<'Item> =
Dependants: 'Item[]
}

// TODO Do not expose this type to other files
type Node<'Item, 'Result> =
type private PrivateNode<'Item, 'Result> =
{
Info: NodeInfo<'Item>
mutable ProcessedDepsCount: int
mutable Result: 'Result option
}

/// Basic concurrent set implemented using ConcurrentDictionary
type private ConcurrentSet<'a>() =
let dict = ConcurrentDictionary<'a, unit>()

member this.Add(item: 'a): bool =
dict.TryAdd(item, ())

type ProcessedNode<'Item, 'Result> =
{
Info: NodeInfo<'Item>
Result: 'Result
}

/// <summary>
/// A generic method to generate results for a graph of work items in parallel.
/// Processes leaves first, and after each node has been processed, schedules any now unblocked dependants.
/// Returns a list of results, per item.
/// Uses ThreadPool to schedule work.
/// </summary>
/// <param name="graph">Graph of work items</param>
/// <param name="doWork">A function to generate results for a single item</param>
let processGraphSimple<'Item, 'Result when 'Item: equality and 'Item: comparison>
/// <param name="work">A function to generate results for a single item</param>
/// <param name="ct">Cancellation token</param>
/// <remarks>
/// An alternative scheduling approach is to schedule N parallel tasks that process items from a BlockingCollection.
/// My basic tests suggested it's faster, although confirming that would require more detailed testing.
/// </remarks>
let processGraph<'Item, 'Result when 'Item: equality and 'Item: comparison>
(graph: Graph<'Item>)
// TODO Avoid exposing mutable nodes to the caller
(doWork: IReadOnlyDictionary<'Item, Node<'Item, 'Result>> -> Node<'Item, 'Result> -> 'Result)
: 'Result[] // Results in order defined in 'graph'
(work: ('Item -> ProcessedNode<'Item, 'Result>) -> NodeInfo<'Item> -> 'Result)
(ct: CancellationToken)
: ('Item * 'Result)[] // Individual item results
=
let transitiveDeps = graph |> Graph.transitiveOpt
let dependants = graph |> Graph.reverse

let makeNode (item: 'Item) : Node<'Item, 'Result> =
let makeNode (item: 'Item) : PrivateNode<'Item, 'Result> =
let info =
let exists = graph.ContainsKey item

Expand Down Expand Up @@ -80,40 +80,59 @@ let processGraphSimple<'Item, 'Result when 'Item: equality and 'Item: comparison
|> Seq.filter (fun n -> n.Info.Deps.Length = 0)
|> Seq.toArray

printfn $"Node count: {nodes.Count}"
use cts = new CancellationTokenSource()

let mutable processedCount = 0
let waitHandle = new AutoResetEvent(false)
let rec post node =
Async.Start(async {work node}, cts.Token)
and work
(node: Node<'Item, 'Result>)

let getItemPublicNode item =
let node = nodes[item]
{
ProcessedNode.Info = node.Info
ProcessedNode.Result =
node.Result
|> Option.defaultWith (fun () -> failwith $"Results for item '{node.Info.Item}' are not yet available")
}

let incrementProcessedCount =
let mutable processedCount = 0
fun () ->
if Interlocked.Increment(&processedCount) = nodes.Count then
waitHandle.Set() |> ignore

let rec queueNode node =
Async.Start(async {processNode node}, ct)

and processNode
(node: PrivateNode<'Item, 'Result>)
: unit =
let singleRes = doWork nodes node
let info = node.Info

let singleRes = work getItemPublicNode info
node.Result <- Some singleRes

let unblockedDependants =
node.Info.Dependants
|> lookupMany
// For every dependant, increment its number of processed dependencies,
// and filter ones which now have all dependencies processed.
// and filter dependants which now have all dependencies processed (but didn't before).
|> Array.filter (fun dependant ->
// This counter can be incremented by multiple workers on different threads.
let pdc = Interlocked.Increment(&dependant.ProcessedDepsCount)
// Note: We cannot read 'dependant.ProcessedDepsCount' again to avoid returning the same item multiple times.
pdc = dependant.Info.Deps.Length)
unblockedDependants |> Array.iter post
if Interlocked.Increment(&processedCount) = nodes.Count then
waitHandle.Set() |> ignore

unblockedDependants |> Array.iter queueNode
incrementProcessedCount()

leaves |> Array.iter post
leaves |> Array.iter queueNode
// TODO Handle async exceptions
// q.Error += ...
waitHandle.WaitOne() |> ignore

nodes.Values
|> Seq.map (fun node ->
node.Result
|> Option.defaultWith (fun () -> failwith $"Unexpected lack of result for item '{node.Info.Item}'")
let result =
node.Result
|> Option.defaultWith (fun () -> failwith $"Unexpected lack of result for item '{node.Info.Item}'")
node.Info.Item, result
)
|> Seq.toArray
|> Seq.sortBy fst
|> Seq.toArray
9 changes: 0 additions & 9 deletions tests/ParallelTypeCheckingTests/Code/Parallel.fs
Original file line number Diff line number Diff line change
@@ -1,13 +1,9 @@
module ParallelTypeCheckingTests.Parallel

#nowarn "1182"

open System
open System.Collections.Concurrent
open System.Threading

// TODO Could replace with MailboxProcessor+Tasks/Asyncs instead of BlockingCollection + Threads
// See http://www.fssnip.net/nX/title/Limit-degree-of-parallelism-using-an-agent
/// Process items in parallel, allow more work to be scheduled as a result of finished work,
/// limit parallelisation to 'parallelism' threads
let processInParallel
Expand All @@ -24,18 +20,13 @@ let processInParallel
let mutable processedCount = 0

let processItem item =
// printfn $"Processing {_itemToString item}"
let toSchedule = work item

let processedCount =
lock processedCountLock (fun () ->
processedCount <- processedCount + 1
processedCount)

// let toScheduleString =
// toSchedule |> Array.map _itemToString |> (fun names -> String.Join(", ", names))

// printfn $"Scheduling {toSchedule.Length} items: {toScheduleString}"
toSchedule |> Array.iter bc.Add
processedCount

Expand Down
16 changes: 12 additions & 4 deletions tests/ParallelTypeCheckingTests/Code/ParallelTypeChecking.fs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ let folder (isFinalFold: bool) (state: State) (result: SingleResult) : FinalFile
let CheckMultipleInputsInParallel
((ctok, checkForErrors, tcConfig: TcConfig, tcImports: TcImports, tcGlobals, prefixPathOpt, tcState, eagerFormat, inputs): 'a * (unit -> bool) * TcConfig * TcImports * TcGlobals * LongIdent option * TcState * (PhasedDiagnostic -> PhasedDiagnostic) * ParsedInput list)
: FinalFileResult list * TcState =

use cts = new CancellationTokenSource()

let sourceFiles: FileWithAST array =
inputs
Expand Down Expand Up @@ -140,11 +142,17 @@ let CheckMultipleInputsInParallel
let state: State = tcState, priorErrors

let partialResults, (tcState, _) =
TypeCheckingGraphProcessing.processGraph<int, State, SingleResult, FinalFileResult>
TypeCheckingGraphProcessing.processFileGraph<int, State, SingleResult, FinalFileResult>
graph
processFile
folder
state
10

partialResults |> Array.toList, tcState)
cts.Token

let partialResults =
partialResults
|> Array.sortBy fst
|> Array.map snd
|> Array.toList

partialResults, tcState)
54 changes: 25 additions & 29 deletions tests/ParallelTypeCheckingTests/Code/TypeCheckingGraphProcessing.fs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
/// Parallel processing of graph of work items with dependencies
module ParallelTypeCheckingTests.TypeCheckingGraphProcessing

open System.IO
open ParallelTypeCheckingTests.GraphProcessing
open System.Collections.Generic
open System.Threading
Expand All @@ -14,31 +15,29 @@ open System.Threading
/// <param name="deps">Direct dependencies of a node</param>
/// <param name="transitiveDeps">Transitive dependencies of a node</param>
/// <param name="folder">A way to fold a single result into existing state</param>
/// <remarks>
/// Similar to 'processFileGraph', this function is generic yet specific to the type-checking process.
/// </remarks>
let private combineResults
(emptyState: 'State)
(deps: Node<'Item, 'State * 'Result>[])
(transitiveDeps: Node<'Item, 'State * 'Result>[])
(deps: ProcessedNode<'Item, 'State * 'Result>[])
(transitiveDeps: ProcessedNode<'Item, 'State * 'Result>[])
(folder: 'State -> 'Result -> 'State)
: 'State =
match deps with
| [||] -> emptyState
| _ ->
let biggestDep =
let sizeMetric (node: Node<_,_>) =
let sizeMetric (node: ProcessedNode<_,_>) =
node.Info.TransitiveDeps.Length
deps
|> Array.maxBy sizeMetric

let orFail value =
value |> Option.defaultWith (fun () -> failwith "Unexpected lack of result")

let firstState = biggestDep.Result |> orFail |> fst

// TODO Potential perf optimisation: Keep transDeps in a HashSet from the start,
// avoiding reconstructing the HashSet here
let firstState = biggestDep.Result |> fst

// Add single-file results of remaining transitive deps one-by-one using folder
// Note: Good to preserve order here so that folding happens in file order
// Note: Ordering is not preserved due to reusing results of the biggest child
// rather than starting with empty state
let included =
let set = HashSet(biggestDep.Info.TransitiveDeps)
set.Add biggestDep.Info.Item |> ignore
Expand All @@ -48,44 +47,41 @@ let private combineResults
transitiveDeps
|> Array.filter (fun dep -> included.Contains dep.Info.Item = false)
|> Array.distinctBy (fun dep -> dep.Info.Item)
|> Array.map (fun dep -> dep.Result |> orFail |> snd)
|> Array.map (fun dep -> dep.Result |> snd)

let state = Array.fold folder firstState resultsToAdd
state

let processGraph<'Item, 'State, 'Result, 'FinalFileResult when 'Item: equality and 'Item: comparison>
// TODO This function and its parameters are quite specific to type-checking despite using generic types.
// Perhaps we should make it either more specific and remove type parameters, or more generic.
let processFileGraph<'Item, 'State, 'Result, 'FinalFileResult when 'Item: equality and 'Item: comparison>
(graph: Graph<'Item>)
(doWork: 'Item -> 'State -> 'Result)
(folder: bool -> 'State -> 'Result -> 'FinalFileResult * 'State)
(emptyState: 'State)
(_parallelism: int)
: 'FinalFileResult[] * 'State =
(ct: CancellationToken)
: ('Item * 'FinalFileResult)[] * 'State =

let work
(dict: IReadOnlyDictionary<'Item, Node<'Item, 'State * 'Result>>)
(node: Node<'Item, 'State * 'Result>)
(getFinishedNode: 'Item -> ProcessedNode<'Item, 'State * 'Result>)
(node: NodeInfo<'Item>)
: 'State * 'Result =
let folder x y = folder false x y |> snd
let deps = node.Info.Deps |> Array.map (fun node -> dict[node])
let transitiveDeps = node.Info.TransitiveDeps |> Array.map (fun node -> dict[node])
let deps = node.Deps |> Array.except [|node.Item|] |> Array.map getFinishedNode
let transitiveDeps = node.TransitiveDeps|> Array.except [|node.Item|] |> Array.map getFinishedNode
let inputState = combineResults emptyState deps transitiveDeps folder
let singleRes = doWork node.Info.Item inputState
let singleRes = doWork node.Item inputState
let state = folder inputState singleRes
state, singleRes

use cts = new CancellationTokenSource()

let results =
processGraphSimple
graph
work
let results = processGraph graph work ct

let finals, state: 'FinalFileResult[] * 'State =
let finals, state: ('Item * 'FinalFileResult)[] * 'State =
results
|> Array.fold
(fun (fileResults, state) (_, itemRes) ->
(fun (fileResults, state) (item, (_, itemRes)) ->
let fileResult, state = folder true state itemRes
Array.append fileResults [| fileResult |], state)
Array.append fileResults [| item, fileResult |], state)
([||], emptyState)

finals, state

0 comments on commit 1f9384c

Please sign in to comment.