In [None]:
import sqlite3
import random
from dataclasses import dataclass, field
from collections import defaultdict


In [None]:
# Machine-specific absolute location of the T20 SQLite database.
DB_PATH = "/home/mikew/extracover/male_t20.db"

db = sqlite3.connect(DB_PATH)
# sqlite3.Row lets result columns be read by name as well as by position.
db.row_factory = sqlite3.Row

Looking at the IPL match between [Gujarat Titans & Royal Challengers Bengaluru, 2024-05-04](https://www.espncricinfo.com/series/indian-premier-league-2024-1410320/royal-challengers-bengaluru-vs-gujarat-titans-52nd-match-1426290/full-scorecard),
let's try to estimate a viable bowling order for the first innings.

We know the actual order was:

| over | bowler | over | bowler |
| --- | --- | --- | --- |
|0|Swapnil Singh (1)|10|Cameron Green (3)|
|1|Mohammed Siraj (1)|11|Karn Sharma (2)|
|2|Yash Dayal (1)|12|Vijaykumar Vyshak (2)|
|3|Mohammed Siraj (2)|13|Cameron Green (4)|
|4|Yash Dayal (2)|14|Vijaykumar Vyshak (3)|
|5|Cameron Green (1)|15|Karn Sharma (3)|
|6|Vijaykumar Vyshak (1)|16|Mohammed Siraj (3)|
|7|Cameron Green (2)|17|Yash Dayal (4)|
|8|Yash Dayal (3)|18|Mohammed Siraj (4)|
|9|Karn Sharma (1)|19|Vijaykumar Vyshak (4)|


Look at the distribution of specific overs bowled by the bowlers we know about across all history we have.

Possible enhancement(s) to consider later:

* consider a "recentness" weighting - might the way particular bowlers are used across the innings have evolved?

In [None]:
# Query: for every (over, bowler) pair, count the distinct matches in which
# that bowler delivered at least one ball of that over (`over_allocs` removes
# the per-ball duplicates, `alloc_counts` does the tally). The final SELECT
# keeps only six specific player rowids, attaches the player name, and orders
# the rows by name and then over.
# NOTE(review): the six rowids are presumably the bowlers of the innings
# being analysed -- confirm against the `players` table.
sql="""
WITH over_allocs AS (
	SELECT DISTINCT b.over
	, b.match_id
	, b.bowled_by
	FROM balls b
)
, alloc_counts AS (
	SELECT
		over
	,	bowled_by
	,   count(*) AS times_used
	FROM over_allocs
	GROUP BY over, bowled_by
)
SELECT
	c.over
,	p.name
, 	c.times_used
FROM alloc_counts c
JOIN players p ON p.rowid = c.bowled_by
WHERE p.rowid IN (2702, 2717, 374, 1753, 485, 509)
ORDER BY p.name, c.over
"""

In [None]:
@dataclass()
class Usage:
    """A single (over, bowler) tally: how often a bowler was given an over."""

    # Over index within the innings (presumably 0-based -- TODO confirm).
    over: int
    # Bowler's name.
    name: str
    # Number of times the bowler was used in this over.
    times_used: int

In [None]:
# Turn each result row of the usage query into a typed Usage record.
usages = [
    Usage(over=row["over"], name=row["name"], times_used=row["times_used"])
    for row in db.execute(sql)
]

In [None]:
@dataclass()
class Bowler:
    """Over-by-over usage tally for one bowler.

    Attributes:
        name: The bowler's name.
        overs: ``overs[i]`` is the number of times the bowler was used in
            over ``i``.
    """

    name: str
    overs: list[int] = field(default_factory=list)

    @property
    def weights(self) -> list[float]:
        """Fraction of the bowler's total usage that falls in each over.

        Returns:
            A list parallel to ``overs``. The entries sum to 1.0 when any
            usage is recorded; when the tally is all zeros (or empty) every
            entry is 0.0 instead of raising ``ZeroDivisionError``.
        """
        # Compute the total once rather than re-summing it for every element.
        total = self.total_bowled
        if total == 0:
            return [0.0] * len(self.overs)
        return [count / total for count in self.overs]

    @property
    def total_bowled(self) -> int:
        """Sum of the usage counts across all overs."""
        return sum(self.overs)


In [None]:
# Pivot the flat usage rows into one Bowler per name, each holding a
# 20-slot tally indexed by over.
bowlers: dict[str, Bowler] = {}
for u in usages:
    if u.name not in bowlers:
        # First row seen for this bowler: start from an all-zero tally.
        bowlers[u.name] = Bowler(name=u.name, overs=[0 for _ in range(20)])
    bowlers[u.name].overs[u.over] = u.times_used

In [None]:
@dataclass()
class OverWeight:
    """Selection weights for one over, keyed by bowler name."""

    bowlers: dict[str, float] = field(default_factory=dict)

    def selected(self) -> str:
        """Draw one bowler name at random, in proportion to its weight."""
        names = list(self.bowlers)
        weights = list(self.bowlers.values())
        # random.choices returns a list; with k=1 it holds exactly one name.
        (pick,) = random.choices(names, weights=weights, k=1)
        return pick



In [None]:
# One OverWeight per over (20 in total), mapping every bowler's name to the
# share of that bowler's total usage that falls in the over.
over_weights = [
    OverWeight(
        bowlers=dict(
            (bowler.name, bowler.weights[over_index]) for bowler in bowlers.values()
        )
    )
    for over_index in range(20)
]

In [None]:
# Simulate one bowling order: for each of the 20 overs pick a bowler at
# random, weighted by how often that bowler was used in that over, subject to
# two constraints -- no bowler bowls consecutive overs, and no bowler exceeds
# the per-bowler cap.
max_overs_per_bowler = 4

num_bowled = defaultdict(int)
b, last_b = "", ""

for ov in range(20):
    # Restrict the draw to legal bowlers up front. Re-drawing until a legal
    # name appears never terminates when every bowler with a non-zero weight
    # for this over is either the previous bowler or already at the cap.
    eligible = {
        name: weight
        for name, weight in over_weights[ov].bowlers.items()
        # .get() avoids adding zero-count entries to the defaultdict.
        if name != last_b and num_bowled.get(name, 0) < max_overs_per_bowler
    }
    if not eligible:
        raise RuntimeError(f"no eligible bowler left for over {ov}")

    names = list(eligible)
    weights = list(eligible.values())
    if sum(weights) > 0:
        # Same conditional distribution as rejection sampling over all bowlers.
        b = random.choices(names, weights=weights)[0]
    else:
        # None of the legal bowlers has any recorded usage in this over;
        # fall back to a uniform pick so the innings can still be completed.
        b = random.choice(names)

    num_bowled[b] += 1
    print(b)
    last_b = b
