## How do we pick the batsman/bowler for IPL Fantasy?



In [1]:
%matplotlib inline

import requests
import functools
import pandas as pd
from collections import defaultdict

import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns

In [2]:
# URL template for the per-match scoring endpoint; `{match_id}` is substituted with str.format.
# NOTE(review): the path contains a double slash ("//fixtures") — confirm this is intentional.
SCORING_URL = "https://cricketapi.platform.iplt20.com//fixtures/{match_id}/scoring"
# Match ids to fetch: 5839 through 5898 inclusive (60 ids; the range end is exclusive).
# Presumably these are the IPL 2017 fixtures, as the name suggests — verify against the API.
IPL_2017_MATCH_IDS = range(5839, 5899)

In [None]:
@functools.lru_cache()
def get_match(match_id):
    """Fetch the scoring payload for one match and return it as parsed JSON.

    Results are memoised per ``match_id`` (``lru_cache`` with its default
    size), so re-running cells does not hit the network again. The cached
    object is shared between callers: treat it as read-only.

    Parameters
    ----------
    match_id : hashable
        Value substituted into ``SCORING_URL``.

    Returns
    -------
    The decoded JSON body of the response.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    requests.Timeout
        If the server does not respond within the timeout.
    """
    # A timeout keeps a stalled connection from hanging the whole notebook run.
    r = requests.get(SCORING_URL.format(match_id=match_id), timeout=30)
    # Fail loudly on HTTP errors instead of parsing (and caching) an error body.
    r.raise_for_status()
    return r.json()


def innings(match_info):
    """Return the value stored under the ``'innings'`` key of ``match_info``."""
    return match_info['innings']


def innings_score_card(innings):
    """Return the value stored under the ``'scorecard'`` key of ``innings``.

    The parameter name shadows the module-level ``innings`` function, but
    only inside this body.
    """
    return innings['scorecard']
def batting_stats(sc):
    """Return the ``'battingStats'`` entry of the scorecard mapping ``sc``."""
    # The function's __name__ is used as a dictionary key by the collection
    # loop, so the name itself is part of the contract.
    return sc['battingStats']


def bowling_stats(sc):
    """Return the ``'bowlingStats'`` entry of the scorecard mapping ``sc``."""
    return sc['bowlingStats']


### First, gather all the stats so far

In [None]:
# Scorecard keys the analysis does not use ('mod' is the mode of dismissal).
_UNUSED_STAT_KEYS = ('mod', 'additionalPlayerIds')

# Maps 'batting_stats' / 'bowling_stats' to a flat list of per-entry records
# gathered across every match and innings.
stats = defaultdict(list)

for match_id in IPL_2017_MATCH_IDS:
    match_info = get_match(match_id)

    for inning in innings(match_info):
        scorecard = innings_score_card(inning)
        for _func in (batting_stats, bowling_stats):
            # `position` is the 1-based order in which the entry appears on the card.
            for position, stat in enumerate(_func(scorecard), start=1):
                # Build a fresh record instead of editing `stat` in place:
                # get_match is lru_cache'd, so in-place edits would silently
                # alter the cached payload seen by any later caller.
                record = {key: value for key, value in stat.items()
                          if key not in _UNUSED_STAT_KEYS}
                record['match_id'] = match_id
                record['position'] = position
                # Keyed by the accessor's function name.
                stats[_func.__name__].append(record)


print("Total batting stats gathered: {}".format(len(stats['batting_stats'])))
print("Total bowling_stats stats gathered: {}".format(len(stats['bowling_stats'])))

## Batting Stats

In [None]:
# One row per collected batting-scorecard record; columns come from the record keys.
# NOTE: `df` is rebound to the bowling records in a later cell, so the batting
# cells must be run before that one.
df = pd.DataFrame(stats['batting_stats'])
# Preview the first rows as a sanity check of the collected columns.
df.head()

Everything looks good!

### Distribution of runs scored between top3, middle order and tail

In [None]:
# Batting-order bands, expressed as 1-based scorecard positions.
top_order = range(1, 4)      # positions 1-3
middle_order = range(4, 7)   # positions 4-6
tail = range(7, 12)          # positions 7-11

# Sum the `r` column overall and within each band.
total_runs_scored = df.r.sum()
runs_scored_by_top_order = df.loc[df.position.isin(top_order), 'r'].sum()
runs_scored_middle_order = df.loc[df.position.isin(middle_order), 'r'].sum()
runs_scored_tail = df.loc[df.position.isin(tail), 'r'].sum()

print("Total runs scored: {}".format(total_runs_scored))

# Each wedge is a band's share of the runs scored by the three bands combined.
plot = plt.pie([runs_scored_by_top_order, runs_scored_middle_order, runs_scored_tail],
               colors=('gold', 'yellowgreen', 'lightcoral'),
               labels=['top3', 'middle', 'tail'],
               autopct='%1.0f%%')
# Title the figure so it stands on its own; the trailing ';' suppresses the repr.
plt.title("Share of runs scored by batting-order band");

It is nearly an even split between the top and middle order. Let's dive deeper...

In [None]:
# Distinct batting positions, in order of first appearance in the frame.
positions = df.position.unique()
# Total of the `r` column for each position, aligned with `positions`.
runs = [df.loc[df.position == pos, 'r'].sum() for pos in positions]

plot = sns.barplot(x=positions, y=runs)
# Label the axes so the chart is readable on its own; ';' suppresses the repr.
plot.set(xlabel="Batting position", ylabel="Total runs scored",
         title="Runs scored by batting position");

Had you picked the main opener and the 2-down batsman, you would have been well off in scoring batting points!

## Bowling Stats

In [None]:
# Rebinds `df`: from here on it holds one row per collected bowling-scorecard
# record (the batting frame built earlier is no longer reachable by this name).
df = pd.DataFrame(stats['bowling_stats'])
# Preview the first rows as a sanity check of the collected columns.
df.head()

In [None]:
# Sum of the `w` column over every bowling record.
# presumably `w` is wickets taken per entry — verify against the API schema
total_wickets = df.w.sum()
print("Total Wickets taken: {}".format(total_wickets))

In [None]:
# Distinct bowling-card positions, in order of first appearance in the frame.
positions = df.position.unique()
# Total of the `w` column for each position, aligned with `positions`.
wickets = [df.loc[df.position == pos, 'w'].sum() for pos in positions]

plot = sns.barplot(x=positions, y=wickets)
# Label the axes so the chart is readable on its own; ';' suppresses the repr.
plot.set(xlabel="Position on bowling card", ylabel="Total wickets taken",
         title="Wickets taken by bowling position");

Again, you are better off if you pick the top bowler!