### Importing required libraries

In [1]:
import numpy as np
import pandas as pd

### Reading test cricket bowler data set

In [2]:
df = pd.read_csv("wickets.csv")

# Display of first 10 rows of the data set
display(df.head(10))

Unnamed: 0,Player,Span,Mat,Inns,Balls,Runs,Wkts,BBI,BBM,Ave,Econ,SR,5,10
0,M Muralitharan (ICC/SL),1992-2010,133,230,44039,18180,800,9/51,16/220,22.72,2.47,55.0,67,22
1,SK Warne (AUS),1992-2007,145,273,40705,17995,708,8/71,12/128,25.41,2.65,57.4,37,10
2,JM Anderson (ENG),2003-2021,164*,304,35079,16575,623,7/42,11/71,26.6,2.83,56.3,30,3
3,A Kumble (INDIA),1990-2008,132,236,40850,18355,619,10/74,14/149,29.65,2.69,65.9,35,8
4,GD McGrath (AUS),1993-2007,124,243,29248,12186,563,8/24,10/27,21.64,2.49,51.9,29,3
5,SCJ Broad (ENG),2007-2021,149,274,29863,14590,524,8/15,11/121,27.84,2.93,56.9,18,3
6,CA Walsh (WI),1984-2001,132,242,30019,12688,519,7/37,13/55,24.44,2.53,57.8,22,3
7,DW Steyn (SA),2004-2019,93,171,18608,10077,439,7/51,11/60,22.95,3.24,42.3,26,5
8,N Kapil Dev (INDIA),1978-1994,131,227,27740,12867,434,9/83,11/146,29.64,2.78,63.9,23,2
9,HMRKB Herath (SL),1999-2018,93,170,25993,12157,433,9/127,14/184,28.07,2.8,60.0,34,9


##### Description of each column of the data set
- Player: Name of the player and their country name or for whom they played for
- Span: Total duration of their career
- Mat: Total number of matches they played during their career
- Inns: Total number of innings they played
- Balls: Total number of balls they bowled
- Runs: Total number of runs they conceded from total number of balls bowled
- Wkts: Total number of wickets they took
- BBI: stands for 'Best Bowling in Innings' i.e. figure gives best bowling in one inning
- BBM: stands for 'Best Bowling in Match' and gives the combined score over 2 or more innings in one match
- Average: The average number of runs conceded per wicket. (Ave = Runs/W)
- Econ: The average number of runs conceded per over. (Econ = Runs/Overs bowled)
- SR: Stands for 'strike rate'. This is a bowling SR, which is different from batting SR. Bowling SR is defined for a bowler as the average number of balls bowled per wicket taken. The lower the strike rate, the more effective a bowler is at taking wickets quickly.
- 5: The number of innings in which the bowler took at least five wickets
- 10: The number of matches in which the bowler took at least ten wickets


###  number of rows and columns of the dataframe

In [3]:
# number of rows
print("number of rows = ", df.shape[0])

# number of columns
print("number of columns = ", df.shape[1])

number of rows =  79
number of columns =  14


###  Data statistics and data types check

In [4]:
# checking data statistics
display(df.describe())

# checking for missing values and data types of each column
print(df.info())

Unnamed: 0,Inns,Balls,Runs,Wkts,Ave,Econ,SR,5,10
count,79.0,79.0,79.0,79.0,79.0,79.0,79.0,79.0,79.0
mean,144.911392,18638.35443,8599.35443,317.21519,27.469747,2.806835,59.193671,16.35443,2.797468
std,51.180222,7199.256972,3085.168807,121.924911,3.655658,0.351577,9.350132,9.642372,3.235935
min,67.0,8785.0,4846.0,200.0,20.94,1.98,41.2,3.0,0.0
25%,110.0,13583.0,6456.5,229.0,24.5,2.6,53.3,9.5,1.0
50%,129.0,16498.0,7742.0,266.0,28.0,2.82,57.4,14.0,2.0
75%,169.0,21742.5,9756.0,374.5,29.87,3.08,63.95,20.5,3.5
max,304.0,44039.0,18355.0,800.0,34.79,3.46,91.9,67.0,22.0


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 79 entries, 0 to 78
Data columns (total 14 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Player  79 non-null     object 
 1   Span    79 non-null     object 
 2   Mat     79 non-null     object 
 3   Inns    79 non-null     int64  
 4   Balls   79 non-null     int64  
 5   Runs    79 non-null     int64  
 6   Wkts    79 non-null     int64  
 7   BBI     79 non-null     object 
 8   BBM     79 non-null     object 
 9   Ave     79 non-null     float64
 10  Econ    79 non-null     float64
 11  SR      79 non-null     float64
 12  5       79 non-null     int64  
 13  10      79 non-null     int64  
dtypes: float64(3), int64(6), object(5)
memory usage: 8.8+ KB
None


#### There is no missing value found in the data set

### Renaming the column names of the data set

In [5]:
# column names
print(df.columns)

Index(['Player', 'Span', 'Mat', 'Inns', 'Balls', 'Runs', 'Wkts', 'BBI', 'BBM',
       'Ave', 'Econ', 'SR', '5', '10'],
      dtype='object')


In [6]:
df = df.rename(columns={'Mat':'Match', 
                        'Inns':'Innings',
                        'Wkts': 'Wickets',
                        'BBI': 'Best_bowling_innings',
                        'BBM': 'Best_bowling_match',
                        'Ave': 'Average',
                        'Econ': 'Economy',
                        'SR': 'Bowling_strike_rate',
                        5: 'Five_wickets_taken',
                        10: 'Ten_wickets_taken',})

display(df.head())

Unnamed: 0,Player,Span,Match,Innings,Balls,Runs,Wickets,Best_bowling_innings,Best_bowling_match,Average,Economy,Bowling_strike_rate,5,10
0,M Muralitharan (ICC/SL),1992-2010,133,230,44039,18180,800,9/51,16/220,22.72,2.47,55.0,67,22
1,SK Warne (AUS),1992-2007,145,273,40705,17995,708,8/71,12/128,25.41,2.65,57.4,37,10
2,JM Anderson (ENG),2003-2021,164*,304,35079,16575,623,7/42,11/71,26.6,2.83,56.3,30,3
3,A Kumble (INDIA),1990-2008,132,236,40850,18355,619,10/74,14/149,29.65,2.69,65.9,35,8
4,GD McGrath (AUS),1993-2007,124,243,29248,12186,563,8/24,10/27,21.64,2.49,51.9,29,3


### Romoving columns 

In [36]:
# Remove two columns name is 'Best_bowling_innings' and 'Best_bowling_match'
df.drop(['Best_bowling_innings', 'Best_bowling_match'], axis = 1)

Unnamed: 0,Player,Span,Match,Innings,Balls,Runs,Wickets,Average,Economy,Bowling_strike_rate,5,10
0,M Muralitharan (ICC/SL),1992-2010,133,230,44039,18180,800,22.72,2.47,55.0,67,22
1,SK Warne (AUS),1992-2007,145,273,40705,17995,708,25.41,2.65,57.4,37,10
2,JM Anderson (ENG),2003-2021,164*,304,35079,16575,623,26.60,2.83,56.3,30,3
3,A Kumble (INDIA),1990-2008,132,236,40850,18355,619,29.65,2.69,65.9,35,8
4,GD McGrath (AUS),1993-2007,124,243,29248,12186,563,21.64,2.49,51.9,29,3
...,...,...,...,...,...,...,...,...,...,...,...,...
74,SCG MacGill (AUS),1998-2008,44,85,11237,6038,208,29.02,3.22,54.0,12,2
75,Saqlain Mushtaq (PAK),1995-2004,49,86,14070,6206,208,29.83,2.64,67.6,13,3
76,AME Roberts (WI),1974-1983,47,90,11135,5174,202,25.61,2.78,55.1,11,2
77,JA Snow (ENG),1965-1976,49,93,12021,5387,202,26.66,2.68,59.5,8,1


In [37]:
# splitting the 'Player' column to get the information about 'Country'
df_player = df['Player'].str.split("(", expand=True)

display(df_player.head(5))

Unnamed: 0,0,1
0,M Muralitharan,ICC/SL)
1,SK Warne,AUS)
2,JM Anderson,ENG)
3,A Kumble,INDIA)
4,GD McGrath,AUS)


In [38]:
# concatinating 'Country' with the main dataframe
df = pd.concat([df, df_player], axis=1)

display(df.head())

Unnamed: 0,Player,Span,Match,Innings,Balls,Runs,Wickets,Best_bowling_innings,Best_bowling_match,Average,Economy,Bowling_strike_rate,5,10,0,1
0,M Muralitharan (ICC/SL),1992-2010,133,230,44039,18180,800,9/51,16/220,22.72,2.47,55.0,67,22,M Muralitharan,ICC/SL)
1,SK Warne (AUS),1992-2007,145,273,40705,17995,708,8/71,12/128,25.41,2.65,57.4,37,10,SK Warne,AUS)
2,JM Anderson (ENG),2003-2021,164*,304,35079,16575,623,7/42,11/71,26.6,2.83,56.3,30,3,JM Anderson,ENG)
3,A Kumble (INDIA),1990-2008,132,236,40850,18355,619,10/74,14/149,29.65,2.69,65.9,35,8,A Kumble,INDIA)
4,GD McGrath (AUS),1993-2007,124,243,29248,12186,563,8/24,10/27,21.64,2.49,51.9,29,3,GD McGrath,AUS)


In [39]:
# dropping the 'Player' column
df = df.drop('Player', axis=1)

display(df.head())

Unnamed: 0,Span,Match,Innings,Balls,Runs,Wickets,Best_bowling_innings,Best_bowling_match,Average,Economy,Bowling_strike_rate,5,10,0,1
0,1992-2010,133,230,44039,18180,800,9/51,16/220,22.72,2.47,55.0,67,22,M Muralitharan,ICC/SL)
1,1992-2007,145,273,40705,17995,708,8/71,12/128,25.41,2.65,57.4,37,10,SK Warne,AUS)
2,2003-2021,164*,304,35079,16575,623,7/42,11/71,26.6,2.83,56.3,30,3,JM Anderson,ENG)
3,1990-2008,132,236,40850,18355,619,10/74,14/149,29.65,2.69,65.9,35,8,A Kumble,INDIA)
4,1993-2007,124,243,29248,12186,563,8/24,10/27,21.64,2.49,51.9,29,3,GD McGrath,AUS)


In [40]:
# renaming the column names
df = df.rename(columns={0: 'Player',
                        1: 'Country'})

display(df.head())

Unnamed: 0,Span,Match,Innings,Balls,Runs,Wickets,Best_bowling_innings,Best_bowling_match,Average,Economy,Bowling_strike_rate,5,10,Player,Country
0,1992-2010,133,230,44039,18180,800,9/51,16/220,22.72,2.47,55.0,67,22,M Muralitharan,ICC/SL)
1,1992-2007,145,273,40705,17995,708,8/71,12/128,25.41,2.65,57.4,37,10,SK Warne,AUS)
2,2003-2021,164*,304,35079,16575,623,7/42,11/71,26.6,2.83,56.3,30,3,JM Anderson,ENG)
3,1990-2008,132,236,40850,18355,619,10/74,14/149,29.65,2.69,65.9,35,8,A Kumble,INDIA)
4,1993-2007,124,243,29248,12186,563,8/24,10/27,21.64,2.49,51.9,29,3,GD McGrath,AUS)


In [41]:
# remove the ")" from the 'Country' column
df['Country'] = df['Country'].str.replace(")", "")

display(df.head())

Unnamed: 0,Span,Match,Innings,Balls,Runs,Wickets,Best_bowling_innings,Best_bowling_match,Average,Economy,Bowling_strike_rate,5,10,Player,Country
0,1992-2010,133,230,44039,18180,800,9/51,16/220,22.72,2.47,55.0,67,22,M Muralitharan,ICC/SL
1,1992-2007,145,273,40705,17995,708,8/71,12/128,25.41,2.65,57.4,37,10,SK Warne,AUS
2,2003-2021,164*,304,35079,16575,623,7/42,11/71,26.6,2.83,56.3,30,3,JM Anderson,ENG
3,1990-2008,132,236,40850,18355,619,10/74,14/149,29.65,2.69,65.9,35,8,A Kumble,INDIA
4,1993-2007,124,243,29248,12186,563,8/24,10/27,21.64,2.49,51.9,29,3,GD McGrath,AUS


In [42]:
print(df.columns)

Index(['Span', 'Match', 'Innings', 'Balls', 'Runs', 'Wickets',
       'Best_bowling_innings', 'Best_bowling_match', 'Average', 'Economy',
       'Bowling_strike_rate', '5', '10', 'Player', 'Country'],
      dtype='object')


In [44]:
# rearrange the columns
new_col_sequence = ['Player', 'Country','Span', 'Match', 'Innings', 'Balls', 'Runs', 'Wickets',
       'Best_bowling_innings', 'Best_bowling_match', 'Average', 'Economy',
       'Bowling_strike_rate', '5', '10']

In [45]:
df = df[new_col_sequence]

display(df.head())

Unnamed: 0,Player,Country,Span,Match,Innings,Balls,Runs,Wickets,Best_bowling_innings,Best_bowling_match,Average,Economy,Bowling_strike_rate,5,10
0,M Muralitharan,ICC/SL,1992-2010,133,230,44039,18180,800,9/51,16/220,22.72,2.47,55.0,67,22
1,SK Warne,AUS,1992-2007,145,273,40705,17995,708,8/71,12/128,25.41,2.65,57.4,37,10
2,JM Anderson,ENG,2003-2021,164*,304,35079,16575,623,7/42,11/71,26.6,2.83,56.3,30,3
3,A Kumble,INDIA,1990-2008,132,236,40850,18355,619,10/74,14/149,29.65,2.69,65.9,35,8
4,GD McGrath,AUS,1993-2007,124,243,29248,12186,563,8/24,10/27,21.64,2.49,51.9,29,3


### creation a column based on a condition or function

In [46]:
def icc_check(x):
    if "ICC" in x:
        return "Yes"
    else:
        return "No"

In [47]:
# Checking whether a player had played for ICC or not
df['played_for_ICC'] = df['Country'].apply(icc_check)

display(df.head(10))

Unnamed: 0,Player,Country,Span,Match,Innings,Balls,Runs,Wickets,Best_bowling_innings,Best_bowling_match,Average,Economy,Bowling_strike_rate,5,10,played_for_ICC
0,M Muralitharan,ICC/SL,1992-2010,133,230,44039,18180,800,9/51,16/220,22.72,2.47,55.0,67,22,Yes
1,SK Warne,AUS,1992-2007,145,273,40705,17995,708,8/71,12/128,25.41,2.65,57.4,37,10,No
2,JM Anderson,ENG,2003-2021,164*,304,35079,16575,623,7/42,11/71,26.6,2.83,56.3,30,3,No
3,A Kumble,INDIA,1990-2008,132,236,40850,18355,619,10/74,14/149,29.65,2.69,65.9,35,8,No
4,GD McGrath,AUS,1993-2007,124,243,29248,12186,563,8/24,10/27,21.64,2.49,51.9,29,3,No
5,SCJ Broad,ENG,2007-2021,149,274,29863,14590,524,8/15,11/121,27.84,2.93,56.9,18,3,No
6,CA Walsh,WI,1984-2001,132,242,30019,12688,519,7/37,13/55,24.44,2.53,57.8,22,3,No
7,DW Steyn,SA,2004-2019,93,171,18608,10077,439,7/51,11/60,22.95,3.24,42.3,26,5,No
8,N Kapil Dev,INDIA,1978-1994,131,227,27740,12867,434,9/83,11/146,29.64,2.78,63.9,23,2,No
9,HMRKB Herath,SL,1999-2018,93,170,25993,12157,433,9/127,14/184,28.07,2.8,60.0,34,9,No


In [48]:
# Displaying how many players played for ICC
df['played_for_ICC'].value_counts()

No     74
Yes     5
Name: played_for_ICC, dtype: int64

In [49]:
# Removing 'ICC/' from the data set to make the country name free
df['Country'] = df['Country'].str.replace("ICC/", "")

display(df.head())

Unnamed: 0,Player,Country,Span,Match,Innings,Balls,Runs,Wickets,Best_bowling_innings,Best_bowling_match,Average,Economy,Bowling_strike_rate,5,10,played_for_ICC
0,M Muralitharan,SL,1992-2010,133,230,44039,18180,800,9/51,16/220,22.72,2.47,55.0,67,22,Yes
1,SK Warne,AUS,1992-2007,145,273,40705,17995,708,8/71,12/128,25.41,2.65,57.4,37,10,No
2,JM Anderson,ENG,2003-2021,164*,304,35079,16575,623,7/42,11/71,26.6,2.83,56.3,30,3,No
3,A Kumble,INDIA,1990-2008,132,236,40850,18355,619,10/74,14/149,29.65,2.69,65.9,35,8,No
4,GD McGrath,AUS,1993-2007,124,243,29248,12186,563,8/24,10/27,21.64,2.49,51.9,29,3,No


In [50]:
df['Country'].value_counts()

AUS        18
ENG        13
INDIA      10
WI          9
SA          8
PAK         7
NZ          7
SL          3
ENG/ICC     2
BDESH       1
ZIM         1
Name: Country, dtype: int64

In [51]:
# Now removing '/ICC' from the data set to make the country name free
df['Country'] = df['Country'].str.replace("/ICC", "")

display(df.head())

Unnamed: 0,Player,Country,Span,Match,Innings,Balls,Runs,Wickets,Best_bowling_innings,Best_bowling_match,Average,Economy,Bowling_strike_rate,5,10,played_for_ICC
0,M Muralitharan,SL,1992-2010,133,230,44039,18180,800,9/51,16/220,22.72,2.47,55.0,67,22,Yes
1,SK Warne,AUS,1992-2007,145,273,40705,17995,708,8/71,12/128,25.41,2.65,57.4,37,10,No
2,JM Anderson,ENG,2003-2021,164*,304,35079,16575,623,7/42,11/71,26.6,2.83,56.3,30,3,No
3,A Kumble,INDIA,1990-2008,132,236,40850,18355,619,10/74,14/149,29.65,2.69,65.9,35,8,No
4,GD McGrath,AUS,1993-2007,124,243,29248,12186,563,8/24,10/27,21.64,2.49,51.9,29,3,No


In [52]:
df['Country'].value_counts()

AUS      18
ENG      15
INDIA    10
WI        9
SA        8
PAK       7
NZ        7
SL        3
BDESH     1
ZIM       1
Name: Country, dtype: int64

In [53]:
# splitting the 'Span' column based on the "-"
df_span = df['Span'].str.split("-", expand=True)
display(df.head(5))

Unnamed: 0,Player,Country,Span,Match,Innings,Balls,Runs,Wickets,Best_bowling_innings,Best_bowling_match,Average,Economy,Bowling_strike_rate,5,10,played_for_ICC
0,M Muralitharan,SL,1992-2010,133,230,44039,18180,800,9/51,16/220,22.72,2.47,55.0,67,22,Yes
1,SK Warne,AUS,1992-2007,145,273,40705,17995,708,8/71,12/128,25.41,2.65,57.4,37,10,No
2,JM Anderson,ENG,2003-2021,164*,304,35079,16575,623,7/42,11/71,26.6,2.83,56.3,30,3,No
3,A Kumble,INDIA,1990-2008,132,236,40850,18355,619,10/74,14/149,29.65,2.69,65.9,35,8,No
4,GD McGrath,AUS,1993-2007,124,243,29248,12186,563,8/24,10/27,21.64,2.49,51.9,29,3,No


In [54]:
# concatinating the new dataframe with the main dataframe
df = pd.concat([df, df_span], axis=1)
display(df.head(5))

Unnamed: 0,Player,Country,Span,Match,Innings,Balls,Runs,Wickets,Best_bowling_innings,Best_bowling_match,Average,Economy,Bowling_strike_rate,5,10,played_for_ICC,0,1
0,M Muralitharan,SL,1992-2010,133,230,44039,18180,800,9/51,16/220,22.72,2.47,55.0,67,22,Yes,1992,2010
1,SK Warne,AUS,1992-2007,145,273,40705,17995,708,8/71,12/128,25.41,2.65,57.4,37,10,No,1992,2007
2,JM Anderson,ENG,2003-2021,164*,304,35079,16575,623,7/42,11/71,26.6,2.83,56.3,30,3,No,2003,2021
3,A Kumble,INDIA,1990-2008,132,236,40850,18355,619,10/74,14/149,29.65,2.69,65.9,35,8,No,1990,2008
4,GD McGrath,AUS,1993-2007,124,243,29248,12186,563,8/24,10/27,21.64,2.49,51.9,29,3,No,1993,2007


In [55]:
# renaming the newly created column names
df = df.rename(columns={0: "start_year",
                        1: "end_year"})
display(df.head(5))

Unnamed: 0,Player,Country,Span,Match,Innings,Balls,Runs,Wickets,Best_bowling_innings,Best_bowling_match,Average,Economy,Bowling_strike_rate,5,10,played_for_ICC,start_year,end_year
0,M Muralitharan,SL,1992-2010,133,230,44039,18180,800,9/51,16/220,22.72,2.47,55.0,67,22,Yes,1992,2010
1,SK Warne,AUS,1992-2007,145,273,40705,17995,708,8/71,12/128,25.41,2.65,57.4,37,10,No,1992,2007
2,JM Anderson,ENG,2003-2021,164*,304,35079,16575,623,7/42,11/71,26.6,2.83,56.3,30,3,No,2003,2021
3,A Kumble,INDIA,1990-2008,132,236,40850,18355,619,10/74,14/149,29.65,2.69,65.9,35,8,No,1990,2008
4,GD McGrath,AUS,1993-2007,124,243,29248,12186,563,8/24,10/27,21.64,2.49,51.9,29,3,No,1993,2007


In [56]:
# removing the "Span" column
df = df.drop("Span", axis=1)

display(df.head())

Unnamed: 0,Player,Country,Match,Innings,Balls,Runs,Wickets,Best_bowling_innings,Best_bowling_match,Average,Economy,Bowling_strike_rate,5,10,played_for_ICC,start_year,end_year
0,M Muralitharan,SL,133,230,44039,18180,800,9/51,16/220,22.72,2.47,55.0,67,22,Yes,1992,2010
1,SK Warne,AUS,145,273,40705,17995,708,8/71,12/128,25.41,2.65,57.4,37,10,No,1992,2007
2,JM Anderson,ENG,164*,304,35079,16575,623,7/42,11/71,26.6,2.83,56.3,30,3,No,2003,2021
3,A Kumble,INDIA,132,236,40850,18355,619,10/74,14/149,29.65,2.69,65.9,35,8,No,1990,2008
4,GD McGrath,AUS,124,243,29248,12186,563,8/24,10/27,21.64,2.49,51.9,29,3,No,1993,2007


In [57]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 79 entries, 0 to 78
Data columns (total 17 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Player                79 non-null     object 
 1   Country               79 non-null     object 
 2   Match                 79 non-null     object 
 3   Innings               79 non-null     int64  
 4   Balls                 79 non-null     int64  
 5   Runs                  79 non-null     int64  
 6   Wickets               79 non-null     int64  
 7   Best_bowling_innings  79 non-null     object 
 8   Best_bowling_match    79 non-null     object 
 9   Average               79 non-null     float64
 10  Economy               79 non-null     float64
 11  Bowling_strike_rate   79 non-null     float64
 12  5                     79 non-null     int64  
 13  10                    79 non-null     int64  
 14  played_for_ICC        79 non-null     object 
 15  start_year            79 

In [58]:
df['start_year'] = df['start_year'].astype('int') 
df['end_year'] = df['end_year'].astype('int')

print(df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 79 entries, 0 to 78
Data columns (total 17 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Player                79 non-null     object 
 1   Country               79 non-null     object 
 2   Match                 79 non-null     object 
 3   Innings               79 non-null     int64  
 4   Balls                 79 non-null     int64  
 5   Runs                  79 non-null     int64  
 6   Wickets               79 non-null     int64  
 7   Best_bowling_innings  79 non-null     object 
 8   Best_bowling_match    79 non-null     object 
 9   Average               79 non-null     float64
 10  Economy               79 non-null     float64
 11  Bowling_strike_rate   79 non-null     float64
 12  5                     79 non-null     int64  
 13  10                    79 non-null     int64  
 14  played_for_ICC        79 non-null     object 
 15  start_year            79 

In [59]:
df['years_played'] = df['end_year'] - df['start_year']

df = df.drop(['start_year', "end_year"], axis=1)

display(df.head(10))

Unnamed: 0,Player,Country,Match,Innings,Balls,Runs,Wickets,Best_bowling_innings,Best_bowling_match,Average,Economy,Bowling_strike_rate,5,10,played_for_ICC,years_played
0,M Muralitharan,SL,133,230,44039,18180,800,9/51,16/220,22.72,2.47,55.0,67,22,Yes,18
1,SK Warne,AUS,145,273,40705,17995,708,8/71,12/128,25.41,2.65,57.4,37,10,No,15
2,JM Anderson,ENG,164*,304,35079,16575,623,7/42,11/71,26.6,2.83,56.3,30,3,No,18
3,A Kumble,INDIA,132,236,40850,18355,619,10/74,14/149,29.65,2.69,65.9,35,8,No,18
4,GD McGrath,AUS,124,243,29248,12186,563,8/24,10/27,21.64,2.49,51.9,29,3,No,14
5,SCJ Broad,ENG,149,274,29863,14590,524,8/15,11/121,27.84,2.93,56.9,18,3,No,14
6,CA Walsh,WI,132,242,30019,12688,519,7/37,13/55,24.44,2.53,57.8,22,3,No,17
7,DW Steyn,SA,93,171,18608,10077,439,7/51,11/60,22.95,3.24,42.3,26,5,No,15
8,N Kapil Dev,INDIA,131,227,27740,12867,434,9/83,11/146,29.64,2.78,63.9,23,2,No,16
9,HMRKB Herath,SL,93,170,25993,12157,433,9/127,14/184,28.07,2.8,60.0,34,9,No,19


In [60]:
df.sort_values(by='years_played', ascending = False).head(5)

Unnamed: 0,Player,Country,Match,Innings,Balls,Runs,Wickets,Best_bowling_innings,Best_bowling_match,Average,Economy,Bowling_strike_rate,5,10,played_for_ICC,years_played
21,Imran Khan,PAK,88,142,19458,8258,362,8/58,14/116,22.81,2.54,53.7,23,6,No,21
55,GS Sobers,WI,93,159,21599,7999,235,6/73,8/80,34.03,2.22,91.9,6,0,No,20
9,HMRKB Herath,SL,93,170,25993,12157,433,9/127,14/184,28.07,2.8,60.0,34,9,No,19
0,M Muralitharan,SL,133,230,44039,18180,800,9/51,16/220,22.72,2.47,55.0,67,22,Yes,18
2,JM Anderson,ENG,164*,304,35079,16575,623,7/42,11/71,26.6,2.83,56.3,30,3,No,18


### Imran Khan had played for the longest period of time (21 years)

In [61]:
df.sort_values(by='years_played', ascending = False).tail(5)

Unnamed: 0,Player,Country,Match,Innings,Balls,Runs,Wickets,Best_bowling_innings,Best_bowling_match,Average,Economy,Bowling_strike_rate,5,10,played_for_ICC,years_played
54,Yasir Shah,PAK,46*,84,13607,7248,235,8/41,14/184,30.84,3.19,57.9,16,3,No,7
61,SJ Harmison,ENG,63,115,13375,7192,226,7/12,11/76,31.82,3.22,59.1,8,1,Yes,7
72,JR Hazlewood,AUS,55,103,11887,5438,212,6/67,9/115,25.65,2.74,56.0,9,0,No,7
71,K Rabada,SA,47,86,8785,4846,213,7/112,13/144,22.75,3.3,41.2,10,4,No,6
44,GP Swann,ENG,60,109,15349,7642,255,6/65,10/132,29.96,2.98,60.1,17,3,No,5


### GP Swann had played for the shortest period of time (5 years)

In [62]:
# Number of Australian Bowlers present in the dataset
df['Country'].value_counts()

AUS      18
ENG      15
INDIA    10
WI        9
SA        8
PAK       7
NZ        7
SL        3
BDESH     1
ZIM       1
Name: Country, dtype: int64

#### There are 18 Australian bowlers in the dataset
#### There is only one bowler from Bangladesh 

### Which player had the lowest economy rate?

In [63]:
df.sort_values(by='Economy', ascending = False).tail(5)

Unnamed: 0,Player,Country,Match,Innings,Balls,Runs,Wickets,Best_bowling_innings,Best_bowling_match,Average,Economy,Bowling_strike_rate,5,10,played_for_ICC,years_played
68,CV Grimmett,AUS,37,67,14513,5231,216,7/40,14/199,24.21,2.16,67.1,21,7,No,11
39,BS Bedi,INDIA,67,118,21364,7637,266,7/98,10/194,28.71,2.14,80.3,14,1,No,13
47,R Benaud,AUS,63,116,19108,6704,248,7/72,11/105,27.03,2.1,77.0,16,1,No,12
35,DL Underwood,ENG,86,151,21862,7674,297,8/51,13/71,25.83,2.1,73.6,17,6,No,16
32,LR Gibbs,WI,79,148,27115,8989,309,8/38,11/157,29.09,1.98,87.7,18,2,No,18


#### LR Gibbs had the lowest economy rate (1.98)

### Which player had the lowest strike rate?

In [64]:
df.sort_values(by='Bowling_strike_rate', ascending = False).tail(5)

Unnamed: 0,Player,Country,Match,Innings,Balls,Runs,Wickets,Best_bowling_innings,Best_bowling_match,Average,Economy,Bowling_strike_rate,5,10,played_for_ICC,years_played
25,AA Donald,SA,72,129,15519,7344,330,8/71,12/139,22.25,2.83,47.0,20,3,No,10
19,MD Marshall,WI,81,151,17584,7876,376,7/22,11/89,20.94,2.68,46.7,22,4,No,13
20,Waqar Younis,PAK,87,154,16224,8788,373,7/76,13/135,23.56,3.25,43.4,22,5,No,14
7,DW Steyn,SA,93,171,18608,10077,439,7/51,11/60,22.95,3.24,42.3,26,5,No,15
71,K Rabada,SA,47,86,8785,4846,213,7/112,13/144,22.75,3.3,41.2,10,4,No,6


#### K Rabada had the lowest strike rate (41.2)

### Which player had the lowest bowling average?

In [65]:
df.sort_values(by='Average', ascending = False).tail(5)

Unnamed: 0,Player,Country,Match,Innings,Balls,Runs,Wickets,Best_bowling_innings,Best_bowling_match,Average,Economy,Bowling_strike_rate,5,10,played_for_ICC,years_played
4,GD McGrath,AUS,124,243,29248,12186,563,8/24,10/27,21.64,2.49,51.9,29,3,No,14
33,FS Trueman,ENG,67,127,15178,6625,307,8/31,12/119,21.57,2.61,49.4,17,3,No,13
15,CEL Ambrose,WI,98,179,22103,8501,405,8/45,11/84,20.99,2.3,54.5,22,3,No,12
41,J Garner,WI,58,111,13169,5433,259,6/56,9/108,20.97,2.47,50.8,7,0,No,10
19,MD Marshall,WI,81,151,17584,7876,376,7/22,11/89,20.94,2.68,46.7,22,4,No,13


#### MD Marshall had the lowest bowling average (20.94)