# Rohit Sharma Career Data Transformation

## Import Libraries

In [1]:
import pandas as pd

## Data Preparation and Transformation

### Importing files as dataframes

In [2]:
# Load the four scraped CSV exports into dataframes (paths are relative).
scores = pd.read_csv("../output/csv/scores.csv")
match_results = pd.read_csv("../output/csv/match_results.csv")
match_awards = pd.read_csv("../output/csv/match_awards.csv")
dismissal_list = pd.read_csv("../output/csv/dismissal_list.csv")

### Scores Data Analysis

In [3]:
# Inspect column dtypes and non-null counts before any cleaning.
scores.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1014 entries, 0 to 1013
Data columns (total 14 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Runs        1014 non-null   object
 1   Mins        1014 non-null   object
 2   BF          1014 non-null   object
 3   4s          1014 non-null   object
 4   6s          1014 non-null   object
 5   SR          1014 non-null   object
 6   Pos         1014 non-null   object
 7   Dismissal   1014 non-null   object
 8   Inns        1014 non-null   object
 9   Opposition  1014 non-null   object
 10  Ground      1014 non-null   object
 11  Start Date  1014 non-null   object
 12  format      1014 non-null   object
 13  match_id    1014 non-null   int64 
dtypes: int64(1), object(13)
memory usage: 111.0+ KB


### Scores Data Cleaning

#### Removing unnecessary columns - "Mins"

In [4]:
# The "Mins" column is not used in this analysis, so drop it.
scores = scores.drop(columns=["Mins"]).copy()

#### Drop duplicate Rows

In [5]:
# Preview rows that exactly repeat an earlier row (the first occurrence is not flagged).
scores[scores.duplicated()]

Unnamed: 0,Runs,BF,4s,6s,SR,Pos,Dismissal,Inns,Opposition,Ground,Start Date,format,match_id
559,DNB,-,-,-,-,-,-,1,v England,Durban,19 Sep 2007,T20I # 40,287873
560,50*,40,7,2,125.00,5,not out,1,v South Africa,Durban,20 Sep 2007,T20I # 43,287876
561,8*,5,0,1,160.00,6,not out,1,v Australia,Durban,22 Sep 2007,T20I # 45,287878
562,30*,16,2,1,187.50,6,not out,1,v Pakistan,Johannesburg,24 Sep 2007,T20I # 46,287879
563,DNB,-,-,-,-,-,-,2,v Australia,Brabourne,20 Oct 2007,T20I # 47,297800
...,...,...,...,...,...,...,...,...,...,...,...,...,...
994,8,13,1,0,61.53,1,caught,1,v Afghanistan,Bridgetown,20 Jun 2024,T20I # 2710,1415743
995,23,11,3,1,209.09,1,caught,1,v Bangladesh,North Sound,22 Jun 2024,T20I # 2716,1415747
996,92,41,7,8,224.39,1,bowled,1,v Australia,Gros Islet,24 Jun 2024,T20I # 2721,1415751
997,57,39,6,2,146.15,1,bowled,1,v England,Providence,27 Jun 2024,T20I # 2724,1415754


In [6]:
# Discard exact repeat rows, keeping the first occurrence of each.
scores = scores.drop_duplicates()
scores

Unnamed: 0,Runs,BF,4s,6s,SR,Pos,Dismissal,Inns,Opposition,Ground,Start Date,format,match_id
0,177,301,23,1,58.80,6,lbw,2,v West Indies,Eden Gardens,6 Nov 2013,Test # 2101,676525
1,111*,127,11,3,87.40,6,not out,2,v West Indies,Wankhede,14 Nov 2013,Test # 2102,676527
2,14,42,1,0,33.33,5,caught,1,v South Africa,Johannesburg,18 Dec 2013,Test # 2108,648665
3,6,13,1,0,46.15,5,bowled,3,v South Africa,Johannesburg,18 Dec 2013,Test # 2108,648665
4,0,1,0,0,0.00,5,bowled,1,v South Africa,Durban,26 Dec 2013,Test # 2111,648667
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1009,7,8,1,0,87.50,2,caught,1,v GT,Wankhede,6 May 2025,Twenty20,1473493
1010,5,5,1,0,100.00,2,caught,1,v DC,Wankhede,21 May 2025,Twenty20,1473501
1011,24,21,2,1,114.28,2,caught,1,v Punjab Kings,Jaipur,26 May 2025,Twenty20,1473506
1012,81,50,9,4,162.00,1,caught,1,v GT,New Chandigarh,30 May 2025,Twenty20,1473509


#### Remove innings where he did not bat (Runs is "DNB" or "TDNB")

In [7]:
# Preview the rows whose Runs value is one of the did-not-bat markers.
scores[scores["Runs"].isin(["DNB", "TDNB"])]

Unnamed: 0,Runs,BF,4s,6s,SR,Pos,Dismissal,Inns,Opposition,Ground,Start Date,format,match_id
31,TDNB,-,-,-,-,-,-,-,v West Indies,Port of Spain,18 Aug 2016,Test # 2218,1022599
37,DNB,-,-,-,-,-,-,3,v New Zealand,Indore,8 Oct 2016,Test # 2223,1030217
116,DNB,-,-,-,-,-,-,4,v Australia,Brisbane,14 Dec 2024,Test # 2570,1426557
119,DNB,-,-,-,-,-,-,2,v Ireland,Belfast,23 Jun 2007,ODI # 2590,293071
154,DNB,-,-,-,-,-,-,2,v New Zealand,Hamilton,11 Mar 2009,ODI # 2824,366624
159,TDNB,-,-,-,-,-,-,-,v West Indies,Gros Islet,5 Jul 2009,ODI # 2855,377316
174,DNB,-,-,-,-,-,-,2,v New Zealand,Chennai,10 Dec 2010,ODI # 3077,467887
196,DNB,-,-,-,-,-,-,1,v Sri Lanka,Mirpur,13 Mar 2012,ODI # 3259,535795
232,TDNB,-,-,-,-,-,-,-,v South Africa,Centurion,11 Dec 2013,ODI # 3444,648655
334,TDNB,-,-,-,-,-,-,-,v West Indies,Providence,8 Aug 2019,ODI # 4196,1188624


In [8]:
# Keep only the rows with an actual score (Runs is neither "DNB" nor "TDNB").
# The explicit .copy() makes the result an independent frame, so the later
# `scores.loc[:, <col>] = ...` assignments write to a real frame rather than
# to a slice (which triggers pandas' SettingWithCopyWarning).
scores = scores[~scores["Runs"].isin(["DNB", "TDNB"])].copy()
scores.shape

(831, 13)

#### Remove '*' from Runs

In [9]:
# Preview scores that end in "*"; in the rows shown these are the "not out" innings.
scores[scores["Runs"].str.endswith("*")]

Unnamed: 0,Runs,BF,4s,6s,SR,Pos,Dismissal,Inns,Opposition,Ground,Start Date,format,match_id
1,111*,127,11,3,87.40,6,not out,2,v West Indies,Wankhede,14 Nov 2013,Test # 2102,676527
9,31*,97,4,0,31.95,5,not out,4,v New Zealand,Wellington,14 Feb 2014,Test # 2120,667653
33,68*,93,8,0,73.11,6,not out,3,v New Zealand,Kanpur,22 Sep 2016,Test # 2221,1030213
36,51*,63,3,2,80.95,6,not out,1,v New Zealand,Indore,8 Oct 2016,Test # 2223,1030217
38,102*,160,8,1,63.75,6,not out,2,v Sri Lanka,Nagpur,24 Nov 2017,Test # 2283,1122724
...,...,...,...,...,...,...,...,...,...,...,...,...,...
827,56*,33,6,2,169.69,3,not out,2,v CSK,Pune,28 Apr 2018,Twenty20,1136587
829,24*,15,1,2,160.00,5,not out,2,v Kings XI,Indore,4 May 2018,Twenty20,1136594
862,55*,48,8,0,114.58,2,not out,2,v KKR,Wankhede,5 May 2019,Twenty20,1178431
982,105*,63,11,5,166.66,1,not out,2,v CSK,Wankhede,14 Apr 2024,Twenty20,1426267


In [10]:
# Exploratory only (result is not assigned): slicing off the last character
# truncates every value, not just those ending in "*" (e.g. "177" becomes "17"),
# so a conditional helper is used in the following cells instead.
scores["Runs"].str[0:-1]

0        17
1       111
2         1
3          
4          
       ... 
1009       
1010       
1011      2
1012      8
1013       
Name: Runs, Length: 831, dtype: object

In [11]:
def remove_astx(x):
    """Return the score string ``x`` without a trailing ``*`` marker.

    Strings that do not end in ``*`` are returned unchanged.
    """
    if x.endswith("*"):
        return x[:-1]
    return x

In [12]:
# Strip the trailing "*" from every score, then show the frame to verify.
scores.loc[:, "Runs"] = scores["Runs"].map(remove_astx)
scores

Unnamed: 0,Runs,BF,4s,6s,SR,Pos,Dismissal,Inns,Opposition,Ground,Start Date,format,match_id
0,177,301,23,1,58.80,6,lbw,2,v West Indies,Eden Gardens,6 Nov 2013,Test # 2101,676525
1,111,127,11,3,87.40,6,not out,2,v West Indies,Wankhede,14 Nov 2013,Test # 2102,676527
2,14,42,1,0,33.33,5,caught,1,v South Africa,Johannesburg,18 Dec 2013,Test # 2108,648665
3,6,13,1,0,46.15,5,bowled,3,v South Africa,Johannesburg,18 Dec 2013,Test # 2108,648665
4,0,1,0,0,0.00,5,bowled,1,v South Africa,Durban,26 Dec 2013,Test # 2111,648667
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1009,7,8,1,0,87.50,2,caught,1,v GT,Wankhede,6 May 2025,Twenty20,1473493
1010,5,5,1,0,100.00,2,caught,1,v DC,Wankhede,21 May 2025,Twenty20,1473501
1011,24,21,2,1,114.28,2,caught,1,v Punjab Kings,Jaipur,26 May 2025,Twenty20,1473506
1012,81,50,9,4,162.00,1,caught,1,v GT,New Chandigarh,30 May 2025,Twenty20,1473509


#### Remove the '# <match number>' suffix from format

In [13]:
def remove_hash(x):
    """Return the text of ``x`` before the first ``#``, stripped of whitespace.

    When ``x`` contains no ``#`` the whole string is returned, stripped.
    """
    head, _, _ = x.partition("#")
    return head.strip()

In [14]:
# Reduce each format value to its bare label (e.g. "Test # 2101" becomes "Test").
scores.loc[:, "format"] = scores["format"].map(remove_hash)

#### Identifying '-' as empty in the dataframe and replacing the values

In [15]:
# Count, per column, how many cells hold the literal placeholder "-".
scores.astype(str).eq("-").sum()

Runs          0
BF            0
4s            0
6s            0
SR            2
Pos           0
Dismissal     0
Inns          0
Opposition    0
Ground        0
Start Date    0
format        0
match_id      0
dtype: int64

In [16]:
# Show the rows whose strike rate is the "-" placeholder.
scores[scores["SR"] == "-"]

Unnamed: 0,Runs,BF,4s,6s,SR,Pos,Dismissal,Inns,Opposition,Ground,Start Date,format,match_id
410,0,0,0,0,-,5,not out,2,v Zimbabwe,Harare,13 Jun 2010,T20I,452154
645,0,0,0,0,-,3,run out,2,v KKR,Wankhede,25 May 2011,Twenty20,501269


In [17]:
# Replace the "-" placeholder with 0 so the column can be cast to float later.
# NOTE(review): in the rows displayed above, "-" occurs where BF is 0, i.e. the
# strike rate is undefined rather than zero — confirm 0 is the intended value.
scores.loc[:, "SR"] = scores["SR"].replace("-", 0)

#### Checking for other nulls

In [18]:
# Count missing (NaN) values per column.
scores.isna().sum()

Runs          0
BF            0
4s            0
6s            0
SR            0
Pos           0
Dismissal     0
Inns          0
Opposition    0
Ground        0
Start Date    0
format        0
match_id      0
dtype: int64

#### Changing datatypes of remaining columns

In [19]:
# Check the current dtypes before converting the numeric columns.
scores.dtypes

Runs          object
BF            object
4s            object
6s            object
SR            object
Pos           object
Dismissal     object
Inns          object
Opposition    object
Ground        object
Start Date    object
format        object
match_id       int64
dtype: object

In [20]:
# Target dtype per numeric column: strike rate is fractional, the rest are whole numbers.
convert_dict = {
    column: ("float" if column == "SR" else "int")
    for column in ("Runs", "BF", "4s", "6s", "SR", "Pos", "Inns")
}

In [21]:
# Cast the numeric columns to the dtypes listed in convert_dict.
# The explicit copy=True argument was dropped: a copy is already the default,
# and newer pandas releases deprecate the `copy` keyword.
scores = scores.astype(convert_dict)

In [22]:
# Parse dates written like "6 Nov 2013" (day, abbreviated month, year) into datetimes.
scores["Start Date"] = pd.to_datetime(scores["Start Date"], format="%d %b %Y")

In [23]:
# Display the frame to verify the dtype and date conversions.
scores

Unnamed: 0,Runs,BF,4s,6s,SR,Pos,Dismissal,Inns,Opposition,Ground,Start Date,format,match_id
0,177,301,23,1,58.80,6,lbw,2,v West Indies,Eden Gardens,2013-11-06,Test,676525
1,111,127,11,3,87.40,6,not out,2,v West Indies,Wankhede,2013-11-14,Test,676527
2,14,42,1,0,33.33,5,caught,1,v South Africa,Johannesburg,2013-12-18,Test,648665
3,6,13,1,0,46.15,5,bowled,3,v South Africa,Johannesburg,2013-12-18,Test,648665
4,0,1,0,0,0.00,5,bowled,1,v South Africa,Durban,2013-12-26,Test,648667
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1009,7,8,1,0,87.50,2,caught,1,v GT,Wankhede,2025-05-06,Twenty20,1473493
1010,5,5,1,0,100.00,2,caught,1,v DC,Wankhede,2025-05-21,Twenty20,1473501
1011,24,21,2,1,114.28,2,caught,1,v Punjab Kings,Jaipur,2025-05-26,Twenty20,1473506
1012,81,50,9,4,162.00,1,caught,1,v GT,New Chandigarh,2025-05-30,Twenty20,1473509


#### Removing the leading 'v ' from the Opposition column

In [24]:
# Strip only a leading "v " prefix from the opposition name. Unlike a blind
# positional slice, this is a no-op if the cell is re-run on already-clean values
# (slicing again would chop real characters off the team name).
scores["Opposition"] = scores["Opposition"].str.replace(r"^v\s+", "", regex=True)

In [25]:
# Display the frame to verify the Opposition values no longer start with "v ".
scores

Unnamed: 0,Runs,BF,4s,6s,SR,Pos,Dismissal,Inns,Opposition,Ground,Start Date,format,match_id
0,177,301,23,1,58.80,6,lbw,2,West Indies,Eden Gardens,2013-11-06,Test,676525
1,111,127,11,3,87.40,6,not out,2,West Indies,Wankhede,2013-11-14,Test,676527
2,14,42,1,0,33.33,5,caught,1,South Africa,Johannesburg,2013-12-18,Test,648665
3,6,13,1,0,46.15,5,bowled,3,South Africa,Johannesburg,2013-12-18,Test,648665
4,0,1,0,0,0.00,5,bowled,1,South Africa,Durban,2013-12-26,Test,648667
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1009,7,8,1,0,87.50,2,caught,1,GT,Wankhede,2025-05-06,Twenty20,1473493
1010,5,5,1,0,100.00,2,caught,1,DC,Wankhede,2025-05-21,Twenty20,1473501
1011,24,21,2,1,114.28,2,caught,1,Punjab Kings,Jaipur,2025-05-26,Twenty20,1473506
1012,81,50,9,4,162.00,1,caught,1,GT,New Chandigarh,2025-05-30,Twenty20,1473509


#### Rename columns

In [26]:
# Old header -> new snake_case name, for every column that gets renamed.
renamed_cols = dict(
    [
        ("Runs", "runs"),
        ("BF", "balls_faced"),
        ("4s", "fours"),
        ("6s", "sixes"),
        ("SR", "strike_rate"),
        ("Pos", "position_batted"),
        ("Dismissal", "dismissal_type"),
        ("Inns", "inning"),
        ("Opposition", "opposition"),
        ("Ground", "venue"),
        ("Start Date", "date"),
    ]
)

In [27]:
# Apply the header mapping to the column axis.
# `columns=` states the target axis explicitly; the copy=True argument was dropped
# because a copy is already the default and newer pandas releases deprecate the keyword.
scores = scores.rename(columns=renamed_cols)

In [28]:
# Display the frame to verify the new column names.
scores

Unnamed: 0,runs,balls_faced,fours,sixes,strike_rate,position_batted,dismissal_type,inning,opposition,venue,date,format,match_id
0,177,301,23,1,58.80,6,lbw,2,West Indies,Eden Gardens,2013-11-06,Test,676525
1,111,127,11,3,87.40,6,not out,2,West Indies,Wankhede,2013-11-14,Test,676527
2,14,42,1,0,33.33,5,caught,1,South Africa,Johannesburg,2013-12-18,Test,648665
3,6,13,1,0,46.15,5,bowled,3,South Africa,Johannesburg,2013-12-18,Test,648665
4,0,1,0,0,0.00,5,bowled,1,South Africa,Durban,2013-12-26,Test,648667
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1009,7,8,1,0,87.50,2,caught,1,GT,Wankhede,2025-05-06,Twenty20,1473493
1010,5,5,1,0,100.00,2,caught,1,DC,Wankhede,2025-05-21,Twenty20,1473501
1011,24,21,2,1,114.28,2,caught,1,Punjab Kings,Jaipur,2025-05-26,Twenty20,1473506
1012,81,50,9,4,162.00,1,caught,1,GT,New Chandigarh,2025-05-30,Twenty20,1473509


### Clean Results and Awards Table

#### Check and Remove Duplicates in results

In [29]:
# Preview rows of match_results that exactly repeat an earlier row.
match_results[match_results.duplicated()]

Unnamed: 0,result,date,match_id
507,won,19 Sep 2007,287873
508,won,20 Sep 2007,287876
509,won,22 Sep 2007,287878
510,won,24 Sep 2007,287879
511,won,20 Oct 2007,297800
...,...,...,...
942,won,20 Jun 2024,1415743
943,won,22 Jun 2024,1415747
944,won,24 Jun 2024,1415751
945,won,27 Jun 2024,1415754


In [30]:
# Keep one copy of each distinct match_results row.
match_results = match_results.drop_duplicates()

#### Check and Remove duplicates in match_awards

In [31]:
# Preview rows of match_awards that exactly repeat an earlier row.
match_awards[match_awards.duplicated()]

Unnamed: 0,award,date,match_id
44,player of the match,20 Sep 2007,287876
48,player of the match,9 Jan 2011,463149
56,player of the match,24 Feb 2016,966745
57,player of the match,3 Mar 2016,966761
63,player of the match,22 Dec 2017,1122730
64,player of the match,14 Mar 2018,1133821
67,player of the match,8 Jul 2018,1119545
68,player of the match,6 Nov 2018,1157760
70,player of the match,7 Nov 2019,1187014
71,player of the match,29 Jan 2020,1187679


In [32]:
# Keep one copy of each distinct match_awards row.
match_awards = match_awards.drop_duplicates()

#### Check and Remove duplicates in dismissal_list table

In [52]:
# Check the column dtypes of match_awards.
# NOTE(review): this cell's execution count (52) is out of sequence with the cells
# around it — re-run the notebook top to bottom to confirm it works in this position.
match_awards.dtypes

award       object
date        object
match_id     int64
dtype: object

In [34]:
# Preview rows of dismissal_list that exactly repeat an earlier row.
dismissal_list[dismissal_list.duplicated()]

Unnamed: 0,dismissal_type,bowler,inning,match_id
559,DNB,,1,287873
560,not out,,1,287876
561,not out,,1,287878
562,not out,,1,287879
563,DNB,,2,297800
...,...,...,...,...
994,caught,Fazalhaq Farooqi,1,1415743
995,caught,Shakib Al Hasan,1,1415747
996,bowled,MA Starc,1,1415751
997,bowled,AU Rashid,1,1415754


In [35]:
# Keep one copy of each distinct dismissal_list row.
dismissal_list = dismissal_list.drop_duplicates()

In [36]:
# Rows whose match_id already appeared in an earlier row. The rows shown are
# inning 3 or 4 entries, i.e. a further innings in the same match rather than
# repeated records, so nothing is dropped here.
dismissal_list[dismissal_list.duplicated("match_id")]

Unnamed: 0,dismissal_type,bowler,inning,match_id
3,bowled,JH Kallis,3,648665
5,lbw,VD Philander,3,648667
7,caught,TG Southee,4,667651
9,not out,,4,667653
11,caught,JM Anderson,4,667715
13,caught,NM Lyon,4,754737
15,caught,MG Johnson,3,754739
17,caught,SR Watson,4,754743
20,bowled,HMRKB Herath,4,895773
22,caught,PHT Kaushal,3,895775


In [37]:
# Rows of scores whose match_id already appeared in an earlier row.
duplicate_match_id = scores.loc[scores.duplicated(subset="match_id")]

In [38]:
# Number of scores rows that share a match_id with an earlier row.
len(duplicate_match_id)

50

In [39]:
# Dismissal records for the matches that have more than one row in scores.
dismissal_list[dismissal_list["match_id"].isin(duplicate_match_id["match_id"])]

Unnamed: 0,dismissal_type,bowler,inning,match_id
2,caught,VD Philander,1,648665
3,bowled,JH Kallis,3,648665
4,bowled,DW Steyn,1,648667
5,lbw,VD Philander,3,648667
6,bowled,TA Boult,2,667651
...,...,...,...,...
112,caught,MJ Henry,4,1439898
113,lbw,SM Boland,1,1426556
114,bowled,PJ Cummins,3,1426556
117,caught,PJ Cummins,2,1426558


### Merging all the tables

In [40]:
# Left-join everything onto scores so every batting row is kept:
#   1. match results, by match
#   2. match awards, by match
#   3. dismissal details, by match and inning
scores_merged = scores.merge(match_results, how="left", on="match_id")
scores_merged = scores_merged.merge(match_awards, how="left", on="match_id")
scores_merged = scores_merged.merge(
    dismissal_list, how="left", on=["match_id", "inning"]
)

In [41]:
# Display the merged frame; overlapping column names carry _x / _y suffixes.
scores_merged

Unnamed: 0,runs,balls_faced,fours,sixes,strike_rate,position_batted,dismissal_type_x,inning,opposition,venue,date_x,format,match_id,result,date_y,award,date,dismissal_type_y,bowler
0,177,301,23,1,58.80,6,lbw,2,West Indies,Eden Gardens,2013-11-06,Test,676525,won,6 Nov 2013,player of the match,6 Nov 2013,lbw,V Permaul
1,111,127,11,3,87.40,6,not out,2,West Indies,Wankhede,2013-11-14,Test,676527,won,14 Nov 2013,,,not out,
2,14,42,1,0,33.33,5,caught,1,South Africa,Johannesburg,2013-12-18,Test,648665,draw,18 Dec 2013,,,caught,VD Philander
3,6,13,1,0,46.15,5,bowled,3,South Africa,Johannesburg,2013-12-18,Test,648665,draw,18 Dec 2013,,,bowled,JH Kallis
4,0,1,0,0,0.00,5,bowled,1,South Africa,Durban,2013-12-26,Test,648667,lost,26 Dec 2013,,,bowled,DW Steyn
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
826,7,8,1,0,87.50,2,caught,1,GT,Wankhede,2025-05-06,Twenty20,1473493,lost,6 May 2025,,,caught,Arshad Khan
827,5,5,1,0,100.00,2,caught,1,DC,Wankhede,2025-05-21,Twenty20,1473501,won,21 May 2025,,,caught,Mustafizur Rahman
828,24,21,2,1,114.28,2,caught,1,Punjab Kings,Jaipur,2025-05-26,Twenty20,1473506,lost,26 May 2025,,,caught,Harpreet Brar
829,81,50,9,4,162.00,1,caught,1,GT,New Chandigarh,2025-05-30,Twenty20,1473509,won,30 May 2025,player of the match,30 May 2025,caught,M Prasidh Krishna


#### Clean up 'award' column

In [42]:
# Flag each row "yes" when its award is exactly "player of the match", else "no"
# (rows with no award compare unequal and therefore get "no").
scores_merged["potm"] = (
    scores_merged["award"].eq("player of the match").map({True: "yes", False: "no"})
)

In [43]:
# Spot-check the new potm column on the first rows.
scores_merged.head()

Unnamed: 0,runs,balls_faced,fours,sixes,strike_rate,position_batted,dismissal_type_x,inning,opposition,venue,date_x,format,match_id,result,date_y,award,date,dismissal_type_y,bowler,potm
0,177,301,23,1,58.8,6,lbw,2,West Indies,Eden Gardens,2013-11-06,Test,676525,won,6 Nov 2013,player of the match,6 Nov 2013,lbw,V Permaul,yes
1,111,127,11,3,87.4,6,not out,2,West Indies,Wankhede,2013-11-14,Test,676527,won,14 Nov 2013,,,not out,,no
2,14,42,1,0,33.33,5,caught,1,South Africa,Johannesburg,2013-12-18,Test,648665,draw,18 Dec 2013,,,caught,VD Philander,no
3,6,13,1,0,46.15,5,bowled,3,South Africa,Johannesburg,2013-12-18,Test,648665,draw,18 Dec 2013,,,bowled,JH Kallis,no
4,0,1,0,0,0.0,5,bowled,1,South Africa,Durban,2013-12-26,Test,648667,lost,26 Dec 2013,,,bowled,DW Steyn,no


#### Removing extra columns

In [44]:
# Drop the columns brought in by the joins that duplicate information already
# present (the extra date and dismissal columns) plus the raw award text.
scores_merged = scores_merged.drop(
    columns=["date_y", "date", "award", "dismissal_type_y"]
)

In [45]:
# Spot-check that the extra columns are gone.
scores_merged.head()

Unnamed: 0,runs,balls_faced,fours,sixes,strike_rate,position_batted,dismissal_type_x,inning,opposition,venue,date_x,format,match_id,result,bowler,potm
0,177,301,23,1,58.8,6,lbw,2,West Indies,Eden Gardens,2013-11-06,Test,676525,won,V Permaul,yes
1,111,127,11,3,87.4,6,not out,2,West Indies,Wankhede,2013-11-14,Test,676527,won,,no
2,14,42,1,0,33.33,5,caught,1,South Africa,Johannesburg,2013-12-18,Test,648665,draw,VD Philander,no
3,6,13,1,0,46.15,5,bowled,3,South Africa,Johannesburg,2013-12-18,Test,648665,draw,JH Kallis,no
4,0,1,0,0,0.0,5,bowled,1,South Africa,Durban,2013-12-26,Test,648667,lost,DW Steyn,no


#### Renaming the date and dismissal_type columns

In [46]:
# Remove the "_x" merge suffix from the two columns that were kept.
scores_merged = scores_merged.rename(
    columns={"date_x": "date", "dismissal_type_x": "dismissal_type"}
)

In [47]:
# Spot-check the restored column names.
scores_merged.head()

Unnamed: 0,runs,balls_faced,fours,sixes,strike_rate,position_batted,dismissal_type,inning,opposition,venue,date,format,match_id,result,bowler,potm
0,177,301,23,1,58.8,6,lbw,2,West Indies,Eden Gardens,2013-11-06,Test,676525,won,V Permaul,yes
1,111,127,11,3,87.4,6,not out,2,West Indies,Wankhede,2013-11-14,Test,676527,won,,no
2,14,42,1,0,33.33,5,caught,1,South Africa,Johannesburg,2013-12-18,Test,648665,draw,VD Philander,no
3,6,13,1,0,46.15,5,bowled,3,South Africa,Johannesburg,2013-12-18,Test,648665,draw,JH Kallis,no
4,0,1,0,0,0.0,5,bowled,1,South Africa,Durban,2013-12-26,Test,648667,lost,DW Steyn,no


In [48]:
# List the current columns before choosing the final column order.
scores_merged.columns

Index(['runs', 'balls_faced', 'fours', 'sixes', 'strike_rate',
       'position_batted', 'dismissal_type', 'inning', 'opposition', 'venue',
       'date', 'format', 'match_id', 'result', 'bowler', 'potm'],
      dtype='object')

In [49]:
# Keep the reporting columns in presentation order (match_id is not carried
# forward), then sort chronologically.
# kind="stable" preserves the existing relative order of rows that share a date
# (for example two innings of the same Test); the default sort algorithm gives
# no ordering guarantee for ties.
scores_merged = scores_merged[
    [
        "runs",
        "balls_faced",
        "fours",
        "sixes",
        "strike_rate",
        "position_batted",
        "bowler",
        "dismissal_type",
        "result",
        "potm",
        "date",
        "inning",
        "opposition",
        "venue",
        "format",
    ]
].sort_values("date", kind="stable")

In [50]:
# Display the final, date-sorted frame.
scores_merged

Unnamed: 0,runs,balls_faced,fours,sixes,strike_rate,position_batted,bowler,dismissal_type,result,potm,date,inning,opposition,venue,format
532,40,37,5,1,108.10,3,,not out,won,no,2007-04-03,2,Baroda,Wankhede,Twenty20
533,101,45,13,5,224.44,3,,not out,won,no,2007-04-04,2,Gujarat,Brabourne,Twenty20
534,13,11,0,1,118.18,3,SP Jobanputra,caught,won,no,2007-04-05,1,Saurashtra,Wankhede,Twenty20
535,18,11,2,1,163.63,3,SS Mundhe,caught,lost,no,2007-04-06,2,Maharashtra,Brabourne,Twenty20
536,25,26,1,1,96.15,4,R Prasanna,bowled,lost,no,2007-04-16,2,Tamil Nadu,Ahmedabad,Twenty20
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
826,7,8,1,0,87.50,2,Arshad Khan,caught,lost,no,2025-05-06,1,GT,Wankhede,Twenty20
827,5,5,1,0,100.00,2,Mustafizur Rahman,caught,won,no,2025-05-21,1,DC,Wankhede,Twenty20
828,24,21,2,1,114.28,2,Harpreet Brar,caught,lost,no,2025-05-26,1,Punjab Kings,Jaipur,Twenty20
829,81,50,9,4,162.00,1,M Prasidh Krishna,caught,won,yes,2025-05-30,1,GT,New Chandigarh,Twenty20


In [53]:
# Read the career CSV from the output folder.
# NOTE(review): no cell visible in this notebook writes this file — confirm it
# exists (or add the export step) before relying on Restart & Run All.
rohit_sharma = pd.read_csv("../output/csv/rohit_sharma_career.csv")