In [149]:
from io import StringIO

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [150]:
# Download the IPL 2023 team match-results page.
URL = "https://www.espncricinfo.com/records/tournament/team-match-results/indian-premier-league-2023-15129"
# Without a timeout a stalled connection would block the kernel indefinitely.
response = requests.get(URL, timeout=30)
# Fail here on a 4xx/5xx reply rather than parsing an error page further down.
response.raise_for_status()
print(response)

<Response [200]>


In [151]:
# Parse the downloaded markup and keep the first <table> element on the page.
soup = BeautifulSoup(markup=response.text, features='html.parser')
table = soup.select('table')[0]

In [152]:
# Convert the results table into a DataFrame.
# The markup is wrapped in StringIO because pandas 2.1+ deprecates passing a
# literal HTML string to read_html; a file-like object works on all versions.
df = pd.read_html(StringIO(str(table)))[0]

In [153]:
# Build a "<Team 1> Vs <Team 2>" label and place it as the first column.
match_column = (df['Team 1'] + ' Vs ' + df['Team 2']).rename('Match')
if 'Match' in df.columns:
    # Remove any existing copy so the insert below cannot collide with it.
    del df['Match']
df.insert(0, 'Match', match_column)

In [154]:
# Replace the space in the date column's label with an underscore.
df = df.rename({'Match Date': 'Match_Date'}, axis='columns')

In [155]:
# Prepare to extract the links held in the last column of the results table:
# every <tr> of the table, plus an empty list that will receive the hrefs.
rows = table.find_all('tr')
last_column_links = []

In [156]:
# Collect the href found in the last cell of every data row.
# The list is rebuilt from scratch so re-running this cell cannot append
# duplicate links to a list left over from a previous run.
last_column_links = []
for row in rows[1:]:  # Exclude the header row
    cells = row.find_all('td')
    if len(cells) < 7:
        # Rows with fewer than seven cells are skipped.
        continue
    anchor = cells[-1].find('a')
    if anchor is not None and anchor.get('href'):
        last_column_links.append(anchor['href'])

# Build the frame once, after the loop: it is then defined even when no link
# was found, and is not reconstructed on every iteration.
# NOTE(review): rows without a link are skipped, so this frame can be shorter
# than `df`; a positional concat would then misalign — confirm the counts match.
df_links = pd.DataFrame({'Links': last_column_links})

In [157]:
# Turn site-relative hrefs into absolute URLs.
# Links that already start with "http" are left untouched, so re-running this
# cell does not prepend the domain a second time.
base_url = 'https://www.espncricinfo.com'
already_absolute = df_links['Links'].str.startswith('http', na=False)
df_links['Links'] = df_links['Links'].where(already_absolute, base_url + df_links['Links'])


In [158]:
# Widen the column display so the longest link is shown without truncation
# (longest link length plus a margin of 10 characters).
link_lengths = df_links['Links'].str.len()
max_length = link_lengths.max()
pd.set_option('display.max_colwidth', max_length + 10)

In [159]:
# Place the links column next to the match results, aligned on the row index.
Summary_df = pd.concat([df, df_links], axis='columns')


In [160]:
# Preview the first two rows of the combined results-and-links frame.
Summary_df.head(2)

Unnamed: 0,Match,Team 1,Team 2,Winner,Margin,Ground,Match_Date,Scorecard,Links
0,Titans Vs Super Kings,Titans,Super Kings,Titans,5 wickets,Ahmedabad,"Mar 31, 2023",T20,https://www.espncricinfo.com/series/indian-premier-league-2023-1345038/gujarat-titans-vs-chennai-super-kings-1st-match-1359475/full-scorecard
1,Punjab Kings Vs KKR,Punjab Kings,KKR,Punjab Kings,7 runs,Mohali,"Apr 1, 2023",T20,https://www.espncricinfo.com/series/indian-premier-league-2023-1345038/punjab-kings-vs-kolkata-knight-riders-2nd-match-1359476/full-scorecard


In [161]:
# Scrape the first table of every scorecard page into one combined frame.
# Loop-local names are kept distinct from the notebook-level `df`, `table`,
# `soup` and `response`, so this cell does not overwrite the results-page
# objects that earlier cells read.
first_innings_frames = []  # one cleaned frame per match, concatenated once below

for _, row in Summary_df.iterrows():
    scorecard_link = row['Links']
    match_info = row['Match']

    # A timeout stops one stalled request from hanging the whole run, and
    # raise_for_status surfaces HTTP errors instead of parsing an error page.
    scorecard_response = requests.get(scorecard_link, timeout=30)
    scorecard_response.raise_for_status()
    scorecard_soup = BeautifulSoup(scorecard_response.content, 'html.parser')

    # NOTE(review): assumes table 0 is the first-innings batting card — confirm.
    batting_table = scorecard_soup.select('table')[0]
    # StringIO: pandas 2.1+ deprecates literal HTML strings in read_html.
    innings_df = pd.read_html(StringIO(str(batting_table)))[0]

    # Team name is read from the first element matching these CSS classes.
    main_div = scorecard_soup.find('div', class_='ds-flex ds-flex-col ds-grow ds-justify-center')
    span_element = main_div.find('span', class_='ds-text-title-xs ds-font-bold ds-capitalize')
    team_name = span_element.text

    innings_df['Match'] = match_info
    innings_df['Team'] = team_name
    # errors='ignore' keeps the run going when a table lacks these columns.
    innings_df = innings_df.drop(columns=['Unnamed: 8', 'Unnamed: 9'], errors='ignore')

    # Move 'Match' and 'Team' to the front, keeping the other columns in order.
    cols = ['Match', 'Team'] + [col for col in innings_df.columns if col not in ['Match', 'Team']]
    innings_df = innings_df[cols]
    # Rows without a value in BATTING are discarded.
    innings_df = innings_df.dropna(subset=['BATTING'])
    first_innings_frames.append(innings_df)

# Concatenate once instead of growing the frame inside the loop; fall back to
# an empty frame when there was nothing to scrape.
final_df = (
    pd.concat(first_innings_frames, ignore_index=True)
    if first_innings_frames
    else pd.DataFrame()
)

In [162]:
# Cast BATTING to str, then discard the rows whose BATTING text contains one
# of the scorecard summary labels.
final_df['BATTING'] = final_df['BATTING'].astype(str)
summary_rows = final_df['BATTING'].str.contains('TOTAL|Did not bat|Extras|Fall of wickets')
final_df = final_df.loc[~summary_rows]


In [163]:
# Alias, not a copy: df_firstinnings and final_df name the same DataFrame object.
df_firstinnings = final_df

In [164]:
# Preview the first two rows of the first-innings batting frame.
df_firstinnings.head(2)

Unnamed: 0,Match,Team,BATTING,Unnamed: 1,R,B,M,4s,6s,SR
0,Titans Vs Super Kings,Chennai Super Kings,Devon Conway,b Mohammed Shami,1,6,-,0,0,16.66
1,Titans Vs Super Kings,Chennai Super Kings,Ruturaj Gaikwad,c Shubman Gill b Joseph,92,50,-,4,9,184.0


In [212]:
df_secondinnings = pd.DataFrame()
for _, row in Summary_df.iterrows():
    scorecard_link = row['Links']
    match_info = row['Match']
    response = requests.get(scorecard_link)
    soup = BeautifulSoup(response.content, 'html.parser')
    table_second_innings = soup.select('table')[2]

    df_1 = pd.read_html(str(table_second_innings))[0]
    main_span = soup.find('span', class_='ds-ml-2')
    span_element = main_span.find('span', class_='ds-text-tight-xs')
    team_name = span_element.text
    #print(team_name)

    df_1['Match'] = match_info
    df_1['Team'] = team_name
    #df_1 = df_1.drop(columns=['Unnamed: 8', 'Unnamed: 9'])

    cols = df_1.columns.tolist()
    cols = ['Match', 'Team'] + [col for col in cols if col not in ['Match', 'Team']]
    df_1 = df_1[cols]
    #df_1 = df_1.dropna(subset=['BATTING'])
    df_secondinnings = pd.concat([df_secondinnings, df_1], ignore_index=True)


In [218]:
# Strip the literal " Innings" text from the team label.
df_secondinnings['Team'] = df_secondinnings['Team'].str.replace(' Innings', '', regex=False)


In [219]:
# Display the second-innings frame as scraped, before the cleaning cells below.
df_secondinnings

Unnamed: 0,Match,Team,BATTING,Unnamed: 1,R,B,M,4s,6s,SR,Unnamed: 8,Unnamed: 9,0,1
0,Titans Vs Super Kings,Titans,Wriddhiman Saha †,c Dube b Hangargekar,25,16,-,2,2,156.25,,,,
1,Titans Vs Super Kings,Titans,,,,,,,,,,,,
2,Titans Vs Super Kings,Titans,Shubman Gill,c Gaikwad b Deshpande,63,36,-,6,3,175.00,,,,
3,Titans Vs Super Kings,Titans,,,,,,,,,,,,
4,Titans Vs Super Kings,Titans,Sai Sudharsan,c †Dhoni b Hangargekar,22,17,-,3,0,129.41,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1334,Titans Vs Super Kings,Super Kings,Ravindra Jadeja,not out,15,6,14,1,1,250.00,,,,
1335,Titans Vs Super Kings,Super Kings,Extras,"(lb 1, w 4)",5,,,,,,,,,
1336,Titans Vs Super Kings,Super Kings,TOTAL,15 Ov (RR: 11.40),171/5,,,,,,,,,
1337,Titans Vs Super Kings,Super Kings,"Did not bat: Moeen Ali, Deepak Chahar, Tushar Deshpande, Maheesh Theekshana","Did not bat: Moeen Ali, Deepak Chahar, Tushar Deshpande, Maheesh Theekshana","Did not bat: Moeen Ali, Deepak Chahar, Tushar Deshpande, Maheesh Theekshana","Did not bat: Moeen Ali, Deepak Chahar, Tushar Deshpande, Maheesh Theekshana","Did not bat: Moeen Ali, Deepak Chahar, Tushar Deshpande, Maheesh Theekshana","Did not bat: Moeen Ali, Deepak Chahar, Tushar Deshpande, Maheesh Theekshana","Did not bat: Moeen Ali, Deepak Chahar, Tushar Deshpande, Maheesh Theekshana","Did not bat: Moeen Ali, Deepak Chahar, Tushar Deshpande, Maheesh Theekshana","Did not bat: Moeen Ali, Deepak Chahar, Tushar Deshpande, Maheesh Theekshana",,,


In [220]:
# Keep only the rows that have a value in the BATTING column.
df_secondinnings = df_secondinnings[df_secondinnings['BATTING'].notna()]

In [222]:
# Renumber the rows from zero; the previous index is kept as an 'index' column.
df_secondinnings = df_secondinnings.reset_index(drop=False)

In [224]:
# Remove the 'index' column left behind by reset_index.
# errors='ignore' makes the cell safe to re-run: a second execution no longer
# raises KeyError because the column is already gone.
df_secondinnings = df_secondinnings.drop(columns=['index'], errors='ignore')

In [226]:
# Cast BATTING to str, then discard the rows whose BATTING text contains one
# of the scorecard summary labels.
df_secondinnings['BATTING'] = df_secondinnings['BATTING'].astype(str)
summary_rows = df_secondinnings['BATTING'].str.contains('TOTAL|Did not bat|Extras|Fall of wickets')
df_secondinnings = df_secondinnings.loc[~summary_rows]


In [228]:
# List the column labels currently present in the second-innings frame.
df_secondinnings.columns

Index([     'Match',       'Team',    'BATTING', 'Unnamed: 1',          'R',
                'B',          'M',         '4s',         '6s',         'SR',
       'Unnamed: 8', 'Unnamed: 9',            0,            1],
      dtype='object')

In [229]:
# Keep only the batting columns, in this order.
batting_columns = ['Match', 'Team', 'BATTING', 'Unnamed: 1', 'R', 'B', 'M', '4s', '6s', 'SR']
df_secondinnings = df_secondinnings[batting_columns]

In [230]:
# Display the second-innings frame after the column selection above.
df_secondinnings

Unnamed: 0,Match,Team,BATTING,Unnamed: 1,R,B,M,4s,6s,SR
0,Titans Vs Super Kings,Titans,Wriddhiman Saha †,c Dube b Hangargekar,25,16,-,2,2,156.25
1,Titans Vs Super Kings,Titans,Shubman Gill,c Gaikwad b Deshpande,63,36,-,6,3,175.00
2,Titans Vs Super Kings,Titans,Sai Sudharsan,c †Dhoni b Hangargekar,22,17,-,3,0,129.41
3,Titans Vs Super Kings,Titans,Hardik Pandya (c),b Jadeja,8,11,-,0,0,72.72
4,Titans Vs Super Kings,Titans,Vijay Shankar,c Santner b Hangargekar,27,21,-,2,1,128.57
...,...,...,...,...,...,...,...,...,...,...
860,Titans Vs Super Kings,Super Kings,Shivam Dube,not out,32,21,49,0,2,152.38
861,Titans Vs Super Kings,Super Kings,Ajinkya Rahane,c Shankar b Sharma,27,13,20,2,2,207.69
862,Titans Vs Super Kings,Super Kings,Ambati Rayudu,c & b Sharma,19,8,8,1,2,237.50
863,Titans Vs Super Kings,Super Kings,MS Dhoni (c)†,c Miller b Sharma,0,1,1,0,0,0.00


In [231]:
# Stack first- and second-innings batting rows into one frame with a fresh index.
batting_parts = [df_firstinnings, df_secondinnings]
Df_batting_summary = pd.concat(batting_parts, ignore_index=True)

In [232]:
# Display the combined batting frame for both innings.
Df_batting_summary

Unnamed: 0,Match,Team,BATTING,Unnamed: 1,R,B,M,4s,6s,SR
0,Titans Vs Super Kings,Chennai Super Kings,Devon Conway,b Mohammed Shami,1,6,-,0,0,16.66
1,Titans Vs Super Kings,Chennai Super Kings,Ruturaj Gaikwad,c Shubman Gill b Joseph,92,50,-,4,9,184.00
2,Titans Vs Super Kings,Chennai Super Kings,Moeen Ali,c †Saha b Rashid Khan,23,17,-,4,1,135.29
3,Titans Vs Super Kings,Chennai Super Kings,Ben Stokes,c †Saha b Rashid Khan,7,6,-,1,0,116.66
4,Titans Vs Super Kings,Chennai Super Kings,Ambati Rayudu,b Little,12,12,-,0,1,100.00
...,...,...,...,...,...,...,...,...,...,...
1176,Titans Vs Super Kings,Super Kings,Shivam Dube,not out,32,21,49,0,2,152.38
1177,Titans Vs Super Kings,Super Kings,Ajinkya Rahane,c Shankar b Sharma,27,13,20,2,2,207.69
1178,Titans Vs Super Kings,Super Kings,Ambati Rayudu,c & b Sharma,19,8,8,1,2,237.50
1179,Titans Vs Super Kings,Super Kings,MS Dhoni (c)†,c Miller b Sharma,0,1,1,0,0,0.00


In [234]:
# Scrape the second table (index 1) of every scorecard page into one frame.
first_bowling_frames = []  # one frame per match, concatenated once below

for _, row in Summary_df.iterrows():
    scorecard_link = row['Links']
    match_info = row['Match']

    # A timeout stops one stalled request from hanging the whole run, and
    # raise_for_status surfaces HTTP errors instead of parsing an error page.
    response = requests.get(scorecard_link, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    # NOTE(review): assumes table 1 is the first-innings bowling card — confirm.
    table_first_bowling = soup.select('table')[1]
    # StringIO: pandas 2.1+ deprecates literal HTML strings in read_html.
    df_2 = pd.read_html(StringIO(str(table_first_bowling)))[0]
    df_2['Match'] = match_info

    # Move 'Match' to the front, keeping the other columns in order.
    cols = ['Match'] + [col for col in df_2.columns if col != 'Match']
    df_2 = df_2[cols]
    first_bowling_frames.append(df_2)

if first_bowling_frames:
    df_bowling = pd.concat(first_bowling_frames, ignore_index=True)
    # Discard rows whose BOWLING text starts with a digit. The filter runs once
    # on the combined frame rather than on every iteration, and the pattern is
    # a raw string so that \d is not an invalid string escape.
    starts_with_digit = df_bowling['BOWLING'].astype(str).str.match(r'^\d')
    df_bowling = df_bowling[~starts_with_digit].reset_index(drop=True)
else:
    # Nothing to scrape: keep the name defined as an empty frame.
    df_bowling = pd.DataFrame()


In [235]:
# Scrape the fourth table (index 3) of every scorecard page into one frame.
second_bowling_frames = []  # one frame per match, concatenated once below

for _, row in Summary_df.iterrows():
    scorecard_link = row['Links']
    match_info = row['Match']

    # A timeout stops one stalled request from hanging the whole run, and
    # raise_for_status surfaces HTTP errors instead of parsing an error page.
    response = requests.get(scorecard_link, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    # NOTE(review): assumes table 3 is the second-innings bowling card — confirm.
    table_second_bowling = soup.select('table')[3]
    # StringIO: pandas 2.1+ deprecates literal HTML strings in read_html.
    df_3 = pd.read_html(StringIO(str(table_second_bowling)))[0]
    df_3['Match'] = match_info

    # Move 'Match' to the front, keeping the other columns in order.
    cols = ['Match'] + [col for col in df_3.columns if col != 'Match']
    df_3 = df_3[cols]
    second_bowling_frames.append(df_3)

if second_bowling_frames:
    df_bowling_2 = pd.concat(second_bowling_frames, ignore_index=True)
    # Discard rows whose BOWLING text starts with a digit. The filter runs once
    # on the combined frame rather than on every iteration, and the pattern is
    # a raw string so that \d is not an invalid string escape.
    starts_with_digit = df_bowling_2['BOWLING'].astype(str).str.match(r'^\d')
    df_bowling_2 = df_bowling_2[~starts_with_digit].reset_index(drop=True)
else:
    # Nothing to scrape: keep the name defined as an empty frame.
    df_bowling_2 = pd.DataFrame()

In [236]:
# Keep only the bowling columns, in this order.
bowling_columns = ['Match', 'BOWLING', 'O', 'M', 'R', 'W', 'ECON', '0s', '4s', '6s', 'WD', 'NB']
df_bowling_2 = df_bowling_2[bowling_columns]

In [237]:
# Stack both bowling frames into one frame with a fresh index.
bowling_parts = [df_bowling, df_bowling_2]
Df_bowling_summary = pd.concat(bowling_parts, ignore_index=True)

In [238]:
# Display the combined bowling frame.
Df_bowling_summary

Unnamed: 0,Match,BOWLING,O,M,R,W,ECON,0s,4s,6s,WD,NB
0,Titans Vs Super Kings,Mohammed Shami,4,0,29,2,7.25,13,2,2,0,1
1,Titans Vs Super Kings,Hardik Pandya,3,0,28,0,9.33,6,2,2,0,0
2,Titans Vs Super Kings,Josh Little,4,0,41,1,10.25,10,4,3,0,0
3,Titans Vs Super Kings,Rashid Khan,4,0,26,2,6.50,10,2,1,0,0
4,Titans Vs Super Kings,Alzarri Joseph,4,0,33,2,8.25,8,0,3,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
905,Titans Vs Super Kings,Hardik Pandya,1,0,14,0,14.00,1,1,1,1,0
906,Titans Vs Super Kings,Rashid Khan,3,0,44,0,14.66,2,4,3,0,0
907,Titans Vs Super Kings,Noor Ahmad,3,0,17,2,5.66,6,0,0,3,0
908,Titans Vs Super Kings,Josh Little,2,0,30,0,15.00,1,0,3,0,0


In [None]:
# Fetch a Wikipedia article and print the text of its main title element.
url = "https://en.wikipedia.org/wiki/3i"  # single slash before "wiki"
# Timeout prevents an indefinite hang; raise_for_status surfaces HTTP errors.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')

main_div = soup.find('div', class_="mw-content-container")
# find() returns None when nothing matches, so guard before chaining lookups.
span_element = (
    main_div.find('span', class_='mw-page-title-main')
    if main_div is not None
    else None
)
if span_element is None:
    raise ValueError("page title element not found; the page markup may have changed")

text = span_element.text
print(text)