## Scrapping Players Details for T20 Men's Cricket WorldCup 2022

In [128]:
from bs4 import BeautifulSoup as bs
import requests
import pandas as pd
from tqdm import tqdm

In [185]:
def page_content_getter(URL:str)-> bs:
    try:
        page_content = requests.get(URL).content
        soup = bs(page_content,"html.parser")
        return soup
    except Exception as e:
        print("Error occured: ",e)

In [4]:
URL = "https://www.espncricinfo.com/series/icc-men-s-t20-world-cup-2022-23-1298134/squads"
home_page = page_content_getter(URL)

#### Getting links for each team squad

In [15]:
squad_links = home_page.findAll("div",{"class":"ds-mb-4"})[0].findAll("a")

In [29]:
prefix = "https://www.espncricinfo.com"
players_url=[]
print(f"{'>'*20} Getting links for each squads in the WorldCup {'<'*20}")
print(f"{'_'*60}")
for url in tqdm(squad_links):
    ur = prefix+url.get("href")
    squad = page_content_getter(ur).findAll("div",{"class":"ds-relative ds-flex ds-flex-row ds-space-x-4 ds-p-4 lg:ds-px-6"})
    for link in squad:
        players_url.append(prefix+link.a.get("href"))
        

https://www.espncricinfo.com/series/icc-men-s-t20-world-cup-2022-23-1298134/afghanistan-squad-1334760/series-squads
https://www.espncricinfo.com/series/icc-men-s-t20-world-cup-2022-23-1298134/australia-squad-1331879/series-squads
https://www.espncricinfo.com/series/icc-men-s-t20-world-cup-2022-23-1298134/bangladesh-squad-1334654/series-squads
https://www.espncricinfo.com/series/icc-men-s-t20-world-cup-2022-23-1298134/england-squad-1332100/series-squads
https://www.espncricinfo.com/series/icc-men-s-t20-world-cup-2022-23-1298134/india-squad-1334371/series-squads
https://www.espncricinfo.com/series/icc-men-s-t20-world-cup-2022-23-1298134/ireland-squad-1336390/series-squads
https://www.espncricinfo.com/series/icc-men-s-t20-world-cup-2022-23-1298134/namibia-squad-1335339/series-squads
https://www.espncricinfo.com/series/icc-men-s-t20-world-cup-2022-23-1298134/netherlands-squad-1332846/series-squads
https://www.espncricinfo.com/series/icc-men-s-t20-world-cup-2022-23-1298134/new-zealand-squad

#### Collection details for each player in the WorldCup Squad

In [118]:
player = page_content_getter(players_url[209])

In [160]:
 def details_fetcher(player:bs)->tuple:
    name1 = player.findAll("h1",{"class":"ds-text-title-l ds-font-bold"})[0].text
    team1 = player.findAll("span", {"class":"ds-text-comfortable-s"})[0].text
    
    batter_attr = player.findAll("span",{"class":"ds-text-title-s ds-font-bold ds-text-ui-typo"})
    if len(batter_attr) ==6:
        bat_style1 = batter_attr[3].h5.text
        bowl_style1 = batter_attr[4].h5.text
        play_role1 = batter_attr[5].h5.text
    else:
        bat_style1 = batter_attr[3].h5.text
        bowl_style1 = ''
        play_role1 = batter_attr[4].h5.text
    
    description1 = player.findAll("div",{"class":"ci-player-bio-content"})
    if len(description1) != 0:
        description1 = description1[0].p.text
    else:
        description1 = ''
    return (name1, team1, bat_style1, bowl_style1, play_role1, description1)

In [164]:
name=[]
team=[]
image=[]
bat_style=[]
bowl_style=[]
play_role=[]
description=[]

print(f"{'>'*20} Scrapping players details {'<'*20}")
print(f"{'_'*60}")
for link in tqdm(players_url):
    try: 
        player=page_content_getter(link)
        details = details_fetcher(player)
        name.append(details[0])
        team.append(details[1])
        bat_style.append(details[2])
        bowl_style.append(details[3])
        play_role.append(details[4])
        description.append(details[5])
    except Exception as e:
        print(f"Error in {link} \nError:{e}")
        continue
print("Data Scarpped successfully!!!")
data_dict ={
    "Name": name,
    "Team": team,
    "Batting_Style": bat_style,
    "Bowling_Style": bowl_style,
    "Playing_Role": play_role,
    "Description": description
}

print("Saving data into Dataframe........")
players_details = pd.DataFrame(data_dict)

>>>>>>>>>>>>>>>>>>>> Scrapping players details <<<<<<<<<<<<<<<<<<<<
____________________________________________________________


100%|████████████████████████████████████████████████████████████████████████████████| 254/254 [05:41<00:00,  1.34s/it]

Data Scarpped successfully!!!
Saving data into Dataframe........





In [178]:
players_details.to_csv("players_details.csv",index=None)

In [172]:
url = prefix+squad_links[0].get("href")

In [173]:
img = page_content_getter(url).findAll("div",{"class":"ds-relative ds-flex ds-flex-row ds-space-x-4 ds-p-4 lg:ds-px-6"})

In [179]:
players_details

Unnamed: 0,Name,Team,Batting_Style,Bowling_Style,Playing_Role,Description
0,Mohammad Nabi,Afghanistan,Right hand Bat,Right arm Offbreak,Allrounder,Mohammad Nabi hails from a well-to-do family t...
1,Azmatullah Omarzai,Afghanistan,Right hand Bat,Right arm Medium fast,Allrounder,
2,Darwish Rasooli,Afghanistan,Right hand Bat,Right arm Offbreak,Top order Batter,
3,Fareed Ahmad,Afghanistan,Farid Malik,,Left hand Bat,
4,Fazalhaq Farooqi,Afghanistan,Right hand Bat,Left arm Fast medium,Bowler,
...,...,...,...,...,...,...
249,Blessing Muzarabani,Zimbabwe,Right hand Bat,Right arm Fast medium,Bowler,
250,Richard Ngarava,Zimbabwe,Left hand Bat,Left arm Fast medium,Bowler,
251,Milton Shumba,Zimbabwe,Left hand Bat,Slow Left arm Orthodox,Top order Batter,
252,Sikandar Raza,Zimbabwe,Right hand Bat,Right arm Offbreak,Batting Allrounder,Sikandar Raza's story isn't that of your every...


In [186]:
img = page_content_getter(prefix+players_url[0])

Error occured:  HTTPSConnectionPool(host='www.espncricinfo.comhttps', port=443): Max retries exceeded with url: //www.espncricinfo.com/player/mohammad-nabi-25913 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001A4B38B7730>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
