In [66]:
import requests # type: ignore
import os
import pandas as pd # type: ignore
from bs4 import BeautifulSoup # type: ignore
from dotenv import load_dotenv # type: ignore
from requests.exceptions import RequestException # type: ignore
from fake_useragent import UserAgent # type: ignore
from tenacity import (retry,retry_if_exception_type,stop_after_attempt , wait_exponential)
from datetime import date ,time

In [67]:
# Load settings from a local .env file so the os.getenv lookups further down can
# see them; the call returns True (shown as this cell's output) when it succeeds.
load_dotenv()

True

In [68]:
# Target URLs come from the environment rather than being hard-coded.
# Either value is None when the corresponding variable is not set.
base_url = os.environ.get("BASE_URL")
matches_url = os.environ.get("MATCHES_URL")

In [81]:
def genrate_random_useragent():
    """Return a Chrome user-agent string supplied by ``fake_useragent``.

    A new ``UserAgent`` instance is built on every call and asked for its
    ``'Chrome'`` entry; the resulting string is returned unchanged.
    """
    return UserAgent()['Chrome']



In [82]:
@retry(
    reraise = True,
    stop = stop_after_attempt(5),
    wait = wait_exponential(multiplier = 1 , min = 1 , max = 10),
    retry = (retry_if_exception_type(RequestException))
)
def requests_with_retry(url, timeout = 10):
    """GET ``url`` with a generated Chrome User-Agent, retrying on failure.

    The decorator retries the call up to 5 times with exponential back-off
    (bounded between 1 and 10 seconds) whenever a ``RequestException`` is
    raised, and re-raises the last exception once the attempts are used up.
    Each attempt asks ``genrate_random_useragent`` for a fresh User-Agent.

    Args:
        url: Address to fetch.
        timeout: Seconds to wait for the server before giving up on one
            attempt. Without a timeout ``requests`` can block indefinitely,
            which would also prevent the retry policy from ever kicking in.

    Returns:
        The ``requests.Response`` of the first attempt that completed with a
        non-error HTTP status.

    Raises:
        RequestException: The original error (e.g. ``HTTPError``, ``Timeout``,
            ``ConnectionError``) of the final failed attempt.
    """
    user_agent = genrate_random_useragent()
    headers = {'User-Agent': user_agent}

    try:
        res = requests.get(url=url, headers=headers, timeout=timeout)
        # Turn 4xx/5xx responses into HTTPError so they are retried as well.
        res.raise_for_status()
    except RequestException as e:
        print(f"Exception occured : {e}")
        # Bare raise keeps the original exception type, message, response and
        # traceback; raising a new RequestException would discard all of them.
        raise

    # Show the headers that were actually sent with the successful request.
    print(res.request.headers)
    return res
           
    

In [83]:
# Download the matches listing page (retries are handled inside the helper).
response = requests_with_retry(url = matches_url)
# Name the parser explicitly: without it BeautifulSoup picks whichever parser
# happens to be installed, warns about the guess, and may build a different
# tree on another machine. "html.parser" ships with Python itself.
soup = BeautifulSoup(response.content, "html.parser")


Exception occured : 403 Client Error: Forbidden for url: https://www.hltv.org/matches
{'User-Agent': 'Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Mobile Safari/537.36', 'Accept-Encoding': 'gzip, deflate', 'Accept': '*/*', 'Connection': 'keep-alive'}


In [84]:
# Collect every <div> carrying the 'upcomingMatchesSection' class; the
# extraction loop below reads one date headline and its matches from each.
upcomming_matches_sections = soup.find_all('div', attrs={'class': 'upcomingMatchesSection'})

In [85]:
def _text_or(tag, default=None):
    """Return ``tag.text``, or ``default`` when the lookup found no tag (``None``)."""
    return tag.text if tag is not None else default


# One dictionary per upcoming match, in the order the matches appear on the page.
all_matches = []

for section in upcomming_matches_sections:
    # The date headline is looked up once per section and shared by its matches.
    match_date = _text_or(section.find('span', class_="matchDayHeadline"))

    for match in section.find_all('div', class_='upcomingMatch'):
        if match.find('div', class_='matchInfoEmpty') is not None:
            # Entries marked 'matchInfoEmpty' keep the event name in a different
            # element and have both teams reported as 'TBD'.
            event_tag = match.find('span', class_='line-clamp-3')
            team_names = []
        else:
            event_tag = match.find('div', class_='matchEventName')
            team_names = [
                team.text for team in match.find_all('div', class_='matchTeamName')
            ]

        # Pad with 'TBD' so zero or one listed team fills the gaps instead of
        # raising IndexError; any names beyond the first two are ignored.
        team_1, team_2 = (team_names + ['TBD', 'TBD'])[:2]

        all_matches.append({
            'matchDate': match_date,
            'matchEventName': _text_or(event_tag),
            'matchMeta': _text_or(match.find('div', class_='matchMeta')),
            'matchTime': _text_or(match.find('div', class_='matchTime')),
            # .get() yields None rather than KeyError when 'stars' is absent.
            'matchRating': match.get('stars'),
            'matchTeam1': team_1,
            'matchTeam2': team_2,
        })

# all_matches now holds one dictionary per match, ready to be loaded into a DataFrame.



In [86]:
# Build the table in one step from the list of per-match dictionaries; the
# dictionary keys become the column names.
all_matches_df = pd.DataFrame(all_matches)

In [87]:
# Preview at most the first 20 rows instead of displaying the whole frame.
all_matches_df.head(20)

Unnamed: 0,matchDate,matchEventName,matchMeta,matchTime,matchRating,matchTeam1,matchTeam2
0,Wednesday - 2024-05-29,IEM Dallas 2024,bo3,19:00,2,Natus Vincere,HEROIC
1,Wednesday - 2024-05-29,IEM Dallas 2024,bo3,19:00,1,Virtus.pro,BIG
2,Wednesday - 2024-05-29,CCT Season 2 South America Series 1,bo3,19:30,0,FURIA Academy,Bounty Hunters
3,Wednesday - 2024-05-29,ESEA Advanced Season 49 Europe,bo3,20:00,0,V1dar,Grannys Knockers
4,Wednesday - 2024-05-29,ESEA Advanced Season 49 Europe,bo3,20:00,0,Enterprise,FLuffy Gangsters
5,Wednesday - 2024-05-29,ESEA Advanced Season 49 Europe,bo3,20:30,0,DMS,Verdant
6,Wednesday - 2024-05-29,YGames Pro Series 2024,bo3,21:00,0,INFINITE,Illuminar
7,Wednesday - 2024-05-29,ESEA Advanced Season 49 Europe,bo3,21:00,0,Zero Tenacity,JANO
8,Wednesday - 2024-05-29,ESEA Advanced Season 49 Europe,bo3,21:00,0,LEON,HOTU
9,Wednesday - 2024-05-29,IEM Dallas 2024,bo3,22:00,2,9z,Vitality
