In [1]:
import requests # type: ignore
import os
import pandas as pd # type: ignore
from bs4 import BeautifulSoup # type: ignore
from dotenv import load_dotenv # type: ignore
from requests.exceptions import RequestException # type: ignore
from fake_useragent import UserAgent # type: ignore
from tenacity import (retry,retry_if_exception_type,stop_after_attempt , wait_exponential)
from datetime import date ,time , datetime, timedelta

In [2]:
# Load key/value pairs from a .env file into the process environment
# (presumably where BASE_URL and MATCHES_URL are defined — confirm).
load_dotenv()

True

In [3]:
# Endpoints come from the environment; each value is None when its variable is unset.
base_url = os.environ.get("BASE_URL")
matches_url = os.environ.get("MATCHES_URL")

In [4]:
def genrate_random_useragent():
    """Return a Chrome user-agent string picked by a fresh ``UserAgent`` instance."""
    return UserAgent()['Chrome']



In [5]:
@retry(
    reraise=True,
    stop=stop_after_attempt(10),
    wait=wait_exponential(multiplier=1, min=1, max=10),
    retry=retry_if_exception_type(RequestException),
)
def requests_with_retry(url, timeout=30):
    """GET ``url`` with a freshly generated Chrome User-Agent, retrying on failure.

    The decorator retries up to 10 attempts with exponential backoff (1-10 s)
    whenever a ``RequestException`` is raised, which includes non-2xx statuses
    surfaced by ``raise_for_status()`` and timeouts.

    Args:
        url: Address to fetch.
        timeout: Seconds to wait for the server before giving up on an attempt.
            Without a timeout ``requests`` can block indefinitely, which would
            also prevent the retry policy from ever kicking in.

    Returns:
        The successful ``requests.Response``.

    Raises:
        RequestException: The original exception (e.g. ``HTTPError``,
            ``Timeout``) from the last attempt once all retries are exhausted.
    """
    # A new User-Agent is generated on every attempt because the decorator
    # re-invokes the whole function body.
    headers = {'User-Agent': genrate_random_useragent()}

    try:
        res = requests.get(url=url, headers=headers, timeout=timeout)
        res.raise_for_status()
    except RequestException as e:
        print(f"Exception occured : {e}")
        # Bare ``raise`` keeps the concrete exception (status code, response,
        # traceback) instead of replacing it with an empty RequestException.
        raise

    # Log the headers that were actually sent for the successful attempt.
    print(res.request.headers)
    return res
           
    

In [6]:
# Fail fast on a missing URL: passing None to requests raises MissingSchema,
# which is a RequestException and would be retried 10 times before surfacing.
if not matches_url:
    raise ValueError("MATCHES_URL is not set; define it in the environment or the .env file")

response = requests_with_retry(url=matches_url)

# Name the parser explicitly so parsing does not depend on which optional
# parser happens to be installed (and to avoid bs4's GuessedAtParserWarning).
soup = BeautifulSoup(response.content, "html.parser")


Exception occured : 403 Client Error: Forbidden for url: https://www.hltv.org/matches
Exception occured : 403 Client Error: Forbidden for url: https://www.hltv.org/matches
{'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36 Agency/98.8.8175.80', 'Accept-Encoding': 'gzip, deflate', 'Accept': '*/*', 'Connection': 'keep-alive'}


In [8]:
# Collect every <div class="upcomingMatchesSection"> container from the parsed page.
upcomming_matches_sections = soup.find_all(
    name='div',
    attrs={'class': 'upcomingMatchesSection'},
)

In [33]:
def _parse_upcoming_match(match):
    """Convert one ``upcomingMatch`` element into a flat record.

    Args:
        match: Element exposing ``find``/``find_all``, a ``stars`` attribute and
            a ``matchTime`` child carrying a ``data-unix`` attribute.

    Returns:
        dict with keys ``matchDate``, ``matchEventName``, ``matchMeta``,
        ``matchTime``, ``matchRating``, ``matchTeam1`` and ``matchTeam2``.
        Team slots that are not present in the markup are filled with ``'TBD'``.
    """
    match_meta_tag = match.find('div', class_='matchMeta')
    match_time_tag = match.find('div', class_='matchTime')
    match_rating = match['stars']

    # data-unix is divided by 1000 before conversion, i.e. treated as milliseconds.
    # NOTE(review): fromtimestamp() without tz yields the machine's local time,
    # so the recorded date/time depend on where the notebook runs — confirm intended.
    starts_at = datetime.fromtimestamp(int(match_time_tag['data-unix']) // 1000)

    if match.find_all('div', class_='matchInfoEmpty'):
        # Placeholder match: no teams yet, event name lives in a different tag.
        event_name = match.find('span', class_='line-clamp-3').text
        team_names = []
    else:
        event_name = match.find('div', class_='matchEventName').text
        team_tags = match.find_all('div', class_='matchTeamName')
        team_names = [tag.text for tag in team_tags[:2]]

    # Pad to exactly two entries so zero or one known team never raises IndexError.
    team_names += ['TBD'] * (2 - len(team_names))

    # Key order is kept as-is because it determines the DataFrame column order.
    return {
        'matchDate': starts_at.strftime('%Y-%m-%d'),
        'matchEventName': event_name,
        'matchMeta': match_meta_tag.text,
        'matchTime': starts_at.strftime('%H:%M'),
        'matchRating': match_rating,
        'matchTeam1': team_names[0],
        'matchTeam2': team_names[1],
    }


# Build one record per upcoming match across all scraped sections.
# Local names deliberately avoid `date` / `time`, which would overwrite the
# classes imported from `datetime` for the rest of the kernel session.
all_matches = []

for section in upcomming_matches_sections:
    for match in section.find_all('div', class_='upcomingMatch'):
        all_matches.append(_parse_upcoming_match(match))

# all_matches now holds a list of dictionaries, each representing a match.



In [34]:
# Pin the column list so the frame keeps the same schema even when the scrape
# returned no matches (an empty list would otherwise give a column-less frame).
all_matches_df = pd.DataFrame(
    all_matches,
    columns=[
        'matchDate',
        'matchEventName',
        'matchMeta',
        'matchTime',
        'matchRating',
        'matchTeam1',
        'matchTeam2',
    ],
)

In [35]:
# Show the assembled matches table as the cell's output.
all_matches_df

Unnamed: 0,matchDate,matchEventName,matchMeta,matchTime,matchRating,matchTeam1,matchTeam2
0,2024-05-30,IEM Dallas 2024,bo3,01:45,2,9z,Vitality
1,2024-05-30,CCT Season 2 South America Series 1,bo3,02:00,0,Case,Dusty Roots
2,2024-05-30,IEM Dallas 2024,bo3,04:30,2,G2,Liquid
3,2024-05-30,IEM Dallas 2024,bo3,04:30,2,HEROIC,BIG
4,2024-05-30,CCT Season 2 South America Series 1,bo3,05:00,0,Vikings KR,Yawara
...,...,...,...,...,...,...,...
194,2024-06-11,YGames Pro Series 2024 - Quarter-final #4,bo3,23:30,0,TBD,TBD
195,2024-06-12,YGames Pro Series 2024 - Semi-final #1,bo3,20:30,0,TBD,TBD
196,2024-06-12,YGames Pro Series 2024 - Semi-final #2,bo3,23:30,0,TBD,TBD
197,2024-06-13,YGames Pro Series 2024 - 3rd Place Decider Match,bo3,20:30,0,TBD,TBD
