In [3]:
import requests # type: ignore
import os
import pandas as pd # type: ignore
from bs4 import BeautifulSoup # type: ignore
from dotenv import load_dotenv # type: ignore
from requests.exceptions import RequestException # type: ignore
from fake_useragent import UserAgent # type: ignore
from tenacity import (retry,retry_if_exception_type,stop_after_attempt , wait_exponential)
from datetime import date ,time , datetime, timedelta
from utils import convert_date_format
from dbutils import Database

In [4]:
# Load environment settings via python-dotenv before the os.getenv lookups further down.
# NOTE(review): presumably this reads a local .env file — confirm where it lives and
# that it defines BASE_URL and MATCHES_URL.
load_dotenv()

True

In [5]:
# Site endpoints are injected through the environment; each value is None when
# its variable is not set.
base_url = os.environ.get("BASE_URL")
matches_url = os.environ.get("MATCHES_URL")

In [6]:
def genrate_random_useragent():
    """Return a random User-Agent string.

    A new ``UserAgent`` object is built on every call and its ``random``
    attribute is returned unchanged.
    """
    return UserAgent().random



In [7]:
@retry(
    reraise = True,
    stop = stop_after_attempt(10),
    wait = wait_exponential(multiplier = 2 , min = 1 , max = 10),
    retry = (retry_if_exception_type(RequestException))
)
def requests_with_retry(url, timeout=10):
    """GET ``url`` with a random User-Agent, retrying on request failures.

    The tenacity decorator retries the call only when a ``RequestException``
    (or a subclass) is raised, waits with exponential backoff bounded to
    1-10 seconds between attempts, gives up after 10 attempts and, because
    ``reraise=True``, re-raises the last underlying exception.

    Args:
        url: Address to fetch.
        timeout: Value passed to ``requests.get(timeout=...)``, in seconds.
            Without a timeout a stalled connection would block forever and
            the retry logic would never get a chance to run.

    Returns:
        The ``requests.Response`` of the first attempt that returns a
        non-error HTTP status.

    Raises:
        RequestException: The original exception (e.g. ``HTTPError``,
            ``ConnectionError``, ``Timeout``) from the final failed attempt.
    """
    # A fresh User-Agent is picked on every attempt, including retries.
    headers = {'User-Agent': genrate_random_useragent()}

    try:
        res = requests.get(url=url, headers=headers, timeout=timeout)
        # Turn 4xx/5xx responses into HTTPError so they are retried as well.
        res.raise_for_status()
        return res
    except RequestException as e:
        print(f"Exception occured : {e}")
        # Bare raise keeps the concrete exception type, message and traceback;
        # raising a new RequestException would discard all of them.
        raise
           
    

In [8]:
# Download the matches page (retries are handled inside requests_with_retry).
response = requests_with_retry(url = matches_url)
# Name the parser explicitly: without it BeautifulSoup picks whichever parser
# happens to be installed, emits GuessedAtParserWarning, and may build a
# different tree on another machine. "html.parser" ships with Python.
soup = BeautifulSoup(response.content, "html.parser")


In [9]:
# Collect every <div> carrying the 'upcomingMatchesSection' class; the parsing
# loop iterates over these sections.
upcomming_matches_sections = soup.find_all('div', attrs={'class': 'upcomingMatchesSection'})

In [10]:
# Build one dictionary per upcoming match. The key order below determines the
# column order of the DataFrame created from `all_matches`.
all_matches = []

for section in upcomming_matches_sections:
    for match in section.find_all('div', class_='upcomingMatch'):

        match_meta_tag = match.find('div', class_='matchMeta')
        match_time_tag = match.find('div', class_='matchTime')
        match_rating = match['stars']

        # 'data-unix' is divided by 1000 before being used as a POSIX timestamp
        # (presumably epoch milliseconds — confirm against the page markup).
        # fromtimestamp() without a tz argument yields the machine's local time.
        kickoff = datetime.fromtimestamp(int(match_time_tag['data-unix']) // 1000)

        # Deliberately NOT named `date` / `time`: those names are imported from
        # the datetime module at the top of the notebook and assigning to them
        # here would clobber the imports for every later cell.
        match_day = convert_date_format(kickoff.strftime('%Y-%m-%d'))
        match_clock = kickoff.strftime('%H:%M')

        if match.find('div', class_='matchInfoEmpty') is not None:
            # Placeholder match: no teams are listed and the event name lives
            # in a different element.
            event_name_tag = match.find('span', class_='line-clamp-3')
            team_names = []
        else:
            event_name_tag = match.find('div', class_='matchEventName')
            team_tags = match.find_all('div', class_='matchTeamName')
            team_names = [tag.text for tag in team_tags[:2]]

        # Pad with 'TBD' so zero or one listed team never raises IndexError.
        team_one, team_two = (team_names + ['TBD', 'TBD'])[:2]

        all_matches.append({
            'matchDate': match_day,
            'matchEventName': event_name_tag.text,
            'matchMeta': match_meta_tag.text,
            'matchTime': match_clock,
            'matchRating': match_rating,
            'matchTeam1': team_one,
            'matchTeam2': team_two,
        })

# all_matches now holds one dictionary per scraped match.



In [11]:
# Tabulate the scraped match dictionaries: one row per match, one column per
# dictionary key.
all_matches_df = pd.DataFrame(all_matches)

In [12]:
# Bare expression so the notebook renders the frame as this cell's output.
all_matches_df

Unnamed: 0,matchDate,matchEventName,matchMeta,matchTime,matchRating,matchTeam1,matchTeam2
0,"2024-05-30, Thursday",MESA Nomadic Masters Spring 2024,bo3,15:45,1,Chinggis Warriors,OG
1,"2024-05-30, Thursday",European Pro League Season 16,bo3,16:00,0,iNation,Permitta
2,"2024-05-30, Thursday",CCT Season 2 Europe Series 4,bo3,17:30,1,FURIA,RUBY
3,"2024-05-30, Thursday",European Pro League Season 16,bo3,19:00,0,Zero Tenacity,Passion UA
4,"2024-05-30, Thursday",ESEA Advanced Season 49 Europe,bo3,19:30,0,Nemiga,DMS
...,...,...,...,...,...,...,...
215,"2024-06-11, Tuesday",YGames Pro Series 2024 - Quarter-final #4,bo3,23:30,0,TBD,TBD
216,"2024-06-12, Wednesday",YGames Pro Series 2024 - Semi-final #1,bo3,20:30,0,TBD,TBD
217,"2024-06-12, Wednesday",YGames Pro Series 2024 - Semi-final #2,bo3,23:30,0,TBD,TBD
218,"2024-06-13, Thursday",YGames Pro Series 2024 - 3rd Place Decider Match,bo3,20:30,0,TBD,TBD


In [13]:
# Instantiate the project's Database helper (imported from the local dbutils module).
# NOTE(review): 'test.db' is presumably a database file path relative to the
# working directory — confirm in dbutils.
test = Database('test.db')

In [19]:
# Create the 'matches' table from the DataFrame and insert its rows (this is
# what the helper's success message reports).
# NOTE(review): behaviour when 'matches' already exists is defined in dbutils —
# confirm before re-running this cell.
test.create_table_from_dataframe('matches',dataframe=all_matches_df)

Table 'matches' created successfully and records inserted from the DataFrame.


In [20]:
# Print the column layout of 'matches' to check the schema that was created.
test.describe_table('matches')

Field		Type		Null	Key	Default	Extra
0	matchDate	TEXT	0	None	0
1	matchEventName	TEXT	0	None	0
2	matchMeta	TEXT	0	None	0
3	matchTime	TEXT	0	None	0
4	matchRating	TEXT	0	None	0
5	matchTeam1	TEXT	0	None	0
6	matchTeam2	TEXT	0	None	0


In [22]:
# Delete every row from 'matches' (per the helper's message), so the scrape
# can be loaded again.
# NOTE(review): destructive — presumably the table definition itself is kept; confirm in dbutils.
test.clear_table('matches')

All rows deleted from the 'matches' table.
