# In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, ElementClickInterceptedException, ElementNotInteractableException
import time
import pandas as pd
import os
import json

def is_button_present(driver, timeout=5):
    """Report whether the "Show more matches" button is present on the page.

    Args:
        driver: Selenium WebDriver whose current page is inspected.
        timeout: Maximum number of seconds to wait for the button to appear.

    Returns:
        True if the button is found in the DOM within ``timeout`` seconds,
        False if the wait times out.
    """
    # The button carries two classes. By.CLASS_NAME is meant for a single
    # class name, so match both classes explicitly with a CSS selector.
    show_more_locator = (By.CSS_SELECTOR, ".event__more.event__more--static")
    try:
        WebDriverWait(driver, timeout).until(
            EC.presence_of_element_located(show_more_locator)
        )
    except TimeoutException:
        return False
    return True


# ---------------------------------------------------------------------------
# Scraper configuration
# ---------------------------------------------------------------------------
ARCHIVE_URL = 'https://www.flashscore.com/football/italy/serie-a/archive/'

# CSS selectors for elements that carry two classes at once.
RESULTS_TAB_SELECTOR = ".tabs__tab.results"
SHOW_MORE_SELECTOR = ".event__more.event__more--static"

# Give up on the "Show more matches" button after this many failed clicks in
# a row, so a button that never becomes clickable cannot hang the run.
MAX_SHOW_MORE_FAILURES = 5

# Absolute XPaths into the match popup.
# NOTE(review): these depend on the exact page layout — re-check them if the
# scraper starts timing out.
STATS_TAB_XPATH = "/html/body/div[1]/div/div[7]/div/a[2]"
LEAGUE_ROUND_XPATH = '/html/body/div[1]/div/div[3]/div/span[3]/a'
HOME_TEAM_XPATH = "/html/body/div[1]/div/div[4]/div[2]/div[3]/div[2]/a"
AWAY_TEAM_XPATH = "/html/body/div[1]/div/div[4]/div[4]/div[3]/div[1]/a"
HOME_GOALS_XPATH = "/html/body/div[1]/div/div[4]/div[3]/div/div[1]/span[1]"
AWAY_GOALS_XPATH = "/html/body/div[1]/div/div[4]/div[3]/div/div[1]/span[3]"
DATE_TIME_XPATH = '/html/body/div[1]/div/div[4]/div[1]/div'


def _text_of(driver, xpath, timeout=10):
    """Wait until the element at ``xpath`` is present and return its text."""
    element = WebDriverWait(driver, timeout).until(
        EC.presence_of_element_located((By.XPATH, xpath))
    )
    return element.text


def _close_extra_windows(driver, keep_handle):
    """Close every window except ``keep_handle`` and switch back to it."""
    for handle in driver.window_handles:
        if handle != keep_handle:
            driver.switch_to.window(handle)
            driver.close()
    driver.switch_to.window(keep_handle)


def _append_game_data(filepath, game_data):
    """Append ``game_data`` to the JSON list stored at ``filepath``.

    The file is created (holding a one-element list) if it does not exist.
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as file:
            existing_data = json.load(file)
    except FileNotFoundError:
        existing_data = []

    existing_data.append(game_data)

    with open(filepath, 'w', encoding='utf-8') as file:
        json.dump(existing_data, file, indent=4)


def _expand_all_matches(driver):
    """Click "Show more matches" until the button is gone; return the click count.

    Stops early after MAX_SHOW_MORE_FAILURES consecutive failed attempts.
    """
    click_count = 0
    consecutive_failures = 0
    while is_button_present(driver):
        try:
            button_element = WebDriverWait(driver, 2).until(
                EC.element_to_be_clickable((By.CSS_SELECTOR, SHOW_MORE_SELECTOR))
            )
            driver.execute_script("arguments[0].scrollIntoView();", button_element)
            # Click through JavaScript rather than a native click.
            driver.execute_script("arguments[0].click();", button_element)
        except Exception as e:
            consecutive_failures += 1
            if consecutive_failures >= MAX_SHOW_MORE_FAILURES:
                print(f"Error occurred: {str(e)}. Giving up on 'Show more matches'.")
                break
            print(f"Error occurred: {str(e)}. Retrying...")
            time.sleep(2)
        else:
            consecutive_failures = 0
            click_count += 1
            print(f"Clicked 'Show more matches' button successfully. (Click {click_count})")
            time.sleep(3)  # Wait for new content to load
    return click_count


def _scrape_match_popup(driver):
    """Scrape one match from the popup window the driver is currently on.

    Opens the stats tab, then collects competition, round, team names, goals,
    date, time and every statistics row into a flat dict. Statistics are
    stored under ``home_<stat>`` / ``away_<stat>`` keys, where ``<stat>`` is
    the lower-cased row title with spaces replaced by underscores.

    Raises:
        TimeoutException: if an expected element does not appear in time.
        IndexError: if the league/round or date/time text does not split
            into at least two parts.
    """
    stats_tab = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.XPATH, STATS_TAB_XPATH))
    )
    driver.execute_script('arguments[0].click();', stats_tab)
    time.sleep(4)  # Pause to load the content in the stats tab

    game_data = {}

    # Header text has the form "<competition> - <round>".
    league_and_round = _text_of(driver, LEAGUE_ROUND_XPATH).split(' - ')
    game_data['comp'] = league_and_round[0]
    game_data['round'] = league_and_round[1]

    game_data['home_team'] = _text_of(driver, HOME_TEAM_XPATH)
    game_data['away_team'] = _text_of(driver, AWAY_TEAM_XPATH)

    game_data['home_goals'] = _text_of(driver, HOME_GOALS_XPATH)
    game_data['away_goals'] = _text_of(driver, AWAY_GOALS_XPATH)

    # Date and time share one element, separated by a space.
    date_and_time = _text_of(driver, DATE_TIME_XPATH, timeout=7).split(' ')
    game_data['date'] = date_and_time[0]
    game_data['time'] = date_and_time[1]

    stats_section = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.CLASS_NAME, "section"))
    )
    # NOTE(review): the hashed class names below look build-generated and may
    # change when the site is redeployed — confirm if no rows are found.
    for row in stats_section.find_elements(By.CLASS_NAME, "_category_18zuy_15"):
        home_value = row.find_element(By.CLASS_NAME, "_homeValue_7ptpb_9").text
        stat_title = row.find_element(By.CLASS_NAME, "_category_1haer_4").text
        away_value = row.find_element(By.CLASS_NAME, "_awayValue_7ptpb_13").text

        print(f"Stat: {stat_title}, Home: {home_value}, Away: {away_value}")

        stat_key = stat_title.lower().replace(' ', '_')
        game_data[f'home_{stat_key}'] = home_value
        game_data[f'away_{stat_key}'] = away_value

    return game_data


def _scrape_match(driver, match_link, season_filepath):
    """Open one match popup, scrape it into ``season_filepath``, and close it.

    Whatever happens inside the popup, every window opened by the click is
    closed and the driver is switched back to the window it started on, so a
    single failing match cannot corrupt the window state for the next one.
    """
    driver.execute_script("arguments[0].scrollIntoView(true);", match_link)

    season_window = driver.current_window_handle
    handles_before = driver.window_handles

    # Click the link using JavaScript (opens the popup).
    driver.execute_script("arguments[0].click();", match_link)

    try:
        # Wait for a window beyond those already open, instead of a
        # hard-coded total window count.
        WebDriverWait(driver, 10).until(EC.new_window_is_opened(handles_before))
        new_handles = [h for h in driver.window_handles if h not in handles_before]
        driver.switch_to.window(new_handles[-1])

        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.TAG_NAME, "body"))
        )
        print(f"Popup title: {driver.title}")

        try:
            game_data = _scrape_match_popup(driver)
            _append_game_data(season_filepath, game_data)
            print("This is the appended data: ", game_data)
            print(f"Data appended to {season_filepath} successfully.")
        except Exception as e:
            # Best-effort: one unreadable match must not stop the season.
            print(f"Error interacting with popup: {str(e)}")
    except Exception as e:
        print(f'Error getting match popup: {str(e)}')
    finally:
        for handle in driver.window_handles:
            if handle not in handles_before:
                driver.switch_to.window(handle)
                driver.close()
        driver.switch_to.window(season_window)

    # Wait a bit before clicking the next match link.
    time.sleep(2)


def _scrape_season(driver, season_url, season_filepath, main_window):
    """Scrape every match of one season into ``season_filepath``.

    Opens ``season_url`` in a new tab, switches to the results tab, expands
    the full match list and visits each match popup. Errors are reported and
    swallowed so the remaining seasons are still processed; afterwards all
    windows except ``main_window`` are closed.
    """
    try:
        # Open the season URL in a new tab.
        driver.execute_script("window.open('');")
        driver.switch_to.window(driver.window_handles[-1])
        driver.get(season_url)

        results_element = WebDriverWait(driver, 4).until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, RESULTS_TAB_SELECTOR))
        )
        driver.execute_script("arguments[0].scrollIntoView();", results_element)
        driver.execute_script("arguments[0].click();", results_element)
        time.sleep(1)

        _expand_all_matches(driver)

        # With the list fully expanded, every match is an 'eventRowLink'
        # anchor; clicking one opens the match detail in a popup window.
        for match_link in driver.find_elements(by=By.CLASS_NAME, value='eventRowLink'):
            _scrape_match(driver, match_link, season_filepath)

        print("Finished loading all match details.")
    except Exception as e:
        print(f"An error occurred while processing {season_url}: {str(e)}")
    finally:
        # Close the season tab (and any leaked popup) and return to the
        # archive page.
        _close_extra_windows(driver, main_window)


# Start Chrome WebDriver
driver = webdriver.Chrome()

try:
    driver.get(ARCHIVE_URL)

    # Print the title as a quick check that the page loaded.
    title = driver.title
    print(title)

    main_window = driver.current_window_handle

    # Sanity check: find_element raises NoSuchElementException if the archive
    # list is missing, which aborts the run before any scraping starts.
    driver.find_element(by=By.CLASS_NAME, value='archive')

    season_links = driver.find_elements(by=By.CLASS_NAME, value="archive__text--clickable")

    # Directory for storing season data
    season_data_dir = "season_data"
    os.makedirs(season_data_dir, exist_ok=True)

    # Visit links 1, 3, 5 and 7 only (every second one of links 1..8).
    # NOTE(review): presumably the entries in between are not season pages —
    # confirm against the archive markup.
    for link in season_links[1:9:2]:
        season_text = link.text.strip().lower().replace(' ', '_')
        # Mixed quote styles keep this f-string valid before Python 3.12.
        season_filename = f"game_data_{season_text.replace('/', '_')}.json"
        season_filepath = os.path.join(season_data_dir, season_filename)

        time.sleep(2)
        season_url = link.get_attribute('href')

        _scrape_season(driver, season_url, season_filepath, main_window)

    print("Finished visiting all season links.")

finally:
    # Ensures WebDriver quits even if an error occurs
    driver.quit()
    print("WebDriver session terminated.")


# In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def parse_stats(driver, url):
    """Load a match statistics page and return its stats as a dict.

    Args:
        driver: Selenium WebDriver used to load and query the page.
        url: Address of the match statistics page.

    Returns:
        A dict mapping each stat title (e.g. "Ball Possession") to
        ``{'home': <home value text>, 'away': <away value text>}``.

    Raises:
        TimeoutException: if no element with class "section" appears
            within 10 seconds.
    """
    driver.get(url)

    # Wait for the statistics section to load
    stats_section = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.CLASS_NAME, "section"))
    )

    # NOTE(review): the hashed class names below look build-generated and may
    # change when the site is redeployed — confirm if no rows are found.
    stat_rows = stats_section.find_elements(By.CLASS_NAME, "_category_18zuy_15")

    stats = {}
    for row in stat_rows:
        # Home team stat value
        home_value = row.find_element(By.CLASS_NAME, "_homeValue_7ptpb_9").text

        # Stat title (e.g., "Ball Possession")
        stat_title = row.find_element(By.CLASS_NAME, "_category_1haer_4").text

        # Away team stat value
        away_value = row.find_element(By.CLASS_NAME, "_awayValue_7ptpb_13").text

        print(f"Stat: {stat_title}, Home: {home_value}, Away: {away_value}")

        stats[stat_title] = {
            'home': home_value,
            'away': away_value
        }
        # No per-row pause: the rows are already in the DOM, so sleeping here
        # only multiplied the run time by the number of stats.

    return stats

# Launch the browser session (requires the matching driver to be installed).
driver = webdriver.Chrome()  # or webdriver.Firefox(), etc.

# Match statistics page to scrape.
url = "https://www.flashscore.com/match/jXP8f29S/#/match-summary/match-statistics/0"

try:
    match_stats = parse_stats(driver, url)

    # Report each category as a three-line block followed by a blank line.
    for category, values in match_stats.items():
        print(
            f"{category}:",
            f"  Home: {values['home']}",
            f"  Away: {values['away']}",
            "",
            sep="\n",
        )

finally:
    # Always release the browser, even if scraping failed.
    driver.quit()