# Acquisizione Dati Partite

In [1]:
import sys
import os
import time
import logging
import importlib
from datetime import datetime
from selenium import webdriver

# Aggiungiamo i moduli locali
if 'modules' not in sys.path:
    sys.path.append(os.path.abspath('modules'))

try:
    from modules import get_matches_per_day
    from modules import db_module
    from modules import fetching
    
    # Ricarica moduli per attivare le nuove barre
    importlib.reload(get_matches_per_day)
    importlib.reload(db_module)
    importlib.reload(fetching)
    
    from modules.fetching import process_date
    
    print("Moduli ricaricati. Sistema di checkpoint attivo.")
except ImportError as e:
    print(f"Errore nell'importazione dei moduli: {e}")

# Silenziamo i log informativi
logging.basicConfig(
    level=logging.WARNING, 
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[logging.StreamHandler(sys.stdout)],
    force=True
)

Moduli ricaricati. Sistema di checkpoint attivo.


### Configurazione Parametri
Imposta la data di interesse e le opzioni del browser.

In [2]:
# RANGE DI DATE (Formato YYYY-MM-DD)
START_DATE = '2025-01-09'
END_DATE = '2026-02-09'

# Opzioni Selenium
HEADLESS_MODE = True

# Nome del file di checkpoint (verrà creato nella cartella dello script)
CHECKPOINT_FILE = "last_processed_date.txt"

### Funzioni di Utility
Setup del driver e gestione del workflow.

### Esecuzione
Avvia il processo di download.

In [None]:
from datetime import datetime, timedelta
from tqdm.auto import tqdm
import os

# 1. Calcolo date originali
start_dt = datetime.strptime(START_DATE, '%Y-%m-%d')
end_dt = datetime.strptime(END_DATE, '%Y-%m-%d')

# 2. Controllo Checkpoint: se esiste, sovrascrive la data di inizio
if os.path.exists(CHECKPOINT_FILE):
    with open(CHECKPOINT_FILE, "r") as f:
        saved_date_str = f.read().strip()
        try:
            saved_dt = datetime.strptime(saved_date_str, '%Y-%m-%d')
            # Inizia dal giorno DOPO l'ultimo salvato
            new_start = saved_dt + timedelta(days=1)
            # Solo se il checkpoint è dentro il range attuale lo usiamo
            if start_dt < new_start <= end_dt:
                start_dt = new_start
                print(f"--- Ripresa dal checkpoint: Inizio da {start_dt.strftime('%Y-%m-%d')} ---")
            elif new_start > end_dt:
                print("--- Tutte le date nel range risultano già elaborate nel checkpoint. ---")
        except:
            print("Checkpoint corrotto, inizio dalla data definita nei parametri.")

# 3. Preparazione lista date effettive da elaborare
date_list = []
current = start_dt
while current <= end_dt:
    date_list.append(current.strftime('%Y-%m-%d'))
    current += timedelta(days=1)

if not date_list:
    print("Nessuna nuova data da elaborare.")
else:
    # Formato barra: tempo rimasto e data a destra
    c_format = '{l_bar}{bar}| {n_fmt}/{total_fmt} [Mancano: {remaining}] {postfix}'
    pbar_total = tqdm(date_list, desc="Progresso", position=0, leave=True, bar_format=c_format)

    for curr_str in pbar_total:
        pbar_total.set_postfix_str(f"Data: {curr_str}")
        
        # ELABORAZIONE
        process_date(curr_str, headless_mode=HEADLESS_MODE)
        
        # 4. SALVATAGGIO CHECKPOINT: solo dopo che il giorno è finito con successo
        with open(CHECKPOINT_FILE, "w") as f:
            f.write(curr_str)

  from .autonotebook import tqdm as notebook_tqdm
Progresso:  13%|█▎        | 52/397 [Mancano: 73:48:20] , Data: 2025-03-02

Errore nell'inserimento dei dati base: 'season'


Progresso:  13%|█▎        | 53/397 [Mancano: 76:26:50] , Data: 2025-03-03

Errore nell'inserimento dei dati base: 'season'


Progresso:  15%|█▍        | 58/397 [Mancano: 55:37:53] , Data: 2025-03-08

2026-02-10 17:16:30,804 - ERROR - Errore nel recupero degli incidenti per match 13558008: Message: timeout: Timed out receiving message from renderer: 15.000
  (Session info: chrome=144.0.7559.133)
Stacktrace:
Symbols not available. Dumping unresolved backtrace:
	0x7ff7e566f3d5
	0x7ff7e566f430
	0x7ff7e54110bd
	0x7ff7e53fdda9
	0x7ff7e53fda91
	0x7ff7e53fb6c1
	0x7ff7e53fbfff
	0x7ff7e540b45a
	0x7ff7e5422377
	0x7ff7e5429a7a
	0x7ff7e53fc7c1
	0x7ff7e54220a4
	0x7ff7e54b9bd0
	0x7ff7e545cb0c
	0x7ff7e545da53
	0x7ff7e594b470
	0x7ff7e594586d
	0x7ff7e596621a
	0x7ff7e568b235
	0x7ff7e5693a5c
	0x7ff7e5678844
	0x7ff7e56789f6
	0x7ff7e565eb87
	0x7ff8d4b5e8d7
	0x7ff8d69cc53c





2026-02-10 17:16:47,038 - ERROR - Errore nel recupero dei grafici per match 13558007: Message: timeout: Timed out receiving message from renderer: 15.000
  (Session info: chrome=144.0.7559.133)
Stacktrace:
Symbols not available. Dumping unresolved backtrace:
	0x7ff7e566f3d5
	0x7ff7e566f430
	0x7ff7e54110bd
	0x7ff7e53fdda9
	0x7ff7e53fda91
	0x7ff7e53fb6c1
	0x7ff7e53fbfff
	0x7ff7e540b45a
	0x7ff7e5422377
	0x7ff7e5429a7a
	0x7ff7e53fc7c1
	0x7ff7e54220a4
	0x7ff7e54b9bd0
	0x7ff7e545cb0c
	0x7ff7e545da53
	0x7ff7e594b470
	0x7ff7e594586d
	0x7ff7e596621a
	0x7ff7e568b235
	0x7ff7e5693a5c
	0x7ff7e5678844
	0x7ff7e56789f6
	0x7ff7e565eb87
	0x7ff8d4b5e8d7
	0x7ff8d69cc53c

2026-02-10 17:17:02,527 - ERROR - Errore nel recupero delle statistiche per match 13558007: Message: timeout: Timed out receiving message from renderer: 15.000
  (Session info: chrome=144.0.7559.133)
Stacktrace:
Symbols not available. Dumping unresolved backtrace:
	0x7ff7e566f3d5
	0x7ff7e566f430
	0x7ff7e54110bd
	0x7ff7e53fdda9
	0x7ff7e53f



2026-02-10 17:17:34,884 - ERROR - Errore nel recupero dei grafici per match 13558013: Message: timeout: Timed out receiving message from renderer: 15.000
  (Session info: chrome=144.0.7559.133)
Stacktrace:
Symbols not available. Dumping unresolved backtrace:
	0x7ff7e566f3d5
	0x7ff7e566f430
	0x7ff7e54110bd
	0x7ff7e53fdda9
	0x7ff7e53fda91
	0x7ff7e53fb6c1
	0x7ff7e53fbfff
	0x7ff7e540b45a
	0x7ff7e5422377
	0x7ff7e5429a7a
	0x7ff7e53fc7c1
	0x7ff7e54220a4
	0x7ff7e54b9bd0
	0x7ff7e545cb0c
	0x7ff7e545da53
	0x7ff7e594b470
	0x7ff7e594586d
	0x7ff7e596621a
	0x7ff7e568b235
	0x7ff7e5693a5c
	0x7ff7e5678844
	0x7ff7e56789f6
	0x7ff7e565eb87
	0x7ff8d4b5e8d7
	0x7ff8d69cc53c

2026-02-10 17:17:50,428 - ERROR - Errore nel recupero delle statistiche per match 13558013: Message: timeout: Timed out receiving message from renderer: 15.000
  (Session info: chrome=144.0.7559.133)
Stacktrace:
Symbols not available. Dumping unresolved backtrace:
	0x7ff7e566f3d5
	0x7ff7e566f430
	0x7ff7e54110bd
	0x7ff7e53fdda9
	0x7ff7e53f



2026-02-10 17:18:22,229 - ERROR - Errore nel recupero dei grafici per match 13558014: Message: timeout: Timed out receiving message from renderer: 15.000
  (Session info: chrome=144.0.7559.133)
Stacktrace:
Symbols not available. Dumping unresolved backtrace:
	0x7ff7e566f3d5
	0x7ff7e566f430
	0x7ff7e54110bd
	0x7ff7e53fdda9
	0x7ff7e53fda91
	0x7ff7e53fb6c1
	0x7ff7e53fbfff
	0x7ff7e540b45a
	0x7ff7e5422377
	0x7ff7e5429a7a
	0x7ff7e53fc7c1
	0x7ff7e54220a4
	0x7ff7e54b9bd0
	0x7ff7e545cb0c
	0x7ff7e545da53
	0x7ff7e594b470
	0x7ff7e594586d
	0x7ff7e596621a
	0x7ff7e568b235
	0x7ff7e5693a5c
	0x7ff7e5678844
	0x7ff7e56789f6
	0x7ff7e565eb87
	0x7ff8d4b5e8d7
	0x7ff8d69cc53c

2026-02-10 17:18:37,491 - ERROR - Errore nel recupero delle statistiche per match 13558014: Message: timeout: Timed out receiving message from renderer: 15.000
  (Session info: chrome=144.0.7559.133)
Stacktrace:
Symbols not available. Dumping unresolved backtrace:
	0x7ff7e566f3d5
	0x7ff7e566f430
	0x7ff7e54110bd
	0x7ff7e53fdda9
	0x7ff7e53f

Progresso:  15%|█▌        | 60/397 [Mancano: 84:26:18] , Data: 2025-03-10

2026-02-10 17:43:30,423 - ERROR - Errore nel recupero dei grafici per match 12487677: Message: timeout: Timed out receiving message from renderer: 15.000
  (Session info: chrome=144.0.7559.133)
Stacktrace:
Symbols not available. Dumping unresolved backtrace:
	0x7ff6ec68f3d5
	0x7ff6ec68f430
	0x7ff6ec4310bd
	0x7ff6ec41dda9
	0x7ff6ec41da91
	0x7ff6ec41b6c1
	0x7ff6ec41bfff
	0x7ff6ec42b45a
	0x7ff6ec442377
	0x7ff6ec449a7a
	0x7ff6ec41c7c1
	0x7ff6ec4420a4
	0x7ff6ec4d9bd0
	0x7ff6ec47cb0c
	0x7ff6ec47da53
	0x7ff6ec96b470
	0x7ff6ec96586d
	0x7ff6ec98621a
	0x7ff6ec6ab235
	0x7ff6ec6b3a5c
	0x7ff6ec698844
	0x7ff6ec6989f6
	0x7ff6ec67eb87
	0x7ff8d4b5e8d7
	0x7ff8d69cc53c





### Stampa primi 10 record per tabella

In [None]:
import db_module
import pandas as pd

conn = db_module.create_connection()
if conn:
    try:
        # Query per gli incidenti (ultimi 10)
        print("--- Ultimi 10 Incidenti (Tabella Colonne) ---")
        query = "SELECT match_id, time, incident_type, player_name, home_score, away_score FROM match_incidents_column LIMIT 10"
        df_inc = pd.read_sql(query, conn)
        display(df_inc)
    except Exception as e:
        print(f"Errore SQL: {e}")
    finally:
        conn.close()


ImportError: attempted relative import with no known parent package