In [13]:


import pandas as pd
from sqlalchemy import create_engine, MetaData, Table, Column, Integer, String, Float, ForeignKey, TIMESTAMP
from sqlalchemy.exc import SQLAlchemyError

DB_URL = "postgresql+psycopg2://postgres:kaoutar2002@localhost:5432/football_db"


engine = create_engine(DB_URL)
metadata = MetaData()

print(" Configuration & Engine ready.")


 Configuration & Engine ready.


In [14]:




saison = Table(
    "saison", metadata,
    Column("saison_id", Integer, primary_key=True),
    Column("year", String(9))
)

competition = Table(
    "competition", metadata,
    Column("competition_id", Integer, primary_key=True),
    Column("competition_name", String(50))
)

team = Table(
    "team", metadata,
    Column("team_id", Integer, primary_key=True),
    Column("team_name", String(50)),
    Column("saison_id", Integer, ForeignKey("saison.saison_id"))
)

player = Table(
    "player", metadata,
    Column("player_id", Integer, primary_key=True),
    Column("Player", String(100)),
    Column("Nation", String(3)),
    Column("Age", Float),
    Column("Pos", String(10)),
    Column("team_id", Integer, ForeignKey("team.team_id"))
)

match = Table(
    "match", metadata,
    Column("match_id", Integer, primary_key=True),
    Column("team_id", Integer, ForeignKey("team.team_id")),
    Column("opponent_id", Integer),
    Column("datetime", TIMESTAMP),
    Column("Attendance", Integer),
    Column("Referee", String(50)),
    Column("saison_id", Integer, ForeignKey("saison.saison_id")),
    Column("competition_id", Integer, ForeignKey("competition.competition_id"))
)

match_result = Table(
    "match_result", metadata,
    Column("match_id", Integer, ForeignKey("match.match_id"), primary_key=True),
    Column("GF", Integer),
    Column("GA", Integer),
    Column("Result", String(1)),
    Column("xG", Float),
    Column("xGA", Float)
)

player_statistics = Table(
    "player_statistics", metadata,
    Column("player_id", Integer, ForeignKey("player.player_id"), primary_key=True),
    Column("MP", Integer),
    Column("Starts", Integer),
    Column("Min", Float),
    Column("90s", Float),
    Column("Gls", Float),
    Column("Ast", Float),
    Column("G+A", Float),
    Column("G-PK", Float),
    Column("PK", Float),
    Column("PKatt", Float),
    Column("CrdY", Float),
    Column("CrdR", Float)
)

print(" Table schemas defined successfully.")


 Table schemas defined successfully.


In [15]:


try:
    metadata.drop_all(engine)  
    metadata.create_all(engine)
    print(" Tables dropped and recreated successfully.")
except SQLAlchemyError as e:
    print(" Error creating tables:", e)


 Tables dropped and recreated successfully.


In [16]:
DATA_DIR = "../data/processed/"


def insert_from_csv(table, csv_path):
    """Insert CSV data into the given SQLAlchemy table."""
    try:
        df = pd.read_csv(csv_path)
        print(f"📄 Loading {len(df)} rows from {csv_path} ...")

        # Replace NaN with None for SQL compatibility
        df = df.where(pd.notnull(df), None)

        # print(df.to_dict(orient="records"))
        

        # Insert data in bulk (efficient)
        with engine.begin() as conn:
            conn.execute(table.insert(), df.to_dict(orient="records"))
        print(f"✅ Data inserted into {table.name} ({len(df)} rows).")

    except Exception as e:
        print(f"❌ Error inserting into {table.name}: {e}")


# --- Order matters (respect FKs) ---
insertion_order = [
    (saison, f"{DATA_DIR}saison.csv"),
    (competition, f"{DATA_DIR}competition.csv"),
    (team, f"{DATA_DIR}team.csv"),
    (player, f"{DATA_DIR}player.csv"),
    (match, f"{DATA_DIR}match.csv"),
    (match_result, f"{DATA_DIR}match_result.csv"),
    (player_statistics, f"{DATA_DIR}player_statistics.csv")
]

for table, path in insertion_order:
    insert_from_csv(table, path)


📄 Loading 1 rows from ../data/processed/saison.csv ...
✅ Data inserted into saison (1 rows).
📄 Loading 7 rows from ../data/processed/competition.csv ...
✅ Data inserted into competition (7 rows).
📄 Loading 20 rows from ../data/processed/team.csv ...
✅ Data inserted into team (20 rows).
📄 Loading 702 rows from ../data/processed/player.csv ...
❌ Error inserting into player: (psycopg2.errors.StringDataRightTruncation) ERREUR:  valeur trop longue pour le type character varying(3)

[SQL: INSERT INTO player (player_id, "Player", "Nation", "Age", "Pos", team_id) VALUES (%(player_id__0)s, %(Player__0)s, %(Nation__0)s, %(Age__0)s, %(Pos__0)s, %(team_id__0)s), (%(player_id__1)s, %(Player__1)s, %(Nation__1)s, %(Age__1)s, %(Pos__1)s, %(team ... 70673 characters truncated ... (%(player_id__701)s, %(Player__701)s, %(Nation__701)s, %(Age__701)s, %(Pos__701)s, %(team_id__701)s)]
[parameters: {'Pos__0': 'GK', 'Player__0': 'David Raya', 'team_id__0': 1, 'Age__0': 28.0, 'Nation__0': 'ESP', 'player_id__0'