In [7]:
import oracledb
import os
import pandas as pd
from dotenv import load_dotenv
import numpy as np
import logging
import os
import glob

# Configure logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")

def get_oracle_type(dtype):
    """Map pandas dtypes to Oracle data types"""
    if pd.api.types.is_integer_dtype(dtype):
        return "NUMBER"
    elif pd.api.types.is_float_dtype(dtype):
        return "NUMBER"
    elif pd.api.types.is_datetime64_any_dtype(dtype):
        return "TIMESTAMP"
    elif pd.api.types.is_string_dtype(dtype):
        return "VARCHAR2(500)"
    else:
        return "VARCHAR2(500)"  # Default to VARCHAR2 for unknown types

def clean_value(val):
    """Convert pandas/numpy types to Python native types for Oracle insertion"""
    if pd.isna(val):
        return None
    elif isinstance(val, (np.int64, np.int32)):
        return int(val)
    elif isinstance(val, (np.float64, np.float32)):
        return float(val)
    elif isinstance(val, pd.Timestamp):
        return val.strftime('%Y-%m-%d %H:%M:%S')
    return val

def connect_to_oracle():
    """Establishes a connection to the Oracle database"""
    load_dotenv()
    required_vars = ["DB_USER", "DB_PASSWORD", "DB_DSN", "ORACLE_CLIENT_LIB_DIR", "ORACLE_CONFIG_DIR"]
    
    for var in required_vars:
        if os.getenv(var) is None:
            raise EnvironmentError(f"Environment variable '{var}' not set")

    oracledb.init_oracle_client(
        lib_dir=os.getenv("ORACLE_CLIENT_LIB_DIR"),
        config_dir=os.getenv("ORACLE_CONFIG_DIR")
    )

    return oracledb.connect(
        user=os.getenv("DB_USER"),
        password=os.getenv("DB_PASSWORD"),
        dsn=os.getenv("DB_DSN")
    )

def delete_table(connection, table_to_delete):
    """Deletes the specified table from Oracle database"""
    with connection.cursor() as cursor:
        try:
            cursor.execute(f"DROP TABLE {table_to_delete} PURGE")
            logging.info(f"Table {table_to_delete} deleted successfully.")
        except oracledb.DatabaseError as e:
            logging.error(f"Error deleting table {table_to_delete}: {e}")

def prepare_data(data):
    """Prepares data by converting date columns and cleaning column names"""
    # Convert date columns
    if 'H_DATETIME' in data.columns:
        data['H_DATETIME'] = pd.to_datetime(data['H_DATETIME'], format='%d.%m.%Y %H:%M', errors='coerce')
    if 'A_DATETIME' in data.columns:
        data['A_DATETIME'] = pd.to_datetime(data['A_DATETIME'], format='%d.%m.%Y %H:%M', errors='coerce')
    
    # Clean and format column names for Oracle
    data.columns = data.columns.str.replace('[^0-9a-zA-Z]+', '_', regex=True).str.upper()
    return data

def create_table(cursor, data, table_name):
    """Creates a new table in Oracle with appropriate columns based on the DataFrame schema"""
    columns_def = [f"{col} {get_oracle_type(data[col].dtype)}" for col in data.columns]
    create_table_sql = f"CREATE TABLE {table_name} ({', '.join(columns_def)})"
    logging.info(f"Creating table {table_name} with SQL:\n{create_table_sql}")
    cursor.execute(create_table_sql)

def insert_data_batch(cursor, data, table_name, batch_size=1000):
    """Inserts data into the table in batches"""
    columns = ', '.join(data.columns)
    placeholders = ', '.join([f":{i + 1}" for i in range(len(data.columns))])
    insert_sql = f"INSERT INTO {table_name} ({columns}) VALUES ({placeholders})"
    rows_to_insert = [tuple(clean_value(row[col]) for col in data.columns) for _, row in data.iterrows()]
    
    # Insert in batches
    successful_inserts = 0
    for start in range(0, len(rows_to_insert), batch_size):
        batch = rows_to_insert[start:start + batch_size]
        try:
            cursor.executemany(insert_sql, batch)
            successful_inserts += len(batch)
            logging.info(f"Inserted {successful_inserts} rows so far...")
        except Exception as e:
            logging.error(f"Failed to insert batch at row {start}: {e}")

def create_and_upload(connection, csv_file_path, table_name):
    """Creates a table and uploads data from a CSV file to the Oracle database"""
    data = pd.read_csv(csv_file_path).loc[:, ~pd.read_csv(csv_file_path).columns.str.contains('^Unnamed')]
    data = prepare_data(data)
    
    with connection.cursor() as cursor:
        delete_table(connection, table_name)
        create_table(cursor, data, table_name)
        
        # Insert data in batches
        insert_data_batch(cursor, data, table_name)
        
        # Final commit after batch insertions
        connection.commit()
        logging.info(f"Upload to table {table_name} completed.")

def show_table(connection, table_name):
    """Fetches and displays data from the specified Oracle table"""
    with connection.cursor() as cursor:
        cursor.execute(f"SELECT * FROM {table_name}")
        rows = cursor.fetchall()
        column_names = [desc[0] for desc in cursor.description]
    return pd.DataFrame(rows, columns=column_names)


In [9]:
# #  update csvs as tables into oracle db

# Define the folder path containing the CSV files
csv_folder_path = r"C:\Users\gebel\github\football_stats\data\htdatan"
csv_files = glob.glob(os.path.join(csv_folder_path, "*.csv"))
csv_files = ['C:\\Users\\gebel\\github\\football_stats\\data\\htdatan\\sa_2425.csv']

In [10]:


for csv_file_path in csv_files:
    table_name = os.path.splitext(os.path.basename(csv_file_path))[0].upper()
    
    # Establish a connection and upload the CSV file as a table in Oracle
    try:
        with connect_to_oracle() as connection:
            create_and_upload(connection, csv_file_path, table_name)
            logging.info(f"Successfully uploaded {csv_file_path} to table {table_name}")
    except Exception as e:
        logging.error(f"Failed to upload {csv_file_path} to table {table_name}: {e}")


2024-11-03 01:03:45,097 - INFO - Table SA_2425 deleted successfully.
2024-11-03 01:03:45,100 - INFO - Creating table SA_2425 with SQL:
CREATE TABLE SA_2425 (H_DATETIME VARCHAR2(500), H_GAMEINFO VARCHAR2(500), H_TEAMNAMES VARCHAR2(500), H_GOALS NUMBER, H_BALL_POSSESSION NUMBER, H_GOAL_ATTEMPTS NUMBER, H_SHOTS_ON_GOAL NUMBER, H_SHOTS_OFF_GOAL NUMBER, H_BLOCKED_SHOTS NUMBER, H_FREE_KICKS NUMBER, H_CORNER_KICKS NUMBER, H_OFFSIDES NUMBER, H_GOALKEEPER_SAVES NUMBER, H_FOULS NUMBER, H_YELLOW_CARDS NUMBER, H_RED_CARDS NUMBER, H_TOTAL_PASSES NUMBER, H_COMPLETED_PASSES NUMBER, H_TACKLES NUMBER, H_ATTACKS NUMBER, H_DANGEROUS_ATTACKS NUMBER, A_DATETIME VARCHAR2(500), A_GAMEINFO VARCHAR2(500), A_TEAMNAMES VARCHAR2(500), A_GOALS NUMBER, A_BALL_POSSESSION NUMBER, A_GOAL_ATTEMPTS NUMBER, A_SHOTS_ON_GOAL NUMBER, A_SHOTS_OFF_GOAL NUMBER, A_BLOCKED_SHOTS NUMBER, A_FREE_KICKS NUMBER, A_CORNER_KICKS NUMBER, A_OFFSIDES NUMBER, A_GOALKEEPER_SAVES NUMBER, A_FOULS NUMBER, A_YELLOW_CARDS NUMBER, A_RED_CARDS NUM

In [12]:
connection = connect_to_oracle()
df = show_table(connection, "SA_2425")
df

Unnamed: 0,H_DATETIME,H_GAMEINFO,H_TEAMNAMES,H_GOALS,H_BALL_POSSESSION,H_GOAL_ATTEMPTS,H_SHOTS_ON_GOAL,H_SHOTS_OFF_GOAL,H_BLOCKED_SHOTS,H_FREE_KICKS,...,A_FOULS,A_YELLOW_CARDS,A_RED_CARDS,A_TOTAL_PASSES,A_COMPLETED_PASSES,A_TACKLES,A_ATTACKS,A_DANGEROUS_ATTACKS,H_CAT,GAMEURL
0,29.09.2024 18:00,FOOTBALL\r\nITALY: SERIE A - ROUND 6,Empoli,0,36,3,0,2,1,5,...,6,,,309,276,,,,cat115,https://www.flashscore.com/match/0YqKhheT/#/ma...
1,29.09.2024 20:45,FOOTBALL\r\nITALY: SERIE A - ROUND 6,Napoli,0,36,5,0,4,1,2,...,2,,,319,289,,,,cat116,https://www.flashscore.com/match/WETw9DuA/#/ma...
2,29.09.2024 20:45,FOOTBALL\r\nITALY: SERIE A - ROUND 6,Napoli,2,62,6,2,2,2,7,...,6,2.0,,229,210,,,,cat117,https://www.flashscore.com/match/WETw9DuA/#/ma...
3,30.09.2024 20:45,FOOTBALL\r\nITALY: SERIE A - ROUND 6,Parma,2,67,10,3,6,1,13,...,9,2.0,,142,99,,,,cat118,https://www.flashscore.com/match/vuXo7iAM/#/ma...
4,30.09.2024 20:45,FOOTBALL\r\nITALY: SERIE A - ROUND 6,Parma,0,55,8,1,4,3,9,...,7,0.0,,208,167,,,,cat119,https://www.flashscore.com/match/vuXo7iAM/#/ma...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
155,20.10.2024 18:00,FOOTBALL\r\nITALY: SERIE A - ROUND 8,Cagliari,1,43,9,2,3,4,7,...,7,2.0,,234,208,,,,cat15,https://www.flashscore.com/match/GMBD7YY8/#/ma...
156,20.10.2024 20:45,FOOTBALL\r\nITALY: SERIE A - ROUND 8,AS Roma,0,56,9,2,1,6,7,...,7,3.0,,184,151,,,,cat16,https://www.flashscore.com/match/0lpzB7u1/#/ma...
157,20.10.2024 20:45,FOOTBALL\r\nITALY: SERIE A - ROUND 8,AS Roma,0,54,5,1,4,0,6,...,6,,,216,185,,,,cat17,https://www.flashscore.com/match/0lpzB7u1/#/ma...
158,21.10.2024 20:45,FOOTBALL\r\nITALY: SERIE A - ROUND 8,Verona,0,58,7,3,3,1,7,...,7,1.0,,184,152,,,,cat18,https://www.flashscore.com/match/McP0d8Q7/#/ma...


In [None]:
# delete_table(connection, "EXAMPLE_TABLE")

Error deleting table EXAMPLE_TABLE: ORA-00942: table or view "ADMIN"."EXAMPLE_TABLE" does not exist
Help: https://docs.oracle.com/error-help/db/ora-00942/
