In [20]:
import oracledb
import os
import pandas as pd
from dotenv import load_dotenv
import numpy as np

def get_oracle_type(dtype):
    """Map pandas dtypes to Oracle data types"""
    if pd.api.types.is_integer_dtype(dtype):
        return "NUMBER"
    elif pd.api.types.is_float_dtype(dtype):
        return "NUMBER"
    elif pd.api.types.is_datetime64_any_dtype(dtype):
        return "TIMESTAMP"
    elif pd.api.types.is_string_dtype(dtype):
        return "VARCHAR2(500)"
    else:
        return "VARCHAR2(500)"  # Default to VARCHAR2 for unknown types

def clean_value(val):
    """Convert pandas/numpy types to Python native types"""
    if pd.isna(val):
        return None
    elif isinstance(val, (np.int64, np.int32)):
        return int(val)
    elif isinstance(val, (np.float64, np.float32)):
        return float(val)
    elif isinstance(val, pd.Timestamp):
        return val.strftime('%Y-%m-%d %H:%M:%S')
    return val


def connect_to_oracle():
    try:
        load_dotenv()
        
        oracledb.init_oracle_client(
            lib_dir=r"{}".format(os.getenv("ORACLE_CLIENT_LIB_DIR")),
            config_dir=r"{}".format(os.getenv("ORACLE_CONFIG_DIR"))
        )

        connection = oracledb.connect(
            user=os.getenv("DB_USER"),
            password=os.getenv("DB_PASSWORD"),
            dsn=os.getenv("DB_DSN")
        )
        return connection
    except Exception as e:
        print(f"An error occurred: {str(e)}")
        raise


def delete_table(connection, table_to_delete):
    try:
        cursor = connection.cursor()
        cursor.execute(f"DROP TABLE {table_to_delete}")
        print(f"Table {table_to_delete} deleted successfully.")
    except oracledb.DatabaseError as e:
        error, = e.args
        print(f"Error deleting table {table_to_delete}: {error.message}")
    finally:
        cursor.close()

        
def create_and_upload(connection, csv_file_path, table_name):
    try:
        cursor = connection.cursor()
        
        # Load CSV data
        data = pd.read_csv(csv_file_path)
        data = data.loc[:, ~data.columns.str.contains('^Unnamed')]
        
        # Convert date columns
        if 'H_DATETIME' in data.columns:
            data['H_DATETIME'] = pd.to_datetime(data['H_DATETIME'], format='%d.%m.%Y %H:%M', errors='coerce')
        if 'A_DATETIME' in data.columns:
            data['A_DATETIME'] = pd.to_datetime(data['A_DATETIME'], format='%d.%m.%Y %H:%M', errors='coerce')
        
        # Rename columns to Oracle-friendly format
        data.columns = data.columns.str.replace('[^0-9a-zA-Z]+', '_', regex=True).str.upper()
        
        # Drop existing table if it exists
        try:
            cursor.execute(f"DROP TABLE {table_name} PURGE")
            print(f"Dropped existing table {table_name}")
        except:
            print(f"Table {table_name} does not exist yet")
        
        # Create table with appropriate columns
        columns_def = []
        for col in data.columns:
            oracle_type = get_oracle_type(data[col].dtype)
            columns_def.append(f"{col} {oracle_type}")
        
        create_table_sql = f"""
        CREATE TABLE {table_name} (
            {', '.join(columns_def)}
        )
        """
        
        print("\nCreating table with SQL:")
        print(create_table_sql)
        
        cursor.execute(create_table_sql)
        print(f"\nCreated table {table_name}")
        
        # Generate insert statement
        columns = ', '.join(data.columns)
        placeholders = ', '.join([f":{i + 1}" for i in range(len(data.columns))])
        insert_sql = f"INSERT INTO {table_name} ({columns}) VALUES ({placeholders})"
        
        # Insert data
        successful_inserts = 0
        failed_inserts = 0
        
        for idx, row in data.iterrows():
            try:
                row_data = [clean_value(row[col]) for col in data.columns]
                cursor.execute(insert_sql, row_data)
                successful_inserts += 1
                
                if successful_inserts % 1000 == 0:
                    connection.commit()
                    print(f"Processed {successful_inserts} rows...")
                    
            except Exception as e:
                failed_inserts += 1
                print(f"Error on row {idx + 1}: {str(e)}")
                print(f"Problematic data: {row_data}")
                continue
        
        # Final commit
        connection.commit()
        
        print(f"\nUpload completed:")
        print(f"Successfully inserted: {successful_inserts} rows")
        print(f"Failed inserts: {failed_inserts} rows")
        
        # Verify table structure
        cursor.execute(f"""
            SELECT column_name, data_type, data_length 
            FROM user_tab_columns 
            WHERE table_name = '{table_name.upper()}'
            ORDER BY column_id
        """)
        
        print("\nFinal table structure:")
        for col in cursor.fetchall():
            print(f"{col[0]}: {col[1]}({col[2]})")
        
    except Exception as e:
        print(f"An error occurred: {str(e)}")
        raise
        
    finally:
        if 'cursor' in locals():
            cursor.close()
        if 'connection' in locals():
            connection.close()

def show_table(connection, table_name):
    cursor = connection.cursor()
    cursor.execute(f"SELECT * FROM {table_name}")
    # Fetch all rows and column names
    rows = cursor.fetchall()
    column_names = [desc[0] for desc in cursor.description]
    # Create a DataFrame with the fetched data
    df = pd.DataFrame(rows, columns=column_names)
    # Close the cursor after reading the data
    cursor.close()
    return df

In [None]:
# #  update csvs as tables into oracle db

# get all csv files from this folder: C:\Users\gebel\github\football_stats\data\htdatan
# import glob
# csv_files = glob.glob(r"C:\Users\gebel\github\football_stats\data\htdatan\*.csv")
# csv_files

# for csv_file_path in csv_files:
#     table_name = os.path.splitext(os.path.basename(csv_file_path))[0]
#     connection = connect_to_oracle()
#     create_and_upload(connection, csv_file_path, table_name)

Table b_2122 does not exist yet

Creating table with SQL:

        CREATE TABLE b_2122 (
            H_DATETIME VARCHAR2(500), H_GAMEINFO VARCHAR2(500), H_TEAMNAMES VARCHAR2(500), H_GOALS NUMBER, H_BALL_POSSESSION NUMBER, H_GOAL_ATTEMPTS NUMBER, H_SHOTS_ON_GOAL NUMBER, H_SHOTS_OFF_GOAL NUMBER, H_BLOCKED_SHOTS NUMBER, H_FREE_KICKS NUMBER, H_CORNER_KICKS NUMBER, H_OFFSIDES NUMBER, H_GOALKEEPER_SAVES NUMBER, H_FOULS NUMBER, H_YELLOW_CARDS NUMBER, H_RED_CARDS NUMBER, H_TOTAL_PASSES NUMBER, H_COMPLETED_PASSES NUMBER, H_TACKLES NUMBER, H_ATTACKS NUMBER, H_DANGEROUS_ATTACKS NUMBER, H_THROW_IN NUMBER, A_DATETIME VARCHAR2(500), A_GAMEINFO VARCHAR2(500), A_TEAMNAMES VARCHAR2(500), A_GOALS NUMBER, A_BALL_POSSESSION NUMBER, A_GOAL_ATTEMPTS NUMBER, A_SHOTS_ON_GOAL NUMBER, A_SHOTS_OFF_GOAL NUMBER, A_BLOCKED_SHOTS NUMBER, A_FREE_KICKS NUMBER, A_CORNER_KICKS NUMBER, A_OFFSIDES NUMBER, A_GOALKEEPER_SAVES NUMBER, A_FOULS NUMBER, A_YELLOW_CARDS NUMBER, A_RED_CARDS NUMBER, A_TOTAL_PASSES NUMBER, A_COMPLET

In [21]:
connection = connect_to_oracle()
df = show_table(connection, "B_2122")
df

Unnamed: 0,H_DATETIME,H_GAMEINFO,H_TEAMNAMES,H_GOALS,H_BALL_POSSESSION,H_GOAL_ATTEMPTS,H_SHOTS_ON_GOAL,H_SHOTS_OFF_GOAL,H_BLOCKED_SHOTS,H_FREE_KICKS,...,A_FOULS,A_YELLOW_CARDS,A_RED_CARDS,A_TOTAL_PASSES,A_COMPLETED_PASSES,A_TACKLES,A_ATTACKS,A_DANGEROUS_ATTACKS,A_THROW_IN,H_CAT
0,14.05.2022 15:30,FOOTBALL\r\nGERMANY: BUNDESLIGA - ROUND 34,Bayer Leverkusen,2,53,10,5,5,,5,...,6,1.0,,232,,,56,33,,cat11
1,14.05.2022 15:30,FOOTBALL\r\nGERMANY: BUNDESLIGA - ROUND 34,Bayer Leverkusen,0,57,1,0,1,,6,...,6,2.0,,155,,,55,21,,cat12
2,14.05.2022 15:30,FOOTBALL\r\nGERMANY: BUNDESLIGA - ROUND 34,B. Monchengladbach,2,54,8,4,4,,6,...,6,1.0,,302,,,67,19,,cat13
3,14.05.2022 15:30,FOOTBALL\r\nGERMANY: BUNDESLIGA - ROUND 34,B. Monchengladbach,3,44,11,5,6,,5,...,6,1.0,,172,,,43,21,,cat14
4,14.05.2022 15:30,FOOTBALL\r\nGERMANY: BUNDESLIGA - ROUND 34,Augsburg,1,46,2,1,1,,8,...,7,1.0,,163,,,50,21,,cat15
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
607,20.11.2021 18:30,FootballFootballGERMANY: Bundesliga - Round 12,Union Berlin,2,40,7,4,3,,7,...,6,1.0,,251,,,58,28,20.0,cat13
608,21.11.2021 15:30,FootballFootballGERMANY: Bundesliga - Round 12,Freiburg,0,64,14,2,12,,10,...,9,4.0,,128,,,30,12,7.0,cat14
609,21.11.2021 15:30,FootballFootballGERMANY: Bundesliga - Round 12,Freiburg,0,52,6,4,2,,9,...,7,,,221,,,54,10,8.0,cat15
610,21.11.2021 17:30,FootballFootballGERMANY: Bundesliga - Round 12,Mainz,0,48,5,1,4,,7,...,5,1.0,,240,,,57,13,13.0,cat16


In [None]:
# delete_table(connection, "EXAMPLE_TABLE")

Error deleting table EXAMPLE_TABLE: ORA-00942: table or view "ADMIN"."EXAMPLE_TABLE" does not exist
Help: https://docs.oracle.com/error-help/db/ora-00942/
