In [16]:
import pandas as pd
import sqlite3

In [17]:
def import_csv(file):
    data = pd.read_csv(f'data/{file}.csv')
    return data

In [18]:
results = import_csv('results')
match_stats = import_csv('match_stats')
season_league_tiers = import_csv('season_league_tiers')
players = import_csv('players')
player_apps = import_csv('player_apps')
subs = import_csv('subs')
goals = import_csv('goals').sort_values(by=['game_date', 'goal_min'])
cards = import_csv('cards')
lge_tabs = import_csv('league_tables')
eos_lge_tabs = import_csv('league_tables_eos')
managers = import_csv('managers')
manager_reigns = import_csv('manager_reigns')
cup_details = import_csv('cup_game_details')
sponsors_cups = import_csv('sponsors_cups')
sponsors_leagues = import_csv('sponsors_leagues')

In [19]:
dfs = [results, match_stats, season_league_tiers, players, player_apps, subs, goals, cards, lge_tabs, eos_lge_tabs, managers, manager_reigns, cup_details, sponsors_cups, sponsors_leagues]

In [20]:
df_names = ['results', 'match_stats', 'season_league_tiers', 'players', 'player_apps', 'subs', 'goals', 'cards', 'league_tables', 'league_tables_eos', 'managers', 'manager_reigns', 'cup_game_details', 'sponsors_cups', 'sponsors_leagues']

In [21]:
all_dfs = []

for i in range(len(dfs)):
    d = {
        'name': df_names[i],
        'df': dfs[i]
    }
    all_dfs.append(d)

In [22]:
# Connect to the SQLite database (or create it if it doesn't exist)
connection = sqlite3.connect('trfc.db')

In [23]:
# Create a cursor object
cursor = connection.cursor()

In [24]:
def add_goal_no(df):
    df['goal_no'] = df.groupby(['game_date', 'player_id']).cumcount() + 1
    return df[['game_date', 'player_id', 'goal_no', 'goal_min', 'penalty', 'own_goal']]

In [25]:
def csv_to_sqlite(csv_file, db_file, table_name):
    # Read the CSV file
    df = pd.read_csv(f'./data/{csv_file}.csv')

    if table_name == 'goals':
        df = add_goal_no(df)
    if table_name == 'cards':
        df = df.drop(columns=['player_name'])

    # Connect to SQLite database (or create it if it doesn't exist)
    conn = sqlite3.connect(db_file)

    # Write the data to a SQLite table
    df.to_sql(table_name, conn, if_exists='replace', index=False)

    # Close the connection
    conn.close()

In [26]:
for df in df_names:
    csv_to_sqlite(df, 'trfc.db', df)

In [27]:
import time

def create_index(db='trfc.db', table='results', column='game_date'):
    with sqlite3.connect(db) as connection:  # Removed extra .db extension
        cursor = connection.cursor()

        # SQL command to create an index on the name column
        create_index_query = f'''
        CREATE INDEX IF NOT EXISTS idx_name ON {table} ({column});
        '''

        # Measure the start time
        start_time = time.perf_counter_ns()

        # Execute the SQL command to create the index
        cursor.execute(create_index_query)

        # Measure the start time
        end_time = time.perf_counter_ns()

        # Commit the changes
        connection.commit()

        print(f"Index on '{column}' column successfully created in {table} table!")

        # Calculate the total time taken
        elapsed_time = (end_time - start_time) / 1000

        # Display the results and the time taken
        print(f"Query completed in {elapsed_time:.5f} microseconds.")

In [28]:
primary_keys = [
    {
        'df': 'results',
        'pk': 'game_date'
    },
    {
        'df': 'match_stats',
        'pk': 'game_date'
    },
    {
        'df': 'season_league_tiers',
        'pk': 'season'
    },
    {
        'df': 'players',
        'pk': 'player_id'
    },
    {
        'df': 'player_apps',
        'pk': ['game_date', 'player_id']
    },
    {
        'df': 'subs',
        'pk': ['game_date', 'player_id']
    },
    {
        'df': 'goals',
        'pk': ['game_date', 'player_id', 'goal_no']
    },
    {
        'df': 'cards',
        'pk': ['game_date', 'player_id', 'card_type']
    },
    {
        'df': 'league_tables',
        'pk': 'game_date'
    },
    {
        'df': 'league_tables_eos',
        'pk': 'season'
    },
    {
        'df': 'cup_game_details',
        'pk': ['cup_game_date']
    }
]

In [29]:
for db in primary_keys:
    create_index(table=db['df'], column=db['pk'])

Index on 'game_date' column successfully created in results table!
Query completed in 1930.04200 microseconds.
Index on 'game_date' column successfully created in match_stats table!
Query completed in 180.58300 microseconds.
Index on 'season' column successfully created in season_league_tiers table!
Query completed in 88.50000 microseconds.
Index on 'player_id' column successfully created in players table!
Query completed in 69.00000 microseconds.
Index on '['game_date', 'player_id']' column successfully created in player_apps table!
Query completed in 158.00000 microseconds.
Index on '['game_date', 'player_id']' column successfully created in subs table!
Query completed in 84.95800 microseconds.
Index on '['game_date', 'player_id', 'goal_no']' column successfully created in goals table!
Query completed in 66.62500 microseconds.
Index on '['game_date', 'player_id', 'card_type']' column successfully created in cards table!
Query completed in 63.79200 microseconds.
Index on 'game_date' c

In [30]:
# Use 'with' to connect to the SQLite database
with sqlite3.connect('trfc.db') as connection:

    # Create a cursor object
    cursor = connection.cursor()

    # Write the SQL command to select all records from the Students table
    select_query = '''
        SELECT * FROM goals;
    '''

    # Execute the SQL command
    cursor.execute(select_query)

    # Fetch all records
    all_results = cursor.fetchall()

    # Display results in the terminal
    print("All goals:")
    for r in all_results:
        print(r)

All goals:
('1921-08-27', 'StuartTom18931025', 1.0, None, None, 0)
('1921-08-27', 'MilnesCharles21885', 1.0, None, None, 0)
('1921-08-27', 'GrovesFred18920506', 1.0, None, None, 0)
('1921-08-27', 'FordJohn', 1.0, None, None, 0)
('1921-09-03', 'PrenticeJohn18981019', 1.0, None, None, 0)
('1921-09-24', 'BulloughDenis18951129', 1.0, None, None, 0)
('1921-09-24', 'PrenticeJohn18981019', 1.0, None, None, 0)
('1921-10-01', 'GrovesFred18920506', 1.0, None, None, 0)
('1921-10-01', 'BulloughDenis18951129', 1.0, None, None, 0)
('1921-10-08', 'BulloughDenis18951129', 1.0, None, None, 0)
('1921-10-15', 'GrovesFred18920506', 1.0, None, None, 0)
('1921-10-15', 'GrovesFred18920506', 2.0, None, None, 0)
('1921-10-15', 'BulloughDenis18951129', 1.0, None, None, 0)
('1921-10-15', 'CunninghamCharlie', 1.0, None, None, 0)
('1921-10-15', 'CunninghamCharlie', 2.0, None, None, 0)
('1921-10-15', 'CunninghamCharlie', 3.0, None, None, 0)
('1921-10-15', 'CunninghamCharlie', 4.0, None, None, 0)
('1921-10-22', 'Cam