In [1]:
from xPoints import *

import argparse
import json
import time

# Database connection
import sqlalchemy

def create_db_connection(args):
    """
    Creates a connection to the database specified in args
    :param: args -> mapping with the keys 'username', 'password', 'server' and 'database'
    :return: Returns the active connection
    :raises KeyError: if one of the required keys is missing from args
    """
    # Local import keeps this function self-contained with respect to the import cell.
    from urllib.parse import quote

    # Credentials are percent-encoded because characters such as '@', ':' or '/'
    # are delimiters inside a database URL; left raw they would make the URL
    # split at the wrong place. safe='' forces '/' to be encoded as well.
    username = quote(str(args['username']), safe='')
    password = quote(str(args['password']), safe='')
    server = args['server']
    database = args['database']

    # NOTE(review): Trusted_Connection=yes is sent together with an explicit
    # username/password -- confirm which authentication mode is intended.
    engine = sqlalchemy.create_engine(
        f'mssql+pyodbc://{username}:{password}@{server}/{database}?driver=SQL Server&Trusted_Connection=yes')
    return engine.connect()


def check_existing_data(conn, table_name, dataframe, colnames):
    """
    Compares existing and new data and filters new rows to append to the database
    :param: conn -> DB connection
    :param: table_name -> name of the table to read the existing rows from
    :param: dataframe -> new data obtained
    :param: colnames -> columns to compare
    :return: a dataframe with the non-existing data; the whole input dataframe
             when the existing table cannot be read or compared
    """
    try:
        data = pd.read_sql_table(table_name=table_name, con=conn)

        new = dataframe[colnames]
        old = data[colnames]

        if new.empty or old.empty:
            # Nothing to compare against (or nothing to filter): every
            # incoming row counts as new.
            join = dataframe
        else:
            # One tuple per row, built from the comparison columns only.
            new_keys = new.apply(tuple, axis=1)
            old_keys = old.apply(tuple, axis=1)
            # Filter with a positional boolean mask. Using index labels with
            # .iloc would pick the wrong rows (or fail) whenever the incoming
            # dataframe does not carry a default 0..n-1 index.
            is_new = (~new_keys.isin(old_keys)).to_numpy()
            join = dataframe[is_new]
    except Exception as e:
        # Table not created...
        print(f'{e}')
        join = dataframe

    return join



def update_table(connection, table_name, data, colnames):
    """
    Appends to the SQL table only the rows that check_existing_data reports as new
    :param: connection -> DB connection handed to both the check and the insert
    :param: table_name -> destination table
    :param: data -> candidate rows
    :param: colnames -> columns used to decide whether a row already exists
    :return: the rows that were sent to the database
    """
    rows_to_insert = check_existing_data(connection, table_name, data, colnames)

    # if_exists='append' adds to the table instead of replacing it; the
    # dataframe index is not written as a column.
    rows_to_insert.to_sql(
        table_name,
        connection,
        if_exists='append',
        index=False,
    )
    return rows_to_insert

In [2]:
# Open the database connection using the settings stored in the JSON file.
db_file = 'db.json'
with open(db_file) as db:
    connection = create_db_connection(json.load(db))

# For each dataset: read the CSV, push the rows that update_table reports as
# new, then show the shape of what was inserted.

# Matches are keyed by Match_id.
matches = pd.read_csv('datasets/matches.csv')
matches = update_table(
    connection, 'Match', matches, ['Match_id'])
print(matches.shape)

# Outfield player stats are keyed by the (Player_id, Squad_id) pair.
players = pd.read_csv('datasets/all_players.csv')
players = update_table(
    connection, 'PlayerStats', players, ['Player_id', 'Squad_id'])
print(players.shape)

# Goalkeeper stats use the same (Player_id, Squad_id) key.
goalkeepers = pd.read_csv('datasets/gk_players.csv')
goalkeepers = update_table(
    connection, 'GoalkeepingStats', goalkeepers, ['Player_id', 'Squad_id'])
print(goalkeepers.shape)

Table Match not found
(306, 17)
Table PlayerStats not found
(1328, 78)
Table GoalkeepingStats not found
(106, 34)
