# Testing the new database I've created in SQLite

## Establishing connection

In [1]:
import sqlite3
from uuid import uuid4
from typing import List, Optional
import pandas as pd


class SQLConnection:

    def __init__(self, db_name: str = None) -> None:
        self.current_cursor = str(uuid4())
        if db_name is None:
            self.db_name = f'.student_{self.current_cursor}.db'
        else:
            self.db_name = db_name

    def q(self, query: str) -> Optional[List[str]]:
        """Executes a query and returns the result"""
        res = None
        with sqlite3.connect(self.db_name) as con:
            cur = con.cursor()
            for q in query.split(';'):
                try:
                    res = pd.read_sql_query(q.strip(), con)
                except (TypeError, ValueError):
                    pass
        return res

    def connect(self):
        return sqlite3.connect(self.db_name)

database = SQLConnection('ATP_database')

In [2]:
# examining the schema
database.q("SELECT * FROM SQLITE_SCHEMA")

Unnamed: 0,type,name,tbl_name,rootpage,sql
0,table,MatchPlayer,MatchPlayer,2,CREATE TABLE MatchPlayer (\n\tplayer_id INTEGE...
1,index,sqlite_autoindex_MatchPlayer_1,MatchPlayer,3,
2,table,Players,Players,4,"CREATE TABLE Players (\n\tplayer_id INTEGER,\n..."
3,table,Level,Level,5,"CREATE TABLE ""Level"" (\n\tlevel_id INTEGER,\n\..."
4,table,Tournaments,Tournaments,6,CREATE TABLE Tournaments (\n\ttourney_id INTEG...
5,table,Matches,Matches,7,"CREATE TABLE Matches (\n\ttourney_id INTEGER,\..."
6,index,sqlite_autoindex_Matches_1,Matches,8,


### COMMENTS
* Can see the tables I created using dbeaver 
* Can see that two indexes have also been created
    * These are created for the tables that have a composite key (i.e. more than one column in the primary key)
    * Would like to ask Tomas what this means
* Connection is therefore successful using the SQLConnection Class I stole from a previous week
* Will put this in a separate assets folder when I create the proper python file (i.e. migrate from jupyter notebooks) 

### NEXT STEPS
* See if I can change the name of the schema from ATP_test to something more permanent ✅
* Practice populating and deleting data from the schema (might need to change the foreign key dependencies if it doesn't let me delete)
* Try to automate the population of the database using python and the csv files I have

## Adding test data into the database

### Creating the test data (players from the 2021 season)

In [79]:
# import information for all players (theres a lot of them)

df_players = pd.read_csv('data/atp_players.csv')
df_players

Unnamed: 0,player_id,name_first,name_last,hand,dob,ioc,height,wikidata_id
0,100001,Gardnar,Mulloy,R,19131122,USA,185.0,Q54544
1,100002,Pancho,Segura,R,19210620,ECU,168.0,Q54581
2,100003,Frank,Sedgman,R,19271002,AUS,180.0,Q962049
3,100004,Giuseppe,Merlo,R,19271011,ITA,,Q1258752
4,100005,Richard,Gonzalez,R,19280509,USA,188.0,Q53554
...,...,...,...,...,...,...,...,...
56767,211735,David,Saye,U,,USA,,
56768,211736,Patrick,Fletchall,U,,USA,,
56769,211737,Sean,Daryabeigi,U,,USA,,
56770,211738,Jonah,Braswell,U,,USA,,


In [47]:
# import player names from 2021 (not really useful as name is all one column whereas it is 
# split into first and last in df_players)

colnames = ['index', 'player_name']
df_players_2021 = pd.read_csv('data/2021_players.csv', names=colnames)
df_players_2021 = df_players_2021.drop('index', axis='columns')

In [74]:
# importing player_ids for those who played in the 2021 season 

colnames = ['index', 'player_id']
df_2021_player_ids = pd.read_csv('data/2021_player_ids.csv', names=colnames)
df_2021_player_ids = df_2021_player_ids.drop('index', axis='columns')

In [77]:
# merging 2021 player ids with the df_players to return the info about those who 
# played in the 2021 season 

df_2021_player_info = df_2021_player_ids.merge(df_players, on='player_id')

In [88]:
df_2021_player_info.dtypes

player_id        int64
name_first      object
name_last       object
hand            object
dob             object
ioc             object
height         float64
wikidata_id     object
dtype: object

### Using the database connection to insert into the players table 

In [81]:
database.q('SELECT * FROM players')

Unnamed: 0,player_id,first_name,last_name,hand,height,nationality,dob


In [93]:
ids = df_2021_player_info.player_id
firsts = df_2021_player_info.name_first
lasts = df_2021_player_info.name_last
hands = df_2021_player_info.hand
heights = df_2021_player_info.height
nats = df_2021_player_info.ioc
dobs = df_2021_player_info.dob

for id, first, last, hand, height, nat, dob  in zip(ids, firsts, lasts, hands, heights, nats, dobs):
    database.q(f"""
                INSERT INTO players 
                    (player_id, first_name, last_name, hand, height, nationality, dob) 
                VALUES 
                    ({id}, {first}, {last}, {hand}, {height}, {nat}, {dob});
                    """)

# having some trouble inserting into the table, unsure what the problem is (bugging me 
# and need to fix)

DatabaseError: Execution failed on sql 'INSERT INTO players 
                    (player_id, first_name, last_name, hand, height, nationality, dob) 
                VALUES 
                    ( 200711, Nicolas, Mejia, R, 185.0, COL, 20000211)': no such column: Nicolas