# Creating SQL Databases

### Chuẩn bị các database mẫu theo các định dạng khác nhau
- SQL Server

- MySQL

- PostgreSQL

- SQLite

- File CSV

- File Parquet

1. Pandas' `.to_sql` method
2. Manually via cursor and `INSERT INTO`

### Tham khảo
- https://www.kaggle.com/abecklas/fifa-world-cup
- https://www.kaggle.com/hugomathien/soccer
- https://www.football-data.co.uk/englandm.php
- https://datahub.io/collections/football

In [2]:
import pandas as pd
import numpy as np
import os


In [4]:
# Load the sample World Cup CSV files (third-party data downloaded from Kaggle).
# The files are expected under ../data/external relative to this notebook.
external_data_path = os.path.join(os.path.pardir, 'data', 'external')

# Keep the file paths under their own names so that `WorldCupMatches` and
# `WorldCups` only ever refer to DataFrames.
matches_csv_path = os.path.join(external_data_path, 'WorldCupMatches.csv')
cups_csv_path = os.path.join(external_data_path, 'WorldCups.csv')

# skipinitialspace=True drops whitespace that follows a delimiter.
WorldCupMatches = pd.read_csv(matches_csv_path, delimiter=',', skipinitialspace=True)
WorldCups = pd.read_csv(cups_csv_path, delimiter=',', skipinitialspace=True)

# NOTE(review): the WorldCups 'Attendance' values use '.' as a thousands
# separator (e.g. 1.045.246), so the column is read as text rather than as a
# number -- confirm whether it should be converted before export.
display(WorldCupMatches.head(), WorldCups.head())

Unnamed: 0,Year,Datetime,Stage,Stadium,City,Home Team Name,Home Team Goals,Away Team Goals,Away Team Name,Win conditions,Attendance,Half-time Home Goals,Half-time Away Goals,Referee,Assistant 1,Assistant 2,RoundID,MatchID,Home Team Initials,Away Team Initials
0,1930.0,13 Jul 1930 - 15:00,Group 1,Pocitos,Montevideo,France,4.0,1.0,Mexico,,4444.0,3.0,0.0,LOMBARDI Domingo (URU),CRISTOPHE Henry (BEL),REGO Gilberto (BRA),201.0,1096.0,FRA,MEX
1,1930.0,13 Jul 1930 - 15:00,Group 4,Parque Central,Montevideo,USA,3.0,0.0,Belgium,,18346.0,2.0,0.0,MACIAS Jose (ARG),MATEUCCI Francisco (URU),WARNKEN Alberto (CHI),201.0,1090.0,USA,BEL
2,1930.0,14 Jul 1930 - 12:45,Group 2,Parque Central,Montevideo,Yugoslavia,2.0,1.0,Brazil,,24059.0,2.0,0.0,TEJADA Anibal (URU),VALLARINO Ricardo (URU),BALWAY Thomas (FRA),201.0,1093.0,YUG,BRA
3,1930.0,14 Jul 1930 - 14:50,Group 3,Pocitos,Montevideo,Romania,3.0,1.0,Peru,,2549.0,1.0,0.0,WARNKEN Alberto (CHI),LANGENUS Jean (BEL),MATEUCCI Francisco (URU),201.0,1098.0,ROU,PER
4,1930.0,15 Jul 1930 - 16:00,Group 1,Parque Central,Montevideo,Argentina,1.0,0.0,France,,23409.0,0.0,0.0,REGO Gilberto (BRA),SAUCEDO Ulises (BOL),RADULESCU Constantin (ROU),201.0,1085.0,ARG,FRA


Unnamed: 0,Year,Country,Winner,Runners-Up,Third,Fourth,GoalsScored,QualifiedTeams,MatchesPlayed,Attendance
0,1930,Uruguay,Uruguay,Argentina,USA,Yugoslavia,70,13,18,590.549
1,1934,Italy,Italy,Czechoslovakia,Germany,Austria,70,16,17,363.000
2,1938,France,Italy,Hungary,Brazil,Sweden,84,15,18,375.700
3,1950,Brazil,Uruguay,Brazil,Sweden,Spain,88,13,22,1.045.246
4,1954,Switzerland,Germany FR,Hungary,Austria,Uruguay,140,16,26,768.607


## Using Pandas' `.to_sql()` method

In [5]:
from sqlalchemy import create_engine

# A relative SQLite path must be written with the 'sqlite:///' prefix.
sqlite_url = 'sqlite:///WorldCups.db'

# echo controls whether the engine's actions are written to the output.
engine = create_engine(sqlite_url, echo=True)

In [6]:
# as a function
def create_sql_table(df, table_name, engine, if_exists='replace', index=True):
    """Write a DataFrame to a SQL table using pandas' ``DataFrame.to_sql``.

    Parameters
    ----------
    df : pandas.DataFrame
        The data to write.
    table_name : str
        Name of the destination table.
    engine : SQLAlchemy engine/connection or sqlite3 connection
        Passed straight through to ``to_sql`` as ``con``.
    if_exists : {'fail', 'replace', 'append'}, default 'replace'
        What to do when the table already exists. The default keeps the
        original behaviour of dropping and recreating the table, so the
        cell can be re-run safely.
    index : bool, default True
        Whether the DataFrame index is written as a column. The default
        keeps the original behaviour (an extra "index" column for a
        default RangeIndex); pass ``False`` to store only the data columns.

    Returns
    -------
    None
    """
    df.to_sql(table_name, con=engine, if_exists=if_exists, index=index)

In [7]:
# Write the WorldCups DataFrame to a table named 'WorldCups' through `engine`.
create_sql_table(WorldCups, 'WorldCups', engine)

2022-03-01 09:34:20,682 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("WorldCups")
2022-03-01 09:34:20,685 INFO sqlalchemy.engine.Engine [raw sql] ()
2022-03-01 09:34:20,687 INFO sqlalchemy.engine.Engine PRAGMA temp.table_info("WorldCups")
2022-03-01 09:34:20,688 INFO sqlalchemy.engine.Engine [raw sql] ()
2022-03-01 09:34:20,690 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2022-03-01 09:34:20,691 INFO sqlalchemy.engine.Engine 
CREATE TABLE "WorldCups" (
	"index" BIGINT, 
	"Year" BIGINT, 
	"Country" TEXT, 
	"Winner" TEXT, 
	"Runners-Up" TEXT, 
	"Third" TEXT, 
	"Fourth" TEXT, 
	"GoalsScored" BIGINT, 
	"QualifiedTeams" BIGINT, 
	"MatchesPlayed" BIGINT, 
	"Attendance" TEXT
)


2022-03-01 09:34:20,692 INFO sqlalchemy.engine.Engine [no key 0.00050s] ()
2022-03-01 09:34:20,700 INFO sqlalchemy.engine.Engine CREATE INDEX "ix_WorldCups_index" ON "WorldCups" ("index")
2022-03-01 09:34:20,701 INFO sqlalchemy.engine.Engine [no key 0.00113s] ()
2022-03-01 09:34:20,707 INFO sqlalchemy.engine

In [8]:
# Write the WorldCupMatches DataFrame to a table named 'WorldCupMatches' through `engine`.
create_sql_table(WorldCupMatches, 'WorldCupMatches', engine)

2022-03-01 09:34:31,782 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("WorldCupMatches")
2022-03-01 09:34:31,782 INFO sqlalchemy.engine.Engine [raw sql] ()
2022-03-01 09:34:31,782 INFO sqlalchemy.engine.Engine PRAGMA temp.table_info("WorldCupMatches")
2022-03-01 09:34:31,782 INFO sqlalchemy.engine.Engine [raw sql] ()
2022-03-01 09:34:31,794 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2022-03-01 09:34:31,797 INFO sqlalchemy.engine.Engine 
CREATE TABLE "WorldCupMatches" (
	"index" BIGINT, 
	"Year" FLOAT, 
	"Datetime" TEXT, 
	"Stage" TEXT, 
	"Stadium" TEXT, 
	"City" TEXT, 
	"Home Team Name" TEXT, 
	"Home Team Goals" FLOAT, 
	"Away Team Goals" FLOAT, 
	"Away Team Name" TEXT, 
	"Win conditions" TEXT, 
	"Attendance" FLOAT, 
	"Half-time Home Goals" FLOAT, 
	"Half-time Away Goals" FLOAT, 
	"Referee" TEXT, 
	"Assistant 1" TEXT, 
	"Assistant 2" TEXT, 
	"RoundID" FLOAT, 
	"MatchID" FLOAT, 
	"Home Team Initials" TEXT, 
	"Away Team Initials" TEXT
)


2022-03-01 09:34:31,798 INFO sqlalchem

### Re-reading in our data

In [9]:
# Read the stored table back into a DataFrame to check the export.
# `engine.execute()` is deprecated in SQLAlchemy 1.4 and removed in 2.0, so the
# query goes through pandas, which opens and releases the connection itself
# and takes the column names from the result set.
pd.read_sql_query('SELECT * FROM WorldCupMatches;', con=engine)

2022-03-01 09:34:37,540 INFO sqlalchemy.engine.Engine SELECT * FROM WorldCupMatches;
2022-03-01 09:34:37,541 INFO sqlalchemy.engine.Engine [raw sql] ()


Unnamed: 0,index,Year,Datetime,Stage,Stadium,City,Home Team Name,Home Team Goals,Away Team Goals,Away Team Name,...,Attendance,Half-time Home Goals,Half-time Away Goals,Referee,Assistant 1,Assistant 2,RoundID,MatchID,Home Team Initials,Away Team Initials
0,0,1930.0,13 Jul 1930 - 15:00,Group 1,Pocitos,Montevideo,France,4.0,1.0,Mexico,...,4444.0,3.0,0.0,LOMBARDI Domingo (URU),CRISTOPHE Henry (BEL),REGO Gilberto (BRA),201.0,1096.0,FRA,MEX
1,1,1930.0,13 Jul 1930 - 15:00,Group 4,Parque Central,Montevideo,USA,3.0,0.0,Belgium,...,18346.0,2.0,0.0,MACIAS Jose (ARG),MATEUCCI Francisco (URU),WARNKEN Alberto (CHI),201.0,1090.0,USA,BEL
2,2,1930.0,14 Jul 1930 - 12:45,Group 2,Parque Central,Montevideo,Yugoslavia,2.0,1.0,Brazil,...,24059.0,2.0,0.0,TEJADA Anibal (URU),VALLARINO Ricardo (URU),BALWAY Thomas (FRA),201.0,1093.0,YUG,BRA
3,3,1930.0,14 Jul 1930 - 14:50,Group 3,Pocitos,Montevideo,Romania,3.0,1.0,Peru,...,2549.0,1.0,0.0,WARNKEN Alberto (CHI),LANGENUS Jean (BEL),MATEUCCI Francisco (URU),201.0,1098.0,ROU,PER
4,4,1930.0,15 Jul 1930 - 16:00,Group 1,Parque Central,Montevideo,Argentina,1.0,0.0,France,...,23409.0,0.0,0.0,REGO Gilberto (BRA),SAUCEDO Ulises (BOL),RADULESCU Constantin (ROU),201.0,1085.0,ARG,FRA
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4567,4567,,,,,,,,,,...,,,,,,,,,,
4568,4568,,,,,,,,,,...,,,,,,,,,,
4569,4569,,,,,,,,,,...,,,,,,,,,,
4570,4570,,,,,,,,,,...,,,,,,,,,,


# Parquet

In [12]:
# Export each DataFrame to its own Parquet file. WorldCups previously went to
# 'WorldCupMatches.parquet' as well, which overwrote the matches file that had
# just been written.
WorldCupMatches.to_parquet('WorldCupMatches.parquet')
WorldCups.to_parquet('WorldCups.parquet')
print("sucessfully!")

sucessfully!


# MySQL

In [2]:
!pip install mysql-connector-python

Collecting mysql-connector-python
  Downloading mysql_connector_python-8.0.28-cp39-cp39-win_amd64.whl (7.2 MB)
Installing collected packages: mysql-connector-python
Successfully installed mysql-connector-python-8.0.28


In [3]:
import getpass
import os

import mysql.connector as msql
from mysql.connector import Error

# Do not keep the password in the notebook: read it from the MYSQL_PASSWORD
# environment variable, and prompt for it when the variable is not set.
mysql_password = os.environ.get('MYSQL_PASSWORD') or getpass.getpass('MySQL password: ')

conn = None  # stays None if msql.connect() raises, so the cleanup below is safe
try:
    conn = msql.connect(host='localhost', user='root',
                        password=mysql_password)
    if conn.is_connected():
        cursor = conn.cursor()
        try:
            # IF NOT EXISTS lets this cell be re-run without failing once the
            # database has been created.
            cursor.execute("CREATE DATABASE IF NOT EXISTS WorldCupMatches")
            print("Database is created")
        finally:
            cursor.close()
except Error as e:
    print("Error while connecting to MySQL", e)
finally:
    # Release the server connection whether or not the statement succeeded.
    if conn is not None and conn.is_connected():
        conn.close()

Error while connecting to MySQL 2003 (HY000): Can't connect to MySQL server on 'localhost:3306' (10061)
