<h1>Python에서 MySQL 연동하기</h1>

In [2]:
import os
import mysql.connector
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [4]:
# Pull the database credentials from the environment; each value is None
# when the corresponding variable is not set.
user_name = os.getenv('DB_USER')
password = os.getenv('DB_PASS')

In [5]:
# Connect with the credentials loaded from the DB_USER / DB_PASS environment
# variables rather than embedding a user name and password in the notebook.
if user_name is None or password is None:
    raise RuntimeError('DB_USER and DB_PASS environment variables must be set')

# `password` / `database` are the primary argument names; `passwd` / `db`
# are only compatibility aliases.
connection = mysql.connector.connect(host='localhost',
                                     user=user_name,
                                     password=password,
                                     database='employees')

In [6]:
# Echo the connection object to confirm that connect() returned one.
connection

<mysql.connector.connection.MySQLConnection at 0x232ea655550>

In [7]:
# Ask the server for the table names of the `employees` schema and load the
# answer into a DataFrame.
employees_tables = pd.read_sql_query(
    sql='SHOW TABLES FROM employees',
    con=connection,
)

In [8]:
# Display the table list returned by the SHOW TABLES query.
employees_tables

Unnamed: 0,Tables_in_employees
0,departments
1,dept_emp
2,dept_manager
3,employees
4,salaries
5,titles


In [9]:
# Keep just the column of table names as a Series.
tables = employees_tables.loc[:, 'Tables_in_employees']

In [10]:
# Print the column definitions of every table listed in `tables`.
for table_name in tables:
    # Backtick-quote the identifier (doubling any embedded backtick) so table
    # names that are reserved words or contain special characters still parse.
    quoted_name = '`{}`'.format(table_name.replace('`', '``'))
    output = pd.read_sql_query('DESCRIBE {}'.format(quoted_name), connection)
    print(table_name)
    print(output, '\n')

departments
       Field         Type Null  Key Default Extra
0    dept_no      char(4)   NO  PRI    None      
1  dept_name  varchar(40)   NO  UNI    None       

dept_emp
       Field     Type Null  Key Default Extra
0     emp_no  int(11)   NO  PRI    None      
1    dept_no  char(4)   NO  PRI    None      
2  from_date     date   NO         None      
3    to_date     date   NO         None       

dept_manager
       Field     Type Null  Key Default Extra
0    dept_no  char(4)   NO  PRI    None      
1     emp_no  int(11)   NO  PRI    None      
2  from_date     date   NO         None      
3    to_date     date   NO         None       

employees
        Field           Type Null  Key Default Extra
0      emp_no        int(11)   NO  PRI    None      
1  birth_date           date   NO         None      
2  first_name    varchar(14)   NO         None      
3   last_name    varchar(16)   NO         None      
4      gender  enum('M','F')   NO         None      
5   hire_date         

<h2>employees 테이블을 활용하여 연도별 입사자수 count</h2>

In [11]:
# SQL text that counts employees per hire year, oldest year first.
# The clauses are joined with the same run of spaces that separated them
# before, so the resulting statement text is unchanged.
employees_hired_year = (' ' * 24).join((
    'SELECT YEAR(e.hire_date) as hire_date, COUNT(e.emp_no) as employee_count',
    'FROM employees e',
    'GROUP BY YEAR(e.hire_date)',
    'ORDER BY hire_date asc',
))

In [12]:
# Run the hires-per-year query and load its result set into a DataFrame.
df = pd.read_sql_query(sql=employees_hired_year, con=connection)

In [13]:
# Display the hires-per-year result (one row per year).
df

Unnamed: 0,hire_date,employee_count
0,1985,35316
1,1986,36150
2,1987,33501
3,1988,31436
4,1989,28394
5,1990,25610
6,1991,22568
7,1992,20402
8,1993,17772
9,1994,14835


In [14]:
# SQL text that selects every column of every row in the employees table.
employees_table = (
    'SELECT * '
    'FROM employees'
)

In [15]:
# Load the full employees table into a DataFrame.
df = pd.read_sql_query(sql=employees_table, con=connection)

In [16]:
# Preview only the first rows; the frame holds every row of the employees
# table, so rendering it whole adds nothing for the reader.
df.head(10)

Unnamed: 0,emp_no,birth_date,first_name,last_name,gender,hire_date
0,10001,1953-09-02,Georgi,Facello,M,1986-06-26
1,10002,1964-06-02,Bezalel,Simmel,F,1985-11-21
2,10003,1959-12-03,Parto,Bamford,M,1986-08-28
3,10004,1954-05-01,Chirstian,Koblick,M,1986-12-01
4,10005,1955-01-21,Kyoichi,Maliniak,M,1989-09-12
5,10006,1953-04-20,Anneke,Preusig,F,1989-06-02
6,10007,1957-05-23,Tzvetan,Zielinski,F,1989-02-10
7,10008,1958-02-19,Saniya,Kalloufi,M,1994-09-15
8,10009,1952-04-19,Sumant,Peac,F,1985-02-18
9,10010,1963-06-01,Duangkaew,Piveteau,F,1989-08-24


<h2>SQL문 대신 pandas를 이용해 연도별 입사자수 count</h2>

In [17]:
# Derive the hire year with pandas' vectorized datetime accessor instead of a
# per-row Python lambda. to_datetime is applied first so the accessor works
# even if the driver returned plain date objects (object dtype) -- TODO confirm
# the dtype of hire_date as loaded.
df['hire_year'] = pd.to_datetime(df['hire_date']).dt.year

In [18]:
# Preview the first rows to check the newly added hire_year column rather
# than rendering the whole employees frame.
df.head(10)

Unnamed: 0,emp_no,birth_date,first_name,last_name,gender,hire_date,hire_year
0,10001,1953-09-02,Georgi,Facello,M,1986-06-26,1986
1,10002,1964-06-02,Bezalel,Simmel,F,1985-11-21,1985
2,10003,1959-12-03,Parto,Bamford,M,1986-08-28,1986
3,10004,1954-05-01,Chirstian,Koblick,M,1986-12-01,1986
4,10005,1955-01-21,Kyoichi,Maliniak,M,1989-09-12,1989
5,10006,1953-04-20,Anneke,Preusig,F,1989-06-02,1989
6,10007,1957-05-23,Tzvetan,Zielinski,F,1989-02-10,1989
7,10008,1958-02-19,Saniya,Kalloufi,M,1994-09-15,1994
8,10009,1952-04-19,Sumant,Peac,F,1985-02-18,1985
9,10010,1963-06-01,Duangkaew,Piveteau,F,1989-08-24,1989


In [19]:
# Count employee numbers within each hire year -- the pandas counterpart of
# the SQL GROUP BY query used earlier.
df.groupby('hire_year')['emp_no'].count()

hire_year
1985    35316
1986    36150
1987    33501
1988    31436
1989    28394
1990    25610
1991    22568
1992    20402
1993    17772
1994    14835
1995    12115
1996     9574
1997     6669
1998     4155
1999     1514
2000       13
Name: emp_no, dtype: int64

In [23]:
# SQL text that selects salary rows strictly above 50000.
query = (
    'SELECT * FROM salaries '
    'WHERE salary>50000;'
)

In [24]:
# Run the salary filter query and load the matching rows into a DataFrame.
df = pd.read_sql_query(sql=query, con=connection)

In [25]:
# Preview only the first rows of the filtered salaries result instead of
# rendering the whole frame.
df.head(10)

Unnamed: 0,emp_no,salary,from_date,to_date
0,10001,60117,1986-06-26,1987-06-26
1,10001,62102,1987-06-26,1988-06-25
2,10001,66074,1988-06-25,1989-06-25
3,10001,66596,1989-06-25,1990-06-25
4,10001,66961,1990-06-25,1991-06-25
5,10001,71046,1991-06-25,1992-06-24
6,10001,74333,1992-06-24,1993-06-24
7,10001,75286,1993-06-24,1994-06-24
8,10001,75994,1994-06-24,1995-06-24
9,10001,76884,1995-06-24,1996-06-23


In [26]:
# Close the MySQL connection opened earlier in the notebook.
connection.close()