***
# CHAPTER 3. 그룹함수
### < COUNT, MAX, MIN, SUM, AVG, STDDEV, VARIANCE >
* 주의 : COUNT(*)를 제외한 그룹함수는 NULL 값을 계산에 포함시키지 않는다 (COUNT(컬럼명)도 NULL은 세지 않는다)

In [1]:
import cx_Oracle
import pandas as pd
# Open the Oracle session shared by every query cell below, plus a cursor
# on that session (both are closed in the last cell of the notebook).
connect_string = 'hr/hr@localhost/xe'
xedb = cx_Oracle.connect(connect_string)
cur = xedb.cursor()

In [2]:
# Number of employees in each department. Rows whose department_id is NULL
# form a group of their own, which pandas prints as NaN.
query = """
        SELECT department_id, count(*)
        FROM employees
        GROUP BY department_id
        ORDER BY 1
        """
df = pd.read_sql(query, con=xedb)
print(df)

    DEPARTMENT_ID  COUNT(*)
0            10.0         1
1            20.0         2
2            30.0         6
3            40.0         1
4            50.0        45
5            60.0         5
6            70.0         1
7            80.0        34
8            90.0         3
9           100.0         6
10          110.0         2
11            NaN         1


In [3]:
# Ungrouped view: one row per employee with its department and job,
# sorted by department, for comparison with the grouped queries.
query = """
        SELECT department_id, job_id
        FROM employees
        ORDER BY 1
        """
df = pd.read_sql(query, con=xedb)
print(df)


     DEPARTMENT_ID      JOB_ID
0             10.0     AD_ASST
1             20.0      MK_MAN
2             20.0      MK_REP
3             30.0      PU_MAN
4             30.0    PU_CLERK
5             30.0    PU_CLERK
6             30.0    PU_CLERK
7             30.0    PU_CLERK
8             30.0    PU_CLERK
9             40.0      HR_REP
10            50.0      ST_MAN
11            50.0      ST_MAN
12            50.0      ST_MAN
13            50.0      ST_MAN
14            50.0      ST_MAN
15            50.0    ST_CLERK
16            50.0    ST_CLERK
17            50.0    ST_CLERK
18            50.0    ST_CLERK
19            50.0    ST_CLERK
20            50.0    ST_CLERK
21            50.0    ST_CLERK
22            50.0    ST_CLERK
23            50.0    ST_CLERK
24            50.0    ST_CLERK
25            50.0    ST_CLERK
26            50.0    ST_CLERK
27            50.0    ST_CLERK
28            50.0    ST_CLERK
29            50.0    ST_CLERK
..             ...         ...
77      

In [4]:
# Group on two columns: one result row per distinct
# (department_id, job_id) pair with its employee count.
query = """
        SELECT department_id, job_id,count(*)
        FROM employees
        GROUP BY department_id, job_id
        ORDER BY 1
        """
df = pd.read_sql(query, con=xedb)
print(df)

    DEPARTMENT_ID      JOB_ID  COUNT(*)
0            10.0     AD_ASST         1
1            20.0      MK_MAN         1
2            20.0      MK_REP         1
3            30.0    PU_CLERK         5
4            30.0      PU_MAN         1
5            40.0      HR_REP         1
6            50.0    SH_CLERK        20
7            50.0    ST_CLERK        20
8            50.0      ST_MAN         5
9            60.0     IT_PROG         5
10           70.0      PR_REP         1
11           80.0      SA_MAN         5
12           80.0      SA_REP        29
13           90.0     AD_PRES         1
14           90.0       AD_VP         2
15          100.0  FI_ACCOUNT         5
16          100.0      FI_MGR         1
17          110.0  AC_ACCOUNT         1
18          110.0      AC_MGR         1
19            NaN      SA_REP         1


In [5]:
# WHERE restricts the rows to departments 10, 20 and 30; the remaining
# rows are grouped by (department_id, job_id) and the head count,
# average salary and total salary are reported per group.
query = """
        SELECT department_id, job_id,count(*),avg(salary), sum(salary)
        FROM employees
        WHERE department_id IN(10,20,30)
        GROUP BY department_id, job_id
        ORDER BY 1
        """
df = pd.read_sql(query, con=xedb)
print(df)

   DEPARTMENT_ID    JOB_ID  COUNT(*)  AVG(SALARY)  SUM(SALARY)
0             10   AD_ASST         1         4400         4400
1             20    MK_MAN         1        13000        13000
2             20    MK_REP         1         6000         6000
3             30  PU_CLERK         5         2780        13900
4             30    PU_MAN         1        11000        11000


###  HAVING : 그룹함수의 결과를 제한하는 절 
참고 : where절은 행을 제한하는 절

In [6]:
# Same grouping as the previous cell, with HAVING added so that only
# groups whose average salary exceeds 6000 are returned.
query = """
        SELECT department_id, job_id, count(*), avg(salary), sum(salary)
        FROM employees
        WHERE department_id IN(10,20,30)
        GROUP BY department_id, job_id
        HAVING avg(salary)>6000
        ORDER BY 1
        """
df = pd.read_sql(query, con=xedb)
print(df)

   DEPARTMENT_ID  JOB_ID  COUNT(*)  AVG(SALARY)  SUM(SALARY)
0             20  MK_MAN         1        13000        13000
1             30  PU_MAN         1        11000        11000


### 작업수행순서
- 5 SELECT
- 1 FROM
- 2 WHERE
- 3 GROUP BY
- 4 HAVING
- 6 ORDER BY

***
## [ 연습문제 ]
***

#### (1) 모든 사원의 최고급여, 최저급여, 급여합계 및 평균급여를 조회하세요. 결과는 소수점을 반올림하여 정수값으로 출력하세요.

In [7]:
# Exercise 1: highest, lowest, total and average salary over the whole
# employees table (no GROUP BY, so a single row), each rounded to an integer.
query = """
        SELECT ROUND(MAX(salary),0) Maximum,
                ROUND(MIN(salary),0) Minimum,
                ROUND(SUM(salary),0) Sum,
                ROUND(AVG(salary),0) Average
        FROM employees
        """
df = pd.read_sql(query, con=xedb)
print(df)

   MAXIMUM  MINIMUM     SUM  AVERAGE
0    24000     2100  691416     6462


#### (2) 2008년도에 입사한 사원들의 job_id별 사원수를 조회하고 인원수가 많은 순으로 출력하세요.

In [8]:
# Exercise 2: employees hired in 2008, counted per job_id, largest count first.
# The hire year is selected with a half-open date range
# (2008-01-01 <= hire_date < 2009-01-01) rather than by formatting the date.
# Note: the SELECT line previously ended in a stray backslash, which Python
# treats as a line continuation inside the string literal; it is removed so
# the SQL text keeps its line break.
df = pd.read_sql("""
        SELECT job_id, COUNT(*)
        FROM employees WHERE hire_date >= to_date('2008-01-01','YYYY-MM-DD')
        AND hire_date < to_date('2009-01-01','YYYY-MM-DD')
        GROUP BY job_id
        ORDER BY 2 desc
        """,xedb)
print(df)

     JOB_ID  COUNT(*)
0    SA_REP         6
1  SH_CLERK         2
2  ST_CLERK         2
3    SA_MAN         1


#### (3) 총 사원수와 2005년, 2006년, 2007년, 2008년에 각각 입사한 사원의 수를 출력하세요.

In [10]:
# Hires per calendar year: the hire date is formatted as a four-digit
# year string, which serves as both the grouping key and the sort key.
query = """
                 SELECT TO_CHAR(HIRE_DATE,'YYYY'), COUNT(*)
                 FROM EMPLOYEES
                 GROUP BY TO_CHAR(HIRE_DATE,'YYYY')
                 ORDER BY 1
                 """
df = pd.read_sql(query, con=xedb)
print(df)

  TO_CHAR(HIRE_DATE,'YYYY')  COUNT(*)
0                      2001         1
1                      2002         7
2                      2003         6
3                      2004        10
4                      2005        29
5                      2006        24
6                      2007        19
7                      2008        11


#### SUM, DECODE

In [11]:
# Exercise 3 with SUM + DECODE: total head count plus one column per hire
# year (2005-2008) in a single row.
# Each DECODE has no default value, so it yields 1 when the year matches and
# NULL otherwise; SUM skips the NULLs and therefore adds up only the matches.
# Note: the first SELECT line previously ended in a stray backslash (a Python
# line continuation inside the string literal); it is removed so the SQL text
# keeps its line break.
df = pd.read_sql("""
        SELECT COUNT(*) total,
                SUM(DECODE(TO_CHAR(hire_date,'YYYY'),'2005',1)) YR_05,
                SUM(DECODE(TO_CHAR(hire_date,'YYYY'),'2006',1)) YR_06,
                SUM(DECODE(TO_CHAR(hire_date,'YYYY'),'2007',1)) YR_07,
                SUM(DECODE(TO_CHAR(hire_date,'YYYY'),'2008',1)) YR_08
        FROM employees
        """,xedb)
print(df)

   TOTAL  YR_05  YR_06  YR_07  YR_08
0    107     29     24     19     11


#### COUNT, CASE..WHEN

In [12]:
# Exercise 3 with COUNT + CASE: same one-row pivot as the SUM/DECODE version.
# Each CASE has no ELSE branch, so it yields 1 for a matching year and NULL
# otherwise; COUNT(expression) counts only the non-NULL results.
# Note: the first SELECT line previously ended in a stray backslash (a Python
# line continuation inside the string literal); it is removed so the SQL text
# keeps its line break.
df = pd.read_sql("""
        SELECT COUNT(*) total,
                COUNT(CASE TO_CHAR(hire_date,'YYYY') WHEN '2005' THEN 1 END) YR_05,
                COUNT(CASE TO_CHAR(hire_date,'YYYY') WHEN '2006' THEN 1 END) YR_06,
                COUNT(CASE TO_CHAR(hire_date,'YYYY') WHEN '2007'THEN 1 END) YR_07,
                COUNT(CASE TO_CHAR(hire_date,'YYYY') WHEN '2008'THEN 1 END) YR_08
        FROM employees
        """,xedb)
print(df)

   TOTAL  YR_05  YR_06  YR_07  YR_08
0    107     29     24     19     11


In [13]:
# Exercise 3 once more, comparing the hire year as a number via
# EXTRACT(YEAR FROM hire_date) instead of formatting it as a string.
# The value returned by THEN ('X') is irrelevant: COUNT only checks whether
# the CASE result is non-NULL.
# Note: the first SELECT line previously ended in a stray backslash (a Python
# line continuation inside the string literal); it is removed so the SQL text
# keeps its line break.
df = pd.read_sql("""
        SELECT COUNT(*) total,
                COUNT(CASE EXTRACT(YEAR FROM hire_date) WHEN 2005 THEN 'X' END) YR_05,
                COUNT(CASE EXTRACT(YEAR FROM hire_date) WHEN 2006 THEN 'X' END) YR_06,
                COUNT(CASE EXTRACT(YEAR FROM hire_date) WHEN 2007 THEN 'X' END)YR_07,
                COUNT(CASE EXTRACT(year from hire_date) WHEN 2008 THEN 'X' END) YR_08
        FROM employees
        """,xedb)
print(df)

   TOTAL  YR_05  YR_06  YR_07  YR_08
0    107     29     24     19     11


In [15]:
# Release the database resources opened in the first cell: close the cursor
# first, then the connection it was created from.
cur.close()
xedb.close()