# chapter 5. SUBQUERY
***
## 5-1 SINGLE ROW SUBQUERY 

#### 사원 141의 job_id와 동일한 job_id를 가진 사원의 이름을 조회

In [1]:
import cx_Oracle
import pandas as pd
# Open a cx_Oracle connection using the connect string 'hr/hr@localhost/xe'.
# NOTE(review): the credentials are hard-coded in the connect string --
# consider loading them from configuration outside the notebook.
xedb = cx_Oracle.connect('hr/hr@localhost/xe')
# Cursor on that connection; the cells visible here pass xedb to pd.read_sql
# directly and do not use cur.
cur = xedb.cursor()

In [2]:
# Single-row subquery: the inner query returns the job_id of employee 141 and
# the outer query lists every employee with that same job_id.
df = pd.read_sql("""
        SELECT last_name, job_id
        FROM employees
        WHERE job_id =  (SELECT job_id
                         FROM employees
                         WHERE employee_id = 141)
        """,xedb)
print(df)

      LAST_NAME    JOB_ID
0         Nayer  ST_CLERK
1   Mikkilineni  ST_CLERK
2        Landry  ST_CLERK
3        Markle  ST_CLERK
4        Bissot  ST_CLERK
5      Atkinson  ST_CLERK
6        Marlow  ST_CLERK
7         Olson  ST_CLERK
8        Mallin  ST_CLERK
9        Rogers  ST_CLERK
10          Gee  ST_CLERK
11   Philtanker  ST_CLERK
12       Ladwig  ST_CLERK
13       Stiles  ST_CLERK
14          Seo  ST_CLERK
15        Patel  ST_CLERK
16         Rajs  ST_CLERK
17       Davies  ST_CLERK
18        Matos  ST_CLERK
19       Vargas  ST_CLERK


#### 사원 141과 동일한 job_id를 가진 사원 중에 사원 141보다 급여를 많이 받는 사원을 조회

In [3]:
# Two single-row subqueries combined with AND: same job_id as employee 141
# and a salary strictly greater than employee 141's salary.
df = pd.read_sql("""
        SELECT last_name, job_id, salary
        FROM employees
        WHERE job_id = (SELECT job_id
                        FROM employees
                        WHERE employee_id = 141)
        AND salary > (SELECT salary
                      FROM employees
                      WHERE employee_id = 141)
        """,xedb)
print(df)

  LAST_NAME    JOB_ID  SALARY
0    Ladwig  ST_CLERK  3600.0


#### 최소월급을 받는 사원의 이름과 월급을 출력

In [4]:
# Employees whose salary equals the minimum salary over the whole employees
# table (the subquery returns a single aggregate value).
df = pd.read_sql("""
        SELECT last_name, salary
        FROM employees
        WHERE salary = (SELECT MIN(salary)
                        FROM employees)         
        """,xedb)
print(df)

  LAST_NAME  SALARY
0     Olson  2100.0


#### 평균 급여가 가장 낮은 job_id

In [5]:
# Subquery in HAVING: MIN(AVG(salary)) over the job_id groups yields the
# lowest per-job average; the outer query keeps the job_id group(s) whose
# average salary equals that value.
df = pd.read_sql("""
        SELECT job_id, AVG(salary)
        FROM employees
        GROUP BY job_id
        HAVING AVG(salary) = (SELECT MIN(AVG(salary))
                          FROM employees
                          GROUP BY job_id)
        """,xedb)
print(df)

     JOB_ID  AVG(SALARY)
0  PU_CLERK         2780


## 5-2  MULTIPLE ROW SUBQUERY
* Multiple row subquery  연산자 :  in, any, all

### < IN, NOT IN >
* IN (=, OR, =, OR, =, OR ......)
* NOT IN (<>, AND, <>, AND, <>, AND .....) 

#### 부서별로 최소 급여자들의 이름과 급여 출력

In [6]:
# Multiple-row subquery with IN: the subquery returns one MIN(salary) per
# department_id group, and the outer query keeps employees whose salary
# equals any of those values.
# NOTE(review): the match is on salary alone, so an employee whose salary
# happens to equal another department's minimum is also returned; a pairwise
# (department_id, salary) IN (...) comparison would restrict each match to the
# employee's own department -- confirm which result is intended.
df = pd.read_sql("""
        select last_name, salary
        FROM employees
        WHERE salary IN (SELECT MIN(salary)
                         FROM employees
                         GROUP BY department_id)
        """,xedb)
print(df)

     LAST_NAME   SALARY
0      Kochhar  17000.0
1      De Haan  17000.0
2        Ernst   6000.0
3      Lorentz   4200.0
4         Popp   6900.0
5   Colmenares   2500.0
6      Vollman   6500.0
7       Marlow   2500.0
8        Olson   2100.0
9        Patel   2500.0
10      Vargas   2500.0
11      Tucker  10000.0
12     Tuvault   7000.0
13        King  10000.0
14      Sewall   7000.0
15       Bloom  10000.0
16       Kumar   6100.0
17       Grant   7000.0
18    Sullivan   2500.0
19    Sarchand   4200.0
20     Perkins   2500.0
21      Whalen   4400.0
22         Fay   6000.0
23      Mavris   6500.0
24        Baer  10000.0
25       Gietz   8300.0


#### 관리자인 사원들의 이름을 조회

In [8]:
# Employees whose employee_id appears as the manager_id of some row, i.e.
# employees who manage at least one other employee.
df = pd.read_sql("""
        SELECT last_name
        FROM employees
        WHERE employee_id IN (SELECT manager_id
                              FROM employees)
        """,xedb)
print(df)

    LAST_NAME
0   Cambrault
1     De Haan
2   Errazuriz
3       Fripp
4   Greenberg
5   Hartstein
6     Higgins
7      Hunold
8    Kaufling
9        King
10    Kochhar
11    Mourgos
12   Partners
13   Raphaely
14    Russell
15    Vollman
16      Weiss
17    Zlotkey


#### 관리자가 아닌 사원들의 이름을 조회

In [9]:
# Attempt to list non-managers with NOT IN over the unfiltered manager_id
# column (kept as-is to demonstrate the NULL pitfall described below).
df = pd.read_sql("""
        SELECT last_name
        FROM employees
        WHERE employee_id NOT IN (SELECT manager_id
                                  FROM employees)
        """,xedb)
print(df)

# However, when the subquery result contains a NULL, no rows are returned (TRUE AND NULL = NULL).

Empty DataFrame
Columns: [LAST_NAME]
Index: []


In [10]:
# Same NOT IN query, but NULL manager_ids are filtered out of the subquery so
# the NOT IN condition can evaluate to TRUE for non-managers.
df = pd.read_sql("""
        SELECT  last_name
        FROM  employees
        WHERE  employee_id NOT IN (SELECT  manager_id
                                   FROM employees
                                   WHERE manager_id IS NOT NULL)
        """,xedb)
print(df)

     LAST_NAME
0         Abel
1         Ande
2     Atkinson
3       Austin
4         Baer
5        Baida
6        Banda
7        Bates
8         Bell
9    Bernstein
10      Bissot
11       Bloom
12        Bull
13      Cabrio
14   Cambrault
15        Chen
16       Chung
17  Colmenares
18      Davies
19   Dellinger
20       Dilly
21       Doran
22       Ernst
23     Everett
24      Faviet
25         Fay
26      Feeney
27      Fleaur
28         Fox
29       Gates
..         ...
59    OConnell
60       Olsen
61       Olson
62        Ozer
63   Pataballa
64       Patel
65     Perkins
66  Philtanker
67        Popp
68        Rajs
69      Rogers
70    Sarchand
71     Sciarra
72         Seo
73      Sewall
74       Smith
75       Smith
76      Stiles
77    Sullivan
78       Sully
79      Taylor
80      Taylor
81      Tobias
82      Tucker
83     Tuvault
84       Urman
85      Vargas
86     Vishney
87       Walsh
88      Whalen

[89 rows x 1 columns]


### < ANY >
#### job_id 'IT_PROG'에서 최고 급여를 받는 사원보다 급여를 적게 받는 사원을 조회

In [11]:
# Single-row form: employees whose salary is below the highest salary among
# employees with job_id 'IT_PROG'.
df = pd.read_sql("""
        SELECT employee_id, last_name, job_id, salary
        FROM employees
        WHERE salary < (SELECT MAX(salary)
                        FROM employees
                        WHERE job_id = 'IT_PROG')
        """,xedb)
print(df)

    EMPLOYEE_ID    LAST_NAME      JOB_ID  SALARY
0           104        Ernst     IT_PROG  6000.0
1           105       Austin     IT_PROG  4800.0
2           106    Pataballa     IT_PROG  4800.0
3           107      Lorentz     IT_PROG  4200.0
4           110         Chen  FI_ACCOUNT  8200.0
5           111      Sciarra  FI_ACCOUNT  7700.0
6           112        Urman  FI_ACCOUNT  7800.0
7           113         Popp  FI_ACCOUNT  6900.0
8           115         Khoo    PU_CLERK  3100.0
9           116        Baida    PU_CLERK  2900.0
10          117       Tobias    PU_CLERK  2800.0
11          118       Himuro    PU_CLERK  2600.0
12          119   Colmenares    PU_CLERK  2500.0
13          120        Weiss      ST_MAN  8000.0
14          121        Fripp      ST_MAN  8200.0
15          122     Kaufling      ST_MAN  7900.0
16          123      Vollman      ST_MAN  6500.0
17          124      Mourgos      ST_MAN  5800.0
18          125        Nayer    ST_CLERK  3200.0
19          126  Mik

In [12]:
# Multiple-row form: salary < ANY (...) is true when the salary is below at
# least one of the salaries returned for job_id 'IT_PROG'.
df = pd.read_sql("""
        SELECT employee_id, last_name, job_id, salary
        FROM employees
        WHERE salary < ANY (SELECT salary
                            FROM employees
                            where job_id='IT_PROG')
        """,xedb)
print(df)

    EMPLOYEE_ID    LAST_NAME      JOB_ID  SALARY
0           132        Olson    ST_CLERK  2100.0
1           128       Markle    ST_CLERK  2200.0
2           136   Philtanker    ST_CLERK  2200.0
3           127       Landry    ST_CLERK  2400.0
4           135          Gee    ST_CLERK  2400.0
5           119   Colmenares    PU_CLERK  2500.0
6           131       Marlow    ST_CLERK  2500.0
7           140        Patel    ST_CLERK  2500.0
8           144       Vargas    ST_CLERK  2500.0
9           182     Sullivan    SH_CLERK  2500.0
10          191      Perkins    SH_CLERK  2500.0
11          118       Himuro    PU_CLERK  2600.0
12          143        Matos    ST_CLERK  2600.0
13          198     OConnell    SH_CLERK  2600.0
14          199        Grant    SH_CLERK  2600.0
15          126  Mikkilineni    ST_CLERK  2700.0
16          139          Seo    ST_CLERK  2700.0
17          117       Tobias    PU_CLERK  2800.0
18          130     Atkinson    ST_CLERK  2800.0
19          183     

### < ALL >
#### job_id 'IT_PROG'에서 최저 급여를 받는 사원보다 급여를 적게 받는 사원을 조회

In [13]:
# Single-row form: employees whose salary is below the lowest salary among
# employees with job_id 'IT_PROG'.
df = pd.read_sql("""
        SELECT employee_id, last_name, job_id, salary
        FROM employees
        WHERE salary < (SELECT MIN(salary)
                        FROM employees
                        WHERE job_id = 'IT_PROG')
        """,xedb)
print(df)

    EMPLOYEE_ID    LAST_NAME    JOB_ID  SALARY
0           115         Khoo  PU_CLERK  3100.0
1           116        Baida  PU_CLERK  2900.0
2           117       Tobias  PU_CLERK  2800.0
3           118       Himuro  PU_CLERK  2600.0
4           119   Colmenares  PU_CLERK  2500.0
5           125        Nayer  ST_CLERK  3200.0
6           126  Mikkilineni  ST_CLERK  2700.0
7           127       Landry  ST_CLERK  2400.0
8           128       Markle  ST_CLERK  2200.0
9           129       Bissot  ST_CLERK  3300.0
10          130     Atkinson  ST_CLERK  2800.0
11          131       Marlow  ST_CLERK  2500.0
12          132        Olson  ST_CLERK  2100.0
13          133       Mallin  ST_CLERK  3300.0
14          134       Rogers  ST_CLERK  2900.0
15          135          Gee  ST_CLERK  2400.0
16          136   Philtanker  ST_CLERK  2200.0
17          137       Ladwig  ST_CLERK  3600.0
18          138       Stiles  ST_CLERK  3200.0
19          139          Seo  ST_CLERK  2700.0
20          1

In [14]:
# Multiple-row form: salary < ALL (...) is true only when the salary is below
# every salary returned for job_id 'IT_PROG'.
df = pd.read_sql("""
        SELECT employee_id, last_name, job_id, salary
        FROM employees
        WHERE salary < ALL(SELECT salary
                           FROM employees
                           WHERE job_id = 'IT_PROG')
        """,xedb)
print(df)

    EMPLOYEE_ID    LAST_NAME    JOB_ID  SALARY
0           185         Bull  SH_CLERK  4100.0
1           192         Bell  SH_CLERK  4000.0
2           193      Everett  SH_CLERK  3900.0
3           188        Chung  SH_CLERK  3800.0
4           189        Dilly  SH_CLERK  3600.0
5           137       Ladwig  ST_CLERK  3600.0
6           141         Rajs  ST_CLERK  3500.0
7           186    Dellinger  SH_CLERK  3400.0
8           133       Mallin  ST_CLERK  3300.0
9           129       Bissot  ST_CLERK  3300.0
10          125        Nayer  ST_CLERK  3200.0
11          194       McCain  SH_CLERK  3200.0
12          180       Taylor  SH_CLERK  3200.0
13          138       Stiles  ST_CLERK  3200.0
14          115         Khoo  PU_CLERK  3100.0
15          196        Walsh  SH_CLERK  3100.0
16          181       Fleaur  SH_CLERK  3100.0
17          142       Davies  ST_CLERK  3100.0
18          197       Feeney  SH_CLERK  3000.0
19          187       Cabrio  SH_CLERK  3000.0
20          1

## 5-3 MULTIPLE COLUMN SUBQUERY  
### < 쌍비교방식 >

In [15]:
# Pairwise comparison: the (manager_id, department_id) pair of the outer row
# must equal, as a pair, that of some employee whose first_name is 'John'.
df = pd.read_sql("""
        SELECT employee_id, manager_id, department_id
        FROM employees
        WHERE (manager_id, department_id) IN (SELECT manager_id, department_id
                                              FROM employees
                                              WHERE first_name = 'John')
        """,xedb)
print(df)

    EMPLOYEE_ID  MANAGER_ID  DEPARTMENT_ID
0           137         123             50
1           138         123             50
2           139         123             50
3           140         123             50
4           192         123             50
5           193         123             50
6           194         123             50
7           195         123             50
8           145         100             80
9           146         100             80
10          147         100             80
11          148         100             80
12          149         100             80
13          109         108            100
14          110         108            100
15          111         108            100
16          112         108            100
17          113         108            100


### < 비쌍비교방식 >

In [16]:
# Non-pairwise comparison: manager_id and department_id are each checked
# against their own subquery, so the two matching values may come from
# different 'John' rows.
df = pd.read_sql("""
        SELECT employee_id, manager_id, department_id
        FROM employees
        WHERE manager_id IN (SELECT manager_id
                             FROM employees
                             WHERE first_name = 'John')
        AND department_id IN (SELECT department_id
                             FROM employees
                             WHERE first_name = 'John')
        """,xedb)
print(df)

    EMPLOYEE_ID  MANAGER_ID  DEPARTMENT_ID
0           120         100             50
1           121         100             50
2           122         100             50
3           123         100             50
4           124         100             50
5           137         123             50
6           138         123             50
7           139         123             50
8           140         123             50
9           192         123             50
10          193         123             50
11          194         123             50
12          195         123             50
13          145         100             80
14          146         100             80
15          147         100             80
16          148         100             80
17          149         100             80
18          109         108            100
19          110         108            100
20          111         108            100
21          112         108            100
22         

## 5-4 CORRELATED SUBQUERY 
* 메인쿼리의 컬럼이 서브쿼리에 포함되어 있는 경우
* 메인쿼리의 값을 서브쿼리가 사용하고, 서브쿼리를 수행한 다음에 그 결과를 다시 메인쿼리로 반환해서 수행하는 구조 
    (중첩서브쿼리의 경우 서브쿼리절이 먼저 수행되고, 그 다음에 메인쿼리가 수행되는 반면에 CORRELATED SUBQUERY는 메인쿼리가 먼저 수행이 된다.)

#### 자신의 부서 평균급여보다 많은 급여를 받는 사원 조회
    SELECT employee_id, salary, department_id
    FROM employees
    WHERE salary > 내 부서 평균급여 (SELECT avg(salary)
                                   FROM employees
                                   WHERE department_id=내 부서코드)
     => 전체사원을 조회하게 된다.
     => 그러므로 메인쿼리가 먼저 수행되어야 한다.
     (메인쿼리의 row만큼 수행하기 때문에 부하가 심하다)


In [17]:
# Correlated subquery: the inner query references o.department_id from the
# outer row, so AVG(salary) is taken over the current outer row's department.
# Keeps employees paid more than their own department's average.
df = pd.read_sql("""
        SELECT last_name, salary, department_id
        FROM employees o
        WHERE salary > (SELECT AVG(salary)
                        FROM employees 
                        WHERE department_id = o.department_id)
        """,xedb)
print(df)

    LAST_NAME   SALARY  DEPARTMENT_ID
0        King  24000.0             90
1      Hunold   9000.0             60
2       Ernst   6000.0             60
3   Greenberg  12008.0            100
4      Faviet   9000.0            100
5    Raphaely  11000.0             30
6       Weiss   8000.0             50
7       Fripp   8200.0             50
8    Kaufling   7900.0             50
9     Vollman   6500.0             50
10    Mourgos   5800.0             50
11     Ladwig   3600.0             50
12       Rajs   3500.0             50
13    Russell  14000.0             80
14   Partners  13500.0             80
15  Errazuriz  12000.0             80
16  Cambrault  11000.0             80
17    Zlotkey  10500.0             80
18     Tucker  10000.0             80
19  Bernstein   9500.0             80
20       Hall   9000.0             80
21       King  10000.0             80
22      Sully   9500.0             80
23     McEwen   9000.0             80
24    Vishney  10500.0             80
25     Green

### < EXISTS, NOT EXISTS >
#### 관리자인 사원들의 정보 출력
    SELECT * 
    FROM employees
    WHERE employee_id IN (SELECT manager_id
                          FROM employees )
   
    =>서브쿼리를 먼저 수행하기 때문에 중복된 것 검색하는 불필요한 작업을 수행한다.
    이 때 존재여부를 찾는 CORRELATED SUBQUERY의 연산자 EXISTS 사용
    후보행 값이 서브쿼리에 존재하면 데이터검색을 종료하여 불필요한 작업수행X

In [18]:
# EXISTS with a correlated subquery: keep an outer row when at least one
# employee row has manager_id equal to its employee_id. The selected literal
# 'X' is only a placeholder; EXISTS tests whether any row is returned.
df = pd.read_sql("""
        SELECT last_name
        FROM employees o
        WHERE EXISTS (SELECT 'X'
                      FROM employees
                      WHERE manager_id = o.employee_id)
        """,xedb)
print(df)

    LAST_NAME
0   Cambrault
1     De Haan
2   Errazuriz
3       Fripp
4   Greenberg
5   Hartstein
6     Higgins
7      Hunold
8    Kaufling
9        King
10    Kochhar
11    Mourgos
12   Partners
13   Raphaely
14    Russell
15    Vollman
16      Weiss
17    Zlotkey


#### 관리자가 아닌 사원에 대해서 출력
     SELECT last_name
     FROM employees
     WHERE employee_id NOT IN (SELECT manager_id
                               FROM employees
                               WHERE manager_id IS NOT NULL )

In [19]:
# NOT EXISTS with a correlated subquery: keep an outer row when no employee
# row has manager_id equal to its employee_id (i.e. non-managers).
df = pd.read_sql("""
        SELECT last_name
        FROM employees o
        WHERE NOT EXISTS (SELECT 'X'
                          FROM employees
                          WHERE manager_id = o.employee_id)
        """,xedb)
print(df)

# Difference from NOT IN: NOT EXISTS looks for the absence of matching rows, so NULL values do not affect the result.

     LAST_NAME
0         Abel
1         Ande
2     Atkinson
3       Austin
4         Baer
5        Baida
6        Banda
7        Bates
8         Bell
9    Bernstein
10      Bissot
11       Bloom
12        Bull
13      Cabrio
14   Cambrault
15        Chen
16       Chung
17  Colmenares
18      Davies
19   Dellinger
20       Dilly
21       Doran
22       Ernst
23     Everett
24      Faviet
25         Fay
26      Feeney
27      Fleaur
28         Fox
29       Gates
..         ...
59    OConnell
60       Olsen
61       Olson
62        Ozer
63   Pataballa
64       Patel
65     Perkins
66  Philtanker
67        Popp
68        Rajs
69      Rogers
70    Sarchand
71     Sciarra
72         Seo
73      Sewall
74       Smith
75       Smith
76      Stiles
77    Sullivan
78       Sully
79      Taylor
80      Taylor
81      Tobias
82      Tucker
83     Tuvault
84       Urman
85      Vargas
86     Vishney
87       Walsh
88      Whalen

[89 rows x 1 columns]


## 5-5 SCALAR SUBQUERY
* SELECT절에서 사용하는 서브쿼리 
* 한 개의 ROW만 반환하며, 메인쿼리에서 추출되는 데이터 건수만큼 수행되지만 동일한 입력값은 캐시된 결과를 재사용하기 때문에
    JOIN으로 작업할 때보다 실제 수행횟수가 적을 수 있다.
* 일치하는 값이 없을 경우 NULL을 반환한다.
* 캐시기능이 돌아가는 서브쿼리
* 하나의 행에서 정확히 하나의 열 값을 반환하는 서브쿼리

In [20]:
# Equi-join of employees and departments on department_id (comma-join syntax
# with the join condition in WHERE), sorted by the 2nd and 3rd select-list
# columns.
df = pd.read_sql("""
        SELECT e.last_name, e.department_id, d.department_id, d.department_name
        FROM employees e, departments d
        WHERE e.department_id = d.department_id
        ORDER BY 2,3
        """,xedb)
print(df)

      LAST_NAME  DEPARTMENT_ID  DEPARTMENT_ID  DEPARTMENT_NAME
0        Whalen             10             10   Administration
1           Fay             20             20        Marketing
2     Hartstein             20             20        Marketing
3        Tobias             30             30       Purchasing
4    Colmenares             30             30       Purchasing
5         Baida             30             30       Purchasing
6      Raphaely             30             30       Purchasing
7          Khoo             30             30       Purchasing
8        Himuro             30             30       Purchasing
9        Mavris             40             40  Human Resources
10       Feeney             50             50         Shipping
11       Fleaur             50             50         Shipping
12        Fripp             50             50         Shipping
13        Gates             50             50         Shipping
14          Gee             50             50         S

=> 동일한 인수값이 들어와도 반복적으로 결과를 출력하기 때문에 비효율적

* 이 때 동일한 값이 들어오면 SELECT가 수행되지 않는 CACHE기능이 있어 효율적인 작업이 가능
* NULL이 있어도 출력되기 때문에 OUTER JOIN의 기능이 포함된다.
* 중복성이 많을수록 성능은 좋아지지만
* 다중열을 표현할 수 없기 때문에 단일컬럼, 단일값만 수행할 수 있는 제약이 있다.


In [21]:
# Scalar subquery in the SELECT list: for each employee row, look up the
# department_name whose department_id matches e.department_id and expose it
# as DEPARTMENT_NAME. A row with no matching department gets NULL in that
# column instead of being dropped. Sorted by the 2nd select-list column.
df = pd.read_sql("""
        SELECT e.last_name, e.department_id, (SELECT department_name
                                                FROM departments
                                                WHERE department_id = e.department_id) DEPARTMENT_NAME
        FROM employees e
        ORDER BY 2
        """,xedb)
print(df)

       LAST_NAME  DEPARTMENT_ID  DEPARTMENT_NAME
0         Whalen           10.0   Administration
1      Hartstein           20.0        Marketing
2            Fay           20.0        Marketing
3       Raphaely           30.0       Purchasing
4           Khoo           30.0       Purchasing
5          Baida           30.0       Purchasing
6         Tobias           30.0       Purchasing
7         Himuro           30.0       Purchasing
8     Colmenares           30.0       Purchasing
9         Mavris           40.0  Human Resources
10         Weiss           50.0         Shipping
11         Fripp           50.0         Shipping
12      Kaufling           50.0         Shipping
13       Vollman           50.0         Shipping
14       Mourgos           50.0         Shipping
15         Nayer           50.0         Shipping
16   Mikkilineni           50.0         Shipping
17        Landry           50.0         Shipping
18        Markle           50.0         Shipping
19        Bissot    

## 5-6  INLINE VIEW
* 가상테이블
#### 자신의 부서 평균급여보다 많은 급여를 받는 사원 조회

In [22]:
# Baseline for the inline-view section: correlated subquery that keeps
# employees paid more than the average salary of their own department.
df = pd.read_sql("""
        SELECT last_name, salary, department_id
        FROM employees o
        WHERE salary > (SELECT avg(salary)
                        FROM employees
                        WHERE department_id = o.department_id)
        """,xedb)
print(df)

    LAST_NAME   SALARY  DEPARTMENT_ID
0        King  24000.0             90
1      Hunold   9000.0             60
2       Ernst   6000.0             60
3   Greenberg  12008.0            100
4      Faviet   9000.0            100
5    Raphaely  11000.0             30
6       Weiss   8000.0             50
7       Fripp   8200.0             50
8    Kaufling   7900.0             50
9     Vollman   6500.0             50
10    Mourgos   5800.0             50
11     Ladwig   3600.0             50
12       Rajs   3500.0             50
13    Russell  14000.0             80
14   Partners  13500.0             80
15  Errazuriz  12000.0             80
16  Cambrault  11000.0             80
17    Zlotkey  10500.0             80
18     Tucker  10000.0             80
19  Bernstein   9500.0             80
20       Hall   9000.0             80
21       King  10000.0             80
22      Sully   9500.0             80
23     McEwen   9000.0             80
24    Vishney  10500.0             80
25     Green

비효율적인 서브쿼리를 개선하기 위해서 부서별 평균급여가 있는 테이블을 가상으로 만든다.

In [23]:
# Inline view: e1 holds the average salary per department_id (avgsal).
# Joining it to employees (e2) on department_id and filtering on
# e2.salary > e1.avgsal keeps employees paid above their department average.
df = pd.read_sql("""
        SELECT e2.last_name, e2.salary, e2.department_id
        FROM (SELECT department_id, avg(salary) avgsal
              FROM employees
              GROUP BY department_id) e1, employees e2
        WHERE e1.department_id = e2.department_id
        AND e2.salary > e1.avgsal
        """,xedb)
print(df)

    LAST_NAME   SALARY  DEPARTMENT_ID
0        King  24000.0             90
1      Hunold   9000.0             60
2       Ernst   6000.0             60
3   Greenberg  12008.0            100
4      Faviet   9000.0            100
5    Raphaely  11000.0             30
6       Weiss   8000.0             50
7       Fripp   8200.0             50
8    Kaufling   7900.0             50
9     Vollman   6500.0             50
10    Mourgos   5800.0             50
11     Ladwig   3600.0             50
12       Rajs   3500.0             50
13    Russell  14000.0             80
14   Partners  13500.0             80
15  Errazuriz  12000.0             80
16  Cambrault  11000.0             80
17    Zlotkey  10500.0             80
18     Tucker  10000.0             80
19  Bernstein   9500.0             80
20       Hall   9000.0             80
21       King  10000.0             80
22      Sully   9500.0             80
23     McEwen   9000.0             80
24    Vishney  10500.0             80
25     Green

#### 부서별 총액급여 조회

In [24]:
# Inline view: e holds the salary total per department_id (sumsal); the join
# to departments reports it alongside department_name.
df = pd.read_sql("""
        SELECT d.department_name, sumsal
        FROM (SELECT department_id, sum(salary) sumsal
              FROM employees
              GROUP BY department_id)e, departments d
        WHERE e.department_id=d.department_id
        """,xedb)
print(df)

     DEPARTMENT_NAME  SUMSAL
0     Administration    4400
1          Marketing   19000
2         Purchasing   24900
3    Human Resources    6500
4           Shipping  156400
5                 IT   28800
6   Public Relations   10000
7              Sales  304500
8          Executive   58000
9            Finance   51608
10        Accounting   20308


=> INLINE VIEW는 오브젝트가 아니기 때문에 호출할 수 없다. 
즉 가공하여 다시 사용하지 못한다. 이 제약을 해결하기 위해 WITH절 사용

## 5-7 WITH
* WITH절 내에 쿼리 블록(가상집합)을 생성한 후에 SELECT문에서 바로 사용
* 그러나 WITH 역시 오브젝트가 아니기 때문에 다른 쿼리문에서는 사용하지 못한다.

In [25]:
# WITH clause with two named query blocks:
#   dept_costs - total salary per department_name (employees joined to
#                departments on department_id)
#   avg_cost   - mean of those totals, computed as SUM(dept_total)/COUNT(*)
# The main query lists the departments whose total exceeds that mean,
# ordered by department_name.
df = pd.read_sql("""
        WITH
        dept_costs AS (SELECT d.department_name, SUM(e.salary) dept_total
                       FROM employees e
                       JOIN departments d
                       ON e.department_id = d.department_id
                       GROUP BY d.department_name),
        avg_cost AS (SELECT SUM(dept_total)/COUNT(*) dept_avg
                     FROM dept_costs)
        
        SELECT *
        FROM dept_costs
        WHERE dept_total > (SELECT dept_avg
                            FROM avg_cost)
                            ORDER BY department_name
        """,xedb)
print(df)

  DEPARTMENT_NAME  DEPT_TOTAL
0           Sales      304500
1        Shipping      156400


***
# [ 더 연습문제 ]

* ##  IN, ANY, ALL 
#### (1) 사원전체의 평균급여보다 급여를 많이 받는 사원의 정보를 조회하세요

In [26]:
# Exercise (1): employees earning strictly more than the average salary over
# the whole employees table. The comparison is '>' rather than '>=' so that
# an employee paid exactly the average is not reported as earning more than
# it, matching the exercise statement.
df = pd.read_sql("""
        SELECT employee_id, last_name, salary
        FROM employees
        WHERE salary > (SELECT avg(salary)
                        FROM employees)
        """,xedb)
print(df)

    EMPLOYEE_ID   LAST_NAME   SALARY
0           100        King  24000.0
1           101     Kochhar  17000.0
2           102     De Haan  17000.0
3           103      Hunold   9000.0
4           108   Greenberg  12008.0
5           109      Faviet   9000.0
6           110        Chen   8200.0
7           111     Sciarra   7700.0
8           112       Urman   7800.0
9           113        Popp   6900.0
10          114    Raphaely  11000.0
11          120       Weiss   8000.0
12          121       Fripp   8200.0
13          122    Kaufling   7900.0
14          123     Vollman   6500.0
15          145     Russell  14000.0
16          146    Partners  13500.0
17          147   Errazuriz  12000.0
18          148   Cambrault  11000.0
19          149     Zlotkey  10500.0
20          150      Tucker  10000.0
21          151   Bernstein   9500.0
22          152        Hall   9000.0
23          153       Olsen   8000.0
24          154   Cambrault   7500.0
25          155     Tuvault   7000.0
2

#### (2) last_name에 'u'가 있는 사원과 같은 부서에 근무하는 사원의 정보를 조회하세요

In [27]:
# Exercise (2): employees working in any department that has at least one
# employee whose last_name matches LIKE '%u%' (contains the letter 'u').
df = pd.read_sql("""
        SELECT employee_id, last_name
        FROM employees
        WHERE department_id IN (SELECT department_id
                                FROM employees
                                WHERE last_name LIKE '%u%')
        """,xedb)
print(df)

    EMPLOYEE_ID   LAST_NAME
0           107     Lorentz
1           106   Pataballa
2           105      Austin
3           104       Ernst
4           103      Hunold
5           199       Grant
6           198    OConnell
7           197      Feeney
8           196       Walsh
9           195       Jones
10          194      McCain
11          193     Everett
12          192        Bell
13          191     Perkins
14          190       Gates
15          189       Dilly
16          188       Chung
17          187      Cabrio
18          186   Dellinger
19          185        Bull
20          184    Sarchand
21          183       Geoni
22          182    Sullivan
23          181      Fleaur
24          180      Taylor
25          144      Vargas
26          143       Matos
27          142      Davies
28          141        Rajs
29          140       Patel
..          ...         ...
60          168        Ozer
61          167       Banda
62          166        Ande
63          165     

#### (3) location_id가 1700 인 사원의 last_name, department_id, job_id를 조회하세요.

In [28]:
# Exercise (3), subquery form: employees whose department_id is one of the
# departments with location_id 1700.
df = pd.read_sql("""
        SELECT last_name, department_id, job_id
        FROM employees
        WHERE department_id IN (SELECT department_id
                                FROM departments
                                WHERE location_id = 1700)
        """,xedb)
print(df)

     LAST_NAME  DEPARTMENT_ID      JOB_ID
0         King             90     AD_PRES
1      Kochhar             90       AD_VP
2      De Haan             90       AD_VP
3    Greenberg            100      FI_MGR
4       Faviet            100  FI_ACCOUNT
5         Chen            100  FI_ACCOUNT
6      Sciarra            100  FI_ACCOUNT
7        Urman            100  FI_ACCOUNT
8         Popp            100  FI_ACCOUNT
9     Raphaely             30      PU_MAN
10        Khoo             30    PU_CLERK
11       Baida             30    PU_CLERK
12      Tobias             30    PU_CLERK
13      Himuro             30    PU_CLERK
14  Colmenares             30    PU_CLERK
15      Whalen             10     AD_ASST
16     Higgins            110      AC_MGR
17       Gietz            110  AC_ACCOUNT


In [29]:
# Exercise (3), join form: employees joined to departments on department_id,
# restricted to departments with location_id 1700.
df = pd.read_sql("""
        SELECT e.last_name, e.department_id, e.job_id
        FROM employees e, departments d
        WHERE e.department_id = d.department_id
        AND d.location_id = 1700
        """,xedb)
print(df)

     LAST_NAME  DEPARTMENT_ID      JOB_ID
0         King             90     AD_PRES
1      Kochhar             90       AD_VP
2      De Haan             90       AD_VP
3    Greenberg            100      FI_MGR
4       Faviet            100  FI_ACCOUNT
5         Chen            100  FI_ACCOUNT
6      Sciarra            100  FI_ACCOUNT
7        Urman            100  FI_ACCOUNT
8         Popp            100  FI_ACCOUNT
9     Raphaely             30      PU_MAN
10        Khoo             30    PU_CLERK
11       Baida             30    PU_CLERK
12      Tobias             30    PU_CLERK
13      Himuro             30    PU_CLERK
14  Colmenares             30    PU_CLERK
15      Whalen             10     AD_ASST
16     Higgins            110      AC_MGR
17       Gietz            110  AC_ACCOUNT


#### (4) King에게 결재보고하는 사원의 이름과 급여를 조회하세요.

In [30]:
# Exercise (4), subquery form: employees whose manager_id is the employee_id
# of an employee with last_name 'King'. IN (rather than =) lets the subquery
# return more than one 'King' row.
df = pd.read_sql("""
        SELECT last_name, salary
        FROM employees
        WHERE manager_id IN (SELECT employee_id
                             FROM employees
                             WHERE last_name = 'King')
        """,xedb)
print(df)

    LAST_NAME   SALARY
0     Kochhar  17000.0
1     De Haan  17000.0
2    Raphaely  11000.0
3       Weiss   8000.0
4       Fripp   8200.0
5    Kaufling   7900.0
6     Vollman   6500.0
7     Mourgos   5800.0
8     Russell  14000.0
9    Partners  13500.0
10  Errazuriz  12000.0
11  Cambrault  11000.0
12    Zlotkey  10500.0
13  Hartstein  13000.0


In [31]:
# Exercise (4), self-join form: w is the worker row and m the manager row;
# keeps workers whose manager has last_name 'King'.
# NOTE(review): this variant selects employee_id and last_name, whereas the
# exercise asks for name and salary -- confirm whether w.salary should be
# selected instead.
df = pd.read_sql("""
        SELECT w.employee_id work_id, w.last_name work_name
        FROM employees w, employees m
        WHERE w.manager_id = m.employee_id
        AND m.last_name = 'King'
        """,xedb)
print(df)

    WORK_ID  WORK_NAME
0       101    Kochhar
1       102    De Haan
2       114   Raphaely
3       120      Weiss
4       121      Fripp
5       122   Kaufling
6       123    Vollman
7       124    Mourgos
8       145    Russell
9       146   Partners
10      147  Errazuriz
11      148  Cambrault
12      149    Zlotkey
13      201  Hartstein


#### (5) Executive부서의 모든 사원에 대한 department_id, last_name, job_id 출력하세요.

In [32]:
# Exercise (5), subquery form: employees whose department_id belongs to a
# department named 'Executive'.
df = pd.read_sql("""
        SELECT department_id, last_name, job_id
        FROM employees
        WHERE department_id IN (SELECT department_id
                                FROM departments
                                WHERE department_name = 'Executive')
        """,xedb)
print(df)

   DEPARTMENT_ID LAST_NAME   JOB_ID
0             90      King  AD_PRES
1             90   Kochhar    AD_VP
2             90   De Haan    AD_VP


In [33]:
# Exercise (5), join form: employees joined to departments on department_id,
# restricted to the department named 'Executive'. department_name is
# qualified with the d alias, like every other column in this two-table join,
# so the reference stays unambiguous.
df = pd.read_sql("""
        SELECT e.department_id, e.last_name, e.job_id
        FROM employees e, departments d
        WHERE e.department_id = d.department_id
        AND d.department_name = 'Executive'
        """,xedb)
print(df)

   DEPARTMENT_ID LAST_NAME   JOB_ID
0             90      King  AD_PRES
1             90   Kochhar    AD_VP
2             90   De Haan    AD_VP


#### (6) 부서 60에 소속된 사원의 급여보다 높은 급여를 받는 모든 사원을 조회하세요.

In [34]:
# Exercise (6), multiple-row form: salary > ALL (...) is true only when the
# salary exceeds every salary found in department 60.
df = pd.read_sql("""
        SELECT last_name
        FROM employees
        WHERE salary > ALL (SELECT salary
                            FROM employees
                            WHERE department_id=60)
        """,xedb)
print(df)

    LAST_NAME
0       Sully
1      Greene
2   Bernstein
3         Fox
4        Baer
5       Bloom
6        King
7      Tucker
8     Vishney
9     Zlotkey
10  Cambrault
11   Raphaely
12       Abel
13       Ozer
14  Errazuriz
15    Higgins
16  Greenberg
17  Hartstein
18   Partners
19    Russell
20    De Haan
21    Kochhar
22       King


In [35]:
# Single-row alternative: compare each salary against the highest salary
# found in department 60 instead of using `> ALL`.
df = pd.read_sql(
    sql="""
        SELECT last_name
        FROM employees
        WHERE salary > (SELECT max(salary)
                        FROM employees
                        WHERE department_id=60)
        """,
    con=xedb,
)
print(df)

    LAST_NAME
0        King
1     Kochhar
2     De Haan
3   Greenberg
4    Raphaely
5     Russell
6    Partners
7   Errazuriz
8   Cambrault
9     Zlotkey
10     Tucker
11  Bernstein
12       King
13      Sully
14    Vishney
15     Greene
16       Ozer
17      Bloom
18        Fox
19       Abel
20  Hartstein
21       Baer
22    Higgins


#### (7) IT 부서 사원에 대한 employee_id, last_name, job_id를 조회하세요.

In [36]:
# Single-row subquery: resolve the department_id of the 'IT' department and
# list the employees assigned to it. The `=` comparison requires the
# subquery to return exactly one row.
df = pd.read_sql(
    sql="""
        SELECT employee_id, last_name, job_id
        FROM employees
        WHERE department_id = (SELECT department_id
                               FROM departments
                               WHERE department_name = 'IT')
        """,
    con=xedb,
)
print(df)

   EMPLOYEE_ID  LAST_NAME   JOB_ID
0          103     Hunold  IT_PROG
1          104      Ernst  IT_PROG
2          105     Austin  IT_PROG
3          106  Pataballa  IT_PROG
4          107    Lorentz  IT_PROG


In [37]:
# Join version of the 'IT' department lookup: match employees to their
# department row and filter on the department name.
df = pd.read_sql(
    sql="""
        SELECT e.employee_id, e.last_name, e.job_id
        FROM employees e, departments d
        WHERE e.department_id = d.department_id
        AND d.department_name = 'IT'
        """,
    con=xedb,
)
print(df)

   EMPLOYEE_ID  LAST_NAME   JOB_ID
0          103     Hunold  IT_PROG
1          104      Ernst  IT_PROG
2          105     Austin  IT_PROG
3          106  Pataballa  IT_PROG
4          107    Lorentz  IT_PROG


#### (8) 전체 사원의 평균 급여보다 급여를 많이 받고, last_name에 "u"가 포함된 사원이 있는 부서에서 근무하는 모든 사원의 employee_id, last_name, salary를 조회하세요.

In [38]:
# Two independent subqueries combined with AND:
#   1. the employee's department must contain at least one employee whose
#      last_name includes a lowercase 'u';
#   2. the employee's own salary must exceed the company-wide average.
df = pd.read_sql(
    sql="""
        SELECT employee_id, last_name, salary
        FROM employees
        WHERE department_id IN (SELECT department_id
                                FROM employees
                                WHERE last_name like '%u%')
        AND salary >(SELECT AVG(salary) 
                     FROM employees)
        """,
    con=xedb,
)
print(df)

    EMPLOYEE_ID   LAST_NAME   SALARY
0           103      Hunold   9000.0
1           123     Vollman   6500.0
2           122    Kaufling   7900.0
3           121       Fripp   8200.0
4           120       Weiss   8000.0
5           177  Livingston   8400.0
6           176      Taylor   8600.0
7           175      Hutton   8800.0
8           174        Abel  11000.0
9           172       Bates   7300.0
10          171       Smith   7400.0
11          170         Fox   9600.0
12          169       Bloom  10000.0
13          168        Ozer  11500.0
14          165         Lee   6800.0
15          164     Marvins   7200.0
16          163      Greene   9500.0
17          162     Vishney  10500.0
18          161      Sewall   7000.0
19          160       Doran   7500.0
20          159       Smith   8000.0
21          158      McEwen   9000.0
22          157       Sully   9500.0
23          156        King  10000.0
24          155     Tuvault   7000.0
25          154   Cambrault   7500.0
2

## EXISTS
#### (9) 같은 부서에 자신보다 늦게 입사했지만 더 높은 급여를 받는 사원이 있는 사원을 조회하세요.

In [39]:
# Correlated EXISTS: an outer employee (o) is kept when at least one other
# row (i) in the same department has a later hire_date and a higher salary.
# The inner SELECT list ('X') is irrelevant; only row existence matters.
df = pd.read_sql(
    sql="""
        SELECT  employee_id, first_name, hire_date, salary
        FROM employees o
        WHERE EXISTS (SELECT 'X'
                      FROM employees i
                      WHERE i.department_id = o.department_id
                      AND i.hire_date > o.hire_date
                      AND i.salary > o.salary)
        """,
    con=xedb,
)
print(df)

    EMPLOYEE_ID   FIRST_NAME  HIRE_DATE   SALARY
0           102          Lex 2001-01-13  17000.0
1           105        David 2005-06-25   4800.0
2           107        Diana 2007-02-07   4200.0
3           106        Valli 2006-02-05   4800.0
4           109       Daniel 2002-08-16   9000.0
5           111       Ismael 2005-09-30   7700.0
6           117        Sigal 2005-07-24   2800.0
7           192        Sarah 2004-02-04   4000.0
8           184      Nandita 2004-01-27   4200.0
9           141       Trenna 2003-10-17   3500.0
10          137       Renske 2003-07-14   3600.0
11          133        Jason 2004-06-14   3300.0
12          122        Payam 2003-05-01   7900.0
13          193      Britney 2005-03-03   3900.0
14          185       Alexis 2005-02-20   4100.0
15          142       Curtis 2005-01-29   3100.0
16          131        James 2005-02-16   2500.0
17          120      Matthew 2004-07-18   8000.0
18          189     Jennifer 2005-08-13   3600.0
19          188     

#### (10) 부서 이름별 사원들의 평균 급여와 급여 총액
#### < JOIN >

In [40]:
# Join version: total and (rounded) average salary per department name.
# The inner join only returns departments that have at least one employee.
# The GROUP BY column is qualified with the `d` alias so it refers to the
# same column as the SELECT list and stays unambiguous in the two-table join.
df = pd.read_sql("""
        SELECT d.department_name, sum(e.salary) sum_sal, round(avg(e.salary),0)avg_sal
        FROM employees e, departments d
        WHERE e.department_id = d.department_id
        GROUP BY d.department_name
        """,xedb)
print(df)

     DEPARTMENT_NAME  SUM_SAL  AVG_SAL
0     Administration     4400     4400
1         Accounting    20308    10154
2         Purchasing    24900     4150
3    Human Resources     6500     6500
4                 IT    28800     5760
5   Public Relations    10000    10000
6          Executive    58000    19333
7           Shipping   156400     3476
8              Sales   304500     8956
9            Finance    51608     8601
10         Marketing    19000     9500


#### < INLINE VIEW >

In [41]:
# Inline-view version: aggregate `employees` per department_id first, then
# join the already-summarised rows to `departments` to pick up the name.
df = pd.read_sql(
    sql="""
        SELECT d.department_name, e.sumsal, round(e.avgsal,0) avgsal
        FROM (SELECT department_id, sum(salary) sumsal, avg(salary) avgsal
              FROM employees
              GROUP BY department_id) e, departments d
        WHERE e.department_id = d.department_id
        """,
    con=xedb,
)
print(df)

     DEPARTMENT_NAME  SUMSAL  AVGSAL
0     Administration    4400    4400
1          Marketing   19000    9500
2         Purchasing   24900    4150
3    Human Resources    6500    6500
4           Shipping  156400    3476
5                 IT   28800    5760
6   Public Relations   10000   10000
7              Sales  304500    8956
8          Executive   58000   19333
9            Finance   51608    8601
10        Accounting   20308   10154


#### < SCALAR SUBQUERY >

In [42]:
# Scalar-subquery version: for every row of `departments`, run one
# correlated subquery for the salary total and one for the rounded average.
# No join filters the outer table, so departments without employees are
# still listed, with NULL aggregates.
df = pd.read_sql(
    sql="""
        SELECT department_name, (SELECT sum(salary)
                                 FROM employees
                                 WHERE department_id = d.department_id) sumsal,
                                   
                                 (SELECT round(avg(salary),0)
                                  FROM employees
                                  WHERE department_id = d.department_id) avgsal
        FROM departments d
        """,
    con=xedb,
)
print(df)

         DEPARTMENT_NAME    SUMSAL   AVGSAL
0         Administration    4400.0   4400.0
1              Marketing   19000.0   9500.0
2             Purchasing   24900.0   4150.0
3        Human Resources    6500.0   6500.0
4               Shipping  156400.0   3476.0
5                     IT   28800.0   5760.0
6       Public Relations   10000.0  10000.0
7                  Sales  304500.0   8956.0
8              Executive   58000.0  19333.0
9                Finance   51608.0   8601.0
10            Accounting   20308.0  10154.0
11              Treasury       NaN      NaN
12         Corporate Tax       NaN      NaN
13    Control And Credit       NaN      NaN
14  Shareholder Services       NaN      NaN
15              Benefits       NaN      NaN
16         Manufacturing       NaN      NaN
17          Construction       NaN      NaN
18           Contracting       NaN      NaN
19            Operations       NaN      NaN
20            IT Support       NaN      NaN
21                   NOC       N

#### (11) 사원수가 3명 미만인 부서번호, 부서이름, 인원수를 조회하세요
#### < JOIN >

In [43]:
# Join version: count employees per department and keep, via HAVING, only
# the departments with fewer than three joined employee rows.
df = pd.read_sql(
    sql="""
        SELECT d.department_id, d.department_name, COUNT(*) cn
        FROM departments d 
        JOIN employees e
        ON d.department_id = e.department_id
        GROUP BY d.department_id, d.department_name
        HAVING COUNT(*) < 3
        """,
    con=xedb,
)
print(df)

   DEPARTMENT_ID   DEPARTMENT_NAME  CN
0             70  Public Relations   1
1             10    Administration   1
2            110        Accounting   2
3             40   Human Resources   1
4             20         Marketing   2


#### < INLINE VIEW >

In [44]:
# Inline-view version: count and filter (< 3) inside the view on
# `employees`, then join the surviving department_ids to `departments`
# to fetch the department name.
df = pd.read_sql(
    sql="""
        SELECT d.department_id, d.department_name, e.cn
        FROM (SELECT department_id, count(*) cn
              FROM employees
              GROUP BY department_id
              HAVING count(*) < 3) e, departments d
        WHERE d.department_id = e.department_id
        """,
    con=xedb,
)
print(df)

   DEPARTMENT_ID   DEPARTMENT_NAME  CN
0             70  Public Relations   1
1             10    Administration   1
2             20         Marketing   2
3             40   Human Resources   1
4            110        Accounting   2


#### (12) 사원 수가 가장 많은 부서번호, 부서이름, 인원수를 조회하세요.

In [45]:
# Largest department: the subquery nests aggregates (MAX over the
# per-department COUNT) to get the biggest head count, and HAVING keeps the
# department(s) whose own count equals it.
df = pd.read_sql(
    sql="""
        SELECT d.department_id, d.department_name, COUNT(*)
        FROM departments d 
        JOIN employees e
        ON d.department_id = e.department_id
        GROUP BY d.department_id, d.department_name
        HAVING COUNT(*) = (SELECT MAX(COUNT(*))
                         FROM employees
                         GROUP BY department_id)
        """,
    con=xedb,
)
print(df)

   DEPARTMENT_ID DEPARTMENT_NAME  COUNT(*)
0             50        Shipping        45


#### (13) 각 해에 입사한 사원들의 수를 조회하세요.

In [46]:
# Number of hires per calendar year, oldest year first.
# NOTE(review): the alias `day` actually holds the four-digit hire year
# (to_char(hire_date,'yyyy')); it is kept as-is because renaming it would
# change the resulting column name.
df = pd.read_sql(
    sql="""
        SELECT to_char(hire_date,'yyyy') day, count(*)
        FROM employees
        GROUP BY to_char(hire_date,'yyyy')
        ORDER BY DAY
        """,
    con=xedb,
)
print(df)

    DAY  COUNT(*)
0  2001         1
1  2002         7
2  2003         6
3  2004        10
4  2005        29
5  2006        24
6  2007        19
7  2008        11


####  < 데이터 출력물의 작성방법에 따라 쿼리가공 >

In [47]:
# Pivot the per-year hire counts into a single row.
# The inline view yields one (YEAR, cn) row per hire year; each
# MAX(decode(...)) picks the count for one hard-coded year and exposes it
# as its own column (yr2001 .. yr2008). Years outside that list get no
# column.
df = pd.read_sql(
    sql="""
        SELECT
                MAX (decode(YEAR,'2001',cn))yr2001,
                MAX (decode(YEAR,'2002',cn))yr2002,
                MAX (decode(YEAR,'2003',cn))yr2003,
                MAX (decode(YEAR,'2004',cn))yr2004,
                MAX (decode(YEAR,'2005',cn))yr2005,
                MAX (decode(YEAR,'2006',cn))yr2006,
                MAX (decode(YEAR,'2007',cn))yr2007,
                MAX (decode(YEAR,'2008',cn))yr2008
        FROM (SELECT to_char(hire_date,'yyyy')YEAR, count(*)cn
              FROM employees
              GROUP BY (to_char(hire_date,'yyyy')))
        """,
    con=xedb,
)
print(df)

   YR2001  YR2002  YR2003  YR2004  YR2005  YR2006  YR2007  YR2008
0       1       7       6      10      29      24      19      11


In [48]:
# Release the Oracle resources created in the first cell of the notebook:
# close the cursor first, then the connection it was opened on.
cur.close()
xedb.close()