# Exercise 1

In [1]:
import os
from sqlalchemy import create_engine
import pandas as pd
# Show at most 20 columns when a DataFrame is printed.
pd.options.display.max_columns = 20

In [2]:
def query_pandas(sql, db):
    """
    Executes a SQL query on a PostgreSQL database and returns the result as a Pandas DataFrame.

    Connection settings default to the values this notebook has always used
    (user/password ``postgres``, host ``postgis_container``, port ``5432``) and
    can be overridden with the ``PGUSER``, ``PGPASSWORD``, ``PGHOST`` and
    ``PGPORT`` environment variables, so credentials do not have to live in
    the notebook.

    Args:
        sql (str): The SQL query to execute.
        db (str): The name of the PostgreSQL database to connect to.

    Returns:
        pandas.DataFrame: The result of the SQL query as a Pandas DataFrame.
    """
    # Local import keeps the function self-contained within this cell.
    from urllib.parse import quote_plus

    user = os.environ.get('PGUSER', 'postgres')
    password = os.environ.get('PGPASSWORD', 'postgres')
    host = os.environ.get('PGHOST', 'postgis_container')
    port = os.environ.get('PGPORT', '5432')

    # Quote user/password so special characters cannot break the URL.
    database_url = 'postgresql://{}:{}@{}:{}/{}'.format(
        quote_plus(user), quote_plus(password), host, port, db
    )

    # A fresh engine (with its own connection pool) is created per call, so it
    # must be disposed explicitly; otherwise pooled connections stay open until
    # the engine happens to be garbage collected.
    engine = create_engine(database_url)
    try:
        with engine.connect() as conn:
            return pd.read_sql(sql=sql, con=conn)
    finally:
        engine.dispose()

### Q1. 埼玉県で一番小さい面積の市町村を調べる

In [3]:
# " "のなかにSQL文を記述
# Q1: municipality in Saitama whose area equals the prefecture-wide minimum.
# The pieces below are joined by implicit string concatenation.
sql = (
    "select name_2, nl_name_2, st_area(geom::geography)/1000000 as area_km2 "
    "        from adm2 "
    "            where name_1='Saitama' and st_area(geom::geography)/1000000 = "
    "                (select min(st_area(geom::geography)/1000000) from adm2 where name_1='Saitama');"
)


In [4]:
# Run the Q1 query against the 'gisdb' database and print the resulting table.
out = query_pandas(sql=sql, db='gisdb')
print(out)

   name_2 nl_name_2  area_km2
0  Warabi        蕨市  6.587194


### Q2. 都道府県ごとに一番大きい面積を有する市町村を調べる

In [11]:
# " "のなかにSQL文を記述
# Q2: for each prefecture (name_1), the municipality with the largest area.
# A row of a1 is kept when its area equals the maximum area among rows of the
# same prefecture. The correlated subquery filters with WHERE rather than
# GROUP BY ... HAVING, because the condition involves no aggregate and only
# one prefecture's rows are needed per outer row.
sql = (
    "select a1.name_1, a1.name_2, a1.type_2, st_area(a1.geom::geography)/1000000 as area_km2 "
    "from adm2 a1 "
    "where st_area(a1.geom::geography)/1000000 = "
    "(select max(st_area(a2.geom::geography)/1000000) "
    "from adm2 a2 "
    "where a2.name_1 = a1.name_1) "
    "order by area_km2 desc;"
)


In [12]:
# Run the Q2 query against the 'gisdb' database and print the resulting table.
out = query_pandas(sql=sql, db='gisdb')
print(out)

       name_1         name_2      type_2     area_km2
0        Gifu       Takayama         Shi  2173.869108
1    Shizuoka      Hamamatsu         Shi  1502.644477
2     Tochigi          Nikkō         Shi  1444.964660
3    Hokkaido         Ashoro       Machi  1406.101261
4    Yamagata       Tsuruoka         Shi  1343.268161
5      Toyama         Toyama         Shi  1255.339885
6       Akita      Yurihonjō         Shi  1236.171305
7   Hiroshima        Shōbara         Shi  1233.377442
8   Fukushima          Iwaki         Shi  1212.132562
9     Niigata       Murakami         Shi  1183.739695
10      Iwate     Ichinoseki         Shi  1139.296242
11   Wakayama      Kyōtanabe         Shi  1049.130028
12     Nagano      Matsumoto         Shi   943.137861
13      Aichi         Toyota         Shi   914.965700
14       Oita          Saiki         Shi   908.719706
15      Fukui            Ōno         Shi   878.521299
16  Yamaguchi        Iwakuni         Shi   867.025699
17     Aomori          Mutsu

### Q3. 都道府県ごとに市町村の総数が多い順に並べる

In [39]:
# " "のなかにSQL文を記述
# Q3: number of municipalities per prefecture (name_1), largest count first.
# The pieces below are joined by implicit string concatenation.
sql = (
    " select name_1, count(*) as num "
    "        from adm2 "
    "        group by name_1 "
    "        order by count(*) desc;"
)


In [40]:
# Run the Q3 query against the 'gisdb' database and print the resulting table.
out = query_pandas(sql=sql, db='gisdb')
print(out)

       name_1  num
0    Hokkaido  180
1      Nagano   82
2     Saitama   70
3     Fukuoka   66
4       Aichi   64
5   Fukushima   60
6       Chiba   56
7       Tokyo   53
8    Kumamoto   48
9   Kagoshima   46
10    Ibaraki   45
11      Osaka   43
12       Gifu   43
13   Shizuoka   43
14    Okinawa   42
15      Hyōgo   41
16     Aomori   40
17       Nara   39
18      Gunma   38
19     Miyagi   36
20      Kochi   35
21   Yamagata   35
22      Iwate   35
23   Kanagawa   33
24    Niigata   31
25    Tochigi   31
26    Okayama   30
27   Miyazaki   30
28   Wakayama   29
29  Yamanashi   28
30        Mie   28
31      Shiga   27
32      Kyoto   26
33      Akita   25
34  Tokushima   24
35   Naoasaki   23
36  Hiroshima   23
37    Shimane   22
38       Saga   20
39      Ehime   20
40  Yamaguchi   20
41   Ishikawa   19
42       Oita   18
43    Tottori   18
44      Fukui   17
45     Toyama   15
46     Kagawa   14


### Q4. 都道府県ごとに村の総数が多い順に並べる

In [65]:
# " "のなかにSQL文を記述
# Q4: number of villages per prefecture (name_1), largest count first.
# The village condition is a plain row filter, so it belongs in WHERE: rows
# are filtered before grouping and engtype_2 is no longer needed as a
# grouping column. The result set is the same as the HAVING formulation.
sql = (
    "select name_1, count(*) as number_of_villages "
    "from adm2 "
    "where engtype_2 = 'Village' "
    "group by name_1 "
    "order by count(*) desc;"
)


In [66]:
# Run the Q4 query against the 'gisdb' database and print the resulting table.
out = query_pandas(sql=sql, db='gisdb')
print(out)

       name_1  number_of_villages
0      Nagano                  34
1     Okinawa                  16
2    Hokkaido                  15
3   Fukushima                  13
4        Nara                  11
5    Kumamoto                   8
6      Aomori                   7
7   Yamanashi                   6
8       Iwate                   6
9     Fukuoka                   4
10      Kochi                   4
11      Gunma                   4
12    Niigata                   3
13      Akita                   3
14        Mie                   2
15       Gifu                   2
16    Ibaraki                   2
17  Kagoshima                   2
18      Aichi                   2
19   Miyazaki                   2
20    Okayama                   2
21       Oita                   1
22      Osaka                   1
23    Shimane                   1
24  Tokushima                   1
25    Tottori                   1
26     Toyama                   1
27      Kyoto                   1
