# Python 下載CSV檔案與解析


* 了解 CSV 檔案格式與內容
* 能夠利用套件存取 CSV 格式的檔案



## 作業目標

* 比較一下範例檔案中的「File I/O」與「CSV Reader」讀出來的內容有什麼差異

* 根據範例檔案的結果：
    1. 取出班次一的每一個時間
    2. 將班次一的每一個時間用一種資料型態保存
    3. 將班次一到五與其所有時間用一種資料型態個別保存


### 比較一下範例檔案中的「File I/O」與「CSV Reader」讀出來的內容有什麼差異

In [1]:
from urllib.request import urlretrieve
import os
import csv

# Local directory that holds the downloaded data file.
path = './Data'
# Create the directory on first run; without it both os.listdir() and
# urlretrieve() below fail with FileNotFoundError when ./Data is missing.
os.makedirs(path, exist_ok=True)
dirs = os.listdir(path)

# Remote CSV file to download; it is saved locally as ./Data/example.csv.
res = "http://opendata.hccg.gov.tw/dataset/432257df-491f-4875-8b56-dd814aee5d7b/resource/de014c8b-9b75-4152-9fc6-f0d499cefbe4/download/20150305140446074.csv"
urlretrieve(res, os.path.join(path, "example.csv"))

# Number of leading lines/rows printed by the preview cells.
N = 6

In [2]:
# Open the CSV file with plain file I/O and preview its first N lines.
with open(os.path.join(path, "example.csv"), "r", encoding="utf-8") as fh:
    # zip() stops as soon as either side runs out, so a file shorter than
    # N lines is printed in full instead of raising StopIteration.
    f = [line.strip() + "\n" for _, line in zip(range(N), fh)]
print(''.join(f))

﻿序號,路線編號,起訖站,平/假日,站名,班次1,班次2,班次3,班次4,班次5,班次6,班次7,班次8,班次9,班次10,班次11,是否單邊停靠
1,51,香山區公所—火車站（先經中山路）,平日,香山區公所,06:30,07:50,09:10,10:30,12:00,13:20,14:40,16:10,18:10,19:40,21:10,否
2,51,香山區公所—火車站（先經中山路）,平日,香山國小,06:32,07:52,09:12,10:32,12:02,13:22,14:42,16:12,18:12,19:42,21:12,否
3,51,香山區公所—火車站（先經中山路）,平日,聖德宮,06:33,07:53,09:13,10:33,12:03,13:23,14:43,16:13,18:13,19:43,21:13,否
4,51,香山區公所—火車站（先經中山路）,平日,頂埔社區,06:34,07:55,09:15,10:35,12:05,13:24,14:45,16:15,18:15,19:45,21:15,否
5,51,香山區公所—火車站（先經中山路）,平日,大鵬新城,06:36,07:57,09:17,10:37,12:07,13:26,14:47,16:17,18:17,19:47,21:17,單邊停靠



In [3]:
# Open the CSV file with csv.reader and preview its first N rows.
with open(os.path.join(path, "example.csv"), newline='', encoding="utf-8") as csvfile:
    # Parse the whole file once; every row becomes a list of strings.
    # NOTE: the file starts with a UTF-8 BOM, which encoding="utf-8" keeps
    # as '\ufeff' in front of the first header cell; encoding="utf-8-sig"
    # would strip it.
    l = list(csv.reader(csvfile))

# Print the first N parsed rows. Slicing the parsed rows avoids a second
# pass over the file and never cuts a quoted multi-line field in half.
for row in l[:N]:
    print(row)

['\ufeff序號', '路線編號', '起訖站', '平/假日', '站名', '班次1', '班次2', '班次3', '班次4', '班次5', '班次6', '班次7', '班次8', '班次9', '班次10', '班次11', '是否單邊停靠']
['1', '51', '香山區公所—火車站（先經中山路）', '平日', '香山區公所', '06:30', '07:50', '09:10', '10:30', '12:00', '13:20', '14:40', '16:10', '18:10', '19:40', '21:10', '否']
['2', '51', '香山區公所—火車站（先經中山路）', '平日', '香山國小', '06:32', '07:52', '09:12', '10:32', '12:02', '13:22', '14:42', '16:12', '18:12', '19:42', '21:12', '否']
['3', '51', '香山區公所—火車站（先經中山路）', '平日', '聖德宮', '06:33', '07:53', '09:13', '10:33', '12:03', '13:23', '14:43', '16:13', '18:13', '19:43', '21:13', '否']
['4', '51', '香山區公所—火車站（先經中山路）', '平日', '頂埔社區', '06:34', '07:55', '09:15', '10:35', '12:05', '13:24', '14:45', '16:15', '18:15', '19:45', '21:15', '否']
['5', '51', '香山區公所—火車站（先經中山路）', '平日', '大鵬新城', '06:36', '07:57', '09:17', '10:37', '12:07', '13:26', '14:47', '16:17', '18:17', '19:47', '21:17', '單邊停靠']


### 根據範例檔案的結果：

1. 取出班次一的每一個時間
2. 將班次一的每一個時間用一種資料型態保存
3. 將班次一到五與其所有時間用一種資料型態個別保存

In [4]:
# 1. Extract every time of trip 1 (column index 5), skipping the header row.
bus = ', '.join(record[5] for record in l[1:])
# Print the column header followed by the comma-separated times.
print(f"{l[0][5]}:\n{bus}")

班次1:
06:30, 06:32, 06:33, 06:34, 06:36, 06:38, 06:39, 06:41, 06:42, 06:43, 06:44, 06:45, 06:46, 06:47, 06:48, 06:49, 06:50, 06:51, 06:53, 06:55, 06:57, 06:58, 07:00, 07:01, 07:03, 07:05, 07:07, 07:09, 07:10, 07:12, 07:14, 07:16, 07:18, 07:20, 07:21, 07:23, 07:24, 07:25, 07:00, 07:02, 07:03, 07:04, 07:06, 07:08, 07:09, 07:11, 07:12, 07:13, 07:14, 07:15, 07:16, 07:17, 07:18, 07:19, 07:20, 07:21, 07:23, 07:25, 07:27, 07:28, 07:30, 07:31, 07:33, 07:35, 07:37, 07:39, 07:40, 07:42, 07:44, 07:46, 07:48, 07:49, 07:51, 07:53, 07:54, 07:55


In [5]:
# 2. Store every time of trip 1 in one data structure: a dict that maps
#    the column header to the list of that column's values.
bus_list = [record[5] for record in l[1:]]
bus_dict1 = {l[0][5]: bus_list}

print(bus_dict1)

{'班次1': ['06:30', '06:32', '06:33', '06:34', '06:36', '06:38', '06:39', '06:41', '06:42', '06:43', '06:44', '06:45', '06:46', '06:47', '06:48', '06:49', '06:50', '06:51', '06:53', '06:55', '06:57', '06:58', '07:00', '07:01', '07:03', '07:05', '07:07', '07:09', '07:10', '07:12', '07:14', '07:16', '07:18', '07:20', '07:21', '07:23', '07:24', '07:25', '07:00', '07:02', '07:03', '07:04', '07:06', '07:08', '07:09', '07:11', '07:12', '07:13', '07:14', '07:15', '07:16', '07:17', '07:18', '07:19', '07:20', '07:21', '07:23', '07:25', '07:27', '07:28', '07:30', '07:31', '07:33', '07:35', '07:37', '07:39', '07:40', '07:42', '07:44', '07:46', '07:48', '07:49', '07:51', '07:53', '07:54', '07:55']}


In [6]:
# 3. Store trips 1-5, each with all of its times, in one data structure:
#    a dict that maps each column header to the list of that column's values.
bus_dict = {}
bus_dict_a = {}
bus_dict_b = {}
# Headers of the five trip columns, which occupy column indices 5..9.
bus_index = [l[0][5 + cols] for cols in range(5)]

# enumerate() supplies the column index directly; looking it up with
# bus_index.index(index) would rescan the list on every iteration and pick
# the wrong column if two headers were identical.
for col, index in enumerate(bus_index, start=5):
    # method 1: iterate over the data rows themselves
    bus_dict_a[index] = [row[col] for row in l[1:]]

    # method 2: iterate over row numbers, skipping the header at index 0
    bus_dict_b[index] = [l[row_no][col] for row_no in range(1, len(l))]

# Show trips 1-5 (both methods build the same mapping; method 1 is used).
bus_dict = bus_dict_a
print(bus_dict)

{'班次1': ['06:30', '06:32', '06:33', '06:34', '06:36', '06:38', '06:39', '06:41', '06:42', '06:43', '06:44', '06:45', '06:46', '06:47', '06:48', '06:49', '06:50', '06:51', '06:53', '06:55', '06:57', '06:58', '07:00', '07:01', '07:03', '07:05', '07:07', '07:09', '07:10', '07:12', '07:14', '07:16', '07:18', '07:20', '07:21', '07:23', '07:24', '07:25', '07:00', '07:02', '07:03', '07:04', '07:06', '07:08', '07:09', '07:11', '07:12', '07:13', '07:14', '07:15', '07:16', '07:17', '07:18', '07:19', '07:20', '07:21', '07:23', '07:25', '07:27', '07:28', '07:30', '07:31', '07:33', '07:35', '07:37', '07:39', '07:40', '07:42', '07:44', '07:46', '07:48', '07:49', '07:51', '07:53', '07:54', '07:55'], '班次2': ['07:50', '07:52', '07:53', '07:55', '07:57', '07:59', '08:01', '08:03', '08:04', '08:05', '08:06', '08:07', '08:08', '08:09', '08:10', '08:11', '08:12', '08:13', '08:15', '08:17', '08:19', '08:21', '08:23', '08:24', '08:26', '08:28', '08:30', '08:32', '08:33', '08:35', '08:37', '08:39', '08:41', '

In [7]:
# Print only the times of each trip, one block per trip.
# Iterating bus_index itself (rather than a hard-coded range(5)) keeps this
# loop in step with however many trip columns were collected.
for name in bus_index:
    print(str(name) + ":\n%s\n" % ', '.join(bus_dict[name]))

班次1:
06:30, 06:32, 06:33, 06:34, 06:36, 06:38, 06:39, 06:41, 06:42, 06:43, 06:44, 06:45, 06:46, 06:47, 06:48, 06:49, 06:50, 06:51, 06:53, 06:55, 06:57, 06:58, 07:00, 07:01, 07:03, 07:05, 07:07, 07:09, 07:10, 07:12, 07:14, 07:16, 07:18, 07:20, 07:21, 07:23, 07:24, 07:25, 07:00, 07:02, 07:03, 07:04, 07:06, 07:08, 07:09, 07:11, 07:12, 07:13, 07:14, 07:15, 07:16, 07:17, 07:18, 07:19, 07:20, 07:21, 07:23, 07:25, 07:27, 07:28, 07:30, 07:31, 07:33, 07:35, 07:37, 07:39, 07:40, 07:42, 07:44, 07:46, 07:48, 07:49, 07:51, 07:53, 07:54, 07:55

班次2:
07:50, 07:52, 07:53, 07:55, 07:57, 07:59, 08:01, 08:03, 08:04, 08:05, 08:06, 08:07, 08:08, 08:09, 08:10, 08:11, 08:12, 08:13, 08:15, 08:17, 08:19, 08:21, 08:23, 08:24, 08:26, 08:28, 08:30, 08:32, 08:33, 08:35, 08:37, 08:39, 08:41, 08:43, 08:45, 08:47, 08:49, 08:50, 08:50, 08:52, 08:53, 08:55, 08:57, 08:59, 09:01, 09:03, 09:04, 09:05, 09:06, 09:07, 09:08, 09:09, 09:10, 09:11, 09:12, 09:13, 09:15, 09:17, 09:19, 09:21, 09:23, 09:24, 09:26, 09:28, 09:30, 09: