#### 在不同地區及溫度下，是否會促進酸雨的形成物質(NOx、SO2)的濃度提升?
- 地區：下拉式選單
- 時間：X軸
- 溫度：Y軸
- 空氣指標(NOx、SO2)：泡泡大小+顏色
    - 濃度用泡泡大小表示
    - 不同指標用不同顏色

In [8]:
import pandas as pd

In [9]:
df = pd.read_csv('./rawData/空氣品質監測月值.csv')

new_col_names = {'"siteid"':'siteid', '"sitename"':'sitename', '"itemid"':'itemid', '"itemname"':'itemname', 
                '"itemengname"':'itemengname', '"itemunit"':'itemunit', '"monitormonth"':'monitormonth', '"concentration"':'concentration'}
df = df.rename(columns=new_col_names)
df

Unnamed: 0,siteid,sitename,itemid,itemname,itemengname,itemunit,monitormonth,concentration
0,71,復興,6,一氧化氮,NO,ppb,201101,17.1
1,38,崙背,6,一氧化氮,NO,ppb,201101,3.44
2,38,崙背,7,二氧化氮,NO2,ppb,201101,13.33
3,11,士林,10,風速,WIND_SPEED,m/sec,201101,2.39
4,49,仁武,8,總碳氫化合物,THC,ppm,201101,2.253
...,...,...,...,...,...,...,...,...
157911,85,大城,14,溫度,AMB_TEMP,℃,202302,16.9
157912,85,大城,31,甲烷,CH4,ppm,202302,2.02
157913,85,大城,33,細懸浮微粒,PM2.5,μg/m3,202302,19
157914,85,大城,38,相對濕度,RH,%,202302,85


In [10]:
# 溫度、NOx、SO2
df = df[df['itemengname'].isin(['NOx', 'SO2', 'AMB_TEMP'])]
df

Unnamed: 0,siteid,sitename,itemid,itemname,itemengname,itemunit,monitormonth,concentration
7,38,崙背,14,溫度,AMB_TEMP,℃,201101,14.98
20,11,士林,14,溫度,AMB_TEMP,℃,201101,14.31
23,39,新港,1,二氧化硫,SO2,ppb,201101,3.3
25,49,仁武,5,氮氧化物,NOx,ppb,201101,40.62
33,39,新港,5,氮氧化物,NOx,ppb,201101,16.07
...,...,...,...,...,...,...,...,...
157890,84,富貴角,5,氮氧化物,NOx,ppb,202302,3.1
157896,84,富貴角,14,溫度,AMB_TEMP,℃,202302,15.5
157901,85,大城,1,二氧化硫,SO2,ppb,202302,0.9
157905,85,大城,5,氮氧化物,NOx,ppb,202302,6.6


In [11]:
# 取測站位置資料
site_df = pd.read_csv('./rawData/監測站基本資料.csv')
merged_df = df.merge(site_df[['sitename', 'areaname', 'county']], on='sitename')


merged_df['concentration'] = pd.to_numeric(merged_df['concentration'], errors='coerce')
print(merged_df.dtypes['concentration'])

# 分割欄位
merged_df['monitormonth'] = merged_df['monitormonth'].astype(str)
merged_df[['year', 'month']] = merged_df['monitormonth'].str.extract(r'(.{4})(.*)')


merged_df

float64


Unnamed: 0,siteid,sitename,itemid,itemname,itemengname,itemunit,monitormonth,concentration,areaname,county,year,month
0,38,崙背,14,溫度,AMB_TEMP,℃,201101,14.98,雲嘉南空品區,雲林縣,2011,01
1,38,崙背,1,二氧化硫,SO2,ppb,201101,3.50,雲嘉南空品區,雲林縣,2011,01
2,38,崙背,5,氮氧化物,NOx,ppb,201101,16.77,雲嘉南空品區,雲林縣,2011,01
3,38,崙背,14,溫度,AMB_TEMP,℃,201102,16.57,雲嘉南空品區,雲林縣,2011,02
4,38,崙背,5,氮氧化物,NOx,ppb,201102,16.74,雲嘉南空品區,雲林縣,2011,02
...,...,...,...,...,...,...,...,...,...,...,...,...
33332,85,大城,5,氮氧化物,NOx,ppb,202301,5.70,中部空品區,彰化縣,2023,01
33333,85,大城,1,二氧化硫,SO2,ppb,202301,0.80,中部空品區,彰化縣,2023,01
33334,85,大城,1,二氧化硫,SO2,ppb,202302,0.90,中部空品區,彰化縣,2023,02
33335,85,大城,5,氮氧化物,NOx,ppb,202302,6.60,中部空品區,彰化縣,2023,02


將資料整理成視覺化需要的模樣

In [12]:
temp_df = merged_df[merged_df['itemengname'] == 'AMB_TEMP']
temp_df['temperature'] = temp_df['concentration']
temp_df = temp_df[['year', 'month', 'sitename', 'county', 'areaname', 'temperature']]
temp_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp_df['temperature'] = temp_df['concentration']


Unnamed: 0,year,month,sitename,county,areaname,temperature
0,2011,01,崙背,雲林縣,雲嘉南空品區,14.98
3,2011,02,崙背,雲林縣,雲嘉南空品區,16.57
8,2011,03,崙背,雲林縣,雲嘉南空品區,17.81
10,2011,04,崙背,雲林縣,雲嘉南空品區,22.61
14,2011,05,崙背,雲林縣,雲嘉南空品區,25.70
...,...,...,...,...,...,...
33324,2022,10,大城,彰化縣,中部空品區,25.00
33327,2022,11,大城,彰化縣,中部空品區,23.40
33330,2022,12,大城,彰化縣,中部空品區,17.70
33331,2023,01,大城,彰化縣,中部空品區,16.60


In [18]:
merged_df = merged_df[merged_df['itemengname'].isin(['NOx', 'SO2'])]
mergeKey = ['year', 'month', 'sitename', 'county', 'areaname']
merge = pd.merge(merged_df, temp_df, on=mergeKey)
merge


Unnamed: 0,siteid,sitename,itemid,itemname,itemengname,itemunit,monitormonth,concentration,areaname,county,year,month,temperature
0,38,崙背,1,二氧化硫,SO2,ppb,201101,3.50,雲嘉南空品區,雲林縣,2011,01,14.98
1,38,崙背,5,氮氧化物,NOx,ppb,201101,16.77,雲嘉南空品區,雲林縣,2011,01,14.98
2,38,崙背,5,氮氧化物,NOx,ppb,201102,16.74,雲嘉南空品區,雲林縣,2011,02,16.57
3,38,崙背,1,二氧化硫,SO2,ppb,201102,2.90,雲嘉南空品區,雲林縣,2011,02,16.57
4,38,崙背,1,二氧化硫,SO2,ppb,201103,4.00,雲嘉南空品區,雲林縣,2011,03,17.81
...,...,...,...,...,...,...,...,...,...,...,...,...,...
21949,85,大城,5,氮氧化物,NOx,ppb,202212,6.40,中部空品區,彰化縣,2022,12,17.70
21950,85,大城,5,氮氧化物,NOx,ppb,202301,5.70,中部空品區,彰化縣,2023,01,16.60
21951,85,大城,1,二氧化硫,SO2,ppb,202301,0.80,中部空品區,彰化縣,2023,01,16.60
21952,85,大城,1,二氧化硫,SO2,ppb,202302,0.90,中部空品區,彰化縣,2023,02,16.90


處理合併產生的空值

In [21]:
merge.dropna(subset=['temperature', 'concentration'], inplace=True) 
merge['temperature'].fillna(0, inplace=True) 
merge['concentration'].fillna(0, inplace=True) 
merge


Unnamed: 0,siteid,sitename,itemid,itemname,itemengname,itemunit,monitormonth,concentration,areaname,county,year,month,temperature
0,38,崙背,1,二氧化硫,SO2,ppb,201101,3.50,雲嘉南空品區,雲林縣,2011,01,14.98
1,38,崙背,5,氮氧化物,NOx,ppb,201101,16.77,雲嘉南空品區,雲林縣,2011,01,14.98
2,38,崙背,5,氮氧化物,NOx,ppb,201102,16.74,雲嘉南空品區,雲林縣,2011,02,16.57
3,38,崙背,1,二氧化硫,SO2,ppb,201102,2.90,雲嘉南空品區,雲林縣,2011,02,16.57
4,38,崙背,1,二氧化硫,SO2,ppb,201103,4.00,雲嘉南空品區,雲林縣,2011,03,17.81
...,...,...,...,...,...,...,...,...,...,...,...,...,...
21949,85,大城,5,氮氧化物,NOx,ppb,202212,6.40,中部空品區,彰化縣,2022,12,17.70
21950,85,大城,5,氮氧化物,NOx,ppb,202301,5.70,中部空品區,彰化縣,2023,01,16.60
21951,85,大城,1,二氧化硫,SO2,ppb,202301,0.80,中部空品區,彰化縣,2023,01,16.60
21952,85,大城,1,二氧化硫,SO2,ppb,202302,0.90,中部空品區,彰化縣,2023,02,16.90


###  Tidy Data

In [23]:
columns_to_keep = ['year', 'month', 'sitename', 'county', 'areaname', 'temperature', 'itemengname', 'concentration', 'monitormonth']
tidy = merge.reindex(columns=columns_to_keep)
tidy

Unnamed: 0,year,month,sitename,county,areaname,temperature,itemengname,concentration,monitormonth
0,2011,01,崙背,雲林縣,雲嘉南空品區,14.98,SO2,3.50,201101
1,2011,01,崙背,雲林縣,雲嘉南空品區,14.98,NOx,16.77,201101
2,2011,02,崙背,雲林縣,雲嘉南空品區,16.57,NOx,16.74,201102
3,2011,02,崙背,雲林縣,雲嘉南空品區,16.57,SO2,2.90,201102
4,2011,03,崙背,雲林縣,雲嘉南空品區,17.81,SO2,4.00,201103
...,...,...,...,...,...,...,...,...,...
21949,2022,12,大城,彰化縣,中部空品區,17.70,NOx,6.40,202212
21950,2023,01,大城,彰化縣,中部空品區,16.60,NOx,5.70,202301
21951,2023,01,大城,彰化縣,中部空品區,16.60,SO2,0.80,202301
21952,2023,02,大城,彰化縣,中部空品區,16.90,SO2,0.90,202302


## Plotly

In [29]:
import plotly.express as px

In [31]:
_min = tidy['concentration'].min()
_max = tidy['concentration'].max()

In [36]:
fig2 = px.scatter(tidy, x='concentration', y="temperature", size='concentration',
	        color="itemengname", hover_name="sitename", log_x=True, size_max=60, 
            animation_frame='year', animation_group='sitename', range_x=[_min, _max], range_y=[0, 40],
            labels=dict(county='County', NOx='NOx (ppb)', temperature='Temperature'))

for trace in fig2.data:
    trace.marker.line.width = 1
    trace.marker.line.color = 'black'


fig2.show()

## Streamlit

In [270]:
import streamlit as st


list_county = list(merge2['county'].unique())

option = st.selectbox(
    'Please Select an Air Quality Area',
    list_county)

st.write('You selected:', option)