## After converting the data in the previous files, we analyze the complete dataset and make predictions from it.

If you want to read more in-depth thoughts on this analysis, please visit my blog: https://datasciencerecruit.com/

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly
import plotly.graph_objs as go
import plotly.offline as py
import datetime

In [None]:
## Load the Seoul air-quality measurements from the Kaggle input folder.
source_csv = "../input/air-quality-seoul/Air_Quality_Seoul_2017-2020.csv"
df = pd.read_csv(source_csv)

In [None]:
## Parse the timestamp column so it can be compared, grouped and used as a datetime index later on.
measurement_timestamps = pd.to_datetime(df['Measurement date'])
df['Measurement date'] = measurement_timestamps

In [None]:
## Print the column summary (dtypes and non-null counts) of the loaded frame.
df.info()

In [None]:
## Readings of one station at four specific hours are removed before aggregating.
## NOTE(review): the reason these rows are discarded is not stated in this notebook.
dates = ["2018-03-05 09:00:00", "2018-03-05 10:00:00", "2018-03-06 09:00:00", "2018-03-06 10:00:00"]
Station = 117

## Build a single boolean mask on the full frame (timestamp is one of `dates`
## AND the row belongs to `Station`) instead of chaining two .loc selections
## whose masks are indexed differently.
is_flagged_time = df['Measurement date'].isin(pd.to_datetime(dates))
is_flagged_station = df['Station code'] == Station
to_drop = df.loc[is_flagged_time & is_flagged_station]

## Remove the selected rows in place.
df.drop(to_drop.index, inplace=True)

In [None]:
## Collapse all stations into one row per timestamp by summing every column.
readings_per_timestamp = df.groupby(by='Measurement date')
entire_seoul = readings_per_timestamp.sum()

In [None]:
## The station codes were summed along with the readings by the groupby above; remove that column.
entire_seoul.drop(columns='Station code', inplace=True)

In [None]:
## Print the column summary (dtypes and non-null counts) of the aggregated frame.
entire_seoul.info()

In [None]:
## Persist the aggregated frame (timestamp index included) to the working directory.
adjusted_csv = "Air_Quality_Entire_Seoul_2017-2020_Adjusted.csv"
entire_seoul.to_csv(adjusted_csv)

In [None]:
## Row labels of the reference table; each list in `polluents` follows this order.
quality = ['Good', 'Normal', 'Bad', 'Very Bad']

## Reference values per pollutant, one value per quality label.
## NOTE(review): presumably the upper limit of each quality band -- confirm against the data source.
polluents = {
    'SO2': [0.02, 0.05, 0.15, 1],
    'NO2': [0.03, 0.06, 0.2, 2],
    'CO': [2, 9, 15, 50],
    'O3': [0.03, 0.09, 0.15, 0.5],
    'PM2.5': [15, 35, 75, 500],
    'PM10': [30, 80, 150, 600],
}

## Same values as a table: one column per pollutant, one row per quality label.
seoul_standard = pd.DataFrame(data=polluents, index=quality)
seoul_standard

In [None]:
## Display the raw threshold dictionary.
polluents

In [None]:
## SO2 over time for the aggregated (summed over stations) series.
data = [go.Scatter(x=entire_seoul.index, y=entire_seoul['SO2'])]

## Axis titles and chart title
layout = go.Layout(
    title='SO2 Levels',
    yaxis={'title': 'Level (ppm)'},
    xaxis={'title': 'Date'},
)

fig = go.Figure(data=data, layout=layout)

## The plotted series is a sum over stations, so the threshold is scaled by 25.
## NOTE(review): 25 is presumably the number of stations -- confirm.
good_level = 25 * polluents['SO2'][0]

## Label for the reference line
fig.add_annotation(
    x='2018-01-01 00:00:00',
    y=good_level,
    text="Good Level",
    showarrow=True,
    arrowhead=1,
)

## Horizontal reference line across the whole date range
fig.add_shape(
    type="line",
    x0='2017-01-01 00:00:00',
    y0=good_level,
    x1='2020-12-31 23:00:00',
    y1=good_level,
    line=dict(color="Green", width=4, dash="dashdot"),
)

## Plotting
fig.show()

In [None]:
## NO2 over time for the aggregated (summed over stations) series.
data = [go.Scatter(x=entire_seoul.index, y=entire_seoul['NO2'])]

## Axis titles and chart title
layout = go.Layout(
    title='NO2 Levels',
    yaxis={'title': 'Level (ppm)'},
    xaxis={'title': 'Date'},
)

fig = go.Figure(data=data, layout=layout)

## (label, label x-position, y value, line colour) for each reference level.
## The plotted series is a sum over stations, so thresholds are scaled by 25.
## NOTE(review): 25 is presumably the number of stations -- confirm.
reference_levels = (
    ("Good Level", '2017-07-01 00:00:00', 25 * polluents['NO2'][0], "Green"),
    ("Normal Level", '2018-07-01 00:00:00', 25 * polluents['NO2'][1], "Orange"),
)

## Labels for the reference lines
for level_text, level_x, level_y, _ in reference_levels:
    fig.add_annotation(x=level_x, y=level_y,
                       text=level_text,
                       showarrow=True,
                       arrowhead=1)

## Horizontal reference lines across the whole date range
for _, _, level_y, level_colour in reference_levels:
    fig.add_shape(
        type="line",
        x0='2017-01-01 00:00:00',
        y0=level_y,
        x1='2020-12-31 23:00:00',
        y1=level_y,
        line=dict(color=level_colour, width=4, dash="dashdot"),
    )

## Plotting
py.iplot(fig)

In [None]:
## O3 over time for the aggregated (summed over stations) series.
data = [go.Scatter(x=entire_seoul.index, y=entire_seoul['O3'])]

## Axis titles and chart title
layout = go.Layout(
    title='O3 Levels',
    yaxis={'title': 'Level (ppm)'},
    xaxis={'title': 'Date'},
)

fig = go.Figure(data=data, layout=layout)

## (label, label x-position, y value, line colour) for each reference level.
## The plotted series is a sum over stations, so thresholds are scaled by 25.
## NOTE(review): 25 is presumably the number of stations -- confirm.
reference_levels = (
    ("Good Level", '2017-07-01 00:00:00', 25 * polluents['O3'][0], "Green"),
    ("Normal Level", '2018-07-01 00:00:00', 25 * polluents['O3'][1], "Orange"),
)

## Labels for the reference lines
for level_text, level_x, level_y, _ in reference_levels:
    fig.add_annotation(x=level_x, y=level_y,
                       text=level_text,
                       showarrow=True,
                       arrowhead=1)

## Horizontal reference lines across the whole date range
for _, _, level_y, level_colour in reference_levels:
    fig.add_shape(
        type="line",
        x0='2017-01-01 00:00:00',
        y0=level_y,
        x1='2020-12-31 23:00:00',
        y1=level_y,
        line=dict(color=level_colour, width=4, dash="dashdot"),
    )

## Plotting
py.iplot(fig)

In [None]:
## CO over time for the aggregated (summed over stations) series.
data = [go.Scatter(x=entire_seoul.index, y=entire_seoul['CO'])]

## Axis titles and chart title
layout = go.Layout(
    title='CO Levels',
    yaxis={'title': 'Level (ppm)'},
    xaxis={'title': 'Date'},
)

fig = go.Figure(data=data, layout=layout)

## The plotted series is a sum over stations, so the threshold is scaled by 25.
## NOTE(review): 25 is presumably the number of stations -- confirm.
good_level = 25 * polluents['CO'][0]

## Label for the reference line
fig.add_annotation(
    x='2017-07-01 00:00:00',
    y=good_level,
    text="Good Level",
    showarrow=True,
    arrowhead=1,
)

## Horizontal reference line across the whole date range
fig.add_shape(
    type="line",
    x0='2017-01-01 00:00:00',
    y0=good_level,
    x1='2020-12-31 23:00:00',
    y1=good_level,
    line=dict(color="Green", width=4, dash="dashdot"),
)

## Plotting
py.iplot(fig)

In [None]:
## PM2.5 over time for the aggregated (summed over stations) series.
data = [go.Scatter(x=entire_seoul.index, y=entire_seoul['PM2.5'])]

## Axis titles and chart title (y-axis unit spelled 'Microgram/m3', as in the other PM charts)
layout = go.Layout(
    title='PM2.5 Levels',
    yaxis={'title': 'Microgram/m3'},
    xaxis={'title': 'Date'},
)

fig = go.Figure(data=data, layout=layout)

## (label, label x-position, y value, line colour) for each reference level.
## The plotted series is a sum over stations, so thresholds are scaled by 25.
## NOTE(review): 25 is presumably the number of stations -- confirm.
reference_levels = (
    ("Good Level", '2017-07-01 00:00:00', 25 * polluents['PM2.5'][0], "Green"),
    ("Normal Level", '2017-12-01 00:00:00', 25 * polluents['PM2.5'][1], "Orange"),
)

## Labels for the reference lines
for level_text, level_x, level_y, _ in reference_levels:
    fig.add_annotation(x=level_x, y=level_y,
                       text=level_text,
                       showarrow=True,
                       arrowhead=1)

## Horizontal reference lines across the whole date range
for _, _, level_y, level_colour in reference_levels:
    fig.add_shape(
        type="line",
        x0='2017-01-01 00:00:00',
        y0=level_y,
        x1='2020-12-31 23:00:00',
        y1=level_y,
        line=dict(color=level_colour, width=4, dash="dashdot"),
    )

## Plotting
py.iplot(fig)

In [None]:
## PM10 over time for the aggregated (summed over stations) series.
data = [go.Scatter(x=entire_seoul.index, y=entire_seoul['PM10'])]

## Axis titles and chart title (y-axis unit spelled 'Microgram/m3', as in the other PM charts)
layout = go.Layout(
    title='PM10 Levels',
    yaxis={'title': 'Microgram/m3'},
    xaxis={'title': 'Date'},
)

fig = go.Figure(data=data, layout=layout)

## (label, label x-position, y value, line colour) for each reference level.
## The plotted series is a sum over stations, so thresholds are scaled by 25.
## NOTE(review): 25 is presumably the number of stations -- confirm.
reference_levels = (
    ("Good Level", '2017-07-01 00:00:00', 25 * polluents['PM10'][0], "Green"),
    ("Normal Level", '2017-07-01 00:00:00', 25 * polluents['PM10'][1], "Orange"),
)

## Labels for the reference lines
for level_text, level_x, level_y, _ in reference_levels:
    fig.add_annotation(x=level_x, y=level_y,
                       text=level_text,
                       showarrow=True,
                       arrowhead=1)

## Horizontal reference lines across the whole date range
for _, _, level_y, level_colour in reference_levels:
    fig.add_shape(
        type="line",
        x0='2017-01-01 00:00:00',
        y0=level_y,
        x1='2020-12-31 23:00:00',
        y1=level_y,
        line=dict(color=level_colour, width=4, dash="dashdot"),
    )

## Plotting
py.iplot(fig)

## Analysis year by year

In [None]:
## Tag every timestamp with its calendar year so rows can be grouped per year.
calendar_years = entire_seoul.index.year
entire_seoul['Year'] = calendar_years

In [None]:
## Display the aggregated frame, now including the Year column.
entire_seoul

In [None]:
## Total of every column per calendar year.
rows_per_year = entire_seoul.groupby(by='Year')
entire_seoul_by_year = rows_per_year.sum()
entire_seoul_by_year

In [None]:
## Data for each gas

## One single-trace list per pollutant, yearly totals against the year.
## Order matters: trace0..trace5 = SO2, NO2, O3, CO, PM10, PM2.5.
trace0, trace1, trace2, trace3, trace4, trace5 = (
    [go.Scatter(x=entire_seoul_by_year.index, y=entire_seoul_by_year[pollutant])]
    for pollutant in ('SO2', 'NO2', 'O3', 'CO', 'PM10', 'PM2.5')
)

In [None]:
##layout objects for each gas

## layoutN is combined with traceN when the yearly figures are built, so the
## titles must follow the trace order: SO2, NO2, O3, CO, PM10, PM2.5
## (trace4 is built from the PM10 column and trace5 from the PM2.5 column).

layout0 = go.Layout(title='SO2 Levels',
                    yaxis={'title':'Level (ppm)'},
                    xaxis={'title':'Date', 'nticks':4},
                    )

layout1 = go.Layout(title='NO2 Levels',
                    yaxis={'title':'Level (ppm)'},
                    xaxis={'title':'Date', 'nticks':4})

layout2 = go.Layout(title='O3 Levels',
                    yaxis={'title':'Level (ppm)'},
                    xaxis={'title':'Date', 'nticks':4})

layout3 = go.Layout(title='CO Levels',
                    yaxis={'title':'Level (ppm)'},
                    xaxis={'title':'Date', 'nticks':4})

## Paired with trace4 (PM10 data)
layout4 = go.Layout(title='PM10 Levels',
                    yaxis={'title':'Microgram/m3'},
                    xaxis={'title':'Date', 'nticks':4})

## Paired with trace5 (PM2.5 data)
layout5 = go.Layout(title='PM2.5 Levels',
                    yaxis={'title':'Microgram/m3'},
                    xaxis={'title':'Date', 'nticks':4})

In [None]:
## Build the yearly figure from trace0 (SO2) and layout0, then render it.
fig = go.Figure(data=trace0, layout=layout0)
py.iplot(fig)

In [None]:
## Build the yearly figure from trace1 (NO2) and layout1, then render it.
fig = go.Figure(data=trace1, layout=layout1)
py.iplot(fig)

In [None]:
## Build the yearly figure from trace2 (O3) and layout2, then render it.
fig = go.Figure(data=trace2, layout=layout2)
py.iplot(fig)

In [None]:
## Build the yearly figure from trace3 (CO) and layout3, then render it.
fig = go.Figure(data=trace3, layout=layout3)
py.iplot(fig)

In [None]:
## Build the yearly figure from trace4 and layout4, then render it.
fig = go.Figure(data=trace4, layout=layout4)
py.iplot(fig)

In [None]:
## Build the yearly figure from trace5 and layout5, then render it.
fig = go.Figure(data=trace5, layout=layout5)
py.iplot(fig)

## Analysis month by month

In [None]:
## Tag every timestamp with its month number so rows can be grouped per month.
calendar_months = entire_seoul.index.month
entire_seoul['Month'] = calendar_months
entire_seoul

In [None]:
## Pollutant columns that are totalled per month.
pollutant_columns = ['SO2', 'NO2', 'O3', 'CO', 'PM10', 'PM2.5']


def _monthly_totals(year):
    """Return per-month sums of the pollutant columns for a single year."""
    ## Month labels of the requested year only; the full frame is grouped by
    ## this shorter series, so only that year's rows carry a group key.
    months_of_year = entire_seoul['Month'].loc[entire_seoul['Year'] == year]
    grouped = entire_seoul[pollutant_columns].groupby(by=months_of_year)
    return pd.DataFrame(grouped.sum())


m2017 = _monthly_totals(2017)
m2018 = _monthly_totals(2018)
m2019 = _monthly_totals(2019)
m2020 = _monthly_totals(2020)

In [None]:
## Display the monthly totals for 2017.
m2017

In [None]:
## One bar series per year: monthly SO2 totals, grouped side by side by month.
trace_monthsSO2 = [
    go.Bar(x=monthly_frame['SO2'].index, y=monthly_frame['SO2'], name=year_label)
    for year_label, monthly_frame in (('2017', m2017), ('2018', m2018),
                                      ('2019', m2019), ('2020', m2020))
]

## Tick positions are taken from the SO2 column being plotted (the same
## pattern the CO / PM10 / PM2.5 monthly charts use) and relabelled with
## month abbreviations.
layout_monthsSO2 = go.Layout(title='SO2 Levels',
                    yaxis={'title':'Level (ppm)'},
                    xaxis={'title':'Month',
                           'tickmode':'array',
                           'tickvals':m2017['SO2'].index,
                           'ticktext':['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']},
                    )

In [None]:
## Build the monthly SO2 bar chart and render it.
fig = go.Figure(data=trace_monthsSO2, layout=layout_monthsSO2)
py.iplot(fig)

In [None]:
## One bar series per year: monthly NO2 totals, grouped side by side by month.
trace_monthsNO2 = [
    go.Bar(x=monthly_frame['NO2'].index, y=monthly_frame['NO2'], name=year_label)
    for year_label, monthly_frame in (('2017', m2017), ('2018', m2018),
                                      ('2019', m2019), ('2020', m2020))
]

## Tick positions are taken from the NO2 column being plotted (the same
## pattern the CO / PM10 / PM2.5 monthly charts use) and relabelled with
## month abbreviations.
layout_monthsNO2 = go.Layout(title='NO2 Levels',
                    yaxis={'title':'Level (ppm)'},
                    xaxis={'title':'Month',
                           'tickmode':'array',
                           'tickvals':m2017['NO2'].index,
                           'ticktext':['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']},
                    )

In [None]:
## Build the monthly NO2 bar chart and render it.
fig = go.Figure(data=trace_monthsNO2, layout=layout_monthsNO2)
py.iplot(fig)

In [None]:
## One bar series per year: monthly O3 totals, grouped side by side by month.
trace_monthsO3 = [
    go.Bar(x=monthly_frame['O3'].index, y=monthly_frame['O3'], name=year_label)
    for year_label, monthly_frame in (('2017', m2017), ('2018', m2018),
                                      ('2019', m2019), ('2020', m2020))
]

## Month numbers on the x axis are relabelled with month abbreviations.
layout_monthsO3 = go.Layout(
    title='O3 Levels',
    yaxis={'title': 'Level (ppm)'},
    xaxis={
        'title': 'Month',
        'tickmode': 'array',
        'tickvals': m2017['O3'].index,
        'ticktext': ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                     'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
    },
)

In [None]:
## Build the monthly O3 bar chart and render it.
fig = go.Figure(data=trace_monthsO3, layout=layout_monthsO3)
py.iplot(fig)

In [None]:
## One bar series per year: monthly CO totals, grouped side by side by month.
trace_monthsCO = [
    go.Bar(x=monthly_frame['CO'].index, y=monthly_frame['CO'], name=year_label)
    for year_label, monthly_frame in (('2017', m2017), ('2018', m2018),
                                      ('2019', m2019), ('2020', m2020))
]

## Month numbers on the x axis are relabelled with month abbreviations.
layout_monthsCO = go.Layout(
    title='CO Levels',
    yaxis={'title': 'Level (ppm)'},
    xaxis={
        'title': 'Month',
        'tickmode': 'array',
        'tickvals': m2017['CO'].index,
        'ticktext': ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                     'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
    },
)

In [None]:
## Display the monthly totals for 2017 again for reference.
m2017

In [None]:
## Build the monthly CO bar chart and render it.
fig = go.Figure(data=trace_monthsCO, layout=layout_monthsCO)
py.iplot(fig)

In [None]:
## One bar series per year: monthly PM10 totals, grouped side by side by month.
trace_monthsPM10 = [
    go.Bar(x=monthly_frame['PM10'].index, y=monthly_frame['PM10'], name=year_label)
    for year_label, monthly_frame in (('2017', m2017), ('2018', m2018),
                                      ('2019', m2019), ('2020', m2020))
]

## Month numbers on the x axis are relabelled with month abbreviations.
layout_monthsPM10 = go.Layout(
    title='PM10 Levels',
    yaxis={'title': 'Microgram/m3'},
    xaxis={
        'title': 'Month',
        'tickmode': 'array',
        'tickvals': m2017['PM10'].index,
        'ticktext': ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                     'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
    },
)

In [None]:
## Build the monthly PM10 bar chart and render it.
fig = go.Figure(data=trace_monthsPM10, layout=layout_monthsPM10)


py.iplot(fig)

In [None]:
## One bar series per year: monthly PM2.5 totals, grouped side by side by month.
trace_monthsPM25 = [
    go.Bar(x=monthly_frame['PM2.5'].index, y=monthly_frame['PM2.5'], name=year_label)
    for year_label, monthly_frame in (('2017', m2017), ('2018', m2018),
                                      ('2019', m2019), ('2020', m2020))
]

## Month numbers on the x axis are relabelled with month abbreviations.
layout_monthsPM25 = go.Layout(
    title='PM2.5 Levels',
    yaxis={'title': 'Microgram/m3'},
    xaxis={
        'title': 'Month',
        'tickmode': 'array',
        'tickvals': m2017['PM2.5'].index,
        'ticktext': ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                     'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
    },
)

In [None]:
## Build the monthly PM2.5 bar chart and render it.
fig = go.Figure(data=trace_monthsPM25, layout=layout_monthsPM25)

py.iplot(fig)