In [None]:
# Import libraries
import altair as alt
import seaborn as sns
import matplotlib.pyplot as plt

# Define constants shared by the chart cells below
PAD    = 25   # padding applied to the left/top/right/bottom of charts via .configure(padding=...)
HEIGHT = 300  # default chart height passed to .properties(height=...)
WIDTH  = 500  # default chart width passed to .properties(width=...)

# Run the preceding notebook (analyzeData.ipynb); the data objects used below are presumably defined there
%run analyzeData.ipynb

In [None]:
# Temperature anomaly per year, colored by CO2 ppm - OMIT

alt.Chart(df_merged_final).mark_bar().encode(
    # Year_Merge is ordinal; the `zero` scale option only applies to continuous
    # scales, so no scale override is set on this axis.
    x=alt.X('Year_Merge:O',
        title="Year"),
    y=alt.Y('Anomaly_TmpCls:Q',
        scale=alt.Scale(zero=False),
        title="Temperature °C"),
    color=alt.Color('MLHawai_CO2ppm:Q',
        scale=alt.Scale(range=["pink", "red"]),
        title="CO2 ppm")
).properties(
    title="Temperature Anomaly",
    height=HEIGHT,
    width=WIDTH
).configure(
    padding={"left": PAD, "top": PAD, "right": PAD, "bottom": PAD}
)

In [None]:
# Mean annual temperature (bars) layered with its anomaly (orange rules) for 57 years
domain = [3, 10]
range_ = ['cyan', 'black']

# Bar layer: temperature per year, colored by the temperature itself.
chart1 = (
    alt.Chart(df_merged_final)
    .mark_bar()
    .encode(
        alt.X('Year_Merge:O', title='Year'),
        alt.Y('Tmp_Cls:Q', title='Temperature °C'),
        alt.Color('Tmp_Cls:Q', scale=alt.Scale(domain=domain, range=range_)),
    )
    .properties(height=HEIGHT, width=WIDTH)
)

# Anomaly layer: the mark is chosen when the layers are combined below.
chart2 = (
    alt.Chart(df_merged_final)
    .encode(
        alt.X('Year_Merge:O', title='Year'),
        alt.Y('Anomaly_TmpCls:Q', title='Temperature °C'),
    )
    .properties(height=HEIGHT, width=WIDTH)
)

# Padding is configured once, on the combined chart.
alt.layer(
    chart1,
    chart2.mark_rule(color='orange'),
).properties(
    title='Mean Annual Temperature + Anomaly (USA)'
).configure(
    padding=dict(left=PAD, top=PAD, right=PAD, bottom=PAD)
)

In [None]:
# Precipitation anomaly per year, colored by CO2 ppm, for 57 years - OMIT

alt.Chart(df_merged_final).mark_bar().encode(
    # Year_Merge is ordinal; the `zero` scale option only applies to continuous
    # scales, so no scale override is set on this axis.
    x=alt.X('Year_Merge:O',
        title="Year"),
    y=alt.Y('Anomaly_RnfMM:Q',
        scale=alt.Scale(zero=False, padding=1),
        title="Rainfall in MM"),
    # Titles mirror the temperature-anomaly chart so the anomaly charts read alike.
    color=alt.Color('MLHawai_CO2ppm:Q',
        title="CO2 ppm")
).properties(
    title="Precipitation Anomaly",
    height=HEIGHT,
    width=WIDTH
).configure(
    padding={"left": PAD, "top": PAD, "right": PAD, "bottom": PAD}
)

In [None]:
# Mean annual precipitation (bars) layered with its anomaly (orange rules) for 57 years

# Rainfall layer: colored by the rainfall value; mark chosen when layering.
chart1 = (
    alt.Chart(df_merged_final)
    .encode(
        alt.X('Year_Merge:O', title='Year'),
        alt.Y('Rnf_MM:Q', title='Rainfall in MM'),
        alt.Color('Rnf_MM:Q'),
    )
    .properties(height=HEIGHT, width=WIDTH)
)

# Anomaly layer: shares the x axis and the y-axis title with the rainfall layer.
chart2 = (
    alt.Chart(df_merged_final)
    .encode(
        alt.X('Year_Merge:O', title='Year'),
        alt.Y('Anomaly_RnfMM:Q', title='Rainfall in MM'),
    )
    .properties(height=HEIGHT, width=WIDTH)
)

# Padding is configured once, on the combined chart.
alt.layer(
    chart1.mark_bar(),
    chart2.mark_rule(color='orange'),
).properties(
    title='Mean Annual Precipitation + Anomaly (USA)'
).configure(
    padding=dict(left=PAD, top=PAD, right=PAD, bottom=PAD)
)

In [None]:
# Sea level rise anomaly per year, colored by CO2 ppm, for 57 years - OMIT

alt.Chart(df_merged_final).mark_bar().encode(
    # Year_Merge is ordinal; the `zero` scale option only applies to continuous
    # scales, so no scale override is set on this axis.
    x=alt.X('Year_Merge:O',
        title="Year"),
    y=alt.Y('Anomaly_CSIRO_ASLinches:Q',
        scale=alt.Scale(zero=False, padding=1),
        title="Adjusted Sea Level Rise in Inches"),
    # Titles mirror the temperature-anomaly chart so the anomaly charts read alike.
    color=alt.Color('MLHawai_CO2ppm:Q',
        title="CO2 ppm")
).properties(
    title="Sea Level Rise Anomaly",
    height=HEIGHT,
    width=WIDTH
).configure(
    padding={"left": PAD, "top": PAD, "right": PAD, "bottom": PAD}
)

In [None]:
# Adjusted sea level rise (bars) layered with its anomaly (orange rules) for 57 years

# Sea-level layer: colored by the sea-level value; mark chosen when layering.
chart1 = (
    alt.Chart(df_merged_final)
    .encode(
        alt.X('Year_Merge:O', title='Year'),
        alt.Y('CSIRO_ASLinches:Q', title='Adjusted Sea Level Rise in Inches'),
        alt.Color('CSIRO_ASLinches:Q'),
    )
    .properties(height=HEIGHT, width=WIDTH)
)

# Anomaly layer: shares the x axis and the y-axis title with the sea-level layer.
chart2 = (
    alt.Chart(df_merged_final)
    .encode(
        alt.X('Year_Merge:O', title='Year'),
        alt.Y('Anomaly_CSIRO_ASLinches:Q', title='Adjusted Sea Level Rise in Inches'),
    )
    .properties(height=HEIGHT, width=WIDTH)
)

# Padding is configured once, on the combined chart.
alt.layer(
    chart1.mark_bar(),
    chart2.mark_rule(color='orange'),
).properties(
    title='Annual Sea Level Rise + Anomaly (Global)'
).configure(
    padding=dict(left=PAD, top=PAD, right=PAD, bottom=PAD)
)

In [None]:
# Histogram of rainfall (normality check) - OMIT

alt.Chart(df_precip_f).mark_bar().encode(
    # bin=True uses the default binning; pass bin=alt.BinParams(maxbins=100)
    # instead for finer bins.
    x=alt.X("Rnf_MM:Q", bin=True),
    # `count()` is Altair's shorthand for counting the records in each bin.
    y='count():Q'
).properties(
    height=HEIGHT,
    width=WIDTH
).configure(
    padding={"left": PAD, "top": PAD, "right": PAD, "bottom": PAD}
)

In [None]:
# Scatter-plot matrix of the four measures against each other, colored by year

alt.Chart(df_merged_final).mark_circle().encode(
    # The field for each panel is filled in by the repeat() call below.
    alt.X(alt.repeat("column"), type='quantitative'),
    alt.Y(alt.repeat("row"), type='quantitative'),
    alt.Color('Year_Merge:O'),
).properties(
    height=100,
    width=100,
).repeat(
    row=['MLHawai_CO2ppm', 'CSIRO_ASLinches', 'Tmp_Cls', 'Rnf_MM'],
    column=['Rnf_MM', 'Tmp_Cls', 'CSIRO_ASLinches', 'MLHawai_CO2ppm'],
).configure(
    padding=dict(left=PAD, top=PAD, right=PAD, bottom=PAD)
)

In [None]:
# Scatter plot of temperature by year - OMIT
# Color encodes the temperature anomaly; point size encodes CO2 ppm.

alt.Chart(df_merged_final).mark_circle().encode(
    # Year_Merge is ordinal; the `zero` scale option only applies to continuous
    # scales, so no scale override is set on this axis.
    x=alt.X('Year_Merge:O'),
    y=alt.Y('Tmp_Cls:Q', scale=alt.Scale(zero=False, padding=1)),
    color='Anomaly_TmpCls:Q',
    size='MLHawai_CO2ppm:Q'
).properties(
    height=HEIGHT,
    width=WIDTH
).configure(
    padding={"left": PAD, "top": PAD, "right": PAD, "bottom": PAD}
)

In [None]:
# Residuals scatter plot (Tmp_Cls vs. MLHawai_CO2ppm) to check homoscedasticity - OMIT

sns.residplot(data=df_merged_final, x="MLHawai_CO2ppm", y="Tmp_Cls")
plt.show()

In [None]:
# Residuals scatter plot (CSIRO_ASLinches vs. MLHawai_CO2ppm) with a lowess
# curve, to check homoscedasticity - OMIT

sns.residplot(
    data=df_merged_final,
    x="MLHawai_CO2ppm",
    y="CSIRO_ASLinches",
    lowess=True,
)
plt.show()

In [None]:
# Seaborn scatter plot to check the correlations - ZOOM INTO IT
# NOTE(review): x plots the *anomaly* column (Anomaly_CSIRO_ASLinches) while the
# axis label and title describe the sea level rise itself — confirm which is intended.

fig, ax = plt.subplots(figsize=(10, 8))
sns.scatterplot(
    data=df_merged_final,
    x="Anomaly_CSIRO_ASLinches",
    y="MLHawai_CO2ppm",
    ax=ax,
)

ax.set_xlabel('Adjusted Sea Level Rise in Inches')
ax.set_ylabel('CO2 PPM')
ax.set_title('Sea Level Rise vs CO2')
plt.show()

In [None]:
# Sea level visualization: adjusted sea level by year with a linear regression line
df_sealevel = MAIN_GRID[2][1]

sealevel_chart = (
    alt.Chart(df_sealevel)
    .mark_line()
    .encode(
        alt.X('Year:Q'),
        alt.Y('CSIRO - Adjusted sea level (inches):Q',
              title='Adjusted Sea Level Rise in Inches'),
    )
)

# Red regression line derived from the same chart, so it shares its encodings.
Regline = sealevel_chart.transform_regression(
    on='Year',
    regression='CSIRO - Adjusted sea level (inches)',
    method="linear",
).mark_line(color="red")

# Size and padding are set once, on the layered chart.
alt.layer(
    Regline,
    sealevel_chart,
).properties(
    title='Annual Sea Level Rise',
    height=HEIGHT,
    width=WIDTH,
).configure(
    padding=dict(left=PAD, top=PAD, right=PAD, bottom=PAD)
)


In [None]:
# Snow cover visualization: average area by year with a 3rd-order polynomial regression line
df_snow_cover = MAIN_GRID[2][2]

snow_chart = (
    alt.Chart(df_snow_cover)
    .mark_line()
    .encode(
        alt.X('Year:Q'),
        # Fixed y domain so the plotted range is 3.0M - 3.7M.
        alt.Y('Average mi^2:Q', scale=alt.Scale(domain=(3000000, 3700000))),
    )
)

# Red regression line derived from the same chart, so it shares its encodings.
Regline = snow_chart.transform_regression(
    on='Year',
    regression='Average mi^2',
    method="poly",
    order=3,
).mark_line(color="red")

# Size and padding are set once, on the layered chart.
alt.layer(
    Regline,
    snow_chart,
).properties(
    title='Snow Cover Levels',
    height=HEIGHT,
    width=WIDTH,
).configure(
    padding=dict(left=PAD, top=PAD, right=PAD, bottom=PAD)
)

In [None]:
# GHG chart: Mauna Loa CO2 readings by year with a linear regression line.
#
# The regression layer used to crash because it was derived from, and layered
# with, a chart that already had .configure() applied; Altair rejects charts
# carrying a "config" inside a layered chart. The base line chart is therefore
# left unconfigured and the padding is applied once, on the final layered chart.
df_ghg = MAIN_GRID[2][0]

ghg_year_domain = (1959, 2020)

ghg_line = alt.Chart(df_ghg).mark_line().encode(
    x=alt.X('Year (negative values = BC):Q',
        title='Year',
        scale=alt.Scale(domain=ghg_year_domain)),
    y=alt.Y('Mauna Loa, Hawaii:Q',
        title='CO2 PPM',
        scale=alt.Scale(domain=(300, 420)))
)

# `extent` limits the fitted line to the displayed year range, so it cannot be
# drawn outside the plot if df_ghg holds years outside that range.
Regline = ghg_line.transform_regression(
    'Year (negative values = BC)',
    'Mauna Loa, Hawaii',
    method="linear",
    extent=list(ghg_year_domain)
).mark_line(color="red")

ghg_chart = alt.layer(Regline,
    ghg_line,
    title='CO2 Levels in PPM'
).properties(
    height=HEIGHT,
    width=WIDTH
).configure(
    padding={"left": PAD, "top": PAD, "right": PAD, "bottom": PAD}
)

ghg_chart

In [None]:
def graphLines(df, x, y, color, title):
    """Build a line chart of ``y`` against ``x``, colored by ``color``.

    Args:
        df: Data handed to ``alt.Chart``.
        x: Encoding for the x axis (callers pass shorthand such as ``"Year:O"``).
        y: Encoding for the y axis (callers pass shorthand such as ``"Emission:Q"``).
        color: Value used for the color encoding (callers pass ``"Sector"``).
        title: Chart title.

    Returns:
        The chart sized to HEIGHT x WIDTH with PAD padding on every side.
        Both axis scales are created with ``zero=False``.
    """
    line_chart = alt.Chart(df).mark_line().encode(
        alt.X(x, scale=alt.Scale(zero=False)),
        alt.Y(y, scale=alt.Scale(zero=False)),
        color=color,
    )
    sized_chart = line_chart.properties(title=title, height=HEIGHT, width=WIDTH)
    return sized_chart.configure(
        padding=dict(left=PAD, top=PAD, right=PAD, bottom=PAD)
    )

def graphPie(df, title):
    """Draw pie chart(s) for ``df`` through its ``.plot.pie`` accessor.

    Args:
        df: Object exposing ``.plot.pie`` (callers pass ``emisBySector`` and
            ``emisByGas``).
        title: Title forwarded to the plot.

    Returns:
        Whatever ``df.plot.pie`` returns.
    """
    pie_options = dict(
        subplots=True,
        shadow=True,
        autopct="%1.1f%%",  # percentage labels with one decimal place
        fontsize=14,
        legend=False,
        title=title,
        figsize=(7, 7),
    )
    return df.plot.pie(**pie_options)

In [None]:
# Line chart of Emission by Year (ordinal axis) from emisAll, colored by Sector
graphLines(emisAll, "Year:O", "Emission:Q", "Sector", "Emission By Sector")

In [None]:
# Total emission per year (orange line) with a regression line (black)
emission_line = alt.Chart(emisTotal).mark_line(color="orange").encode(
    # Year is ordinal here; the `zero` scale option only applies to continuous
    # scales, so no scale override is set on this axis.
    x=alt.X('Year:O'),
    y=alt.Y('Emission:Q', scale=alt.Scale(zero=False))
).properties(
    title="Total Emission Rate of all Sectors by Year",
    height=HEIGHT,
    width=WIDTH
)

# Regression of Emission on Year, derived from the line chart so it shares its encodings.
trend_line = emission_line.transform_regression("Year", "Emission").mark_line(color="black")

# `chart` is the layered result; each intermediate chart keeps its own name so
# that no name refers to two different objects within the cell.
chart = emission_line + trend_line

# The layered chart is configured directly rather than wrapped in a second layer.
chart.configure(
    padding={"left": PAD, "top": PAD, "right": PAD, "bottom": PAD}
)

In [None]:
# Pie chart of emisBySector, drawn with the graphPie helper
graphPie(emisBySector, "Average Emission By Sector 1990 - 2018")

In [None]:
# Line chart of Emission by Year from emisElectric, colored by Sector
# (chart title spelling corrected: "Electricity")
graphLines(emisElectric, "Year:Q", "Emission:Q", "Sector", "Emission from Electricity Generation")

In [None]:
# Line chart of Emission by Year from emisTransport, colored by Sector
# (chart title spelling corrected: "Transportation")
graphLines(emisTransport, "Year:Q", "Emission:Q", "Sector", "Emission from Transportation")

In [None]:
# Pie chart of emisByGas, drawn with the graphPie helper
graphPie(emisByGas, "Average Gas Emission 1990 - 2018")

In [None]:
# Temperature scatter plot with polynomial regression fits of order 1 and 5
tempDf = df_merged_final.iloc[1:]  # every row except the first
cols = tempDf.columns
# Columns are picked by position — presumably the year and temperature columns;
# verify against the column order of df_merged_final.
x, y = str(cols[0]), str(cols[3])

tempGraph = (
    alt.Chart(tempDf)
    .mark_circle(color="black")
    .encode(
        x=alt.X(f"{x}:Q",
                scale=alt.Scale(domain=(1960, 2015)),
                title="Year"),
        y=alt.Y(f"{y}:Q",
                scale=alt.Scale(domain=(6, 8.5)),
                title="Temp °C"),
    )
    .properties(
        title="Temperature °C in US from 1960 - 2015",
        height=HEIGHT,
        width=WIDTH - 250,
    )
)

# One line layer per polynomial order. The regression writes its output to a
# column named after the order; the fold then moves that value into `y` and the
# order into "degree", which drives the line color.
polynomial_fit = [
    tempGraph
    .transform_regression(x, y, method="poly", order=degree, as_=[x, str(degree)])
    .mark_line(size=3)
    .transform_fold([str(degree)], as_=["degree", y])
    .encode(color=alt.Color("degree:N"))
    for degree in (1, 5)
]

alt.layer(tempGraph, *polynomial_fit).configure(
    padding=dict(left=PAD, top=PAD, right=PAD, bottom=PAD)
)