# DATA PREPARATION, PREPARING/TRAINING FOR VISUALIZATION

In [1]:
import pandas as pd
import altair as alt
from altair import datum

In [2]:
df = pd.read_csv('GLB.Ts_dSST.csv', header = 1)
df

Unnamed: 0,Year,Jan,Feb,Mar,Apr,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec,J-D,D-N,DJF,MAM,JJA,SON
0,1880,-0.29,-0.18,-0.11,-0.19,-0.11,-0.23,-0.21,-0.09,-0.16,-0.23,-0.20,-0.23,-0.19,***,***,-0.14,-0.18,-0.20
1,1881,-0.15,-0.17,0.04,0.04,0.02,-0.20,-0.06,-0.02,-0.13,-0.20,-0.21,-0.10,-0.10,-.11,-.18,0.03,-0.09,-0.18
2,1882,0.15,0.15,0.04,-0.18,-0.16,-0.26,-0.20,-0.05,-0.10,-0.24,-0.16,-0.24,-0.10,-.09,.06,-0.10,-0.17,-0.17
3,1883,-0.31,-0.39,-0.13,-0.17,-0.20,-0.12,-0.08,-0.15,-0.20,-0.14,-0.22,-0.16,-0.19,-.20,-.31,-0.16,-0.12,-0.19
4,1884,-0.15,-0.08,-0.37,-0.42,-0.36,-0.40,-0.34,-0.26,-0.27,-0.24,-0.30,-0.28,-0.29,-.28,-.13,-0.39,-0.34,-0.27
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
133,2013,0.66,0.55,0.66,0.52,0.57,0.65,0.57,0.65,0.76,0.67,0.78,0.65,0.64,.63,.58,0.58,0.63,0.74
134,2014,0.73,0.52,0.76,0.77,0.85,0.66,0.56,0.80,0.88,0.81,0.66,0.78,0.73,.72,.63,0.79,0.67,0.78
135,2015,0.81,0.87,0.90,0.74,0.75,0.79,0.71,0.79,0.81,1.07,1.02,1.10,0.86,.84,.82,0.80,0.76,0.97
136,2016,1.15,1.34,1.30,1.07,0.90,0.78,0.82,0.99,0.87,0.89,0.90,0.82,0.99,1.01,1.19,1.09,0.86,0.88


In [3]:
df = df.replace('***', 0.0)

In [4]:
df['D-N'] = df['D-N'].astype(float)
df['DJF'] = df['DJF'].astype(float)

In [5]:
df.dtypes

Year      int64
Jan     float64
Feb     float64
Mar     float64
Apr     float64
May     float64
Jun     float64
Jul     float64
Aug     float64
Sep     float64
Oct     float64
Nov     float64
Dec     float64
J-D     float64
D-N     float64
DJF     float64
MAM     float64
JJA     float64
SON     float64
dtype: object

In [6]:
df['ANN AVG'] = (df['Jan'] +  df['Feb'] +  df['Mar'] +  df['Apr'] +  df['May'] +  df['Jun'] +  df['Jul'] +  df['Aug'] +  df['Sep'] +  df['Oct'] +  df['Nov'] +  df['Dec'])/12
df

Unnamed: 0,Year,Jan,Feb,Mar,Apr,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec,J-D,D-N,DJF,MAM,JJA,SON,ANN AVG
0,1880,-0.29,-0.18,-0.11,-0.19,-0.11,-0.23,-0.21,-0.09,-0.16,-0.23,-0.20,-0.23,-0.19,0.00,0.00,-0.14,-0.18,-0.20,-0.185833
1,1881,-0.15,-0.17,0.04,0.04,0.02,-0.20,-0.06,-0.02,-0.13,-0.20,-0.21,-0.10,-0.10,-0.11,-0.18,0.03,-0.09,-0.18,-0.095000
2,1882,0.15,0.15,0.04,-0.18,-0.16,-0.26,-0.20,-0.05,-0.10,-0.24,-0.16,-0.24,-0.10,-0.09,0.06,-0.10,-0.17,-0.17,-0.104167
3,1883,-0.31,-0.39,-0.13,-0.17,-0.20,-0.12,-0.08,-0.15,-0.20,-0.14,-0.22,-0.16,-0.19,-0.20,-0.31,-0.16,-0.12,-0.19,-0.189167
4,1884,-0.15,-0.08,-0.37,-0.42,-0.36,-0.40,-0.34,-0.26,-0.27,-0.24,-0.30,-0.28,-0.29,-0.28,-0.13,-0.39,-0.34,-0.27,-0.289167
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
133,2013,0.66,0.55,0.66,0.52,0.57,0.65,0.57,0.65,0.76,0.67,0.78,0.65,0.64,0.63,0.58,0.58,0.63,0.74,0.640833
134,2014,0.73,0.52,0.76,0.77,0.85,0.66,0.56,0.80,0.88,0.81,0.66,0.78,0.73,0.72,0.63,0.79,0.67,0.78,0.731667
135,2015,0.81,0.87,0.90,0.74,0.75,0.79,0.71,0.79,0.81,1.07,1.02,1.10,0.86,0.84,0.82,0.80,0.76,0.97,0.863333
136,2016,1.15,1.34,1.30,1.07,0.90,0.78,0.82,0.99,0.87,0.89,0.90,0.82,0.99,1.01,1.19,1.09,0.86,0.88,0.985833


In [7]:
average = (df.at[0, 'ANN AVG'] + df.at[1, 'ANN AVG'] + df.at[2, 'ANN AVG'] +
           df.at[3, 'ANN AVG'] + df.at[4, 'ANN AVG'] + df.at[5, 'ANN AVG'] +
           df.at[6, 'ANN AVG'] + df.at[7, 'ANN AVG'] + df.at[8, 'ANN AVG'] +
           df.at[9, 'ANN AVG'] + df.at[10, 'ANN AVG'] + df.at[11, 'ANN AVG'] +
           df.at[12, 'ANN AVG'] + df.at[13, 'ANN AVG'] + df.at[14, 'ANN AVG'] +
           df.at[15, 'ANN AVG'] + df.at[16, 'ANN AVG'] + df.at[17, 'ANN AVG'] +
           df.at[18, 'ANN AVG'])/19
average

-0.23039473684210524

In [8]:
alt.Chart(data=df, mark="point")

In [9]:
alt.Chart(df).mark_point().properties(width = 950, 
                                      height = 500, 
                                      background = '#F9F9F9', 
                                      padding = 25)

In [10]:
alt.Chart(df).mark_point(
    size = 100,
    fill = "orange",
    color = "black",
).encode(
    x = alt.X(field='Year', 
              type = 'quantitative')
).properties(width = 950, 
             height = 500, 
             background = '#F9F9F9', 
             padding = 25)


In [11]:
alt.Chart(df).mark_point(
     size = 100,
    fill = "orange",
    color = "black",
    ).encode(
    x = alt.X(field='Year', type = 'quantitative'),
    y = alt.Y(field = 'ANN AVG', type = 'quantitative'),
).properties(width = 950, height = 500, background = '#F9F9F9', padding = 25)

In [12]:
main_t = alt.Chart(df).mark_point(
    size = 70,
    fill = "orange",
    opacity = 1,
    color = "black",
    ).encode(
    x = alt.X(
        field='Year', 
        type = 'quantitative',
        
        scale=alt.Scale(domain=[1880, 2017]),
        axis=alt.Axis(
            values = list(range(1880, 2017, 10)),
            format = '1',
        ),
        title = "",

    ),
    y = alt.Y(
        field = 'ANN AVG', 
        type = 'quantitative',
        axis=alt.Axis(
            values = [x * 0.1 for x in range(-6, 11)],
            labelExpr = 'datum.value > 0 ?' + 
            '(datum.value == 1 ? toString("+")+datum.label+toString("°F") : toString("+")+datum.label+toString("°"))' +
            ': datum.label+toString("°")',
        ),
        title = ""
    ),
).properties(width = 850, height = 500, background = '#FFFFFF', padding = 25)
main_t

In [13]:
main_t.configure_axisX(ticks = False,
                           domain = False,
                           labelFont = 'Helvetica',
                           labelFontSize = 13,
                           labelColor='grey',
                           labelPadding = 13,
    
           ).configure_axisY(domain = False,
                             tickColor = 'light grey',
                             labelFont = 'Helvetica',
                             labelFontSize = 13,
                             labelColor='grey',
                             labelPadding = 7,
           )

In [14]:
alt.Chart(df).mark_text(
    align='left',
    baseline='middle',
    dx=7
).encode(
    x = alt.X(
        field='Year', 
        type = 'quantitative',
        
        scale=alt.Scale(domain=[1880, 2017]),
        axis=alt.Axis(
            values = list(range(1880, 2017, 10)),
            format = '1'
        ),
        title = ""
    ),
    y = alt.Y(
        field = 'ANN AVG', 
        type = 'quantitative',
        axis=alt.Axis(
            values = [x * 0.1 for x in range(-6, 11)],
            labelExpr = 'datum.value > 0 ?' + 
            '(datum.value == 1 ? toString("+")+datum.label+toString("°F") : toString("+")+datum.label+toString("°"))' +
            ': datum.label+toString("°")',
        ),
        title = ""
    ),
    text= 'Year'
).transform_filter(
     alt.FieldOneOfPredicate(field='Year', oneOf=[1904, 1944, 1998, 2014, 2015, 2017])
).properties(width = 850, height = 500, background = '#FFFFFF', padding = 25)

In [15]:
alt.Chart(df).mark_text(
    align='left',
    baseline='middle',
    dx=7,
    fontWeight = "bold"
).encode(
    x = alt.X(
        field='Year', 
        type = 'quantitative',
        
        scale=alt.Scale(domain=[1880, 2017]),
        axis=alt.Axis(
            values = list(range(1880, 2017, 10)),
            format = '1'
        ),
        title = ""
    ),
    y = alt.Y(
        field = 'ANN AVG', 
        type = 'quantitative',
        axis=alt.Axis(
            values = [x * 0.1 for x in range(-6, 11)],
            labelExpr = 'datum.value > 0 ?' + 
            '(datum.value == 1 ? toString("+")+datum.label+toString("°F") : toString("+")+datum.label+toString("°"))' +
            ': datum.label+toString("°")',
        ),
        title = ""
    ),
    text= 'Year',
).transform_filter(
     (datum.Year == 2016)
).properties(width = 850, height = 500, background = '#FFFFFF', padding = 25)

# VISUALIZATION

In [37]:
main_table = alt.Chart(df).mark_point(
    size = 70,
    fill = "orange",
    opacity = 1,
    color = "black",
    ).encode(
    x = alt.X(
        field='Year', 
        type = 'quantitative',
        
        scale=alt.Scale(domain=[1880, 2017]),
        axis=alt.Axis(
            values = list(range(1880, 2017, 10)),
            format = '1'
        ),
        title = ""
    ),
    y = alt.Y(
        field = 'ANN AVG', 
        type = 'quantitative',
        axis=alt.Axis(
            values = [x * 0.1 for x in range(-6, 11)],
            labelExpr = 'datum.value > 0 ?' + 
            '(datum.value == 1 ? toString("+")+datum.label+toString("°F") : toString("+")+datum.label+toString("°"))' +
            ': datum.label+toString("°")',
        ),
        title = ""
    ),
)

In [38]:
upper_text_1 = alt.Chart(pd.DataFrame({'y': [0]})).mark_text(
            x=830, 
            y=350, 
            fontSize=14, 
            align='right',
            baseline='middle',
            opacity=0.9,
            color='black',
            fontWeight = 200, 
            font = 'Ubuntu Mono', 
            text='HOTTER THAN THE',
).encode()

upper_text_2 = alt.Chart(pd.DataFrame({'y': [0]})).mark_text(
            x=830, 
            y=365, 
            fontSize=14, 
            align='right',
            baseline='middle',
            opacity=0.9,
            color='black',
            fontWeight = 200, 
            font = 'Ubuntu Mono', 
            text='1880-1899 AVERAGE',
).encode()


upper_text = upper_text_1 + upper_text_2

lower_text = alt.Chart(pd.DataFrame({'y': [0]})).mark_text(
            x=830, 
            y=404, 
            fontSize=14, 
            align='right',
            baseline='middle',
            opacity=0.9,
            color='black',
            fontWeight = 200, 
            font = 'Ubuntu Mono', 
            text='COLDER',
).encode()

In [39]:
main_title_1 = alt.Chart(pd.DataFrame({'y': [0]})).mark_text(
            x=280, 
            y=30, 
            fontSize=15.5, 
            align='right',
            baseline='middle',
            opacity=0.9,
            color='black',
            fontWeight = "bold", 
            font = 'Helvetica',
            text='Annual Global Surface Temperature,',
).encode()

main_title_2 = alt.Chart(pd.DataFrame({'y': [0]})).mark_text(
            x=291, 
            y=50, 
            fontSize=15.5, 
            align='right',
            baseline='middle',
            opacity=0.9,
            color='black',
            fontWeight = "bold", 
            font = 'Helvetica', 
            text='Relative to Late 19th Century Average',
).encode()

main_title = main_title_1 + main_title_2

In [40]:
years_normal_right = alt.Chart(df).mark_text(
    align='left',
    dx=7,
    font = 'Helvetica',
    fontSize = 13,
).encode(
    x = alt.X(
        field='Year', 
        type = 'quantitative',
    ),
    y = alt.Y(
        field = 'ANN AVG', 
        type = 'quantitative',
    ),
    text= 'Year'
).transform_filter(
     alt.FieldOneOfPredicate(field='Year', oneOf=[1944, 2014, 2015, 2017])
)

years_normal_left = alt.Chart(df).mark_text(
    align='right',
    dx=-7,
    font = 'Helvetica',
    fontSize = 13,
).encode(
    x = alt.X(
        field='Year', 
        type = 'quantitative',
    ),
    y = alt.Y(
        field = 'ANN AVG', 
        type = 'quantitative',
    ),
    text= 'Year'
).transform_filter(
     alt.FieldOneOfPredicate(field='Year', oneOf=[1904, 1998])
)

years_normal = years_normal_right + years_normal_left

In [41]:
years_bold = alt.Chart(df).mark_text(
    align='left',
    dx=7,
    fontWeight = "bold",
    font = 'Helvetica',
    fontSize = 13,
).encode(
    x = alt.X(
        field='Year', 
        type = 'quantitative',
    ),
    y = alt.Y(
        field = 'ANN AVG', 
        type = 'quantitative',
    ),
    text= 'Year',
).transform_filter(
     (datum.Year == 2016)
)

In [42]:
line = alt.Chart(pd.DataFrame({'y': [average]})).mark_rule(
            color='grey',
            fontWeight = "bold",
            ).encode(y='y')

In [43]:
full_table = alt.layer(line, main_table, upper_text, lower_text, main_title, years_normal, years_bold
                      ).configure_view(strokeOpacity=0
                    ).configure_axisX(ticks = False,
                           domain = False,
                           labelFont = 'Helvetica',
                           labelFontSize = 13,
                           labelColor='grey',
                           labelPadding = 13,
    
           ).configure_axisY(domain = False,
                             tickColor = '#DCDCDC',
                             labelFont = 'Helvetica',
                             labelFontSize = 13,
                             labelColor='grey',
                             labelPadding = 7,
           ).properties(width = 850, height = 500, background = '#FFFFFF', padding = 25
        )

full_table