In [90]:
# Annual Mean Temperature vs Sea Level - Data Preparation
# This is a personal project using data from NASA and sealevel.info.
# Here we try to get some perspective on the effect of rising global temperatures on the sea level.
# Of course, there are many other factors involved in the rise of the sea level, but this is just a simplification, not validated.
# Fabio Cardoso - Jan/2022

In [91]:
# Imports

import json
import numpy as np
import pandas as pd
from datetime import datetime
from pandas.io.json import json_normalize

# pipeline and modeling libraries
!pip install sklearn2pmml
!pip install pypmml
!pip install sklearn-pandas
from sklearn.linear_model import LinearRegression
from joblib import dump, load
from sklearn2pmml.pipeline import PMMLPipeline
from sklearn2pmml import sklearn2pmml
from pypmml import Model
from sklearn_pandas import DataFrameMapper

In [None]:
# The code was removed by Watson Studio for sharing.

In [92]:
# Storage access instantiation

import types
import pandas as pd
import ibm_boto3
from botocore.client import Config

def __iter__(self):
    """Return 0 unconditionally; ``self`` is ignored.

    NOTE(review): no visible cell references this function -- it looks like
    leftover generated boilerplate; confirm before removing.
    """
    return 0


# OAuth-signed S3-style client for the project storage. Every parm1_* name is
# defined outside this cell.
_fcsinsights_client_args = {
    'service_name': 's3',
    'ibm_api_key_id': parm1_ibm_api_key_id,
    'ibm_service_instance_id': parm1_ibm_service_instance_id,
    'ibm_auth_endpoint': parm1_ibm_auth_endpoint,
    'config': Config(signature_version='oauth'),
    'endpoint_url': parm1_endpoint_url,
}
cnx_fcsinsights_client = ibm_boto3.client(**_fcsinsights_client_args)

In [93]:
# Local data
# Second OAuth-signed S3-style client, built from the parm2_* values (defined
# outside this cell), together with the bucket name used with it when the
# PMML file is uploaded.

local_oauth_config = Config(signature_version='oauth')
cnx_local = ibm_boto3.client(
    service_name='s3',
    ibm_api_key_id=parm2_ibm_api_key_id,
    ibm_auth_endpoint=parm2_ibm_auth_endpoint,
    config=local_oauth_config,
    endpoint_url=parm2_endpoint_url,
)

bucket_local = parm2_bucket_local

In [94]:
# Download files
# Declare the raw (in) and treated (out) file names for both datasets, pick up
# the bucket names, then fetch the two raw inputs to local files that carry
# the same name as their storage key.

file_temps_in, file_temps_out = (
    'Global Historical Temperatures - giss.nasa.gov.txt',
    'Global Historical Temperatures - giss.nasa.gov.csv',
)
file_tides_in, file_tides_out = (
    'sealevel.infodata_8443970_data.json',
    'Boston Historical Mean Sea Level (MSL).csv',
)

bucket_in, bucket_out = parm1_bucket_in, parm1_bucket_out

for raw_file in (file_temps_in, file_tides_in):
    cnx_fcsinsights_client.download_file(Bucket=bucket_in, Filename=raw_file, Key=raw_file)

In [95]:
# Extracting historical global temperatures.
# sources: https://data.giss.nasa.gov/gistemp/graphs_v4/graph_data/Temperature_Change_for_Three_Latitude_Bands/graph.txt
# Note: Boston is in the 24N-90N band.
#
# Converts the fixed-width text file into a three-column CSV
# (Year, Global, Lat_24N_90N). Everything before FIRST_DATA_LINE is skipped.

FIRST_DATA_LINE = 10  # 1-based number of the first input line that is converted

# `with` guarantees both files are closed even if a read or write fails midway.
with open(file_temps_in, mode='r') as f_temps_in, \
        open(file_temps_out, mode='w') as f_temps_out:
    for lin_nr, lin in enumerate(f_temps_in, start=1):
        if lin_nr < FIRST_DATA_LINE:
            continue
        if lin_nr == FIRST_DATA_LINE:
            # The header is emitted once, right before the first converted row.
            f_temps_out.write("Year,Global,Lat_24N_90N\n")
        if not lin.strip():
            # A blank line would otherwise be sliced into a malformed
            # "\n, , \n" row and corrupt the CSV.
            continue
        # Fixed character positions of the three columns that are kept.
        year = lin[:4]
        glob = lin[7:11]
        lat_24n_90n = lin[26:30]
        f_temps_out.write(year + ', ' + glob + ', ' + lat_24n_90n + '\n')

In [97]:
# Extracting Boston historical sea level
# Source: https://sealevel.info/MSL_graph.php?id=8443970
# Note: see concept of sea level and metonic cycle in
# https://www.nationalgeographic.org/encyclopedia/sea-level/#:~:text=In%20the%20United%20States%20and,is%20called%20a%20Metonic%20cycle.

with open(file_tides_in) as json_data:
    data = json.load(json_data)

# 'msl_data' supplies the rows and 'msl_headers' the matching column names.
df_tides = pd.DataFrame(data['msl_data'], columns=data['msl_headers'])

# index=False: the default RangeIndex carries no information and would be read
# back by read_csv as a spurious 'Unnamed: 0' column.
df_tides.to_csv(file_tides_out, index=False)

In [98]:
# Prepare temperatures data
# Smooth both anomaly columns with a trailing 3-row rolling mean, drop every
# row that contains a NaN (the first rows of the window), and write the result
# back for SPSS.
# NOTE: this cell reads file_temps_out and then overwrites it, so running it a
# second time without re-running the extraction cell smooths the data twice.

df_temps = pd.read_csv(file_temps_out)
for anomaly_col in ('Global', 'Lat_24N_90N'):
    df_temps[anomaly_col] = df_temps[anomaly_col].rolling(3, center=False).mean()
df_temps = df_temps.dropna()
df_temps.to_csv(file_temps_out)  # saving for spss

In [99]:
# Prepare tides data
# Collapse the monthly records into one mean sea level per calendar year,
# fill gaps by linear interpolation, and save a (Year, Mean Sea Level) table.
# NOTE: this cell reads file_tides_out and then overwrites it with the annual
# table, so re-running it requires re-running the extraction cell first.

tides_monthly = pd.read_csv(file_tides_out).replace('NaN', np.nan)

# Annual mean per calendar year. Grouping on the 'year' column replaces the
# former month-timestamp index + resample('A'): the .astype('datetime64[M]')
# cast is rejected by pandas >= 2.0 and the 'A' alias is deprecated.
annual_msl = tides_monthly.groupby('year')['NOAA_adjusted_MSL'].mean()

# Re-create the rows resample() produced for years with no records at all, so
# that interpolation still bridges those gaps.
first_year = int(annual_msl.index.min())
last_year = int(annual_msl.index.max())
annual_msl = annual_msl.reindex(range(first_year, last_year + 1))

# No rolling mean is applied to the sea level on purpose: it is kept unsmoothed
# to be trained with the metonic cycle (19 autoregression points).
df_tides = (
    annual_msl
    .interpolate()
    .dropna()                  # leading NaNs are not filled by interpolate()
    .rename('Mean Sea Level')  # keeps the old column name for compatibility
    .rename_axis('Year')
    .to_frame()
)
df_tides.to_csv(file_tides_out)  # saving for spss

In [100]:
# Upload treated files
# Push both treated CSVs to the output bucket, using the file name as the key.

for treated_file in (file_temps_out, file_tides_out):
    cnx_fcsinsights_client.upload_file(Bucket=bucket_out, Filename=treated_file, Key=treated_file)

In [25]:
# Regression - mean temperature at the location vs sea level
# Fit a linear model on the years present in both tables, round-trip the fitted
# model through joblib, and display the reloaded model's prediction for an
# input value of 10.

df_merge = pd.merge(df_temps, df_tides, how='inner', on='Year')
X_band = df_merge[['Lat_24N_90N']]
y_msl = df_merge['Mean Sea Level']

mod = LinearRegression()
nug = mod.fit(X_band, y_msl)

model_file = 'nug1.nug'
dump(nug, model_file)
nug1 = load(model_file)
nug1.predict([[10]])

array([4.23872526])

In [26]:
# Simple pipeline (just the regression model in it) - temperature at the location vs sea level
# Fit the one-step pipeline, export it to PMML, load the exported file back
# with pypmml, and display its prediction for an input value of 10.

pmml_file = "lr1.pmml"

pipeline = PMMLPipeline([("lr", LinearRegression())])
pipeline.fit(
    df_merge[['Lat_24N_90N']],
    df_merge['Mean Sea Level'],
)
sklearn2pmml(pipeline, pmml_file)

pipeline1 = Model.fromFile(pmml_file)
pipeline1.predict([[10]])

  self.stdout = io.open(c2pread, 'rb', bufsize)
  self.stderr = io.open(errread, 'rb', bufsize)


[[4.238725260710763]]

In [27]:
# Uploading pipeline
# After uploading, this model must be promoted and published as a service.

exported_pmml = 'lr1.pmml'
cnx_local.upload_file(
    Bucket=bucket_local,
    Filename=exported_pmml,
    Key=exported_pmml,
)

In [1]:
# Prints a fixed marker string.
print("ok")

ok
