In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import seaborn as sb
import pandas as pd

# REFERENCES
# Plotting with Pandas: http://jonathansoma.com/lede/algorithms-2017/classes/fuzziness-matplotlib/understand-df-plot-in-pandas/
# Regression, etc.: https://becominghuman.ai/linear-regression-in-python-with-pandas-scikit-learn-72574a2ec1a5

In [None]:
# What will happen?

# Data=pd.read_csv('data/global_temperature_anomaly.csv')

# Oops! You have to look at the data
# OK boss, I'm going to the command line

# Your friends:

# cd              -- change directory
# cd ..           -- go up one level
# cd -            -- go back to where you were
# head FILE       -- look at the first few lines of FILE
# head -12 FILE   -- look at the first 12 lines of FILE
# tail FILE       -- look at last few lines of FILE
# tail -8 FILE    -- what does this do?
# grep 1929 FILE  -- display lines containing 1929
# grep -v '#' FILE -- display all lines not containing '#'
# more FILE       -- look at a file
# cat FILE        -- copy FILE to terminal (stdout)
# cat FILE > FF   -- copy FILE to FF

# Combining things

# grep -v '#' FILE > anomalies-good.csv  -- We just cleaned our data in one line!!
#  -- How would we do this with the sealevel data?


# Load the cleaned anomalies file. header=None makes pandas treat the first
# line as data, and the column labels are supplied explicitly.
df = pd.read_csv('data/anomalies-good.csv', header=None, names=["year", "anomaly"])

# A notebook cell only renders its LAST expression automatically, so the
# first preview has to be shown explicitly; display() is provided by the
# IPython/Jupyter kernel.
display(df.head(10))
df.sample(5)

# df=pd.read_csv('data/anomalies-good.csv', names=["year", "anomaly"])
# df.head(10)
# Alternatives: omit names..., use header=None, ...

In [None]:
# Alternative to cleaning on the command line: read the original file and
# let pandas skip its first 7 lines (presumably the '#' comment header --
# confirm against the file). header=None keeps the first remaining line as data.
df2 = pd.read_csv(
    'data/global_temperature_anomaly.csv',
    header=None,
    skiprows=7,
)
df2.head(n=10)

In [None]:
# A bare df.plot() draws every column against the row index, which is not
# what we want here; use the 'year' column as the x-axis instead.
df.plot(x="year", kind="line")

In [None]:
# Sea-level file: sep=r"\s+" splits fields on runs of whitespace, and
# header=None leaves the columns with default integer labels.
# NOTE(review): this cell used to be labelled as a failing example, but the
# call matches the working form; presumably the failing variant omitted
# `sep` -- confirm.
df3 = pd.read_csv(
    'data/sea_level3.txt',
    sep=r"\s+",
    header=None,
)


In [None]:
# Peek at the first three rows of the unnamed-column frame.
df3.head(n=3)

In [None]:
"""
HDR 1 altimeter type 0=dual-frequency  999=single frequency (ie Poseidon-1)
HDR 2 merged file cycle #
HDR 3 year+fraction of year (mid-cycle)
HDR 4 number of observations
HDR 5 number of weighted observations
HDR 6 GMSL (Global Isostatic Adjustment (GIA) not applied) variation (mm) with respect to 20-year TOPEX/Jason collinear mean reference
HDR 7 standard deviation of GMSL (GIA not applied) variation estimate (mm)
HDR* 8 smoothed (60-day Gaussian type filter) GMSL (GIA not applied) variation (mm)  with respect to 20-year mean
HDR* 9 GMSL (Global Isostatic Adjustment (GIA) applied) variation (mm) )  with respect to 20-year mean
HDR 10 standard deviation of GMSL (GIA applied) variation estimate (mm)
HDR* 11 smoothed (60-day Gaussian type filter) GMSL (GIA applied) variation (mm) )  with respect to 20-year mean
HDR* 12 moothed (60-day Gaussian type filter) GMSL (GIA applied) variation (mm); annual and semi-annual signal removed )  with respect to 20-year mean
"""
meta = ['alt', 'cycle', 'year', 'obs', 'wobs', 'GMSL', 'devGMSL', 'smGMSL', 'GMSL2', 'devGMSL2', 'smGMSL2', 'sm2GMSL2']
print("len(meta) =", len(meta))
df3 = pd.read_csv('data/sea_level3.txt', sep=r"\s+", names=meta)
df3.head(5)

In [None]:
# Smoothed, GIA-applied series with annual/semi-annual signal removed, against time.
df3.plot(x="year", y="sm2GMSL2", kind="line")

In [None]:
# Unsmoothed GIA-applied series against time, for comparison.
df3.plot(y="GMSL2", x="year", kind="line")

In [None]:
# Overlay both GIA-applied series on a single 10x7 figure, then grab the
# enclosing Figure from the returned Axes so it can be written to disk.
_sea_axes = df3.plot(
    x='year',
    y=['sm2GMSL2', 'GMSL2'],
    figsize=(10, 7),
)
fig = _sea_axes.get_figure()
fig.savefig('sealevel.png')

# seaplot.savefig('test.pdf')
# What are the peaks? Seasonal?
# Can you write a program to extract the maxima? The minima?

In [None]:
# Pull out the two columns used for the trend fit: the time axis
# (year + fraction of year) and the smoothed series with seasonal signal removed.
years, sealevel = df3['year'], df3['sm2GMSL2']

In [None]:
# Inspect what kind of object the single-column selection df3['year'] produced.
type(years)

In [None]:
# Quick look at the extracted series using matplotlib directly rather than DataFrame.plot.
plt.plot(years, sealevel)

In [None]:
# Least-squares straight-line fit. For degree 1, polyfit returns the
# coefficients highest power first, i.e. (slope, intercept).
m, b = np.polyfit(x=years, y=sealevel, deg=1)
(m, b)

In [None]:
def y(x):
    """Evaluate the fitted line at ``x`` using the global slope ``m`` and intercept ``b``."""
    return m * x + b


# Fitted sea level at 2019 and at 2049.
tuple(y(when) for when in (2019, 2049))

In [None]:
# Fitted value for every observation time. y is applied element by element
# so that linfit remains a plain list, as before.
linfit = [y(year) for year in years]

# Observed series in blue, fitted straight line in red.
plt.plot(years, sealevel, color='blue', linestyle='solid')
plt.plot(years, linfit, color='red', linestyle='solid')
plt.title("Sea level")
plt.xlabel("Year")
plt.ylabel("Delta level (mm)")

plt.show()