In [88]:
import csv
import pandas as pd
import plotly.express as px

In [89]:
#Rain CSV from Pacific Climate Change Data Portal, Australia BOM
#http://www.bom.gov.au/climate/pccsp/
with open('PLW_000001_Rain.csv') as f:
    lines_after_header = f.readlines()[2:]

In [90]:
data = []

In [91]:
for line in lines_after_header:
    data.append(line[:-1].split(","))

In [92]:
data

[['1905', '1', '0', '-99.9'],
 ['1905', '2', '0', '-99.9'],
 ['1905', '3', '0', '69.1'],
 ['1905', '4', '0', '49'],
 ['1905', '5', '0', '309.1'],
 ['1905', '6', '0', '224.8'],
 ['1905', '7', '0', '744'],
 ['1905', '8', '0', '345.9'],
 ['1905', '9', '0', '279.9'],
 ['1905', '10', '0', '497.1'],
 ['1905', '11', '0', '312.9'],
 ['1905', '12', '0', '387.1'],
 ['1906', '1', '0', '195.1'],
 ['1906', '2', '0', '209'],
 ['1906', '3', '0', '80'],
 ['1906', '4', '0', '157'],
 ['1906', '5', '0', '572'],
 ['1906', '6', '0', '281.9'],
 ['1906', '7', '0', '664'],
 ['1906', '8', '0', '392.9'],
 ['1906', '9', '0', '379'],
 ['1906', '10', '0', '194.1'],
 ['1906', '11', '0', '275.1'],
 ['1906', '12', '0', '405.9'],
 ['1907', '1', '0', '312.9'],
 ['1907', '2', '0', '161'],
 ['1907', '3', '0', '172'],
 ['1907', '4', '0', '47'],
 ['1907', '5', '0', '124'],
 ['1907', '6', '0', '336'],
 ['1907', '7', '0', '340.1'],
 ['1907', '8', '0', '625.1'],
 ['1907', '9', '0', '216.9'],
 ['1907', '10', '0', '64'],
 ['190

In [93]:
df = pd.DataFrame(data, columns = ["Year", "Month", "Day", "Value"])

In [94]:
df

Unnamed: 0,Year,Month,Day,Value
0,1905,1,0,-99.9
1,1905,2,0,-99.9
2,1905,3,0,69.1
3,1905,4,0,49
4,1905,5,0,309.1
...,...,...,...,...
1376,2020,8,0,532.5
1377,2020,9,0,340
1378,2020,10,0,392.1
1379,2020,11,0,-99.9


In [95]:
df['Month'] = df["Month"]
df['Year'] = df["Year"]
df['Day'] = df["Day"]
df['Value'] = pd.to_numeric(df["Value"])

In [96]:
df

Unnamed: 0,Year,Month,Day,Value
0,1905,1,0,-99.9
1,1905,2,0,-99.9
2,1905,3,0,69.1
3,1905,4,0,49.0
4,1905,5,0,309.1
...,...,...,...,...
1376,2020,8,0,532.5
1377,2020,9,0,340.0
1378,2020,10,0,392.1
1379,2020,11,0,-99.9


In [97]:
df_clean = df[df["Value"] != -99.9]

In [98]:
df_clean

Unnamed: 0,Year,Month,Day,Value
2,1905,3,0,69.1
3,1905,4,0,49.0
4,1905,5,0,309.1
5,1905,6,0,224.8
6,1905,7,0,744.0
...,...,...,...,...
1375,2020,7,0,282.6
1376,2020,8,0,532.5
1377,2020,9,0,340.0
1378,2020,10,0,392.1


In [99]:
df_clean['Month'] = pd.to_numeric(df_clean["Month"])



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [100]:
df_clean

Unnamed: 0,Year,Month,Day,Value
2,1905,3,0,69.1
3,1905,4,0,49.0
4,1905,5,0,309.1
5,1905,6,0,224.8
6,1905,7,0,744.0
...,...,...,...,...
1375,2020,7,0,282.6
1376,2020,8,0,532.5
1377,2020,9,0,340.0
1378,2020,10,0,392.1


In [101]:
# df_clean[pd.to_numeric(df_clean[df_clean["Month"] == 6]["Value"]) > 0]

In [102]:
monthly_avg = {}

In [103]:
month_code = {1:'Jan', 
      2:'Feb', 
      3:'Mar', 
      4:'Apr', 
      5:'May', 
      6:'Jun', 
      7:'Jul', 
      8:'Aug', 
      9:'Sep', 
      10:'Oct', 
      11:'Nov', 
      12:'Dec'}

In [104]:
for x in range(1,13):
    monthly_avg[month_code[x]] = pd.to_numeric(df_clean[df_clean['Month'] == x]["Value"]).sum()/pd.to_numeric(df_clean[df_clean['Month'] == x]["Value"]).count()

In [105]:
monthly_avg

{'Jan': 286.09270833333335,
 'Feb': 218.83333333333334,
 'Mar': 203.48969072164945,
 'Apr': 229.14791666666665,
 'May': 335.3546391752577,
 'Jun': 383.7319587628867,
 'Jul': 440.72626262626267,
 'Aug': 371.7857142857142,
 'Sep': 332.4683673469387,
 'Oct': 321.1632653061224,
 'Nov': 298.92164948453603,
 'Dec': 315.8571428571429}

In [106]:
monthly_df = pd.DataFrame(monthly_avg.items(), columns = ["Month", "Rainfall (mm)"])

In [107]:
monthly_df

Unnamed: 0,Month,Rainfall (mm)
0,Jan,286.092708
1,Feb,218.833333
2,Mar,203.489691
3,Apr,229.147917
4,May,335.354639
5,Jun,383.731959
6,Jul,440.726263
7,Aug,371.785714
8,Sep,332.468367
9,Oct,321.163265


In [108]:
fig = px.bar(monthly_df, x= "Month", y = "Rainfall (mm)")
fig.update_xaxes(type= 'category')
fig.show()

In [109]:
with open('PLW_000001_Rain_daily.csv') as f:
    lines_after_header = f.readlines()[2:]

In [110]:
data = []

In [111]:
for line in lines_after_header:
    data.append(line[:-1].split(","))

In [112]:
data

[['1947', '7', '1', '4.6'],
 ['1947', '7', '2', '3.8'],
 ['1947', '7', '3', '0'],
 ['1947', '7', '4', '2'],
 ['1947', '7', '5', '4.3'],
 ['1947', '7', '6', '15.5'],
 ['1947', '7', '7', '1.8'],
 ['1947', '7', '8', '8.9'],
 ['1947', '7', '9', '50.8'],
 ['1947', '7', '10', '1.3'],
 ['1947', '7', '11', '0.8'],
 ['1947', '7', '12', '22.1'],
 ['1947', '7', '13', '0.5'],
 ['1947', '7', '14', '4.1'],
 ['1947', '7', '15', '21.6'],
 ['1947', '7', '16', '18.8'],
 ['1947', '7', '17', '2'],
 ['1947', '7', '18', '47.2'],
 ['1947', '7', '19', '5.6'],
 ['1947', '7', '20', '0'],
 ['1947', '7', '21', '3.8'],
 ['1947', '7', '22', '83.1'],
 ['1947', '7', '23', '21.6'],
 ['1947', '7', '24', '9.1'],
 ['1947', '7', '25', '9.1'],
 ['1947', '7', '26', '0'],
 ['1947', '7', '27', '2.5'],
 ['1947', '7', '28', '9.7'],
 ['1947', '7', '29', '0'],
 ['1947', '7', '30', '0'],
 ['1947', '7', '31', '1'],
 ['1947', '8', '1', '9.9'],
 ['1947', '8', '2', '1.5'],
 ['1947', '8', '3', '-99.9'],
 ['1947', '8', '4', '0'],
 ['194

In [113]:
df_daily = pd.DataFrame(data, columns = ["Year", "Month", "Day", "Value"])

In [114]:
df_daily

Unnamed: 0,Year,Month,Day,Value
0,1947,7,1,4.6
1,1947,7,2,3.8
2,1947,7,3,0
3,1947,7,4,2
4,1947,7,5,4.3
...,...,...,...,...
26843,2020,12,27,22.9
26844,2020,12,28,5.1
26845,2020,12,29,7.9
26846,2020,12,30,3


In [115]:
df_daily['Month'] = df_daily["Month"]
df_daily['Year'] = df_daily["Year"]
df_daily['Day'] = df_daily["Day"]
df_daily['Value'] = pd.to_numeric(df_daily["Value"])

In [116]:
df_daily_cleaned = df_daily[df_daily["Value"] != -99.9]

In [117]:
df_daily_cleaned

Unnamed: 0,Year,Month,Day,Value
0,1947,7,1,4.6
1,1947,7,2,3.8
2,1947,7,3,0.0
3,1947,7,4,2.0
4,1947,7,5,4.3
...,...,...,...,...
26843,2020,12,27,22.9
26844,2020,12,28,5.1
26845,2020,12,29,7.9
26846,2020,12,30,3.0


In [118]:
df_daily_cleaned[(df_daily_cleaned['Month'] == "12") & (df_daily_cleaned["Year"] == "2020")]["Value"].sum()/df_daily_cleaned[(df_daily_cleaned['Month'] == "12")&(df_daily_cleaned["Year"] == "2020")]["Value"].count()

10.206451612903225

In [119]:
df

Unnamed: 0,Year,Month,Day,Value
0,1905,1,0,-99.9
1,1905,2,0,-99.9
2,1905,3,0,69.1
3,1905,4,0,49.0
4,1905,5,0,309.1
...,...,...,...,...
1376,2020,8,0,532.5
1377,2020,9,0,340.0
1378,2020,10,0,392.1
1379,2020,11,0,-99.9


In [120]:
empty_months = df[df["Value"] == -99.9][["Year","Month"]]

In [121]:
empty_months

Unnamed: 0,Year,Month
0,1905,1
1,1905,2
96,1913,1
97,1913,2
98,1913,3
...,...,...
1368,2019,12
1369,2020,1
1370,2020,2
1371,2020,3


In [122]:
col_year= empty_months["Year"].tolist()
col_month = empty_months["Month"].tolist()

In [123]:
col_year

['1905',
 '1905',
 '1913',
 '1913',
 '1913',
 '1913',
 '1913',
 '1913',
 '1913',
 '1913',
 '1913',
 '1913',
 '1913',
 '1913',
 '1914',
 '1914',
 '1914',
 '1914',
 '1914',
 '1914',
 '1914',
 '1914',
 '1914',
 '1914',
 '1914',
 '1914',
 '1915',
 '1915',
 '1915',
 '1915',
 '1915',
 '1915',
 '1915',
 '1915',
 '1915',
 '1915',
 '1915',
 '1915',
 '1916',
 '1916',
 '1916',
 '1916',
 '1916',
 '1916',
 '1916',
 '1916',
 '1916',
 '1916',
 '1916',
 '1916',
 '1917',
 '1917',
 '1917',
 '1917',
 '1917',
 '1917',
 '1917',
 '1917',
 '1917',
 '1917',
 '1917',
 '1917',
 '1918',
 '1918',
 '1918',
 '1918',
 '1918',
 '1918',
 '1918',
 '1918',
 '1918',
 '1918',
 '1918',
 '1918',
 '1919',
 '1919',
 '1919',
 '1919',
 '1919',
 '1919',
 '1919',
 '1919',
 '1919',
 '1919',
 '1919',
 '1919',
 '1920',
 '1920',
 '1920',
 '1920',
 '1920',
 '1920',
 '1920',
 '1920',
 '1920',
 '1920',
 '1920',
 '1920',
 '1921',
 '1921',
 '1921',
 '1921',
 '1921',
 '1921',
 '1921',
 '1921',
 '1921',
 '1921',
 '1921',
 '1921',
 '1922',
 

In [124]:
for x,y in zip(col_year,col_month):
    remaining = df_daily_cleaned[(df_daily_cleaned["Year"] == str(x))&(df_daily_cleaned["Month"] == str(y))]["Value"]
    #print(remaining, remaining.count())
    if remaining.count() > 0:
        #print(x,y)
        print(df_daily_cleaned[(df_daily_cleaned["Year"] == str(x))&(df_daily_cleaned["Month"] == str(y))]["Value"].sum()/df_daily_cleaned[(df_daily_cleaned["Year"] == str(x))&(df_daily_cleaned["Month"] == str(y))]["Value"].count())

11.438461538461537


In [125]:
df_daily

Unnamed: 0,Year,Month,Day,Value
0,1947,7,1,4.6
1,1947,7,2,3.8
2,1947,7,3,0.0
3,1947,7,4,2.0
4,1947,7,5,4.3
...,...,...,...,...
26843,2020,12,27,22.9
26844,2020,12,28,5.1
26845,2020,12,29,7.9
26846,2020,12,30,3.0


In [126]:
holes = list(df_daily[df_daily["Value"] == -99.9]["Year"].unique())

In [127]:
blacklist = []

In [128]:
for x in holes:
    for y in range(1,13):
        if df_daily[(df_daily["Value"] == -99.9) & (df_daily["Year"] == x) & (df_daily["Month"] == str(y))]["Value"].count() > 3:
            blacklist.append([x,str(y)])

In [129]:
df

Unnamed: 0,Year,Month,Day,Value
0,1905,1,0,-99.9
1,1905,2,0,-99.9
2,1905,3,0,69.1
3,1905,4,0,49.0
4,1905,5,0,309.1
...,...,...,...,...
1376,2020,8,0,532.5
1377,2020,9,0,340.0
1378,2020,10,0,392.1
1379,2020,11,0,-99.9


In [130]:
for pair in blacklist:
    df = df.drop(df[(df["Year"]==str(pair[0])) & (df["Month"]==str(pair[1]))].index)

In [131]:
df

Unnamed: 0,Year,Month,Day,Value
0,1905,1,0,-99.9
1,1905,2,0,-99.9
2,1905,3,0,69.1
3,1905,4,0,49.0
4,1905,5,0,309.1
...,...,...,...,...
1375,2020,7,0,282.6
1376,2020,8,0,532.5
1377,2020,9,0,340.0
1378,2020,10,0,392.1


In [132]:
df_monthly_cleaned = df[df["Value"] != -99.9]

In [133]:
df_monthly_cleaned

Unnamed: 0,Year,Month,Day,Value
2,1905,3,0,69.1
3,1905,4,0,49.0
4,1905,5,0,309.1
5,1905,6,0,224.8
6,1905,7,0,744.0
...,...,...,...,...
1375,2020,7,0,282.6
1376,2020,8,0,532.5
1377,2020,9,0,340.0
1378,2020,10,0,392.1


In [134]:
df_clean

Unnamed: 0,Year,Month,Day,Value
2,1905,3,0,69.1
3,1905,4,0,49.0
4,1905,5,0,309.1
5,1905,6,0,224.8
6,1905,7,0,744.0
...,...,...,...,...
1375,2020,7,0,282.6
1376,2020,8,0,532.5
1377,2020,9,0,340.0
1378,2020,10,0,392.1


In [135]:
df_monthly_cleaned["Month"].unique()

array(['3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '1', '2'],
      dtype=object)

In [136]:
monthly_avg = {}

In [137]:
for x in range(1,13):
    monthly_avg[month_code[x]] = df_monthly_cleaned[df_monthly_cleaned['Month'] == str(x)]["Value"].sum()/df_monthly_cleaned[df_monthly_cleaned['Month'] == str(x)]["Value"].count()

In [138]:
monthly_df = pd.DataFrame(monthly_avg.items(), columns = ["Month", "Rainfall (mm)"])

In [139]:
monthly_df

Unnamed: 0,Month,Rainfall (mm)
0,Jan,286.092708
1,Feb,218.833333
2,Mar,203.489691
3,Apr,229.147917
4,May,335.354639
5,Jun,383.731959
6,Jul,440.726263
7,Aug,371.785714
8,Sep,333.831959
9,Oct,322.020619


In [140]:
0 	Jan 	286.092708
1 	Feb 	218.833333
2 	Mar 	203.489691
3 	Apr 	229.147917
4 	May 	335.354639
5 	Jun 	383.731959
6 	Jul 	440.726263
7 	Aug 	371.785714
8 	Sep 	332.468367
9 	Oct 	321.163265
10 	Nov 	298.921649
11 	Dec 	315.857143

SyntaxError: invalid syntax (2913873898.py, line 1)

In [141]:
fig = px.bar(monthly_df, x= "Month", y = "Rainfall (mm)")
fig.update_xaxes(type= 'category')
fig.show()

In [142]:
df_daily

Unnamed: 0,Year,Month,Day,Value
0,1947,7,1,4.6
1,1947,7,2,3.8
2,1947,7,3,0.0
3,1947,7,4,2.0
4,1947,7,5,4.3
...,...,...,...,...
26843,2020,12,27,22.9
26844,2020,12,28,5.1
26845,2020,12,29,7.9
26846,2020,12,30,3.0


In [143]:
df_wet = df_daily[df_daily["Value"] >= 1]

In [144]:
df_wet = df_wet.groupby(['Year',"Month"])

In [145]:
df_wet

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7f8cf8166670>

In [146]:
df_wet = df_wet["Day"].count()

In [147]:
df_wet

Year  Month
1947  10        3
      12       17
      7        24
      8        17
      9         8
               ..
2020  5        19
      6        23
      7        19
      8        24
      9        17
Name: Day, Length: 857, dtype: int64

In [148]:
df_wet = df_wet.reset_index()

In [149]:
df_wet=df_wet.rename(columns = {"Day":"Days"})

In [150]:
import datetime
df_wet['DateInt']=df_wet['Year'].astype(str) + df_wet['Month'].astype(str).str.zfill(2)
df_wet["Date"] = pd.to_datetime(df_wet['DateInt'], format='%Y%m')

In [151]:
df_wet

Unnamed: 0,Year,Month,Days,DateInt,Date
0,1947,10,3,194710,1947-10-01
1,1947,12,17,194712,1947-12-01
2,1947,7,24,194707,1947-07-01
3,1947,8,17,194708,1947-08-01
4,1947,9,8,194709,1947-09-01
...,...,...,...,...,...
852,2020,5,19,202005,2020-05-01
853,2020,6,23,202006,2020-06-01
854,2020,7,19,202007,2020-07-01
855,2020,8,24,202008,2020-08-01


In [152]:
df_wet=df_wet.sort_values(by="Date")
print(df_wet)

     Year Month  Days DateInt       Date
2    1947     7    24  194707 1947-07-01
3    1947     8    17  194708 1947-08-01
4    1947     9     8  194709 1947-09-01
0    1947    10     3  194710 1947-10-01
1    1947    12    17  194712 1947-12-01
..    ...   ...   ...     ...        ...
855  2020     8    24  202008 2020-08-01
856  2020     9    17  202009 2020-09-01
848  2020    10    16  202010 2020-10-01
849  2020    11    18  202011 2020-11-01
850  2020    12    25  202012 2020-12-01

[857 rows x 5 columns]


In [153]:
fig = px.bar(df_wet, x= "Date", y = "Days")
#fig.update_xaxes(type= 'category')
fig.show()

In [154]:
df_wet_year = df_daily[df_daily["Value"] >= 1]

In [155]:
df_wet_year

Unnamed: 0,Year,Month,Day,Value
0,1947,7,1,4.6
1,1947,7,2,3.8
3,1947,7,4,2.0
4,1947,7,5,4.3
5,1947,7,6,15.5
...,...,...,...,...
26842,2020,12,26,20.3
26843,2020,12,27,22.9
26844,2020,12,28,5.1
26845,2020,12,29,7.9


In [156]:
df_wet_counts = df_wet_year["Year"].value_counts()

In [157]:
df_wet_year[df_wet_year["Year"] == "2020"]

Unnamed: 0,Year,Month,Day,Value
26584,2020,4,12,3.0
26585,2020,4,13,25.4
26586,2020,4,14,3.0
26587,2020,4,15,11.7
26589,2020,4,17,20.1
...,...,...,...,...
26842,2020,12,26,20.3
26843,2020,12,27,22.9
26844,2020,12,28,5.1
26845,2020,12,29,7.9


In [158]:
df_wet_counts

2011    269
1956    261
1999    255
1984    255
1953    251
       ... 
2015    184
1951    182
2020    178
2018    119
1947     69
Name: Year, Length: 73, dtype: int64

In [159]:
df_wet_counts = df_wet_counts.reset_index()

In [160]:
df_wet_counts

Unnamed: 0,index,Year
0,2011,269
1,1956,261
2,1999,255
3,1984,255
4,1953,251
...,...,...
68,2015,184
69,1951,182
70,2020,178
71,2018,119


In [161]:
df_wet_counts = df_wet_counts.rename(columns = {"index":"Year", "Year":"Days"})

In [162]:
df_wet_counts=df_wet_counts.sort_values(by="Year")
df_wet_counts=df_wet_counts[pd.to_numeric(df_wet_counts["Year"]) >= 1951]

In [163]:
df_wet_counts

Unnamed: 0,Year,Days
69,1951,182
37,1952,227
4,1953,251
25,1954,235
10,1955,246
...,...,...
68,2015,184
40,2016,225
9,2017,246
71,2018,119


In [164]:
df_wet_counts_n = df_wet_counts
df_wet_counts_n["Year"] = pd.to_numeric(df_wet_counts_n["Year"])

In [165]:
fig = px.bar(df_wet_counts, x= "Year", y = "Days")
#fig.update_xaxes(type= 'category')
fig.update_xaxes(range=["1950", "2020"])
fig.update_layout(
    xaxis = dict(
        tickmode = 'linear',
       # tick0 = 1950,
        dtick = 5
    )
)
fig.show()

In [166]:
df_daily[df_daily["Value"] == -99.9]

Unnamed: 0,Year,Month,Day,Value
33,1947,8,3,-99.9
46,1947,8,16,-99.9
72,1947,9,11,-99.9
74,1947,9,13,-99.9
75,1947,9,14,-99.9
...,...,...,...,...
26764,2020,10,9,-99.9
26810,2020,11,24,-99.9
26811,2020,11,25,-99.9
26812,2020,11,26,-99.9


In [167]:
bad_years = df_daily[df_daily["Value"] == -99.9]["Year"].unique()

In [168]:
remove_years = []
for x in bad_years:
    if len(df_daily[(df_daily["Value"] == -99.9) & (df_daily["Year"] == x)]) > 15:
        remove_years.append(int(x))

In [169]:
remove_years

[1947, 1949, 1951, 2018, 2019, 2020]

In [170]:
df_wet_counts

Unnamed: 0,Year,Days
69,1951,182
37,1952,227
4,1953,251
25,1954,235
10,1955,246
...,...,...
68,2015,184
40,2016,225
9,2017,246
71,2018,119


In [171]:
print(list(df_wet_counts["Year"].unique()))

[1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, 1959, 1960, 1961, 1962, 1963, 1964, 1965, 1966, 1967, 1968, 1969, 1970, 1971, 1972, 1973, 1974, 1975, 1976, 1977, 1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2020]


In [172]:
df_wet_counts_cleaned = df_wet_counts[~df_wet_counts["Year"].isin(remove_years)]

In [173]:
fig = px.bar(df_wet_counts_cleaned, x= "Year", y = "Days")
#fig.update_xaxes(type= 'category')
fig.update_xaxes(range=["1950", "2020"])
fig.update_layout(
    xaxis = dict(
        tickmode = 'linear',
       # tick0 = 1950,
        dtick = 5))
fig.update_layout(
    yaxis = dict(
        tickmode = 'linear',
        dtick = 50))
fig.show()

In [174]:
import urllib
url = "http://www.bom.gov.au/climate/history/enso/"
hdr = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
       'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
       'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
       'Accept-Encoding': 'none',
       'Accept-Language': 'en-US,en;q=0.8',
       'Connection': 'keep-alive'}

request=urllib.request.Request(url,None,hdr) #The assembled request
response = urllib.request.urlopen(request)
data = response.read()

In [175]:
data



In [176]:
import re

In [177]:
elnino =[a.end() for a in list(re.finditer("<h2>El Ni&ntilde;o: ", str(data)))]

In [178]:
elnino

[25195,
 35779,
 45238,
 47433,
 54772,
 57830,
 59529,
 65314,
 67138,
 68842,
 74731,
 80303,
 81900,
 86395,
 87882,
 91688,
 96634,
 100196,
 101551,
 107593,
 111511,
 115589,
 117628,
 119154,
 124869,
 128527]

In [179]:
elnino_years = []

In [180]:
list(range(2012,2014 + 1))

[2012, 2013, 2014]

In [181]:
for el in elnino:
    elnino_years.extend(list(range(int(str(data)[el:el+4]), int(str(data)[el:el+2]+ str(data)[el+11:el+13])+1)))

In [182]:
elnino_years

[2015,
 2016,
 2009,
 2010,
 2006,
 2007,
 2002,
 2003,
 1997,
 1998,
 1993,
 1994,
 1991,
 1992,
 1987,
 1988,
 1982,
 1983,
 1977,
 1978,
 1972,
 1973,
 1969,
 1970,
 1965,
 1966,
 1963,
 1964,
 1957,
 1958,
 1951,
 1952,
 1946,
 1947,
 1941,
 1942,
 1940,
 1941,
 1925,
 1926,
 1919,
 1920,
 1914,
 1915,
 1913,
 1914,
 1911,
 1912,
 1905,
 1906,
 1902,
 1903]

In [183]:
len([(a.start(), a.end()) for a in list(re.finditer("<h2>El Ni&ntilde;o: ", str(data)))])

26

In [184]:
lanina = [ a.end() for a in list(re.finditer("<h2>La Ni&ntilde;a: ", str(data)))]

In [185]:
len([(a.start(), a.end()) for a in list(re.finditer("<h2>La Ni&ntilde;a: ", str(data)))])

17

In [186]:
lanina

[29994,
 39465,
 42349,
 49698,
 61711,
 70595,
 76199,
 84022,
 89266,
 93387,
 98326,
 103737,
 105525,
 109356,
 120710,
 122588,
 126543]

In [187]:
str(data)[29300:29300+13]

'ng">\\n\\t<img '

In [188]:
str(data)[29300:29300+4]

'ng">'

In [189]:
str(data)[29300+11:29300+13]

'g '

In [190]:
lanina_years = []

In [191]:
list(range(2012,2014 + 1))

[2012, 2013, 2014]

In [192]:
for la in lanina:
    lanina_years.extend(list(range(int(str(data)[la:la+4]), int(str(data)[la:la+2]+ str(data)[la+11:la+13])+1)))

In [193]:
lanina_years

[2010,
 2011,
 2012,
 2008,
 2009,
 2007,
 2008,
 1988,
 1989,
 1973,
 1974,
 1975,
 1976,
 1970,
 1971,
 1972,
 1964,
 1965,
 1954,
 1955,
 1956,
 1957,
 1949,
 1950,
 1951,
 1942,
 1943,
 1938,
 1939,
 1928,
 1929,
 1930,
 1924,
 1925,
 1909,
 1910,
 1911,
 1906,
 1907,
 1903,
 1904]

In [194]:
lanina_years

[2010,
 2011,
 2012,
 2008,
 2009,
 2007,
 2008,
 1988,
 1989,
 1973,
 1974,
 1975,
 1976,
 1970,
 1971,
 1972,
 1964,
 1965,
 1954,
 1955,
 1956,
 1957,
 1949,
 1950,
 1951,
 1942,
 1943,
 1938,
 1939,
 1928,
 1929,
 1930,
 1924,
 1925,
 1909,
 1910,
 1911,
 1906,
 1907,
 1903,
 1904]

In [195]:
print(list(set(elnino_years) & set(lanina_years)))

[1988, 1957, 1925, 1964, 1965, 1903, 1911, 1970, 1906, 1972, 1973, 1942, 2007, 2009, 2010, 1951]


In [196]:
df_wet_counts

Unnamed: 0,Year,Days
69,1951,182
37,1952,227
4,1953,251
25,1954,235
10,1955,246
...,...,...
68,2015,184
40,2016,225
9,2017,246
71,2018,119


In [197]:
enso_labels = {}
for x in range(1951,2020):
    if x in lanina_years and x in elnino_years:
        enso_labels[x] = "Mixed"
    elif x in lanina_years:
        enso_labels[x] = "La Nina"
    elif x in elnino_years:
        enso_labels[x] = "El Nino"
    else:
        enso_labels[x] = "Neutral"

In [198]:
enso_labels

{1951: 'Mixed',
 1952: 'El Nino',
 1953: 'Neutral',
 1954: 'La Nina',
 1955: 'La Nina',
 1956: 'La Nina',
 1957: 'Mixed',
 1958: 'El Nino',
 1959: 'Neutral',
 1960: 'Neutral',
 1961: 'Neutral',
 1962: 'Neutral',
 1963: 'El Nino',
 1964: 'Mixed',
 1965: 'Mixed',
 1966: 'El Nino',
 1967: 'Neutral',
 1968: 'Neutral',
 1969: 'El Nino',
 1970: 'Mixed',
 1971: 'La Nina',
 1972: 'Mixed',
 1973: 'Mixed',
 1974: 'La Nina',
 1975: 'La Nina',
 1976: 'La Nina',
 1977: 'El Nino',
 1978: 'El Nino',
 1979: 'Neutral',
 1980: 'Neutral',
 1981: 'Neutral',
 1982: 'El Nino',
 1983: 'El Nino',
 1984: 'Neutral',
 1985: 'Neutral',
 1986: 'Neutral',
 1987: 'El Nino',
 1988: 'Mixed',
 1989: 'La Nina',
 1990: 'Neutral',
 1991: 'El Nino',
 1992: 'El Nino',
 1993: 'El Nino',
 1994: 'El Nino',
 1995: 'Neutral',
 1996: 'Neutral',
 1997: 'El Nino',
 1998: 'El Nino',
 1999: 'Neutral',
 2000: 'Neutral',
 2001: 'Neutral',
 2002: 'El Nino',
 2003: 'El Nino',
 2004: 'Neutral',
 2005: 'Neutral',
 2006: 'El Nino',
 2007: '

In [199]:
len(enso_labels)

69

In [200]:
df_wet_counts["Year"]

69    1951
37    1952
4     1953
25    1954
10    1955
      ... 
68    2015
40    2016
9     2017
71    2018
70    2020
Name: Year, Length: 69, dtype: int64

In [201]:
enso_labels.values()

dict_values(['Mixed', 'El Nino', 'Neutral', 'La Nina', 'La Nina', 'La Nina', 'Mixed', 'El Nino', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'El Nino', 'Mixed', 'Mixed', 'El Nino', 'Neutral', 'Neutral', 'El Nino', 'Mixed', 'La Nina', 'Mixed', 'Mixed', 'La Nina', 'La Nina', 'La Nina', 'El Nino', 'El Nino', 'Neutral', 'Neutral', 'Neutral', 'El Nino', 'El Nino', 'Neutral', 'Neutral', 'Neutral', 'El Nino', 'Mixed', 'La Nina', 'Neutral', 'El Nino', 'El Nino', 'El Nino', 'El Nino', 'Neutral', 'Neutral', 'El Nino', 'El Nino', 'Neutral', 'Neutral', 'Neutral', 'El Nino', 'El Nino', 'Neutral', 'Neutral', 'El Nino', 'Mixed', 'La Nina', 'Mixed', 'Mixed', 'La Nina', 'La Nina', 'Neutral', 'Neutral', 'El Nino', 'El Nino', 'Neutral', 'Neutral', 'Neutral'])

In [202]:
df_wet_counts["ENSO"] = enso_labels.values()

In [203]:
df_wet_counts

Unnamed: 0,Year,Days,ENSO
69,1951,182,Mixed
37,1952,227,El Nino
4,1953,251,Neutral
25,1954,235,La Nina
10,1955,246,La Nina
...,...,...,...
68,2015,184,El Nino
40,2016,225,El Nino
9,2017,246,Neutral
71,2018,119,Neutral


In [204]:
df_wet_counts_cleaned_labeled = df_wet_counts[~df_wet_counts["Year"].isin(remove_years)]

In [205]:
fig = px.bar(df_wet_counts_cleaned_labeled, x= "Year", y = "Days", color="ENSO",
    color_discrete_map={
        "La Nina": "blue",
        "Mixed": "purple",
        "Neutral": "gray",
        "El Nino": "red",
    },)
#fig.update_xaxes(type= 'category')
fig.update_xaxes(range=["1950", "2020"])
fig.update_layout(
    xaxis = dict(
        tickmode = 'linear',
       # tick0 = 1950,
        dtick = 5))
fig.update_layout(
    yaxis = dict(
        tickmode = 'linear',
        dtick = 50))
fig.show()

In [206]:
df_test = df_wet_counts_cleaned_labeled.copy(deep=True)

In [207]:
df_test

Unnamed: 0,Year,Days,ENSO
37,1952,227,El Nino
4,1953,251,Neutral
25,1954,235,La Nina
10,1955,246,La Nina
1,1956,261,La Nina
...,...,...,...
19,2013,239,Neutral
31,2014,233,Neutral
68,2015,184,El Nino
40,2016,225,El Nino


In [208]:
import numpy as np
from sklearn.linear_model import LinearRegression


In [209]:
Y = list(df_test["Days"])
X = list(df_test["Year"])

In [210]:
reg = LinearRegression().fit(np.vstack(X), Y)


In [211]:
df_test['bestfit'] = reg.predict(np.vstack(X))

In [212]:
reg.coef_

array([-0.02703267])

In [213]:
df_test

Unnamed: 0,Year,Days,ENSO,bestfit
37,1952,227,El Nino,228.772501
4,1953,251,Neutral,228.745468
25,1954,235,La Nina,228.718436
10,1955,246,La Nina,228.691403
1,1956,261,La Nina,228.664370
...,...,...,...,...
19,2013,239,Neutral,227.123508
31,2014,233,Neutral,227.096476
68,2015,184,El Nino,227.069443
40,2016,225,El Nino,227.042410


In [214]:
import plotly.graph_objects as go

In [215]:
fig = px.bar(df_test, x= "Year", y = "Days", color="ENSO",
    color_discrete_map={
        "La Nina": "blue",
        "Mixed": "purple",
        "Neutral": "gray",
        "El Nino": "red",
    },)
#fig.update_xaxes(type= 'category')
fig.update_xaxes(range=["1950", "2020"])
fig.update_layout(
    xaxis = dict(
        tickmode = 'linear',
       # tick0 = 1950,
        dtick = 5))
fig.update_layout(
    yaxis = dict(
        tickmode = 'linear',
        dtick = 50))
fig.add_trace(go.Scatter(name='line of best fit', x=X, y=df_test['bestfit'], mode='lines'))
fig.show()

In [216]:
#Below are all graphs produced by this notebook

In [217]:
#Monthly average total rainfall derived from Monthly Rain data
fig = px.bar(monthly_df, x= "Month", y = "Rainfall (mm)")
fig.update_xaxes(type= 'category')
fig.show()

In [218]:
#Monthly average total rainfall derived from Daily Rain data
fig = px.bar(monthly_df, x= "Month", y = "Rainfall (mm)")
fig.update_xaxes(type= 'category')
fig.show()

In [219]:
#Number of wet days per MONTH
fig = px.bar(df_wet, x= "Date", y = "Days")
#fig.update_xaxes(type= 'category')
fig.show()

In [220]:
#Number of wet days per YEAR
fig = px.bar(df_wet_counts, x= "Year", y = "Days")
#fig.update_xaxes(type= 'category')
fig.update_xaxes(range=["1950", "2020"])
fig.update_layout(
    xaxis = dict(
        tickmode = 'linear',
       # tick0 = 1950,
        dtick = 5
    )
)
fig.show()

In [221]:
#Number of wet days per YEAR, removing years with missing data
fig = px.bar(df_wet_counts_cleaned, x= "Year", y = "Days")
#fig.update_xaxes(type= 'category')
fig.update_xaxes(range=["1950", "2020"])
fig.update_layout(
    xaxis = dict(
        tickmode = 'linear',
       # tick0 = 1950,
        dtick = 5))
fig.update_layout(
    yaxis = dict(
        tickmode = 'linear',
        dtick = 50))
fig.show()

In [222]:
#El Nino/La Nina labeled wet days per month
fig = px.bar(df_wet_counts_cleaned_labeled, x= "Year", y = "Days", color="ENSO",
    color_discrete_map={
        "La Nina": "blue",
        "Mixed": "purple",
        "Neutral": "gray",
        "El Nino": "red",
    },)
#fig.update_xaxes(type= 'category')
fig.update_xaxes(range=["1950", "2020"])
fig.update_layout(
    xaxis = dict(
        tickmode = 'linear',
       # tick0 = 1950,
        dtick = 5))
fig.update_layout(
    yaxis = dict(
        tickmode = 'linear',
        dtick = 50))
fig.show()

In [223]:
#El Nino/La Nina labeled wet days per month with trend line
fig = px.bar(df_test, x= "Year", y = "Days", color="ENSO",
    color_discrete_map={
        "La Nina": "blue",
        "Mixed": "purple",
        "Neutral": "gray",
        "El Nino": "red",
    },)
#fig.update_xaxes(type= 'category')
fig.update_xaxes(range=["1950", "2020"])
fig.update_layout(
    xaxis = dict(
        tickmode = 'linear',
       # tick0 = 1950,
        dtick = 5))
fig.update_layout(
    yaxis = dict(
        tickmode = 'linear',
        dtick = 50))
fig.add_trace(go.Scatter(name='line of best fit', x=X, y=df_test['bestfit'], mode='lines'))
fig.show()