# Melbourne: Hot Weather

This is an investigation into the frequency of days on which the maximum temperature reaches 35 degrees Celsius or higher.

In [59]:
import pandas as pd
import numpy as np
import math
import datetime as dt

In [8]:
# Read the BOM Victoria spreadsheet and prepare it for analysis:
# discard the 'Station' column and keep only the rows from 1930 onwards
bom = (
    pd.read_excel('../data/BOM_Victoria.xlsx')
    .drop(columns=['Station'])
    .loc[lambda frame: frame['Year'] >= 1930]
)

In [9]:
# Select the hot days: rows whose 'Maximum' is at least 35

temp_filter = bom['Maximum'].ge(35)
hot_days = bom.loc[temp_filter]

# How many hot days were found
hot_days['Maximum'].count()

np.int64(925)

In [50]:
# Count the occurrences of hot days for every (Year, Month) pair

hot_days_per_month = (
    hot_days
    .groupby(['Year', 'Month'])['Maximum']
    .count()
    # Present the tally as a one-column frame named 'Count'
    .to_frame('Count')
)
hot_days_per_month

Unnamed: 0_level_0,Unnamed: 1_level_0,Count
Year,Month,Unnamed: 2_level_1
1930,1,4
1930,2,8
1930,3,1
1931,2,5
1931,3,1
...,...,...
2023,3,1
2024,2,5
2024,3,3
2024,11,2


In [96]:
# Let's see the "hot summers": December, January, February

# Level 1 of the (Year, Month) index is "Month": build a boolean mask that
# marks the rows belonging to a summer month
is_summer_month = hot_days_per_month.index.isin([12, 1, 2], level=1)

# Apply the mask to keep only the summer rows
summer = hot_days_per_month[is_summer_month]

# One (start_year, end_year) label per distinct year that has summer hot days.
# The end year is derived as start + 1 instead of being read from the next row,
# so a year without hot days cannot stretch a label across the gap and the
# final label is never left with an empty end year.
summer_years = summer.index.get_level_values(0).unique()
index = [(str(year), str(year + 1)) for year in summer_years]

# TODO: Sum the days over the summer
# NOTE(review): January and February belong to the summer that started in the
# previous December, so summing per calendar year would mix two summers.
# TODO: Create a string that is '1930 - 1931'
# TODO: Then create pd.Series from the labels and the summed counts

index


[('1930', '1931'),
 ('1931', '1932'),
 ('1932', '1933'),
 ('1933', '1934'),
 ('1934', '1935'),
 ('1935', '1936'),
 ('1936', '1937'),
 ('1937', '1938'),
 ('1938', '1939'),
 ('1939', '1940'),
 ('1940', '1941'),
 ('1941', '1942'),
 ('1942', '1943'),
 ('1943', '1944'),
 ('1944', '1945'),
 ('1945', '1946'),
 ('1946', '1947'),
 ('1947', '1948'),
 ('1948', '1949'),
 ('1949', '1950'),
 ('1950', '1951'),
 ('1951', '1952'),
 ('1952', '1953'),
 ('1953', '1954'),
 ('1954', '1955'),
 ('1955', '1956'),
 ('1956', '1957'),
 ('1957', '1958'),
 ('1958', '1959'),
 ('1959', '1960'),
 ('1960', '1961'),
 ('1961', '1962'),
 ('1962', '1963'),
 ('1963', '1964'),
 ('1964', '1965'),
 ('1965', '1966'),
 ('1966', '1967'),
 ('1967', '1968'),
 ('1968', '1969'),
 ('1969', '1970'),
 ('1970', '1971'),
 ('1971', '1972'),
 ('1972', '1973'),
 ('1973', '1974'),
 ('1974', '1975'),
 ('1975', '1976'),
 ('1976', '1977'),
 ('1977', '1978'),
 ('1978', '1979'),
 ('1979', '1980'),
 ('1980', '1981'),
 ('1981', '1982'),
 ('1982', '1

In [37]:
# Find the hottest days in Melbourne and sort from the highest down

view = hot_days.sort_values(by='Maximum', ascending=False).head(20)

# Assemble the dates from the Year/Month/Day columns in one vectorised call
# instead of building a datetime object per row. The index is reset so the
# dates line up positionally with the temperatures below.
dates = pd.to_datetime(view[['Year', 'Month', 'Day']]).reset_index(drop=True)

df = pd.DataFrame(
    data = {
        'Date': dates,

        # Use the raw array here so the index of the Series is ignored
        'Temperature': view['Maximum'].to_numpy()
    }
)
df

Unnamed: 0,Date,Temperature
0,2009-02-07,46.4
1,1939-01-13,45.6
2,2009-01-30,45.1
3,1939-01-10,44.7
4,2009-01-29,44.3
5,2003-01-25,44.1
6,2014-01-16,43.9
7,2014-01-17,43.9
8,1968-01-31,43.7
9,2010-01-11,43.6
