In [2]:
import numpy as np
import pandas as pd
import re

In [3]:
# Input location: <folder>/elevation_<resolution>.csv
resolution = '60km'
folder = "../data"
file = f'{folder}/elevation_{resolution}.csv'

# Raw elevation table; later cells derive columns from it.
df = pd.read_csv(file)

In [4]:
def _first_two_coords(geo):
    """Return the first two comma-separated numbers inside the first ``[...]`` of ``geo``.

    Raises ValueError when ``geo`` contains no bracketed list, instead of the
    opaque AttributeError that calling ``.group`` on a failed match produces.
    """
    match = re.search(r'\[(.*?)\]', geo)
    if match is None:
        raise ValueError(f"no bracketed coordinate list in .geo value: {geo!r}")
    parts = match.group(1).split(',')
    return float(parts[0]), float(parts[1])


# Iterate over the '.geo' column directly: only that column is needed, so
# building a full row Series per record with iterrows() is wasted work.
long = []
lat = []
for geo in df['.geo']:
    first, second = _first_two_coords(geo)
    long.append(first)   # first bracketed value is stored as longitude
    lat.append(second)   # second bracketed value is stored as latitude

df['long'] = long
df['lat'] = lat
# `columns=` already selects the axis, so no `axis` argument is passed.
df = df.drop(columns=['.geo', 'system:index'])

In [5]:
# Quick sanity summary of the parsed table.
# Series.max()/.min() are used instead of the builtins: the builtins compare
# element by element, so a NaN in the column makes their result depend on row
# order, whereas the pandas reductions skip missing values by default.
# Each "range" line prints the maximum first, then the minimum.
print("rows by columns:", df.shape)
print("columns:", df.columns)
print("longitude range:", df['long'].max(), df['long'].min())
print("latitude range:", df['lat'].max(), df['lat'].min())
print("elevation range:", df['elevation'].max(), df['elevation'].min())

rows by columns: (374, 3)
columns: Index(['elevation', 'long', 'lat'], dtype='object')
longitude range: 153.34241899920232 145.79657061259834
latitude range: -23.446028915519513 -37.99873651825576
elevation range: 1578.3523 -0.0027488957


In [6]:
lapse_rate = 0.0065 # deg C per m
sea_level_temp = 19 # deg C - along east coast, using Sydney as reference
err_std_dev = 0.2 # deg C - taking into account errors for lapse rate and sea level temp

def linear_elevation_to_temp(elevations, err=True, rng=None): # elevation in meters
    """Convert elevations to temperatures with a linear lapse-rate model.

    Each temperature is ``sea_level_temp - lapse_rate * elevation``, optionally
    perturbed by zero-mean Gaussian noise with standard deviation
    ``err_std_dev``.

    Parameters
    ----------
    elevations : iterable of float
        Elevations in metres.
    err : bool, default True
        When truthy, an independent noise draw is added to every temperature.
    rng : object with a ``normal(loc, scale, size)`` method, optional
        Noise source, e.g. ``np.random.default_rng(seed)`` for reproducible
        output. When omitted, NumPy's global ``np.random`` state is used.

    Returns
    -------
    list of float
        One temperature (deg C) per input elevation, in input order.
    """
    heights = np.asarray(list(elevations), dtype=float)
    temps = sea_level_temp - lapse_rate * heights
    if err:
        # `err` is only ever read as a flag. The noise lives in its own array,
        # so a sample that happens to be exactly 0.0 can never switch the
        # noise off for the remaining elements.
        sampler = np.random if rng is None else rng
        temps = temps + sampler.normal(0, err_std_dev, size=heights.shape)
    return temps.tolist()

In [7]:
# Noise-free temperatures (err=False), written together with the columns
# currently in df as the "truth" table.
truth_path = f'{folder}/{resolution}_truth.csv'
df['generated_temp'] = linear_elevation_to_temp(df['elevation'], err=False)
df.to_csv(truth_path, index=False)

In [8]:
# Seed NumPy's global RNG so the noisy data file is identical on every
# Restart & Run All; without it the written CSV changes on each run.
np.random.seed(42)

# Overwrite the noise-free column with noisy temperatures (err defaults to True).
df['generated_temp'] = linear_elevation_to_temp(df['elevation'])
# The data file omits the elevation column. `columns=` already selects the
# axis, so no `axis` argument is passed.
df = df.drop(columns=['elevation'])
df.to_csv(folder + f'/{resolution}_data.csv', index=False)