In [None]:
import os
# Switch to the project directory so the relative script names used by the
# following cells (load.py, load_cache.py, ...) resolve.
# NOTE(review): this absolute path looks like a Colab Google Drive mount --
# confirm the drive is mounted before running this cell.
os.chdir('/content/drive/MyDrive/python-performance')

In [None]:
!python load.py 01044099999,02293099999 2021-2021

{'01044099999': -10.0, '02293099999': -27.6}


In [None]:
!python -m cProfile -s cumulative load.py 01044099999,02293099999 2021-2021 > profile.txt

In [None]:
!cat profile.txt | head -n 20

{'01044099999': -10.0, '02293099999': -27.6}
         413705 function calls (408474 primitive calls) in 5.767 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
    176/1    0.002    0.000    5.767    5.767 {built-in method builtins.exec}
        1    0.000    0.000    5.767    5.767 load.py:1(<module>)
        1    0.001    0.001    5.256    5.256 load.py:27(download_all_data)
        2    0.001    0.000    5.254    2.627 load.py:17(download_data)
        2    0.001    0.000    5.193    2.596 api.py:62(get)
        2    0.000    0.000    5.192    2.596 api.py:14(request)
        2    0.000    0.000    5.192    2.596 sessions.py:500(request)
        2    0.000    0.000    5.184    2.592 sessions.py:673(send)
     2454    0.004    0.000    4.335    0.002 socket.py:704(readinto)
     2454    0.006    0.000    4.328    0.002 ssl.py:1299(recv_into)
     2454    0.003    0.000    4.322    0.002 ssl.py:1157(read)
     2454    4.31

In [None]:
!python -m cProfile -s cumulative load_cache.py 01044099999,02293099999 2021-2021 > profile_cache.txt

In [None]:
!cat profile_cache.txt | head -n 20

{'01044099999': -10.0, '02293099999': -27.6}
         326212 function calls (321056 primitive calls) in 0.629 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
    174/1    0.004    0.000    0.629    0.629 {built-in method builtins.exec}
        1    0.000    0.000    0.629    0.629 load_cache.py:1(<module>)
       16    0.001    0.000    0.591    0.037 __init__.py:1(<module>)
        1    0.017    0.017    0.324    0.324 load_cache.py:51(get_all_temperatures)
    203/3    0.003    0.000    0.304    0.101 <frozen importlib._bootstrap>:1165(_find_and_load)
    202/3    0.001    0.000    0.304    0.101 <frozen importlib._bootstrap>:1120(_find_and_load_unlocked)
    33650    0.269    0.000    0.303    0.000 load_cache.py:36(get_file_temperatures)
    195/3    0.001    0.000    0.303    0.101 <frozen importlib._bootstrap>:666(_load_unlocked)
    173/3    0.001    0.000    0.302    0.101 <frozen importlib._bootstrap_external>:93

In [None]:
!python -m cProfile -o distance_cache.prof distance_cache.py

Traceback (most recent call last):
  File "/usr/lib/python3.11/profile.py", line 63, in runctx
    prof.runctx(statement, globals, locals)
  File "/usr/lib/python3.11/cProfile.py", line 101, in runctx
    exec(cmd, globals, locals)
  File "distance_cache.py", line 48, in <module>
    distances = get_distances(stations, locations)
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "distance_cache.py", line 41, in get_distances
    distances[(first_station, second_station)] = get_distance(
    ~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
KeyboardInterrupt

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "/usr/lib/python3.11/cProfile.py", line 191, in <module>
    main()
  File "/usr/lib/python3.11/cProfile.py", line 180, in main
    runctx(code, globs, None, options.outfile, options.sort)
  File "/usr/lib/python3.11/c

In [None]:
!pip install line_profiler



In [None]:
!kernprof -l lprofile_distance_cache.py

Wrote profile results to lprofile_distance_cache.py.lprof
Inspect results with:
python3 -m line_profiler -rmt "lprofile_distance_cache.py.lprof"


In [None]:
!python -m line_profiler lprofile_distance_cache.py.lprof

Timer unit: 1e-06 s

Total time: 15.9921 s
File: lprofile_distance_cache.py
Function: get_distance at line 16

Line #      Hits         Time  Per Hit   % Time  Line Contents
    16                                           @profile
    17                                           def get_distance(p1, p2):
    18   3433427    1028399.2      0.3      6.4      lat1, lon1 = p1
    19   3433427    1413671.2      0.4      8.8      lat2, lon2 = p2
    20                                           
    21   3433427    1119718.4      0.3      7.0      lat_dist = math.radians(lat2 - lat1)
    22   3433427     906252.4      0.3      5.7      lon_dist = math.radians(lon2 - lon1)
    23   3433427     461903.5      0.1      2.9      a = (
    24   6866854    2337642.4      0.3     14.6          math.sin(lat_dist / 2) * math.sin(lat_dist / 2) +
    25  10300281    2993832.6      0.3     18.7          math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) *
    26   6866854    1957298.8      0.3  

In [None]:
import collections
import csv
import os

import requests

# stations = sys.argv[1].split(",")
# years = [int(year) for year in sys.argv[2].split("-")]
# start_year = years[0]
# end_year = years[1]

# URL pattern of one station-year CSV; filled in with
# str.format(station=..., year=...).
TEMPLATE_URL = "https://www.ncei.noaa.gov/data/global-hourly/access/{year}/{station}.csv"
# Local file name pattern for one station-year CSV; filled in the same way.
TEMPLATE_FILE = "station_{station}_{year}.csv"

def download_data(station, year):
    """Download one station-year CSV and save it to a local file.

    The URL is built from ``TEMPLATE_URL`` and the destination file name from
    ``TEMPLATE_FILE``, both formatted with ``station`` and ``year``.

    Args:
        station: Station identifier substituted into the URL and file name.
        year: Year substituted into the URL and file name.

    Returns:
        None. Nothing is written when the response status is not 200.
    """
    my_url = TEMPLATE_URL.format(station=station, year=year)
    req = requests.get(my_url)
    if req.status_code != 200:
        return  # not found
    # The context manager closes the file even if the write fails part-way,
    # which the previous manual open()/close() pair did not guarantee.
    with open(TEMPLATE_FILE.format(station=station, year=year), "wt") as w:
        w.write(req.text)


def download_all_data(stations, start_year, end_year):
    """Fetch every station-year file that is not already on disk.

    Args:
        stations: Iterable of station identifiers.
        start_year: First year to fetch (inclusive).
        end_year: Last year to fetch (inclusive).

    Returns:
        None. Station-year combinations whose ``TEMPLATE_FILE`` name already
        exists locally are left untouched.
    """
    for station in stations:
        for year in range(start_year, end_year + 1):
            local_name = TEMPLATE_FILE.format(station=station, year=year)
            if os.path.exists(local_name):
                # Already downloaded earlier; skip the request.
                continue
            download_data(station, year)


# NOTE: pandas would be the more standard tool for reading these CSV files;
# the csv module is used here instead.
def get_file_temperatures(file_name):
    """Yield the accepted temperatures stored in one station CSV file.

    The first CSV row is taken as the header and the ``TMP`` column is
    located by name. Each ``TMP`` cell is split on ``","`` into a value and a
    status; only rows whose status is ``"1"`` are kept, and the value is
    yielded as ``int(value) / 10``.

    Args:
        file_name: Path of the CSV file to read.

    Yields:
        float: One temperature per accepted row, in file order. An empty
        file yields nothing.

    Raises:
        ValueError: If the header has no ``TMP`` column (raised as soon as
            iteration starts), if a ``TMP`` cell does not split into exactly
            two parts, or if the value part is not an integer.
        IndexError: If a row is too short to contain the ``TMP`` column.
    """
    # newline="" is what the csv module asks for, so that it can interpret
    # line endings (including ones inside quoted fields) itself.
    with open(file_name, "rt", newline="") as f:
        reader = csv.reader(f)
        header = next(reader, None)
        if header is None:
            # Empty file: finish cleanly. A bare next(reader) would raise
            # StopIteration, which becomes RuntimeError inside a generator.
            return
        # Resolve the column position once; looking it up for every row
        # repeats a linear scan of the header for no benefit.
        tmp_index = header.index("TMP")
        for row in reader:
            temperature, status = row[tmp_index].split(",")
            if status != "1":
                continue
            yield int(temperature) / 10


def get_all_temperatures(stations, start_year, end_year):
    """Gather the temperatures of every station over an inclusive year range.

    Args:
        stations: Iterable of station identifiers.
        start_year: First year to read (inclusive).
        end_year: Last year to read (inclusive).

    Returns:
        A ``collections.defaultdict(list)`` mapping each station to its
        temperatures, in year order and then file order. A station for which
        no temperature was read gets no key.
    """
    by_station = collections.defaultdict(list)
    for station in stations:
        for year in range(start_year, end_year + 1):
            file_name = TEMPLATE_FILE.format(station=station, year=year)
            readings = list(get_file_temperatures(file_name))
            # Touch the mapping only when there is something to store, so a
            # station without readings does not get an empty entry.
            if readings:
                by_station[station].extend(readings)
    return by_station


# One station, years 2005 to 2021 inclusive. download_all_data skips files
# that already exist locally, so re-running this cell does not re-download.
stations = ['01044099999']
start_year = 2005
end_year = 2021
download_all_data(stations, start_year, end_year)
all_temperatures = get_all_temperatures(stations, start_year, end_year)

# Flat list of every accepted reading for the first (and only) station.
first_all_temperatures = all_temperatures[stations[0]]
# Number of readings, then the highest and lowest value.
print(len(first_all_temperatures), max(first_all_temperatures), min(first_all_temperatures))

# Membership test on the list of readings: one value to look for, and -100,
# which is below the minimum printed above and therefore never found.
%timeit (-10.7 in first_all_temperatures)
%timeit (-100 in first_all_temperatures)

# Same readings deduplicated into a set; the print shows how many distinct
# values there are.
set_first_all_temperatures = set(first_all_temperatures)
print(len(set_first_all_temperatures))

# Same two lookups, now against the set.
%timeit (-10.7 in set_first_all_temperatures)
%timeit (-100 in set_first_all_temperatures)


# Synthetic comparison: the integers 0..99999 as a list and as a set.
a_list_range = list(range(100000))
a_set_range = set(a_list_range)

# 50000 is present in both containers; 500000 is outside the range, so the
# last two lookups are misses.
%timeit 50000 in a_list_range
%timeit 50000 in a_set_range
%timeit 500000 in a_list_range
%timeit 500000 in a_set_range

141082 27.0 -16.0
301 µs ± 66.6 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
1.74 ms ± 249 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
400
63.4 ns ± 14.3 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)
19 ns ± 2.29 ns per loop (mean ± std. dev. of 7 runs, 100000000 loops each)
264 µs ± 15 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
24.5 ns ± 0.809 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)
670 µs ± 200 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
18.1 ns ± 2.09 ns per loop (mean ± std. dev. of 7 runs, 100000000 loops each)


In [None]:
!python alloc.py 01044099999,02293099999 2021-2021

192
40
72
17
248
1303981
10431912
<class 'list'>
34
53
60
64
64
28
<class 'int'>
46
50
1304030
<class 'bytes'>
1304014
