In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will collect the paths of all files under the input directory into `all_filenames`

import os

# Collect the full path of every file below the Kaggle input directory.
# os.walk yields directory entries in arbitrary order, so the paths are sorted
# to make positional access such as all_filenames[0] deterministic across runs.
all_filenames = sorted(
    os.path.join(dirname, filename)
    for dirname, _, filenames in os.walk('/kaggle/input')
    for filename in filenames
)

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
def build_city_lookup():
    """
    Build a per-city lookup table from the metadata rows of the CSV.

    The first 12 data rows of the file describe the cities and contain no
    temperature data. They are read, transposed so that each city becomes a
    row, and labelled with the attribute names held in the first CSV column.

    Returns:
        pandas.DataFrame: one row per city, indexed by the CSV header label of
        that city's column, with ``lat`` and ``lng`` converted to float.
    """
    # After the transpose, row 0 holds the attribute names and every following
    # row holds one city.
    transposed = pd.read_csv(all_filenames[0], nrows=12).T
    # Copy so the column assignments below write to an independent frame
    # rather than to a slice of ``transposed``.
    city_lookup = transposed.iloc[1:, :].copy()
    city_lookup.columns = transposed.iloc[0]
    # ``np.float`` was removed in NumPy 1.24; the builtin ``float`` is the
    # same dtype (float64).
    city_lookup["lat"] = city_lookup["lat"].astype(float)
    city_lookup["lng"] = city_lookup["lng"].astype(float)
    return city_lookup

# Display the full (unreduced) lookup table as this cell's output.
build_city_lookup()

In [None]:
def build_reduced_city_lookup():
    """
    Reduce the city lookup to its key columns and validate the result.

    Keeps ``city``, ``country``, ``lat``, ``lng`` and ``population``. Missing
    populations are treated as 0 and the column is stored as an unsigned
    integer.

    Returns:
        pandas.DataFrame: the reduced, validated lookup table.

    Raises:
        AssertionError: if rows are duplicated, if ``city``/``country``/
            ``lat``/``lng`` contain nulls, or if a coordinate lies outside
            its valid range.
    """
    # Load once: every call to build_city_lookup() re-reads the CSV.
    city_lookup = build_city_lookup()[["city", "country", "lat", "lng", "population"]]

    # assign() returns a new frame, so the unsigned dtype really replaces the
    # column instead of being written element-wise into a column subset.
    city_lookup = city_lookup.assign(
        population=pd.to_numeric(city_lookup["population"]).fillna(0).astype(np.uint)
    )

    assert not city_lookup.duplicated().any(), "duplicate city rows found"

    required = city_lookup[["city", "country", "lat", "lng"]]
    rows_with_nulls = required[required.isnull().any(axis=1)]
    # The offending rows are used as the assertion message to ease debugging.
    assert rows_with_nulls.empty, rows_with_nulls

    assert city_lookup["lat"].between(-90, 90).all(), "latitude outside [-90, 90]"
    assert city_lookup["lng"].between(-180, 180).all(), "longitude outside [-180, 180]"

    return city_lookup

city_lookup = build_reduced_city_lookup()
# DataFrame.info() prints its report itself and returns None, so wrapping it
# in print() would emit a stray "None" line after the summary.
city_lookup.info()
city_lookup

In [None]:
def city_by_name(city_lookup, city_name:str):
    """
    Look up a city by name and load its data via ``city_by_index``.

    Args:
        city_lookup: lookup table with a ``city`` column whose index labels
            can be converted to the integer expected by ``city_by_index``.
        city_name: exact value to match in the ``city`` column.

    Returns:
        tuple: ``(city_by_index(index), matching_rows)`` where
        ``matching_rows`` holds every lookup row whose ``city`` equals
        ``city_name``. When several rows match, the first one is loaded.

    Raises:
        IndexError: if no row matches ``city_name``.
    """
    city_col = city_lookup[city_lookup["city"] == city_name]
    if city_col.empty:
        # Indexing an empty match would raise IndexError anyway, but without
        # saying which name was not found; keep the type, add the context.
        raise IndexError(f"no city named {city_name!r} in city_lookup")
    city_index = int(city_col.index[0])

    return city_by_index(city_index), city_col

def city_by_index(city_col:int):
    """
    Load a single city's column from the CSV as a Series.

    Only the first CSV column (used as a date-parsed index) and the column at
    position ``city_col + 1`` are read; the first 12 lines of the file are
    skipped.
    """
    wanted_columns = [0, city_col + 1]
    frame = pd.read_csv(
        all_filenames[0],
        skiprows=12,
        usecols=wanted_columns,
        index_col=0,
        parse_dates=True,
        cache_dates=False,
    )
    # Exactly one data column remains once the index column is set aside.
    return frame.iloc[:, 0]


In [None]:
# Any city column carries the same date index, so the first one is enough to
# read the covered date range.
sample_city = city_by_index(0)
first_day = sample_city.index.min().strftime('%d %B, %Y')
last_day = sample_city.index.max().strftime('%d %B, %Y')

# "at least": cities with an unknown population were counted as 0 when the
# lookup table was built.
summary = (
    f"This data contains daily temperatures for {len(city_lookup)} cities "
    f"covering a population of at least {city_lookup['population'].sum():,} "
    f"and {len(city_lookup['country'].unique())} countries. "
    f"The first recorded day is {first_day} and the last {last_day}."
)
summary

# Example city data

In [None]:
# Disabled sanity check, kept for reference: it asserts that the loaded series
# has no values below -100 or above 200 and contains no NaNs.
# NOTE(review): the loop body always loads index 0, so all 1000 iterations
# test the same column; presumably city_by_index(i) was intended — confirm
# before re-enabling.
# for i in range(1000):
#     city_data = city_by_index(0)
#     assert city_data[city_data < -100].count() == 0
#     assert city_data[city_data > 200].count() == 0
#     assert not np.isnan(city_data).any()

# Display the series read for index 0 as this cell's output.
city_by_index(0)

In [None]:
# city_by_name returns (series, matching lookup rows); plot the series for London.
city_by_name(city_lookup, "London")[0].plot()

# Animate years

In [None]:
import imageio
import matplotlib.pyplot as plt

# Load the London series together with its matching lookup row(s).
city_data, city_col = city_by_name(city_lookup, "London")
all_year = city_data.index.year.unique()
print(city_col)

print(all_year)

# The y-range is identical for every frame, so compute it once. Sharing one
# range keeps the frames comparable when they are played back as an animation.
y_min, y_max = city_data.min(), city_data.max()

# Render one PNG per year into the current working directory.
filenames = []
for year in all_year:
    file_name = str(year) + '.png'
    # Explicit figure/axes: the limits are applied to the axes that was
    # actually drawn on, and the figure is closed by handle rather than via
    # pyplot's implicit "current figure" state.
    fig, ax = plt.subplots()
    city_data[city_data.index.year == year].plot(ax=ax)
    ax.set_ylim(y_min, y_max)
    fig.savefig(file_name)
    plt.close(fig)
    filenames.append(file_name)

print(filenames)

# Stitch the yearly frames, in the order they were written, into an animated GIF.
images = [imageio.imread(frame_path) for frame_path in filenames]
imageio.mimsave('movie.gif', images)

![movie.gif](movie.gif)
