In [1]:
import pandas as pd
import numpy as np
from matplotlib import colors
import folium
from folium import plugins
from branca.colormap import linear

In [2]:
import warnings

# Silence every warning category for the rest of the session.
# NOTE(review): this blanket filter also hides any warnings that later cells
# may raise (pandas ones included); consider narrowing it to specific categories.
warnings.filterwarnings("ignore")

In [3]:
# Load the per-city temperature records; the CSV path is relative to the
# notebook's working directory.
df = pd.read_csv('GlobalLandTemperaturesByMajorCity.csv')

In [4]:
# Parse the 'dt' column into real timestamps so the .dt accessor can be used on it.
df = df.assign(dt=pd.to_datetime(df['dt']))

In [5]:
# Check column dtypes and non-null counts of the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 239177 entries, 0 to 239176
Data columns (total 7 columns):
 #   Column                         Non-Null Count   Dtype         
---  ------                         --------------   -----         
 0   dt                             239177 non-null  datetime64[ns]
 1   AverageTemperature             228175 non-null  float64       
 2   AverageTemperatureUncertainty  228175 non-null  float64       
 3   City                           239177 non-null  object        
 4   Country                        239177 non-null  object        
 5   Latitude                       239177 non-null  object        
 6   Longitude                      239177 non-null  object        
dtypes: datetime64[ns](1), float64(2), object(4)
memory usage: 12.8+ MB


In [6]:
# Keep only records from 1900 onward (less uncertainty). The explicit .copy()
# makes df2 an independent frame, so adding columns to it later does not write
# into a slice of df (avoids pandas' SettingWithCopyWarning).
df2 = df[df['dt'].dt.year >= 1900].copy()

In [7]:
# Add a Year column derived from the timestamp. assign() builds a new frame
# instead of writing into the filtered slice, so this cell does not trigger
# SettingWithCopyWarning and gives the same result when re-run.
df2 = df2.assign(Year=df2['dt'].dt.year)

In [8]:
# Collapse the records to one row per (City, Year): keep the first coordinates
# seen in each group and average the temperature readings.
df3 = (
    df2
    .groupby(['City', 'Year'])
    .agg(
        Latitude=('Latitude', 'first'),
        Longitude=('Longitude', 'first'),
        AverageTemperature=('AverageTemperature', 'mean'),
    )
    .reset_index()
)

In [9]:
# Preview the first rows of the per-city, per-year averages.
df3.head(3)

Unnamed: 0,City,Year,Latitude,Longitude,AverageTemperature
0,Abidjan,1900,5.63N,3.23W,26.1185
1,Abidjan,1901,5.63N,3.23W,25.91825
2,Abidjan,1902,5.63N,3.23W,25.589417


In [10]:
# Build one table per comparison period (1900-1905 and 2008-2013), each holding
# a single row per city with its coordinates and mean temperature.
def _mean_temperature_for_period(yearly, first_year, last_year):
    """Average the yearly city temperatures over an inclusive range of years.

    Parameters
    ----------
    yearly : DataFrame with 'City', 'Year', 'Latitude', 'Longitude' and
        'AverageTemperature' columns.
    first_year, last_year : inclusive bounds of the period.

    Returns
    -------
    DataFrame with one row per city: 'City', the first 'Latitude' and
    'Longitude' seen for that city, and the mean 'AverageTemperature'
    across the selected years.
    """
    in_period = (yearly['Year'] >= first_year) & (yearly['Year'] <= last_year)
    return yearly[in_period].groupby('City').agg({
        'Latitude': 'first',
        'Longitude': 'first',
        'AverageTemperature': 'mean',
    }).reset_index()


# The same helper is used for both periods so the two tables are guaranteed to
# be computed identically.
df_1900_1905 = _mean_temperature_for_period(df3, 1900, 1905)
df_2008_2013 = _mean_temperature_for_period(df3, 2008, 2013)

In [11]:
# Join the early-period and late-period city averages on City; the suffixes
# tell the two AverageTemperature columns apart.
df4 = df_1900_1905[['City', 'Latitude', 'Longitude', 'AverageTemperature']].merge(
    df_2008_2013[['City', 'AverageTemperature']],
    on='City',
    suffixes=('_1900', '_2013'),
)

In [12]:
# Preview the first merged rows (one row per city, both period averages).
df4.head(3)

Unnamed: 0,City,Latitude,Longitude,AverageTemperature_1900,AverageTemperature_2013
0,Abidjan,5.63N,3.23W,25.687986,27.064056
1,Addis Abeba,8.84N,38.11E,17.278792,18.637153
2,Ahmadabad,23.31N,72.52E,26.694069,27.514743


In [13]:
# Preview the last merged rows.
df4.tail(3)

Unnamed: 0,City,Latitude,Longitude,AverageTemperature_1900,AverageTemperature_2013
97,Umm Durman,15.27N,32.50E,28.928292,30.279486
98,Wuhan,29.74N,114.46E,16.501861,17.8015
99,Xian,34.56N,108.97E,11.152069,12.613375


In [14]:
# Temperature change per city: late-period average minus early-period average.
# The result keeps only the columns needed for mapping and is an explicit copy,
# so later column assignments on df4 operate on an independent frame instead of
# a column-subset slice (avoids pandas' SettingWithCopyWarning).
df4 = (
    df4
    .assign(TemperatureDelta=df4['AverageTemperature_2013'] - df4['AverageTemperature_1900'])
    .loc[:, ['City', 'Latitude', 'Longitude', 'TemperatureDelta']]
    .copy()
)

In [15]:
# df4 now holds, for each city, the change in average temperature between the
# 1900-1905 period and the 2008-2013 period.
df4.head(3)

Unnamed: 0,City,Latitude,Longitude,TemperatureDelta
0,Abidjan,5.63N,3.23W,1.376069
1,Addis Abeba,8.84N,38.11E,1.358361
2,Ahmadabad,23.31N,72.52E,0.820674


In [16]:
# function to convert latitude and longitude to numeric
def convert_lat_lon(value):
    """Convert a coordinate such as '5.63N' or '3.23W' to a signed float.

    Parameters
    ----------
    value : str or number
        A coordinate string made of a magnitude followed by a hemisphere
        letter (N, S, E or W, case-insensitive, surrounding whitespace
        allowed), a plain numeric string, or an already-numeric value.

    Returns
    -------
    float
        The magnitude, negated for the southern (S) and western (W)
        hemispheres. Numeric inputs and strings without a hemisphere letter
        are returned as floats with their sign unchanged.

    Raises
    ------
    ValueError
        If the string (minus a recognised hemisphere letter) is not a number.
    TypeError
        If a non-string value cannot be converted to float.
    """
    # Already-numeric values pass straight through, so applying the conversion
    # to a column twice is harmless.
    if not isinstance(value, str):
        return float(value)

    text = value.strip()
    hemisphere = text[-1:].upper()

    # Only strip the last character when it really is a hemisphere letter;
    # otherwise a plain number like '5.63' would silently lose a digit.
    if hemisphere in ('N', 'S', 'E', 'W'):
        magnitude = float(text[:-1])
        return -magnitude if hemisphere in ('S', 'W') else magnitude

    return float(text)

In [17]:
# Replace the textual coordinates with signed numeric values.
df4 = df4.assign(
    Latitude=df4['Latitude'].apply(convert_lat_lon),
    Longitude=df4['Longitude'].apply(convert_lat_lon),
)

In [20]:
# Preview the last rows after the coordinate conversion.
df4.tail(3)

Unnamed: 0,City,Latitude,Longitude,TemperatureDelta
97,Umm Durman,15.27,32.5,1.351194
98,Wuhan,29.74,114.46,1.299639
99,Xian,34.56,108.97,1.461306


In [19]:
# Map each city's temperature change as a colored circle marker.
map_center = [df4['Latitude'].mean(), df4['Longitude'].mean()]

# Bounding box the map may be panned within: [[south, west], [north, east]].
max_bounds = [[-90, -180], [90, 180]]
(south, west), (north, east) = max_bounds

# folium.Map's max_bounds argument is a boolean switch; the box itself is taken
# from min_lat/max_lat/min_lon/max_lon. Passing the list of corners as
# max_bounds only worked because a non-empty list is truthy, so the corners are
# now passed through the parameters that actually define the box.
temperature_map = folium.Map(
    location=map_center,
    zoom_start=2,
    min_zoom=2,
    max_zoom=10,
    min_lat=south,
    max_lat=north,
    min_lon=west,
    max_lon=east,
    max_bounds=True,
)

# Color scale spanning the observed range of temperature changes.
delta_min = df4['TemperatureDelta'].min()
delta_max = df4['TemperatureDelta'].max()
colormap = linear.RdYlBu_09.scale(delta_min, delta_max)
# Reverse the palette so the largest increases get the red end of the scale.
colormap.colors.reverse()
# Label the legend so the map can be read on its own.
colormap.caption = 'Temperature change (°C): 2008-2013 average vs 1900-1905 average'

# One marker per city, colored by its temperature change.
for city in df4.itertuples(index=False):
    marker_color = colormap(city.TemperatureDelta)
    folium.CircleMarker(
        location=[city.Latitude, city.Longitude],
        radius=5,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=.85,
        popup=f"{city.City}: {city.TemperatureDelta:.2f}°C"
    ).add_to(temperature_map)

# Add the color legend to the map.
colormap.add_to(temperature_map)

# Save the map to an HTML file.
temperature_map.save('temperature_map.html')

# Display the map as the cell output.
temperature_map
