# An Analysis of Global Warming
***

## Introduction

## Business Understanding

## Data Understanding

In [9]:
import pandas as pd
import numpy as np
import seaborn as sns
import plotly.express as px
from plotly.offline import init_notebook_mode
init_notebook_mode(connected=True)
from opencage.geocoder import OpenCageGeocode
import folium
from folium.plugins import HeatMap
import plotly.graph_objs as go

In [2]:
global_temp_country = pd.read_csv('Data/GlobalLandTemperaturesByCountry.csv')
global_temp_country.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,Country
0,1743-11-01,4.384,2.294,Åland
1,1743-12-01,,,Åland
2,1744-01-01,,,Åland
3,1744-02-01,,,Åland
4,1744-03-01,,,Åland


In [3]:
global_temp_country.shape

(577462, 4)

## Data Preprocessing

In [4]:
global_temp_country.isna().sum()

dt                                   0
AverageTemperature               32651
AverageTemperatureUncertainty    31912
Country                              0
dtype: int64

In [5]:
global_temp_country.dropna(subset=['AverageTemperature'], inplace=True)

global_temp_country.isna().sum()

dt                               0
AverageTemperature               0
AverageTemperatureUncertainty    0
Country                          0
dtype: int64

In [6]:
global_temp_country['Country'].nunique()

242

In [7]:
global_temp_country['Country'].unique()

array(['Åland', 'Afghanistan', 'Africa', 'Albania', 'Algeria',
       'American Samoa', 'Andorra', 'Angola', 'Anguilla',
       'Antigua And Barbuda', 'Argentina', 'Armenia', 'Aruba', 'Asia',
       'Australia', 'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain',
       'Baker Island', 'Bangladesh', 'Barbados', 'Belarus', 'Belgium',
       'Belize', 'Benin', 'Bhutan', 'Bolivia',
       'Bonaire, Saint Eustatius And Saba', 'Bosnia And Herzegovina',
       'Botswana', 'Brazil', 'British Virgin Islands', 'Bulgaria',
       'Burkina Faso', 'Burma', 'Burundi', "Côte D'Ivoire", 'Cambodia',
       'Cameroon', 'Canada', 'Cape Verde', 'Cayman Islands',
       'Central African Republic', 'Chad', 'Chile', 'China',
       'Christmas Island', 'Colombia', 'Comoros',
       'Congo (Democratic Republic Of The)', 'Congo', 'Costa Rica',
       'Croatia', 'Cuba', 'Curaçao', 'Cyprus', 'Czech Republic',
       'Denmark (Europe)', 'Denmark', 'Djibouti', 'Dominica',
       'Dominican Republic', 'Ecuador', 'Egypt'

In [8]:
replace_dict={
    'France (Europe)': 'France',
    'United Kingdom (Europe)': 'United Kingdom',
    'Denmark (Europe)': 'Denmark',
    'Netherlands (Europe)': 'Netherlands'
}

global_temp_country['Country'].replace(replace_dict, inplace=True)

## Analysis of Average Temperature by Country

In [9]:
avg_temp = global_temp_country.groupby(['Country'])['AverageTemperature'].mean().to_frame().reset_index()
avg_temp.tail()

Unnamed: 0,Country,AverageTemperature
233,Western Sahara,22.319818
234,Yemen,26.253597
235,Zambia,21.282956
236,Zimbabwe,21.117547
237,Åland,5.291383


In [10]:
fig = px.choropleth(avg_temp, locations='Country', locationmode='country names', color='AverageTemperature')
fig.update_layout(title='Choropleth Map of Average Temperature by Country') 
fig.show()

## Global Average Land Temperature 

In [11]:
global_temp = pd.read_csv('Data/GlobalTemperatures.csv')
global_temp.head()

Unnamed: 0,dt,LandAverageTemperature,LandAverageTemperatureUncertainty,LandMaxTemperature,LandMaxTemperatureUncertainty,LandMinTemperature,LandMinTemperatureUncertainty,LandAndOceanAverageTemperature,LandAndOceanAverageTemperatureUncertainty
0,1750-01-01,3.034,3.574,,,,,,
1,1750-02-01,3.083,3.702,,,,,,
2,1750-03-01,5.626,3.076,,,,,,
3,1750-04-01,8.49,2.451,,,,,,
4,1750-05-01,11.573,2.072,,,,,,


In [12]:
global_temp['dt'][3].split('-')[0]

'1750'

In [13]:
def fetch_year(date):
    return date.split('-')[0]

global_temp['Year'] = global_temp['dt'].apply(fetch_year)

global_temp.head()

Unnamed: 0,dt,LandAverageTemperature,LandAverageTemperatureUncertainty,LandMaxTemperature,LandMaxTemperatureUncertainty,LandMinTemperature,LandMinTemperatureUncertainty,LandAndOceanAverageTemperature,LandAndOceanAverageTemperatureUncertainty,Year
0,1750-01-01,3.034,3.574,,,,,,,1750
1,1750-02-01,3.083,3.702,,,,,,,1750
2,1750-03-01,5.626,3.076,,,,,,,1750
3,1750-04-01,8.49,2.451,,,,,,,1750
4,1750-05-01,11.573,2.072,,,,,,,1750


In [14]:
avg_land_temp = global_temp.groupby('Year').agg({'LandAverageTemperature': 'mean', 'LandAverageTemperatureUncertainty': 'mean'}).reset_index()
avg_land_temp.head()

Unnamed: 0,Year,LandAverageTemperature,LandAverageTemperatureUncertainty
0,1750,8.719364,2.637818
1,1751,7.976143,2.781143
2,1752,5.779833,2.977
3,1753,8.388083,3.176
4,1754,8.469333,3.49425


In [15]:
avg_land_temp['Uncertainity Top'] = avg_land_temp['LandAverageTemperature'] + avg_land_temp['LandAverageTemperatureUncertainty']

avg_land_temp['Uncertainity Bottom'] = avg_land_temp['LandAverageTemperature'] - avg_land_temp['LandAverageTemperatureUncertainty']

# 
avg_land_temp.head()

Unnamed: 0,Year,LandAverageTemperature,LandAverageTemperatureUncertainty,Uncertainity Top,Uncertainity Bottom
0,1750,8.719364,2.637818,11.357182,6.081545
1,1751,7.976143,2.781143,10.757286,5.195
2,1752,5.779833,2.977,8.756833,2.802833
3,1753,8.388083,3.176,11.564083,5.212083
4,1754,8.469333,3.49425,11.963583,4.975083


In [16]:
fig = px.line(avg_land_temp, x='Year', 
        y=['LandAverageTemperature', 'LandAverageTemperatureUncertainty', 'Uncertainity Top', 'Uncertainity Bottom'],
       title='Global Average Land Temperature')
fig.show()

## Global Average Land Temperature by Season

In [17]:
global_temp.head()

Unnamed: 0,dt,LandAverageTemperature,LandAverageTemperatureUncertainty,LandMaxTemperature,LandMaxTemperatureUncertainty,LandMinTemperature,LandMinTemperatureUncertainty,LandAndOceanAverageTemperature,LandAndOceanAverageTemperatureUncertainty,Year
0,1750-01-01,3.034,3.574,,,,,,,1750
1,1750-02-01,3.083,3.702,,,,,,,1750
2,1750-03-01,5.626,3.076,,,,,,,1750
3,1750-04-01,8.49,2.451,,,,,,,1750
4,1750-05-01,11.573,2.072,,,,,,,1750


In [18]:
global_temp['dt'].dtype #string

dtype('O')

In [19]:
global_temp['dt'] = pd.to_datetime(global_temp['dt'])

In [20]:
global_temp['month'] = global_temp['dt'].dt.month

global_temp.head()

Unnamed: 0,dt,LandAverageTemperature,LandAverageTemperatureUncertainty,LandMaxTemperature,LandMaxTemperatureUncertainty,LandMinTemperature,LandMinTemperatureUncertainty,LandAndOceanAverageTemperature,LandAndOceanAverageTemperatureUncertainty,Year,month
0,1750-01-01,3.034,3.574,,,,,,,1750,1
1,1750-02-01,3.083,3.702,,,,,,,1750,2
2,1750-03-01,5.626,3.076,,,,,,,1750,3
3,1750-04-01,8.49,2.451,,,,,,,1750,4
4,1750-05-01,11.573,2.072,,,,,,,1750,5


In [21]:
def get_season(month):
    if month >= 3 and month <= 5:
        return 'spring'
    elif month >= 6 and month <= 8:
        return 'summer'
    elif month >= 9 and month <= 11:
        return 'autumn'
    else:
        return 'winter'
    
global_temp['season'] = global_temp['month'].apply(get_season)

global_temp.head()

Unnamed: 0,dt,LandAverageTemperature,LandAverageTemperatureUncertainty,LandMaxTemperature,LandMaxTemperatureUncertainty,LandMinTemperature,LandMinTemperatureUncertainty,LandAndOceanAverageTemperature,LandAndOceanAverageTemperatureUncertainty,Year,month,season
0,1750-01-01,3.034,3.574,,,,,,,1750,1,winter
1,1750-02-01,3.083,3.702,,,,,,,1750,2,winter
2,1750-03-01,5.626,3.076,,,,,,,1750,3,spring
3,1750-04-01,8.49,2.451,,,,,,,1750,4,spring
4,1750-05-01,11.573,2.072,,,,,,,1750,5,spring


In [22]:
years = global_temp['Year'].unique()


spring_temps = []
autumn_temps = []
summer_temps = []    
winter_temps = []


for year in years:
    year_df = global_temp[global_temp['Year']==year]
    spring_temps.append(year_df[year_df['season']=='spring']['LandAverageTemperature'].mean())
    autumn_temps.append(year_df[year_df['season']=='autumn']['LandAverageTemperature'].mean())
    summer_temps.append(year_df[year_df['season']=='summer']['LandAverageTemperature'].mean())
    winter_temps.append(year_df[year_df['season']=='winter']['LandAverageTemperature'].mean())
    
winter_temps    

[2.9630000000000005,
 1.729,
 2.7170000000000005,
 1.1043333333333332,
 1.6373333333333333,
 1.1743333333333332,
 3.906,
 1.4829999999999999,
 0.8506666666666667,
 3.1143333333333327,
 1.3926666666666667,
 0.8326666666666666,
 4.930000000000001,
 1.8413333333333333,
 4.406,
 3.3369999999999997,
 2.1453333333333333,
 1.4926666666666666,
 0.36433333333333334,
 1.1636666666666666,
 3.6043333333333334,
 2.449666666666667,
 2.747666666666667,
 2.0249999999999995,
 1.6330000000000002,
 3.9906666666666673,
 3.013333333333333,
 4.6690000000000005,
 3.571999999999999,
 2.9596666666666667,
 3.0596666666666668,
 2.6519999999999997,
 0.967,
 2.029,
 2.3333333333333335,
 1.261,
 1.3846666666666667,
 2.4013333333333335,
 1.856,
 2.8266666666666667,
 2.724,
 2.828333333333333,
 2.452,
 2.4343333333333335,
 3.411,
 3.2313333333333336,
 2.279666666666667,
 4.209333333333333,
 2.639666666666667,
 2.496,
 3.4376666666666664,
 3.4713333333333334,
 3.1496666666666666,
 1.4573333333333334,
 2.20266666666666

In [23]:
season = pd.DataFrame()

season['year'] = years
season['spring_temps'] = spring_temps
season['summer_temps'] = summer_temps
season['winter_temps'] = winter_temps
season['autumn_temps'] = autumn_temps

season.head()

Unnamed: 0,year,spring_temps,summer_temps,winter_temps,autumn_temps
0,1750,8.563,14.518333,2.963,8.89
1,1751,6.735,14.116,1.729,10.673
2,1752,7.0355,,2.717,7.587
3,1753,8.627333,14.608333,1.104333,9.212333
4,1754,9.074333,14.208333,1.637333,8.957333


In [24]:
fig = px.line(season, x='year', y=['spring_temps', 'summer_temps', 'winter_temps', 'autumn_temps'],
       title='Average Temperature for Each Season')
fig.show()

## Temperature Trends for the Largest Economies

In [25]:
countries = ['Russia', 'United States', 'India', 'Japan', 
             'China', 'Australia']

countries_df = global_temp_country[global_temp_country['Country'].isin(countries)]
countries_df.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,Country
34816,1852-07-01,14.116,1.53,Australia
34817,1852-08-01,15.33,1.4,Australia
34818,1852-09-01,18.74,1.446,Australia
34819,1852-10-01,21.984,1.493,Australia
34820,1852-11-01,24.073,1.466,Australia


In [26]:
countries_df['years'] = countries_df['dt'].apply(fetch_year)
countries_df.head()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,Country,years
34816,1852-07-01,14.116,1.53,Australia,1852
34817,1852-08-01,15.33,1.4,Australia,1852
34818,1852-09-01,18.74,1.446,Australia,1852
34819,1852-10-01,21.984,1.493,Australia,1852
34820,1852-11-01,24.073,1.466,Australia,1852


In [27]:
avg_countries_temp = countries_df.groupby(['years', 'Country']).agg({'AverageTemperature': 'mean'}).reset_index()
avg_countries_temp.head()

Unnamed: 0,years,Country,AverageTemperature
0,1768,United States,5.57275
1,1769,United States,10.4465
2,1774,United States,1.603
3,1775,United States,9.499167
4,1776,United States,8.11


In [28]:
fig = px.line(avg_countries_temp, x='years', y='AverageTemperature', color='Country',
       title='Average Land Temperature of Biggest Economies')
fig.show()

## Average Temperature of USA States

In [29]:
temp_state = pd.read_csv('Data/GlobalLandTemperaturesByState.csv')

temp_state.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,State,Country
0,1855-05-01,25.544,1.171,Acre,Brazil
1,1855-06-01,24.228,1.103,Acre,Brazil
2,1855-07-01,24.371,1.044,Acre,Brazil
3,1855-08-01,25.427,1.073,Acre,Brazil
4,1855-09-01,25.675,1.014,Acre,Brazil


In [30]:
temp_usa = temp_state[temp_state['Country']=='United States']

temp_usa.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,State,Country
7458,1743-11-01,10.722,2.898,Alabama,United States
7459,1743-12-01,,,Alabama,United States
7460,1744-01-01,,,Alabama,United States
7461,1744-02-01,,,Alabama,United States
7462,1744-03-01,,,Alabama,United States


In [31]:
temp_usa.dropna(inplace=True)



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [32]:
temp_usa['State'].unique()

array(['Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California',
       'Colorado', 'Connecticut', 'Delaware', 'District Of Columbia',
       'Florida', 'Georgia (State)', 'Hawaii', 'Idaho', 'Illinois',
       'Indiana', 'Iowa', 'Kansas', 'Kentucky', 'Louisiana', 'Maine',
       'Maryland', 'Massachusetts', 'Michigan', 'Minnesota',
       'Mississippi', 'Missouri', 'Montana', 'Nebraska', 'Nevada',
       'New Hampshire', 'New Jersey', 'New Mexico', 'New York',
       'North Carolina', 'North Dakota', 'Ohio', 'Oklahoma', 'Oregon',
       'Pennsylvania', 'Rhode Island', 'South Carolina', 'South Dakota',
       'Tennessee', 'Texas', 'Utah', 'Vermont', 'Virginia', 'Washington',
       'West Virginia', 'Wisconsin', 'Wyoming'], dtype=object)

In [33]:
replace_dict = {'Georgia (State)': 'Georgia'}

temp_usa['State'].replace(replace_dict, inplace=True)



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [34]:
temp_usa = temp_usa[['AverageTemperature', 'State']]

temp_usa.head()

Unnamed: 0,AverageTemperature,State
7458,10.722,Alabama
7463,19.075,Alabama
7464,21.197,Alabama
7465,25.29,Alabama
7466,26.42,Alabama


In [35]:
temp_usa = temp_usa.groupby('State')['AverageTemperature'].mean().reset_index()

temp_usa.head()

Unnamed: 0,State,AverageTemperature
0,Alabama,17.066138
1,Alaska,-4.890738
2,Arizona,15.381526
3,Arkansas,15.573963
4,California,14.327677


In [36]:
# Use API key to establish connection 
# Signup at opencagedata.com to get API key

key = '23d076ad91a9499cb7b31b3be62c7bbc'
geocoder = OpenCageGeocode(key)

In [37]:
location = 'Membley. Kenya'
results = geocoder.geocode(location)
results

[{'annotations': {'DMS': {'lat': "1° 9' 35.47440'' S",
    'lng': "36° 56' 3.37128'' E"},
   'MGRS': '37MBU7013271717',
   'Maidenhead': 'KI88lu21cp',
   'Mercator': {'x': 4111504.107, 'y': -128258.894},
   'OSM': {'edit_url': 'https://www.openstreetmap.org/edit?node=4344463196#map=16/-1.15985/36.93427',
    'note_url': 'https://www.openstreetmap.org/note/new#map=16/-1.15985/36.93427&layers=N',
    'url': 'https://www.openstreetmap.org/?mlat=-1.15985&mlon=36.93427#map=16/-1.15985/36.93427'},
   'UN_M49': {'regions': {'AFRICA': '002',
     'EASTERN_AFRICA': '014',
     'KE': '404',
     'SUB-SAHARAN_AFRICA': '202',
     'WORLD': '001'},
    'statistical_groupings': ['LEDC']},
   'callingcode': 254,
   'currency': {'alternate_symbols': ['Sh'],
    'decimal_mark': '.',
    'html_entity': '',
    'iso_code': 'KES',
    'iso_numeric': '404',
    'name': 'Kenyan Shilling',
    'smallest_denomination': 50,
    'subunit': 'Cent',
    'subunit_to_unit': 100,
    'symbol': 'KSh',
    'symbol_fir

In [38]:
print(results[0]['geometry']['lat'])
print(results[0]['geometry']['lng'])

-1.159854
36.9342698


In [39]:
lat_list = []
lng_list = []

for state in temp_usa['State']:
    results = geocoder.geocode(state)
    lat = results[0]['geometry']['lat']
    lng = results[0]['geometry']['lng']
    # Add
    lat_list.append(lat)
    lng_list.append(lng)

In [40]:
temp_usa['lat'] = lat_list
temp_usa['lng'] = lng_list

temp_usa.head()

Unnamed: 0,State,AverageTemperature,lat,lng
0,Alabama,17.066138,33.258882,-86.829534
1,Alaska,-4.890738,64.445961,-149.680909
2,Arizona,15.381526,34.395342,-111.763275
3,Arkansas,15.573963,35.204888,-92.447911
4,California,14.327677,36.701463,-118.755997


In [41]:
basemap = folium.Map()

HeatMap(temp_usa[['lat', 'lng', 'AverageTemperature']]).add_to(basemap)
basemap

## Average Temperature of Major European Cities by Month

In [10]:
temp_cities = pd.read_csv('Data/GlobalLandTemperaturesByCity.csv')

temp_cities.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,City,Country,Latitude,Longitude
0,1743-11-01,6.068,1.737,Århus,Denmark,57.05N,10.33E
1,1743-12-01,,,Århus,Denmark,57.05N,10.33E
2,1744-01-01,,,Århus,Denmark,57.05N,10.33E
3,1744-02-01,,,Århus,Denmark,57.05N,10.33E
4,1744-03-01,,,Århus,Denmark,57.05N,10.33E


In [11]:
#
european_countries = ['Turkey', 'Russia', 'France', 'United Kingdom', 'Spain',
                     'Italy', 'Germany', 'Greece', 'Ukraine', 'Portugal', 'Belgium',
                     'Belarus', 'Austria', 'Switzerland', 'Denmark', 'Finland', 'Norway']

temp_cities = temp_cities[temp_cities['Country'].isin(european_countries)]

#
major_cities = ['Istanbul', 'Moscow', 'London', 'Paris', 'Berlin', 'Madrid', 'Vienna'
               'Barcelona', 'Minsk', 'Rome', 'Athens', 'Milan', 'Kiev', 'Saint Petersburg'
               'Lisbon', 'Manchester', 'Naples', 'Brussels', 'Copenhagen', 'Helsinki', 'Geneva']

#
temp_major_eur_cities = temp_cities[temp_cities['City'].isin(major_cities)]

#
temp_major_eur_cities.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,City,Country,Latitude,Longitude
460994,1753-01-01,8.77,5.135,Athens,Greece,37.78N,24.41E
460995,1753-02-01,8.46,2.932,Athens,Greece,37.78N,24.41E
460996,1753-03-01,12.955,2.759,Athens,Greece,37.78N,24.41E
460997,1753-04-01,14.956,4.846,Athens,Greece,37.78N,24.41E
460998,1753-05-01,20.138,2.034,Athens,Greece,37.78N,24.41E


In [12]:
temp_major_eur_cities['Latitude'] = temp_major_eur_cities['Latitude'].str.strip('N')
temp_major_eur_cities['Longitude'] = temp_major_eur_cities['Longitude'].str.strip('E')
temp_major_eur_cities['Longitude'] = temp_major_eur_cities['Longitude'].str.strip('W')

temp_major_eur_cities.head()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,City,Country,Latitude,Longitude
460994,1753-01-01,8.77,5.135,Athens,Greece,37.78,24.41
460995,1753-02-01,8.46,2.932,Athens,Greece,37.78,24.41
460996,1753-03-01,12.955,2.759,Athens,Greece,37.78,24.41
460997,1753-04-01,14.956,4.846,Athens,Greece,37.78,24.41
460998,1753-05-01,20.138,2.034,Athens,Greece,37.78,24.41


In [13]:
temp_major_eur_cities['dt'] = pd.to_datetime(temp_major_eur_cities['dt'])

temp_major_eur_cities['month'] = temp_major_eur_cities['dt'].dt.month

temp_major_eur_cities.head()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,City,Country,Latitude,Longitude,month
460994,1753-01-01,8.77,5.135,Athens,Greece,37.78,24.41,1
460995,1753-02-01,8.46,2.932,Athens,Greece,37.78,24.41,2
460996,1753-03-01,12.955,2.759,Athens,Greece,37.78,24.41,3
460997,1753-04-01,14.956,4.846,Athens,Greece,37.78,24.41,4
460998,1753-05-01,20.138,2.034,Athens,Greece,37.78,24.41,5


In [14]:
temp_major_eur_cities_month = temp_major_eur_cities.groupby(['month', 'City'])['AverageTemperature'].mean().reset_index()
temp_major_eur_cities_month

Unnamed: 0,month,City,AverageTemperature
0,1,Athens,9.867383
1,1,Berlin,-0.959668
2,1,Brussels,1.833438
3,1,Copenhagen,-0.677842
4,1,Geneva,-0.744072
...,...,...,...
199,12,Minsk,-4.866555
200,12,Moscow,-7.515954
201,12,Naples,6.785251
202,12,Paris,3.442567


In [15]:
temp_major_eur_cities_month.columns = ['Month', 'City', 'Mean_temperature']

In [16]:
df = temp_major_eur_cities_month.merge(temp_major_eur_cities, on='City')
df.tail()

Unnamed: 0,Month,City,Mean_temperature,dt,AverageTemperature,AverageTemperatureUncertainty,Country,Latitude,Longitude,month
659431,12,Rome,3.966848,2013-05-01,14.863,0.407,Italy,42.59,13.09,5
659432,12,Rome,3.966848,2013-06-01,19.338,0.482,Italy,42.59,13.09,6
659433,12,Rome,3.966848,2013-07-01,23.375,0.598,Italy,42.59,13.09,7
659434,12,Rome,3.966848,2013-08-01,23.556,0.682,Italy,42.59,13.09,8
659435,12,Rome,3.966848,2013-09-01,,,Italy,42.59,13.09,9


In [17]:
df = df.drop_duplicates(subset=['Month', 'City'])
df.head()

Unnamed: 0,Month,City,Mean_temperature,dt,AverageTemperature,AverageTemperatureUncertainty,Country,Latitude,Longitude,month
0,1,Athens,9.867383,1753-01-01,8.77,5.135,Greece,37.78,24.41,1
3129,2,Athens,10.171625,1753-01-01,8.77,5.135,Greece,37.78,24.41,1
6258,3,Athens,11.739188,1753-01-01,8.77,5.135,Greece,37.78,24.41,1
9387,4,Athens,14.839464,1753-01-01,8.77,5.135,Greece,37.78,24.41,1
12516,5,Athens,19.31595,1753-01-01,8.77,5.135,Greece,37.78,24.41,1


In [18]:
df = df[['Month', 'City', 'Mean_temperature', 'Country', 'Latitude', 'Longitude']]
df.head()

Unnamed: 0,Month,City,Mean_temperature,Country,Latitude,Longitude
0,1,Athens,9.867383,Greece,37.78,24.41
3129,2,Athens,10.171625,Greece,37.78,24.41
6258,3,Athens,11.739188,Greece,37.78,24.41
9387,4,Athens,14.839464,Greece,37.78,24.41
12516,5,Athens,19.31595,Greece,37.78,24.41


In [19]:
data = [go.Heatmap(x=df['Month'], y=df['City'], z=df['Mean_temperature'])]

layout = go.Layout(title='Average Temperature of Major European Cities by Month')

fig = go.Figure(data=data, layout=layout)
fig.show()

## Spatial Analysis on Average Temperature of Major European Cities

In [20]:
basemap2 = folium.Map()
df.head()

Unnamed: 0,Month,City,Mean_temperature,Country,Latitude,Longitude
0,1,Athens,9.867383,Greece,37.78,24.41
3129,2,Athens,10.171625,Greece,37.78,24.41
6258,3,Athens,11.739188,Greece,37.78,24.41
9387,4,Athens,14.839464,Greece,37.78,24.41
12516,5,Athens,19.31595,Greece,37.78,24.41


In [22]:
for id, row in df.iterrows():
    folium.Marker(location=[row['Latitude'], row['Longitude']],
                  popup=f"Mean Temperature: {row['Mean_temperature']}°C<br>City: {row['City']}").add_to(basemap2)
    
basemap2