In [1]:
import matplotlib.pyplot as plt
import plotly
import plotly.graph_objs as go
import plotly.tools as tls
import seaborn as sns
import time
import warnings
import pandas as pd
warnings.filterwarnings('ignore')

In [2]:
global_tempby_country = pd.read_csv('GlobalLandTemperaturesByCountry.csv')
global_tempby_country.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,Country
0,1743-11-01,4.384,2.294,Åland
1,1743-12-01,,,Åland
2,1744-01-01,,,Åland
3,1744-02-01,,,Åland
4,1744-03-01,,,Åland


In [3]:
###CHECKING AND DROPING ANY MISSING DATA

In [4]:
global_tempby_country.isna().sum()

dt                                   0
AverageTemperature               32651
AverageTemperatureUncertainty    31912
Country                              0
dtype: int64

In [5]:
global_tempby_country.dropna(axis='index', 
                             how ='any', 
                             subset=['AverageTemperature',
                                     'AverageTemperatureUncertainty'], 
                             inplace = True)

In [6]:
global_tempby_country.isna().sum()

dt                               0
AverageTemperature               0
AverageTemperatureUncertainty    0
Country                          0
dtype: int64

In [7]:
###CHECKING IF THERE IS DUPLICATES IN THE COUNTRY VALUES

In [8]:
global_tempby_country['Country'].unique()

array(['Åland', 'Afghanistan', 'Africa', 'Albania', 'Algeria',
       'American Samoa', 'Andorra', 'Angola', 'Anguilla',
       'Antigua And Barbuda', 'Argentina', 'Armenia', 'Aruba', 'Asia',
       'Australia', 'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain',
       'Baker Island', 'Bangladesh', 'Barbados', 'Belarus', 'Belgium',
       'Belize', 'Benin', 'Bhutan', 'Bolivia',
       'Bonaire, Saint Eustatius And Saba', 'Bosnia And Herzegovina',
       'Botswana', 'Brazil', 'British Virgin Islands', 'Bulgaria',
       'Burkina Faso', 'Burma', 'Burundi', "Côte D'Ivoire", 'Cambodia',
       'Cameroon', 'Canada', 'Cape Verde', 'Cayman Islands',
       'Central African Republic', 'Chad', 'Chile', 'China',
       'Christmas Island', 'Colombia', 'Comoros',
       'Congo (Democratic Republic Of The)', 'Congo', 'Costa Rica',
       'Croatia', 'Cuba', 'Curaçao', 'Cyprus', 'Czech Republic',
       'Denmark (Europe)', 'Denmark', 'Djibouti', 'Dominica',
       'Dominican Republic', 'Ecuador', 'Egypt'

In [9]:
global_tempby_country['Country'].nunique()

242

In [10]:
#Removing the duplicates
dict={'Denmark (Europe)':'Denmark',
      'France (Europe)':'France',
      'Netherlands (Europe)':'Netherlands',
      'United Kingdom (Europe)':'United Kingdom',
     'Congo (Democratic Republic Of The)':'Congo'}
global_tempby_country['Country']=global_tempby_country['Country'].replace(dict)
global_tempby_country['Country'].unique()

array(['Åland', 'Afghanistan', 'Africa', 'Albania', 'Algeria',
       'American Samoa', 'Andorra', 'Angola', 'Anguilla',
       'Antigua And Barbuda', 'Argentina', 'Armenia', 'Aruba', 'Asia',
       'Australia', 'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain',
       'Baker Island', 'Bangladesh', 'Barbados', 'Belarus', 'Belgium',
       'Belize', 'Benin', 'Bhutan', 'Bolivia',
       'Bonaire, Saint Eustatius And Saba', 'Bosnia And Herzegovina',
       'Botswana', 'Brazil', 'British Virgin Islands', 'Bulgaria',
       'Burkina Faso', 'Burma', 'Burundi', "Côte D'Ivoire", 'Cambodia',
       'Cameroon', 'Canada', 'Cape Verde', 'Cayman Islands',
       'Central African Republic', 'Chad', 'Chile', 'China',
       'Christmas Island', 'Colombia', 'Comoros', 'Congo', 'Costa Rica',
       'Croatia', 'Cuba', 'Curaçao', 'Cyprus', 'Czech Republic',
       'Denmark', 'Djibouti', 'Dominica', 'Dominican Republic', 'Ecuador',
       'Egypt', 'El Salvador', 'Equatorial Guinea', 'Eritrea', 'Estonia',
     

In [11]:
global_tempby_country['Country'].nunique()

237

# Average temperature for each country

In [12]:
avg_temp=global_tempby_country.groupby(['Country'])['AverageTemperature'].mean().to_frame().reset_index()
avg_temp.head()

Unnamed: 0,Country,AverageTemperature
0,Afghanistan,14.045007
1,Africa,24.074203
2,Albania,12.610646
3,Algeria,22.985112
4,American Samoa,26.611965


In [13]:
import plotly.express as px
import plotly.graph_objs as go
import pandas as pd

from plotly.offline import download_plotlyjs, init_notebook_mode, iplot, plot
init_notebook_mode(connected = True)

In [14]:
avg_tempfig=px.choropleth(avg_temp,
                          locations='Country',
                          locationmode='country names',
                          color='AverageTemperature')
avg_tempfig.update_layout(title='Map of Global AverageTemperature per country')
avg_tempfig.show()

Globally average temerature per country ranges from -18.6 to 28.4 degree celcius; Mali recording the highest of 28.44 and Greenland recording an average of -18.57

# Global Warming Analysis

We are going to read the 'GlobalTemperatures.csv' and plot a chart

In [15]:
global_temp = pd.read_csv('GlobalTemperatures.csv')
global_temp.head()

Unnamed: 0,dt,LandAverageTemperature,LandAverageTemperatureUncertainty,LandMaxTemperature,LandMaxTemperatureUncertainty,LandMinTemperature,LandMinTemperatureUncertainty,LandAndOceanAverageTemperature,LandAndOceanAverageTemperatureUncertainty
0,1750-01-01,3.034,3.574,,,,,,
1,1750-02-01,3.083,3.702,,,,,,
2,1750-03-01,5.626,3.076,,,,,,
3,1750-04-01,8.49,2.451,,,,,,
4,1750-05-01,11.573,2.072,,,,,,


In [16]:
#FETCHING THE YEAR VALUES FROM 'dt' column
global_temp['dt'][0].split('-')[0] #splits the month and day from the date

def fetch_year(date):
    return date.split('-')[0]

global_temp['years']=global_temp['dt'].apply(fetch_year) #fetches the year from the 'dt' column

In [17]:
global_temp.groupby('years').agg({'LandAverageTemperature':'mean',
                                  'LandAverageTemperatureUncertainty':'mean'})

Unnamed: 0_level_0,LandAverageTemperature,LandAverageTemperatureUncertainty
years,Unnamed: 1_level_1,Unnamed: 2_level_1
1750,8.719364,2.637818
1751,7.976143,2.781143
1752,5.779833,2.977000
1753,8.388083,3.176000
1754,8.469333,3.494250
...,...,...
2011,9.516000,0.082000
2012,9.507333,0.083417
2013,9.606500,0.097667
2014,9.570667,0.090167


In [18]:
df=global_temp.groupby('years').agg({'LandAverageTemperature':'mean',
                                     'LandAverageTemperatureUncertainty':'mean'}).reset_index()
df.head()

Unnamed: 0,years,LandAverageTemperature,LandAverageTemperatureUncertainty
0,1750,8.719364,2.637818
1,1751,7.976143,2.781143
2,1752,5.779833,2.977
3,1753,8.388083,3.176
4,1754,8.469333,3.49425


In [19]:
###DATA FOR ANALYSIS PURPOSES
df['Uncertainty top']=df['LandAverageTemperature']+df['LandAverageTemperatureUncertainty']
df['Uncertainty bottom']=df['LandAverageTemperature']-df['LandAverageTemperatureUncertainty']

In [20]:
df.head()

Unnamed: 0,years,LandAverageTemperature,LandAverageTemperatureUncertainty,Uncertainty top,Uncertainty bottom
0,1750,8.719364,2.637818,11.357182,6.081545
1,1751,7.976143,2.781143,10.757286,5.195
2,1752,5.779833,2.977,8.756833,2.802833
3,1753,8.388083,3.176,11.564083,5.212083
4,1754,8.469333,3.49425,11.963583,4.975083


In [21]:
global_tempfig=px.line(df,
                       x="years",y=["Uncertainty top","Uncertainty bottom","LandAverageTemperature"],
                       title="Global average land Temperature")
global_tempfig.show()

As seen from the chart, there have been a sharp increase in land average tempearature for the last 30years. This is a clear indication of global warming. Humans on earth are therefore advised to use ozone friendly source of energy to reduce the risk of adversity.

# Visualizing Average Temperature per season

In [22]:
global_temp.head()

Unnamed: 0,dt,LandAverageTemperature,LandAverageTemperatureUncertainty,LandMaxTemperature,LandMaxTemperatureUncertainty,LandMinTemperature,LandMinTemperatureUncertainty,LandAndOceanAverageTemperature,LandAndOceanAverageTemperatureUncertainty,years
0,1750-01-01,3.034,3.574,,,,,,,1750
1,1750-02-01,3.083,3.702,,,,,,,1750
2,1750-03-01,5.626,3.076,,,,,,,1750
3,1750-04-01,8.49,2.451,,,,,,,1750
4,1750-05-01,11.573,2.072,,,,,,,1750


In [23]:
###TO ANALYSE THE DATA BASED ON SEASONS WE HAVE TO COME UP WITH A MONTH COLUMN; THIS SHALL ENABLE US TO KNOW THE SEASONS OF THE YEAR(Autumn, Spring, Winter and Summer)

In [24]:
global_temp['dt']=pd.to_datetime(global_temp['dt'])
global_temp['month'] = global_temp['dt'].dt.month
global_temp.drop('dt',axis=1,inplace=True)
global_temp.head()

Unnamed: 0,LandAverageTemperature,LandAverageTemperatureUncertainty,LandMaxTemperature,LandMaxTemperatureUncertainty,LandMinTemperature,LandMinTemperatureUncertainty,LandAndOceanAverageTemperature,LandAndOceanAverageTemperatureUncertainty,years,month
0,3.034,3.574,,,,,,,1750,1
1,3.083,3.702,,,,,,,1750,2
2,5.626,3.076,,,,,,,1750,3
3,8.49,2.451,,,,,,,1750,4
4,11.573,2.072,,,,,,,1750,5


In [25]:
global_temp.dtypes

LandAverageTemperature                       float64
LandAverageTemperatureUncertainty            float64
LandMaxTemperature                           float64
LandMaxTemperatureUncertainty                float64
LandMinTemperature                           float64
LandMinTemperatureUncertainty                float64
LandAndOceanAverageTemperature               float64
LandAndOceanAverageTemperatureUncertainty    float64
years                                         object
month                                          int64
dtype: object

# Analysis of Average temperature per season

In [26]:
#Fetching the Seasons
def get_season(month):
    if month >= 3 and month <= 5:
        return 'spring'
    elif month >= 6 and month <= 8:
        return 'summer'
    elif month >= 9 and month <= 11:
        return 'autumn'
    else:
        return 'winter'

In [27]:
#Adding Seasons to global_temp dataframe
global_temp['season'] = global_temp['month'].apply(get_season)
global_temp.head()

Unnamed: 0,LandAverageTemperature,LandAverageTemperatureUncertainty,LandMaxTemperature,LandMaxTemperatureUncertainty,LandMinTemperature,LandMinTemperatureUncertainty,LandAndOceanAverageTemperature,LandAndOceanAverageTemperatureUncertainty,years,month,season
0,3.034,3.574,,,,,,,1750,1,winter
1,3.083,3.702,,,,,,,1750,2,winter
2,5.626,3.076,,,,,,,1750,3,spring
3,8.49,2.451,,,,,,,1750,4,spring
4,11.573,2.072,,,,,,,1750,5,spring


In [28]:
global_temp['years'].unique()

array(['1750', '1751', '1752', '1753', '1754', '1755', '1756', '1757',
       '1758', '1759', '1760', '1761', '1762', '1763', '1764', '1765',
       '1766', '1767', '1768', '1769', '1770', '1771', '1772', '1773',
       '1774', '1775', '1776', '1777', '1778', '1779', '1780', '1781',
       '1782', '1783', '1784', '1785', '1786', '1787', '1788', '1789',
       '1790', '1791', '1792', '1793', '1794', '1795', '1796', '1797',
       '1798', '1799', '1800', '1801', '1802', '1803', '1804', '1805',
       '1806', '1807', '1808', '1809', '1810', '1811', '1812', '1813',
       '1814', '1815', '1816', '1817', '1818', '1819', '1820', '1821',
       '1822', '1823', '1824', '1825', '1826', '1827', '1828', '1829',
       '1830', '1831', '1832', '1833', '1834', '1835', '1836', '1837',
       '1838', '1839', '1840', '1841', '1842', '1843', '1844', '1845',
       '1846', '1847', '1848', '1849', '1850', '1851', '1852', '1853',
       '1854', '1855', '1856', '1857', '1858', '1859', '1860', '1861',
      

In [29]:
years = global_temp['years'].unique()

In [30]:
#Extracting average temperature for each season
spring_temps = []
summer_temps = []
autumn_temps = []
winter_temps = []

for year in years:
        currentdf= global_temp[global_temp['years']==year]
        spring_temps.append(currentdf[currentdf['season'] == 'spring']['LandAverageTemperature'].mean())
        summer_temps.append(currentdf[currentdf['season'] == 'summer']['LandAverageTemperature'].mean())
        autumn_temps.append(currentdf[currentdf['season'] == 'autumn']['LandAverageTemperature'].mean())
        winter_temps.append(currentdf[currentdf['season'] == 'winter']['LandAverageTemperature'].mean())

In [31]:
#Creating a dataframe for the average temperature for each season
season=pd.DataFrame()
season['year']=years
season['spring_temp']=spring_temps
season['summer_temp']=summer_temps
season['autumn_temp']=autumn_temps
season['winter_temp']=winter_temps
season.head()

Unnamed: 0,year,spring_temp,summer_temp,autumn_temp,winter_temp
0,1750,8.563,14.518333,8.89,2.963
1,1751,6.735,14.116,10.673,1.729
2,1752,7.0355,,7.587,2.717
3,1753,8.627333,14.608333,9.212333,1.104333
4,1754,9.074333,14.208333,8.957333,1.637333


In [32]:
season.isna().sum()

year           0
spring_temp    0
summer_temp    1
autumn_temp    0
winter_temp    0
dtype: int64

In [33]:
season.dropna()

Unnamed: 0,year,spring_temp,summer_temp,autumn_temp,winter_temp
0,1750,8.563000,14.518333,8.890000,2.963000
1,1751,6.735000,14.116000,10.673000,1.729000
3,1753,8.627333,14.608333,9.212333,1.104333
4,1754,9.074333,14.208333,8.957333,1.637333
5,1755,8.583667,14.430667,9.233667,1.174333
...,...,...,...,...,...
261,2011,9.190000,14.954667,10.026000,3.893333
262,2012,9.429667,14.762667,10.208000,3.629000
263,2013,9.166667,14.771000,10.278000,4.210333
264,2014,9.516333,14.694333,10.044667,4.027333


In [34]:
season.isna().sum()

year           0
spring_temp    0
summer_temp    1
autumn_temp    0
winter_temp    0
dtype: int64

In [35]:
fig=px.line(season,
            x="year",
            y=['spring_temp', 
               'summer_temp', 
               'autumn_temp', 
               'winter_temp'],
            title="Average Temperature in Each season")
fig.show()

# Analysing trends in top econonimies

In [36]:
continent = ['Russia', 
             'United States', 
             'China', 
             'Japan', 
             'Australia',
             'United Kingdom']

In [37]:
global_tempby_country.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,Country
0,1743-11-01,4.384,2.294,Åland
5,1744-04-01,1.53,4.68,Åland
6,1744-05-01,6.702,1.789,Åland
7,1744-06-01,11.609,1.577,Åland
8,1744-07-01,15.342,1.41,Åland


In [38]:
#Adding 'Country' column to 'global_tempby_country' datframe
global_tempby_country[global_tempby_country['Country'].isin(continent)]

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,Country
34816,1852-07-01,14.116,1.530,Australia
34817,1852-08-01,15.330,1.400,Australia
34818,1852-09-01,18.740,1.446,Australia
34819,1852-10-01,21.984,1.493,Australia
34820,1852-11-01,24.073,1.466,Australia
...,...,...,...,...
557816,2013-05-01,14.073,0.178,United States
557817,2013-06-01,20.198,0.236,United States
557818,2013-07-01,22.074,0.152,United States
557819,2013-08-01,21.168,0.249,United States


In [39]:
continent_df=global_tempby_country[global_tempby_country['Country'].isin(continent)]
continent_df.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,Country
34816,1852-07-01,14.116,1.53,Australia
34817,1852-08-01,15.33,1.4,Australia
34818,1852-09-01,18.74,1.446,Australia
34819,1852-10-01,21.984,1.493,Australia
34820,1852-11-01,24.073,1.466,Australia


In [40]:
continent_df.shape

(17422, 4)

In [41]:
#Fetching years for each Country 
continent_df['years']=continent_df['dt'].apply(fetch_year)
continent_df.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,Country,years
34816,1852-07-01,14.116,1.53,Australia,1852
34817,1852-08-01,15.33,1.4,Australia,1852
34818,1852-09-01,18.74,1.446,Australia,1852
34819,1852-10-01,21.984,1.493,Australia,1852
34820,1852-11-01,24.073,1.466,Australia,1852


In [42]:
#Aggregating 'Continent' dataframe to 'AverageTemperature':'mean' column
continent_df.groupby(['years','Country']).agg({'AverageTemperature':'mean'})

Unnamed: 0_level_0,Unnamed: 1_level_0,AverageTemperature
years,Country,Unnamed: 2_level_1
1743,United Kingdom,7.131000
1744,United Kingdom,9.225688
1745,United Kingdom,3.934000
1750,United Kingdom,9.249591
1751,United Kingdom,8.929063
...,...,...
2013,China,9.295000
2013,Japan,12.964750
2013,Russia,-2.263125
2013,United Kingdom,9.052875


In [43]:
#Generating the dataframe for top economies Average Temperature
avg_temp=continent_df.groupby(['years','Country']).agg({'AverageTemperature':'mean'}).reset_index()
avg_temp.head(10)

Unnamed: 0,years,Country,AverageTemperature
0,1743,United Kingdom,7.131
1,1744,United Kingdom,9.225688
2,1745,United Kingdom,3.934
3,1750,United Kingdom,9.249591
4,1751,United Kingdom,8.929063
5,1752,United Kingdom,6.22475
6,1753,United Kingdom,8.509583
7,1754,United Kingdom,8.40875
8,1755,United Kingdom,8.125708
9,1756,United Kingdom,8.602833


In [44]:
fig=px.line(avg_temp,
            x="years",
            y=["AverageTemperature"],
            color='Country',
            title="Average Land Temperature top World economies")
fig.show()

Australia records the highest Average Land temperature this is most probably due to its climate and Russia records the lowest Average Land Temperature

# Spatial analysis on Average Tempearture of USA states

In [45]:
global_temp_state = pd.read_csv('GlobalLandTemperaturesByState.csv')
global_temp_state.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,State,Country
0,1855-05-01,25.544,1.171,Acre,Brazil
1,1855-06-01,24.228,1.103,Acre,Brazil
2,1855-07-01,24.371,1.044,Acre,Brazil
3,1855-08-01,25.427,1.073,Acre,Brazil
4,1855-09-01,25.675,1.014,Acre,Brazil


In [46]:
USA=global_temp_state[global_temp_state['Country']=='United States']

In [47]:
USA.dropna(inplace=True)

In [48]:
USA['State'].unique()

array(['Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California',
       'Colorado', 'Connecticut', 'Delaware', 'District Of Columbia',
       'Florida', 'Georgia (State)', 'Hawaii', 'Idaho', 'Illinois',
       'Indiana'], dtype=object)

In [49]:
#Naming the columns
state={'Georgia (State)':'Georgia',
       'District Of Columbia':'Columbia'}

USA['State'].replace(state,inplace=True)

In [50]:
#Extracting the columns('AverageTemperature',''State'') 
USA=USA[['AverageTemperature',
         'State']]

In [51]:
#Generating dataframe for the states
USA_temp=USA.groupby('State')['AverageTemperature'].mean().reset_index()
USA_temp.head()

Unnamed: 0,State,AverageTemperature
0,Alabama,17.066138
1,Alaska,-4.890738
2,Arizona,15.381526
3,Arkansas,15.573963
4,California,14.327677


In [52]:
USA_temp.shape

(15, 2)

In [53]:
!pip install opencage



In [54]:
from opencage.geocoder import OpenCageGeocode

In [55]:
key = '5ca89e8267cb476a88cd247f5d0dc35d'  # get api key from:  https://opencagedata.com/api
geocoder = OpenCageGeocode(key)
query = 'Bijuesca, Spain'  
results = geocoder.geocode(query)
print (results)

[{'annotations': {'DMS': {'lat': "41° 32' 25.83312'' N", 'lng': "1° 55' 13.28232'' W"}, 'MGRS': '30TWL9005499324', 'Maidenhead': 'IN91am99nr', 'Mercator': {'x': -213773.074, 'y': 5064053.763}, 'OSM': {'edit_url': 'https://www.openstreetmap.org/edit?relation=342295#map=17/41.54051/-1.92036', 'note_url': 'https://www.openstreetmap.org/note/new#map=17/41.54051/-1.92036&layers=N', 'url': 'https://www.openstreetmap.org/?mlat=41.54051&mlon=-1.92036#map=17/41.54051/-1.92036'}, 'UN_M49': {'regions': {'ES': '724', 'EUROPE': '150', 'SOUTHERN_EUROPE': '039', 'WORLD': '001'}, 'statistical_groupings': ['MEDC']}, 'callingcode': 34, 'currency': {'alternate_symbols': [], 'decimal_mark': ',', 'html_entity': '&#x20AC;', 'iso_code': 'EUR', 'iso_numeric': '978', 'name': 'Euro', 'smallest_denomination': 1, 'subunit': 'Cent', 'subunit_to_unit': 100, 'symbol': '€', 'symbol_first': 0, 'thousands_separator': '.'}, 'flag': '🇪🇸', 'geohash': 'ezqsk61xh2nts1rzhnmu', 'qibla': 106.81, 'roadinfo': {'drive_on': 'right

In [56]:
lat = results[0]['geometry']['lat']
lon = results[0]['geometry']['lng']
print (lat, lon)

41.5405092 -1.9203562


In [57]:
#Extracting lat and lon for the States
list_lat=[]
list_long=[]
for state in USA_temp['State']: # iterate over rows in dataframe
    results = geocoder.geocode(state)   
    lat = results[0]['geometry']['lat']
    lon = results[0]['geometry']['lng']

    list_lat.append(lat)
    list_long.append(lon)

In [58]:
# create new columns from lists    

USA_temp['lat'] = list_lat   
USA_temp['lon'] = list_long

In [59]:
USA_temp.head()

Unnamed: 0,State,AverageTemperature,lat,lon
0,Alabama,17.066138,33.258882,-86.829534
1,Alaska,-4.890738,64.445961,-149.680909
2,Arizona,15.381526,34.395342,-111.763275
3,Arkansas,15.573963,35.204888,-92.447911
4,California,14.327677,36.701463,-118.755997


In [60]:
import folium
from folium.plugins import HeatMap
basemap=folium.Map()

In [61]:
HeatMap(USA_temp[['lat',
                  'lon',
                  'AverageTemperature']],
        zoom=20,
        radius=10).add_to(basemap)
basemap

# Analysis of Average temperature of Indian Cities

In [62]:
cities=pd.read_csv('GlobalLandTemperaturesByCity.csv')
cities.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,City,Country,Latitude,Longitude
0,1743-11-01,6.068,1.737,Århus,Denmark,57.05N,10.33E
1,1743-12-01,,,Århus,Denmark,57.05N,10.33E
2,1744-01-01,,,Århus,Denmark,57.05N,10.33E
3,1744-02-01,,,Århus,Denmark,57.05N,10.33E
4,1744-03-01,,,Århus,Denmark,57.05N,10.33E


In [63]:
cities.shape

(275015, 7)

In [64]:
India=cities[cities['Country']=='India']

In [65]:
India['City'].unique()

array(['Abohar', 'Achalpur', 'Adilabad', 'Adoni', 'Agartala', 'Agra',
       'Ahmadabad', 'Ahmadnagar', 'Aizawl', 'Ajmer', 'Akola', 'Alandur',
       'Alappuzha', 'Aligarh', 'Allahabad', 'Alwar', 'Ambala',
       'Ambarnath', 'Ambattur', 'Ambur', 'Amravati', 'Amritsar', 'Amroha'],
      dtype=object)

In [66]:
Cities=['Abohar', 'Achalpur', 'Adilabad', 'Adoni', 'Agartala']

In [67]:
Cities=India[India['City'].isin(Cities)]

In [68]:
cities.shape

(275015, 7)

In [69]:
Cities.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,City,Country,Latitude,Longitude
49880,1816-03-01,19.934,2.258,Abohar,India,29.74N,73.85E
49881,1816-04-01,26.641,3.398,Abohar,India,29.74N,73.85E
49882,1816-05-01,32.535,2.408,Abohar,India,29.74N,73.85E
49883,1816-06-01,33.254,2.123,Abohar,India,29.74N,73.85E
49884,1816-07-01,31.105,1.848,Abohar,India,29.74N,73.85E


In [70]:
## remove N & E from lat & Lon
cities['Latitude']=cities['Latitude'].str.strip('N')
cities['Longitude']=cities['Longitude'].str.strip('E')

In [71]:
Cities.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,City,Country,Latitude,Longitude
49880,1816-03-01,19.934,2.258,Abohar,India,29.74N,73.85E
49881,1816-04-01,26.641,3.398,Abohar,India,29.74N,73.85E
49882,1816-05-01,32.535,2.408,Abohar,India,29.74N,73.85E
49883,1816-06-01,33.254,2.123,Abohar,India,29.74N,73.85E
49884,1816-07-01,31.105,1.848,Abohar,India,29.74N,73.85E


In [72]:
Cities['dt']=pd.to_datetime(Cities['dt'])
#Dropping date-time
Cities['Month']=Cities['dt'].dt.month
Cities.drop('dt',axis=1,inplace=True)


In [73]:
Cities.head()

Unnamed: 0,AverageTemperature,AverageTemperatureUncertainty,City,Country,Latitude,Longitude,Month
49880,19.934,2.258,Abohar,India,29.74N,73.85E,3
49881,26.641,3.398,Abohar,India,29.74N,73.85E,4
49882,32.535,2.408,Abohar,India,29.74N,73.85E,5
49883,33.254,2.123,Abohar,India,29.74N,73.85E,6
49884,31.105,1.848,Abohar,India,29.74N,73.85E,7


In [74]:
Cities.isna().sum()

AverageTemperature               684
AverageTemperatureUncertainty    684
City                               0
Country                            0
Latitude                           0
Longitude                          0
Month                              0
dtype: int64

In [75]:
Cities.dropna()

Unnamed: 0,AverageTemperature,AverageTemperatureUncertainty,City,Country,Latitude,Longitude,Month
49880,19.934,2.258,Abohar,India,29.74N,73.85E,3
49881,26.641,3.398,Abohar,India,29.74N,73.85E,4
49882,32.535,2.408,Abohar,India,29.74N,73.85E,5
49883,33.254,2.123,Abohar,India,29.74N,73.85E,6
49884,31.105,1.848,Abohar,India,29.74N,73.85E,7
...,...,...,...,...,...,...,...
98312,28.242,0.571,Agartala,India,23.31N,91.75E,4
98313,27.854,0.747,Agartala,India,23.31N,91.75E,5
98314,29.664,0.575,Agartala,India,23.31N,91.75E,6
98315,28.926,0.425,Agartala,India,23.31N,91.75E,7


In [76]:
India_Cities=Cities.dropna()

In [77]:
India_Cities.head()

Unnamed: 0,AverageTemperature,AverageTemperatureUncertainty,City,Country,Latitude,Longitude,Month
49880,19.934,2.258,Abohar,India,29.74N,73.85E,3
49881,26.641,3.398,Abohar,India,29.74N,73.85E,4
49882,32.535,2.408,Abohar,India,29.74N,73.85E,5
49883,33.254,2.123,Abohar,India,29.74N,73.85E,6
49884,31.105,1.848,Abohar,India,29.74N,73.85E,7


In [78]:
trace = go.Heatmap(z=India_Cities['AverageTemperature'],
                   x=India_Cities['Month'],
                   y=India_Cities['City'],
                  colorscale='Viridis')
India_Cities=[trace]
layout = go.Layout(
    title='Average Temperature Of Major Cities in India',)


In [79]:
fig = go.Figure(data=India_Cities, 
                layout=layout)
fig.show()

# Analysis of Average temperature of major cities in the world

In [80]:
cities.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,City,Country,Latitude,Longitude
0,1743-11-01,6.068,1.737,Århus,Denmark,57.05,10.33
1,1743-12-01,,,Århus,Denmark,57.05,10.33
2,1744-01-01,,,Århus,Denmark,57.05,10.33
3,1744-02-01,,,Århus,Denmark,57.05,10.33
4,1744-03-01,,,Århus,Denmark,57.05,10.33


In [81]:
cities['dt']=pd.to_datetime(cities['dt'])

In [82]:
#Dropping date-time
cities['Month']=cities['dt'].dt.month
cities.drop('dt',axis=1,inplace=True)

In [83]:
cities.groupby(['Month','City'])['AverageTemperature'].mean().to_frame()

Unnamed: 0_level_0,Unnamed: 1_level_0,AverageTemperature
Month,City,Unnamed: 2_level_1
1,A Coruña,8.238385
1,Aachen,0.204257
1,Aalborg,-0.496834
1,Aba,26.439301
1,Abadan,12.737139
...,...,...
12,Århus,0.914878
12,Çorlu,6.242232
12,Çorum,1.001556
12,Öskemen,-14.369171


In [84]:
cities_temp=cities.groupby(['Month','City'])['AverageTemperature'].mean().to_frame().reset_index()
cities_temp.columns=['month',
                     'City',
                     'Mean_temp']
cities_temp.head()

Unnamed: 0,month,City,Mean_temp
0,1,A Coruña,8.238385
1,1,Aachen,0.204257
2,1,Aalborg,-0.496834
3,1,Aba,26.439301
4,1,Abadan,12.737139


In [85]:
df=cities_temp.merge(cities,
                     on='City',
                     how='left')
df.head()

Unnamed: 0,month,City,Mean_temp,AverageTemperature,AverageTemperatureUncertainty,Country,Latitude,Longitude,Month
0,1,A Coruña,8.238385,10.779,1.942,Spain,42.59,8.73W,11
1,1,A Coruña,8.238385,,,Spain,42.59,8.73W,12
2,1,A Coruña,8.238385,,,Spain,42.59,8.73W,1
3,1,A Coruña,8.238385,,,Spain,42.59,8.73W,2
4,1,A Coruña,8.238385,,,Spain,42.59,8.73W,3


In [86]:
data=df.drop_duplicates(subset=['month','City'])

In [87]:
data.head()

Unnamed: 0,month,City,Mean_temp,AverageTemperature,AverageTemperatureUncertainty,Country,Latitude,Longitude,Month
0,1,A Coruña,8.238385,10.779,1.942,Spain,42.59,8.73W,11
3239,1,Aachen,0.204257,6.425,1.628,Germany,50.63,6.34,11
6478,1,Aalborg,-0.496834,6.068,1.737,Denmark,57.05,10.33,11
9717,1,Aba,26.439301,26.366,1.287,Nigeria,5.63,8.07,1
11610,1,Abadan,12.737139,32.877,1.752,Iran,29.74,48.00,8


In [88]:
data2=data[['month',
            'City',
            'Mean_temp',
            'Country',
            'Latitude',
            'Longitude']]
data2.head()

Unnamed: 0,month,City,Mean_temp,Country,Latitude,Longitude
0,1,A Coruña,8.238385,Spain,42.59,8.73W
3239,1,Aachen,0.204257,Germany,50.63,6.34
6478,1,Aalborg,-0.496834,Denmark,57.05,10.33
9717,1,Aba,26.439301,Nigeria,5.63,8.07
11610,1,Abadan,12.737139,Iran,29.74,48.00


In [89]:
trace = go.Heatmap(z=data2['Mean_temp'],
                   x=data2['month'],
                   y=data2['City'],
                  colorscale='Viridis')

In [90]:
data=[trace]
layout = go.Layout(
    title='Average Temperature Of Major Cities By Month',)

In [91]:
fig = go.Figure(data=data, layout=layout)
fig.show()