In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import re
from opencage.geocoder import OpenCageGeocode
import folium
from folium.plugins import HeatMap
import plotly.graph_objs as go
import plotly.express as px
from plotly.offline import init_notebook_mode
init_notebook_mode(connected=True)

### Clean Data and Plot Avg Temprature on World Map

Dataset and Resources - https://drive.google.com/drive/folders/1xlJZVvQHthNFFyclhmTVnIMMX9am8oYE?usp=sharing

In [2]:
global_temp_country = pd.read_csv('GlobalLandTemperaturesByCountry.csv')
global_temp_country.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,Country
0,1743-11-01,4.384,2.294,Åland
1,1743-12-01,,,Åland
2,1744-01-01,,,Åland
3,1744-02-01,,,Åland
4,1744-03-01,,,Åland


In [3]:
global_temp_country.shape

(577462, 4)

In [4]:
global_temp_country.isna().sum()

dt                                   0
AverageTemperature               32651
AverageTemperatureUncertainty    31912
Country                              0
dtype: int64

In [5]:
global_temp_country.dropna(axis='index', how='any', inplace=True)
global_temp_country.isna().sum()

dt                               0
AverageTemperature               0
AverageTemperatureUncertainty    0
Country                          0
dtype: int64

In [6]:
global_temp_country.nunique()

dt                                3167
AverageTemperature               71085
AverageTemperatureUncertainty     8979
Country                            242
dtype: int64

In [7]:
global_temp_country['Country'].unique()

array(['Åland', 'Afghanistan', 'Africa', 'Albania', 'Algeria',
       'American Samoa', 'Andorra', 'Angola', 'Anguilla',
       'Antigua And Barbuda', 'Argentina', 'Armenia', 'Aruba', 'Asia',
       'Australia', 'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain',
       'Baker Island', 'Bangladesh', 'Barbados', 'Belarus', 'Belgium',
       'Belize', 'Benin', 'Bhutan', 'Bolivia',
       'Bonaire, Saint Eustatius And Saba', 'Bosnia And Herzegovina',
       'Botswana', 'Brazil', 'British Virgin Islands', 'Bulgaria',
       'Burkina Faso', 'Burma', 'Burundi', "Côte D'Ivoire", 'Cambodia',
       'Cameroon', 'Canada', 'Cape Verde', 'Cayman Islands',
       'Central African Republic', 'Chad', 'Chile', 'China',
       'Christmas Island', 'Colombia', 'Comoros',
       'Congo (Democratic Republic Of The)', 'Congo', 'Costa Rica',
       'Croatia', 'Cuba', 'Curaçao', 'Cyprus', 'Czech Republic',
       'Denmark (Europe)', 'Denmark', 'Djibouti', 'Dominica',
       'Dominican Republic', 'Ecuador', 'Egypt'

In [8]:
# import re
def fix_country_name(name):
    my_country = re.match(r'([\w ]*)(\(?\w*\)?)', name)
    if my_country is not None:
        return my_country[1].rstrip()
    return name.rstrip()

global_temp_country['Country'].apply(lambda x: fix_country_name(x)).unique()
# OR global_temp_country['Country'].replace(dict, inplace=True)
# where dict = {'what to replace' : 'replace with what',... }

array(['Åland', 'Afghanistan', 'Africa', 'Albania', 'Algeria',
       'American Samoa', 'Andorra', 'Angola', 'Anguilla',
       'Antigua And Barbuda', 'Argentina', 'Armenia', 'Aruba', 'Asia',
       'Australia', 'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain',
       'Baker Island', 'Bangladesh', 'Barbados', 'Belarus', 'Belgium',
       'Belize', 'Benin', 'Bhutan', 'Bolivia', 'Bonaire',
       'Bosnia And Herzegovina', 'Botswana', 'Brazil',
       'British Virgin Islands', 'Bulgaria', 'Burkina Faso', 'Burma',
       'Burundi', 'Côte D', 'Cambodia', 'Cameroon', 'Canada',
       'Cape Verde', 'Cayman Islands', 'Central African Republic', 'Chad',
       'Chile', 'China', 'Christmas Island', 'Colombia', 'Comoros',
       'Congo', 'Costa Rica', 'Croatia', 'Cuba', 'Curaçao', 'Cyprus',
       'Czech Republic', 'Denmark', 'Djibouti', 'Dominica',
       'Dominican Republic', 'Ecuador', 'Egypt', 'El Salvador',
       'Equatorial Guinea', 'Eritrea', 'Estonia', 'Ethiopia', 'Europe',
       'Falkland

In [9]:
global_temp_country['Fixed_Countries_Name'] = global_temp_country['Country'].apply(lambda x: fix_country_name(x))

In [10]:
global_temp_country.columns

Index(['dt', 'AverageTemperature', 'AverageTemperatureUncertainty', 'Country',
       'Fixed_Countries_Name'],
      dtype='object')

In [11]:
global_temp_country.drop('Country', axis=1,inplace=True)
global_temp_country.columns

Index(['dt', 'AverageTemperature', 'AverageTemperatureUncertainty',
       'Fixed_Countries_Name'],
      dtype='object')

In [12]:
global_temp_country.columns = ['dt', 'AverageTemperature', 'AverageTemperatureUncertainty', 'Country']
global_temp_country.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,Country
0,1743-11-01,4.384,2.294,Åland
5,1744-04-01,1.53,4.68,Åland
6,1744-05-01,6.702,1.789,Åland
7,1744-06-01,11.609,1.577,Åland
8,1744-07-01,15.342,1.41,Åland


In [13]:
avg_country_temp = global_temp_country.groupby(['Country'])['AverageTemperature'].mean().reset_index()
avg_country_temp

Unnamed: 0,Country,AverageTemperature
0,Afghanistan,14.045007
1,Africa,24.074203
2,Albania,12.610646
3,Algeria,22.985112
4,American Samoa,26.611965
...,...,...
232,Western Sahara,22.319818
233,Yemen,26.253597
234,Zambia,21.282956
235,Zimbabwe,21.117547


In [14]:
fig = px.choropleth(avg_country_temp, locations='Country', locationmode='country names', color='AverageTemperature')
fig.update_layout(title='choropleth map of avg temp')
fig.show()

### Check if country has Global Warming or Not

In [15]:
global_temp = pd.read_csv('GlobalTemperatures.csv', parse_dates=['dt'])
global_temp.head()

Unnamed: 0,dt,LandAverageTemperature,LandAverageTemperatureUncertainty,LandMaxTemperature,LandMaxTemperatureUncertainty,LandMinTemperature,LandMinTemperatureUncertainty,LandAndOceanAverageTemperature,LandAndOceanAverageTemperatureUncertainty
0,1750-01-01,3.034,3.574,,,,,,
1,1750-02-01,3.083,3.702,,,,,,
2,1750-03-01,5.626,3.076,,,,,,
3,1750-04-01,8.49,2.451,,,,,,
4,1750-05-01,11.573,2.072,,,,,,


In [16]:
global_temp.dtypes

dt                                           datetime64[ns]
LandAverageTemperature                              float64
LandAverageTemperatureUncertainty                   float64
LandMaxTemperature                                  float64
LandMaxTemperatureUncertainty                       float64
LandMinTemperature                                  float64
LandMinTemperatureUncertainty                       float64
LandAndOceanAverageTemperature                      float64
LandAndOceanAverageTemperatureUncertainty           float64
dtype: object

In [17]:
global_temp['dt'][2]

Timestamp('1750-03-01 00:00:00')

In [18]:
print(global_temp['dt'][2].year)
print(global_temp['dt'][2].month)
print(global_temp['dt'][2].day)
print(global_temp['dt'][2].dayofweek)
print(global_temp['dt'][2].day_name())

1750
3
1
6
Sunday


In [19]:
global_temp.isna().sum()

dt                                              0
LandAverageTemperature                         12
LandAverageTemperatureUncertainty              12
LandMaxTemperature                           1200
LandMaxTemperatureUncertainty                1200
LandMinTemperature                           1200
LandMinTemperatureUncertainty                1200
LandAndOceanAverageTemperature               1200
LandAndOceanAverageTemperatureUncertainty    1200
dtype: int64

In [20]:
global_temp['year'] = global_temp['dt'].apply(lambda x: x.year)
global_temp.head()

Unnamed: 0,dt,LandAverageTemperature,LandAverageTemperatureUncertainty,LandMaxTemperature,LandMaxTemperatureUncertainty,LandMinTemperature,LandMinTemperatureUncertainty,LandAndOceanAverageTemperature,LandAndOceanAverageTemperatureUncertainty,year
0,1750-01-01,3.034,3.574,,,,,,,1750
1,1750-02-01,3.083,3.702,,,,,,,1750
2,1750-03-01,5.626,3.076,,,,,,,1750
3,1750-04-01,8.49,2.451,,,,,,,1750
4,1750-05-01,11.573,2.072,,,,,,,1750


In [21]:
year_land_avg_temp = global_temp.groupby('year').agg({'LandAverageTemperature':'mean',
                                                      'LandAverageTemperatureUncertainty':'mean'}).reset_index()
year_land_avg_temp.head()

Unnamed: 0,year,LandAverageTemperature,LandAverageTemperatureUncertainty
0,1750,8.719364,2.637818
1,1751,7.976143,2.781143
2,1752,5.779833,2.977
3,1753,8.388083,3.176
4,1754,8.469333,3.49425


In [22]:
year_land_avg_temp['UncertanityTop'] = year_land_avg_temp['LandAverageTemperature'] + year_land_avg_temp['LandAverageTemperatureUncertainty']
year_land_avg_temp['UncertanityBottom'] = year_land_avg_temp['LandAverageTemperature'] - year_land_avg_temp['LandAverageTemperatureUncertainty']
year_land_avg_temp.head()

Unnamed: 0,year,LandAverageTemperature,LandAverageTemperatureUncertainty,UncertanityTop,UncertanityBottom
0,1750,8.719364,2.637818,11.357182,6.081545
1,1751,7.976143,2.781143,10.757286,5.195
2,1752,5.779833,2.977,8.756833,2.802833
3,1753,8.388083,3.176,11.564083,5.212083
4,1754,8.469333,3.49425,11.963583,4.975083


In [23]:
year_land_avg_temp.columns

Index(['year', 'LandAverageTemperature', 'LandAverageTemperatureUncertainty',
       'UncertanityTop', 'UncertanityBottom'],
      dtype='object')

In [24]:
fig = px.line(year_land_avg_temp, x='year', y=['LandAverageTemperature', 'UncertanityTop', 'UncertanityBottom'],
        title='Avg Land Temp in World', labels=dict(year='Year', value='(-)  LandAverageTemperature  (+)'))
fig.show()

### Average Temprature in Each Season

In [25]:
global_temp.head()

Unnamed: 0,dt,LandAverageTemperature,LandAverageTemperatureUncertainty,LandMaxTemperature,LandMaxTemperatureUncertainty,LandMinTemperature,LandMinTemperatureUncertainty,LandAndOceanAverageTemperature,LandAndOceanAverageTemperatureUncertainty,year
0,1750-01-01,3.034,3.574,,,,,,,1750
1,1750-02-01,3.083,3.702,,,,,,,1750
2,1750-03-01,5.626,3.076,,,,,,,1750
3,1750-04-01,8.49,2.451,,,,,,,1750
4,1750-05-01,11.573,2.072,,,,,,,1750


In [26]:
global_temp['dt'].dtype

dtype('<M8[ns]')


datetime64[ns] is a general dtype, while <M8[ns] is a specific dtype. General dtypes map to specific dtypes, but may be different from one installation of NumPy to the next.

On a machine whose byte order is little endian, there is no difference between np.dtype('datetime64[ns]') and np.dtype('<M8[ns]'):

In [6]: np.dtype('datetime64[ns]') == np.dtype('<M8[ns]')  
Out[6]: True  
  
However, on a big endian machine, np.dtype('datetime64[ns]') would equal np.dtype('>M8[ns]').

So datetime64[ns] maps to either <M8[ns] or >M8[ns] depending on the endian-ness of the machine.

There are many other similar examples of general dtypes mapping to specific dtypes: int64 maps to <i8 or >i8, and int maps to either int32 or int64 depending on the bit architecture of the OS and how NumPy was compiled.


In [27]:
np.dtype('datetime64[ns]') == np.dtype('<M8[ns]')

True

In [28]:
np.dtype('datetime64[ns]') == np.dtype('>M8[ns]')

False

In [31]:
global_temp['dt'][0]

Timestamp('1750-01-01 00:00:00')

In [32]:
global_temp['dt'][0].month

1

if not datetime format then you can convert it using below -  
global_temp['dt'] = pd.to_datetime(global_temp['dt'])

In [34]:
global_temp['month'] = global_temp['dt'].dt.month
global_temp.head()

Unnamed: 0,dt,LandAverageTemperature,LandAverageTemperatureUncertainty,LandMaxTemperature,LandMaxTemperatureUncertainty,LandMinTemperature,LandMinTemperatureUncertainty,LandAndOceanAverageTemperature,LandAndOceanAverageTemperatureUncertainty,year,month
0,1750-01-01,3.034,3.574,,,,,,,1750,1
1,1750-02-01,3.083,3.702,,,,,,,1750,2
2,1750-03-01,5.626,3.076,,,,,,,1750,3
3,1750-04-01,8.49,2.451,,,,,,,1750,4
4,1750-05-01,11.573,2.072,,,,,,,1750,5


In [39]:
def get_season(month):
    if 2 <= month <= 5 :
        return "Summer"
    elif 6 <= month <= 9 :
        return "Rainy"
    else :
        return "Winter"

In [43]:
# global_temp['season'] = global_temp['month'].apply(lambda x : get_season(x)) OR
global_temp['season'] = global_temp['month'].apply(get_season)
global_temp.head()

Unnamed: 0,dt,LandAverageTemperature,LandAverageTemperatureUncertainty,LandMaxTemperature,LandMaxTemperatureUncertainty,LandMinTemperature,LandMinTemperatureUncertainty,LandAndOceanAverageTemperature,LandAndOceanAverageTemperatureUncertainty,year,month,season
0,1750-01-01,3.034,3.574,,,,,,,1750,1,Winter
1,1750-02-01,3.083,3.702,,,,,,,1750,2,Summer
2,1750-03-01,5.626,3.076,,,,,,,1750,3,Summer
3,1750-04-01,8.49,2.451,,,,,,,1750,4,Summer
4,1750-05-01,11.573,2.072,,,,,,,1750,5,Summer


In [48]:
years=global_temp['year'].unique()
years

array([1750, 1751, 1752, 1753, 1754, 1755, 1756, 1757, 1758, 1759, 1760,
       1761, 1762, 1763, 1764, 1765, 1766, 1767, 1768, 1769, 1770, 1771,
       1772, 1773, 1774, 1775, 1776, 1777, 1778, 1779, 1780, 1781, 1782,
       1783, 1784, 1785, 1786, 1787, 1788, 1789, 1790, 1791, 1792, 1793,
       1794, 1795, 1796, 1797, 1798, 1799, 1800, 1801, 1802, 1803, 1804,
       1805, 1806, 1807, 1808, 1809, 1810, 1811, 1812, 1813, 1814, 1815,
       1816, 1817, 1818, 1819, 1820, 1821, 1822, 1823, 1824, 1825, 1826,
       1827, 1828, 1829, 1830, 1831, 1832, 1833, 1834, 1835, 1836, 1837,
       1838, 1839, 1840, 1841, 1842, 1843, 1844, 1845, 1846, 1847, 1848,
       1849, 1850, 1851, 1852, 1853, 1854, 1855, 1856, 1857, 1858, 1859,
       1860, 1861, 1862, 1863, 1864, 1865, 1866, 1867, 1868, 1869, 1870,
       1871, 1872, 1873, 1874, 1875, 1876, 1877, 1878, 1879, 1880, 1881,
       1882, 1883, 1884, 1885, 1886, 1887, 1888, 1889, 1890, 1891, 1892,
       1893, 1894, 1895, 1896, 1897, 1898, 1899, 19

In [47]:
global_temp['year'].nunique()

266

In [51]:
summer_temps = []
rainy_temps = []
winter_temps = []

for year in years:
    current_df = global_temp[global_temp['year']==year]
    summer_temps.append(current_df[current_df['season']=='Summer']['LandAverageTemperature'].mean())
    rainy_temps.append(current_df[current_df['season']=='Rainy']['LandAverageTemperature'].mean())
    winter_temps.append(current_df[current_df['season']=='Winter']['LandAverageTemperature'].mean())

season_temp = pd.DataFrame()
season_temp['year'] = years
season_temp['summer_temps'] = summer_temps
season_temp['rainy_temps'] = rainy_temps
season_temp['winter_temps'] = winter_temps
season_temp.head()

Unnamed: 0,year,summer_temps,rainy_temps,winter_temps
0,1750,7.193,13.742,4.057667
1,1751,4.811,12.968333,2.495
2,1752,7.0355,,5.152
3,1753,6.64925,14.28675,4.22825
4,1754,6.4935,13.4385,5.476


In [52]:
season_temp.columns

Index(['year', 'summer_temps', 'rainy_temps', 'winter_temps'], dtype='object')

In [60]:
fig = px.line(season_temp, x='year', y= ['summer_temps', 'rainy_temps', 'winter_temps'], 
              title='Avg Temp in Each Season', labels=dict(year='Year', value='AverageTemperature'))
fig.show()

### Average Temprature for Major Countries 

In [61]:
country = ['Russia', 'United States', 'China', 'Japan', 'Australia', 'India']
global_temp_country.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,Country
0,1743-11-01,4.384,2.294,Åland
5,1744-04-01,1.53,4.68,Åland
6,1744-05-01,6.702,1.789,Åland
7,1744-06-01,11.609,1.577,Åland
8,1744-07-01,15.342,1.41,Åland


In [62]:
country_temp = global_temp_country[global_temp_country['Country'].isin(country)]
country_temp.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,Country
34816,1852-07-01,14.116,1.53,Australia
34817,1852-08-01,15.33,1.4,Australia
34818,1852-09-01,18.74,1.446,Australia
34819,1852-10-01,21.984,1.493,Australia
34820,1852-11-01,24.073,1.466,Australia


In [69]:
country_temp.loc['dt'] = pd.to_datetime(country_temp['dt'])
country_temp.head()



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,Country
34816,1852-07-01,14.116,1.53,Australia
34817,1852-08-01,15.33,1.4,Australia
34818,1852-09-01,18.74,1.446,Australia
34819,1852-10-01,21.984,1.493,Australia
34820,1852-11-01,24.073,1.466,Australia


In [88]:
country_temp.loc['years'] = country_temp['dt'].dt.year
country_temp.loc['years'] = country_temp.loc['years'].astype('Int64')
country_temp.head()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,Country,years
34816,1852-07-01,14.116,1.53,Australia,1852
34817,1852-08-01,15.33,1.4,Australia,1852
34818,1852-09-01,18.74,1.446,Australia,1852
34819,1852-10-01,21.984,1.493,Australia,1852
34820,1852-11-01,24.073,1.466,Australia,1852


In [84]:
country_temp._is_view

False

In [91]:
country_avg_temp = country_temp.groupby(['years', 'Country']).agg({'AverageTemperature':'mean'}).reset_index()
country_avg_temp.head()

Unnamed: 0,years,Country,AverageTemperature
0,1768,United States,5.57275
1,1769,United States,10.4465
2,1774,United States,1.603
3,1775,United States,9.499167
4,1776,United States,8.11


In [93]:
fig = px.line(country_avg_temp, x='years', y='AverageTemperature', color='Country', title = 'Avg land temp in world')
fig.show()

### State wise Average Temprature for India

In [94]:
global_temp_state = pd.read_csv('GlobalLandTemperaturesByState.csv')
global_temp_state.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,State,Country
0,1855-05-01,25.544,1.171,Acre,Brazil
1,1855-06-01,24.228,1.103,Acre,Brazil
2,1855-07-01,24.371,1.044,Acre,Brazil
3,1855-08-01,25.427,1.073,Acre,Brazil
4,1855-09-01,25.675,1.014,Acre,Brazil


In [109]:
India = global_temp_state[global_temp_state['Country'] == 'India']
India.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,State,Country
24709,1796-01-01,26.534,1.904,Andaman And Nicobar,India
24710,1796-02-01,26.294,1.122,Andaman And Nicobar,India
24711,1796-03-01,26.18,1.959,Andaman And Nicobar,India
24712,1796-04-01,27.942,1.281,Andaman And Nicobar,India
24713,1796-05-01,28.651,1.119,Andaman And Nicobar,India


In [106]:
India._is_view

False

In [110]:
India._is_copy

<weakref at 0x7f63960442f0; to 'DataFrame' at 0x7f6395f8cb50>

In [114]:
India = global_temp_state[global_temp_state['Country'] == 'India'].copy()
India.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,State,Country
24709,1796-01-01,26.534,1.904,Andaman And Nicobar,India
24710,1796-02-01,26.294,1.122,Andaman And Nicobar,India
24711,1796-03-01,26.18,1.959,Andaman And Nicobar,India
24712,1796-04-01,27.942,1.281,Andaman And Nicobar,India
24713,1796-05-01,28.651,1.119,Andaman And Nicobar,India


In [115]:
India._is_view

False

In [116]:
India._is_copy # outputs nothing

In [117]:
India.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 86664 entries, 24709 to 615161
Data columns (total 5 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   dt                             86664 non-null  object 
 1   AverageTemperature             81620 non-null  float64
 2   AverageTemperatureUncertainty  81620 non-null  float64
 3   State                          86664 non-null  object 
 4   Country                        86664 non-null  object 
dtypes: float64(2), object(3)
memory usage: 4.0+ MB


In [118]:
India.dropna(inplace=True)
India.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 81620 entries, 24709 to 615160
Data columns (total 5 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   dt                             81620 non-null  object 
 1   AverageTemperature             81620 non-null  float64
 2   AverageTemperatureUncertainty  81620 non-null  float64
 3   State                          81620 non-null  object 
 4   Country                        81620 non-null  object 
dtypes: float64(2), object(3)
memory usage: 3.7+ MB


In [120]:
India['State'].unique()

array(['Andaman And Nicobar', 'Andhra Pradesh', 'Arunachal Pradesh',
       'Assam', 'Bihar', 'Chandigarh', 'Chhattisgarh',
       'Dadra And Nagar Haveli', 'Daman And Diu', 'Delhi', 'Goa',
       'Gujarat', 'Haryana', 'Himachal Pradesh', 'Jammu And Kashmir',
       'Jharkhand', 'Karnataka', 'Kerala', 'Madhya Pradesh',
       'Maharashtra', 'Manipur', 'Meghalaya', 'Mizoram', 'Nagaland',
       'Orissa', 'Puducherry', 'Punjab', 'Rajasthan', 'Sikkim',
       'Tamil Nadu', 'Tripura', 'Uttar Pradesh', 'Uttaranchal',
       'West Bengal'], dtype=object)

In [121]:
India = India[['AverageTemperature', 'State']].copy()
India.head()

Unnamed: 0,AverageTemperature,State
24709,26.534,Andaman And Nicobar
24710,26.294,Andaman And Nicobar
24711,26.18,Andaman And Nicobar
24712,27.942,Andaman And Nicobar
24713,28.651,Andaman And Nicobar


In [123]:
India_temp = India.groupby('State')['AverageTemperature'].mean().reset_index()
India_temp.head()

Unnamed: 0,State,AverageTemperature
0,Andaman And Nicobar,27.231677
1,Andhra Pradesh,27.090003
2,Arunachal Pradesh,13.79567
3,Assam,21.875561
4,Bihar,24.486945


In [136]:
# !pip install opencage
# from opencage.geocoder import OpenCageGeocode
key = '5ca89e8267cb476a88cd247f5d0dc35d'
geocoder = OpenCageGeocode(key)

In [137]:
results = geocoder.geocode('Pune,Maharashtra')
results

[{'annotations': {'DMS': {'lat': "18° 31' 17.14080'' N",
    'lng': "73° 51' 16.03476'' E"},
   'MGRS': '43QCA7908448259',
   'Maidenhead': 'MK68wm25md',
   'Mercator': {'x': 8221440.223, 'y': 2085105.665},
   'OSM': {'edit_url': 'https://www.openstreetmap.org/edit?node=16174445#map=16/18.52143/73.85445',
    'note_url': 'https://www.openstreetmap.org/note/new#map=16/18.52143/73.85445&layers=N',
    'url': 'https://www.openstreetmap.org/?mlat=18.52143&mlon=73.85445#map=16/18.52143/73.85445'},
   'UN_M49': {'regions': {'ASIA': '142',
     'IN': '356',
     'SOUTHERN_ASIA': '034',
     'WORLD': '001'},
    'statistical_groupings': ['LEDC']},
   'callingcode': 91,
   'currency': {'alternate_symbols': ['Rs', '৳', '૱', '௹', 'रु', '₨'],
    'decimal_mark': '.',
    'html_entity': '&#x20b9;',
    'iso_code': 'INR',
    'iso_numeric': '356',
    'name': 'Indian Rupee',
    'smallest_denomination': 50,
    'subunit': 'Paisa',
    'subunit_to_unit': 100,
    'symbol': '₹',
    'symbol_first': 1,

In [138]:
results[0]['geometry']

{'lat': 18.521428, 'lng': 73.8544541}

In [139]:
India_temp['StateLocInfo'] = India_temp['State'].apply(geocoder.geocode)
India_temp.head()

Unnamed: 0,State,AverageTemperature,StateLocInfo
0,Andaman And Nicobar,27.231677,"[{'annotations': {'DMS': {'lat': ""7° 0' 0.0601..."
1,Andhra Pradesh,27.090003,"[{'annotations': {'DMS': {'lat': ""15° 55' 26.7..."
2,Arunachal Pradesh,13.79567,"[{'annotations': {'DMS': {'lat': ""28° 5' 37.57..."
3,Assam,21.875561,"[{'annotations': {'DMS': {'lat': ""26° 24' 26.5..."
4,Bihar,24.486945,"[{'annotations': {'DMS': {'lat': ""25° 38' 38.7..."


In [167]:
India_temp['Latitude'] = India_temp['StateLocInfo'].str[0].str['geometry'].str['lat']
India_temp['Longitude'] = India_temp['StateLocInfo'].str[0].str['geometry'].str['lng']
India_temp.head()

Unnamed: 0,State,AverageTemperature,StateLocInfo,Latitude,Longitude
0,Andaman And Nicobar,27.231677,"[{'annotations': {'DMS': {'lat': ""7° 0' 0.0601...",7.000017,93.811082
1,Andhra Pradesh,27.090003,"[{'annotations': {'DMS': {'lat': ""15° 55' 26.7...",15.924091,80.186381
2,Arunachal Pradesh,13.79567,"[{'annotations': {'DMS': {'lat': ""28° 5' 37.57...",28.09377,94.592133
3,Assam,21.875561,"[{'annotations': {'DMS': {'lat': ""26° 24' 26.5...",26.407384,93.25513
4,Bihar,24.486945,"[{'annotations': {'DMS': {'lat': ""25° 38' 38.7...",25.644085,85.906508


In [168]:
# import folium
# from folium.plugins import HeatMap
basemap = folium.Map()
basemap

In [170]:
HeatMap(India_temp[['Latitude', 'Longitude', 'AverageTemperature']]).add_to(basemap)
basemap

### Average Temperature for Swiz Cities by Month

In [172]:
cities = pd.read_csv('GlobalLandTemperaturesByCity.csv')
cities.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,City,Country,Latitude,Longitude
0,1743-11-01,6.068,1.737,Århus,Denmark,57.05N,10.33E
1,1743-12-01,,,Århus,Denmark,57.05N,10.33E
2,1744-01-01,,,Århus,Denmark,57.05N,10.33E
3,1744-02-01,,,Århus,Denmark,57.05N,10.33E
4,1744-03-01,,,Århus,Denmark,57.05N,10.33E


In [194]:
swiz = cities[cities['Country']=='Switzerland'].copy()
swiz

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,City,Country,Latitude,Longitude
723662,1743-11-01,3.544,1.764,Basel,Switzerland,47.42N,8.29E
723663,1743-12-01,,,Basel,Switzerland,47.42N,8.29E
723664,1744-01-01,,,Basel,Switzerland,47.42N,8.29E
723665,1744-02-01,,,Basel,Switzerland,47.42N,8.29E
723666,1744-03-01,,,Basel,Switzerland,47.42N,8.29E
...,...,...,...,...,...,...,...
8592839,2013-05-01,9.400,0.396,Zurich,Switzerland,47.42N,8.29E
8592840,2013-06-01,14.444,0.388,Zurich,Switzerland,47.42N,8.29E
8592841,2013-07-01,18.658,0.231,Zurich,Switzerland,47.42N,8.29E
8592842,2013-08-01,16.657,0.490,Zurich,Switzerland,47.42N,8.29E


In [195]:
swiz['City'].unique()

array(['Basel', 'Bern', 'Geneva', 'Lausanne', 'Zurich'], dtype=object)

In [196]:
swiz['Latitude'] = swiz['Latitude'].str.strip('N')
swiz['Longitude'] = swiz['Longitude'].str.strip('E')
swiz.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,City,Country,Latitude,Longitude
723662,1743-11-01,3.544,1.764,Basel,Switzerland,47.42,8.29
723663,1743-12-01,,,Basel,Switzerland,47.42,8.29
723664,1744-01-01,,,Basel,Switzerland,47.42,8.29
723665,1744-02-01,,,Basel,Switzerland,47.42,8.29
723666,1744-03-01,,,Basel,Switzerland,47.42,8.29


In [197]:
swiz.dtypes

dt                                object
AverageTemperature               float64
AverageTemperatureUncertainty    float64
City                              object
Country                           object
Latitude                          object
Longitude                         object
dtype: object

In [198]:
swiz['dt'] = pd.to_datetime(swiz['dt'])
swiz.dtypes

dt                               datetime64[ns]
AverageTemperature                      float64
AverageTemperatureUncertainty           float64
City                                     object
Country                                  object
Latitude                                 object
Longitude                                object
dtype: object

In [200]:
swiz['Month'] = swiz['dt'].dt.month
swiz.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,City,Country,Latitude,Longitude,Month
723662,1743-11-01,3.544,1.764,Basel,Switzerland,47.42,8.29,11
723663,1743-12-01,,,Basel,Switzerland,47.42,8.29,12
723664,1744-01-01,,,Basel,Switzerland,47.42,8.29,1
723665,1744-02-01,,,Basel,Switzerland,47.42,8.29,2
723666,1744-03-01,,,Basel,Switzerland,47.42,8.29,3


In [201]:
swiz.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 16195 entries, 723662 to 8592843
Data columns (total 8 columns):
 #   Column                         Non-Null Count  Dtype         
---  ------                         --------------  -----         
 0   dt                             16195 non-null  datetime64[ns]
 1   AverageTemperature             15830 non-null  float64       
 2   AverageTemperatureUncertainty  15830 non-null  float64       
 3   City                           16195 non-null  object        
 4   Country                        16195 non-null  object        
 5   Latitude                       16195 non-null  object        
 6   Longitude                      16195 non-null  object        
 7   Month                          16195 non-null  int64         
dtypes: datetime64[ns](1), float64(2), int64(1), object(4)
memory usage: 1.1+ MB


In [212]:
swiz_temp = swiz.groupby(['Month', 'City'])['AverageTemperature'].mean().to_frame().reset_index()
swiz_temp.head()

Unnamed: 0,Month,City,AverageTemperature
0,1,Basel,-2.440374
1,1,Bern,-2.440374
2,1,Geneva,-0.744072
3,1,Lausanne,-0.744072
4,1,Zurich,-2.440374


In [233]:
# to get lat longs
df = swiz_temp.merge(swiz[['City', 'Latitude', 'Longitude']], on='City')
df.head()

Unnamed: 0,Month,City,AverageTemperature,Latitude,Longitude
0,1,Basel,-2.440374,47.42,8.29
1,1,Basel,-2.440374,47.42,8.29
2,1,Basel,-2.440374,47.42,8.29
3,1,Basel,-2.440374,47.42,8.29
4,1,Basel,-2.440374,47.42,8.29


In [234]:
df.shape

(194340, 5)

In [235]:
df = df.drop_duplicates().reset_index(drop=True)
df.shape

(60, 5)

In [247]:
df.head()

Unnamed: 0,Month,City,AverageTemperature,Latitude,Longitude
0,1,Basel,-2.440374,47.42,8.29
1,2,Basel,-0.89608,47.42,8.29
2,3,Basel,2.288589,47.42,8.29
3,4,Basel,6.281526,47.42,8.29
4,5,Basel,10.951681,47.42,8.29


In [254]:
# import plotly.graph_objs as go
data = [go.Heatmap(x=df['Month'], y=df['City'], z=df['AverageTemperature'])]
layout = go.Layout(title='Average Temperature for Swiz Cities by Month')
fig = go.Figure(data=data, layout=layout)
fig.show()

In [257]:
basemap2 = folium.Map()
HeatMap(df[['Latitude', 'Longitude', 'AverageTemperature']]).add_to(basemap2)
basemap2

In [263]:
df2 = Swiz.groupby('City')['AverageTemperature'].mean().reset_index()
df2

Unnamed: 0,City,AverageTemperature
0,Basel,6.839018
1,Bern,6.839018
2,Geneva,8.596416
3,Lausanne,8.596416
4,Zurich,6.839018


In [264]:
df2['LocInfo'] = df2['City'].apply(geocoder.geocode)
df2.head()

Unnamed: 0,City,AverageTemperature,LocInfo
0,Basel,6.839018,"[{'annotations': {'DMS': {'lat': ""47° 33' 29.1..."
1,Bern,6.839018,"[{'annotations': {'DMS': {'lat': ""46° 56' 53.7..."
2,Geneva,8.596416,"[{'annotations': {'DMS': {'lat': ""46° 12' 6.32..."
3,Lausanne,8.596416,"[{'annotations': {'DMS': {'lat': ""46° 31' 18.5..."
4,Zurich,6.839018,"[{'annotations': {'DMS': {'lat': ""47° 22' 28.0..."


In [265]:
df2['Latitude'] = df2['LocInfo'].str[0].str['geometry'].str['lat']
df2['Longitude'] = df2['LocInfo'].str[0].str['geometry'].str['lng']
df2.head()

Unnamed: 0,City,AverageTemperature,LocInfo,Latitude,Longitude
0,Basel,6.839018,"[{'annotations': {'DMS': {'lat': ""47° 33' 29.1...",47.558108,7.587826
1,Bern,6.839018,"[{'annotations': {'DMS': {'lat': ""46° 56' 53.7...",46.948271,7.451451
2,Geneva,8.596416,"[{'annotations': {'DMS': {'lat': ""46° 12' 6.32...",46.201756,6.146601
3,Lausanne,8.596416,"[{'annotations': {'DMS': {'lat': ""46° 31' 18.5...",46.521827,6.632702
4,Zurich,6.839018,"[{'annotations': {'DMS': {'lat': ""47° 22' 28.0...",47.374449,8.541042


In [266]:
basemap3 = folium.Map()
HeatMap(df2[['Latitude', 'Longitude', 'AverageTemperature']]).add_to(basemap3)
basemap3

In [267]:
for id,row in df2.iterrows():
    folium.Marker(location=[row['Latitude'], row['Longitude']], popup=row['AverageTemperature']).add_to(basemap3)
basemap3