In [111]:
import pandas as pd
import numpy as np
import seaborn as sns
import plotly.express as px

In [112]:
global_temp_country = pd.read_csv("../data/GlobalLandTemperaturesByCountry.csv")
global_temp_country.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,Country
0,1743-11-01,4.384,2.294,Åland
1,1743-12-01,,,Åland
2,1744-01-01,,,Åland
3,1744-02-01,,,Åland
4,1744-03-01,,,Åland


In [113]:
global_temp_country.shape

(577462, 4)

In [114]:
global_temp_country.isna().sum()

dt                                   0
AverageTemperature               32651
AverageTemperatureUncertainty    31912
Country                              0
dtype: int64

In [115]:
global_temp_country.dropna(axis='index', how='any', subset=['AverageTemperature'], inplace=True)

In [116]:
global_temp_country.isna().sum()

dt                               0
AverageTemperature               0
AverageTemperatureUncertainty    0
Country                          0
dtype: int64

In [117]:
global_temp_country['Country'].nunique()

242

In [118]:
global_temp_country['Country'].unique()

array(['Åland', 'Afghanistan', 'Africa', 'Albania', 'Algeria',
       'American Samoa', 'Andorra', 'Angola', 'Anguilla',
       'Antigua And Barbuda', 'Argentina', 'Armenia', 'Aruba', 'Asia',
       'Australia', 'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain',
       'Baker Island', 'Bangladesh', 'Barbados', 'Belarus', 'Belgium',
       'Belize', 'Benin', 'Bhutan', 'Bolivia',
       'Bonaire, Saint Eustatius And Saba', 'Bosnia And Herzegovina',
       'Botswana', 'Brazil', 'British Virgin Islands', 'Bulgaria',
       'Burkina Faso', 'Burma', 'Burundi', "Côte D'Ivoire", 'Cambodia',
       'Cameroon', 'Canada', 'Cape Verde', 'Cayman Islands',
       'Central African Republic', 'Chad', 'Chile', 'China',
       'Christmas Island', 'Colombia', 'Comoros',
       'Congo (Democratic Republic Of The)', 'Congo', 'Costa Rica',
       'Croatia', 'Cuba', 'Curaçao', 'Cyprus', 'Czech Republic',
       'Denmark (Europe)', 'Denmark', 'Djibouti', 'Dominica',
       'Dominican Republic', 'Ecuador', 'Egypt'

In [119]:
dict = {
    'Denmark (Europe)':'Denmark',
    'France (Europe)':'France',
    'Netherlands (Europe)': 'Netherlands',
    'United Kingdom (Europe)':'United Kingdom',
    'Congo (Democratic Republic Of The)': 'Congo'    
}

In [120]:
global_temp_country['Country'].replace(dict, inplace=True)

In [121]:
avg_temp = global_temp_country.groupby(['Country'])['AverageTemperature'].mean().to_frame().reset_index()
avg_temp

Unnamed: 0,Country,AverageTemperature
0,Afghanistan,14.045007
1,Africa,24.074203
2,Albania,12.610646
3,Algeria,22.985112
4,American Samoa,26.611965
...,...,...
232,Western Sahara,22.319818
233,Yemen,26.253597
234,Zambia,21.282956
235,Zimbabwe,21.117547


In [122]:
from plotly.offline import init_notebook_mode
init_notebook_mode(connected = True)

In [123]:
fig = px.choropleth(avg_temp,locations='Country',locationmode='country names', color='AverageTemperature')
fig.update_layout(title='choropleth map of avg temp')
fig.show()

### Is there a Global Warmming?

In [124]:
global_temp = pd.read_csv('../data/GlobalTemperatures.csv')
global_temp.head()

Unnamed: 0,dt,LandAverageTemperature,LandAverageTemperatureUncertainty,LandMaxTemperature,LandMaxTemperatureUncertainty,LandMinTemperature,LandMinTemperatureUncertainty,LandAndOceanAverageTemperature,LandAndOceanAverageTemperatureUncertainty
0,1750-01-01,3.034,3.574,,,,,,
1,1750-02-01,3.083,3.702,,,,,,
2,1750-03-01,5.626,3.076,,,,,,
3,1750-04-01,8.49,2.451,,,,,,
4,1750-05-01,11.573,2.072,,,,,,


In [125]:
global_temp['dt'][0].split('-')[0]

'1750'

In [126]:
def fetch_year(date):
    return date.split('-')[0]

In [127]:
global_temp['years']= global_temp['dt'].apply(fetch_year)

In [128]:
global_temp.head()

Unnamed: 0,dt,LandAverageTemperature,LandAverageTemperatureUncertainty,LandMaxTemperature,LandMaxTemperatureUncertainty,LandMinTemperature,LandMinTemperatureUncertainty,LandAndOceanAverageTemperature,LandAndOceanAverageTemperatureUncertainty,years
0,1750-01-01,3.034,3.574,,,,,,,1750
1,1750-02-01,3.083,3.702,,,,,,,1750
2,1750-03-01,5.626,3.076,,,,,,,1750
3,1750-04-01,8.49,2.451,,,,,,,1750
4,1750-05-01,11.573,2.072,,,,,,,1750


In [129]:
data = global_temp.groupby('years').aggregate({'LandAverageTemperature':'mean','LandAverageTemperatureUncertainty':'mean'}).reset_index()

In [130]:
data['Uncertainty Top'] = data['LandAverageTemperature'] + data['LandAverageTemperatureUncertainty']
data['Uncertainty Bottom'] = data['LandAverageTemperature'] - data['LandAverageTemperatureUncertainty']

In [131]:
data.head()

Unnamed: 0,years,LandAverageTemperature,LandAverageTemperatureUncertainty,Uncertainty Top,Uncertainty Bottom
0,1750,8.719364,2.637818,11.357182,6.081545
1,1751,7.976143,2.781143,10.757286,5.195
2,1752,5.779833,2.977,8.756833,2.802833
3,1753,8.388083,3.176,11.564083,5.212083
4,1754,8.469333,3.49425,11.963583,4.975083


In [132]:
data.columns

Index(['years', 'LandAverageTemperature', 'LandAverageTemperatureUncertainty',
       'Uncertainty Top', 'Uncertainty Bottom'],
      dtype='object')

In [133]:
fig = px.line(data, x=data['years'], y=['LandAverageTemperature',
       'Uncertainty Top', 'Uncertainty Bottom'], title='Avg Land Temp in World')
fig.show()

### Analyze temperature in each season

In [134]:
global_temp.head()

Unnamed: 0,dt,LandAverageTemperature,LandAverageTemperatureUncertainty,LandMaxTemperature,LandMaxTemperatureUncertainty,LandMinTemperature,LandMinTemperatureUncertainty,LandAndOceanAverageTemperature,LandAndOceanAverageTemperatureUncertainty,years
0,1750-01-01,3.034,3.574,,,,,,,1750
1,1750-02-01,3.083,3.702,,,,,,,1750
2,1750-03-01,5.626,3.076,,,,,,,1750
3,1750-04-01,8.49,2.451,,,,,,,1750
4,1750-05-01,11.573,2.072,,,,,,,1750


In [135]:
global_temp['dt'].dtype

dtype('O')

In [136]:
global_temp['dt'] = pd.to_datetime(global_temp['dt'])

In [137]:
global_temp['month'] = global_temp['dt'].dt.month

In [138]:
def get_season(month):
    if month>=3 and month<5:
        return 'spring'
    elif month>=6 and month<=8:
        return 'summer'
    elif month>=9 and month<=11:
        return 'autumn'
    else:
        return 'winter'
    

In [139]:
global_temp['season']= global_temp['month'].apply(get_season)

In [140]:
global_temp.head()

Unnamed: 0,dt,LandAverageTemperature,LandAverageTemperatureUncertainty,LandMaxTemperature,LandMaxTemperatureUncertainty,LandMinTemperature,LandMinTemperatureUncertainty,LandAndOceanAverageTemperature,LandAndOceanAverageTemperatureUncertainty,years,month,season
0,1750-01-01,3.034,3.574,,,,,,,1750,1,winter
1,1750-02-01,3.083,3.702,,,,,,,1750,2,winter
2,1750-03-01,5.626,3.076,,,,,,,1750,3,spring
3,1750-04-01,8.49,2.451,,,,,,,1750,4,spring
4,1750-05-01,11.573,2.072,,,,,,,1750,5,winter


In [141]:
years = global_temp['years'].unique()

In [142]:
spring_temps=[]
summer_temps=[]
autumn_temps=[]
winter_temps=[]

In [143]:
for year in years:
    current_df = global_temp[global_temp['years']== year]
    spring_temps.append(current_df[current_df['season']=='spring']['LandAverageTemperature'].mean())
    summer_temps.append(current_df[current_df['season']=='summer']['LandAverageTemperature'].mean())
    autumn_temps.append(current_df[current_df['season']=='autumn']['LandAverageTemperature'].mean())
    winter_temps.append(current_df[current_df['season']=='winter']['LandAverageTemperature'].mean())

In [144]:
spring_temps

[7.058,
 6.734999999999999,
 7.035499999999999,
 6.848000000000001,
 7.6434999999999995,
 6.4350000000000005,
 7.117,
 6.387499999999999,
 5.4875,
 5.2515,
 4.6499999999999995,
 6.6485,
 6.2735,
 4.7235,
 6.7005,
 7.936,
 7.0344999999999995,
 6.65,
 4.683,
 6.768499999999999,
 5.7,
 4.265499999999999,
 5.957499999999999,
 6.9185,
 7.898,
 7.855499999999999,
 6.045999999999999,
 5.967,
 7.793999999999999,
 8.466000000000001,
 9.728,
 5.8475,
 5.591499999999999,
 6.6865000000000006,
 6.030500000000001,
 4.334,
 7.483,
 6.234999999999999,
 7.0249999999999995,
 5.8415,
 5.317499999999999,
 6.958500000000001,
 6.211499999999999,
 6.842499999999999,
 7.531500000000001,
 7.3134999999999994,
 5.625,
 5.327999999999999,
 7.5455,
 6.386,
 6.7885,
 6.9295,
 6.411999999999999,
 7.083,
 7.241,
 6.9415,
 6.3614999999999995,
 5.8795,
 5.9215,
 5.629,
 5.7735,
 5.0015,
 4.496999999999999,
 4.888999999999999,
 4.9285,
 5.1255,
 5.7555,
 6.228999999999999,
 7.7330000000000005,
 4.98,
 6.659,
 6.6005,
 8

In [145]:
season = pd.DataFrame()

In [146]:
season['years'] = years 
season['spring_temps']=spring_temps
season['summer_temps']=summer_temps
season['autumn_temps']=autumn_temps
season['winter_temps']=winter_temps

In [147]:
season.head()

Unnamed: 0,years,spring_temps,summer_temps,autumn_temps,winter_temps
0,1750,7.058,14.518333,8.89,5.1155
1,1751,6.735,14.116,10.673,1.729
2,1752,7.0355,,7.587,2.717
3,1753,6.848,14.608333,9.212333,3.87475
4,1754,7.6435,14.208333,8.957333,4.212


In [148]:
season.columns

Index(['years', 'spring_temps', 'summer_temps', 'autumn_temps',
       'winter_temps'],
      dtype='object')

In [149]:
fig = px.line(season,x='years',y=['spring_temps', 'summer_temps', 'autumn_temps',
       'winter_temps'], title='avg temp in each season')
fig.show()

### Analyze Trend Temperatures for the Top Economies

In [150]:
continent = ['Russia','United States', 'China','Japan','Australia', 'India']

In [151]:
global_temp_country.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,Country
0,1743-11-01,4.384,2.294,Åland
5,1744-04-01,1.53,4.68,Åland
6,1744-05-01,6.702,1.789,Åland
7,1744-06-01,11.609,1.577,Åland
8,1744-07-01,15.342,1.41,Åland


In [152]:
continent_df = global_temp_country[global_temp_country['Country'].isin(continent)]
continent_df.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,Country
34816,1852-07-01,14.116,1.53,Australia
34817,1852-08-01,15.33,1.4,Australia
34818,1852-09-01,18.74,1.446,Australia
34819,1852-10-01,21.984,1.493,Australia
34820,1852-11-01,24.073,1.466,Australia


In [153]:
continent_df['years']= continent_df['dt'].apply(fetch_year)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [154]:
avg_temp = continent_df.groupby(['years','Country']).agg({'AverageTemperature': 'mean'}).reset_index()
avg_temp.tail()

Unnamed: 0,years,Country,AverageTemperature
1143,2013,China,9.295
1144,2013,India,25.41325
1145,2013,Japan,12.96475
1146,2013,Russia,-2.263125
1147,2013,United States,11.297111


In [155]:
fig = px.line(avg_temp,x='years', y='AverageTemperature', color='Country', title='Average Land Termperature in world')
fig.show()

### Analyze US Trend Temperature

In [156]:
global_temp_state = pd.read_csv('../data/GlobalLandTemperaturesByState.csv')

In [157]:
global_temp_state.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,State,Country
0,1855-05-01,25.544,1.171,Acre,Brazil
1,1855-06-01,24.228,1.103,Acre,Brazil
2,1855-07-01,24.371,1.044,Acre,Brazil
3,1855-08-01,25.427,1.073,Acre,Brazil
4,1855-09-01,25.675,1.014,Acre,Brazil


In [158]:
filter=global_temp_state['Country'] == 'United States'
USA=global_temp_state[filter]

In [159]:
USA.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,State,Country
7458,1743-11-01,10.722,2.898,Alabama,United States
7459,1743-12-01,,,Alabama,United States
7460,1744-01-01,,,Alabama,United States
7461,1744-02-01,,,Alabama,United States
7462,1744-03-01,,,Alabama,United States


In [160]:
USA.dropna(inplace=True)



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [161]:
USA['State'].unique()

array(['Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California',
       'Colorado', 'Connecticut', 'Delaware', 'District Of Columbia',
       'Florida', 'Georgia (State)', 'Hawaii', 'Idaho', 'Illinois',
       'Indiana', 'Iowa', 'Kansas', 'Kentucky', 'Louisiana', 'Maine',
       'Maryland', 'Massachusetts', 'Michigan', 'Minnesota',
       'Mississippi', 'Missouri', 'Montana', 'Nebraska', 'Nevada',
       'New Hampshire', 'New Jersey', 'New Mexico', 'New York',
       'North Carolina', 'North Dakota', 'Ohio', 'Oklahoma', 'Oregon',
       'Pennsylvania', 'Rhode Island', 'South Carolina', 'South Dakota',
       'Tennessee', 'Texas', 'Utah', 'Vermont', 'Virginia', 'Washington',
       'West Virginia', 'Wisconsin', 'Wyoming'], dtype=object)

In [162]:
state={'Georgia (State)':'Georgia', 'District Of Columbia':'Columbia'}

In [163]:
USA['State'].replace(state, inplace=True)



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [164]:
USA=USA[['AverageTemperature', 'State']]

In [165]:
USA.head()

Unnamed: 0,AverageTemperature,State
7458,10.722,Alabama
7463,19.075,Alabama
7464,21.197,Alabama
7465,25.29,Alabama
7466,26.42,Alabama


In [166]:
USA_temp=USA.groupby('State')['AverageTemperature'].mean().reset_index()
USA_temp.head()

Unnamed: 0,State,AverageTemperature
0,Alabama,17.066138
1,Alaska,-4.890738
2,Arizona,15.381526
3,Arkansas,15.573963
4,California,14.327677


In [167]:
from opencage.geocoder import OpenCageGeocode

In [168]:
key ='5d16629cb1554e4fbcc9d5c93309b1a9'

In [169]:
geocoder = OpenCageGeocode(key)

In [170]:
result= geocoder.geocode(location)


In [171]:
result[0]['geometry']['lat']

41.5405092

In [172]:
result[0]['geometry']['lng']

-1.9203562

In [180]:
list_lat=[]
list_long = []

for state in USA_temp['State']:
    results = geocoder.geocode(state)
    lat = results[0]['geometry']['lat']
    lon = results[0]['geometry']['lng']
    
    list_lat.append(lat)
    list_long.append(lon)

In [181]:
USA_temp['lat'] = list_lat
USA_temp['lon'] = list_long

In [182]:
USA_temp.head()

Unnamed: 0,State,AverageTemperature,lat,lon
0,Alabama,17.066138,33.258882,-86.829534
1,Alaska,-4.890738,64.445961,-149.680909
2,Arizona,15.381526,34.395342,-111.763275
3,Arkansas,15.573963,35.204888,-92.447911
4,California,14.327677,36.701463,-118.755997


In [183]:
import folium

In [184]:
from folium.plugins import HeatMap

In [185]:
basemap=folium.Map()

In [186]:
HeatMap(USA_temp[['lat', 'lon','AverageTemperature']]).add_to(basemap)
basemap