<a href="https://colab.research.google.com/github/kansetejas/Analysis_on_Global_Data/blob/main/Global_average_Temperature_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Global Average Temperature Analysis

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import plotly.express as px

In [2]:
global_temp_country = pd.read_csv('/content/GlobalLandTemperaturesByCountry.csv')

In [3]:
global_temp_country.head(2)

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,Country
0,1743-11-01,4.384,2.294,Åland
1,1743-12-01,,,Åland


In [4]:
global_temp_country.shape

(80388, 4)

In [5]:
global_temp_country.duplicated().sum()

0

In [6]:
global_temp_country.isna().sum()

dt                                  0
AverageTemperature               4457
AverageTemperatureUncertainty    3718
Country                             0
dtype: int64

In [7]:
global_temp_country.dropna(axis = 'index', how = 'any', subset = ['AverageTemperature'],inplace = True)

In [8]:
global_temp_country.isna().sum()

dt                               0
AverageTemperature               0
AverageTemperatureUncertainty    0
Country                          0
dtype: int64

In [9]:
global_temp_country['Country'].nunique()

34

In [10]:
global_temp_country['Country'].unique()

array(['Åland', 'Afghanistan', 'Africa', 'Albania', 'Algeria',
       'American Samoa', 'Andorra', 'Angola', 'Anguilla',
       'Antigua And Barbuda', 'Argentina', 'Armenia', 'Aruba', 'Asia',
       'Australia', 'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain',
       'Baker Island', 'Bangladesh', 'Barbados', 'Belarus', 'Belgium',
       'Belize', 'Benin', 'Bhutan', 'Bolivia',
       'Bonaire, Saint Eustatius And Saba', 'Bosnia And Herzegovina',
       'Botswana', 'Brazil', 'British Virgin Islands',
       'British Virgin Islan'], dtype=object)

In [11]:
dict = {
    'Congo (Democratic Republic Of The':'Congo',
    'Denmark (Europe)':'Denmark',
    'Falkland Islands (Islas Malvinas)':'Falkin Islands',
    'France (Europe)':'France'

}

In [12]:
global_temp_country['Country'].replace(dict, inplace = True)

In [13]:
average_temp = global_temp_country.groupby(['Country'])['AverageTemperature'].mean().to_frame().reset_index()

In [14]:
average_temp.head(1)

Unnamed: 0,Country,AverageTemperature
0,Afghanistan,14.045007


In [15]:
from plotly.offline import init_notebook_mode
init_notebook_mode(connected=True)

In [16]:
fig = px.choropleth(average_temp , locations = "Country" , locationmode = "country names" , color = "AverageTemperature")
fig.update_layout(title = "Chloropleath map of average temperature")
fig.show()

Analyzing Existence of Global Warming

In [18]:
global_temp = pd.read_csv('/content/GlobalTemperatures.csv')

In [19]:
global_temp.head(10)

Unnamed: 0,dt,LandAverageTemperature,LandAverageTemperatureUncertainty,LandMaxTemperature,LandMaxTemperatureUncertainty,LandMinTemperature,LandMinTemperatureUncertainty,LandAndOceanAverageTemperature,LandAndOceanAverageTemperatureUncertainty
0,1750-01-01,3.034,3.574,,,,,,
1,1750-02-01,3.083,3.702,,,,,,
2,1750-03-01,5.626,3.076,,,,,,
3,1750-04-01,8.49,2.451,,,,,,
4,1750-05-01,11.573,2.072,,,,,,
5,1750-06-01,12.937,1.724,,,,,,
6,1750-07-01,15.868,1.911,,,,,,
7,1750-08-01,14.75,2.231,,,,,,
8,1750-09-01,11.413,2.637,,,,,,
9,1750-10-01,6.367,2.668,,,,,,


In [20]:
global_temp.duplicated().sum()

0

In [21]:
global_temp.isnull().sum()

dt                                              0
LandAverageTemperature                         12
LandAverageTemperatureUncertainty              12
LandMaxTemperature                           1200
LandMaxTemperatureUncertainty                1200
LandMinTemperature                           1200
LandMinTemperatureUncertainty                1200
LandAndOceanAverageTemperature               1200
LandAndOceanAverageTemperatureUncertainty    1200
dtype: int64

In [22]:
global_temp.shape

(3192, 9)

In [23]:
global_temp['dt'][0].split('-')[0]

'1750'

In [24]:
def fetch_year(date):
  return date.split('-')[0]

In [25]:
global_temp['Year'] = global_temp['dt'].apply(fetch_year)

In [26]:
global_temp.head(3)

Unnamed: 0,dt,LandAverageTemperature,LandAverageTemperatureUncertainty,LandMaxTemperature,LandMaxTemperatureUncertainty,LandMinTemperature,LandMinTemperatureUncertainty,LandAndOceanAverageTemperature,LandAndOceanAverageTemperatureUncertainty,Year
0,1750-01-01,3.034,3.574,,,,,,,1750
1,1750-02-01,3.083,3.702,,,,,,,1750
2,1750-03-01,5.626,3.076,,,,,,,1750


In [27]:
data = global_temp.groupby('Year').agg({'LandAverageTemperature' : 'mean' , 'LandAverageTemperatureUncertainty' : 'mean'}).reset_index()

In [28]:
data.head(2)

Unnamed: 0,Year,LandAverageTemperature,LandAverageTemperatureUncertainty
0,1750,8.719364,2.637818
1,1751,7.976143,2.781143


In [29]:
data['Uncertainity Top'] = data['LandAverageTemperature'] + data['LandAverageTemperatureUncertainty']


In [30]:
data['Uncertainity low'] = data['LandAverageTemperature'] - data['LandAverageTemperatureUncertainty']

In [31]:
data.head(1)

Unnamed: 0,Year,LandAverageTemperature,LandAverageTemperatureUncertainty,Uncertainity Top,Uncertainity low
0,1750,8.719364,2.637818,11.357182,6.081545


In [32]:
fig = px.line(data, x='Year', y=['LandAverageTemperature', 'Uncertainity Top', 'Uncertainity low'])
fig.show()

Analyze Average Temperature in each Season

In [33]:
import numpy as np
import pandas as pd
import plotly.express as px

In [34]:
data.head(2)

Unnamed: 0,Year,LandAverageTemperature,LandAverageTemperatureUncertainty,Uncertainity Top,Uncertainity low
0,1750,8.719364,2.637818,11.357182,6.081545
1,1751,7.976143,2.781143,10.757286,5.195


In [35]:
global_temp.head(2)

Unnamed: 0,dt,LandAverageTemperature,LandAverageTemperatureUncertainty,LandMaxTemperature,LandMaxTemperatureUncertainty,LandMinTemperature,LandMinTemperatureUncertainty,LandAndOceanAverageTemperature,LandAndOceanAverageTemperatureUncertainty,Year
0,1750-01-01,3.034,3.574,,,,,,,1750
1,1750-02-01,3.083,3.702,,,,,,,1750


In [36]:
global_temp['dt'].dtype

dtype('O')

In [37]:
global_temp['dt'] = pd.to_datetime(global_temp['dt'])

In [38]:
global_temp['month'] = global_temp['dt'].dt.month

In [39]:
def get_season(month):
  if month>=3 and month<=5:
    return 'spring'
  elif month>=6 and month<=8:
    return 'summer'
  elif month>=9 and month<=11:
    return 'autum'
  else:
    return 'winter'

In [40]:
global_temp['season'] = global_temp['month'].apply(get_season)

In [41]:
global_temp.head(10)

Unnamed: 0,dt,LandAverageTemperature,LandAverageTemperatureUncertainty,LandMaxTemperature,LandMaxTemperatureUncertainty,LandMinTemperature,LandMinTemperatureUncertainty,LandAndOceanAverageTemperature,LandAndOceanAverageTemperatureUncertainty,Year,month,season
0,1750-01-01,3.034,3.574,,,,,,,1750,1,winter
1,1750-02-01,3.083,3.702,,,,,,,1750,2,winter
2,1750-03-01,5.626,3.076,,,,,,,1750,3,spring
3,1750-04-01,8.49,2.451,,,,,,,1750,4,spring
4,1750-05-01,11.573,2.072,,,,,,,1750,5,spring
5,1750-06-01,12.937,1.724,,,,,,,1750,6,summer
6,1750-07-01,15.868,1.911,,,,,,,1750,7,summer
7,1750-08-01,14.75,2.231,,,,,,,1750,8,summer
8,1750-09-01,11.413,2.637,,,,,,,1750,9,autum
9,1750-10-01,6.367,2.668,,,,,,,1750,10,autum


In [42]:
 years =global_temp['Year'].unique()

In [43]:
spring_temp = []
winter_temp = []
summer_temp = []
autum_temp = []

In [44]:
for i in years:
  current_df = global_temp[global_temp['Year']==i]
  spring_temp.append(current_df[current_df['season']=='spring']['LandAverageTemperature'].mean())
  winter_temp.append(current_df[current_df['season']=='winter']['LandAverageTemperature'].mean())
  summer_temp.append(current_df[current_df['season']=='summer']['LandAverageTemperature'].mean())
  autum_temp.append(current_df[current_df['season']=='autum']['LandAverageTemperature'].mean())

In [45]:
spring_temp

[8.563,
 6.734999999999999,
 7.035499999999999,
 8.627333333333334,
 9.074333333333334,
 8.583666666666666,
 9.466,
 8.604666666666667,
 6.896666666666666,
 6.897333333333333,
 6.653666666666666,
 8.916,
 7.809333333333332,
 6.716,
 8.192,
 8.868666666666668,
 8.432333333333332,
 7.831,
 6.144000000000001,
 8.803333333333333,
 7.132000000000001,
 6.0523333333333325,
 7.148666666666666,
 8.866999999999999,
 10.607,
 9.036666666666667,
 7.522333333333333,
 7.774333333333334,
 8.957999999999998,
 10.370666666666667,
 11.737666666666664,
 7.599,
 7.390999999999998,
 8.397333333333334,
 7.3580000000000005,
 6.173666666666667,
 8.849666666666666,
 7.9576666666666656,
 8.159333333333334,
 7.783,
 6.997333333333333,
 7.9253333333333345,
 7.914666666666666,
 8.248,
 9.146333333333333,
 8.552,
 7.507666666666666,
 7.024333333333334,
 8.953333333333333,
 8.041666666666666,
 8.224666666666666,
 8.660666666666666,
 7.760333333333333,
 8.653666666666666,
 8.863,
 8.328999999999999,
 8.07533333333333

In [46]:
season = pd.DataFrame()

In [47]:
season['year'] = years
season['spring_temp'] = spring_temp
season['winter_temp'] = winter_temp
season['summer_temp'] = summer_temp
season['autum_temp'] = autum_temp


In [48]:
season.head(3)

Unnamed: 0,year,spring_temp,winter_temp,summer_temp,autum_temp
0,1750,8.563,2.963,14.518333,8.89
1,1751,6.735,1.729,14.116,10.673
2,1752,7.0355,2.717,,7.587


In [49]:
fig = px.line(season, x= 'year', y=['spring_temp', 'winter_temp', 'summer_temp', 'autum_temp'], title = "average temperature in each season")
fig.show()