### First lets import all the libraries that will be used.

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings 
warnings.filterwarnings('ignore')
import plotly.graph_objects as go
from matplotlib.pyplot import figure
import matplotlib.ticker as mticker
import bar_chart_race as bcr
import os
import pickle
import matplotlib.animation as animation
from matplotlib import style
import plotly
import plotly.express as px
import time

### Let's load Global temperature data month wise for last 182 years.

In [2]:
Temperature_Data_By_Country = pd.read_csv('../datasets/raw-datasets/GlobalLandTemperatures_GlobalLandTemperaturesByCountry.csv',error_bad_lines=False)
Temperature_Data_By_Country = Temperature_Data_By_Country[3239:]
Temperature_Data_By_Country = Temperature_Data_By_Country.dropna()
Temperature_Data_By_Country['Temperature_Change'] = 0
Temperature_Data_By_Country['Fahrenheit'] = Temperature_Data_By_Country.apply(lambda x: (9/5)*x['AverageTemperature']+32,axis=1)
Temperature_Data_By_Country = Temperature_Data_By_Country[['dt','Country','Fahrenheit']]
Temperature_Data_By_Country.columns = ['Date', 'Country', 'Average Temperature Fahrenheit']

Temperature_Data_By_Country

Unnamed: 0,Date,Country,Average Temperature Fahrenheit
3239,1838-04-01,Afghanistan,55.4144
3241,1838-06-01,Afghanistan,75.1100
3242,1838-07-01,Afghanistan,80.3786
3243,1838-08-01,Afghanistan,76.8884
3244,1838-09-01,Afghanistan,66.1658
...,...,...,...
577456,4/1/2013,Zimbabwe,70.0556
577457,5/1/2013,Zimbabwe,66.3062
577458,6/1/2013,Zimbabwe,63.7034
577459,7/1/2013,Zimbabwe,62.6000


### Performing data cleaning and filtering to filter out monthly data of last 100 years for all countries in individual dataframes for further use.

Creating a dummy dataframe to append our data.

In [3]:
months = ['January','February','March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']

Dummy = pd.DataFrame()

for i in range(len(months)):
    Dummy.at[i,'Month/Year'] = months[i]

Dummy = Dummy.fillna(0)
Dummy.head()

Unnamed: 0,Month/Year
0,January
1,February
2,March
3,April
4,May


Now adding data to this dummy dataframe created and creating one for every country.

In [4]:
Temperature_Data_Of = dict()
gb = Temperature_Data_By_Country.groupby(['Country'])
for x in gb.groups:
    temp_d1 = gb.get_group(x)
    name = temp_d1.iat[0,1]
#     temp_d1 = temp_d1[1288:2500]
    temp_d1 = temp_d1.reset_index()
    temp_d1 = temp_d1.drop(columns=["index","Country"])
    df = temp_d1.copy()
    df['Modified_date'] = pd.to_datetime(df.Date)
    df['DATE'] = df['Modified_date'].dt.strftime('%m/%d/%Y')
    df['DATE'] = pd.to_datetime(df['DATE'], format='%m/%d/%Y').dt.strftime('%m_%Y')
    df = df[['DATE', 'Average Temperature Fahrenheit']]
    df = df.set_index(['DATE'])
    dummy1 = Dummy.copy()
    for i in dummy1.index:
        if i < 9:
            m = '0' + str(i+1)
        else:
            m = str(i+1)
        for j in range(1912,2012):
            Id = m + '_' + str(j)
            try:
                dummy1.at[i,str(j)] = df.at[Id,'Average Temperature Fahrenheit']
            except:
                dummy1.at[i,str(j)] = 0
    temp = dummy1.copy()
    temp['Minimum'] = temp.iloc[:,1:].min(axis=1)
    temp['Maximum'] = temp.iloc[:,1:].max(axis=1)
    temp['Median'] = temp.iloc[:,1:].median(axis=1)
    temp['Average'] = temp.iloc[:,1:].mean(axis=1)
    Average = temp.iloc[:,1:].mean(axis=0)
    Minimum = temp.iloc[:,1:].min(axis=0)
    Maximum = temp.iloc[:,1:].max(axis=0)
    Median = temp.iloc[:,1:].median(axis=0)
    temp = temp.append(Average,ignore_index = True) 
    temp = temp.append(Minimum,ignore_index = True) 
    temp = temp.append(Maximum,ignore_index = True) 
    temp = temp.append(Median,ignore_index = True) 
    temp.iat[12,0] = 'Average'
    temp.iat[13,0] = 'Minimun'
    temp.iat[14,0] = 'Maximum'
    temp.iat[15,0] = 'Median'

    Temperature_Data_Of[name] = temp.copy()

Let us take an example and check what we created. \
So as we were creating dataframe for every country, now lets check if it really worked or not. \
\
Let's check dataset for 'Russia' :-

In [5]:
Temperature_Data_Of['Russia']

Unnamed: 0,Month/Year,1912,1913,1914,1915,1916,1917,1918,1919,1920,...,2006,2007,2008,2009,2010,2011,Minimum,Maximum,Median,Average
0,January,-12.964,-16.5334,-12.316,-20.0236,-13.1044,-15.5794,-11.407,-20.9974,-9.7816,...,-19.1866,-1.6996,-11.0272,-9.8986,-16.069,-11.1172,-21.6202,-1.6996,-13.3168,-13.237845
1,February,-13.558,-13.1836,-3.9154,-10.2766,-8.473,-11.542,-9.1444,-14.2294,-9.5386,...,-9.1714,-10.9696,-2.7886,-12.2134,-13.4158,-10.4836,-21.2098,2.777,-9.5566,-9.150779
2,March,-2.2468,7.3526,0.3686,2.0048,1.3838,1.4342,4.6958,0.4082,9.635,...,5.945,9.2912,13.2224,6.5138,4.6094,11.8148,-3.2674,15.3896,4.5545,4.680378
3,April,22.0532,20.4386,20.1092,21.6392,19.58,23.1566,21.6806,22.2008,25.6388,...,19.0202,29.5106,23.513,26.0168,23.7092,29.0102,17.1014,29.8976,22.1234,22.502676
4,May,38.4782,37.3586,39.029,40.8128,36.1076,39.7508,35.663,36.455,41.2772,...,40.4438,42.1826,41.3222,40.82,43.6766,42.971,35.1464,43.6766,39.3575,39.405244
5,June,53.1086,51.557,52.457,53.4812,51.53,53.2076,52.3436,51.4562,52.7378,...,56.5142,54.0806,55.1282,54.8168,55.8428,56.6384,50.2934,56.6384,53.1554,53.285874
6,July,56.3918,57.6392,58.1864,60.3266,58.3088,59.0972,58.1414,58.7102,59.2466,...,59.801,61.2536,60.503,59.8262,62.4074,61.4462,56.3918,62.4074,59.2322,59.365278
7,August,50.9126,54.3506,55.085,52.781,53.7944,53.4704,53.7836,55.3406,55.8194,...,55.2704,57.7238,56.4854,55.3478,56.7032,55.841,50.9126,57.7238,54.8204,54.800775
8,September,40.091,41.5616,42.341,41.0198,41.27,43.0916,42.773,43.9718,41.0108,...,45.3722,45.6062,43.3868,45.8474,43.5956,44.1122,40.0658,46.1948,43.0664,43.003645
9,October,16.1096,22.64,23.0306,19.2074,25.7252,23.441,24.6398,27.1652,19.8662,...,24.0584,28.8068,29.8292,27.6116,26.8646,29.5412,16.1096,30.2864,24.7019,24.689038


Therefore our code worked and we successfully created dataframe containing monthly last 100 years of data for every country and we made a dictionary for this and that dictionary contains keys as country names and values as a dataframe containing that country data.

So now lets save this data in our required folder.

In [6]:
pickle.dump( Temperature_Data_Of, open( "../datasets/cleaned-datasets/Temperature_Data_Of_World.p", "wb"))

### Now let's use all these dataframes all together and create single separate dataframes each having worldwide data for average, minimum, maximum and median temperatures for last 100 years 

In [7]:
Average_temperature_Country_Wise = pd.DataFrame()
Minimum_temperature_Country_Wise = pd.DataFrame()
Maximum_temperature_Country_Wise = pd.DataFrame()
Median_temperature_Country_Wise = pd.DataFrame()

for key in Temperature_Data_Of:
    result = Temperature_Data_Of[key].transpose()
    result = result.reset_index()
    result.columns = result.iloc[0]
    result = result[1:]
    result.drop(result.iloc[:, 1:12], axis=1, inplace=True)
    result = result[:-4]
    result = result.fillna(0)
    result = result.set_index(['Month/Year'])
    Average_temperature_Country_Wise[key] = result['Average']
    Minimum_temperature_Country_Wise[key] = result['Minimun']
    Maximum_temperature_Country_Wise[key] = result['Maximum']
    Median_temperature_Country_Wise[key] = result['Median']

#End_Loop

Let's have a look at what we created \
\
Let's see data containing median temperature country wise for last 100 years.

In [8]:
Median_temperature_Country_Wise

Unnamed: 0_level_0,Afghanistan,Africa,Albania,Algeria,American Samoa,Andorra,Angola,Anguilla,Antigua And Barbuda,Argentina,...,United States,Uruguay,Uzbekistan,Venezuela,Vietnam,Virgin Islands,Western Sahara,Yemen,Zambia,Zimbabwe
Month/Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1912,58.0055,76.4420,51.6299,72.1139,79.2671,49.7507,72.5018,80.0015,79.6586,58.8335,...,47.7545,62.0303,54.7511,76.5455,75.2423,79.6001,71.2283,79.4201,71.8160,72.1535
1913,57.1928,76.8020,54.9302,73.3937,79.0304,51.6965,72.5540,79.0871,78.8945,59.4599,...,46.7213,63.7349,51.6146,76.0820,75.0281,78.4013,71.1563,79.1744,71.6936,71.8403
1914,57.9776,77.0009,53.4065,73.9922,79.5398,52.0250,72.9131,79.9367,79.7234,57.3332,...,48.7607,62.4704,54.6674,76.5536,76.5077,79.4129,72.2840,79.5335,72.3488,73.2488
1915,57.1271,77.0252,53.1041,71.9663,79.0781,48.0722,72.5099,80.4722,80.2265,58.7273,...,49.9100,63.5144,54.1949,77.4050,76.5491,79.9502,71.7071,80.6261,72.2480,71.6783
1916,56.6996,76.3610,54.3191,72.1715,79.1681,50.8982,72.9599,79.8080,79.4291,60.3275,...,47.2946,63.9770,54.3794,76.4114,74.7752,79.3634,70.6208,78.7325,72.3326,72.6422
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2007,59.8613,78.2582,55.7186,75.7490,81.4658,54.2219,73.2452,82.0841,81.4487,60.6929,...,49.4672,66.0911,56.9111,78.2087,76.1270,81.4802,73.4801,81.7826,72.0284,73.2371
2008,61.4858,78.1988,55.9373,75.9983,80.7008,52.1555,72.0581,81.2534,80.9294,59.5463,...,47.3594,64.1786,58.4303,78.0575,77.5112,80.8745,75.6500,81.4271,70.8539,72.3506
2009,57.0074,78.6083,55.7231,74.7743,80.4245,53.0465,72.6737,81.6800,81.4217,61.5020,...,47.0354,64.3334,54.0293,78.8477,77.2268,81.1193,73.4162,81.5927,71.8448,72.4748
2010,63.3713,79.0871,54.8285,77.8235,81.1868,52.5920,73.5431,82.1057,81.8060,58.8398,...,50.4167,62.8772,59.1359,78.7487,76.7858,81.5657,75.6257,82.9310,72.8384,73.9472


So similarly we created country wise data for Average, Minimum and Maximum temperture as well. \
\
Now let's save all these data-sets in our required folder.

In [9]:
Average_temperature_Country_Wise.to_csv("../datasets/cleaned-datasets/Average_temperature_Country_Wise.csv")
Minimum_temperature_Country_Wise.to_csv("../datasets/cleaned-datasets/Minimum_temperature_Country_Wise.csv")
Maximum_temperature_Country_Wise.to_csv("../datasets/cleaned-datasets/Maximum_temperature_Country_Wise.csv")
Median_temperature_Country_Wise.to_csv("../datasets/cleaned-datasets/Median_temperature_Country_Wise.csv")

### Now let's  create single separate dataframes each having worldwide data for average, minimum, maximum and median temperatures for last 100 years but with 10-10 years grouping. 

In [10]:
List_datasets = [Average_temperature_Country_Wise, Minimum_temperature_Country_Wise, Maximum_temperature_Country_Wise, Median_temperature_Country_Wise]

Average_temperature_Country_Wise_10_10_years = pd.DataFrame()
Minimum_temperature_Country_Wise_10_10_years = pd.DataFrame()
Maximum_temperature_Country_Wise_10_10_years = pd.DataFrame()
Median_temperature_Country_Wise_10_10_years = pd.DataFrame()
num = 0
for value in List_datasets:
    df_temp = value.copy()
    df_temp = df_temp.reset_index()
    df_temp = df_temp.groupby(df_temp.index // 10).sum()
    df_temp.insert(0, 'Year', 'Null')
    df_temp['Year'] = ['1912-1921','1922-1931','1932-1941','1942-1951','1952-1961','1962-1971',
                                       '1972-1981', '1982-1991', '1992-2001', '2002-2011']
    df_temp.reset_index(inplace = True)
    df_temp.drop('index',axis = 1,inplace = True)
    df_temp.set_index('Year',inplace = True)
    df_temp = (df_temp / 10).round(2)
    if num == 0:
        Average_temperature_Country_Wise_10_10_years = df_temp.copy()
        
    elif num == 1:
        Minimum_temperature_Country_Wise_10_10_years = df_temp.copy()
        
    elif num == 2:
        Maximum_temperature_Country_Wise_10_10_years = df_temp.copy()
        
    elif num == 3:
        Median_temperature_Country_Wise_10_10_years = df_temp.copy()
    num += 1
#EndLoop

Let us have a look at what we created \
\
Let's see data containing average temperature country wise for last 100 years with 10-10 years grouping.

In [11]:
Average_temperature_Country_Wise_10_10_years

Unnamed: 0_level_0,Afghanistan,Africa,Albania,Algeria,American Samoa,Andorra,Angola,Anguilla,Antigua And Barbuda,Argentina,...,United States,Uruguay,Uzbekistan,Venezuela,Vietnam,Virgin Islands,Western Sahara,Yemen,Zambia,Zimbabwe
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1912-1921,56.98,75.08,54.61,73.03,79.2,51.7,71.02,79.4,79.07,58.23,...,47.28,62.85,53.97,76.47,74.36,78.94,72.03,78.59,70.19,69.86
1922-1931,56.59,75.42,55.2,73.66,79.59,52.33,71.34,79.8,79.5,58.02,...,47.85,62.79,53.48,76.89,74.61,79.3,72.47,79.07,70.29,70.0
1932-1941,57.27,75.52,54.81,73.6,80.26,52.07,71.2,79.94,79.62,58.34,...,48.37,62.96,54.21,77.18,74.89,79.39,72.21,78.85,70.01,69.72
1942-1951,57.32,75.49,55.48,73.81,80.09,53.02,71.49,79.94,79.6,58.76,...,47.83,63.32,53.91,77.23,75.17,79.44,72.28,79.34,70.37,70.26
1952-1961,57.39,75.44,55.27,73.71,80.22,52.51,71.34,80.46,80.14,58.37,...,48.09,63.13,54.29,77.57,75.25,79.95,72.33,79.15,70.53,70.2
1962-1971,57.41,75.47,55.03,73.66,79.93,52.23,71.42,80.17,79.9,58.83,...,47.66,63.17,54.9,77.53,75.04,79.71,72.54,79.25,70.34,70.25
1972-1981,57.29,75.56,54.59,73.41,80.26,52.04,71.45,80.32,80.04,58.88,...,47.97,63.55,54.66,77.63,75.2,79.81,72.33,79.52,70.21,69.82
1982-1991,58.2,76.04,55.19,74.38,80.55,53.32,72.04,80.64,80.31,59.2,...,48.42,63.92,55.48,77.81,75.32,80.14,73.27,79.28,71.23,71.1
1992-2001,58.86,76.38,55.96,74.88,80.81,53.85,72.51,81.13,80.83,59.33,...,48.85,64.16,55.91,78.17,75.37,80.63,73.67,80.41,71.51,71.31
2002-2011,59.67,76.95,56.31,75.65,81.05,54.13,72.54,81.61,81.3,59.52,...,49.3,64.31,56.79,78.6,75.71,81.12,74.23,81.02,71.58,71.16


So similarly we created country wise dataets for Median, Minimum and Maximum temperture as well with 10-10 years grouping. \
\
Now let's save all these data-sets in our required folder.

In [12]:
Average_temperature_Country_Wise_10_10_years.to_csv("../datasets/cleaned-datasets/Average_temperature_Country_Wise_10_10_years.csv")

Minimum_temperature_Country_Wise_10_10_years.to_csv("../datasets/cleaned-datasets/Minimum_temperature_Country_Wise_10_10_years.csv")

Maximum_temperature_Country_Wise_10_10_years.to_csv("../datasets/cleaned-datasets/Maximum_temperature_Country_Wise_10_10_years.csv")

Median_temperature_Country_Wise_10_10_years.to_csv("../datasets/cleaned-datasets/Median_temperature_Country_Wise_10_10_years.csv")


So this was all the data cleaning, pre-processing and summarizing. Now its time for analysis. 

## End of Data Cleaning, Pre-processing and Summarizing..
# Finished..:)