In [63]:
# Importing the necessary libraries
import pandas as pd
import numpy as np

In [64]:
# Load the per-country average temperatures for December 2009 and preview them
Dec_temp = pd.read_csv(
    'Resources/Dec_2009_Temp.csv'
)
Dec_temp.head(n=5)

Unnamed: 0,AverageTemperature,AverageTemperatureUncertainty,Country,Time
0,-0.823,0.427,Aland,Dec-09
1,3.88,0.455,Afghanistan,Dec-09
2,22.738,0.222,Africa,Dec-09
3,7.522,0.369,Albania,Dec-09
4,15.889,0.345,Algeria,Dec-09


In [66]:
# The 48 countries under study, stored in alphabetical order
country_list = sorted([
    'Australia', 'Austria', 'Belgium', 'Canada', 'Chile', 'Colombia',
    'Costa Rica', 'Czech Republic', 'Denmark', 'Estonia', 'Finland',
    'France', 'Germany', 'Greece', 'Hungary', 'Iceland', 'Ireland',
    'Italy', 'Japan', 'Korea', 'Latvia', 'Lithuania', 'Luxembourg',
    'Mexico', 'Netherlands', 'New Zealand', 'Norway', 'Poland',
    'Portugal', 'Slovak Republic', 'Slovenia', 'Spain', 'Sweden',
    'Switzerland', 'Turkey', 'United Kingdom', 'United States',
    'Argentina', 'Brazil', 'Bulgaria', 'China', 'Croatia', 'Cyprus',
    'India', 'Malta', 'North Macedonia', 'Romania', 'Serbia',
])
# Display the number of entries as the cell output
len(country_list)

48

In [67]:
# Collect the distinct country names found in the imported CSV and count them
country_column = Dec_temp['Country']
list1 = country_column.unique()
len(list1)

242

In [68]:
# Count how many country names in the December 2009 data also appear,
# with identical spelling, in the list of study countries
common_list = [name for name in list1 if name in country_list]
print(len(common_list))



48


In [69]:
# Drop the uncertainty column, which is not carried into the later merge
Dec_temp_filtered = Dec_temp.drop(["AverageTemperatureUncertainty"], axis=1)


In [71]:
# Load the country-code lookup table and order it alphabetically by country
countries_48 = (
    pd.read_csv('Resources/48_countries_codes.csv')
    .sort_values('Country')
)
countries_48.head()

Unnamed: 0,Country,Country Code
0,Argentina,ARG
1,Australia,AUS
2,Austria,AUT
3,Belgium,BEL
4,Brazil,BRA


In [72]:
# Attach the December 2009 temperatures to the country-code table (left join on
# 'Country'), then tag every row with the constant year and month of that snapshot
select_temp_df = (
    countries_48
    .merge(Dec_temp_filtered, on='Country', how='left')
    .assign(Year=2009, Month=12)
)

select_temp_df.head()

Unnamed: 0,Country,Country Code,AverageTemperature,Time,Year,Month
0,Argentina,ARG,20.274,Dec-09,2009,12
1,Australia,AUS,28.021,Dec-09,2009,12
2,Austria,AUT,-2.061,Dec-09,2009,12
3,Belgium,BEL,2.2,Dec-09,2009,12
4,Brazil,BRA,25.998,Dec-09,2009,12


## Importing the temperature change data

In [73]:
# Load the monthly temperature-change dataset and preview the first rows
temp_changes = pd.read_csv(
    'Resources/Monthly Temperature Changes Data.csv'
)

temp_changes.head(n=5)

Unnamed: 0,Area,Months,Year Code,Unit,Value,Flag Description
0,Afghanistan,January,1961,?C,0.746,Calculated data
1,Afghanistan,January,1962,?C,0.009,Calculated data
2,Afghanistan,January,1963,?C,2.695,Calculated data
3,Afghanistan,January,1964,?C,-5.277,Calculated data
4,Afghanistan,January,1965,?C,1.827,Calculated data


In [74]:
# Align the country names in the temperature-change data with the spelling
# used in the 48-country list; each key below is rewritten to its value.
area_renames = {
    "Czechia": "Czech Republic",
    "United States of America": "United States",
    "Republic of Korea": "Korea",
    "Macedonia": "North Macedonia",
    "United Kingdom of Great Britain and Northern Ireland": "United Kingdom",
    "Slovakia": "Slovak Republic",
}
temp_changes['Area'] = temp_changes['Area'].replace(area_renames)



In [75]:
# Drop the unit and flag columns, then rename 'Area' and 'Year Code' so the
# column names line up with the other data frames
temp_changes_2 = (
    temp_changes
    .drop(columns=["Unit", "Flag Description"])
    .rename(columns={'Area': 'Country', 'Year Code': 'Year'})
)
temp_changes_2.head()

Unnamed: 0,Country,Months,Year,Value
0,Afghanistan,January,1961,0.746
1,Afghanistan,January,1962,0.009
2,Afghanistan,January,1963,2.695
3,Afghanistan,January,1964,-5.277
4,Afghanistan,January,1965,1.827


In [76]:
# Left-join the country codes onto the temperature-change rows; countries
# without a match in the code table end up with a missing 'Country Code'
temp_changes_3 = temp_changes_2.merge(countries_48, on='Country', how='left')
temp_changes_3.head()

Unnamed: 0,Country,Months,Year,Value,Country Code
0,Afghanistan,January,1961,0.746,
1,Afghanistan,January,1962,0.009,
2,Afghanistan,January,1963,2.695,
3,Afghanistan,January,1964,-5.277,
4,Afghanistan,January,1965,1.827,


In [77]:
# Discard the aggregated rows (seasonal and yearly averages) as well as
# every row from before 2010, keeping only the individual monthly records
aggregate_labels = [
    'Dec?Jan?Feb',
    'Mar?Apr?May',
    'Jun?Jul?Aug',
    'Sep?Oct?Nov',
    'Meteorological year',
]
is_aggregate = temp_changes_3['Months'].isin(aggregate_labels)
is_before_2010 = temp_changes_3['Year'] < 2010
temp_changes_4 = temp_changes_3.loc[~(is_aggregate | is_before_2010)]
temp_changes_4.head()

Unnamed: 0,Country,Months,Year,Value,Country Code
49,Afghanistan,January,2010,3.755,
50,Afghanistan,January,2011,1.312,
51,Afghanistan,January,2012,-0.437,
52,Afghanistan,January,2013,1.366,
53,Afghanistan,January,2014,1.808,


In [85]:
# Keep only the rows whose country has a country code, then report how many
# distinct countries remain and which ones they are
has_code = temp_changes_4['Country Code'].notna()
NotNull = temp_changes_4[has_code]
print(f"The number of countries with country codes is:  {NotNull['Country'].nunique()}")
print("And the list of these countries is like below:")
NotNull['Country'].unique()

The number of countries with country codes is:  48
And the list of these countries is like below:


array(['Argentina', 'Australia', 'Austria', 'Belgium', 'Brazil',
       'Bulgaria', 'Canada', 'Chile', 'China', 'Colombia', 'Costa Rica',
       'Croatia', 'Cyprus', 'Czech Republic', 'Denmark', 'Estonia',
       'Finland', 'France', 'Germany', 'Greece', 'Hungary', 'Iceland',
       'India', 'Ireland', 'Italy', 'Japan', 'Latvia', 'Lithuania',
       'Luxembourg', 'Malta', 'Mexico', 'Netherlands', 'New Zealand',
       'North Macedonia', 'Norway', 'Poland', 'Portugal', 'Korea',
       'Romania', 'Serbia', 'Slovak Republic', 'Slovenia', 'Spain',
       'Sweden', 'Switzerland', 'Turkey', 'United Kingdom',
       'United States'], dtype=object)

## Building the final DataFrame from the previously created DataFrames

In [43]:
# Build a lookup from month name to month number (1-12) using the calendar module
import calendar
month_dict = {calendar.month_name[number]: number for number in range(1, 13)}
print(month_dict)

{'January': 1, 'February': 2, 'March': 3, 'April': 4, 'May': 5, 'June': 6, 'July': 7, 'August': 8, 'September': 9, 'October': 10, 'November': 11, 'December': 12}


In [44]:
# NOTE(review): `selected_country_df` is not defined in any cell visible above;
# presumably it is the filtered 48-country frame with a reset index -- confirm
# that its defining cell runs before this one on a fresh kernel.

# Translate each month name into its numerical value in a single vectorized
# lookup. The previous row-by-row loop indexed rows by the labels 0..n-1, so it
# only worked when the frame carried a default RangeIndex.
month_ints = selected_country_df['Months'].map(month_dict)

# Fail loudly (as the per-row dictionary lookup did) if any label is not a
# month name, instead of silently storing missing values
unmapped_labels = selected_country_df.loc[month_ints.isna(), 'Months'].unique()
if len(unmapped_labels) > 0:
    raise KeyError(f"Unrecognised month labels: {list(unmapped_labels)}")

# Create new column holding the numerical month values
month_ints = month_ints.astype('int64')
selected_country_df['Month'] = month_ints

# Check numerical month values are correctly assigned using groupby.
# numeric_only=True keeps the text columns ('Country', 'Country Code') out of
# the mean, which would otherwise raise a TypeError on pandas >= 2.0.
selected_country_df.groupby('Months').mean(numeric_only=True).sort_values('Month')

Unnamed: 0_level_0,Year,Value,Month
Months,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
January,2015,1.362445,1
February,2015,1.464989,2
March,2015,1.610926,3
April,2015,1.570214,4
May,2015,1.190585,5
June,2015,1.345741,6
July,2015,1.547222,7
August,2015,1.609725,8
September,2015,1.289307,9
October,2015,1.069411,10


In [45]:
# Preview the first five rows, now including the numeric 'Month' column
selected_country_df.head(n=5)

Unnamed: 0,Country,Months,Year,Value,Country Code,Month
0,Argentina,January,2010,0.713,ARG,1
1,Argentina,January,2011,0.597,ARG,1
2,Argentina,January,2012,1.555,ARG,1
3,Argentina,January,2013,0.889,ARG,1
4,Argentina,January,2014,1.089,ARG,1


In [56]:
# Order the rows by country, then year, then month, and show the
# non-null count of every column
sort_keys = ['Country', 'Year', 'Month']
ordered_temp_df = selected_country_df.sort_values(by=sort_keys)
ordered_temp_df.count()

Country         6336
Months          6336
Year            6336
Value           6336
Country Code    6336
Month           6336
dtype: int64

In [65]:
# Combine the 2009 average-temperature frame with the 2010-2020 temperature
# change frame through an outer merge on their shared columns, order the result
# by country/year/month, and remove the text date columns
all_temp_df = select_temp_df.merge(ordered_temp_df, how='outer')
sorted_temp_df = all_temp_df.sort_values(by=['Country', 'Year', 'Month'])
dropped_temp_df = sorted_temp_df.drop(['Time', 'Months'], axis=1)
dropped_temp_df

Unnamed: 0,Country,Country Code,AverageTemperature,Year,Month,Value
0,Argentina,ARG,20.274,2009,12,
48,Argentina,ARG,,2010,1,0.713
49,Argentina,ARG,,2010,2,0.329
50,Argentina,ARG,,2010,3,1.426
51,Argentina,ARG,,2010,4,-0.184
...,...,...,...,...,...,...
6379,United States,USA,,2020,8,1.471
6380,United States,USA,,2020,9,0.721
6381,United States,USA,,2020,10,0.508
6382,United States,USA,,2020,11,2.328
