#### In this project, i will examine if there has been a change in temperature in the EU over the past two decades and if there are factors that might have had an impact on those changes (if they exist).

# Temperature Data

In [1]:
import pandas as pd

In [2]:
# global average monthly temperatures from 1743 to 2013
temp_country = pd.read_csv('GlobalLandTemperatures_GlobalLandTemperaturesByCountry.csv')
temp_country

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,Country
0,1743-11-01,4.384,2.294,Åland
1,1743-12-01,,,Åland
2,1744-01-01,,,Åland
3,1744-02-01,,,Åland
4,1744-03-01,,,Åland
...,...,...,...,...
577457,2013-05-01,19.059,1.022,Zimbabwe
577458,2013-06-01,17.613,0.473,Zimbabwe
577459,2013-07-01,17.000,0.453,Zimbabwe
577460,2013-08-01,19.759,0.717,Zimbabwe


In [3]:
# select EU countries
eu_temp_country = temp_country[temp_country.Country.str.contains('Austria|Belgium|Bulgaria|Croatia|Cyprus|Czech Republic|Denmark|Estonia|Finland|France|Germany|Greece|Hungary|Ireland|Italy|Latvia|Lithuania|Luxembourg|Malta|Netherlands|Poland|Portugal|Romania|Slovakia|Slovenia|Spain|Sweden')]
# France, Denmark Netherlands were in there twice (one of the times with '(europe)' at the end)
eu_temp_country = eu_temp_country[~eu_temp_country.Country.str.contains('Europe')]
# keeping only data from 2000 onwards (September of 2013 was NaN for every country, so deleting it as well)
eu_temp_country = eu_temp_country[~eu_temp_country.dt.str.contains('17|18|19|2013-09-01')]
# dropping the uncertainty column as i dont need it for my analysis
eu_temp_country = eu_temp_country.drop(['AverageTemperatureUncertainty'], axis = 1)
eu_temp_country

Unnamed: 0,dt,AverageTemperature,Country
39825,2000-01-01,-4.598,Austria
39826,2000-02-01,0.896,Austria
39827,2000-03-01,3.068,Austria
39828,2000-04-01,9.160,Austria
39829,2000-05-01,13.570,Austria
...,...,...,...
508629,2013-04-01,1.059,Sweden
508630,2013-05-01,10.509,Sweden
508631,2013-06-01,13.741,Sweden
508632,2013-07-01,15.262,Sweden


In [4]:
# checking dtypes
eu_temp_country.dtypes
# dt (date) not in datetime format, changing that
eu_temp_country['dt'] = pd.to_datetime(eu_temp_country['dt'])

In [5]:
# renaming dt to date, standardizing column names
eu_temp_country = eu_temp_country.rename(columns={"dt": "date"})
eu_temp_country.columns = [x.lower() for x in eu_temp_country.columns]
# eu_temp_country.columns = eu_temp_country.columns.str.replace(" ", "_", regex=True)
# resetting index
eu_temp_country.reset_index(inplace=True)
eu_temp_country = eu_temp_country.drop(['index'], axis = 1)
eu_temp_country

Unnamed: 0,date,averagetemperature,country
0,2000-01-01,-4.598,Austria
1,2000-02-01,0.896,Austria
2,2000-03-01,3.068,Austria
3,2000-04-01,9.160,Austria
4,2000-05-01,13.570,Austria
...,...,...,...
4423,2013-04-01,1.059,Sweden
4424,2013-05-01,10.509,Sweden
4425,2013-06-01,13.741,Sweden
4426,2013-07-01,15.262,Sweden


In [6]:
# checking for NaNs
eu_temp_country.isnull().sum()

date                  0
averagetemperature    0
country               0
dtype: int64

In [7]:
# saving the Dataframe 
eu_temp_country.to_csv('EU_Countries_2000_2013_AVG_Temp.csv', index=False)

In [8]:
eu_temp_country

Unnamed: 0,date,averagetemperature,country
0,2000-01-01,-4.598,Austria
1,2000-02-01,0.896,Austria
2,2000-03-01,3.068,Austria
3,2000-04-01,9.160,Austria
4,2000-05-01,13.570,Austria
...,...,...,...
4423,2013-04-01,1.059,Sweden
4424,2013-05-01,10.509,Sweden
4425,2013-06-01,13.741,Sweden
4426,2013-07-01,15.262,Sweden


### saved dataframe, now the next one

In [9]:
# 
eu_temp_change_country = pd.read_csv('temp_change_eu_2000_2013.csv')
eu_temp_change_country

Unnamed: 0,Domain,Area,Element,Months,Year,Unit,Value
0,Temperature change,Austria,Temperature change,January,2000,°C,-0.244
1,Temperature change,Austria,Temperature change,January,2001,°C,1.924
2,Temperature change,Austria,Temperature change,January,2002,°C,1.392
3,Temperature change,Austria,Temperature change,January,2003,°C,0.363
4,Temperature change,Austria,Temperature change,January,2004,°C,0.074
...,...,...,...,...,...,...,...
4531,Temperature change,Sweden,Temperature change,December,2009,°C,-0.984
4532,Temperature change,Sweden,Temperature change,December,2010,°C,-5.641
4533,Temperature change,Sweden,Temperature change,December,2011,°C,4.159
4534,Temperature change,Sweden,Temperature change,December,2012,°C,-2.813


In [10]:
# changing months to numbers (applying the first of each month to adjust to eu_temp_country)
eu_temp_change_country.Months = eu_temp_change_country.Months.str.replace('January', '-01-01')
eu_temp_change_country.Months = eu_temp_change_country.Months.str.replace('February', '-02-01')
eu_temp_change_country.Months = eu_temp_change_country.Months.str.replace('March', '-03-01')
eu_temp_change_country.Months = eu_temp_change_country.Months.str.replace('April', '-04-01')
eu_temp_change_country.Months = eu_temp_change_country.Months.str.replace('May', '-05-01')
eu_temp_change_country.Months = eu_temp_change_country.Months.str.replace('June', '-06-01')
eu_temp_change_country.Months = eu_temp_change_country.Months.str.replace('July', '-07-01')
eu_temp_change_country.Months = eu_temp_change_country.Months.str.replace('August', '-08-01')
eu_temp_change_country.Months = eu_temp_change_country.Months.str.replace('September', '-09-01')
eu_temp_change_country.Months = eu_temp_change_country.Months.str.replace('October', '-10-01')
eu_temp_change_country.Months = eu_temp_change_country.Months.str.replace('November', '-11-01')
eu_temp_change_country.Months = eu_temp_change_country.Months.str.replace('December', '-12-01')


In [11]:
# concatinating Months and Year column
eu_temp_change_country["date"] = eu_temp_change_country['Year'].astype(str) +""+ eu_temp_change_country["Months"]


# sorting by date
eu_temp_change_country.sort_values(by=['Area','date'], inplace = True) 
eu_temp_change_country

Unnamed: 0,Domain,Area,Element,Months,Year,Unit,Value,date
0,Temperature change,Austria,Temperature change,-01-01,2000,°C,-0.244,2000-01-01
14,Temperature change,Austria,Temperature change,-02-01,2000,°C,3.230,2000-02-01
28,Temperature change,Austria,Temperature change,-03-01,2000,°C,1.710,2000-03-01
42,Temperature change,Austria,Temperature change,-04-01,2000,°C,3.280,2000-04-01
56,Temperature change,Austria,Temperature change,-05-01,2000,°C,2.955,2000-05-01
...,...,...,...,...,...,...,...,...
4479,Temperature change,Sweden,Temperature change,-08-01,2013,°C,1.221,2013-08-01
4493,Temperature change,Sweden,Temperature change,-09-01,2013,°C,1.588,2013-09-01
4507,Temperature change,Sweden,Temperature change,-10-01,2013,°C,1.318,2013-10-01
4521,Temperature change,Sweden,Temperature change,-11-01,2013,°C,1.869,2013-11-01


In [12]:
# dropping row sept - dec 2013 to make it equal to eu_temp_country
eu_temp_change_country = eu_temp_change_country[~eu_temp_change_country.date.str.contains('2013-09-01|2013-10-01|2013-11-01|2013-12-01')]
eu_temp_change_country

Unnamed: 0,Domain,Area,Element,Months,Year,Unit,Value,date
0,Temperature change,Austria,Temperature change,-01-01,2000,°C,-0.244,2000-01-01
14,Temperature change,Austria,Temperature change,-02-01,2000,°C,3.230,2000-02-01
28,Temperature change,Austria,Temperature change,-03-01,2000,°C,1.710,2000-03-01
42,Temperature change,Austria,Temperature change,-04-01,2000,°C,3.280,2000-04-01
56,Temperature change,Austria,Temperature change,-05-01,2000,°C,2.955,2000-05-01
...,...,...,...,...,...,...,...,...
4423,Temperature change,Sweden,Temperature change,-04-01,2013,°C,0.399,2013-04-01
4437,Temperature change,Sweden,Temperature change,-05-01,2013,°C,3.458,2013-05-01
4451,Temperature change,Sweden,Temperature change,-06-01,2013,°C,1.420,2013-06-01
4465,Temperature change,Sweden,Temperature change,-07-01,2013,°C,1.009,2013-07-01


In [13]:
# changing date column into datetime format
eu_temp_change_country['date'] = pd.to_datetime(eu_temp_change_country['date'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  eu_temp_change_country['date'] = pd.to_datetime(eu_temp_change_country['date'])


In [14]:
# sorting by date
eu_temp_change_country.sort_values(by=['Area','date'], inplace = True) 
eu_temp_change_country

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  eu_temp_change_country.sort_values(by=['Area','date'], inplace = True)


Unnamed: 0,Domain,Area,Element,Months,Year,Unit,Value,date
0,Temperature change,Austria,Temperature change,-01-01,2000,°C,-0.244,2000-01-01
14,Temperature change,Austria,Temperature change,-02-01,2000,°C,3.230,2000-02-01
28,Temperature change,Austria,Temperature change,-03-01,2000,°C,1.710,2000-03-01
42,Temperature change,Austria,Temperature change,-04-01,2000,°C,3.280,2000-04-01
56,Temperature change,Austria,Temperature change,-05-01,2000,°C,2.955,2000-05-01
...,...,...,...,...,...,...,...,...
4423,Temperature change,Sweden,Temperature change,-04-01,2013,°C,0.399,2013-04-01
4437,Temperature change,Sweden,Temperature change,-05-01,2013,°C,3.458,2013-05-01
4451,Temperature change,Sweden,Temperature change,-06-01,2013,°C,1.420,2013-06-01
4465,Temperature change,Sweden,Temperature change,-07-01,2013,°C,1.009,2013-07-01


In [15]:
# checkin dtypes to see if date conversion worked
eu_temp_change_country.dtypes

Domain             object
Area               object
Element            object
Months             object
Year                int64
Unit               object
Value             float64
date       datetime64[ns]
dtype: object

In [16]:
# dropping Element, Months, Year, Domain Unit Column
eu_temp_change_country = eu_temp_change_country.drop(['Domain','Element', 'Months', 'Year', 'Unit'], axis = 1)

In [17]:
# resetting index
eu_temp_change_country.reset_index(inplace=True)
eu_temp_change_country = eu_temp_change_country.drop(['index'], axis = 1)
eu_temp_change_country

Unnamed: 0,Area,Value,date
0,Austria,-0.244,2000-01-01
1,Austria,3.230,2000-02-01
2,Austria,1.710,2000-03-01
3,Austria,3.280,2000-04-01
4,Austria,2.955,2000-05-01
...,...,...,...
4423,Sweden,0.399,2013-04-01
4424,Sweden,3.458,2013-05-01
4425,Sweden,1.420,2013-06-01
4426,Sweden,1.009,2013-07-01


In [18]:
# renaming Area to Country and Value to Temperature change
eu_temp_change_country = eu_temp_change_country.rename(columns={"Area" : "country", "Value" : "temperature_change"})
eu_temp_change_country

Unnamed: 0,country,temperature_change,date
0,Austria,-0.244,2000-01-01
1,Austria,3.230,2000-02-01
2,Austria,1.710,2000-03-01
3,Austria,3.280,2000-04-01
4,Austria,2.955,2000-05-01
...,...,...,...
4423,Sweden,0.399,2013-04-01
4424,Sweden,3.458,2013-05-01
4425,Sweden,1.420,2013-06-01
4426,Sweden,1.009,2013-07-01


In [19]:
# merging eu_temp_change_country with eu_temp_country
eu_temp_change_avg = pd.merge(left = eu_temp_country, 
                              right = eu_temp_change_country,
                              how = 'left', 
                              left_on = ['country', 'date'], 
                              right_on = ['country', 'date'])
eu_temp_change_avg

Unnamed: 0,date,averagetemperature,country,temperature_change
0,2000-01-01,-4.598,Austria,-0.244
1,2000-02-01,0.896,Austria,3.230
2,2000-03-01,3.068,Austria,1.710
3,2000-04-01,9.160,Austria,3.280
4,2000-05-01,13.570,Austria,2.955
...,...,...,...,...
4423,2013-04-01,1.059,Sweden,0.399
4424,2013-05-01,10.509,Sweden,3.458
4425,2013-06-01,13.741,Sweden,1.420
4426,2013-07-01,15.262,Sweden,1.009


In [22]:
# checking if merging went right
display(eu_temp_change_avg.head(5))
display(eu_temp_country.head(5))
display(eu_temp_change_country.head(5))

Unnamed: 0,date,averagetemperature,country,temperature_change
0,2000-01-01,-4.598,Austria,-0.244
1,2000-02-01,0.896,Austria,3.23
2,2000-03-01,3.068,Austria,1.71
3,2000-04-01,9.16,Austria,3.28
4,2000-05-01,13.57,Austria,2.955


Unnamed: 0,date,averagetemperature,country
0,2000-01-01,-4.598,Austria
1,2000-02-01,0.896,Austria
2,2000-03-01,3.068,Austria
3,2000-04-01,9.16,Austria
4,2000-05-01,13.57,Austria


Unnamed: 0,country,temperature_change,date
0,Austria,-0.244,2000-01-01
1,Austria,3.23,2000-02-01
2,Austria,1.71,2000-03-01
3,Austria,3.28,2000-04-01
4,Austria,2.955,2000-05-01


In [27]:
# reordering columns for readability
eu_temp_change_avg = eu_temp_change_avg[['country', 'date', 'averagetemperature', 'temperature_change']]
eu_temp_change_avg

Unnamed: 0,country,date,averagetemperature,temperature_change
0,Austria,2000-01-01,-4.598,-0.244
1,Austria,2000-02-01,0.896,3.230
2,Austria,2000-03-01,3.068,1.710
3,Austria,2000-04-01,9.160,3.280
4,Austria,2000-05-01,13.570,2.955
...,...,...,...,...
4423,Sweden,2013-04-01,1.059,0.399
4424,Sweden,2013-05-01,10.509,3.458
4425,Sweden,2013-06-01,13.741,1.420
4426,Sweden,2013-07-01,15.262,1.009


In [28]:
# saving the final temperature table
eu_temp_change_avg.to_csv('Final_Temperature_Table.csv', index=False)