In [2]:
import pandas as pd
import numpy as np
import matplotlib as mt
import plotly as plt
import seaborn as sns
from statsmodels.tsa.seasonal import seasonal_decompose

## Loading the data

In [20]:
data = pd.read_csv("Environment_Temperature_change_E_All_Data_NOFLAG.csv", encoding = 'latin1')
data.head(5)

Unnamed: 0,Area Code,Area,Months Code,Months,Element Code,Element,Unit,Y1961,Y1962,Y1963,...,Y2010,Y2011,Y2012,Y2013,Y2014,Y2015,Y2016,Y2017,Y2018,Y2019
0,2,Afghanistan,7001,January,7271,Temperature change,°C,0.777,0.062,2.744,...,3.601,1.179,-0.583,1.233,1.755,1.943,3.416,1.201,1.996,2.951
1,2,Afghanistan,7001,January,6078,Standard Deviation,°C,1.95,1.95,1.95,...,1.95,1.95,1.95,1.95,1.95,1.95,1.95,1.95,1.95,1.95
2,2,Afghanistan,7002,February,7271,Temperature change,°C,-1.743,2.465,3.919,...,1.212,0.321,-3.201,1.494,-3.187,2.699,2.251,-0.323,2.705,0.086
3,2,Afghanistan,7002,February,6078,Standard Deviation,°C,2.597,2.597,2.597,...,2.597,2.597,2.597,2.597,2.597,2.597,2.597,2.597,2.597,2.597
4,2,Afghanistan,7003,March,7271,Temperature change,°C,0.516,1.336,0.403,...,3.39,0.748,-0.527,2.246,-0.076,-0.497,2.296,0.834,4.418,0.234


## Data Preprocessing

Remove rows pertaining to standard deviation

In [21]:
data = data[data["Element Code"] < 7271]
data.head(2)

Unnamed: 0,Area Code,Area,Months Code,Months,Element Code,Element,Unit,Y1961,Y1962,Y1963,...,Y2010,Y2011,Y2012,Y2013,Y2014,Y2015,Y2016,Y2017,Y2018,Y2019
1,2,Afghanistan,7001,January,6078,Standard Deviation,°C,1.95,1.95,1.95,...,1.95,1.95,1.95,1.95,1.95,1.95,1.95,1.95,1.95,1.95
3,2,Afghanistan,7002,February,6078,Standard Deviation,°C,2.597,2.597,2.597,...,2.597,2.597,2.597,2.597,2.597,2.597,2.597,2.597,2.597,2.597


Remove data on seasonal and meteorological year periodicity

In [22]:
data = data[data["Months Code"] < 7016]

Drop irrelevant columns

In [23]:
data.drop(columns=['Area Code', 'Months Code', 'Element Code','Element', 'Unit'], axis=1, inplace=True)
data.head(5)

Unnamed: 0,Area,Months,Y1961,Y1962,Y1963,Y1964,Y1965,Y1966,Y1967,Y1968,...,Y2010,Y2011,Y2012,Y2013,Y2014,Y2015,Y2016,Y2017,Y2018,Y2019
1,Afghanistan,January,1.95,1.95,1.95,1.95,1.95,1.95,1.95,1.95,...,1.95,1.95,1.95,1.95,1.95,1.95,1.95,1.95,1.95,1.95
3,Afghanistan,February,2.597,2.597,2.597,2.597,2.597,2.597,2.597,2.597,...,2.597,2.597,2.597,2.597,2.597,2.597,2.597,2.597,2.597,2.597
5,Afghanistan,March,1.512,1.512,1.512,1.512,1.512,1.512,1.512,1.512,...,1.512,1.512,1.512,1.512,1.512,1.512,1.512,1.512,1.512,1.512
7,Afghanistan,April,1.406,1.406,1.406,1.406,1.406,1.406,1.406,1.406,...,1.406,1.406,1.406,1.406,1.406,1.406,1.406,1.406,1.406,1.406
9,Afghanistan,May,1.23,1.23,1.23,1.23,1.23,1.23,1.23,1.23,...,1.23,1.23,1.23,1.23,1.23,1.23,1.23,1.23,1.23,1.23


Transform dataframe to have a column for year

In [28]:
data = data.melt(id_vars=['Area','Months'],var_name='Year', value_name='Temperature Change')
data["Year"] = data["Year"].apply(lambda x: x.lstrip("Y"))
data.head(5)

Unnamed: 0,Area,Months,Year,Temperature Change
0,Afghanistan,January,1961,1.95
1,Afghanistan,February,1961,2.597
2,Afghanistan,March,1961,1.512
3,Afghanistan,April,1961,1.406
4,Afghanistan,May,1961,1.23


## Data Analysis

Can we identify recurring seasonal patterns in temperature anomalies within the dataset, and if so, which countries show the most pronounced seasonal variations?

Do abrupt changes or shifts in temperature anomalies occur within certain countries or regions, and can we characterize these patterns?

Are there any constraints or rules that govern the relationship between temperature change and other environmental factors like land use or geographical features?